ibis 表关联
import ibis
import ibis.selectors as s
from ibis import _
ibis.options.interactive = True
con=ibis.duckdb.connect('/mnt/d/xg.db')
mon3=con.table('mon3')
mon4=con.table('mon4')
mon5=con.table('mon5')
mon6=con.table('mon6')
mon7=con.table('mon7')
mon8=con.table('mon8')
mon9=con.table('mon9')
mon10=con.table('mon10')
mon11=con.table('mon11')
mon12=con.table('mon12')
jg=con.read_xlsx('/mnt/d/疫苗批号价格.xlsx')
jg=(
jg
.distinct(on=['疫苗批号','价格'],keep='first')
.select('疫苗批号','价格')
)
#7026,2021030002G
(
mon4
.group_by('YM_PH')
.aggregate(n=_.GRDA_CODE.count())
.asof_join(jg,on=('YM_PH','疫苗批号'))
.filter(_.价格==90)
)
#3,NCOV202104002V
(
mon5
.group_by('YM_PH')
.aggregate(n=_.GRDA_CODE.count())
.asof_join(jg,on=('YM_PH','疫苗批号'))
.filter(_.价格==80)
)
#2,202104003K
(
mon5
.group_by('YM_PH')
.aggregate(n=_.GRDA_CODE.count())
.asof_join(jg,on=('YM_PH','疫苗批号'))
.filter(_.价格==85.5)
)
#3,2021113913,2021113912
(
mon6
.group_by('YM_PH')
.aggregate(n=_.GRDA_CODE.count())
.asof_join(jg,on=('YM_PH','疫苗批号'))
.filter(_.价格==21)
)
tmp1=(
mon4
.filter(_.YM_PH=='2021030002G')
.head(7026)
.drop("AAA")
.execute()
)
tmp2=(
mon5
.filter(_.YM_PH=='NCOV202104002V')
.head(3)
.drop(['column00','AAA'])
.execute()
)
tmp3=(
mon5
.filter(_.YM_PH=='202104003K')
.head(2)
.drop(['column00','AAA'])
.execute()
)
tmp4=(
mon6
.filter(_.YM_PH.isin(['2021113913','2021113912']))
.drop(['column00','AAA'])
.execute()
)
import pandas as pd
tmp=pd.concat([tmp1,tmp2,tmp3,tmp4])
tmp=con.read_xlsx('/mnt/d/新冠个案7034.xlsx')
zd=con.read_xlsx('/mnt/d/疫苗字典.xlsx')
cs=con.read_xlsx('/mnt/d/生产厂家.xlsx')
zd=(
zd
.distinct(on=('SCCJ_MC','YM_PH'),keep='first')
.select('SCCJ_MC','YM_PH')
.asof_join(cs,on=('SCCJ_MC','SCCJ_MC'))
.drop('SCCJ_MC_right')
)
tmp=(
tmp
.asof_join(zd,on=('YM_PH','YM_PH'))
.mutate(CONFIG_CS_LSH=_.CONFIG_CS_LSH_right)
.mutate(SCCJ_MC=_.SCCJ_MC_right)
.drop('SCCJ_MC_right','YM_PH_right','CONFIG_CS_LSH_right')
)
# tmp.execute().to_excel('/mnt/d/新冠个案7034.xlsx',index=False)
tj=(
tmp
.asof_join(jg,on=('YM_PH','疫苗批号'))
.group_by('SCCJ_MC','疫苗批号','价格')
.aggregate(jc=_.GRDA_CODE.count(),
fy=_.价格.sum())
)
tj.execute().to_excel('/mnt/d/新冠个案7034tj.xlsx',index=False)
sh=con.read_xlsx('/mnt/c/Users/xuefl/Downloads/副本分厂家损耗金额.xlsx')
bz=con.read_xlsx('/mnt/c/Users/xuefl/Downloads/副本分厂家分单价金额.xlsx')
# asof_join 通常用于时间序列近似连接,而不是简单的相等条件连接。如果你只是想进行普通的相等条件连接,建议使用 join、left_join、right_join 或 outer_join 方法。
(
bz
.mutate(入库剂次数=_.入库剂次数.cast('int64'),
接种剂次数=_.接种剂次数.cast('int64'),
已付金额=_.已付金额.cast('int64'),
接种金额=_.接种金额.cast('int64'))
.left_join(sh,[('生产厂家','sccj_mc'),('单价','疫苗价格')])
)
评论
发表评论