pandas case_when 使用
jz= pd.read_sql(sqltxt, conn).rename(columns=str.lower)
jz=(
jz
.assign(
age=lambda x: (x['jz_sj']-x['csrq']).dt.days//365,
age_group=lambda df:df.age.case_when(
[
(lambda s:(s>=15) & (s<=44),'15-44'),
(lambda s:(s>=45) & (s<=59),'45-59'),
(lambda s:s>=60,'>60'),
(lambda s:s<15,'<15'),
]
),
year=lambda x: x['jz_sj'].dt.strftime('%Y')
)
)
(
jz.query("age_group!='unknown' & jzdd_dm.str.len()==10")
.groupby(['jzdd_mc','jzdd_dm','year','age_group'],as_index=False).agg(n=('ym_mc',np.size))
.pivot_table(index=['jzdd_mc','jzdd_dm'],columns=['year','age_group'],values='n')
.sort_values('jzdd_dm')
)
评论
发表评论