pandas 计算年龄，并分组统计非重复计数和年龄小于7岁的非重复计数
import pandas as pd
import numpy as np
import janitor
# Load the source workbook. The two *_BM columns are read as `object` so pandas
# does not infer a numeric dtype for them; `clean_names()` comes from pyjanitor
# and normalizes the column labels (later code refers to them in lower case).
sc = pd.read_excel(
    "/mnt/c/Users/xuefeng/Downloads/非重卡删除.xlsx",
    dtype={
        'SC_GLDW_BM': 'object',
        'YM_BM': 'object',
    },
).clean_names()
# Count rows per `shi` (the first four characters of `sc_gldw_bm`; presumably a
# city-level code -- confirm against the data dictionary) among the records
# whose `ym_mc` starts with "新冠", and write the table to an Excel file.
(
    sc
    .query('ym_mc.str.startswith("新冠")')
    # Cast to pandas' string dtype before slicing with the .str accessor.
    .astype({'sc_gldw_bm': 'string'})
    .assign(shi=lambda frame: frame['sc_gldw_bm'].str.slice(0, 4))
    .groupby('shi')
    .agg(count=('shi', 'count'))
    .reset_index()
    .sort_values(by='shi')
    .to_excel("/mnt/c/Users/xuefeng/Downloads/非重卡删除1.xlsx")
)
# Build the working frame used by the per-`xian` summary:
#   xian -- first 6 characters of `sc_gldw_bm`
#   shi  -- first 4 characters of `sc_gldw_bm`
#   csrq -- characters 6..13 of `zjhm` (presumably the YYYYMMDD birth date
#           embedded in the ID number -- TODO confirm)
#   age  -- (`jz_sj` - `csrq`) expressed in units of 365.25 days
test = (
    sc
    .assign(
        xian=lambda frame: frame.sc_gldw_bm.str.slice(0, 6),
        shi=lambda frame: frame.sc_gldw_bm.str.slice(0, 4),
        csrq=lambda frame: frame.zjhm.str.slice(6, 14),
    )
    # Keep rows whose `sc_gldw_bm` starts with '6211' and whose `csrq` is an
    # 8-character value beginning with '19' or '20'.
    .query("sc_gldw_bm.str.startswith('6211') & csrq.str.len()==8 & csrq.str.slice(0,2) in ('19','20')")
    .assign(
        # errors='coerce' turns a `csrq` that is not a valid date into NaT,
        # which makes the resulting age NaN instead of raising.
        age=lambda frame: (
            pd.to_datetime(frame.jz_sj)
            - pd.to_datetime(frame.csrq, format='%Y%m%d', errors='coerce')
        ) / pd.Timedelta(days=365.25)
    )
)
# Per-`xian` summary of distinct `zjhm` values:
#   n    -- distinct `zjhm` over all rows of the group
#   age7 -- distinct `zjhm` over the rows of the group with age <= 7
#
# The ID is blanked out once, up front, on rows that fail `age <= 7` (a NaN age
# also fails the comparison). `nunique` ignores missing values, so counting the
# masked column yields the age-restricted distinct count. This replaces a
# per-group lambda that reached back into the global `test`, rebuilt the
# full-length boolean mask for every group and depended on `.loc` re-aligning
# that mask to the group's index.
(
    test
    .assign(zjhm_age7=lambda frame: frame.zjhm.where(frame.age <= 7))
    .groupby('xian')
    .agg(n=('zjhm', 'nunique'), age7=('zjhm_age7', 'nunique'))
)
评论
发表评论