pandas 条件赋值
sqltxt = '''
select t.grda_code,
t.grda_xm,
e.csrq,
t.ym_mc,
t.jz_sj,
t.jz_zc,
t.ym_yxq,
t.ym_ph,
t.ym_bm,
t.ymsccj_mc,
t.jzdd_mc,
t.jzdd_dm
from inoc_jzjl t, grda_cr e
where e.grda_cr_lsh = t.grda_et_lsh
and t.config_ymxx_lsh in ('2901','2902','2903','2904','2905','2906')
and t.jz_sj>=to_date('2021-01-01','yyyy-mm-dd')
and jcqk='1'
'''
jzjl=pd.read_sql(sqltxt,con)
jzjl.columns=[str(item).lower() for item in jzjl.columns]
jzjl['jz_sj']=jzjl['jz_sj'].dt.date
jzjl['csrq']=pd.to_datetime(jzjl['csrq'],format='%Y%m%d')
jzjl=jzjl[jzjl['ym_ph'].isin(['20210411','20210310','20210205','20210206','20201212','20210102'])]
test=jzjl
test['pc']=np.where(test['ym_ph'].isin(['20210411','20210310']),'第二批','第一批')
test['pc'].value_counts()
test.info()
test['csrq']=pd.to_datetime(test['csrq'],format='%Y%m%d')
test['jz_sj']=pd.to_datetime(test['jz_sj'])
#计算年龄
test['age']=round(((test['jz_sj']-test['csrq'])/pd.Timedelta(1, 'D'))/365,0)
test['xian']=test['jzdd_dm'].str[0:6]
test['agegrp']='unknow'
test.loc[test['age'].between(0, 30, inclusive=True), 'agegrp'] = '0-30'
test.loc[test['age'].between(31, 100, inclusive=True), 'agegrp'] = '31-100'
test['agegrp'].value_counts()
test.loc[test['ym_ph'].isin(['20210411','20210310']), 'pc'] = '第二批'
test.loc[~test['ym_ph'].isin(['20210411','20210310']), 'pc'] = '第一批'
test['agegrp']='unknow'
test.loc[(test['age']>=0) & (test['age']<=30), 'agegrp'] = '0-30'
test.loc[test['age']>30, 'agegrp'] = '>31'
test['agegrp'] = test.age.apply(lambda x: '0-30' if x<=30 else '>30')
test['agegrp'] = pd.cut(test['age'], bins = [-1, 30, 50, 60, 100], labels = ['0-30', '30-50', '50-60', '>60'])
tj=test.groupby(['pc','xian','jz_zc']).agg(n = pd.NamedAgg('xian', 'count')).reset_index()
tj.to_excel(r"C:\Users\xuefe\Desktop\tj.xlsx")
评论
发表评论