pandas 条件赋值

 sqltxt = ''' 

 select t.grda_code,

       t.grda_xm,

       e.csrq,

       t.ym_mc,

       t.jz_sj,

       t.jz_zc,

       t.ym_yxq,

       t.ym_ph,

       t.ym_bm,

       t.ymsccj_mc,

       t.jzdd_mc,

       t.jzdd_dm

  from inoc_jzjl t, grda_cr e

 where e.grda_cr_lsh = t.grda_et_lsh

 and t.config_ymxx_lsh in ('2901','2902','2903','2904','2905','2906')

 and  t.jz_sj>=to_date('2021-01-01','yyyy-mm-dd')

 and jcqk='1'

 '''

 

jzjl=pd.read_sql(sqltxt,con)

jzjl.columns=[str(item).lower() for item in jzjl.columns]

jzjl['jz_sj']=jzjl['jz_sj'].dt.date

jzjl['csrq']=pd.to_datetime(jzjl['csrq'],format='%Y%m%d')


jzjl=jzjl[jzjl['ym_ph'].isin(['20210411','20210310','20210205','20210206','20201212','20210102'])]


test=jzjl


test['pc']=np.where(test['ym_ph'].isin(['20210411','20210310']),'第二批','第一批')

test['pc'].value_counts()


test.info()

test['csrq']=pd.to_datetime(test['csrq'],format='%Y%m%d')

test['jz_sj']=pd.to_datetime(test['jz_sj'])

#计算年龄

test['age']=round(((test['jz_sj']-test['csrq'])/pd.Timedelta(1, 'D'))/365,0)

test['xian']=test['jzdd_dm'].str[0:6]


test['agegrp']='unknow'

test.loc[test['age'].between(0, 30, inclusive=True), 'agegrp'] = '0-30'

test.loc[test['age'].between(31, 100, inclusive=True), 'agegrp'] = '31-100'

test['agegrp'].value_counts()


test.loc[test['ym_ph'].isin(['20210411','20210310']), 'pc'] = '第二批'

test.loc[~test['ym_ph'].isin(['20210411','20210310']), 'pc'] = '第一批'


test['agegrp']='unknow'

test.loc[(test['age']>=0) & (test['age']<=30), 'agegrp'] = '0-30'

test.loc[test['age']>30, 'agegrp'] = '>31'


test['agegrp'] = test.age.apply(lambda x: '0-30' if x<=30 else '>30')


test['agegrp'] = pd.cut(test['age'], bins = [-1, 30, 50, 60, 100], labels = ['0-30', '30-50', '50-60', '>60'])


tj=test.groupby(['pc','xian','jz_zc']).agg(n = pd.NamedAgg('xian', 'count')).reset_index()

tj.to_excel(r"C:\Users\xuefe\Desktop\tj.xlsx")

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理