pandas 使用 NumPy（np.select）完成条件赋值
import pandas as pd
import numpy as np
import janitor
# Load the workbook and prepare the working frame:
#   1. normalise column names with clean_names() (presumably the DataFrame
#      method registered by `import janitor` -- confirm),
#   2. cast xt_rksj to datetime64[ns] so it can be compared against dates,
#   3. keep only the rows whose xt_rkjgdm is not null.
df = (
    pd.read_excel(r"/mnt/c/users/xuefeng/desktop/rk.xlsx")
    .clean_names()
    .astype({"xt_rksj": "datetime64[ns]"})
    .loc[lambda frame: frame["xt_rkjgdm"].notna()]
)
# Add a level column `gp` derived from sum_x_rksl_:
#   below 10000          -> '低'
#   10000 up to 20000    -> '中'  (upper bound excluded)
#   20000 and above      -> '高'
# Rows matching none of the conditions fall back to '不详'.
df = df.assign(
    gp=lambda frame: np.select(
        condlist=[
            frame["sum_x_rksl_"].lt(10000),
            frame["sum_x_rksl_"].ge(10000) & frame["sum_x_rksl_"].lt(20000),
            frame["sum_x_rksl_"].ge(20000),
        ],
        choicelist=["低", "中", "高"],
        default="不详",
    )
)
# Total of sum_x_rksl_ * ymgg_bm per (grp, sccj_mc, ym_mc), where grp labels a
# row '前' when xt_rksj <= 2023-01-17 and '后' when it is later.
# NOTE(review): the date string is compared as a timestamp, i.e. midnight of
# that day -- confirm whether rows later on 2023-01-17 should count as '后'.
#
# The row-wise product is computed once as a temporary column and aggregated
# with the built-in "sum". Multiplying each group's values by the whole
# df.ymgg_bm column inside the aggregation lambda re-aligned the full frame for
# every group, was only correct because the NaNs created by that alignment are
# skipped by the sum, and would break on duplicate index labels. The totals are
# the same; the result dtype now simply follows the two source columns.
(
    df
    .assign(
        grp=lambda d: np.select(
            [d["xt_rksj"] <= "2023-01-17", d["xt_rksj"] > "2023-01-17"],
            ["前", "后"],
            default="不详",  # neither comparison holds (e.g. xt_rksj is missing)
        ),
        # Temporary helper column; the name is chosen to avoid clashing with
        # existing columns and it does not appear in the aggregated output.
        _weighted_rksl=lambda d: d["sum_x_rksl_"] * d["ymgg_bm"],
    )
    .groupby(["grp", "sccj_mc", "ym_mc"], as_index=False)
    .agg(s=pd.NamedAgg(column="_weighted_rksl", aggfunc="sum"))
)
评论
发表评论