计算年龄并分组
# Load the case-level CSV export (GB18030-encoded), normalise the column names
# with clean_names(), and keep only the rows whose vaccine name (ym_mc) is
# "麻腮风疫苗" (measles-mumps-rubella vaccine).
df <- "C:\\Users\\xuefeng\\Desktop\\个案.csv" %>%
  windows_to_wsl_path() %>%
  read.csv(fileEncoding = "GB18030") %>%
  clean_names() %>%
  filter(ym_mc == "麻腮风疫苗")
# Build `test`: one row per retained record, with
#   * jz_sj / csrq parsed to Date,
#   * age    - years elapsed between csrq and jz_sj
#              (presumably birth date and vaccination date - confirm),
#   * jz_grp - label for the period in which jz_sj falls.
# Only rows with 0 < age < 10 and jz_zc <= 2 are kept.
test <- df %>%
  # Keep only the text before the first whitespace run in jz_sj (the part that
  # ymd() can parse). A single `into` name with extra = "drop" discards the
  # remainder without creating a throwaway column, and does not warn when a
  # value has no whitespace at all.
  separate(jz_sj, into = "jz_sj", sep = "\\s+", extra = "drop") %>%
  mutate(
    jz_sj = ymd(jz_sj),
    csrq = ymd(csrq),
    # Exact calendar years. Dividing a day count by 365 ignores leap days, so
    # a record a few days short of the 10th anniversary of csrq would get
    # age >= 10 and be dropped by the filter below.
    age = time_length(interval(csrq, jz_sj), "years")
  ) %>%
  # Rows where any condition is NA (e.g. unparseable dates) are dropped too.
  filter(age > 0, age < 10, jz_zc <= 2) %>%
  mutate(
    jz_grp = case_when(
      year(jz_sj) == 2018 ~ "2018",
      year(jz_sj) == 2019 ~ "2019",
      # 2020 is split into January-May and June-December.
      year(jz_sj) == 2020 & month(jz_sj) <= 5 ~ "2020(1-5月)",
      year(jz_sj) == 2020 & month(jz_sj) >= 6 ~ "2020(6-12月)",
      year(jz_sj) == 2021 ~ "2021",
      year(jz_sj) == 2022 ~ "2022",
      # Any other year falls into the catch-all "其他" (other) group.
      TRUE ~ "其他"
    )
  )
# Load the case-level CSV export (GB18030-encoded), keep only the rows whose
# YM_MC is '麻腮风疫苗' (measles-mumps-rubella vaccine), then normalise the
# column names with clean_names().
# YM_BM is read as text - presumably to preserve the code exactly as written
# in the file (e.g. leading zeros); confirm.
df = (
    pd.read_csv(
        windows_to_wsl_path(r"C:\Users\xuefeng\Desktop\个案.csv"),
        encoding="GB18030",
        dtype={"YM_BM": str},
    )
    .query("YM_MC=='麻腮风疫苗'")
    .clean_names()
)
# Build `test` from `df` with:
#   * jz_sj / csrq parsed to datetime,
#   * age - days between csrq and jz_sj divided by 365 (approximate years;
#           leap days are not accounted for),
#   * grp - label for the period in which jz_sj falls.
# NOTE(review): unlike the R pipeline earlier in this file, no age / jz_zc
# filter is applied here, the group column is named `grp` (R: `jz_grp`), and
# the fallback label is '不详' (R: "其他") - confirm these differences are
# intended.
test = df.assign(
    # Keep only the text before the first whitespace run. The pattern must be
    # a raw string: "\s" in a normal string literal is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+).
    jz_sj=pd.to_datetime(df.jz_sj.str.split(r"\s+").str[0], format="%Y-%m-%d"),
    csrq=pd.to_datetime(df.csrq, format="%Y-%m-%d"),
    # The lambdas receive the frame with the jz_sj / csrq columns assigned
    # above, so they operate on the parsed datetimes.
    age=lambda x: (x.jz_sj - x.csrq).dt.days / 365,
    grp=lambda x: np.select(
        [
            x.jz_sj.dt.year == 2018,
            x.jz_sj.dt.year == 2019,
            # 2020 is split into January-May and June-December.
            (x.jz_sj.dt.year == 2020) & (x.jz_sj.dt.month <= 5),
            (x.jz_sj.dt.year == 2020) & (x.jz_sj.dt.month >= 6),
            x.jz_sj.dt.year == 2021,
            x.jz_sj.dt.year == 2022,
        ],
        ["2018", "2019", "2020(1-5月)", "2020(6-12月)", "2021", "2022"],
        # Rows matching none of the conditions get '不详' (unknown).
        default="不详",
    ),
)
评论
发表评论