计算年龄且分组

 df <- read.csv(windows_to_wsl_path('C:\\Users\\xuefeng\\Desktop\\个案.csv'),fileEncoding = "GB18030") %>% 

  clean_names() %>% 

  filter(ym_mc=='麻腮风疫苗')


# Derive the age (in years) at jz_sj measured from csrq, keep the records
# with 0 < age < 10 and jz_zc <= 2, and label each one with a period group
# based on the year/month of jz_sj. The result is stored in `test`.
test <- df %>%
  # Keep only the text before the first whitespace run (the date part); the
  # second piece lands in `rest` and anything further is dropped.
  # fill = "right" avoids a warning for values that have no second piece.
  separate(
    jz_sj,
    into = c("jz_sj", "rest"),
    sep = "\\s+",
    extra = "drop",
    fill = "right"
  ) %>%
  mutate(jz_sj = ymd(jz_sj), csrq = ymd(csrq)) %>%
  # Calendar-exact age: dividing a day count by 365 ignores leap days and
  # misclassifies records that sit close to the age < 10 boundary.
  mutate(age = time_length(interval(csrq, jz_sj), "years")) %>%
  filter(age > 0, age < 10, jz_zc <= 2) %>%
  mutate(jz_grp = case_when(
    year(jz_sj) == 2018 ~ "2018",
    year(jz_sj) == 2019 ~ "2019",
    # 2020 is split into January-May and June-December.
    year(jz_sj) == 2020 & month(jz_sj) <= 5 ~ "2020(1-5月)",
    year(jz_sj) == 2020 & month(jz_sj) >= 6 ~ "2020(6-12月)",
    year(jz_sj) == 2021 ~ "2021",
    year(jz_sj) == 2022 ~ "2022",
    # Any other year falls through to the catch-all label.
    TRUE ~ "其他"
  ))



# Load the GB18030-encoded case export with YM_BM read as text, keep only
# the rows whose YM_MC is 麻腮风疫苗, then apply clean_names() to the
# column labels (presumably the pyjanitor method -- confirm).
df = (
    pd.read_csv(
        windows_to_wsl_path(r"C:\Users\xuefeng\Desktop\个案.csv"),
        encoding='GB18030',
        dtype={'YM_BM': str},
    )
    .query("YM_MC=='麻腮风疫苗'")
    .clean_names()
)

# Parse jz_sj (date part only) and csrq as dates, derive the age in years
# between them, and label each row with a period group based on the
# year/month of jz_sj.
# NOTE(review): unlike the R pipeline earlier in this file, no age / jz_zc
# filter is applied here, the column is named `grp` rather than `jz_grp`, and
# the fallback label is '不详' rather than "其他" -- confirm this is intended.
test = df.assign(
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    jz_sj=pd.to_datetime(df.jz_sj.str.split(r"\s+").str[0], format="%Y-%m-%d"),
    csrq=pd.to_datetime(df.csrq, format="%Y-%m-%d"),
    # NOTE(review): days / 365 ignores leap days, so ages come out slightly
    # high; acceptable only if approximate ages are enough -- confirm.
    age=lambda x: (x.jz_sj - x.csrq).dt.days / 365,
    grp=lambda x: np.select(
        [
            (x.jz_sj.dt.year == 2018),
            (x.jz_sj.dt.year == 2019),
            # 2020 is split into January-May and June-December.
            ((x.jz_sj.dt.year == 2020) & (x.jz_sj.dt.month <= 5)),
            ((x.jz_sj.dt.year == 2020) & (x.jz_sj.dt.month >= 6)),
            (x.jz_sj.dt.year == 2021),
            (x.jz_sj.dt.year == 2022),
        ],
        ['2018', '2019', '2020(1-5月)', '2020(6-12月)', '2021', '2022'],
        default='不详',
    ),
)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理