R 地址拆分

十一月 17, 2022

library(tidyverse)

library(lubridate)

library(showtext)

library(janitor)

library(openxlsx)

library(stringi)

showtext_auto()

# Business date for this run -- update it before every execution.
yw_date <- ymd("2022-11-16")

# Infix "not in": TRUE for each left-hand value that is absent from `table`.
`%nin%` <- function(x, table) {
  !(x %in% table)
}

#' Split a Chinese address into province, city and county parts.
#'
#' The address is matched against a single regular expression whose first
#' three capture groups are the province-level unit (`sheng`), the
#' prefecture-level unit (`shi`) and the county-level unit (`xian`).
#'
#' @param data Character vector of addresses (a single string also works).
#' @return Character vector of the same length as `data`. Each element is
#'   `"sheng,shi,xian"`. A level that is not present in the address is left
#'   empty (e.g. `"sheng,shi,"`). The element is `NA` when the address does
#'   not match the pattern at all (including `NA` input).
place_cut <- function(data) {
  pattern <- '[中]{0,1}[国]{0,1}([\u4e00-\u9fa5]*?(?:省|自治区|市|新疆|广西|内蒙古|宁夏))([\u4e00-\u9fa5]*?(?:市|区|县|自治州|盟)){0,1}([\u4e00-\u9fa5]*?(?:市|区|县|旗)){0,1}([\u4e00-\u9fa5]*?(?:乡|镇|街道|苏木)){0,1}([\u4e00-\u9fa5]*?(?:\\S+)){0,1}'

  # Take only the first match per address: a "match all" can yield several
  # rows for one string (e.g. text after a space that matches again), which
  # would make the result longer than one element per address.
  m <- stri_match_first_regex(data, pattern)

  # Column 1 is the whole match; it is NA when nothing matched.
  matched <- !is.na(m[, 1])

  # Columns 2-4 are sheng / shi / xian. Optional groups that did not take
  # part in the match are NA; blank them so one missing level does not turn
  # the whole joined string into NA.
  parts <- m[, 2:4, drop = FALSE]
  parts[is.na(parts)] <- ""

  dizhi <- str_c(parts[, 1], parts[, 2], parts[, 3], sep = ",")
  dizhi[!matched] <- NA_character_
  dizhi
}

# Load the report-card export (GB18030-encoded CSV) and normalise it:
#   * ID number: apostrophes removed, upper-cased
#   * entry time: parsed as a date-time
#   * both correction timestamps: values containing a "." are blanked first,
#     then everything is parsed as a date-time (blanks become NA)
#   * patient name and ID number are merged into a single `id` key column
ka <- read.csv("/mnt/d/1116 24时/报告卡.csv", fileEncoding = "GB18030") %>%
  mutate(
    有效证件号 = toupper(str_remove_all(有效证件号, "'")),
    报告卡录入时间 = ymd_hms(报告卡录入时间),
    across(
      c(订正终审时间, 订正报告时间),
      function(stamp) {
        cleaned <- case_when(
          str_detect(stamp, "\\.") ~ "",
          TRUE ~ as.character(stamp)
        )
        ymd_hms(cleaned)
      }
    )
  ) %>%
  tidyr::unite(col = id, c("患者姓名", "有效证件号"))

# Load the preliminary-screening export (GB18030-encoded CSV) and normalise it:
#   * ID number: apostrophes removed, upper-cased
#   * entry time: parsed as a date-time
#   * reporting area: split by place_cut() into sheng / shi / xian columns
#   * name and ID number are merged into a single `id` key column
chu <- read.csv(
  "/mnt/d/1116 24时/新冠阳性初筛报告卡.csv",
  fileEncoding = "GB18030"
) %>%
  mutate(证件号 = toupper(str_remove_all(证件号, "'"))) %>%
  mutate(录入时间 = ymd_hms(录入时间)) %>%
  # place_cut() returns one "sheng,shi,xian" string per address.
  mutate(dizhi = purrr::map_chr(报告地区, place_cut)) %>%
  tidyr::separate(col = dizhi, into = c("sheng", "shi", "xian"), sep = ",") %>%
  tidyr::unite(col = id, c("姓名", "证件号"))

# Task 1: keep the screening cards entered on the business date, dropping
# deleted cards and excluded outcomes, then export the cleaned set.
#
# 录入时间 is a POSIXct (parsed with ymd_hms) while yw_date is a Date. Comparing
# the two directly falls back to comparing seconds-since-epoch with
# days-since-epoch, which never matches, so convert to Date before comparing.
chu_clean <- chu %>%
  filter(
    卡片状态 != "已删除",
    转归状态 != "排除",
    as_date(录入时间) == yw_date
  )

write.xlsx(chu_clean, "/mnt/d/1116 24时/初筛清理后.xlsx")

# Task 2: export every cleaned screening card whose `id` occurs more than once.
dup_cards <- get_dupes(chu_clean, id)
write.xlsx(dup_cards, "/mnt/d/1116 24时/重复.xlsx")

# Task 3: count screening cards per city / county, leaving out people who
# already have a report card with final review on or before the business date.
#
# 订正终审时间 is a POSIXct (parsed with ymd_hms) while yw_date is a Date. A direct
# comparison would compare seconds-since-epoch with days-since-epoch, so the
# timestamp is converted to a Date first.
tmp <- ka %>%
  filter(
    审核状态 == "已终审卡",
    as_date(订正终审时间) <= yw_date
  )

chu %>%
  filter(id %nin% tmp$id) %>%
  # count() gives the same shi / xian / n columns as group_by() + summarise()
  # but returns an ungrouped result.
  count(shi, xian) %>%
  write.xlsx("/mnt/d/1116 24时/现有初筛统计.xlsx")

搜索此博客

xuefliang

R 地址拆分

评论

发表评论

此博客中的热门博文

windows 命令行下查看端口占用情况的方法

Android 7.0 开启Google Now

Rstudio 使用代理

R 地址拆分

评论

发表评论

此博客中的热门博文

windows 命令行下查看端口占用情况的方法

Android 7.0 开启Google Now

Rstudio 使用代理

windows 命令行下查看端口占用情况的方法