# R 地址拆分 (splitting Chinese addresses into province / city / county in R)
library(tidyverse)
library(lubridate)
library(showtext)
library(janitor)
library(openxlsx)
library(stringi)
# Switch on showtext's automatic text rendering for graphics devices.
showtext_auto()

# Business date this run reports on -- edit before every run.
yw_date <- ymd("2022-11-16")

# Negated `%in%`: TRUE where an element of the left operand is absent from
# the right operand.
`%nin%` <- function(x, table) !(x %in% table)
#' Extract the province / city / county prefix of Chinese address strings.
#'
#' The pattern captures, in order: province-level unit, prefecture-level unit,
#' county-level unit, township-level unit and the remainder. Only the first
#' three capture groups are returned.
#'
#' @param data Character vector of addresses (a single string is the usual
#'   case, e.g. when called through `purrr::map_chr()`).
#' @return Character vector with one element per element of `data`, formatted
#'   as "province,city,county". An element is `NA` when the address does not
#'   match at all, or when any of the three levels is missing from the match,
#'   because `str_c()` propagates `NA`.
place_cut <- function(data) {
  pattern <- '[中]{0,1}[国]{0,1}([\u4e00-\u9fa5]*?(?:省|自治区|市|新疆|广西|内蒙古|宁夏))([\u4e00-\u9fa5]*?(?:市|区|县|自治州|盟)){0,1}([\u4e00-\u9fa5]*?(?:市|区|县|旗)){0,1}([\u4e00-\u9fa5]*?(?:乡|镇|街道|苏木)){0,1}([\u4e00-\u9fa5]*?(?:\\S+)){0,1}'

  # Use the *first* match only. Matching all occurrences yields several rows
  # when an address contains whitespace followed by another province/city-like
  # token, which made the function return more than one value per address and
  # broke length-1 callers such as map_chr().
  m <- stri_match_first_regex(data, pattern)

  # Column 1 is the whole match; columns 2-4 are province, city and county.
  str_c(m[, 2], m[, 3], m[, 4], sep = ",")
}
# Load the report-card export (CSV encoded as GB18030).
ka <- read.csv('/mnt/d/1116 24时/报告卡.csv', fileEncoding = 'GB18030')

# Clean the columns used further down:
#   * ID number: strip apostrophes and upper-case it.
#   * Entry time: parse as a date-time.
#   * Both revision timestamps: any value containing "." is blanked first,
#     then the column is parsed as a date-time.
# Finally merge patient name and ID number into one `id` key column.
ka <- ka %>%
  mutate(
    有效证件号 = toupper(str_remove_all(有效证件号, "'")),
    报告卡录入时间 = ymd_hms(报告卡录入时间),
    across(
      c(订正终审时间, 订正报告时间),
      function(stamp) {
        cleaned <- case_when(
          str_detect(stamp, "\\.") ~ "",
          TRUE ~ as.character(stamp)
        )
        ymd_hms(cleaned)
      }
    )
  ) %>%
  unite(id, 患者姓名, 有效证件号)
# Load the positive-screening report-card export (CSV encoded as GB18030).
chu <- read.csv('/mnt/d/1116 24时/新冠阳性初筛报告卡.csv', fileEncoding = 'GB18030')

# Clean the ID number and entry time, split the reporting region into
# province / city / county columns (`sheng`, `shi`, `xian`) via place_cut(),
# and merge name and ID number into one `id` key column.
chu <- chu %>%
  mutate(
    证件号 = toupper(str_remove_all(证件号, "'")),
    录入时间 = ymd_hms(录入时间),
    dizhi = map_chr(报告地区, place_cut)
  ) %>%
  separate(dizhi, into = c("sheng", "shi", "xian"), sep = ",") %>%
  unite(id, 姓名, 证件号)
# Task 1: screening cards entered on the business date that are neither
# deleted nor ruled out.
#
# The timestamp is converted with as_date() before comparing with `yw_date`.
# floor_date() would keep it POSIXct, and base R compares POSIXct with Date by
# their raw numbers (seconds vs. days) after an "Incompatible methods" warning,
# so such a comparison never selects the intended rows.
chu_clean <- chu %>%
  filter(
    卡片状态 != '已删除',
    转归状态 != '排除',
    as_date(录入时间) == yw_date
  )
write.xlsx(chu_clean, '/mnt/d/1116 24时/初筛清理后.xlsx')

# Task 2: rows of the cleaned set that share the same `id`.
chu_clean %>%
  get_dupes(id) %>%
  write.xlsx('/mnt/d/1116 24时/重复.xlsx')

# Task 3: per city / county count of screening cards whose `id` has no
# finally-approved report card dated on or before the business date.
tmp <- ka %>%
  filter(
    审核状态 == '已终审卡',
    as_date(订正终审时间) <= yw_date
  )
# NOTE(review): this tallies all of `chu`, not `chu_clean` -- confirm that
# deleted / ruled-out cards and other entry dates are meant to be included.
chu %>%
  filter(id %nin% tmp$id) %>%
  count(shi, xian) %>%
  write.xlsx("/mnt/d/1116 24时/现有初筛统计.xlsx")
# 评论
# 发表评论