博文

目前显示的是 2022 年 12 月的博文

接种率数据处理

import math import pandas as pd import numpy as np import janitor import oracledb df=( pd.read_excel( '/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx' , sheet_name = 'sheet1' , skiprows = 3 ). clean_names(). apply( lambda x: x.fillna( method = "ffill" ) if x.name in [ 'unnamed_0' , 'unnamed_1' , 'unnamed_2' ] else x, axis = 0 ). filter( regex = '^un| 率 ' ). query( "unnamed_2 not in [' 总人数 ',' 全程接种 ']" ). apply( lambda x: pd.to_numeric(x.str.replace( "%" , "" ), errors = 'coerce' ) if ' 率 ' in x.name else x, axis = 0 ) ) # 重命名前 4 列 df.rename( columns ={df.columns[ 0 ]: 'dq' , df.columns[ 1 ]: 'bm' , df.columns[ 2 ]: 'ym' , df.columns[ 3 ]: 'jc' }, inplace = True ) # 重命名后 18 列 for i in range ( 4 , 22 ): df.rename( columns ={df.columns[i]: f'age_ { i - 3 } ' }, inplace = True ) # 按条件分别进行填充 # 'f' indi

出生队列接种率处理

 library(tidyverse) library(lubridate) library(showtext) library(janitor) library(openxlsx) library(magrittr) library(hablar) library(readxl) library(ggcharts) library(purrr) showtext_auto(enable = TRUE) `%nin%` = Negate(`%in%`) df <- readxl::read_excel('/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx',sheet = 1,skip = 3) %>%    fill(c(`...1`,`...2`,`...3`),.direction='down') %>%    filter(`...3` %nin% c('总人数','全程接种')) %>%    select(starts_with('...')  | contains("接种率")) %>%    map_dfc(str_remove_all, pattern = "%") %>%    mutate_at(vars(contains("接种率")), as.numeric) %>%    select(-`接种率(%)...22`) # df %>%  #   write.xlsx('/mnt/c/Users/xuefe/Downloads/jzl.xlsx') names(df) <- c('dqmc','bm','ym','jc','age_1','age_2','age_3','age_4','age_5','age_6','age_7','age_8','age_9',