博文

接种率数据处理

import math import pandas as pd import numpy as np import janitor import oracledb df=( pd.read_excel( '/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx' , sheet_name = 'sheet1' , skiprows = 3 ). clean_names(). apply( lambda x: x.fillna( method = "ffill" ) if x.name in [ 'unnamed_0' , 'unnamed_1' , 'unnamed_2' ] else x, axis = 0 ). filter( regex = '^un| 率 ' ). query( "unnamed_2 not in [' 总人数 ',' 全程接种 ']" ). apply( lambda x: pd.to_numeric(x.str.replace( "%" , "" ), errors = 'coerce' ) if ' 率 ' in x.name else x, axis = 0 ) ) # 重命名前 4 列 df.rename( columns ={df.columns[ 0 ]: 'dq' , df.columns[ 1 ]: 'bm' , df.columns[ 2 ]: 'ym' , df.columns[ 3 ]: 'jc' }, inplace = True ) # 重命名后 18 列 for i in range ( 4 , 22 ): df.rename( columns ={df.columns[i]: f'age_ { i - 3 } ' }, inplace = True ) # 按条件分别进行填充 # 'f' indi...

出生队列接种率处理

 library(tidyverse) library(lubridate) library(showtext) library(janitor) library(openxlsx) library(magrittr) library(hablar) library(readxl) library(ggcharts) library(purrr) showtext_auto(enable = TRUE) `%nin%` = Negate(`%in%`) df <- readxl::read_excel('/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx',sheet = 1,skip = 3) %>%    fill(c(`...1`,`...2`,`...3`),.direction='down') %>%    filter(`...3` %nin% c('总人数','全程接种')) %>%    select(starts_with('...')  | contains("接种率")) %>%    map_dfc(str_remove_all, pattern = "%") %>%    mutate_at(vars(contains("接种率")), as.numeric) %>%    select(-`接种率(%)...22`) # df %>%  #   write.xlsx('/mnt/c/Users/xuefe/Downloads/jzl.xlsx') names(df) <- c('dqmc','bm','ym','jc','age_1','age_2','age_3','age_4','age_5','age_6','age_7','age_8','age_9',               ...

RJDBC 连接 Oracle 数据库

# Setup steps performed before running this script:
#   1. Install the RJDBC package.
#   2. Install the Oracle driver used by RJDBC
#      (see "JDBC and UCP Downloads page" on oracle.com).
#   3. Download the jar and grant it execute permission:
#        chmod 755 ojdbc8.jar
library(RJDBC)

# Register the Oracle driver class, loading it from the downloaded jar.
jdbcDriver <- JDBC(
  driverClass = "oracle.jdbc.OracleDriver",
  classPath = "/usr/bin/ojdbc8.jar"
)

# Open a connection through the thin driver.
# NOTE(review): "username" / "password" look like placeholders -- substitute
# real credentials before use.
con <- dbConnect(
  jdbcDriver,
  "jdbc:oracle:thin:@//192.168.30.48:1521/JZDB1",
  "username",
  "password"
)

R 地址拆分

library(tidyverse) library(lubridate) library(showtext) library(janitor) library(openxlsx) library(stringi) showtext_auto() #时间需要修改 yw_date <- ymd('2022-11-16') `%nin%` = Negate(`%in%`) place_cut <- function(data) {   m=stri_match_all_regex(data,'[中]{0,1}[国]{0,1}([\u4e00-\u9fa5]*?(?:省|自治区|市|新疆|广西|内蒙古|宁夏))([\u4e00-\u9fa5]*?(?:市|区|县|自治州|盟)){0,1}([\u4e00-\u9fa5]*?(?:市|区|县|旗)){0,1}([\u4e00-\u9fa5]*?(?:乡|镇|街道|苏木)){0,1}([\u4e00-\u9fa5]*?(?:\\S+)){0,1}')   sheng=m[[1]][,2]   shi=m[[1]][,3]   xian=m[[1]][,4]   dizhi=str_c(sheng,shi,xian,sep=',')   return(dizhi) } ka <- read.csv('/mnt/d/1116 24时/报告卡.csv',fileEncoding = 'GB18030') %>%    mutate(有效证件号=toupper(str_remove_all(有效证件号,"'")),          报告卡录入时间=ymd_hms(报告卡录入时间),          订正终审时间=case_when(str_detect(订正终审时间,"\\.") ~ "",                           TRUE  ~ as.character(订正终审时...

pathlib 学习

  import os import pathlib from pathlib import Path import aspose.words as aw def convert_path ( win_path ) : tmp_path = win_path.replace ( ":" , "" ) tmp_path = tmp_path.replace ( " \\ " , "/" ) tmp_path = tmp_path.lower () tmp_path = "/mnt/" + tmp_path return ( tmp_path ) url = pathlib.Path ( convert_path ( r"C:\Users\xuefe\Downloads\Video" ) ) files = list ( url.glob ( '*.*' ) ) # pathlib.PosixPath 转 字符串 for file in files: file= str ( file ) doc = aw.Document ( file, aw.loading.LoadOptions ( "2022" ) ) doc.save ( file ) # file_dir = convert_path(r"C:\Users\xuefe\Downloads\Video") # files = os.listdir(file_dir) # # for file in files: # file_p=os.path.join(file_dir,file) # print(type(file_p)) # doc = aw.Document(file_p, aw.loading.LoadOptions("2022")) # doc.save(file_p) cwd = pathlib.Path.cwd () # 获取当前目录 home = Path.home () # 获...

tinytex包使用

 # 下载宏包,先修改为国内镜像源,比如改为清华大学的镜像源 tinytex::tlmgr_repo(url = "http://mirrors.tuna.tsinghua.edu.cn/CTAN/") tinytex::install_tinytex(version = "latest") tinytex::tlmgr_install('elegantbook') tinytex::tlmgr_install('fandol') #默认安装路径 tinytex::tinytex_root()  # 查看轻量级已包含哪些宏包 tl_pkgs()  # 解析错误日志,看看是缺什么宏包造成的 parse_packages("test.log")

pandas 条件赋值

  df["WhereCol"] = np.where((df.Cat == "A") & (df.B > 10), 1, 0) conditions = [ (df.Cat == "A") & (df.B > 10), (df.Cat == "B") & (df.B > 10) ] values = [1, 2] df["SelectCol"] = np.select(conditions, values, default=0) xg.case_when ( xg.A3.str.slice ( 16 , 17 ) .astype ( int ) % 2 == 0 , ' 女 ' , ~xg.A3.str.slice ( 16 , 17 ) .astype ( int ) % 2 == 0 , ' 男 ' , ' 不详 ' , column_name = " 性别 " ) xg=xg.case_when ( xg.A3.str.contains ( '^62' ) , ' 甘肃 ' , ~xg.A3.str.contains ( '^62' ) , ' 外省 ' , ' 其他 ' , column_name = " 归属 " )