博文

gtsummary 制表

library(gtsummary) library(knitr) theme_gtsummary_language("en", big.mark = "") options(knitr.kable.NA = '') library(flextable) library(officer)  #分类变量显示百分比,2位小数 set_gtsummary_theme(list(   `tbl_summary-fn:percent_fun` = function(x) sprintf("%.2f", x * 100) )) theme_gtsummary_journal("lancet") # 允许不同风格相互叠加 theme_gtsummary_compact() my_test <- function(data, variable, by, ...) {   if (is.numeric(data[[variable]])) {     # For continuous variables, use oneway.test     oneway.test(data[[variable]] ~ as.factor(data[[by]])) %>%       broom::tidy() %>%       select(statistic, p.value)   } else {     # For categorical variables, use chisq.test     chisq.test(table(data[[variable]], data[[by]])) %>%       broom::tidy() %>%       select(statistic, p.value)   } } my_table <- df %>%   select(服药依从性, 年龄, 民族, 性别, 文化程度, 职业, 婚姻状况, 经济状况, 户别, 医疗费用支付方式, 家族史) %>%   tbl_summary(by = 服药依从性, statistic = (     list(       all_continuous()

ibis 测试

import ibis import pandas as pd from ibis import _ penguins = ibis . examples .penguins. fetch () df = pd . DataFrame (     [[ "a" , 1 , 2 ], [ "b" , 3 , 4 ]],     columns = [ "one" , "two" , "three" ],     index = [ 5 , 6 ], ) t = ibis . memtable ( df , name = "t" ) ( penguins . filter ([ _ .species == "Adelie" , _ .island.isin([ "Biscoe" , "Dream" ])])) ( penguins . limit ( 11 , offset = 9 )) ( penguins . order_by ([ _ .island, _ .bill_length_mm.desc()])) ( penguins . select ( _ .bill_length_mm, _ .sex)) ( penguins . rename ( bill_length = "bill_length_mm" , bill_depth = "bill_depth_mm" )) (     penguins . mutate ( ones = 1 , bill_length = _ .bill_length_mm / 10 )     . mutate ( bill_length_squared = _ .bill_length ** 2 )     . select ( _ .ones, _ .bill_length_mm, _ .bill_length, _ .bill_length_squared) ) (     penguins . select (         _ .island,         _ .

pandas case_when 使用

 jz= pd.read_sql(sqltxt, conn).rename(columns=str.lower) jz=(     jz     .assign(         age=lambda x: (x['jz_sj']-x['csrq']).dt.days//365,         age_group=lambda df:df.age.case_when(             [                 (lambda s:(s>=15) & (s<=44),'15-44'),                 (lambda s:(s>=45) & (s<=59),'45-59'),                 (lambda s:s>=60,'>60'),                 (lambda s:s<15,'<15'),             ]         ),         year=lambda x: x['jz_sj'].dt.strftime('%Y')     ) ) (     jz.query("age_group!='unknown' & jzdd_dm.str.len()==10")     .groupby(['jzdd_mc','jzdd_dm','year','age_group'],as_index=False).agg(n=('ym_mc',np.size))     .pivot_table(index=['jzdd_mc','jzdd_dm'],columns=['year','age_group'],values='n')     .sort_values('jzdd_dm') )

出生队列接种率数据预处理

 gnldet <- readxl::read_xlsx('/mnt/c/Users/xuefliang/Downloads/gnldet_jzl.xlsx', sheet = 1, skip = 2) %>%   slice(-1) %>%   fill(区划名称, 区划编码, 疫苗, .direction = "down") %>%   rename(jc = `...4`) %>%   {     new_names <- c()     for (i in 1:18) {       new_names <- c(new_names, paste0(i, "岁接种数"), paste0(i, "岁接种率"))     }     old_names <- colnames(.)     colnames(.)[5:length(old_names)] <- new_names     .   } %>%   mutate(across(contains("率"), ~ as.numeric(str_replace_all(., "%", ""))),          across(contains("数"), ~ as.numeric(.))) %>%   filter(疫苗 != '总人数') gnldet %>%    filter(疫苗=='乙肝疫苗' & jc=='1.0') %>%    select(区划名称,区划编码,疫苗,jc,`1岁接种率`) %>%    mutate(`1岁接种率大于90` = `1岁接种率` > 90) %>%    filter(`1岁接种率大于90`==FALSE) ->test

率及其95%置信区间

  binom.test(3,10,conf.level = 0.95)#x为分子,y为分母 binom.test(30,100,conf.level = 0.95)#x为分子,y为分母

R json解析

 library(jsonlite) ch1 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921201605302722(1).txt') ch2 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622927202110102510(1).txt') ch3 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202303047214(1).txt') ch4 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202403302112(1).txt') ch5 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202403011825(1).txt') person1 <- as_tibble(ch1$data$PersonInfoList) person2 <- as_tibble(ch2$data$PersonInfoList) person3 <- as_tibble(ch3$data$PersonInfoList) person4 <- as_tibble(ch4$data$PersonInfoList) person5 <- as_tibble(ch5$data$PersonInfoList) person <- bind_rows(person1,person2,person3,person4,person5) vaccination <- person %>%   unnest(VaccinationInfoList) %>%    mutate(EntryDate=ymd_hms(EntryDate),UpdateDate=ymd_hms(UpdateDate),UplodeDate=ymd_hms(UplodeDate)) %>%    mutate(scjs = case_when(     difftime(UplodeDate, EntryD

按疫苗类别接种剂次排序后重新赋值

    def hbv ( self ):         df = self . _df         df .query( "vaccination_code in ['0201', '0202', '0203']" , inplace = True )         df [ 'vaccine_name' ] = '乙肝疫苗'         df = df .groupby( 'id_x' , group_keys = False ).apply(             lambda x : x .sort_values( by = 'vaccination_date' , ascending = True )             .assign( jc = range ( 1 , len ( x ) + 1 ))             .reset_index( drop = True )         )         return df bind_rows(jzjl,jzjl2) %>% mutate(mc = if_else(mc %in% c('麻腮风疫苗', '麻风疫苗'), '含麻疹成分疫苗', mc)) %>% filter(mc=='含麻疹成分疫苗') %>% group_by(grda_code) %>% arrange(jz_sj) %>% mutate(jz_zc = row_number()) ->tmp1