博文

目前显示的是 2024的博文

gtsummary 制表

library(gtsummary) library(knitr) theme_gtsummary_language("en", big.mark = "") options(knitr.kable.NA = '') library(flextable) library(officer)  #分类变量显示百分比,2位小数 set_gtsummary_theme(list(   `tbl_summary-fn:percent_fun` = function(x) sprintf("%.2f", x * 100) )) theme_gtsummary_journal("lancet") # 允许不同风格相互叠加 theme_gtsummary_compact() my_test <- function(data, variable, by, ...) {   if (is.numeric(data[[variable]])) {     # For continuous variables, use oneway.test     oneway.test(data[[variable]] ~ as.factor(data[[by]])) %>%       broom::tidy() %>%       select(statistic, p.value)   } else {     # For categorical variables, use chisq.test     chisq.test(table(data[[variable]], data[[by]])) %>%       broom::tidy() %>%       select(statistic, p.value)   } } my_table <- df %>%   select(服药依从性, 年龄, 民族, 性别, 文化程度, 职业, 婚姻状况, 经济状况, 户别, 医疗费用支付方式, 家族史) %>%   tbl_summary(by = 服药依从性, statistic = (     list(       all_continuous()

ibis 测试

import ibis import pandas as pd from ibis import _ penguins = ibis . examples .penguins. fetch () df = pd . DataFrame (     [[ "a" , 1 , 2 ], [ "b" , 3 , 4 ]],     columns = [ "one" , "two" , "three" ],     index = [ 5 , 6 ], ) t = ibis . memtable ( df , name = "t" ) ( penguins . filter ([ _ .species == "Adelie" , _ .island.isin([ "Biscoe" , "Dream" ])])) ( penguins . limit ( 11 , offset = 9 )) ( penguins . order_by ([ _ .island, _ .bill_length_mm.desc()])) ( penguins . select ( _ .bill_length_mm, _ .sex)) ( penguins . rename ( bill_length = "bill_length_mm" , bill_depth = "bill_depth_mm" )) (     penguins . mutate ( ones = 1 , bill_length = _ .bill_length_mm / 10 )     . mutate ( bill_length_squared = _ .bill_length ** 2 )     . select ( _ .ones, _ .bill_length_mm, _ .bill_length, _ .bill_length_squared) ) (     penguins . select (         _ .island,         _ .

pandas case_when 使用

 jz= pd.read_sql(sqltxt, conn).rename(columns=str.lower) jz=(     jz     .assign(         age=lambda x: (x['jz_sj']-x['csrq']).dt.days//365,         age_group=lambda df:df.age.case_when(             [                 (lambda s:(s>=15) & (s<=44),'15-44'),                 (lambda s:(s>=45) & (s<=59),'45-59'),                 (lambda s:s>=60,'>60'),                 (lambda s:s<15,'<15'),             ]         ),         year=lambda x: x['jz_sj'].dt.strftime('%Y')     ) ) (     jz.query("age_group!='unknown' & jzdd_dm.str.len()==10")     .groupby(['jzdd_mc','jzdd_dm','year','age_group'],as_index=False).agg(n=('ym_mc',np.size))     .pivot_table(index=['jzdd_mc','jzdd_dm'],columns=['year','age_group'],values='n')     .sort_values('jzdd_dm') )

出生队列接种率数据预处理

 gnldet <- readxl::read_xlsx('/mnt/c/Users/xuefliang/Downloads/gnldet_jzl.xlsx', sheet = 1, skip = 2) %>%   slice(-1) %>%   fill(区划名称, 区划编码, 疫苗, .direction = "down") %>%   rename(jc = `...4`) %>%   {     new_names <- c()     for (i in 1:18) {       new_names <- c(new_names, paste0(i, "岁接种数"), paste0(i, "岁接种率"))     }     old_names <- colnames(.)     colnames(.)[5:length(old_names)] <- new_names     .   } %>%   mutate(across(contains("率"), ~ as.numeric(str_replace_all(., "%", ""))),          across(contains("数"), ~ as.numeric(.))) %>%   filter(疫苗 != '总人数') gnldet %>%    filter(疫苗=='乙肝疫苗' & jc=='1.0') %>%    select(区划名称,区划编码,疫苗,jc,`1岁接种率`) %>%    mutate(`1岁接种率大于90` = `1岁接种率` > 90) %>%    filter(`1岁接种率大于90`==FALSE) ->test

率及其95%置信区间

  binom.test(3,10,conf.level = 0.95)#x为分子,y为分母 binom.test(30,100,conf.level = 0.95)#x为分子,y为分母

R json解析

 library(jsonlite) ch1 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921201605302722(1).txt') ch2 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622927202110102510(1).txt') ch3 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202303047214(1).txt') ch4 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202403302112(1).txt') ch5 <- fromJSON('/mnt/c/Users/xuefliang/Desktop/622921202403011825(1).txt') person1 <- as_tibble(ch1$data$PersonInfoList) person2 <- as_tibble(ch2$data$PersonInfoList) person3 <- as_tibble(ch3$data$PersonInfoList) person4 <- as_tibble(ch4$data$PersonInfoList) person5 <- as_tibble(ch5$data$PersonInfoList) person <- bind_rows(person1,person2,person3,person4,person5) vaccination <- person %>%   unnest(VaccinationInfoList) %>%    mutate(EntryDate=ymd_hms(EntryDate),UpdateDate=ymd_hms(UpdateDate),UplodeDate=ymd_hms(UplodeDate)) %>%    mutate(scjs = case_when(     difftime(UplodeDate, EntryD

按疫苗类别接种剂次排序后重新赋值

    def hbv ( self ):         df = self . _df         df .query( "vaccination_code in ['0201', '0202', '0203']" , inplace = True )         df [ 'vaccine_name' ] = '乙肝疫苗'         df = df .groupby( 'id_x' , group_keys = False ).apply(             lambda x : x .sort_values( by = 'vaccination_date' , ascending = True )             .assign( jc = range ( 1 , len ( x ) + 1 ))             .reset_index( drop = True )         )         return df bind_rows(jzjl,jzjl2) %>% mutate(mc = if_else(mc %in% c('麻腮风疫苗', '麻风疫苗'), '含麻疹成分疫苗', mc)) %>% filter(mc=='含麻疹成分疫苗') %>% group_by(grda_code) %>% arrange(jz_sj) %>% mutate(jz_zc = row_number()) ->tmp1

接种月龄计算

 brk <- read_csv("/mnt/c/Users/xuefeng/Desktop/brk.csv",locale = locale(encoding = 'GB18030')) %>%    clean_names() %>%    mutate(jz_sj=ymd_hms(jz_sj),csrq=ymd(csrq),jzyl = interval(csrq, jz_sj) %/% months(1),shi=str_sub(gldw_bm,1,4)) brk %>%    filter(csrq <= ymd('2023-12-30') & jz_zc == 1) %>%    group_by(shi) %>%    summarise(count = n(),             jzyl_3 = sum(jzyl == 3,na.rm = T),jzyl_3/count*100) %>%    writexl::write_xlsx("/mnt/c/Users/xuefeng/Desktop/brk2024_3.xlsx") import pandas as pd import numpy as np import janitor from pandas . tseries . offsets import MonthEnd from datetime import datetime brk = pd . read_csv ( "/mnt/c/Users/xuefeng/Desktop/brk.csv" , encoding = 'GB18030' ) brk = (     brk     . rename ( columns = lambda x : x .lower().replace( ' ' , '_' ))     . assign ( jz_sj = lambda df : pd . to_datetime ( df [ 'jz_sj' ], format = '%Y-%m-

office 激活

 # irm (Invoke-RestMethod) downloads the script served at the URL and pipes it
 # to iex (Invoke-Expression), which executes it in the current session.
 # The downloaded content runs without any verification - review it first.
 irm https://massgrave.dev/get | iex 

docker 安装 ollama

 docker run -d --gpus=all -e OLLAMA_ORIGINS="*" -v /root/.ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama docker exec -it ollama ollama run gemma:7b docker exec -it ollama ollama pull nomic-embed-text:latest

spyder 启动

 dbus-launch ibus-daemon -drx spyder

单位名称相似判断

 # 安装和加载stringdist包 install.packages("stringdist") library(stringdist) # 定义字符串 str1 <- "疾控中心" str2 <- "疾病预防控制中心" str1 <- "卫健委" str2 <- "卫生健康委员会" # 计算 Jaro-Winkler 距离 jw_distance <- stringdist(str1, str2, method = "jaccard") # 自定义字符串相似度计算函数 custom_similarity <- function(str1, str2) {   # 将字符串转换为字符向量   chars1 <- strsplit(str1, "")[[1]]   chars2 <- strsplit(str2, "")[[1]]      # 获取字符向量的长度   len1 <- length(chars1)   len2 <- length(chars2)      # 如果 str1 比 str2 长,则交换它们的位置   if (len1 > len2) {     temp <- chars1     chars1 <- chars2     chars2 <- temp     len1 <- length(chars1)     len2 <- length(chars2)   }      # 检查 chars1 中的字符是否按顺序出现在 chars2 中   index <- 1   match_count <- 0   for (i in 1:len1) {     found <- FALSE     for (j in index:len2) {       if (j <= len2 && chars1[i] == chars2[j]) {         found <- TRUE         match_count <- ma

ollama 配置 环境变量

  OLLAMA_ORIGINS="*" OLLAMA_HOST=‘0.0.0.0:11434 ’

cudf 安装

# Create and activate a Python 3.10 environment, then list environments to
# confirm it exists.
conda create --name myenv --channel conda-forge python=3.10
conda activate myenv
conda env list

# Install cuDF 24.02 for CUDA 12.2, then the Dask integration.
conda install --channel rapidsai --channel conda-forge --channel nvidia \
  cudf=24.02 python=3.10 cuda-version=12.2
conda install --channel rapidsai dask-cudf

windows 远程访问ollama

1. 在 Windows 环境下,默认情况下 API 接口只能通过 127.0.0.1 或 localhost 访问;如果需要通过网络访问本机部署的 ollama 模型,需要增加环境变量 OLLAMA_HOST,并将其值设置为 0.0.0.0:11434;环境变量设置完成后,需要重启 ollama app.exe 服务。 2. 在防火墙中开放 11434 端口。

use GFWList in SmartProxy

  Enter "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt" for URL. Make sure "Enabled" is checked. Select "Base64" for Obfuscation. Select "AutoProxy/GFWList" for Format.

naniar 缺失值

  library ( naniar ) remotes :: install_github ( "njtierney/naniar" ) airquality %>% group_by ( Month ) %>% naniar :: miss_var_summary ( ) ggplot ( data = airquality , aes ( x = Ozone , y = Solar.R ) ) + geom_miss_point ( )

ggthemr包

# One-time install: devtools::install_github('cttobin/ggthemr')
library(ggthemr)

# Switch the active ggplot2 theme to the "light" palette.
ggthemr("light")

# Density of mpg per cylinder count, legend placed on top.
mtcars %>%
  mutate(cyl = factor(cyl)) %>%
  ggplot(aes(x = mpg, fill = cyl, colour = cyl)) +
  geom_density(alpha = 0.75) +
  labs(fill = "Cylinders", colour = "Cylinders", x = "MPG", y = "Density") +
  legend_top()

# Restore the theme that was active before ggthemr() was called.
ggthemr_reset()

huxtable 做表

  library ( huxtable ) table1 <- cbind.data.frame ( mod $ mchoice , mod2 $ mchoice ) table1 %>%   as_hux ( add_colnames = FALSE ) %>%   set_number_format ( 2 )     %>%   map_text_color ( by_cols ( "darkred" , "blue" , "darkgreen" )) %>%   add_colnames ( "Criteria" ) %>%   set_header_rows ( 1 , TRUE ) %>%   add_rownames () %>%   set_bottom_border ( row = c ( 1 , 7 ), col = everywhere ) %>%   set_top_border ( row = 1 , col = everywhere ) %>%   set_bold ( 1 , everywhere ) %>%   style_headers ( bold = TRUE , text_color = "red" ) %>%   set_caption ( '(#tab:mchoicenyspatial) Model choice criteria for various models fitted to the nyspatial data set.' )

inla 发生Error “dsparseModelMatrix” object: superclass "xMatrix" not defined错误

  从 Matrix < 1.6-2 升级到 Matrix >= 1.6-2 之后,必须从源代码重新安装 MatrixModels: install.packages("MatrixModels", type = "source")