博文

R 读取文件夹下的所有excel文件

 library(readxl) library(purrr) library(dplyr) # 设置文件夹路径 folder_path <- "/mnt/c/Users/xuefl/Downloads/新冠存疑分市州" # 获取文件夹中所有的xlsx文件 xlsx_files <- list.files(path = folder_path, pattern = "\\.xlsx$", full.names = TRUE) # 如果没有找到任何xlsx文件,输出警告 if (length(xlsx_files) == 0) {   warning("未在指定文件夹找到任何xlsx文件") } # 创建一个函数来读取Excel文件,并添加文件名作为标识 read_excel_with_source <- function(file_path) {   file_name <- basename(file_path)   df <- try(read_excel(file_path), silent = TRUE)      if (inherits(df, "try-error")) {     warning(paste("无法读取文件:", file_name))     return(NULL)   }      # 添加文件名作为源标识   df$source_file <- file_name      return(df) } # 使用map函数批量读取所有xlsx文件 all_data_list <- map(xlsx_files, read_excel_with_source) # 移除读取失败的NULL元素 all_data_list <- all_data_list[!sapply(all_data_list, is.null)] # 检查是否所有数据框的列数相同 col_counts <- sapply(all_data_list, ncol) if (length(unique(col_counts)) ...

ibis 表关联

  import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect( '/mnt/d/xg.db' ) mon3 = con .table( 'mon3' ) mon4 = con .table( 'mon4' ) mon5 = con .table( 'mon5' ) mon6 = con .table( 'mon6' ) mon7 = con .table( 'mon7' ) mon8 = con .table( 'mon8' ) mon9 = con .table( 'mon9' ) mon10 = con .table( 'mon10' ) mon11 = con .table( 'mon11' ) mon12 = con .table( 'mon12' ) jg = con .read_xlsx( '/mnt/d/疫苗批号价格.xlsx' ) jg = (     jg     .distinct( on = [ '疫苗批号' , '价格' ], keep = 'first' )     .select( '疫苗批号' , '价格' ) ) #7026,2021030002G (     mon4     .group_by( 'YM_PH' )     .aggregate( n = _ .GRDA_CODE.count())     .asof_join( jg , on = ( 'YM_PH' , '疫苗批号' ))     .filter( _ .价格 == 90 ) ) #3,NCOV202104002V (     mon5     .group_by( 'YM_PH' )     .aggregate( n = _ ...

贝叶斯卡方

 library(BayesFactor) # 创建一个2x2列联表作为例子 data <- matrix(c(15, 25, 35, 25), nrow = 2) colnames(data) <- c("Treatment", "Control") rownames(data) <- c("Success", "Failure") data # 使用传统卡方检验 chisq.test(data) # 使用贝叶斯卡方检验 - 通过计算贝叶斯因子 bf <- contingencyTableBF(data, sampleType = "jointMulti") bf # 解释贝叶斯因子 extractBF(bf) # contingencyTableBF计算了独立假设与非独立假设之间的贝叶斯因子。如果贝叶斯因子大于1表明是非独立的。 # BF = 1:没有证据支持任一假设。 # BF = 1-3:证据“微弱”(anecdotal evidence)。 # BF = 3-10:证据“中等”(moderate evidence)。 # BF > 10:证据“强”(strong evidence)。 library(brms) # 创建二项式数据 successes <- c(15, 35)  # Treatment和Control组的成功次数 trials <- c(15+25, 35+25)  # Treatment和Control组的总试验次数 treatment <- c(1, 0)  # 1=Treatment, 0=Control df2 <- data.frame(successes=successes, trials=trials, treatment=treatment) # 拟合贝叶斯逻辑回归模型 brm_model <- brm(successes | trials(trials) ~ treatment,                   data = df2,             ...

ibis 随机抽样

  #随机抽样 tmp = (     result     .filter( _ .A == '兰州' )     .order_by( ibis . random ())   # 随机排序     .limit( 2 )   # 取前 2 行 ) #按比例随机抽样 tmp = (     result     .sample( 0.1 ) )

ibis 向下进行缺失值填充

import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect() #10.3.1 print ( ibis . __version__ ) # three =con.read_xlsx("/mnt/c/Users/xuefl/Downloads/24年报损疫苗tj.xlsx") # mon3=con.read_csv('/mnt/d/2021/3.csv') # con.list_tables() test = con .read_xlsx( "/mnt/c/Users/xuefl/Downloads/工作簿1.xlsx" ) # 创建窗口对象 window = ibis . window (     preceding = None ,   # 无限前行     following = 0 ,     # 当前行     order_by = 'rowid'   # 使用行ID确保正确的顺序,如果你的表没有rowid,可能需要先添加一个 ) # 实现向下填充,类似dplyr的fill(.direction = 'down') test = (     test     .mutate( rowid = ibis . row_number (). over ())     .mutate(         A_filled = lambda t : t .A.max().over( window )     )     # 可以选择删除rowid列和原始A列,或重命名A_filled为A     .drop( 'rowid' )     .rename( A_filled = 'A' ) ) ##### def fill ( self , order_by = 'rowid' ):...

ibis实战

  import ibis import pandas as pd from ibis import _ import ibis . selectors as s from datetime import datetime , timedelta from dateutil . relativedelta import relativedelta ibis . options . interactive = True con = ibis .duckdb.connect() brk = con .read_xlsx( "/mnt/c/users/Administrator/Downloads/report.xlsx" ) tj = (     brk     .mutate( mon = _ .时间.substr( 5 , 2 ))     .group_by( 'mon' )     .aggregate(         fbs = _ .发病数.sum()     )     .order_by( 'mon' )     .execute() ) tj .to_excel( "/mnt/c/users/Administrator/Downloads/tj.xlsx" ) bl = con .read_csv( "/mnt/c/users/Administrator/Downloads/2020.csv" ) tmp = (     bl     .mutate( age = ibis . case (). when ( _ .年龄.contains( '月' ), 0 )             . when ( _ .年龄.contains( '岁' ), _ .年龄.re_extract( r ' ( \d + ) 岁' , 1 ).cast( 'int' ))           ...

ibis 10.3支持read_xlsx to_xlsx

  import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect() #10.3.1 print ( ibis . __version__ ) test = con .read_xlsx( "/mnt/c/Users/xuefl/Downloads/24年报损疫苗tj.xlsx" ) con .list_tables()