博文

目前显示的是 2025的博文

R 读取文件夹下的所有excel文件

# R: batch-read every .xlsx in a folder (readxl + purrr), tagging each data
# frame with its source file name, then check column-count consistency.
# NOTE(review): this excerpt was flattened onto one line and truncated ("...")
# by the page extractor — original line breaks are lost, so it is preserved
# verbatim below; recover the full post before reformatting or editing.
# NOTE(review): once recovered, prefer vapply(all_data_list, is.null, logical(1))
# over sapply() for the NULL filter (stable return type).
 library(readxl) library(purrr) library(dplyr) # 设置文件夹路径 folder_path <- "/mnt/c/Users/xuefl/Downloads/新冠存疑分市州" # 获取文件夹中所有的xlsx文件 xlsx_files <- list.files(path = folder_path, pattern = "\\.xlsx$", full.names = TRUE) # 如果没有找到任何xlsx文件,输出警告 if (length(xlsx_files) == 0) {   warning("未在指定文件夹找到任何xlsx文件") } # 创建一个函数来读取Excel文件,并添加文件名作为标识 read_excel_with_source <- function(file_path) {   file_name <- basename(file_path)   df <- try(read_excel(file_path), silent = TRUE)      if (inherits(df, "try-error")) {     warning(paste("无法读取文件:", file_name))     return(NULL)   }      # 添加文件名作为源标识   df$source_file <- file_name      return(df) } # 使用map函数批量读取所有xlsx文件 all_data_list <- map(xlsx_files, read_excel_with_source) # 移除读取失败的NULL元素 all_data_list <- all_data_list[!sapply(all_data_list, is.null)] # 检查是否所有数据框的列数相同 col_counts <- sapply(all_data_list, ncol) if (length(unique(col_counts)) ...

ibis 表关联

# Python/ibis: load monthly DuckDB tables (mon3..mon12) plus a vaccine
# batch/price workbook, dedupe it, then count records per batch and join
# prices onto the counts.
# NOTE(review): excerpt flattened and truncated ("...") by the page extractor;
# preserved verbatim — recover the full post before editing.
# NOTE(review): .asof_join(jg, on=('YM_PH', '疫苗批号')) for what looks like an
# exact batch-number match is unusual — a plain left/inner join may be
# intended; confirm against the full post.
  import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect( '/mnt/d/xg.db' ) mon3 = con .table( 'mon3' ) mon4 = con .table( 'mon4' ) mon5 = con .table( 'mon5' ) mon6 = con .table( 'mon6' ) mon7 = con .table( 'mon7' ) mon8 = con .table( 'mon8' ) mon9 = con .table( 'mon9' ) mon10 = con .table( 'mon10' ) mon11 = con .table( 'mon11' ) mon12 = con .table( 'mon12' ) jg = con .read_xlsx( '/mnt/d/疫苗批号价格.xlsx' ) jg = (     jg     .distinct( on = [ '疫苗批号' , '价格' ], keep = 'first' )     .select( '疫苗批号' , '价格' ) ) #7026,2021030002G (     mon4     .group_by( 'YM_PH' )     .aggregate( n = _ .GRDA_CODE.count())     .asof_join( jg , on = ( 'YM_PH' , '疫苗批号' ))     .filter( _ .价格 == 90 ) ) #3,NCOV202104002V (     mon5     .group_by( 'YM_PH' )     .aggregate( n = _ ...

贝叶斯卡方

# R: Bayesian chi-square on a 2x2 contingency table — classical chisq.test,
# then a Bayes factor via BayesFactor::contingencyTableBF, then a brms
# logistic-regression formulation of the same comparison.
# NOTE(review): excerpt flattened onto one line and truncated ("...") mid
# brm() call by the page extractor; preserved verbatim — recover the full
# post before editing.
 library(BayesFactor) # 创建一个2x2列联表作为例子 data <- matrix(c(15, 25, 35, 25), nrow = 2) colnames(data) <- c("Treatment", "Control") rownames(data) <- c("Success", "Failure") data # 使用传统卡方检验 chisq.test(data) # 使用贝叶斯卡方检验 - 通过计算贝叶斯因子 bf <- contingencyTableBF(data, sampleType = "jointMulti") bf # 解释贝叶斯因子 extractBF(bf) # contingencyTableBF计算了独立假设与非独立假设之间的贝叶斯因子。如果贝叶斯因子大于1表明是非独立的。 # BF = 1:没有证据支持任一假设。 # BF = 1-3:证据“微弱”(anecdotal evidence)。 # BF = 3-10:证据“中等”(moderate evidence)。 # BF > 10:证据“强”(strong evidence)。 library(brms) # 创建二项式数据 successes <- c(15, 35)  # Treatment和Control组的成功次数 trials <- c(15+25, 35+25)  # Treatment和Control组的总试验次数 treatment <- c(1, 0)  # 1=Treatment, 0=Control df2 <- data.frame(successes=successes, trials=trials, treatment=treatment) # 拟合贝叶斯逻辑回归模型 brm_model <- brm(successes | trials(trials) ~ treatment,                   data = df2,             ...

ibis 随机抽样

# Random sampling with ibis (assumes `result`, `ibis`, and `_` are in scope
# from the surrounding session).

# Fixed-size sample: restrict to rows where A == '兰州', shuffle them with a
# random sort key, and keep the first two rows of the shuffle.
tmp = (
    result
    .filter(_.A == '兰州')
    .order_by(ibis.random())  # random sort key => shuffled order
    .limit(2)                 # take the first 2 rows
)

# Proportional sample: draw roughly 10% of the rows at random.
tmp = (
    result
    .sample(0.1)
)

ibis 向下进行缺失值填充

# Python/ibis: emulate dplyr's fill(.direction = 'down') by adding a rowid,
# then taking a windowed aggregate over all preceding rows ordered by rowid.
# NOTE(review): excerpt flattened and truncated ("...") at the start of a
# `def fill(self, ...)` helper; preserved verbatim — recover the full post
# before editing.
# NOTE(review): max().over(cumulative window) equals fill-down only when the
# column is non-decreasing; a last-non-null windowed aggregate is the faithful
# equivalent — confirm against the full post.
import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect() #10.3.1 print ( ibis . __version__ ) # three =con.read_xlsx("/mnt/c/Users/xuefl/Downloads/24年报损疫苗tj.xlsx") # mon3=con.read_csv('/mnt/d/2021/3.csv') # con.list_tables() test = con .read_xlsx( "/mnt/c/Users/xuefl/Downloads/工作簿1.xlsx" ) # 创建窗口对象 window = ibis . window (     preceding = None ,   # 无限前行     following = 0 ,     # 当前行     order_by = 'rowid'   # 使用行ID确保正确的顺序,如果你的表没有rowid,可能需要先添加一个 ) # 实现向下填充,类似dplyr的fill(.direction = 'down') test = (     test     .mutate( rowid = ibis . row_number (). over ())     .mutate(         A_filled = lambda t : t .A.max().over( window )     )     # 可以选择删除rowid列和原始A列,或重命名A_filled为A     .drop( 'rowid' )     .rename( A_filled = 'A' ) ) ##### def fill ( self , order_by = 'rowid' ):...

ibis实战

# Python/ibis: monthly case-count aggregation from an Excel report (month
# extracted from a date string, summed, exported to Excel), then age parsing
# from a CSV via a case()/regex chain.
# NOTE(review): excerpt flattened and truncated ("...") mid case() chain by
# the page extractor; preserved verbatim — recover the full post before editing.
# NOTE(review): _.时间.substr(5, 2) presumably slices the month out of a
# YYYYMMDD-style string — confirm ibis's substr start index convention against
# the actual data.
  import ibis import pandas as pd from ibis import _ import ibis . selectors as s from datetime import datetime , timedelta from dateutil . relativedelta import relativedelta ibis . options . interactive = True con = ibis .duckdb.connect() brk = con .read_xlsx( "/mnt/c/users/Administrator/Downloads/report.xlsx" ) tj = (     brk     .mutate( mon = _ .时间.substr( 5 , 2 ))     .group_by( 'mon' )     .aggregate(         fbs = _ .发病数.sum()     )     .order_by( 'mon' )     .execute() ) tj .to_excel( "/mnt/c/users/Administrator/Downloads/tj.xlsx" ) bl = con .read_csv( "/mnt/c/users/Administrator/Downloads/2020.csv" ) tmp = (     bl     .mutate( age = ibis . case (). when ( _ .年龄.contains( '月' ), 0 )             . when ( _ .年龄.contains( '岁' ), _ .年龄.re_extract( r ' ( \d + ) 岁' , 1 ).cast( 'int' ))           ...

ibis 10.3支持read_xlsx to_xlsx

# Demo: ibis >= 10.3 ships Excel I/O (read_xlsx / to_xlsx) on the DuckDB
# backend — load a workbook straight into a backend table.
import ibis
import ibis.selectors as s
from ibis import _

ibis.options.interactive = True

# In-memory DuckDB connection; the post used ibis 10.3.1.
con = ibis.duckdb.connect()
print(ibis.__version__)

# Read the workbook into a table, then confirm it was registered.
test = con.read_xlsx("/mnt/c/Users/xuefl/Downloads/24年报损疫苗tj.xlsx")
con.list_tables()

ibis 按周统计

# Python/ibis: helper that reads an Excel file with pandas, coerces object
# columns to str, and wraps the frame as an ibis memtable; then a weekly
# aggregation (tj) over daily pertussis case data.
# NOTE(review): excerpt flattened and truncated ("...") by the page extractor
# — the try/except alignment shown here is an artifact of the flattening;
# preserved verbatim, recover the full post before editing.
  import ibis import pandas as pd from ibis import _ import ibis . selectors as s ibis . options . interactive = True def load_excel_to_ibis ( excel_path ):     try :         # 读取Excel文件         df = pd . read_excel ( excel_path )                 # 将所有object类型的列转换为字符串         for col in df . select_dtypes ( include = [ 'object' ]). columns :             df [ col ] = df [ col ]. astype ( str )                 # 转换为Ibis表格         ibis_table = ibis . memtable ( df )         return ibis_table         except Exception as e :         print ( f "错误: { str ( e ) } " )         return None dq = load_excel_to_ibis ( "/mnt/c/users/xuefe/Downloads/百日咳分日发病.xlsx" ) tj = (     dq  ...

ibis

# Python/ibis: open a DuckDB database, sanity-check monthly tables
# (table3..table6), drop bookkeeping columns, and union them into one table.
# NOTE(review): excerpt flattened and truncated ("...") at `con.create...`;
# preserved verbatim — recover the full post before editing.
# NOTE(review): `table6=(table5.drop( 'column00' , 'AAA' ))` drops from table5,
# not table6 — likely a typo that makes the union duplicate table5's rows;
# confirm against the full post.
  import ibis import pandas as pd from ibis import _ import ibis.selectors as s from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta ibis.options.interactive = True # con = ibis.duckdb.connect("/mnt/d/xg.db") con = ibis.connect( "duckdb:///mnt/d/duckdb.db" ) # con.drop_table("XG") con.list_tables() table3=con.table( "table3" ) table4=con.table( "table4" ) table5=con.table( "table5" ) table6=con.table( "table6" ) (     table6     .group_by( 'YM_PH' )     .aggregate(n=_.count()) ) (     table5     .filter(_.YM_PH.isnull())     .count() ) table6.columns table3.schema table3=(     table3.drop( 'AAA' ) ) table4=(     table4.drop( 'AAA' ) ) table5=(table5.drop([ 'column00' , 'AAA' ])) table6=(table5.drop( 'column00' , 'AAA' )) table=(table3.union(table4).union(table5).union(table6)) (     table     .count() ) con.create...

python 使用duckdb导入csv

# Import CSV files into a file-backed DuckDB database with read_csv_auto.
import duckdb

# Open (or create) the on-disk database.
con = duckdb.connect('/mnt/d/duckdb.db')

# table5: read every column as VARCHAR, pad ragged rows with NULLs, relax
# strict parsing, and skip rows that still fail to parse.
con.execute("""
CREATE TABLE table5 AS SELECT * FROM read_csv_auto('/mnt/d/5.csv',
    all_varchar=1,
    null_padding=true,
    strict_mode=false,
    ignore_errors=true)
""")

# table6: same import without ignore_errors, so an unparseable row aborts it.
# NOTE(review): this also reads 5.csv — possibly a typo for 6.csv; confirm.
con.execute("CREATE TABLE table6 as select * from read_csv_auto('/mnt/d/5.csv',all_varchar=1,null_padding=true,strict_mode=false)")

# List the tables now present in the database.
con.sql('SHOW ALL TABLES')

ibis 表 连接

# Python/ibis: table-join walkthrough starting from the same Excel-to-memtable
# loader used in the weekly-statistics post (pandas read_excel, object columns
# cast to str, wrapped via ibis.memtable).
# NOTE(review): excerpt flattened and truncated ("...") mid f-string by the
# page extractor; preserved verbatim — recover the full post before editing.
# NOTE(review): this loader duplicates the one in the weekly-statistics
# snippet; worth extracting into a shared module once the posts are recovered.
  import ibis import duckdb import pandas as pd from ibis import _ import ibis . selectors as s from datetime import datetime , timedelta from typing import Dict , List , Tuple from dateutil . relativedelta import relativedelta ibis . options . interactive = True def load_excel_to_ibis ( excel_path ):     try :         # 读取Excel文件         df = pd . read_excel ( excel_path )                 # 将所有object类型的列转换为字符串         for col in df . select_dtypes ( include = [ 'object' ]). columns :             df [ col ] = df [ col ]. astype ( str )                 # 转换为Ibis表格         ibis_table = ibis . memtable ( df )         return ibis_table         except Exception as e :         print ( f "错误: { str ...

ibis 中使用duckdb

# Python/ibis: connect to an existing DuckDB database through ibis, inspect
# the XG table, and aggregate counts / distinct counts per YM_PH.
# NOTE(review): excerpt flattened and truncated ("...") mid mutate chain by
# the page extractor; preserved verbatim — recover the full post before editing.
# NOTE(review): the URI here is "duckdb:/mnt/d/duckdb.db" while the related
# post uses "duckdb:///mnt/d/duckdb.db" — confirm which form the ibis version
# in use accepts.
import duckdb import pandas as pd import numpy as np import ibis from ibis import _ import ibis . selectors as s import ibis . expr . operations as ops ibis . options . interactive = True # Create a DuckDB connection # con = duckdb.connect('/mnt/d/duckdb.db') # # Create a table # con.execute("CREATE TABLE XG as select * from read_csv_auto('/mnt/c/Users/xuefliang/Downloads/新冠存疑2.csv')") # con.sql('SHOW ALL TABLES') con = ibis . connect ( "duckdb:/mnt/d/duckdb.db" ) # con.create_table( #     "penguins", ibis.examples.penguins.fetch().to_pyarrow(), overwrite = True # ) con . list_tables () xg = con . table ( "XG" ) xg . head () (     xg     . group_by ( "YM_PH" )     . aggregate (         count = lambda _ : _ .count(),         distinct_count = lambda _ : _ .ZJHM.nunique()     ) ) tmp = (     xg     . mutate ( CSRQ = _ .CSRQ.cast( "string" ))     ...