博文

目前显示的是 2025的博文

使用INLAspacetime完成时空建模

library(sf) library(fdmr) library(fmesher) library(INLA) library(inlabru) library(INLAspacetime) library(tidyverse) # 1. 加载数据并转换为 sf 对象 sf_data % st_as_sf(coords = c("LONG", "LAT"), crs = 4326) %>% mutate(mapp = 0) covid19_sf % st_as_sf(coords = c("LONG", "LAT"), crs = 4326) # 2. 计算网格参数,sf_data 的 geometry 类型是 sfc_MULTIPOLYGON coords 1) = 0.1 ) ) components

基于扩散的时空Gaussian Matérn场扩展

# DEMF模型总结:基于扩散的时空Gaussian Matérn场扩展 ## 概述 DEMF(Diffusion-based Spatio-temporal Extension of Matérn Fields)是一种通过随机偏微分方程(SPDE)将Gaussian Matérn场扩展到时空领域的随机过程家族。文章定义了四种具体模型:DEMF(1,0,2)、DEMF(1,2,1)、DEMF(2,0,2)和DEMF(2,2,0)。 ## 模型参数与特征 ### 参数定义 所有DEMF模型通过六个参数定义: - **光滑度参数**:(αₜ, αₛ, αₑ) - 控制时间、空间和噪声过程特性 - **尺度参数**:(γₜ, γₛ, γₑ) - 控制时空相关范围 ### 四种模型对比 | 模型 | (αₜ, αₛ, αₑ) | 时间光滑度 νₜ | 空间光滑度 νₛ | 类型 | 可分性 | |------|---------------|---------------|---------------|------|--------| | DEMF(1,0,2) | (1,0,2) | 1/2 | 1 | 可分模型(1阶) | 可分 | | DEMF(1,2,1) | (1,2,1) | 1/2 | 1 | 临界扩散 | 非可分 | | DEMF(2,0,2) | (2,0,2) | 3/2 | 1 | 可分模型(2阶) | 可分 | | DEMF(2,2,0) | (2,2,0) | 1 | 2 | 迭代扩散 | 完全非可分 | ## 主要共同特征 ### 理论基础 - **SPDE定义**:所有模型基于统一的SPDE框架 - **空间边际**:均为Matérn协方差形式 - **物理可解释性**:基于扩散过程,适合环境/气候数据建模 ### 实现特点 - **R-INLA兼容**:通过有限元方法实现稀疏表示 - **全球应用**:均可用于全球温度数据等大规模时空建模 - **非欧空间支持**:可扩展到球面、流形、网络等复杂几何结构 ## 关键差异 ### 1. 可分性特征 **可分模型**(DEMF(1,0,2), DEMF(2,0,2)): - 协方差函数:R(hₛ, hₜ) = Rₛ(hₛ)Rₜ(hₜ) - 时间边际协方差为Matérn形式 -...

时空模型2

  ```{r} library ( INLA ) library ( inlabru ) library ( magrittr ) library ( sf ) library ( tidyverse ) sf_data <- readRDS ( 'data/spatial_data.rds' ) # 基于最早日期计算连续周数 st_covid <- readRDS ( 'data/st_covid.rds' ) |>   mutate ( week = as.numeric ( difftime ( date , min ( date , na.rm = TRUE ), units = "weeks" )) %/% 1 + 1 ) |>   left_join ( sf_data @ data |> dplyr :: select ( MSOA11CD , LONG , LAT ), by = "MSOA11CD" ) |>   drop_na ( cases , Population , IMD , perc.wb , perc.ba , age1 , pm25 ) |>   st_as_sf ( coords = c ( "LONG" , "LAT" )) ``` ```{r} coordinates <- st_coordinates ( st_covid )   # st_covid geometry 类型是 sfc_POINT locs <- unique ( coordinates ) # # 计算每个多边形的质心 # centroids <- st_centroid(sf_data) # locs <- st_coordinates(centroids) # 计算初始参数 initial_range <- diff ( range ( locs [, 1 ])) / 3 max_edge <- initial_range / 2 # 构建网格 mesh <- fm_mesh_2d_inla (   st_covid ,   ma...

拟合时空耦合模型

library ( sf ) library ( fdmr ) library ( fmesher ) library ( INLA ) library ( inlabru ) library ( tidyverse ) # 1. 加载数据并转换为 sf 对象 sf_data <- readRDS ( 'data/spatial_dataBris.rds' ) %>%   st_as_sf ( coords = c ( "LONG" , "LAT" ) , crs = 4326 ) %>%   mutate ( mapp = 0 ) covid19_sf <- readRDS ( 'data/covid19_dataBris.rds' ) %>%   st_as_sf ( coords = c ( "LONG" , "LAT" ) , crs = 4326 ) # 2. 计算网格参数,sf_data 的 geometry 类型是 sfc_MULTIPOLYGON coords <- st_coordinates ( sf_data ) initial_range <- diff ( range ( coords [ , "X" ])) / 3 max_edge <- initial_range / 2 # 3.mesh mesh <- fm_mesh_2d_inla ( sf_data ,   max.edge = c ( 1 , 2 ) * max_edge ,   offset = c ( initial_range , initial_range ) ,   cutoff = max_edge / 7 ) plot ( mesh ) # 5. 定义 SPDE 模型 prior_range <- initial_range spde <- INLA :: inla.spde2.pcmatern (   mesh = mesh ,   prior.range = c ( prior_...

inlabru 时空分析

library ( INLA ) # 用于之后的建模 library ( MASS ) # 用于真实的多元正态分布模拟 library ( tidyverse ) library ( inlabru ) library ( fdmr ) library ( sf ) library ( inlatools )   covid19_data <- fdmr :: load_tutorial_data ( dataset = "priors" , filename = "covid19_dataBris.rds" ) covid19_sf <- covid19_data |> st_as_sf ( coords = c ( "LONG" , "LAT" )) coordinates <- st_coordinates ( covid19_sf ) locs <- unique ( coordinates ) # 计算初始参数 initial_range <- diff ( range ( locs [, 1 ])) / 3 max_edge <- initial_range / 2 # 2. 构建改进的网格,mesh从地图构建,此处不对。 mesh <- fm_mesh_2d (   loc = locs ,   max.edge = c ( 1 , 2 ) * max_edge ,   offset = c ( initial_range , initial_range ),   cutoff = max_edge / 5 ) # 3. 定义SPDE模型 spde <- inla.spde2.pcmatern (   mesh = mesh ,   prior.range = c ( initial_range , 0.5 ),   prior.sigma = c ( 1 , 0.01 ) ) # 4. 设置时间趋势先验, rw1_prior <- list ( theta = list ( prior = "pc.prec" , param = c ( 1 , ...

计算工作日

 library(tidyverse) # 定义节假日和补班日 holidays_2024 <- as.Date(c(   "2024-01-01",  # 元旦   "2024-02-10", "2024-02-11", "2024-02-12", "2024-02-13", "2024-02-14", "2024-02-15", "2024-02-16",  # 春节   "2024-04-04", "2024-04-05", "2024-04-06",  # 清明节   "2024-05-01", "2024-05-02", "2024-05-03",  # 劳动节   "2024-06-08", "2024-06-09", "2024-06-10",  # 端午节   "2024-09-15", "2024-09-16", "2024-09-17",  # 中秋节   "2024-10-01", "2024-10-02", "2024-10-03", "2024-10-04", "2024-10-05", "2024-10-06", "2024-10-07"  # 国庆节 )) workdays_2024 <- as.Date(c(   "2024-02-04", "2024-02-18",  # 春节补班   "2024-04-07",  # 清明补班   "2024-04-28", "2024-05-11",  # 劳动节补班   "2024-06-02",  # 端午补班   "20...

open remote wsl for positron install

图片
  1 、安装 Positron 2 、 Positron 中安装 kv9898.open-remote-wsl-0.0.9.vsix 扩展 3 、 Press Ctrl+Shift+P (or Cmd+Shift+P on macOS) Run the command:   Register this WSL extension for launching Positron from WSL 4 、打开虚拟网卡模式和 Ubuntu , Press Ctrl+Shift+P (or Cmd+Shift+P on macOS) Run the command:   Remote-WSL Connect to WSL

INLA mesh

 library(INLA) library(sf) library(gstat) library(tidyverse) data(sic97) # 直接在一个流程中准备全部数据 df_rain <- sic_full %>%    st_as_sf() %>%    mutate(     altitude = raster::extract(raster::raster(demstd), as.matrix(st_coordinates(.))),     coords = st_coordinates(.)   ) %>%    mutate(     lat = coords[, 1] / 1000,     lon = coords[, 2] / 1000   ) %>%    dplyr::select(ID, rainfall, altitude, lat, lon) # 检查空间依赖性 df_rain %>%     as_Spatial() %>%   variogram(rainfall ~ 1, data = .) %>%   graphics::plot() # 创建网格 loc <- cbind(df_rain$lat, df_rain$lon) Mesh <- inla.mesh.2d(   loc.domain = loc,   max.edge = c(20, 100),   cutoff = 1 ) ggplot() +   inlabru::gg(Mesh) +   geom_point(data = data.frame(x = loc[,1], y = loc[,2]),              aes(x = x, y = y), color = "red", shape = 1) +   theme_...

R 读取文件夹下的所有excel文件

 library(readxl) library(purrr) library(dplyr) # 设置文件夹路径 folder_path <- "/mnt/c/Users/xuefl/Downloads/新冠存疑分市州" # 获取文件夹中所有的xlsx文件 xlsx_files <- list.files(path = folder_path, pattern = "\\.xlsx$", full.names = TRUE) # 如果没有找到任何xlsx文件,输出警告 if (length(xlsx_files) == 0) {   warning("未在指定文件夹找到任何xlsx文件") } # 创建一个函数来读取Excel文件,并添加文件名作为标识 read_excel_with_source <- function(file_path) {   file_name <- basename(file_path)   df <- try(read_excel(file_path), silent = TRUE)      if (inherits(df, "try-error")) {     warning(paste("无法读取文件:", file_name))     return(NULL)   }      # 添加文件名作为源标识   df$source_file <- file_name      return(df) } # 使用map函数批量读取所有xlsx文件 all_data_list <- map(xlsx_files, read_excel_with_source) # 移除读取失败的NULL元素 all_data_list <- all_data_list[!sapply(all_data_list, is.null)] # 检查是否所有数据框的列数相同 col_counts <- sapply(all_data_list, ncol) if (length(unique(col_counts)) ...

ibis 表关联

  import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect( '/mnt/d/xg.db' ) mon3 = con .table( 'mon3' ) mon4 = con .table( 'mon4' ) mon5 = con .table( 'mon5' ) mon6 = con .table( 'mon6' ) mon7 = con .table( 'mon7' ) mon8 = con .table( 'mon8' ) mon9 = con .table( 'mon9' ) mon10 = con .table( 'mon10' ) mon11 = con .table( 'mon11' ) mon12 = con .table( 'mon12' ) jg = con .read_xlsx( '/mnt/d/疫苗批号价格.xlsx' ) jg = (     jg     .distinct( on = [ '疫苗批号' , '价格' ], keep = 'first' )     .select( '疫苗批号' , '价格' ) ) #7026,2021030002G (     mon4     .group_by( 'YM_PH' )     .aggregate( n = _ .GRDA_CODE.count())     .asof_join( jg , on = ( 'YM_PH' , '疫苗批号' ))     .filter( _ .价格 == 90 ) ) #3,NCOV202104002V (     mon5     .group_by( 'YM_PH' )     .aggregate( n = _ ...

贝叶斯卡方

 library(BayesFactor) # 创建一个2x2列联表作为例子 data <- matrix(c(15, 25, 35, 25), nrow = 2) colnames(data) <- c("Treatment", "Control") rownames(data) <- c("Success", "Failure") data # 使用传统卡方检验 chisq.test(data) # 使用贝叶斯卡方检验 - 通过计算贝叶斯因子 bf <- contingencyTableBF(data, sampleType = "jointMulti") bf # 解释贝叶斯因子 extractBF(bf) # contingencyTableBF计算了独立假设与非独立假设之间的贝叶斯因子。如果贝叶斯因子大于1表明是非独立的。 # BF = 1:没有证据支持任一假设。 # BF = 1-3:证据“微弱”(anecdotal evidence)。 # BF = 3-10:证据“中等”(moderate evidence)。 # BF > 10:证据“强”(strong evidence)。 library(brms) # 创建二项式数据 successes <- c(15, 35)  # Treatment和Control组的成功次数 trials <- c(15+25, 35+25)  # Treatment和Control组的总试验次数 treatment <- c(1, 0)  # 1=Treatment, 0=Control df2 <- data.frame(successes=successes, trials=trials, treatment=treatment) # 拟合贝叶斯逻辑回归模型 brm_model <- brm(successes | trials(trials) ~ treatment,                   data = df2,             ...

ibis 随机抽样

  #随机抽样 tmp = (     result     .filter( _ .A == '兰州' )     .order_by( ibis . random ())   # 随机排序     .limit( 2 )   # 取前 2 行 ) #按比例随机抽样 tmp = (     result     .sample( 0.1 ) )

ibis 向下进行缺失值填充

import ibis import ibis . selectors as s from ibis import _ ibis . options . interactive = True con = ibis .duckdb.connect() #10.3.1 print ( ibis . __version__ ) # three =con.read_xlsx("/mnt/c/Users/xuefl/Downloads/24年报损疫苗tj.xlsx") # mon3=con.read_csv('/mnt/d/2021/3.csv') # con.list_tables() test = con .read_xlsx( "/mnt/c/Users/xuefl/Downloads/工作簿1.xlsx" ) # 创建窗口对象 window = ibis . window (     preceding = None ,   # 无限前行     following = 0 ,     # 当前行     order_by = 'rowid'   # 使用行ID确保正确的顺序,如果你的表没有rowid,可能需要先添加一个 ) # 实现向下填充,类似dplyr的fill(.direction = 'down') test = (     test     .mutate( rowid = ibis . row_number (). over ())     .mutate(         A_filled = lambda t : t .A.max().over( window )     )     # 可以选择删除rowid列和原始A列,或重命名A_filled为A     .drop( 'rowid' )     .rename( A_filled = 'A' ) ) ##### def fill ( self , order_by = 'rowid' ):...

ibis实战

  import ibis import pandas as pd from ibis import _ import ibis . selectors as s from datetime import datetime , timedelta from dateutil . relativedelta import relativedelta ibis . options . interactive = True con = ibis .duckdb.connect() brk = con .read_xlsx( "/mnt/c/users/Administrator/Downloads/report.xlsx" ) tj = (     brk     .mutate( mon = _ .时间.substr( 5 , 2 ))     .group_by( 'mon' )     .aggregate(         fbs = _ .发病数.sum()     )     .order_by( 'mon' )     .execute() ) tj .to_excel( "/mnt/c/users/Administrator/Downloads/tj.xlsx" ) bl = con .read_csv( "/mnt/c/users/Administrator/Downloads/2020.csv" ) tmp = (     bl     .mutate( age = ibis . case (). when ( _ .年龄.contains( '月' ), 0 )             . when ( _ .年龄.contains( '岁' ), _ .年龄.re_extract( r ' ( \d + ) 岁' , 1 ).cast( 'int' ))           ...