博文

获取经纬度

  library ( sf ) library ( terra ) library ( tidyverse ) library ( readxl ) library ( duckdb ) library ( httr ) # 必须加载,提供GET()函数 library ( jsonlite ) # 解析JSON返回结果 library ( arrow ) # 读取接种单位数据 jzdw <- read_xlsx ( './接种单位列表.xlsx' ) |>   select ( 接种单位名称 , 接种单位所属区县名称 ) |>   distinct () get_jzdw_location_amap <- function (   jzdw_df ,   amap_key ,   max_retry = 3 ,   base_timeout = 15 ) {   # 步骤1:拼接地址   jzdw_df <- jzdw_df |>     mutate (       接种单位所属区县名称 = ifelse (         is.na ( 接种单位所属区县名称 ) | 接种单位所属区县名称 == "" ,         "" ,         接种单位所属区县名称       ) ,       完整地址 = paste0 ( "甘肃省" , 接种单位所属区县名称 , 接种单位名称 ) |>         str_squish () ,       地址编码 = URLencode ( 完整地址 , reserved = TRUE ) ,       行号 = row_number () # 添加行号,方便跟踪进度     )...

R 数据库 延迟测试

  library ( tidyverse ) library ( duckdb ) library ( arrow ) con <- dbConnect (   duckdb () ,   dbdir = "./甘肃省POI数据.duckdb" , # 数据库文件保存路径   read_only = FALSE ) poi <- tbl ( con , "poi" ) poi #查看列名 colnames ( poi ) dbGetQuery ( con , "PRAGMA table_info(poi)" ) |>   select ( name , type ) # 关闭数据库连接 dbDisconnect ( con , shutdown = TRUE ) poi |>   group_by ( 城市 ) |>   summarise ( n = n ()) |>   # show_query()   collect () jzdw <- read_parquet ( './jzdw_with_location_baidu.parquet' ) jzdw |> str () ###parquet文件延迟 wz <- tbl_file ( path = './接种单位位置.parquet' ) wz wz |>   group_by ( 接种单位所属区县名称 ) |>   summarise ( n = n ()) #DuckDB 加速的本地数据操作 library ( duckplyr ) df <- duckdb_tibble ( x = 1 : 3 , y = letters [ 1 : 3 ]) df |> group_by ( y ) |> summarise ( n = n ()) nyc = tbl (   con ,   "read_parquet('nyc-taxi/**/*.parquet', hive_partitioning = true)" ) nc...

apache-superset

 pip install apache-superset export SUPERSET_SECRET_KEY="yEi9YUsuZExpRAZuNV3XbSLcuCcZ7ytdPOA7CCQaLPQ6NxeSRdpJwNUz" export FLASK_APP=superset cat /etc/superset_config.py # 禁用不安全数据库连接的安全检查 PREVENT_UNSAFE_DB_CONNECTIONS = False # 或者,更精确地允许 SQLite 驱动 ALLOWED_DIALECTS = [     "sqlite",     # 其他你需要的驱动,如 'postgresql', 'mysql' 等 ] export SUPERSET_CONFIG_PATH=/etc/superset_config.py superset init superset db upgrade superset fab create-admin \--username admin \--firstname Superset \--lastname Admin \--email 360149345@qq.com \--password admin superset run -p 8088 \--with-threads \--reload \--debugger sqlite:////mnt/c/Users/Administrator/Downloads/database.sqlite/database.sqlite

多线程鸭湖

  from datetime import datetime from datetime import timedelta , date import polars as pl import duckdb as dd import time as time_module from concurrent.futures import ThreadPoolExecutor , as_completed import threading # 初始化 uri = "oracle:// " dd . sql ( "attach 'ducklake:meta.ducklake' as lake;" ) # 如果表存在,先删除 dd . sql ( "DROP TABLE IF EXISTS lake.ods.jzjl;" ) # 创建 schema dd . sql ( "CREATE SCHEMA IF NOT EXISTS lake.ods;" ) # 线程安全的标志 first_batch_lock = threading . Lock () first_batch = True def worker ( current_date ):     """处理单个日期的数据"""     global first_batch         start_time = time_module . time ()     result = {         'date' : current_date ,         'status' : 'success' ,         'rows' : 0 ,         'elapsed' : 0 ,         'error' : None     }         try :   ...