博文

目前显示的是 2026 年 2 月的博文

多线程鸭湖

  from datetime import datetime from datetime import timedelta , date import polars as pl import duckdb as dd import time as time_module from concurrent.futures import ThreadPoolExecutor , as_completed import threading # 初始化 uri = "oracle:// " dd . sql ( "attach 'ducklake:meta.ducklake' as lake;" ) # 如果表存在,先删除 dd . sql ( "DROP TABLE IF EXISTS lake.ods.jzjl;" ) # 创建 schema dd . sql ( "CREATE SCHEMA IF NOT EXISTS lake.ods;" ) # 线程安全的标志 first_batch_lock = threading . Lock () first_batch = True def worker ( current_date ):     """处理单个日期的数据"""     global first_batch         start_time = time_module . time ()     result = {         'date' : current_date ,         'status' : 'success' ,         'rows' : 0 ,         'elapsed' : 0 ,         'error' : None     }         try :   ...

鸭湖连接oracle

  from datetime import datetime from datetime import timedelta , date import polars as pl import duckdb as dd import time as time_module #初始化 uri = "oracle:// " dd . sql ( "attach 'ducklake:meta.ducklake' as lake;" ) # 如果表存在,先删除 dd . sql ( "DROP TABLE IF EXISTS lake.ods.jzjl;" ) # 创建 schema dd . sql ( "CREATE SCHEMA IF NOT EXISTS lake.ods;" ) # 写数据 bdate = date ( year = 2026 , month = 1 , day = 1 ) edate = date ( year = 2026 , month = 1 , day = 3 ) first_batch = True for i in range (( edate - bdate ). days + 1 ):     start_time = time_module . time ()     current_date = bdate + timedelta ( days = i )         try :         next_date = current_date + timedelta ( days = 1 )         query = f """         SELECT * FROM inoc_jzjl         WHERE jz_sj >= TO_DATE(' { current_date } ', 'YYYY-MM-DD')         AN...