polars duckdb

 import polars as pl

import polars.selectors as cs
import duckdb

con = duckdb.connect("/mnt/c/Users/Administrator/Downloads/流感肺炎/fy.db")

# 获取所有表名
tables = con.execute("SHOW TABLES").pl()
print(tables)

# 查看 test 表的列信息
schema = con.execute("DESCRIBE test").pl()
print(schema)

# 读数据库表
test = con.execute("""
    SELECT * FROM test
""").pl(lazy=True)

# 比较慢
# test2 = pl.read_database("SELECT * FROM test", connection=con).lazy()

test.collect_schema()

# 关闭连接
con.close()


#######################
import polars as pl
import polars.selectors as cs
import duckdb

con = duckdb.connect("/mnt/c/Users/Administrator/Downloads/流感肺炎/fy.db")

temp = (
    pl.scan_csv(
        "/mnt/c/Users/Administrator/Downloads/流感肺炎/接种.csv", infer_schema=None
    )
    .rename(lambda col: col.lower())
    .with_columns(
        pl.col("csrq").str.to_date(strict=False),
        pl.col("jz_sj").str.to_datetime(strict=False).dt.date(),
    )
    .with_columns(age=((pl.col("jz_sj") - pl.col("csrq")).dt.total_days()) / 365)
    .rename({'???"grda_code"': "grda_code"})
    .collect(engine="streaming")
)
# 写入数据库
con.execute("CREATE OR REPLACE TABLE test AS SELECT * FROM temp")


# 创建一个表作为测试
con.execute("""
    CREATE TABLE lg AS
    SELECT * FROM read_csv_auto(
        './接种.csv',
        types={'CSRQ': 'VARCHAR','JZ_SJ':'VARCHAR'},
        dateformat='%Y/%m/%d',
        timestampformat='%Y/%m/%d %H:%M:%S'
    )
""")

##较慢 mamba install SQLAlchemy duckdb-engine
import polars as pl
from sqlalchemy import create_engine

conn = create_engine(f"duckdb:///fy.db")

query = "SELECT * FROM test"

test = pl.read_database(query=query, connection=conn.connect())

评论

此博客中的热门博文

Rstudio 使用代理

ShadowsocksR Plus 就回来了~