polars duckdb
import polars as pl
import polars.selectors as cs
import duckdb
con = duckdb.connect("/mnt/c/Users/Administrator/Downloads/流感肺炎/fy.db")
# 获取所有表名
tables = con.execute("SHOW TABLES").pl()
print(tables)
# 查看 test 表的列信息
schema = con.execute("DESCRIBE test").pl()
print(schema)
# 读数据库表
test = con.execute("""
SELECT * FROM test
""").pl(lazy=True)
# 比较慢
# test2 = pl.read_database("SELECT * FROM test", connection=con).lazy()
test.collect_schema()
# 关闭连接
con.close()
#######################
import polars as pl
import polars.selectors as cs
import duckdb
con = duckdb.connect("/mnt/c/Users/Administrator/Downloads/流感肺炎/fy.db")
temp = (
pl.scan_csv(
"/mnt/c/Users/Administrator/Downloads/流感肺炎/接种.csv", infer_schema=None
)
.rename(lambda col: col.lower())
.with_columns(
pl.col("csrq").str.to_date(strict=False),
pl.col("jz_sj").str.to_datetime(strict=False).dt.date(),
)
.with_columns(age=((pl.col("jz_sj") - pl.col("csrq")).dt.total_days()) / 365)
.rename({'???"grda_code"': "grda_code"})
.collect(engine="streaming")
)
# 写入数据库
con.execute("CREATE OR REPLACE TABLE test AS SELECT * FROM temp")
# 创建一个表作为测试
con.execute("""
CREATE TABLE lg AS
SELECT * FROM read_csv_auto(
'./接种.csv',
types={'CSRQ': 'VARCHAR','JZ_SJ':'VARCHAR'},
dateformat='%Y/%m/%d',
timestampformat='%Y/%m/%d %H:%M:%S'
)
""")
##较慢 mamba install SQLAlchemy duckdb-engine
import polars as pl
from sqlalchemy import create_engine
conn = create_engine(f"duckdb:///fy.db")
query = "SELECT * FROM test"
test = pl.read_database(query=query, connection=conn.connect())
评论
发表评论