ibis 表 连接
import ibis
import duckdb
import pandas as pd
from ibis import _
import ibis.selectors as s
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
from dateutil.relativedelta import relativedelta
ibis.options.interactive = True
def load_excel_to_ibis(excel_path):
try:
# 读取Excel文件
df = pd.read_excel(excel_path)
# 将所有object类型的列转换为字符串
for col in df.select_dtypes(include=['object']).columns:
df[col] = df[col].astype(str)
# 转换为Ibis表格
ibis_table = ibis.memtable(df)
return ibis_table
except Exception as e:
print(f"错误: {str(e)}")
return None
ga = load_excel_to_ibis("/mnt/c/users/xuefe/Downloads/2023-2024接种个案.xlsx")
jg=load_excel_to_ibis("/mnt/c/users/xuefe/Downloads/疫苗批号价格.xlsx")
jg=(
jg
.select('疫苗批号','价格')
.distinct(keep="first")
)
(
ga
.group_by("ym_ph")
.aggregate(n=_.count())
)
# jg = jg.rename(ym_ph='疫苗批号',dj='价格')
(
ga
.asof_join(jg, on='ym_ph')
.drop("ym_ph_right")
)
(
ga
.asof_join(jg, on=('ym_ph', '疫苗批号'))
.drop('疫苗批号')
)
########################################
from datetime import datetime, timedelta
import ibis
ibis.options.interactive = True
sensors = ibis.memtable(
{
"site": ["a", "b", "a", "b", "a"],
"humidity": [0.3, 0.4, 0.5, 0.6, 0.7],
"event_time": [
datetime(2024, 11, 16, 12, 0, 15, 500000),
datetime(2024, 11, 16, 12, 0, 15, 700000),
datetime(2024, 11, 17, 18, 12, 14, 950000),
datetime(2024, 11, 17, 18, 12, 15, 120000),
datetime(2024, 11, 17, 18, 12, 15, 120000),
],
}
)
events = ibis.memtable(
{
"site": ["a", "b", "a"],
"event_type": [
"cloud coverage",
"rain start",
"rain stop",
],
"event_time": [
datetime(2024, 11, 16, 12, 0, 15, 400000),
datetime(2024, 11, 17, 18, 12, 15, 100000),
datetime(2024, 11, 18, 18, 12, 15, 100000),
],
}
)
tolerance = timedelta(seconds=1)
(
sensors
.asof_join(events, on="event_time", predicates="site",tolerance=tolerance)
.drop("event_time_right")
.order_by("event_time")
)
评论
发表评论