ibis 表 连接

 import ibis

import duckdb
import pandas as pd
from ibis import _
import ibis.selectors as s
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
from dateutil.relativedelta import relativedelta


ibis.options.interactive = True

def load_excel_to_ibis(excel_path):
    try:
        # 读取Excel文件
        df = pd.read_excel(excel_path)
       
        # 将所有object类型的列转换为字符串
        for col in df.select_dtypes(include=['object']).columns:
            df[col] = df[col].astype(str)
       
        # 转换为Ibis表格
        ibis_table = ibis.memtable(df)
        return ibis_table
   
    except Exception as e:
        print(f"错误: {str(e)}")
        return None


ga = load_excel_to_ibis("/mnt/c/users/xuefe/Downloads/2023-2024接种个案.xlsx")

jg=load_excel_to_ibis("/mnt/c/users/xuefe/Downloads/疫苗批号价格.xlsx")

jg=(
    jg
    .select('疫苗批号','价格')
    .distinct(keep="first")
)

(
    ga
    .group_by("ym_ph")
    .aggregate(n=_.count())
)

# jg = jg.rename(ym_ph='疫苗批号',dj='价格')

(
    ga
    .asof_join(jg, on='ym_ph')
    .drop("ym_ph_right")
)

(
    ga
    .asof_join(jg, on=('ym_ph', '疫苗批号'))
    .drop('疫苗批号')
)

########################################
from datetime import datetime, timedelta
import ibis
ibis.options.interactive = True
sensors = ibis.memtable(
    {
        "site": ["a", "b", "a", "b", "a"],
        "humidity": [0.3, 0.4, 0.5, 0.6, 0.7],
        "event_time": [
            datetime(2024, 11, 16, 12, 0, 15, 500000),
            datetime(2024, 11, 16, 12, 0, 15, 700000),
            datetime(2024, 11, 17, 18, 12, 14, 950000),
            datetime(2024, 11, 17, 18, 12, 15, 120000),
            datetime(2024, 11, 17, 18, 12, 15, 120000),
        ],
    }
)
events = ibis.memtable(
    {
        "site": ["a", "b", "a"],
        "event_type": [
            "cloud coverage",
            "rain start",
            "rain stop",
        ],
        "event_time": [
            datetime(2024, 11, 16, 12, 0, 15, 400000),
            datetime(2024, 11, 17, 18, 12, 15, 100000),
            datetime(2024, 11, 18, 18, 12, 15, 100000),
        ],
    }
)

tolerance = timedelta(seconds=1)

(
    sensors
    .asof_join(events, on="event_time", predicates="site",tolerance=tolerance)
    .drop("event_time_right")
    .order_by("event_time")
)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理