ibis实战

 import ibis

import pandas as pd
from ibis import _
import ibis.selectors as s
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

ibis.options.interactive = True

con=ibis.duckdb.connect()

brk=con.read_xlsx("/mnt/c/users/Administrator/Downloads/report.xlsx")

tj=(
    brk
    .mutate(mon=_.时间.substr(5,2))
    .group_by('mon')
    .aggregate(
        fbs=_.发病数.sum()
    )
    .order_by('mon')
    .execute()
)

tj.to_excel("/mnt/c/users/Administrator/Downloads/tj.xlsx")


bl=con.read_csv("/mnt/c/users/Administrator/Downloads/2020.csv")

tmp=(
    bl
    .mutate(age=ibis.case().when(_.年龄.contains('月'),0)
            .when(_.年龄.contains('岁'),_.年龄.re_extract(r'(\d+)岁', 1).cast('int'))
            .else_(0)
            .end())
    .mutate(age_group=ibis.case().when(_.age==0,'0岁')
            .when((_.age<=4) & (_.age>=1),'1-4岁')
            .when((_.age>=5) & (_.age<=9),'5-9岁')
            .when(_.age>=10,'>=10岁')
            .else_('未知')
            .end())
)

(
    tmp
    .group_by("age_group")
    .aggregate(
        n=_.年龄.count()
    )
    .mutate(prop=lambda x: (x.n / x.n.sum()*100).round(2))
    .execute()
    .to_excel("/mnt/c/users/Administrator/Downloads/f2020.xlsx")
)

(
    bl
    .mutate(zjhm=_.有效证件号.replace("'", ""))
    .filter(_.zjhm.notnull())
    .execute()
    .to_excel("/mnt/c/users/Administrator/Downloads/tmp.xlsx")
)

(
    bl
    .mutate(zjhm=_.有效证件号.replace("'", ""))
    .filter(_.zjhm.isin(['621226202411207222','640381201605302517']))
)



#####################################
import ibis
import ibis.selectors as s
from ibis import _
ibis.options.interactive = True

con=ibis.duckdb.connect()

brk=con.read_csv("/mnt/c/Users/xuefl/Downloads/报告卡2025-03-21+11_36_40.csv")

my=con.read_xlsx('/mnt/c/users/xuefl/Downloads/免疫史查询导入模板.xlsx')

brk=(
    brk
    .mutate(age=ibis.case().when(_.年龄.contains('月'),0)
            .when(_.年龄.contains('岁'),_.年龄.re_extract(r'(\d+)岁', 1).cast('int'))
            .else_(0)
            .end())
    .mutate(age_group=ibis.case().when(_.age==0,'0岁')
            .when((_.age<=4) & (_.age>=1),'1-4岁')
            .when((_.age>=5) & (_.age<=9),'5-9岁')
            .when(_.age>=10,'>=10岁')
            .else_('未知')
            .end())
    .mutate(zjhm=_.有效证件号.replace("'",""))
    .asof_join(my,on=('zjhm','身份证号码'))
    .mutate(shi=_.报告单位地区编码.cast('string').substr(0,4))
)

# tmp=(
#     brk
#     .filter(lambda t:t.shi.re_search("^62"))
#     .execute()
# )


tmp=(
    brk
    .filter(_.shi.re_search("^62"))
    .group_by('age_group','百白破疫苗免疫史')
    .aggregate(n=_.卡片ID.count())
    .pivot_wider(names_from='age_group',values_from='n')
    .order_by('百白破疫苗免疫史')
    .select('百白破疫苗免疫史','0岁','1-4岁','5-9岁','>=10岁')
    .execute()
)

# tmp2=(
#     brk
#     .mutate(yue=_.发病日期.cast('date')-_.出生日期)
#     .execute()
# )

tmp2 = (
    brk
    .mutate(发病日期=_.发病日期.cast('date'))
    .mutate(
        # 计算年份差
        years_diff=(_.发病日期.year() - _.出生日期.year()),
        # 计算月份差
        months_diff=(_.发病日期.month() - _.出生日期.month()),
        # 计算日差的调整项
        day_adjust=ibis.case()
            .when(_.发病日期.day() < _.出生日期.day(), -1)
            .else_(0)
            .end()
    )
    # 最终月龄计算:年份差×12 + 月份差 + 日调整
    .mutate(月龄=_.years_diff * 12 + _.months_diff + _.day_adjust)
)

tj=(
    tmp2
    .filter((_.月龄>3) & (_.月龄<18*12))
    .group_by('百白破疫苗免疫史')
    .aggregate(n=_.卡片ID.count())
    .mutate(prop=_.n/5038*100)
)

tj.execute().to_excel('/mnt/c/users/xuefl/Downloads/tj.xlsx',index=False)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理