python出生队列接种率

# -*- coding: utf-8 -*-
import math
import pandas as pd
import numpy as np
import janitor

shi_bm=pd.DataFrame.from_dict({'地区名称': {0: '兰州市',
          1: '嘉峪关市',
          2: '金昌市',
          3: '白银市',
          4: '天水市',
          5: '武威市',
          6: '张掖市',
          7: '平凉市',
          8: '酒泉市',
          9: '庆阳市',
          10: '定西市',
          11: '陇南市',
          12: '临夏回族自治州',
          13: '甘南藏族自治州',
          14: '兰州新区'},
         '地区编码': {0: 6201,
          1: 6202,
          2: 6203,
          3: 6204,
          4: 6205,
          5: 6206,
          6: 6207,
          7: 6208,
          8: 6209,
          9: 6210,
          10: 6211,
          11: 6212,
          12: 6229,
          13: 6230,
          14: 6232}}
).astype({'地区编码':'str'})

xian_bm=pd.DataFrame.from_dict({'地区名称': {1: '城关区',
  37: '七里河区',
  73: '西固区',
  109: '安宁区',
  145: '红古区',
  181: '永登县',
  217: '皋兰县',
  253: '榆中县',
  289: '嘉峪关市',
  325: '金川区',
  361: '永昌县',
  397: '白银区',
  433: '平川区',
  469: '靖远县',
  505: '会宁县',
  541: '景泰县',
  577: '秦州区',
  613: '麦积区',
  649: '清水县',
  685: '秦安县',
  721: '甘谷县',
  757: '武山县',
  793: '张家川回族自治县',
  829: '凉州区',
  865: '民勤县',
  901: '古浪县',
  937: '天祝藏族自治县',
  973: '甘州区',
  1009: '肃南裕固族自治县',
  1045: '民乐县',
  1081: '临泽县',
  1117: '高台县',
  1153: '山丹县',
  1189: '崆峒区',
  1225: '泾川县',
  1261: '灵台县',
  1297: '崇信县',
  1333: '庄浪县',
  1369: '静宁县',
  1405: '华亭市',
  1441: '肃州区',
  1477: '金塔县',
  1513: '瓜州县',
  1549: '肃北蒙古族自治县',
  1585: '阿克塞哈萨克族自治县',
  1621: '玉门市',
  1657: '敦煌市',
  1693: '西峰区',
  1729: '庆城县',
  1765: '环县',
  1801: '华池县',
  1837: '合水县',
  1873: '正宁县',
  1909: '宁县',
  1945: '镇原县',
  1981: '安定区',
  2017: '通渭县',
  2053: '陇西县',
  2089: '渭源县',
  2125: '临洮县',
  2161: '漳县',
  2197: '岷县',
  2233: '武都区',
  2269: '成县',
  2305: '文县',
  2341: '宕昌县',
  2377: '康县',
  2413: '西和县',
  2449: '礼县',
  2485: '徽县',
  2521: '两当县',
  2557: '临夏市',
  2593: '临夏县',
  2629: '康乐县',
  2665: '永靖县',
  2701: '广河县',
  2737: '和政县',
  2773: '东乡族自治县',
  2809: '积石山保安族东乡族撒拉族自治县',
  2845: '合作市',
  2881: '临潭县',
  2917: '卓尼县',
  2953: '舟曲县',
  2989: '迭部县',
  3025: '玛曲县',
  3061: '碌曲县',
  3097: '夏河县',
  3133: '兰州新区'},
 '地区编码': {1: 620102,
  37: 620103,
  73: 620104,
  109: 620105,
  145: 620111,
  181: 620121,
  217: 620122,
  253: 620123,
  289: 620201,
  325: 620302,
  361: 620321,
  397: 620402,
  433: 620403,
  469: 620421,
  505: 620422,
  541: 620423,
  577: 620502,
  613: 620503,
  649: 620521,
  685: 620522,
  721: 620523,
  757: 620524,
  793: 620525,
  829: 620602,
  865: 620621,
  901: 620622,
  937: 620623,
  973: 620702,
  1009: 620721,
  1045: 620722,
  1081: 620723,
  1117: 620724,
  1153: 620725,
  1189: 620802,
  1225: 620821,
  1261: 620822,
  1297: 620823,
  1333: 620825,
  1369: 620826,
  1405: 620881,
  1441: 620902,
  1477: 620921,
  1513: 620922,
  1549: 620923,
  1585: 620924,
  1621: 620981,
  1657: 620982,
  1693: 621002,
  1729: 621021,
  1765: 621022,
  1801: 621023,
  1837: 621024,
  1873: 621025,
  1909: 621026,
  1945: 621027,
  1981: 621102,
  2017: 621121,
  2053: 621122,
  2089: 621123,
  2125: 621124,
  2161: 621125,
  2197: 621126,
  2233: 621202,
  2269: 621221,
  2305: 621222,
  2341: 621223,
  2377: 621224,
  2413: 621225,
  2449: 621226,
  2485: 621227,
  2521: 621228,
  2557: 622901,
  2593: 622921,
  2629: 622922,
  2665: 622923,
  2701: 622924,
  2737: 622925,
  2773: 622926,
  2809: 622927,
  2845: 623001,
  2881: 623021,
  2917: 623022,
  2953: 623023,
  2989: 623024,
  3025: 623025,
  3061: 623026,
  3097: 623027,
  3133: 623201}}).astype({'地区编码':'str'})

def windows_to_wsl_path(windows_path):
    wsl_prefix = '/mnt/'
    path = windows_path.replace(':', '').replace('\\', '/').lower()
    wsl_path = wsl_prefix  + path
    return wsl_path

def clean_file(input_path):
    # Read Excel file and perform data cleaning
    df = (
        pd.read_excel(input_path, sheet_name='sheet1', skiprows=3)
          .clean_names()
          .apply(lambda x: x.fillna(method="ffill") if x.name in ['unnamed_0','unnamed_1','unnamed_2'] else x, axis=0)
          .filter(regex='^un|率')
          .query("unnamed_2 not in ['总人数','全程接种']")
          .apply(lambda x: pd.to_numeric(x.str.replace("%", ""), errors='coerce') if '率' in x.name else x, axis=0)       
    )

    # Rename columns
    rename_dict = {df.columns[0]: 'dq', df.columns[1]: 'bm', df.columns[2]: 'ym', df.columns[3]: 'jc'}
    for i in range(4, 22):
        rename_dict[df.columns[i]] = f'age_{i - 3}'
    df.rename(columns=rename_dict, inplace=True)

    # Fill missing values based on conditions
    df = df.apply(lambda x: x.fillna(0) if pd.api.types.is_numeric_dtype(x.dtype) else x.fillna('.'))

    return df


    # Save cleaned dataframe to a new Excel file
    df.to_excel(output_path)

input_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzl.xlsx')
output_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzltest.xlsx')

jzl=(
     clean_file(input_path)
     .assign(shi=lambda x:x['bm'].apply(lambda x: str(x)[:4]))
     .assign(xian=lambda x:x['bm'].apply(lambda x: str(x)[:6]))
     .assign(bm=lambda x:x['bm'].apply(lambda x: str(x)[:8]))
     .replace({'jc': {'.': 1}}))

shi=(
      jzl
      .groupby(['shi'])
      .agg(乡镇数=('bm', lambda x: x.nunique()))
)

xian=(
      jzl
      .groupby(['xian'])
      .agg(乡镇数=('bm', lambda x: x.nunique()))
)

def calculate_age(jzl_df, dqbm, df,ym_jc, age):
    dq_age = (
        jzl_df
        .query(f"{age}<90")
        .groupby([dqbm,'ym','jc'])
        .agg(不合格乡镇数=('bm', lambda x: x.nunique()))
        .reset_index()
        .merge(df,how='left',left_on=dqbm,right_on=dqbm)
        .assign(不合格率=lambda x:x.不合格乡镇数/x.乡镇数*100)
        .assign(ym_jc=lambda x: x[['ym', 'jc']].apply(lambda x: f"{x[0]}_{x[1]}", axis=1))
        .query(f"ym_jc in {ym_jc}")
        .assign(age=age)
    )
    return dq_age


ym_jc_1=['卡介苗_1','乙肝疫苗_1','乙肝疫苗_2','乙肝疫苗_3','脊灰疫苗_1','脊灰疫苗_2','脊灰疫苗_3','百白破疫苗_1','百白破疫苗_2','百白破疫苗_3','含麻疹类疫苗_1','乙脑疫苗_1','A群流脑多糖疫苗_1','A群流脑多糖疫苗_2']
ym_jc_2=['甲肝疫苗_1','百白破疫苗_4','含麻疹类疫苗_2']
ym_jc_3=['乙脑疫苗_2']
ym_jc_4=['A群C群流脑多糖疫苗_1']
ym_jc_5=['脊灰疫苗_4']
ym_jc_7=['A群C群流脑多糖疫苗_2','白破疫苗_1']

shi_age1=calculate_age(jzl,'shi',shi,ym_jc_1,"age_1")
shi_age2=calculate_age(jzl,'shi',shi,ym_jc_2,"age_2")
shi_age3=calculate_age(jzl,'shi',shi,ym_jc_3,"age_3")
shi_age4=calculate_age(jzl,'shi',shi,ym_jc_4,"age_4")
shi_age5=calculate_age(jzl,'shi',shi,ym_jc_5,"age_5")
shi_age7= calculate_age(jzl,'shi',shi,ym_jc_7,"age_7")

xian_age1=calculate_age(jzl,'xian',xian,ym_jc_1,"age_1")
xian_age2=calculate_age(jzl,'xian',xian,ym_jc_2,"age_2")
xian_age3=calculate_age(jzl,'xian',xian,ym_jc_3,"age_3")
xian_age4=calculate_age(jzl,'xian',xian,ym_jc_4,"age_4")
xian_age5=calculate_age(jzl,'xian',xian,ym_jc_5,"age_5")
xian_age7= calculate_age(jzl,'xian',xian,ym_jc_7,"age_7")

(
    pd.concat([shi_age1,shi_age2,shi_age3,shi_age4,shi_age5,shi_age7])
    .drop('ym_jc',axis=1)
    .merge(shi_bm,how='left',left_on='shi',right_on='地区编码')
    .drop('shi',axis=1)
    .to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\shi_age.xlsx'),index=False)
)

(
    pd.concat([xian_age1,xian_age2,xian_age3,xian_age4,xian_age5,xian_age7])
    .drop('ym_jc',axis=1)
    .merge(xian_bm,how='left',left_on='xian',right_on='地区编码')
    .drop('xian',axis=1)
    .to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\xian_age.xlsx'),index=False)
)
搜索此博客

xuefliang

python出生队列接种率

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下查看端口占用情况的方法

Rstudio 使用代理

python出生队列接种率

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下 查看端口占用情况的方法

Rstudio 使用代理

windows 命令行下查看端口占用情况的方法