出生队列接种率不合格乡镇率

 import math

import pandas as pd

import numpy as np

import janitor

import oracledb


shi_bm=pd.DataFrame.from_dict({'地区名称': {0: '兰州市',

          1: '嘉峪关市',

          2: '金昌市',

          3: '白银市',

          4: '天水市',

          5: '武威市',

          6: '张掖市',

          7: '平凉市',

          8: '酒泉市',

          9: '庆阳市',

          10: '定西市',

          11: '陇南市',

          12: '临夏回族自治州',

          13: '甘南藏族自治州',

          14: '兰州新区'},

         '地区编码': {0: 6201,

          1: 6202,

          2: 6203,

          3: 6204,

          4: 6205,

          5: 6206,

          6: 6207,

          7: 6208,

          8: 6209,

          9: 6210,

          10: 6211,

          11: 6212,

          12: 6229,

          13: 6230,

          14: 6232}}

).astype({'地区编码':'str'})


xian_bm=pd.DataFrame.from_dict({'地区名称': {1: '城关区',

  37: '七里河区',

  73: '西固区',

  109: '安宁区',

  145: '红古区',

  181: '永登县',

  217: '皋兰县',

  253: '榆中县',

  289: '嘉峪关市',

  325: '金川区',

  361: '永昌县',

  397: '白银区',

  433: '平川区',

  469: '靖远县',

  505: '会宁县',

  541: '景泰县',

  577: '秦州区',

  613: '麦积区',

  649: '清水县',

  685: '秦安县',

  721: '甘谷县',

  757: '武山县',

  793: '张家川回族自治县',

  829: '凉州区',

  865: '民勤县',

  901: '古浪县',

  937: '天祝藏族自治县',

  973: '甘州区',

  1009: '肃南裕固族自治县',

  1045: '民乐县',

  1081: '临泽县',

  1117: '高台县',

  1153: '山丹县',

  1189: '崆峒区',

  1225: '泾川县',

  1261: '灵台县',

  1297: '崇信县',

  1333: '庄浪县',

  1369: '静宁县',

  1405: '华亭市',

  1441: '肃州区',

  1477: '金塔县',

  1513: '瓜州县',

  1549: '肃北蒙古族自治县',

  1585: '阿克塞哈萨克族自治县',

  1621: '玉门市',

  1657: '敦煌市',

  1693: '西峰区',

  1729: '庆城县',

  1765: '环县',

  1801: '华池县',

  1837: '合水县',

  1873: '正宁县',

  1909: '宁县',

  1945: '镇原县',

  1981: '安定区',

  2017: '通渭县',

  2053: '陇西县',

  2089: '渭源县',

  2125: '临洮县',

  2161: '漳县',

  2197: '岷县',

  2233: '武都区',

  2269: '成县',

  2305: '文县',

  2341: '宕昌县',

  2377: '康县',

  2413: '西和县',

  2449: '礼县',

  2485: '徽县',

  2521: '两当县',

  2557: '临夏市',

  2593: '临夏县',

  2629: '康乐县',

  2665: '永靖县',

  2701: '广河县',

  2737: '和政县',

  2773: '东乡族自治县',

  2809: '积石山保安族东乡族撒拉族自治县',

  2845: '合作市',

  2881: '临潭县',

  2917: '卓尼县',

  2953: '舟曲县',

  2989: '迭部县',

  3025: '玛曲县',

  3061: '碌曲县',

  3097: '夏河县',

  3133: '兰州新区'},

 '地区编码': {1: 620102,

  37: 620103,

  73: 620104,

  109: 620105,

  145: 620111,

  181: 620121,

  217: 620122,

  253: 620123,

  289: 620201,

  325: 620302,

  361: 620321,

  397: 620402,

  433: 620403,

  469: 620421,

  505: 620422,

  541: 620423,

  577: 620502,

  613: 620503,

  649: 620521,

  685: 620522,

  721: 620523,

  757: 620524,

  793: 620525,

  829: 620602,

  865: 620621,

  901: 620622,

  937: 620623,

  973: 620702,

  1009: 620721,

  1045: 620722,

  1081: 620723,

  1117: 620724,

  1153: 620725,

  1189: 620802,

  1225: 620821,

  1261: 620822,

  1297: 620823,

  1333: 620825,

  1369: 620826,

  1405: 620881,

  1441: 620902,

  1477: 620921,

  1513: 620922,

  1549: 620923,

  1585: 620924,

  1621: 620981,

  1657: 620982,

  1693: 621002,

  1729: 621021,

  1765: 621022,

  1801: 621023,

  1837: 621024,

  1873: 621025,

  1909: 621026,

  1945: 621027,

  1981: 621102,

  2017: 621121,

  2053: 621122,

  2089: 621123,

  2125: 621124,

  2161: 621125,

  2197: 621126,

  2233: 621202,

  2269: 621221,

  2305: 621222,

  2341: 621223,

  2377: 621224,

  2413: 621225,

  2449: 621226,

  2485: 621227,

  2521: 621228,

  2557: 622901,

  2593: 622921,

  2629: 622922,

  2665: 622923,

  2701: 622924,

  2737: 622925,

  2773: 622926,

  2809: 622927,

  2845: 623001,

  2881: 623021,

  2917: 623022,

  2953: 623023,

  2989: 623024,

  3025: 623025,

  3061: 623026,

  3097: 623027,

  3133: 623201}}).astype({'地区编码':'str'})


def windows_to_wsl_path(windows_path):

    wsl_prefix = '/mnt/'

    path = windows_path.replace(':', '').replace('\\', '/').lower()

    wsl_path = wsl_prefix  + path

    return wsl_path


def clean_file(input_path):

    # Read Excel file and perform data cleaning

    df = (

        pd.read_excel(input_path, sheet_name='sheet1', skiprows=3)

          .clean_names()

          .apply(lambda x: x.fillna(method="ffill") if x.name in ['unnamed_0','unnamed_1','unnamed_2'] else x, axis=0)

          .filter(regex='^un|率')

          .query("unnamed_2 not in ['总人数','全程接种']")

          .apply(lambda x: pd.to_numeric(x.str.replace("%", ""), errors='coerce') if '率' in x.name else x, axis=0)       

    )


    # Rename columns

    rename_dict = {df.columns[0]: 'dq', df.columns[1]: 'bm', df.columns[2]: 'ym', df.columns[3]: 'jc'}

    for i in range(4, 22):

        rename_dict[df.columns[i]] = f'age_{i - 3}'

    df.rename(columns=rename_dict, inplace=True)


    # Fill missing values based on conditions

    df = df.apply(lambda x: x.fillna(0) if pd.api.types.is_numeric_dtype(x.dtype) else x.fillna('.'))


    return df



    # Save cleaned dataframe to a new Excel file

    df.to_excel(output_path)


input_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzl.xlsx')

output_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzltest.xlsx')


jzl=(

     clean_file(input_path)

     .assign(shi=lambda x:x['bm'].apply(lambda x: str(x)[:4]))

     .assign(xian=lambda x:x['bm'].apply(lambda x: str(x)[:6]))

     .assign(bm=lambda x:x['bm'].apply(lambda x: str(x)[:8]))

     .replace({'jc': {'.': 1}}))


shi=(

      jzl

      .groupby(['shi'])

      .agg(乡镇数=('bm', lambda x: x.nunique()))

)


xian=(

      jzl

      .groupby(['xian'])

      .agg(乡镇数=('bm', lambda x: x.nunique()))

)


def jzl_age(df,level, age_col,ym_list):

    if level == 'shi':

        group_cols = ['shi', 'ym', 'jc']

        merge_df = shi

    elif level == 'xian':

        group_cols = ['xian', 'ym', 'jc']

        merge_df = xian

    

    age_df = (

        df.query(f"{age_col} < 90")

          .groupby(group_cols)

          .agg(不合格乡镇数=('bm', lambda x: x.nunique())) 

          .reset_index()

          .merge(merge_df, how='left', left_on=group_cols[0], right_on=group_cols[0])

          .assign(不合格率=lambda x: x.不合格乡镇数 / x.乡镇数 * 100)

          .astype({'jc': 'str'})

          .assign(ym_jc=lambda x: x.ym.str.cat(x.jc, sep='_'))

          .query("ym_jc.isin(@ym_list)")

          .assign(age=age_col)

    )

    return age_df


age1_ym_list = ['卡介苗_1','乙肝疫苗_1','乙肝疫苗_2','乙肝疫苗_3','脊灰疫苗_1','脊灰疫苗_2','脊灰疫苗_3','百白破疫苗_1','百白破疫苗_2','百白破疫苗_3','含麻疹类疫苗_1','乙脑疫苗_1','A群流脑多糖疫苗_1','A群流脑多糖疫苗_2']

age2_ym_list=['甲肝疫苗_1','百白破疫苗_4','含麻疹类疫苗_2']

age3_ym_list=['乙脑疫苗_2']

age4_ym_list=['A群C群流脑多糖疫苗_1']

age5_ym_list=['脊灰疫苗_4']

age7_ym_list=['A群C群流脑多糖疫苗_2','白破疫苗_1']


shi_age1=jzl_age(df=jzl, level='shi',age_col='age_1', ym_list=age1_ym_list)

shi_age2=jzl_age(df=jzl, level='shi',age_col='age_2', ym_list=age2_ym_list)

shi_age3=jzl_age(df=jzl, level='shi',age_col='age_3', ym_list=age3_ym_list)

shi_age4=jzl_age(df=jzl, level='shi',age_col='age_4', ym_list=age4_ym_list)

shi_age5=jzl_age(df=jzl, level='shi',age_col='age_5', ym_list=age5_ym_list)

shi_age7=jzl_age(df=jzl, level='shi',age_col='age_7', ym_list=age7_ym_list)


(

    pd.concat([shi_age1,shi_age2,shi_age3,shi_age4,shi_age5,shi_age7])

    .drop('ym_jc',axis=1)

    .merge(shi_bm,how='left',left_on='shi',right_on='地区编码')

    .drop('shi',axis=1)

    .assign(不合格率=lambda x: round(x.不合格率.astype(float), 2))  # 保留2位小数

    .to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\shi_age.xlsx'),index=False)

)



xian_age1 = jzl_age(df=jzl, level='xian',age_col='age_1', ym_list=age1_ym_list)

xian_age2 = jzl_age(df=jzl, level='xian',age_col='age_2', ym_list=age2_ym_list)

xian_age3 = jzl_age(df=jzl, level='xian',age_col='age_3', ym_list=age3_ym_list)

xian_age4 = jzl_age(df=jzl, level='xian',age_col='age_4', ym_list=age4_ym_list)

xian_age5 = jzl_age(df=jzl, level='xian',age_col='age_5', ym_list=age5_ym_list)

xian_age7 = jzl_age(df=jzl, level='xian',age_col='age_7', ym_list=age7_ym_list)



(

    pd.concat([xian_age1,xian_age2,xian_age3,xian_age4,xian_age5,xian_age7])

    .drop('ym_jc',axis=1)

    .merge(xian_bm,how='left',left_on='xian',right_on='地区编码')

    .drop('xian',axis=1)

    .assign(不合格率=lambda x: round(x.不合格率.astype(float), 2))  # 保留2位小数

    .to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\xian_age.xlsx'),index=False)

)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理