出生队列接种率不合格乡镇率

import math

import pandas as pd

import numpy as np

import janitor

import oracledb

shi_bm=pd.DataFrame.from_dict({'地区名称': {0: '兰州市',

1: '嘉峪关市',

2: '金昌市',

3: '白银市',

4: '天水市',

5: '武威市',

6: '张掖市',

7: '平凉市',

8: '酒泉市',

9: '庆阳市',

10: '定西市',

11: '陇南市',

12: '临夏回族自治州',

13: '甘南藏族自治州',

14: '兰州新区'},

'地区编码': {0: 6201,

1: 6202,

2: 6203,

3: 6204,

4: 6205,

5: 6206,

6: 6207,

7: 6208,

8: 6209,

9: 6210,

10: 6211,

11: 6212,

12: 6229,

13: 6230,

14: 6232}}

).astype({'地区编码':'str'})

xian_bm=pd.DataFrame.from_dict({'地区名称': {1: '城关区',

37: '七里河区',

73: '西固区',

109: '安宁区',

145: '红古区',

181: '永登县',

217: '皋兰县',

253: '榆中县',

289: '嘉峪关市',

325: '金川区',

361: '永昌县',

397: '白银区',

433: '平川区',

469: '靖远县',

505: '会宁县',

541: '景泰县',

577: '秦州区',

613: '麦积区',

649: '清水县',

685: '秦安县',

721: '甘谷县',

757: '武山县',

793: '张家川回族自治县',

829: '凉州区',

865: '民勤县',

901: '古浪县',

937: '天祝藏族自治县',

973: '甘州区',

1009: '肃南裕固族自治县',

1045: '民乐县',

1081: '临泽县',

1117: '高台县',

1153: '山丹县',

1189: '崆峒区',

1225: '泾川县',

1261: '灵台县',

1297: '崇信县',

1333: '庄浪县',

1369: '静宁县',

1405: '华亭市',

1441: '肃州区',

1477: '金塔县',

1513: '瓜州县',

1549: '肃北蒙古族自治县',

1585: '阿克塞哈萨克族自治县',

1621: '玉门市',

1657: '敦煌市',

1693: '西峰区',

1729: '庆城县',

1765: '环县',

1801: '华池县',

1837: '合水县',

1873: '正宁县',

1909: '宁县',

1945: '镇原县',

1981: '安定区',

2017: '通渭县',

2053: '陇西县',

2089: '渭源县',

2125: '临洮县',

2161: '漳县',

2197: '岷县',

2233: '武都区',

2269: '成县',

2305: '文县',

2341: '宕昌县',

2377: '康县',

2413: '西和县',

2449: '礼县',

2485: '徽县',

2521: '两当县',

2557: '临夏市',

2593: '临夏县',

2629: '康乐县',

2665: '永靖县',

2701: '广河县',

2737: '和政县',

2773: '东乡族自治县',

2809: '积石山保安族东乡族撒拉族自治县',

2845: '合作市',

2881: '临潭县',

2917: '卓尼县',

2953: '舟曲县',

2989: '迭部县',

3025: '玛曲县',

3061: '碌曲县',

3097: '夏河县',

3133: '兰州新区'},

'地区编码': {1: 620102,

37: 620103,

73: 620104,

109: 620105,

145: 620111,

181: 620121,

217: 620122,

253: 620123,

289: 620201,

325: 620302,

361: 620321,

397: 620402,

433: 620403,

469: 620421,

505: 620422,

541: 620423,

577: 620502,

613: 620503,

649: 620521,

685: 620522,

721: 620523,

757: 620524,

793: 620525,

829: 620602,

865: 620621,

901: 620622,

937: 620623,

973: 620702,

1009: 620721,

1045: 620722,

1081: 620723,

1117: 620724,

1153: 620725,

1189: 620802,

1225: 620821,

1261: 620822,

1297: 620823,

1333: 620825,

1369: 620826,

1405: 620881,

1441: 620902,

1477: 620921,

1513: 620922,

1549: 620923,

1585: 620924,

1621: 620981,

1657: 620982,

1693: 621002,

1729: 621021,

1765: 621022,

1801: 621023,

1837: 621024,

1873: 621025,

1909: 621026,

1945: 621027,

1981: 621102,

2017: 621121,

2053: 621122,

2089: 621123,

2125: 621124,

2161: 621125,

2197: 621126,

2233: 621202,

2269: 621221,

2305: 621222,

2341: 621223,

2377: 621224,

2413: 621225,

2449: 621226,

2485: 621227,

2521: 621228,

2557: 622901,

2593: 622921,

2629: 622922,

2665: 622923,

2701: 622924,

2737: 622925,

2773: 622926,

2809: 622927,

2845: 623001,

2881: 623021,

2917: 623022,

2953: 623023,

2989: 623024,

3025: 623025,

3061: 623026,

3097: 623027,

3133: 623201}}).astype({'地区编码':'str'})

def windows_to_wsl_path(windows_path):

wsl_prefix = '/mnt/'

path = windows_path.replace(':', '').replace('\\', '/').lower()

wsl_path = wsl_prefix + path

return wsl_path

def clean_file(input_path):

# Read Excel file and perform data cleaning

df = (

pd.read_excel(input_path, sheet_name='sheet1', skiprows=3)

.clean_names()

.apply(lambda x: x.fillna(method="ffill") if x.name in ['unnamed_0','unnamed_1','unnamed_2'] else x, axis=0)

.filter(regex='^un|率')

.query("unnamed_2 not in ['总人数','全程接种']")

.apply(lambda x: pd.to_numeric(x.str.replace("%", ""), errors='coerce') if '率' in x.name else x, axis=0)

)

# Rename columns

rename_dict = {df.columns[0]: 'dq', df.columns[1]: 'bm', df.columns[2]: 'ym', df.columns[3]: 'jc'}

for i in range(4, 22):

rename_dict[df.columns[i]] = f'age_{i - 3}'

df.rename(columns=rename_dict, inplace=True)

# Fill missing values based on conditions

df = df.apply(lambda x: x.fillna(0) if pd.api.types.is_numeric_dtype(x.dtype) else x.fillna('.'))

return df

# Save cleaned dataframe to a new Excel file

df.to_excel(output_path)

input_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzl.xlsx')

output_path = windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\gnldet_jzltest.xlsx')

jzl=(

clean_file(input_path)

.assign(shi=lambda x:x['bm'].apply(lambda x: str(x)[:4]))

.assign(xian=lambda x:x['bm'].apply(lambda x: str(x)[:6]))

.assign(bm=lambda x:x['bm'].apply(lambda x: str(x)[:8]))

.replace({'jc': {'.': 1}}))

shi=(

jzl

.groupby(['shi'])

.agg(乡镇数=('bm', lambda x: x.nunique()))

)

xian=(

jzl

.groupby(['xian'])

.agg(乡镇数=('bm', lambda x: x.nunique()))

)

def jzl_age(df,level, age_col,ym_list):

if level == 'shi':

group_cols = ['shi', 'ym', 'jc']

merge_df = shi

elif level == 'xian':

group_cols = ['xian', 'ym', 'jc']

merge_df = xian

age_df = (

df.query(f"{age_col} < 90")

.groupby(group_cols)

.agg(不合格乡镇数=('bm', lambda x: x.nunique()))

.reset_index()

.merge(merge_df, how='left', left_on=group_cols[0], right_on=group_cols[0])

.assign(不合格率=lambda x: x.不合格乡镇数 / x.乡镇数 * 100)

.astype({'jc': 'str'})

.assign(ym_jc=lambda x: x.ym.str.cat(x.jc, sep='_'))

.query("ym_jc.isin(@ym_list)")

.assign(age=age_col)

)

return age_df

age1_ym_list = ['卡介苗_1','乙肝疫苗_1','乙肝疫苗_2','乙肝疫苗_3','脊灰疫苗_1','脊灰疫苗_2','脊灰疫苗_3','百白破疫苗_1','百白破疫苗_2','百白破疫苗_3','含麻疹类疫苗_1','乙脑疫苗_1','A群流脑多糖疫苗_1','A群流脑多糖疫苗_2']

age2_ym_list=['甲肝疫苗_1','百白破疫苗_4','含麻疹类疫苗_2']

age3_ym_list=['乙脑疫苗_2']

age4_ym_list=['A群C群流脑多糖疫苗_1']

age5_ym_list=['脊灰疫苗_4']

age7_ym_list=['A群C群流脑多糖疫苗_2','白破疫苗_1']

shi_age1=jzl_age(df=jzl, level='shi',age_col='age_1', ym_list=age1_ym_list)

shi_age2=jzl_age(df=jzl, level='shi',age_col='age_2', ym_list=age2_ym_list)

shi_age3=jzl_age(df=jzl, level='shi',age_col='age_3', ym_list=age3_ym_list)

shi_age4=jzl_age(df=jzl, level='shi',age_col='age_4', ym_list=age4_ym_list)

shi_age5=jzl_age(df=jzl, level='shi',age_col='age_5', ym_list=age5_ym_list)

shi_age7=jzl_age(df=jzl, level='shi',age_col='age_7', ym_list=age7_ym_list)

(

pd.concat([shi_age1,shi_age2,shi_age3,shi_age4,shi_age5,shi_age7])

.drop('ym_jc',axis=1)

.merge(shi_bm,how='left',left_on='shi',right_on='地区编码')

.drop('shi',axis=1)

.assign(不合格率=lambda x: round(x.不合格率.astype(float), 2)) # 保留2位小数

.to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\shi_age.xlsx'),index=False)

)

xian_age1 = jzl_age(df=jzl, level='xian',age_col='age_1', ym_list=age1_ym_list)

xian_age2 = jzl_age(df=jzl, level='xian',age_col='age_2', ym_list=age2_ym_list)

xian_age3 = jzl_age(df=jzl, level='xian',age_col='age_3', ym_list=age3_ym_list)

xian_age4 = jzl_age(df=jzl, level='xian',age_col='age_4', ym_list=age4_ym_list)

xian_age5 = jzl_age(df=jzl, level='xian',age_col='age_5', ym_list=age5_ym_list)

xian_age7 = jzl_age(df=jzl, level='xian',age_col='age_7', ym_list=age7_ym_list)

(

pd.concat([xian_age1,xian_age2,xian_age3,xian_age4,xian_age5,xian_age7])

.drop('ym_jc',axis=1)

.merge(xian_bm,how='left',left_on='xian',right_on='地区编码')

.drop('xian',axis=1)

.assign(不合格率=lambda x: round(x.不合格率.astype(float), 2)) # 保留2位小数

.to_excel(windows_to_wsl_path(r'C:\Users\xuefeng\Downloads\xian_age.xlsx'),index=False)

)

搜索此博客

xuefliang

出生队列接种率不合格乡镇率

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下查看端口占用情况的方法

Rstudio 使用代理

出生队列接种率不合格乡镇率

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下 查看端口占用情况的方法

Rstudio 使用代理

windows 命令行下查看端口占用情况的方法