import math
import pandas as pd
import numpy as np
import janitor
import oracledb
# Load the report sheet and reduce it to the label columns plus the rate
# ("率") columns, with the rate values converted to numbers.
df = (
    pd.read_excel(
        '/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx',
        sheet_name='sheet1',
        skiprows=3,
    )
    # pyjanitor header clean-up; the steps below refer to the resulting
    # 'unnamed_N' column names.
    .clean_names()
    # Forward-fill gaps in the first three label columns only.
    # NOTE(review): presumably these gaps come from merged cells in the
    # sheet -- confirm against the workbook.
    # `.ffill()` replaces the deprecated `fillna(method="ffill")`.
    .apply(
        lambda x: x.ffill()
        if x.name in ['unnamed_0', 'unnamed_1', 'unnamed_2']
        else x,
        axis=0,
    )
    # Keep columns whose name starts with "un" or contains "率".
    .filter(regex='^un|率')
    # Drop the rows whose third label is one of these two values.
    .query("unnamed_2 not in ['总人数','全程接种']")
    # Strip the percent sign from the rate columns and parse them as numbers;
    # anything unparsable becomes NaN.  Casting to str first keeps this
    # working when a column is already numeric (the `.str` accessor raises on
    # non-string data) and avoids silently dropping numeric cells in a mixed
    # column.
    .apply(
        lambda x: pd.to_numeric(
            x.astype(str).str.replace("%", "", regex=False),
            errors='coerce',
        )
        if '率' in x.name
        else x,
        axis=0,
    )
)
# Rename the columns by position: the first 4 become dq / bm / ym / jc and
# the following 18 become age_1 ... age_18.
# A single positional assignment replaces the original loop of in-place
# renames (whose body had lost its indentation).  Only columns that actually
# exist are renamed, so a frame with fewer than 22 columns no longer raises
# IndexError; any columns past the 22nd keep their current names.
_target_names = ['dq', 'bm', 'ym', 'jc'] + [f'age_{n}' for n in range(1, 19)]
df.columns = (
    _target_names[:len(df.columns)]
    + list(df.columns[len(_target_names):])
)
# Fill the remaining missing values according to each column's dtype.
# `dtype.kind` codes: 'b' boolean, 'i' signed integer, 'u' unsigned integer,
# 'f' floating point, 'c' complex.  Columns of those kinds get 0; every other
# column gets '.'.
# `apply` returns a new frame, so the result must be assigned back -- the
# original call discarded it and left `df` unchanged.
df = df.apply(
    lambda x: x.fillna(0) if x.dtype.kind in 'biufc' else x.fillna('.')
)
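# (web-page residue, not part of the script) Comments
# (web-page residue, not part of the script) Post a comment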