接种率数据处理

2022年12月31日

import math
import pandas as pd
import numpy as np
import janitor
import oracledb

def _ffill_label_column(col):
    """Forward-fill one of the three leading label columns; pass others through.

    Only the columns named 'unnamed_0', 'unnamed_1' and 'unnamed_2' are filled.
    NOTE(review): presumably these are header-less label columns whose blanks
    come from merged cells in the sheet -- confirm against the workbook.
    """
    if col.name in ['unnamed_0', 'unnamed_1', 'unnamed_2']:
        # Series.ffill() replaces fillna(method="ffill"), which is deprecated
        # in recent pandas releases.
        return col.ffill()
    return col


def _rate_to_number(col):
    """Convert a rate column (name contains '率') to numeric; pass others through.

    The '%' sign is stripped and anything that still cannot be parsed becomes
    NaN (errors='coerce').
    """
    if '率' not in col.name:
        return col
    # Cast to str first: the .str accessor raises on a column pandas already
    # read as numeric, and yields NaN for numeric cells in a mixed column.
    # Numeric cells keep their value as-is (they are not rescaled).
    as_text = col.astype(str).str.replace("%", "", regex=False)
    return pd.to_numeric(as_text, errors='coerce')


# Load the sheet (skipping its first three rows), normalise the column names,
# keep only the label columns ('un...') and the rate columns ('率'), drop the
# rows whose third label is '总人数' or '全程接种', and make the rates numeric.
# clean_names() is not a pandas method; presumably it is registered by the
# `janitor` import at the top of the file.
df = (
    pd.read_excel('/mnt/c/Users/xuefe/Downloads/gnldet_jzl.xlsx', sheet_name='sheet1', skiprows=3)
    .clean_names()
    .apply(_ffill_label_column, axis=0)
    .filter(regex='^un|率')
    .query("unnamed_2 not in ['总人数','全程接种']")
    .apply(_rate_to_number, axis=0)
)

# Rename the first 4 columns to 'dq', 'bm', 'ym', 'jc' and the following 18
# columns to 'age_1' .. 'age_18'.
# The names are assigned by position in a single step: label-based rename()
# would also hit any other column sharing the same label, and indexing
# df.columns[i] up to 21 raises IndexError when the sheet has fewer columns.
# Columns beyond the 22nd keep their current names; if there are fewer than
# 22 columns only the ones that exist are renamed.
target_names = ['dq', 'bm', 'ym', 'jc'] + [f'age_{n}' for n in range(1, 19)]
current_names = list(df.columns)
renamed_count = min(len(current_names), len(target_names))
df.columns = target_names[:renamed_count] + current_names[renamed_count:]


# Fill missing values column by column, depending on the column's dtype.
# dtype.kind codes: 'b' boolean, 'i' signed integer, 'u' unsigned integer,
# 'f' floating point, 'c' complex. Columns of those kinds are filled with 0;
# every other column is filled with the placeholder '.'.
# DataFrame.apply returns a new frame, so the result is assigned back to df;
# without the assignment the filled values are discarded.
df = df.apply(lambda x: x.fillna(0) if x.dtype.kind in 'biufc' else x.fillna('.'))

搜索此博客

xuefliang

接种率数据处理

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下查看端口占用情况的方法

Rstudio 使用代理

接种率数据处理

评论

发表评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

windows 命令行下查看端口占用情况的方法

Rstudio 使用代理

windows 命令行下查看端口占用情况的方法