pandas 长转宽
import cx_Oracle
import os
import re
import math
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.engine import url
from matplotlib import pyplot as plt
os.environ['NLS_LANG'] = 'AMERICAN_AMERICA.ZHS16GBK'
con = cx_Oracle.connect("ipvsdb", "Mygh@2021#", "192.168.30.48/JZDB1")
sqltxt = '''
select i.grda_code,
e.zjhm,
e.csrq,
i.jz_zc,
i.jz_sj,
i.jzdd_dm,
i.ym_bm
from inoc_jzjl i, grda_cr e
where e.grda_cr_lsh = i.grda_et_lsh
and e.jcqk = '1'
and i.config_ymxx_lsh in ('1801','1802','1803')
and i.jz_sj >=to_date('2020-01-01','yyyy-mm-dd')
'''
cxr=pd.read_sql(sqltxt,con)
cxr.columns=[str.lower(item) for item in cxr.columns]
cxr['shi']=cxr['jzdd_dm'].str[0:4]
cxr['xian']=cxr['jzdd_dm'].str[0:6]
cxr['age']=((cxr['jz_sj']-cxr['csrq'])/pd.Timedelta(1, 'D')).fillna(0).astype(int)/365
cxr.info()
cxr[cxr['shi'].str.contains('^62')].groupby(['shi'],as_index=False).agg(n=('shi','count'))
cxr['jz_zc']=cxr['jz_zc'].astype(str)
#长转宽
cxw=cxr[['zjhm','csrq','jz_zc','jz_sj','shi']].pivot_table(index=['zjhm','csrq','shi'],columns='jz_zc',values='jz_sj',aggfunc='max').reset_index()
cxw['jg']=((cxw['2']-cxw['1'])/pd.Timedelta(1,'D'))
cxw['jg']=np.round(cxw['jg'])
cxw[cxw['shi'].str.contains('^62')].groupby(['shi','jg'],as_index=False).agg(n=('shi','count'))
cxr['jz_nian']=cxr['jz_sj'].astype(str).str[0:4]
cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg(n=('jz_nian','count'))
cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg({'zjhm':lambda x: x.nunique()})
cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg({'zjhm':pd.Series.nunique,'jz_nian':np.count_nonzero})
fm=et[et['shi'].str.contains('^62',na=False)].groupby(['shi'],as_index=True).agg(
n = pd.NamedAgg('shi', 'count'),
nan = pd.NamedAgg('mq_zjhm', lambda x: x.isnull().sum()))
fm['shi']=fm.index
fm=fm.reset_index(level=0,drop=True)
评论
发表评论