pandas 长转宽

 import cx_Oracle

import os

import re

import math

import pandas as pd

import numpy as np

from sqlalchemy import create_engine

from sqlalchemy.engine import url

from matplotlib import pyplot as plt 


os.environ['NLS_LANG'] = 'AMERICAN_AMERICA.ZHS16GBK'

con = cx_Oracle.connect("ipvsdb", "Mygh@2021#", "192.168.30.48/JZDB1")


sqltxt = ''' 

select i.grda_code,

       e.zjhm,

       e.csrq,

       i.jz_zc,

       i.jz_sj,

       i.jzdd_dm,

       i.ym_bm

  from inoc_jzjl i, grda_cr e

 where e.grda_cr_lsh = i.grda_et_lsh

   and e.jcqk = '1'

   and i.config_ymxx_lsh in ('1801','1802','1803')

   and i.jz_sj >=to_date('2020-01-01','yyyy-mm-dd')

 '''  

cxr=pd.read_sql(sqltxt,con)

cxr.columns=[str.lower(item) for item in cxr.columns]

cxr['shi']=cxr['jzdd_dm'].str[0:4]

cxr['xian']=cxr['jzdd_dm'].str[0:6]

cxr['age']=((cxr['jz_sj']-cxr['csrq'])/pd.Timedelta(1, 'D')).fillna(0).astype(int)/365

cxr.info()

cxr[cxr['shi'].str.contains('^62')].groupby(['shi'],as_index=False).agg(n=('shi','count'))


cxr['jz_zc']=cxr['jz_zc'].astype(str)

#长转宽

cxw=cxr[['zjhm','csrq','jz_zc','jz_sj','shi']].pivot_table(index=['zjhm','csrq','shi'],columns='jz_zc',values='jz_sj',aggfunc='max').reset_index()

cxw['jg']=((cxw['2']-cxw['1'])/pd.Timedelta(1,'D'))

cxw['jg']=np.round(cxw['jg'])


cxw[cxw['shi'].str.contains('^62')].groupby(['shi','jg'],as_index=False).agg(n=('shi','count'))


cxr['jz_nian']=cxr['jz_sj'].astype(str).str[0:4]


cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg(n=('jz_nian','count'))


cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg({'zjhm':lambda x: x.nunique()})


cxr[cxr['age']>40 & cxr['shi'].str.contains('^62')].groupby(['jz_nian','ym_bm'],as_index=False).agg({'zjhm':pd.Series.nunique,'jz_nian':np.count_nonzero})


fm=et[et['shi'].str.contains('^62',na=False)].groupby(['shi'],as_index=True).agg(

    n = pd.NamedAgg('shi', 'count'),

    nan = pd.NamedAgg('mq_zjhm', lambda x: x.isnull().sum()))

fm['shi']=fm.index

fm=fm.reset_index(level=0,drop=True)


评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理