pandas eval及query


import pandas as pd


data = pd.DataFrame({

    "Student": ["A", "B", "C", "D"], 

    "Physics": [89,34,23,56], 

    "Chemistry": [34,56,98,56], 

    "Math": [34,94,50,59],

    'Name':["LI","Zhang","Wang","Xiao"]

    })


pd.eval("data.Physics+data.Chemistry+data.Math")


data.eval("total=Physics+Chemistry+Math").query('Math>50')


data.eval('total2=Student',engine='python')


data.eval('total2=Student+Name',engine='python')


# 'numexpr': This default engine evaluates pandas objects using

# numexpr for large speed ups in complex expressions with large frames.


# 'python': Performs operations as if you had eval’d in top

# level python. This engine is generally not that useful.


(data.eval('total=sqrt(Math)')

 .query('Chemistry>34'))


data=pd.read_excel(r"C:\Users\xuefe\Desktop\ev71.xlsx")

data.columns=[str(item).lower() for item in data.columns]

data.eval('xian=jzdd_dm.str.slice(0,6)',engine='python').query("xian.str.contains('^62',na=False)",engine='python').groupby(['xian','jz_zc']).agg(n=pd.NamedAgg('xian','count'))


from functools import partial

# 利用partial固化指定参数

func = partial(pd.to_datetime, format='%Y %m %d', errors='coerce')

# func('2021-12-12')

data.eval("jz_sj=@func(jz_sj.str.slice())",engine='python')

#str转datetime

data.eval("csrq=@pd.to_datetime(csrq)",engine='python')


data.eval("age=@pd.to_datetime(jz_sj)-@pd.to_datetime(csrq)",engine='python',inplace=True)


#timedelta64转float

def asage(age):

    return age/pd.to_timedelta(1,'D')


def calage(end,start):

    end=pd.to_datetime(end,format='%Y %m %d',errors='coerce')

    start=pd.to_datetime(start,format='%Y %m %d',errors='coerce')

    return ((end-start)/pd.to_timedelta(1,'D'))/365


data.eval("agey=@asage(age)",engine='python')


data.eval("agey=@calage(jz_sj,csrq)",engine='python',inplace=True)


#str转numeric

data.query("jzdd_dm!='620522019a'",inplace=True)

data.eval("jzdd_dm=@pd.to_numeric(jzdd_dm)",engine='python')


#年龄分组

agegroup2=partial(pd.cut,bins=[-1, 30, 50, 60, 100],labels=['0-30', '30-50', '50-60', '>60'])

data.eval("group=@agegroup2(agey)",engine='python')


data.eval("group=@pd.cut(agey,bins=[-1, 30, 50, 60, 100],labels=['0-30', '30-50', '50-60', '>60'])",engine='python')



#number转 str

pd.Categorical(data.jz_zc)


def convert(x):

    return x.astype(str)


data.eval("jz_zc2=@convert(jz_zc)",engine='python',inplace=True)


data.eval("total=agey>3 & jz_zc2=='2'",engine='python')


def cutgroup(x):

    return 'f1' if x>60 else 'f2'


def cutgroup2(x):

    return x.apply(lambda x: '0-30' if x <= 30 else '>30')


def cutgroup3(x):

    return x.apply(lambda x: '0-30' if x <= 30 else('30-60' if x<60 else '>60'))


def agegroup(x):

    if x >= 60:

        return '>=60'

    elif x >= 30:

        return '30-60'

    else:

        return '0-30'


def cutgroup4(x):

    return x.apply(agegroup)


data.eval("total= @cutgroup4(agey) ",engine='python')


def cutjz(x):

    return x.apply(lambda x: 'pc1' if x in ('1','2')  else 'pc2')


data.eval("total= @cutjz(jz_zc2) ",engine='python')

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理