pandas eval及query
import pandas as pd
data = pd.DataFrame({
"Student": ["A", "B", "C", "D"],
"Physics": [89,34,23,56],
"Chemistry": [34,56,98,56],
"Math": [34,94,50,59],
'Name':["LI","Zhang","Wang","Xiao"]
})
pd.eval("data.Physics+data.Chemistry+data.Math")
data.eval("total=Physics+Chemistry+Math").query('Math>50')
data.eval('total2=Student',engine='python')
data.eval('total2=Student+Name',engine='python')
# 'numexpr': This default engine evaluates pandas objects using
# numexpr for large speed ups in complex expressions with large frames.
# 'python': Performs operations as if you had eval’d in top
# level python. This engine is generally not that useful.
(data.eval('total=sqrt(Math)')
.query('Chemistry>34'))
data=pd.read_excel(r"C:\Users\xuefe\Desktop\ev71.xlsx")
data.columns=[str(item).lower() for item in data.columns]
data.eval('xian=jzdd_dm.str.slice(0,6)',engine='python').query("xian.str.contains('^62',na=False)",engine='python').groupby(['xian','jz_zc']).agg(n=pd.NamedAgg('xian','count'))
from functools import partial
# 利用partial固化指定参数
func = partial(pd.to_datetime, format='%Y %m %d', errors='coerce')
# func('2021-12-12')
data.eval("jz_sj=@func(jz_sj.str.slice())",engine='python')
#str转datetime
data.eval("csrq=@pd.to_datetime(csrq)",engine='python')
data.eval("age=@pd.to_datetime(jz_sj)-@pd.to_datetime(csrq)",engine='python',inplace=True)
#timedelta64转float
def asage(age):
return age/pd.to_timedelta(1,'D')
def calage(end,start):
end=pd.to_datetime(end,format='%Y %m %d',errors='coerce')
start=pd.to_datetime(start,format='%Y %m %d',errors='coerce')
return ((end-start)/pd.to_timedelta(1,'D'))/365
data.eval("agey=@asage(age)",engine='python')
data.eval("agey=@calage(jz_sj,csrq)",engine='python',inplace=True)
#str转numeric
data.query("jzdd_dm!='620522019a'",inplace=True)
data.eval("jzdd_dm=@pd.to_numeric(jzdd_dm)",engine='python')
#年龄分组
agegroup2=partial(pd.cut,bins=[-1, 30, 50, 60, 100],labels=['0-30', '30-50', '50-60', '>60'])
data.eval("group=@agegroup2(agey)",engine='python')
data.eval("group=@pd.cut(agey,bins=[-1, 30, 50, 60, 100],labels=['0-30', '30-50', '50-60', '>60'])",engine='python')
#number转 str
pd.Categorical(data.jz_zc)
def convert(x):
return x.astype(str)
data.eval("jz_zc2=@convert(jz_zc)",engine='python',inplace=True)
data.eval("total=agey>3 & jz_zc2=='2'",engine='python')
def cutgroup(x):
return 'f1' if x>60 else 'f2'
def cutgroup2(x):
return x.apply(lambda x: '0-30' if x <= 30 else '>30')
def cutgroup3(x):
return x.apply(lambda x: '0-30' if x <= 30 else('30-60' if x<60 else '>60'))
def agegroup(x):
if x >= 60:
return '>=60'
elif x >= 30:
return '30-60'
else:
return '0-30'
def cutgroup4(x):
return x.apply(agegroup)
data.eval("total= @cutgroup4(agey) ",engine='python')
def cutjz(x):
return x.apply(lambda x: 'pc1' if x in ('1','2') else 'pc2')
data.eval("total= @cutjz(jz_zc2) ",engine='python')
评论
发表评论