拆分array
# One 2022 vaccination record per person (zjhm), restricted to rows that have
# a phone number (sjhm) and de-duplicated on that number.
# `sf` flags whether the ID number starts with '62'; it is kept on this
# intermediate frame so it can still be grouped on below (selecting only the
# `sjhm` column would discard it and make groupby('sf') raise KeyError).
# The date filter is half-open (< 2023-01-01) so that records on 2022-12-31
# carrying a time-of-day component are not dropped.
# NOTE(review): assumes jz_sj is a datetime or an ISO 'YYYY-MM-DD...' string.
hpv_2022 = (
    hpv
    .assign(sf=hpv.zjhm.str.startswith('62'))
    .query("jz_sj >= '2022-01-01' and jz_sj < '2023-01-01'")
    .drop_duplicates(['zjhm'])
    .dropna(subset=['sjhm'])
    .drop_duplicates(['sjhm'])
)

# Phone numbers as strings, truncated to the first 11 characters.
hpv2 = (
    hpv_2022['sjhm']
    .astype(str)
    .str.slice(0, 11)
)

# Optional on-disk cache of the raw table:
# hpv.to_csv('/mnt/d/hpv.csv')
# hpv=pd.read_csv('/mnt/d/hpv.csv')

# Number of retained records per `sf` value (displayed when run in a notebook).
(
    hpv_2022
    .groupby('sf')
    .size()
)
#API
import requests
import json
import numpy as np
# Look up the location of every phone number through the batch endpoint and
# collect the returned rows into a single DataFrame `DF`.
url = 'https://api.uutool.cn/phone/location_batch'
# Example payload:
# data = {'phone': '18093189854,18919826420'}
hpv2 = hpv2.to_numpy()
results = []

# Ceiling division so that no batch holds more than `batch_size` numbers.
# A float section count would be truncated by np.array_split, which yields
# oversized batches and raises ValueError when there are fewer than
# `batch_size` numbers (0 sections).
# NOTE(review): 400 is taken from the original divisor; confirm the API limit.
batch_size = 400
n_batches = -(-len(hpv2) // batch_size)
arrays = np.array_split(hpv2, n_batches) if n_batches else []

for a in arrays:
    data = {'phone': ','.join(a)}
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.post(url, data=data, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    results.append(response.json()['data']['rows'])

columns = ['phone', 'isp', 'province', 'city', 'zip_code', 'area_code', 'segment']
# Concatenate once instead of growing DF inside a loop (which re-copies all
# previous rows each time). The empty seed frame guarantees the expected
# columns exist even when no batch was sent; ignore_index gives unique labels.
DF = pd.concat(
    [pd.DataFrame(columns=columns)] + [pd.DataFrame(r) for r in results],
    ignore_index=True,
)
# Share of looked-up phone numbers per province, computed two ways, followed
# by an Excel export of the counts and percentages.
total_rows = len(DF)

# Method 1: group sizes divided by the total number of rows.
grouped_df = DF.groupby(['province']).size()
percentages = grouped_df / total_rows * 100

# Method 2: value_counts (sorted by frequency); this result is the one exported.
value_counts = DF['province'].value_counts()
percentages = value_counts / total_rows * 100

# Stack the two Series as rows, transpose so each province becomes a row,
# then label the columns as count / share before writing the workbook.
summary = pd.DataFrame([value_counts, percentages]).T
summary = summary.set_axis(['计数', '占比'], axis='columns')
summary.to_excel('/mnt/c/Users/xuefe/Desktop/2022HPV接种手机号归属地.xlsx')
评论
发表评论