拆分array

 hpv2=(

   hpv

   .assign(sf=hpv.zjhm.str.startswith('62'))

   .query("jz_sj >= '2022-01-01' and jz_sj <= '2022-12-31'")

   .drop_duplicates(['zjhm'])

   .loc[:,'sjhm']

   .dropna(axis=0)

   .drop_duplicates()

   .astype(str)

   .str.slice(0,11)

)



# hpv.to_csv('/mnt/d/hpv.csv')

# hpv=pd.read_csv('/mnt/d/hpv.csv')


# Count the de-duplicated 2022 records by whether `zjhm` starts with '62'.
# `hpv2` holds only the `sjhm` values, so it has no `sf` key to group by; the
# flag is rebuilt from `hpv` with the same date filter and `zjhm`
# de-duplication that `hpv2` uses.
# The result is a bare expression: it is displayed in an interactive session
# and discarded when run as a script.
(
    hpv
    .query("jz_sj >= '2022-01-01' and jz_sj <= '2022-12-31'")
    .drop_duplicates(['zjhm'])
    # Compute the flag on the filtered frame itself.
    .assign(sf=lambda frame: frame.zjhm.str.startswith('62'))
    .groupby('sf')
    .size()
)


# API: look up the location (carrier, province, city) of each phone number in batches

import requests

import json

import numpy as np

# Batch phone-number location endpoint. The `phone` form field takes a
# comma-separated list of numbers.
# NOTE(review): this sends personal phone numbers to an external service —
# confirm that this is permitted for the data set.
url = 'https://api.uutool.cn/phone/location_batch'

# Maximum numbers per request. The original code aimed at batches of 400
# (presumably the service's per-request limit — TODO confirm).
batch_size = 400

# np.asarray accepts the pandas Series as well as an already-converted array,
# so re-running this section does not fail on a missing `.to_numpy`.
hpv2 = np.asarray(hpv2)

# Ceiling division gives an integer section count, so no batch exceeds
# `batch_size`; at least one section is requested because np.array_split
# rejects 0 sections.
n_batches = max(1, (len(hpv2) + batch_size - 1) // batch_size)
arrays = np.array_split(hpv2, n_batches)

# One entry per request: the `data.rows` list of the JSON response.
results = []
for a in arrays:
    if len(a) == 0:
        # Nothing to look up (empty input); do not post an empty request.
        continue
    data = {'phone': ','.join(a)}
    # A timeout keeps a stalled connection from hanging the script.
    response = requests.post(url, data=data, timeout=30)
    # Fail loudly on HTTP errors instead of on a confusing KeyError below.
    response.raise_for_status()
    results.append(response.json()['data']['rows'])

    

  

# Combine the per-request row lists in `results` into one table `DF`.
# All frames are concatenated in a single call rather than growing `DF` inside
# a loop, which re-copies the accumulated rows on every iteration and
# concatenates onto an empty frame.
expected_columns = ['phone', 'isp', 'province', 'city', 'zip_code', 'area_code', 'segment']
frames = [pd.DataFrame(r) for r in results]
if frames:
    # ignore_index gives one continuous row index instead of repeating each
    # batch's own 0..n-1 labels.
    DF = pd.concat(frames, ignore_index=True)
    # Expected columns first (created as NaN when the response lacks them),
    # followed by any additional columns the service returned.
    extra_columns = [c for c in DF.columns if c not in expected_columns]
    DF = DF.reindex(columns=expected_columns + extra_columns)
else:
    # No responses at all: keep an empty table with the expected columns.
    DF = pd.DataFrame(columns=expected_columns)

    # DF=DF.append(pd.DataFrame(r))

    

# Share of rows per province, computed two ways. Both divide by the total
# number of rows in DF, so rows without a province still count in the total.
total_rows = len(DF)

# Approach 1: group sizes per province, ordered by province label.
grouped_df = DF.groupby(['province']).size()
percentages = grouped_df.div(total_rows).mul(100)

# Approach 2: value_counts, ordered by descending frequency. This rebinds
# `percentages`, so the export below uses these values.
value_counts = DF['province'].value_counts()
percentages = value_counts.div(total_rows).mul(100)


# Write a per-province summary to Excel: one row per province with the row
# count ('计数') and its percentage of all rows ('占比').
province_summary = pd.DataFrame([value_counts, percentages]).T
province_summary = province_summary.set_axis(['计数', '占比'], axis='columns')
province_summary.to_excel('/mnt/c/Users/xuefe/Desktop/2022HPV接种手机号归属地.xlsx')

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理