博文

目前显示的是 2020 年 9 月的博文

分组抽样

 library(sampling) sam_group <- strata(dat,stratanames = 'xiangzhen',size = seq(1:20),method = 'srswr') sam_group <- strata(dat,stratanames = 'xiangzhen',size = rep(3,20),method = 'srswr') #分组抽样 sam_group <- dat %>%    filter(weihao !='000') %>%    group_by(xiangzhen) %>%    slice(sample(3))

Python 爬虫

import requests import pandas as pd import numpy as np import matplotlib.pyplot as plt url= 'https://mops.twse.com.tw/mops/web/ajax_t100sb15' proxy = { "http" : "http://127.0.0.1:8889" ,} headers= { 'User-Agent' : 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0' } payload={ 'encodeURIComponent' : "1" , 'step' : "1" , 'firstin' : "1" , 'TYPEK' : "sii" , 'RYEAR' : "108" } res=requests.post(url,data=payload,proxies=proxy,headers=headers) #res=requests.post(url,data=payload) print(res.text) dfs=pd.read_html(res.text) df=dfs[ 0 ].iloc[:,[ 0 , 1 , 2 , 5 , 6 , 7 ]] df.head() df.info() df.columns=[ '產業類別' , '公司代號' , '公司名稱' , '平均數108' , '平均數107' , '中位數108' ] df.sort_values( '中位數108' ,ascending= False ) #df.plot(kind='bar',title='bar title&#