xuefliang

博文

目前显示的是 2020的博文

na替换0

十二月 10, 2020

# Pipeline step: replace every NA in every column with 0.
mutate(across(everything(), function(col) replace_na(col, 0)))

R 多列类型转换

十二月 09, 2020

# Pipeline steps: convert several columns to numeric in one go.
# across() supersedes the scoped verbs mutate_at() / mutate_if().

# Columns selected with the "数" name helper.
mutate(across(contains("数"), as.numeric))
# Same selection expressed as a regular-expression match on the column names.
mutate(across(matches("数"), as.numeric))
# Every character column.
mutate(across(where(is.character), as.numeric))

# In dplyr: select the variables containing "Sepal" and set them to NA on the
# rows where Species is "setosa".
iris %>%
  mutate(across(
    contains("Sepal"),
    ~ if_else(Species == "setosa", NA_real_, .x)
  ))

# Equivalent form that does not need a typed NA.
# na_if(Species, "setosa") is not usable here: it returns the Species values
# (with "setosa" blanked), not the Sepal measurements.
iris %>%
  mutate(across(
    contains("Sepal"),
    ~ replace(.x, Species == "setosa", NA)
  ))

pip conda you-get 使用代理

十一月 20, 2020

# pip: install/upgrade pandas through the local HTTP proxy.
pip install --upgrade --proxy http://127.0.0.1:7890 pandas

# conda: store the HTTP proxy in the conda configuration.
conda config --set proxy_servers.http http://127.0.0.1:7890

# you-get: download through the proxy given with -x.
you-get -x 127.0.0.1:7890 'https://www.youtube.com/watch?v=jNQXAC9IVRw'

R proxy

十月 09, 2020

# Route R's HTTP and HTTPS requests through the local proxy.
# Both variables use the http:// scheme: the scheme of a proxy URL names the
# protocol spoken to the proxy itself, and a local proxy port normally accepts
# plain HTTP (HTTPS requests are tunnelled through it with CONNECT).
# NOTE(review): assumes the listener on 127.0.0.1:8001 is an HTTP proxy, like
# the other proxy examples here — confirm.
Sys.setenv(
  http_proxy = "http://127.0.0.1:8001",
  https_proxy = "http://127.0.0.1:8001"
)

分组抽样

九月 28, 2020

library(sampling)

# Stratified sampling on `xiangzhen` with sampling::strata(); `size` gives the
# number of draws per stratum and "srswr" is simple random sampling with
# replacement.
# Variant 1: 1, 2, ..., 20 draws from the 20 strata.
sam_group <- strata(
  dat,
  stratanames = "xiangzhen",
  size = 1:20,
  method = "srswr"
)

# Variant 2: 3 draws from each of the 20 strata.
sam_group <- strata(
  dat,
  stratanames = "xiangzhen",
  size = rep(3, 20),
  method = "srswr"
)

# Grouped sampling with dplyr: 3 random rows per `xiangzhen` group, after
# dropping rows whose `weihao` is "000".
# slice_sample() draws from all rows of each group; slice(sample(3)) would only
# return the first three rows of every group in shuffled order.
sam_group <- dat %>%
  filter(weihao != "000") %>%
  group_by(xiangzhen) %>%
  slice_sample(n = 3)

python 爬虫

九月 15, 2020

import requests import pandas as pd import numpy as np import matplotlib.pyplot as plt url= 'https://mops.twse.com.tw/mops/web/ajax_t100sb15' proxy = { "http" : "http://127.0.0.1:8889" ,} headers= { 'User-Agent' : 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0' } payload={ 'encodeURIComponent' : "1" , 'step' : "1" , 'firstin' : "1" , 'TYPEK' : "sii" , 'RYEAR' : "108" } res=requests.post(url,data=payload,proxies=proxy,headers=headers) #res=requests.post(url,data=payload) print(res.text) dfs=pd.read_html(res.text) df=dfs[ 0 ].iloc[:,[ 0 , 1 , 2 , 5 , 6 , 7 ]] df.head() df.info() df.columns=[ '產業類別' , '公司代號' , '公司名稱' , '平均數108' , '平均數107' , '中位數108' ] df.sort_values( '中位數108' ,ascending= False ) #df.plot(kind='bar',title='bar title...

R 添加分隔符

八月 10, 2020

library(tidyverse)
library(openxlsx)

# Read the workbook, take column `b`, flatten it to a vector and join all of
# its values into a single string with "','" between consecutive values.
dat <- str_c(
  as_vector(select(read.xlsx('./Documents/镇原县注射器库存.xlsx'), b)),
  collapse = "','"
)

R 计算月龄

六月 22, 2020

library(lubridate)

# Age in days between `birthDate` and `refDate`.
#
# birthDate: date(s) of birth, anything lubridate::interval() accepts.
# refDate:   reference date; defaults to today.
# Returns the day component of the interval expressed as a period in days.
#
# lubridate is attached once at the top of the script; the function no longer
# calls require(), which only returns FALSE (instead of failing) when the
# package is missing.
calc_age <- function(birthDate, refDate = Sys.Date()) {
  span <- interval(birthDate, refDate)
  as.period(span, unit = "days")$day
}

# Reaching 8 months of age (original note).
# NOTE(review): the first example shifts by 36 months, not 8 — confirm intent.
# %m-% / %m+% add or subtract months without rolling over into the next month.
ymd("2020-05-31") %m-% months(3 * 12)
ymd("2019-12-31") %m+% months(1:12)

hosts修改

六月 18, 2020

vim /etc/hosts 0.0.0.0 account.jetbrains.com 0.0.0.0 www.jetbrains.com 127.0.0.1 transact.netsarang.com 127.0.0.1 update.netsarang.com 127.0.0.1 www.netsarang.com 127.0.0.1 www.netsarang.co.kr 127.0.0.1 sales.netsarang.com

分组建模

六月 03, 2020

# Six ways to fit lm(mpg ~ wt) separately for each `cyl` group of mtcars and
# return the broom::tidy() coefficient table per group.

# 1. group_modify(): the lambda receives each group's rows.
mtcars %>%
  group_by(cyl) %>%
  group_modify(~ broom::tidy(lm(mpg ~ wt, data = .x)))

# 2. summarise() returning a data frame; mpg and wt come from the data mask.
mtcars %>%
  group_by(cyl) %>%
  summarise(broom::tidy(lm(mpg ~ wt)))

# 3. summarise() with the current group's data passed explicitly.
mtcars %>%
  group_by(cyl) %>%
  summarise(broom::tidy(lm(mpg ~ wt, data = cur_data())))

# 4. nest() + purrr::map(): keep the models in a list-column, then unnest the
#    tidied results.
mtcars %>%
  group_by(cyl) %>%
  nest() %>%
  mutate(model = purrr::map(data, function(d) lm(mpg ~ wt, data = d))) %>%
  mutate(result = purrr::map(model, broom::tidy)) %>%
  unnest(result)

# 5. nest_by(): row-wise over the nested data, storing the model first.
mtcars %>%
  nest_by(cyl) %>%
  mutate(model = list(lm(mpg ~ wt, data = data))) %>%
  summarise(broom::tidy(model))

# 6. nest_by() with the model fitted and tidied in one step.
mtcars %>%
  nest_by(cyl) %>%
  summarise(broom::tidy(lm(mpg ~ wt, data = data)))

LBE 安装

五月 21, 2020

# Install Bioconductor release 3.11 and the LBE package.
# BiocManager is installed only when it is not already available; one check is
# enough for both install calls.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(version = "3.11")
BiocManager::install("LBE")

# Shell command (not R): re-register the Java toolchain with R.
# JAVA_HOME must be the JDK directory itself; JAVAC and JAR are the tools
# inside its bin/ directory.
sudo R CMD javareconf JAVAC=/usr/lib/jvm/java-1.8.0-openjdk-amd64/bin/javac JAR=/usr/lib/jvm/java-1.8.0-openjdk-amd64/bin/jar JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64

计算接种率

五月 13, 2020

# R implementation
# yzyl: age at 2021-01-25 in 30-day units (floored); rows with yzyl in [8, 18)
# are kept. Per `xian`: deno = distinct grda_et_lsh, num = rows whose ym_bm
# starts with '14' and whose jz_zc is 1, prec = num / deno as a percentage.
inoc %>%
  filter(str_sub(gldw_bm, 1, 2) == '62') %>%
  mutate(
    yzyl = floor(
      day(as.period(interval(csrq, ymd('2021-01-25')), unit = "days")) / 30
    )
  ) %>%
  filter(yzyl >= 8 & yzyl < 18) %>%
  group_by(xian) %>%
  summarise(
    deno = n_distinct(grda_et_lsh),
    num = sum(
      str_sub(ym_bm, 1, 2) %in% c('14') & jz_zc %in% c(1),
      na.rm = TRUE
    )
  ) %>%
  mutate(prec = round(num / deno * 100, 1)) %>%
  left_join(xzqh, by = c('xian' = 'dzbm')) %>%
  arrange(xian)

# Python implementation
# NOTE(review): this version filters on ym_bm prefix '12', has no upper bound
# on yzyl and counts grda_code — it is not a line-for-line port of the R code.
(inoc
 >> head(1000)
 >> mutate(
     yzyl=((pd.to_datetime('2021-01-25') - _.csrq).dt.days / 30),
     xian=_.gldw_bm.str[0:6])
 >> filter(_.gldw_bm.str[0:2] == '62', _.yzyl >= 8)
 >> group_by(_.xian)
 >> summarize(
     n=n_distinct(_.grda_code),
     n3=(((_.ym_bm.str[0:2] == '12') & (_.jz_zc == 1)).sum(skipna=True)))
 >> mutate(pert=np.round(_.n3 / _.n * 100, 2))
 >> arrange(_.xian)
)

forcats包

五月 13, 2020

library(tidyverse) x <- c("A","z","g",NA) x %>% as.factor() # 默认按照字母排序 x %>% as_factor() # 默认按照出现顺序排列 gss_cat$relig %>% fct_count() %>% arrange(-n) #fct_anon:用任意数字标识符替换因子级别 gss_cat$relig %>% fct_anon() %>% fct_count() %>% arrange(-n) gss_cat$relig %>% fct_anon("X") %>% fct_count() #fct_c:合并级别，连接因子 fa <- factor("a") fb <- factor("b") fab <- factor(c("a", "b")) c(fa, fb, fab) fct_c(fa, fb, fab) #fct_collapse：因子转换，将多个因子合并 fct_count(gss_cat$partyid) %>% arrange(n) partyid2 <- fct_collapse(gss_cat$partyid, missing = c("No answer", "Don't know"), other = "Other party", rep = c("Strong republican", "Not str republican"), ...

ifelse sqlite

三月 24, 2020

library(DBI) library(openxlsx) library(tidyverse) library(magrittr) library(hablar) con <- dbConnect(RSQLite::SQLite(), "~/Downloads/phone.db") dbListTables(con) phones <- dbGetQuery(con, "select * from phones") #phones <- tbl(con,sql("select * from phones")) %>% collect() regions <- dbGetQuery(con, "select * from regions") phones %<>% left_join(regions,by=c('region_id'='id')) %>% select(number,province,city,zip_code,area_code) dbDisconnect(con) df2015 <- read.xlsx('./Downloads/甘肃全国12320数据至-20200223.xlsx',sheet = 1) %>% mutate(year=2015) df2016 <- read.xlsx('./Downloads/甘肃全国12320数据至-20200223.xlsx',sheet = 2) %>% mutate(year=2016) df2017 <- read.xlsx('./Downloads/甘肃全国12320数据至-20200223.xlsx',sheet = 3) %>% mutate(year=2017) df2018 <- read.xlsx('./Downloads/甘肃全国12320数据至-20200223.xlsx',sheet = 4) %>% mutate(year=2018) df...

hablar 类型转换

三月 22, 2020

library(hablar)

# Convert several columns to new types in a single call:
# gear -> numeric, mpg -> character, cyl -> factor.
convert(mtcars, num(gear), chr(mpg), fct(cyl))

df <- data.frame(
  a = c("A", NA, "B", "C", "C"),
  b = c(NA, 1, 1, 3, 3),
  c = c(7, 8, 2, 3, 3),
  stringsAsFactors = FALSE
)

# Returns duplicated rows
find_duplicates(df)
# Duplicates judged on columns b to c only
find_duplicates(df, b:c)
# Rows where b is NA
find_na(df, b)

slider 行

二月 20, 2020

library(slider) library(lubridate) library(tidyverse) x <- c(1, 2, 3, 4, 5) # .before: How many elements before the current one should be included in the window? # .after: How many elements after the current one should be included in the window? # .complete: Should .f only be evaluated when there is enough data to make a complete window?ff # .step: The number of elements to shift forward between calls to .f. slide_vec(x, mean, .before = 1) slide_vec(x, mean, .after = 1) slide_vec(x, sum, .before = 2) slide_vec(x, sum, .before = 2, .complete = T) index_vec <- as.Date("2019-08-29") + c(0, 1, 5, 6) wday_vec <- as.character(wday(index_vec, label = TRUE)) sales_vec <- c(2, 4, 3, 5) company <- tibble(sales = sales_vec, index = index_vec, wday = wday_vec) # Over columns: map(company, ~ .x) # Over rows: # slide(company, ~ .x) ...

代理测试

二月 18, 2020

http代理 curl -x 127.0.0.1:10809 https://google.com socks5代理 curl --socks5 127.0.0.1:10808 https://google.com/

pip upgrade all

二月 18, 2020

import subprocess
import sys

import pkg_resources


def upgrade_all_packages():
    """Upgrade every installed distribution with the current interpreter's pip.

    Returns:
        pip's exit status, or 0 without running pip when no distributions
        are installed.
    """
    packages = [dist.project_name for dist in pkg_resources.working_set]
    if not packages:
        # "pip install --upgrade" without any requirement is an error.
        return 0
    # An argument list (no shell) keeps project names from being interpreted
    # by the shell, and "sys.executable -m pip" upgrades the packages of this
    # interpreter rather than of whichever "pip" comes first on PATH.
    return subprocess.call(
        [sys.executable, "-m", "pip", "install", "--upgrade", *packages]
    )


if __name__ == "__main__":
    upgrade_all_packages()

# Shell alternative 1: upgrade every name listed by "pip freeze".
#   pip install -U $(pip freeze | awk '{split($0, a, "=="); print a[1]}')
#
# Shell alternative 2: write the installed set to a file, then upgrade from it.
#   pip freeze > pip_frozen.txt
#   pip install -r pip_frozen.txt --upgrade

Python pip配置国内源

二月 18, 2020

1、 Linux平台安装方式：（1）创建 pip.conf 文件。首先运行以下命令： cd ~/.pip # 运行此命令切换目录。如果提示目录不存在，自行创建一个(如果目录存在，可跳过此步)，如下： mkdir ~/.pip cd ~/.pip 在 .pip 目录下创建一个 pip.conf 文件，如下： touch pip.conf （2）编辑 pip.conf 文件。首先打开文件，命令如下： sudo vi ~/.pip/pip.conf 接着，写入以下内容： [global] index-url = https://pypi.tuna.tsinghua.edu.cn/simple [install] trusted-host = pypi.tuna.tsinghua.edu.cn # trusted-host 此参数填写主机名(不带 https:// 前缀)，是为了避免麻烦，否则使用的时候可能会提示不受信任。然后保存退出即可。　2、Windows平台安装方式：（1）新建 pip 配置文件夹，直接在user用户目录中创建一个名为 pip 的文件夹( 即 %HOMEPATH%\pip )，如下图所示：（2）接着在 pip 文件夹中创建一个名为 pip 的文本文件(后缀名由" .txt "改为 " .ini ")，格式如下所示：文件内容如下： [global] index-url = https://pypi.tuna.tsinghua.edu.cn/simple [install] trusted-host = pypi.tuna.tsinghua.edu.cn # trusted-host 此参数填写主机名(不带 https:// 前缀)，是为了避免麻烦，否则使用的时候可能会提示不受信任。修改完成后保存，启动 cmd ，使用 " pip install xxx "( xxx 为你要下载的包名)，即可默认使用国内源下载。

across and case_when

二月 17, 2020

library(tidyverse)

# across() applies one or more functions to a selection of columns.

# Row-wise mean over the columns whose names start with 'Sepal'.
iris %>%
  as_tibble() %>%
  mutate(mean = rowMeans(across(starts_with('Sepal'))))

# Per-species mean of each of those columns.
iris %>%
  group_by(Species) %>%
  summarise(across(starts_with('Sepal'), mean))

# Same summary, passing na.rm through an explicit function.
iris %>%
  group_by(Species) %>%
  summarise(across(starts_with('Sepal'), function(v) mean(v, na.rm = TRUE)))

# across() avoids spelling out one summary per column, as is needed here:
iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length))

# case_when(): conditions are tried in order; TRUE is the fallback branch.
starwars %>%
  select(name:mass, gender, species) %>%
  mutate(
    type = case_when(
      height > 200 | mass > 200 ~ "large",
      species == "Droid" ~ "robot",
      TRUE ~ "other"
    )
  )

pip不能安装、升级、卸载软件问题

二月 17, 2020

Cannot uninstall ‘nibabel’. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall. 问题解析：旧版本依赖多，不能干净地删除，此时应该忽略旧版本直接升级，即如下解决办法：sudo pip install nibabel --ignore-installed nibabel 下载安装 https://www.lfd.uci.edu/~gohlke/pythonlibs/ conda update --all --force-reinstall conda update conda --force-reinstall Windows10中，用户目录下 .condarc 配置为： channels: - defaults # Show channel URLs when displaying what is going to be downloaded and # in 'conda list'. The default is False. show_channel_urls: True allow_other_channels: True proxy_servers: http: socks5://127.0.0.1:1080 https: socks5://127.0.0.1:1080 ssl_verify: False

ubuntu install qq

二月 15, 2020

# Download the deepin-wine setup script and pipe it straight into sh.
wget -O- https://deepin-wine.i-m.dev/setup.sh | sh

# Install the deepin.com.qq.im package.
sudo apt install deepin.com.qq.im

# NOTE(review): this package name looks truncated in the excerpt — confirm.
sudo apt install deepin

R 重采样

二月 11, 2020

library(hyfo) library(lubridate) # Daily to monthly 降采样 TS <- data.frame(Date = seq(ymd('1999-01-01'), length = 365, by = '1 day'), num=runif(365, 3, 10)) TS_new <- resample(TS, method = 'day2mon') # Monthly to daily 升采样 TS <- data.frame(Date = seq(ymd('1999-01-01'), length = 12, by = '1 month'), num=runif(12, 3, 10)) TS_new <- resample(TS, method = 'mon2day') library(dplyr) library(lubridate) set.seed(2017) options(digits=4) expenses <- tibble( date=seq(ymd("2019-01-01"), ymd("2020-12-31"), by=1), amount=rgamma(length(date), shape = 2, scale = 20)) expenses %>% group_by(month=floor_date(date, "month")) %>% summarize(amount=sum(amount)) expenses %>% group_by(month=floor_date(date, "3month")) %>% summarize(amount=sum(amount)) expenses %...

R#将1列拆分成多列

二月 09, 2020

library(tidyverse)
library(stringr)

# The file is read once; both approaches below start from the same `df`.
df <- read.csv('~/PycharmProjects/datascience/data/911.csv')

# Split one column into several columns.

# Option 1: tidyr::separate() replaces `title` with columns `a` and `b`.
df3 <- df %>% separate(title, c('a', 'b'), sep = ': ')
table(df3$a)

# Option 2: split into a character matrix and convert it to a tibble.
# simplify = TRUE pads short rows with "" instead of silently recycling pieces
# the way do.call(rbind, ...) does on ragged splits. The columns are named
# V1, V2, ... explicitly because as_tibble() needs column names (as.tibble()
# is deprecated).
parts <- str_split(df$title, ': ', simplify = TRUE)
colnames(parts) <- paste0('V', seq_len(ncol(parts)))
df2 <- as_tibble(parts)
table(df2$V1)

python on Rstudio.py

二月 03, 2020

# R console (not Python): tell reticulate which Python interpreter to use.
Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3.6")
# reticulate::py_config()

# Python script. The HTTP library is "requests" (plural); "import request"
# would look for a different module.
import requests
import matplotlib
import numpy as np
import pandas as pd

RMySQL

一月 30, 2020

library(RMySQL) con <- dbConnect(RMySQL::MySQL(), username = "root", password = "123456", host = "127.0.0.1", port = 3306, dbname = "python" ) con <- DBI::dbConnect(odbc::odbc(), driver = "/opt/rstudio-drivers/mysql/bin/lib/libmysqlodbc_sb64.so", database = "python", UID ...