xuefliang

博文

目前显示的是三月, 2016的博文

生存分析surminer包

三月 30, 2016

--- title: "surminer" output: html_document --- #生存分析 ```{r} #参考http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization library("survival") library("survminer") ``` #Draw survival curves without grouping ```{r} # Fit survival curves fit <- survfit(Surv(time, status) ~ 1, data = lung) # Drawing curves ggsurvplot(fit, color = "#2E9FDF") ``` #Draw survival curves with two groups ```{r} # Fit survival curves fit<- survfit(Surv(time, status) ~ sex, data = lung) # Drawing survival curves ggsurvplot(fit) ``` #Change font size, style and color ```{r} # Change font style, size and color # Change only font size ggsurvplot(fit, main = "Survival curve", font.main = 18, font.x = 16, font.y = 16, font.tickslab = 14) # Change font size, style and color at the same time ggsurvplot(fit, main = "Survival curve", font.main = c(16, "bold...

2010-2015年甘肃AEFI监测数据汇总

三月 24, 2016

library(dplyr) library(stringr) aefi <- read.csv("C:\\Users\\liangxf\\Desktop\\页1.csv",header = T,stringsAsFactors = F) for(i in 1:length(aefi$主键)) { if (nchar(aefi$反应分类[i])==0) { aefi$反应分类[i]=aefi$初步分类[i] } } for(i in 1:length(aefi$主键)) { if (nchar(aefi$最终临床诊断[i])==0) { aefi$最终临床诊断[i]=aefi$初步临床诊断[i] } } data <- mutate(aefi,year=substr(aefi$报告卡编号,8,11))%>% filter(year>=2010 & 疫苗1.疫苗名称 %in%c("Hib","百白破(无细胞)","脊灰(灭活)","甲肝(灭活)","狂犬病(Vero)","狂犬病(Vero冻干)","流感(裂解)","流脑A+C（结合）","流脑A群","轮状病毒","麻风","腮腺炎","水痘","乙肝(CHO)","乙肝(酵母)","乙脑(减毒)"))%>% select(year,疫苗1.疫苗名称,反应分类,最终临床诊断) aefinew <- read.csv("C:\\Users\\liangxf\\Desktop\\AEFI个案信息.csv",skip = 1,header = T,stringsAsFactors = F) for(i in 1:length(aefinew$编码)) { if (nchar(aefinew$最终临床诊断[i])==0) ...

ubuntu install sublime

三月 20, 2016

sudo add-apt-repository ppa:webupd8team/sublime-text-3 sudo apt-get update sudo apt-get install sublime-text-installer License Key —– BEGIN LICENSE —– Peter Eriksson Single User License EA7E-890068 8E107C71 3100D6FC 2AC805BF 9E627C77 72E710D7 43392469 D06A2F5B F9304FBD F5AB4DB2 7A95F172 FE68E300 42745819 E94AB2DF C1893094 ECABADC8 71FEE764 20224821 3EABF931 745AF882 87AD0A4B 33C6E377 0210D712 CD2B1178 82601542 C7FD8098 F45D2824 BC7DFB38 F1EBD38A D7A3AFE0 96F938EA 2D90BD72 9E34CDF0 —— END LICENSE ——

安裝 AnyConnect 時出現錯誤訊息「The VPN client agent was unable to create the interprocess communication depot.」

三月 15, 2016

1、取消容许其他网路用户通过此计算机internet链接 2、cmd- services.msc 关闭ics

cygwin 安装及代理配置

三月 14, 2016

配置代理修改C:\cygwin\etc\skel\ C:\cygwin\home\liangxf 和 C:\cygwin\etc\defaults\etc\skel 下的 .bash_profile 文件添加 export http_proxy=http://127.0.0.1:1080 重启后代理生效安装 apt-cyg wget http://apt-cyg.googlecode.com/svn/trunk/apt-cyg 或者 https://codeload.github.com/transcode-open/apt-cyg/zip/master chmod +x apt-cyg mv apt-cyg /usr/local/bin/ 安装包 apt-cyg install util-linux bind-utils openssh curl 查找包 apt-cyg find php 编辑/home/用户名/.bashrc。在末尾添加 PS1="\[\e[1;37m\][\u@\h \[\e[1;34m\]\W\[\e[1;37m\]]\[\e[1;36;5m\]\\$\[\e[m\]" export LC_ALL="en_US.UTF-8" 解决 apt-cyg 安装软件出现的 MD5 sum did not match, exiting 错误修改 /usr/local/bin 目录下的 apt-cyg 文件，查找 MD5 sum ，将 exit 1 行注释掉 1. # check the md5 2. digest = `cat "desc" | awk '/^install: / { print $4; exit }'` 3. digactual = `md5sum $file | awk '{print $1}'` 4. if ! t...

正态分布的极大似然估计

三月 11, 2016

#生成正态分布的随机数，u=0，sigma=1 y <- as.data.frame(rnorm(1000)) #方法一 normal.lik<-function(theta,y){ mu<-theta[1] sigma2<-theta[2] n<-nrow(y) logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2) return(-logl) } optim(c(0,1),normal.lik,y=y) #方法二 normal.lik2<-function(mu,sigma2){ n<-nrow(y) logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2) return(-logl) } library(bbmle) mle2(normal.lik2, start = list(mu=0,sigma2=1))

贝叶斯线性回归

三月 09, 2016

library(LearnBayes) data(birdextinct) attach(birdextinct) birdextinct logtime=log(time) fit=lm(logtime ~ nesting+size+status, data=birdextinct, x=TRUE, y=TRUE) summary(fit) theta.sample <- blinreg(fit$y,fit$x,5000) par(mfrow=c(2,2)) hist(theta.sample$beta[,2], main="NESTING", xlab=expression(beta[1])) hist(theta.sample$beta[,3], main="SIZE", xlab=expression(beta[2])) hist(theta.sample$beta[,4], main="STATUS", xlab=expression(beta[3])) hist(theta.sample$sigma, main="ERROR SD", xlab=expression(sigma)) apply(theta.sample$beta,2, quantile, c(.05,.5,.95)) quantile(theta.sample$sigma,c(.05,.5,.95))

N-N分层贝叶斯模型

三月 09, 2016

y <- c(28, 8, -3, 7, -1, 1, 18, 12) sd <- c(15, 10, 16, 11, 9, 11, 10, 18) v <- sd * sd tau <- c(0:3000)/100 tausq <- tau * tau ptau.y <- rep(0, 3001) vmu <- rep(0, 3001) muhat <- rep(0, 3001) for (i in (1:3001)) { vmu[i] <- 1/sum(1/(tausq[i] + v)) muhat[i] <- vmu[i] * sum(y/(tausq[i] + v)) ptau.y[i] <- sqrt(vmu[i] * prod(1/(tausq[i] + v))) * prod(exp(-0.5 * (y - muhat[i]) * (y - muhat[i])/(tausq[i] + v))) } plot(tau, ptau.y, type = "l", yaxt = "n", xlab = quote(tau)) eth.tauy <- matrix(rep(0, 24008), 8, 3001) sdth.tauy <- matrix(rep(0, 24008), 8, 3001) for (j in (1:8)) { for (i in (1:3001)) { eth.tauy[j, i] <- (y[j]/v[j] + muhat[i]/tausq[i])/(1/v[j] + 1/tau...

单位编码核对

三月 07, 2016

library(dplyr) gavi <- read.csv("C:\\Users\\liangxf\\Desktop\\单位信息_主表.csv",skip = 1,header = T,stringsAsFactors = F)%>% select(name=单位名称,regionid=所在地区Id,code=单位编码) provice <- read.csv("C:\\Users\\liangxf\\Desktop\\单位编码.csv",skip = 3,header = T,stringsAsFactors = F)%>% select(name=全称,code=国标编码)%>% filter(nchar(code)>6) #GAVI系统中有，省系统中无 leftanti <- anti_join(gavi, provice,by=c("code")) #省系统中有，GAVI系统中无 rightanti <- anti_join(provice,gavi,by=c("code"))

极大似然估计（Fitting a Model by Maximum Likelihood）

三月 07, 2016

#拟合正态分布产生一个正态分布的数据 ```{r} set.seed(1001) N <- 100 x <- rnorm(N, mean = 3, sd = 2) mean(x) sd(x) ``` 定义似然函数 ```{r} LL <- function(mu, sigma) { R = dnorm(x, mu, sigma) -sum(log(R)) } ``` ```{r} library(stats4) #由于计算时产生了负值，所以出现警告信息 mle(LL, start = list(mu = 1, sigma=1)) #比如,mean=1,sd=-1,标准差不能为负数 dnorm(x, 1, -1) ``` mle()调用optim(),mle()默认方法BFGS ```{r} library(stats4) mle(LL, start = list(mu = 1, sigma=1), method = "L-BFGS-B", lower = c(-Inf, 0), upper = c(Inf, Inf)) ``` 修改似然函数 ```{r} LL <- function(mu, sigma) { R = suppressWarnings(dnorm(x, mu, sigma)) -sum(log(R)) } mle(LL, start = list(mu = 1, sigma=1)) #初始值选择错误，将对结果又较大影响 mle(LL, start = list(mu = 0, sigma=1)) ``` #拟合线性模型产生数据 ```{r} x <- runif(N) y <- 5 * x + 3 + rnorm(N) fit <- lm(y ~ x) summary(fit) plot(x, y) abline(fit, col = "red") ``` 由于模型不是概率密度函数（pdf），无法像正态分布那样进行精确的似然函数的定义。由于线性模型要求残差符合正态分布，所以定义残差的似然函数 ```{r} LL...

函数优化

三月 07, 2016

R语言中对函数求极值有两种命令，较简单的是optimize函数，在确定搜索范围后可对一元函数求极值，例如： f = function(x) {3*x^4-2*x^3+3*x^2-4*x+5} optimize(f, lower=-20, upper=20) #optimize(f,c(-20,20),maximum=F) 其中$minimum为自变量x的取值，$objective为函数值 #似然函数 f <- function(p) { p^7*(1-p)^3 } #对数似然函数 ll <-function(p) { 7*log(p)+3*log(1-p) } optimize(f, lower=0, upper=1,maximum = T) optimize(ll, lower=0, upper=1,maximum = T) optim函数中第一项为起始点，第二项为目标函数，后面的是所需数据变量，optim命令的默认方法是Nelder-Mead算法，它是求多维函数极值的一种算法，由Nelder和Mead提出，又叫单纯形算法，但和线性规划中的单纯形算法是不同的，由于未利用任何求导运算，算法比较简单，但收敛速度较慢，适合变量数不是很多的方程求极值 normal <- function(theta,x){ mu <- theta[1] sigma2 <- theta[2] n <- length(x) logL <- -0.5*n*log(2*pi)-0.5*n*log(sigma2)-(1/(2*sigma2))*sum((x-mu)**2) return (-logL) } x <- rnorm(1000) optim(c(0,1),normal,x=x) optim(c(0,1),normal,x=x,method="BFGS")

多参数贝叶斯估计

三月 07, 2016

logit<-function(x){ y=log(x/(1-x)) return(y) } bioassay<-data.frame( x <- c(-0.86, -0.30, -0.05, 0.73), n <- c(5, 5, 5, 5), y <- c(0.01, 1, 3, 4.99), r <- logit(y/n)) plot(x,r) lm.bioassay<-lm(formula = r~x) abline(lm.bioassay) summary(lm.bioassay) bioassay.post <- function(alpha = 0.1, beta = 5) { k <- 4 x <- c(-0.86, -0.3, -0.05, 0.73) n <- c(5, 5, 5, 5) y <- c(0, 1, 3, 5) #prod连乘 prod <- 1 prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y * (1/(1 + exp(alpha + beta * x)))^(n - y)) return(prod) } mlpost <- function(alpha = 0.1, beta = 5) { -log(bioassay.post(alpha, beta)) } library(bbmle) mle(mlpost) modedensity <- bioassay.post(0.87, 7.91) alphax <- seq(-5, 10, length = 1000) betay <- seq(-10, 40, length = 1000) #Vectorize,能将一个不能进行向量化运算的函数进行...

贝叶斯估计

三月 06, 2016

bioassay.post <- function(alpha = 0.1, beta = 5) { k <- 4 x <- c(-0.86, -0.3, -0.05, 0.73) n <- c(5, 5, 5, 5) y <- c(0, 1, 3, 5) prod <- 1 prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y * (1/(1 + exp(alpha + beta * x)))^(n - y)) return(prod) } mlpost <- function(alpha = 0.1, beta = 5) { -log(bioassay.post(alpha, beta)) } library(bbmle) mle(mlpost)

R bayes

三月 05, 2016

library(dplyr) library(tidyr) library(ggplot2) x <- seq(0, 1, 0.01) n <- 5 y <- 3 alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5) beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5) z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) z.prio <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) for (i in c(1:9)) { z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i]) z.prio[, i] <- dbeta(x, alpha[i], beta[i]) } z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4, post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9) z.prio <- select(z.prio, prio.V1 = V1, prio.V2 = V2, prio.V3 = V3, prio.V4 = V4, prio.V5 = V5, prio.V6 = V6, prio.V7 = V7, prio.V8 = V8, prio.V9 ...

《R语言与统计分析》 11章代码

三月 03, 2016

#例题 11.3.1 library(dplyr) library(reshape2) library(ggplot2) x <- seq(0, 1, 0.01) n <- 5 y <- 3 alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5) beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5) z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) z.prior <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) for (i in c(1:9)) { z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i]) z.prior[, i] <- dbeta(x, alpha[i], beta[i]) } z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4, post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9) z.prior <- select(z.prior, prior.V1 = V1, prior.V2 = V2, prior.V3 = V3, prior.V4 = V4, prior.V5 = V5, prior.V6 = V6, prior.V7 = V7, prior.V8 = V8, ...