博文

目前显示的是 三月, 2016的博文

生存分析surminer包

--- title: "surminer" output: html_document --- #生存分析 ```{r} #参考http://www.sthda.com/english/wiki/survminer-r-package-survival-data-analysis-and-visualization library("survival") library("survminer") ``` #Draw survival curves without grouping ```{r} # Fit survival curves fit <- survfit(Surv(time, status) ~ 1, data = lung) # Drawing curves ggsurvplot(fit, color = "#2E9FDF") ``` #Draw survival curves with two groups ```{r} # Fit survival curves fit<- survfit(Surv(time, status) ~ sex, data = lung) # Drawing survival curves ggsurvplot(fit) ``` #Change font size, style and color ```{r} # Change font style, size and color # Change only font size ggsurvplot(fit, main = "Survival curve",    font.main = 18,    font.x =  16,    font.y = 16,    font.tickslab = 14) # Change font size, style and color at the same time ggsurvplot(fit, main = "Survival curve",    font.main = c(16, "bold&

2010-2015年甘肃AEFI监测数据汇总

library(dplyr) library(stringr) aefi <- read.csv("C:\\Users\\liangxf\\Desktop\\页1.csv",header = T,stringsAsFactors = F) for(i in 1:length(aefi$主键)) {   if (nchar(aefi$反应分类[i])==0)   {     aefi$反应分类[i]=aefi$初步分类[i]   } } for(i in 1:length(aefi$主键)) {   if (nchar(aefi$最终临床诊断[i])==0)   {     aefi$最终临床诊断[i]=aefi$初步临床诊断[i]   } } data <- mutate(aefi,year=substr(aefi$报告卡编号,8,11))%>%   filter(year>=2010 & 疫苗1.疫苗名称 %in%c("Hib","百白破(无细胞)","脊灰(灭活)","甲肝(灭活)","狂犬病(Vero)","狂犬病(Vero冻干)","流感(裂解)","流脑A+C(结合)","流脑A群","轮状病毒","麻风","腮腺炎","水痘","乙肝(CHO)","乙肝(酵母)","乙脑(减毒)"))%>%   select(year,疫苗1.疫苗名称,反应分类,最终临床诊断) aefinew <- read.csv("C:\\Users\\liangxf\\Desktop\\AEFI个案信息.csv",skip = 1,header = T,stringsAsFactors = F) for(i in 1:length(aefinew$编码)) {   if (nchar(aefinew$最终临床诊断[i])==0)  

ubuntu install sublime

sudo add-apt-repository ppa:webupd8team/sublime-text-3 sudo apt-get update sudo apt-get install sublime-text-installer License Key —– BEGIN LICENSE —– Peter Eriksson Single User License EA7E-890068 8E107C71 3100D6FC 2AC805BF 9E627C77 72E710D7 43392469 D06A2F5B F9304FBD F5AB4DB2 7A95F172 FE68E300 42745819 E94AB2DF C1893094 ECABADC8 71FEE764 20224821 3EABF931 745AF882 87AD0A4B 33C6E377 0210D712 CD2B1178 82601542 C7FD8098 F45D2824 BC7DFB38 F1EBD38A D7A3AFE0 96F938EA 2D90BD72 9E34CDF0 —— END LICENSE ——

安裝 AnyConnect 時出現錯誤訊息「The VPN client agent was unable to create the interprocess communication depot.」

图片
1、取消 容许其他网路用户通过此计算机internet链接 2、cmd- services.msc 关闭ics

cygwin 安装及代理配置

配置代理  修改C:\cygwin\etc\skel\  C:\cygwin\home\liangxf 和    C:\cygwin\etc\defaults\etc\skel 下的 .bash_profile 文件  添加 export http_proxy=http://127.0.0.1:1080  重启后代理生效 安装 apt-cyg wget http://apt-cyg.googlecode.com/svn/trunk/apt-cyg 或者  https://codeload.github.com/transcode-open/apt-cyg/zip/master chmod +x apt-cyg mv apt-cyg /usr/local/bin/ 安装包 apt-cyg install util-linux bind-utils openssh curl 查找包 apt-cyg find php 编辑/home/用户名/.bashrc。在末尾添加 PS1="\[\e[1;37m\][\u@\h \[\e[1;34m\]\W\[\e[1;37m\]]\[\e[1;36;5m\]\\$\[\e[m\]" export LC_ALL="en_US.UTF-8"   解决 apt-cyg 安装软件出现的 MD5 sum did not match, exiting 错误 修改 /usr/local/bin 目录 下的 apt-cyg 文件 ,查找 MD5 sum ,将 exit 1 行注释掉 1.   # check the md5 2.       digest = `cat "desc" | awk '/^install: / { print $4; exit }'` 3.       digactual = `md5sum $file | awk '{print $1}'` 4.        if ! test $digest = $digactual 5.        then 6.         echo MD5 sum did not

正态分布的极大似然估计

#生成正态分布的随机数,u=0,sigma=1 y <- as.data.frame(rnorm(1000)) #方法一 normal.lik<-function(theta,y){   mu<-theta[1]   sigma2<-theta[2]   n<-nrow(y)   logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2)   return(-logl) } optim(c(0,1),normal.lik,y=y) #方法二 normal.lik2<-function(mu,sigma2){   n<-nrow(y)   logl<- -0.5*n*log(2*pi) -0.5*n*log(sigma2) -(1/(2*sigma2))*sum((y-mu)**2)   return(-logl) } library(bbmle) mle2(normal.lik2, start = list(mu=0,sigma2=1))

贝叶斯线性回归

library(LearnBayes) data(birdextinct) attach(birdextinct) birdextinct logtime=log(time) fit=lm(logtime ~ nesting+size+status,          data=birdextinct, x=TRUE, y=TRUE) summary(fit) theta.sample <- blinreg(fit$y,fit$x,5000) par(mfrow=c(2,2)) hist(theta.sample$beta[,2], main="NESTING",        xlab=expression(beta[1])) hist(theta.sample$beta[,3], main="SIZE",        xlab=expression(beta[2])) hist(theta.sample$beta[,4], main="STATUS",        xlab=expression(beta[3])) hist(theta.sample$sigma, main="ERROR SD",        xlab=expression(sigma)) apply(theta.sample$beta,2, quantile, c(.05,.5,.95)) quantile(theta.sample$sigma,c(.05,.5,.95))

N-N分层贝叶斯模型

y <- c(28, 8, -3, 7, -1, 1, 18, 12) sd <- c(15, 10, 16, 11, 9, 11, 10, 18) v <- sd * sd tau <- c(0:3000)/100 tausq <- tau * tau ptau.y <- rep(0, 3001) vmu <- rep(0, 3001) muhat <- rep(0, 3001) for (i in (1:3001)) {   vmu[i] <- 1/sum(1/(tausq[i] + v))   muhat[i] <- vmu[i] * sum(y/(tausq[i] + v))   ptau.y[i] <- sqrt(vmu[i] * prod(1/(tausq[i] + v))) * prod(exp(-0.5 *                                                                   (y - muhat[i]) * (y - muhat[i])/(tausq[i] + v))) } plot(tau, ptau.y, type = "l", yaxt = "n", xlab = quote(tau)) eth.tauy <- matrix(rep(0, 24008), 8, 3001) sdth.tauy <- matrix(rep(0, 24008), 8, 3001) for (j in (1:8)) {   for (i in (1:3001)) {     eth.tauy[j, i] <- (y[j]/v[j] + muhat[i]/tausq[i])/(1/v[j] + 1/tausq[i])     sdth.tauy[j, i] <- sqrt(((1/tausq[i])/(1/v[j] + 1/tausq[i])) *                               ((1/tausq[i])/(1/v[j] + 1/tausq[i])) * vmu[i] + 1/(1/v[j] +    

单位编码核对

library(dplyr) gavi <- read.csv("C:\\Users\\liangxf\\Desktop\\单位信息_主表.csv",skip = 1,header = T,stringsAsFactors = F)%>%   select(name=单位名称,regionid=所在地区Id,code=单位编码) provice <- read.csv("C:\\Users\\liangxf\\Desktop\\单位编码.csv",skip = 3,header = T,stringsAsFactors = F)%>%   select(name=全称,code=国标编码)%>%   filter(nchar(code)>6)   #GAVI系统中有,省系统中无 leftanti <- anti_join(gavi, provice,by=c("code")) #省系统中有,GAVI系统中无 rightanti <- anti_join(provice,gavi,by=c("code"))

极大似然估计(Fitting a Model by Maximum Likelihood)

#拟合正态分布 产生一个正态分布的数据 ```{r} set.seed(1001) N <- 100 x <- rnorm(N, mean = 3, sd = 2) mean(x) sd(x) ``` 定义似然函数 ```{r} LL <- function(mu, sigma) {   R = dnorm(x, mu, sigma)   -sum(log(R)) } ``` ```{r} library(stats4) #由于计算时产生了负值,所以出现警告信息 mle(LL, start = list(mu = 1, sigma=1)) #比如,mean=1,sd=-1,标准差不能为负数 dnorm(x, 1, -1) ``` mle()调用optim(),mle()默认方法BFGS ```{r} library(stats4) mle(LL, start = list(mu = 1, sigma=1), method = "L-BFGS-B", lower = c(-Inf, 0),     upper = c(Inf, Inf)) ``` 修改似然函数 ```{r} LL <- function(mu, sigma) {   R = suppressWarnings(dnorm(x, mu, sigma))   -sum(log(R)) } mle(LL, start = list(mu = 1, sigma=1)) #初始值选择错误,将对结果又较大影响 mle(LL, start = list(mu = 0, sigma=1)) ``` #拟合线性模型 产生数据 ```{r} x <- runif(N) y <- 5 * x + 3 + rnorm(N) fit <- lm(y ~ x) summary(fit) plot(x, y) abline(fit, col = "red") ``` 由于模型不是概率密度函数(pdf),无法像正态分布那样进行精确的似然函数的定义。由于线性模型要求残差符合正态分布,所以定义残差的似然函数 ```{r} LL <- fu

函数优化

R语言中对函数求极值有两种命令,较简单的是optimize函数,在确定搜索范围后可对一元函数求极值,例如: f = function(x) {3*x^4-2*x^3+3*x^2-4*x+5} optimize(f, lower=-20, upper=20) #optimize(f,c(-20,20),maximum=F) 其中$minimum为自变量x的取值,$objective为函数值 #似然函数 f <- function(p) {   p^7*(1-p)^3 } #对数似然函数 ll <-function(p) {   7*log(p)+3*log(1-p) } optimize(f, lower=0, upper=1,maximum = T) optimize(ll, lower=0, upper=1,maximum = T) optim函数中第一项为起始点,第二项为目标函数,后面的是所需数据变量,optim命令的默认方法是Nelder-Mead算法,它是求多维函数极值的一种算法,由Nelder和Mead提出,又叫单纯形算法,但和线性规划中的单纯形算法是不同的,由于未利用任何求导运算,算法比较简单,但收敛速度较慢,适合变量数不是很多的方程求极值 normal <- function(theta,x){   mu <- theta[1]   sigma2 <- theta[2]   n <- length(x)   logL <- -0.5*n*log(2*pi)-0.5*n*log(sigma2)-(1/(2*sigma2))*sum((x-mu)**2)   return (-logL) } x <- rnorm(1000) optim(c(0,1),normal,x=x) optim(c(0,1),normal,x=x,method="BFGS")

多参数贝叶斯估计

logit<-function(x){   y=log(x/(1-x))   return(y) } bioassay<-data.frame( x <- c(-0.86, -0.30, -0.05, 0.73), n <- c(5, 5, 5, 5), y <- c(0.01, 1, 3, 4.99), r <- logit(y/n)) plot(x,r) lm.bioassay<-lm(formula = r~x) abline(lm.bioassay) summary(lm.bioassay) bioassay.post <- function(alpha = 0.1, beta = 5) {   k <- 4   x <- c(-0.86, -0.3, -0.05, 0.73)   n <- c(5, 5, 5, 5)   y <- c(0, 1, 3, 5)   #prod连乘   prod <- 1   prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y *                  (1/(1 + exp(alpha + beta * x)))^(n - y))   return(prod) } mlpost <- function(alpha = 0.1, beta = 5) {   -log(bioassay.post(alpha, beta)) } library(bbmle) mle(mlpost) modedensity <- bioassay.post(0.87, 7.91) alphax <- seq(-5, 10, length = 1000) betay <- seq(-10, 40, length = 1000) #Vectorize,能将一个不能进行向量化运算的函数进行转化,使之具备向量化运算功能。 post <- outer(alphax, betay, Vectorize(bioassay.post)) par(mfrow = c(1, 2)) c

贝叶斯估计

bioassay.post <- function(alpha = 0.1, beta = 5) {   k <- 4   x <- c(-0.86, -0.3, -0.05, 0.73)   n <- c(5, 5, 5, 5)   y <- c(0, 1, 3, 5)   prod <- 1   prod <- prod((exp(alpha + beta * x)/(1 + exp(alpha + beta * x)))^y *                  (1/(1 + exp(alpha + beta * x)))^(n - y))   return(prod) } mlpost <- function(alpha = 0.1, beta = 5) {   -log(bioassay.post(alpha, beta)) } library(bbmle) mle(mlpost)

R bayes

library(dplyr) library(tidyr) library(ggplot2) x <- seq(0, 1, 0.01) n <- 5 y <- 3 alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5) beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5) z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) z.prio <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) for (i in c(1:9)) {   z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i])   z.prio[, i] <- dbeta(x, alpha[i], beta[i]) } z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4,                  post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9) z.prio <- select(z.prio, prio.V1 = V1, prio.V2 = V2, prio.V3 = V3,                   prio.V4 = V4, prio.V5 = V5, prio.V6 = V6, prio.V7 = V7, prio.V8 = V8,                   prio.V9 = V9) z.wide <- cbind(x,z.post,z.prio) z <- gather(z.wide,bayes,value,post.V1:prio.V9)%>%mutate(group=substr(bayes,7,8))%>%mutate

《R语言与统计分析》 11章代码

#例题 11.3.1 library(dplyr) library(reshape2) library(ggplot2) x <- seq(0, 1, 0.01) n <- 5 y <- 3 alpha <- c(0.5, 0.5, 0.5, 1, 1, 1, 1.5, 1.5, 1.5) beta <- c(0.5, 1, 1.5, 0.5, 1, 1.5, 0.5, 1, 1.5) z.post <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) z.prior <- as.data.frame(matrix(0, nrow = length(x), ncol = length(alpha))) for (i in c(1:9)) {   z.post[, i] <- dbeta(x, alpha[i] + y, n - y + beta[i])   z.prior[, i] <- dbeta(x, alpha[i], beta[i]) } z.post <- select(z.post, post.V1 = V1, post.V2 = V2, post.V3 = V3, post.V4 = V4,                  post.V5 = V5, post.V6 =V6, post.V7 = V7, post.V8 = V8, post.V9 = V9) z.prior <- select(z.prior, prior.V1 = V1, prior.V2 = V2, prior.V3 = V3,                   prior.V4 = V4, prior.V5 = V5, prior.V6 = V6, prior.V7 = V7, prior.V8 = V8,                   prior.V9 = V9) z.wide <- cbind(x,z.post,z.prior) z.long1 <- melt(z.wide,id.vars=("x"),measure.vars = c(&q