# Lasso and ridge regression with glmnet (this example fits ridge, alpha = 0)

 library(glmnet)

library(tidyverse)

# Load the built-in iris data set and keep only its numeric measurements
# by dropping the fifth column.
data(iris)
iris <- iris[, -5L, drop = FALSE]


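# Variable importance
# Ridge shrinks coefficients toward 0 but never exactly to 0; the lasso can shrink some coefficients exactly to 0 (so it also performs variable selection).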


# Outlier screening: draw three box plots side by side and print the points
# that fall beyond the whiskers (the `out` component returned by boxplot()).
old_par <- par(mfrow = c(1, 3))  # par() returns the settings it replaced

boxplot(iris$Sepal.Width)$out
boxplot(iris$Petal.Length)$out
boxplot(iris$Petal.Width)$out

# Restore the previous layout so later base-graphics plots are not squeezed
# into the 1 x 3 grid.
par(old_par)


# Reproducible 70/30 train/test split.
set.seed(1117)

# Row indices of the training set: 70% of the rows, sampled without replacement.
subiris <- sample(seq_len(nrow(iris)), size = round(0.7 * nrow(iris)))

# glmnet works on matrices, so convert both partitions up front.
traindata <- as.matrix(iris[subiris, ])
testdata <- as.matrix(iris[-subiris, ])

# Column 1 is the response; columns 2 to 4 are the predictors.
trainy <- traindata[, 1]
trainx <- traindata[, 2:4]

testy <- testdata[, 1]
testx <- testdata[, 2:4]


# Fit a penalised regression and choose lambda by cross-validation.
# alpha = 0 selects the ridge penalty (alpha = 1 would be the lasso);
# cv.glmnet() uses 10-fold cross-validation by default.
ridge <- cv.glmnet(trainx, trainy, alpha = 0)


# Visualise the fitted coefficients at lambda.min to compare the predictors.
coef_mat <- as.matrix(coef(ridge, s = "lambda.min"))

# Build the data frame by column position instead of renaming a hard-coded
# column: the label glmnet gives this single column has differed between
# releases (e.g. "1" vs "s1"), and add_rownames() is deprecated in dplyr.
coef_df <- data.frame(
  var = rownames(coef_mat),
  coef = unname(coef_mat[, 1]),
  stringsAsFactors = FALSE
)

coef_df %>%
  filter(var != "(Intercept)") %>%  # drop the intercept term
  top_n(3, wt = coef) %>%           # keep the 3 largest (signed) coefficients
  ggplot(aes(coef, reorder(var, coef))) +
  geom_bar(
    stat = "identity", width = 0.2,
    color = "blue", fill = rgb(0.1, 0.4, 0.5, 0.7)
  ) +
  xlab("Coefficient") +
  ylab(NULL)


# Predict on the hold-out set and score the fit with the mean absolute
# percentage error (MAPE).
pred_mat <- predict(ridge, newx = testx, s = ridge$lambda.min)

# predict() returns a one-column matrix whose column label has differed between
# glmnet releases, so name the column explicitly instead of relying on the
# auto-generated "X1". The name X1 is kept so existing references still work.
future <- data.frame(
  X1 = as.vector(pred_mat),
  row.names = rownames(pred_mat)
)

final <- data.frame(future, testy = testy)

# Absolute percentage error per observation, then its mean.
final <- mutate(final, mape = abs(X1 - testy) / testy)

mean(final$mape)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理