# lasso回归 (lasso regression; note that the model fitted below uses alpha = 0, i.e. the ridge variant)
library(glmnet)
library(tidyverse)
# Load the built-in iris data and drop column 5 (Species) so that only the
# four numeric measurement columns remain.
data("iris")
iris <- iris[, -5]

# Variable importance:
# with ridge the coefficients (importance) do not become 0,
# with lasso the coefficients (importance) can become 0.

# Draw one boxplot per predictor side by side and collect the points that
# boxplot() flags as outliers. Each panel is titled with its column name so
# the three plots can be told apart.
predictors <- c("Sepal.Width", "Petal.Length", "Petal.Width")
old_par <- par(mfrow = c(1, length(predictors)))
outliers <- lapply(
  setNames(predictors, predictors),
  function(col) boxplot(iris[[col]], main = col)$out
)
# Restore the previous graphics layout so later base plots are not forced
# into the 1 x 3 grid.
par(old_par)
# Print explicitly: top-level autoprinting is lost when the file is source()d.
print(outliers)
# Reproducible 70/30 train/test split of the rows of `iris`.
set.seed(1117)
n_obs <- nrow(iris)
subiris <- sample.int(n_obs, size = round(0.7 * n_obs))

# glmnet works on matrices, so convert both partitions up front.
traindata <- as.matrix(iris[subiris, ])
testdata <- as.matrix(iris[-subiris, ])

# Column 1 is the response; columns 2 to 4 are the predictors.
response_col <- 1
predictor_cols <- 2:4

trainx <- traindata[, predictor_cols]
trainy <- traindata[, response_col]
testx <- testdata[, predictor_cols]
testy <- testdata[, response_col]
# Fit a cross-validated penalised regression of trainy on trainx.
# alpha = 0 gives ridge, alpha = 1 gives lasso; cv.glmnet cross-validates
# over lambda (10 folds by default).
ridge <- cv.glmnet(x = trainx, y = trainy, alpha = 0)

# Visualise the coefficients / select predictors.
# The coefficient table is built from the matrix's row names and its first
# column by position, so it does not depend on the column label glmnet
# assigns, which differs between glmnet versions.
ridge_coef <- as.matrix(coef(ridge, s = "lambda.min"))
coef_tbl <- tibble(
  var = rownames(ridge_coef),
  coef = unname(ridge_coef[, 1])
)

coef_plot <- coef_tbl %>%
  filter(var != "(Intercept)") %>% # drop the intercept term
  # Keep the 3 predictors with the largest coefficient magnitude, so that
  # strongly negative coefficients are not discarded.
  slice_max(order_by = abs(coef), n = 3) %>%
  ggplot(aes(coef, reorder(var, coef))) +
  geom_col(
    width = 0.2,
    color = "blue",
    fill = rgb(0.1, 0.4, 0.5, 0.7)
  ) +
  xlab("Coefficient") +
  ylab(NULL)

# Print explicitly so the plot is also drawn when the file is source()d.
print(coef_plot)
# Prediction: score the held-out rows at the lambda with the lowest
# cross-validated error and report the mean absolute percentage error (MAPE).
pred_matrix <- predict(ridge, newx = testx, s = ridge$lambda.min)

# The prediction column is named X1 explicitly rather than relying on the
# label glmnet puts on the prediction matrix, which differs between glmnet
# versions.
future <- data.frame(
  X1 = as.numeric(pred_matrix),
  row.names = rownames(testx)
)
final <- data.frame(future, testy = testy)

# Absolute percentage error per test row; becomes Inf/NaN if any testy is 0.
final$mape <- abs(final$X1 - final$testy) / final$testy

# Print explicitly so the value is also shown when the file is source()d.
print(mean(final$mape))
# 评论 (comments)
# 发表评论 (post a comment)