XGB model
# GBM
library(gbm)
set.seed(123)
model <- gbm(formula = Sepal.Length ~ .,
             distribution = "gaussian",
             data = iris,
             n.trees = 479,
             interaction.depth = 1,
             shrinkage = 0.1,
             n.minobsinnode = 15,
             bag.fraction = 1,
             train.fraction = 0.5,
             cv.folds = 10)
# use distribution = "bernoulli" for classification and "gaussian" for regression (squared error / RMSE)
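To check whether 479 trees is actually near the optimum, gbm can read the best iteration off the 10-fold CV error via gbm.perf(); a minimal sketch (best_iter is just an illustrative name):
# estimated optimal number of boosting iterations from the CV error curve
best_iter <- gbm.perf(model, method = "cv")
best_iter
# relative influence of each predictor at that iteration
summary(model, n.trees = best_iter)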
# XGB
data("iris")
par(mfrow = c(1, 4))  # show the four boxplots side by side; $out prints the outlying values
boxplot(iris$Sepal.Length)$out
boxplot(iris$Sepal.Width)$out
boxplot(iris$Petal.Length)$out
boxplot(iris$Petal.Width)$out
# few outliers, so skip any transformation for now
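The same judgment can be made numerically; a minimal sketch counting points beyond the 1.5 * IQR whiskers, the same rule the boxplots use:
# count of outliers per numeric column under the 1.5 * IQR rule
sapply(iris[, 1:4], function(x) {
  q <- quantile(x, c(0.25, 0.75))
  sum(x < q[1] - 1.5 * IQR(x) | x > q[2] + 1.5 * IQR(x))
})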
n <- nrow(iris)
set.seed(1117)
subiris <- sample(seq_len(n), size = round(0.7 * n))  # 70/30 train/test split
traindata <- iris[subiris, ]
testdata <- iris[-subiris, ]
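Since the split is random, a quick sanity check that all three species land in both partitions:
# class balance of Species across the split
table(traindata$Species)
table(testdata$Species)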
features <- setdiff(names(iris), "Sepal.Length")
library(vtreat)  # one-hot encoding
library(dplyr)
onehot <- vtreat::designTreatmentsZ(iris, features, verbose = FALSE)  # y-free treatment plan
new_vars <- onehot %>%
  magrittr::use_series(scoreFrame) %>%
  dplyr::filter(code %in% c("clean", "lev")) %>%  # keep cleaned numerics and level indicators
  magrittr::use_series(varName)
trainx <- vtreat::prepare(onehot, traindata, varRestriction = new_vars) %>% as.matrix()
trainy <- traindata$Sepal.Length
testx <- vtreat::prepare(onehot, testdata, varRestriction = new_vars) %>% as.matrix()
testy <- testdata$Sepal.Length
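It helps to inspect what prepare() produced, since the tuning code below drops columns 4:6 by position (assumed here to be the three Species indicator columns):
colnames(trainx)  # expected: three numeric predictors plus three Species level indicators
dim(trainx)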
library(xgboost)
set.seed(123)
model <- xgboost(data = trainx, label = trainy,
                 nrounds = 531, eta = 0.1,
                 min_child_weight = 3, max_depth = 1,
                 subsample = 1, colsample_bytree = 0.9,
                 objective = "reg:squarederror", verbose = 0)
importance <- xgb.importance(model = model)
par(mfrow=c(1,1))
xgb.plot.importance(importance, top_n = 3, measure = "Gain")
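For a reference point before tuning, a baseline test error for this untuned model; a minimal sketch (pred0 is just an illustrative name):
# baseline test RMSE of the untuned model
pred0 <- predict(model, testx)
sqrt(mean((pred0 - testy)^2))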
hyper_grid <- expand.grid(eta = c(0.05, 0.1),
                          max_depth = c(5, 7),
                          min_child_weight = c(3, 5, 7),
                          subsample = c(0.65, 0.8),
                          colsample_bytree = c(0.8, 0.9, 1),
                          nrounds = 0,  # filled in by the search below
                          RMSE = 0)     # filled in by the search below
nrow(hyper_grid)  # 72 combinations
for (i in 1:nrow(hyper_grid)) {
  params <- list(
    eta = hyper_grid$eta[i],
    max_depth = hyper_grid$max_depth[i],
    min_child_weight = hyper_grid$min_child_weight[i],
    subsample = hyper_grid$subsample[i],
    colsample_bytree = hyper_grid$colsample_bytree[i]
  )
  set.seed(123)
  xgb.tune <- xgb.cv(
    params = params,
    data = trainx[, -c(4:6)],  # drop the Species indicator columns
    label = trainy,
    nrounds = 500,
    nfold = 5,
    objective = "reg:squarederror",  # use "binary:logistic" for classification
    verbose = 0,
    early_stopping_rounds = 10
  )
  # record this combination's best iteration and its CV RMSE
  hyper_grid$nrounds[i] <- which.min(xgb.tune$evaluation_log$test_rmse_mean)
  hyper_grid$RMSE[i] <- min(xgb.tune$evaluation_log$test_rmse_mean)
}

# rank the parameter combinations by CV RMSE
hyper_grid %>% dplyr::arrange(RMSE) %>% head(10)

# refit on the training set with the best hyperparameters found
set.seed(123)
model <- xgboost(data = trainx[, -c(4:6)], label = trainy,
                 nrounds = 105, eta = 0.05,
                 min_child_weight = 3, max_depth = 7,
                 subsample = 0.8, colsample_bytree = 1,
                 objective = "reg:squarederror", verbose = 0)

# predict on the test set and compute the MAPE
future <- predict(model, testx[, -c(4:6)])
future <- as.data.frame(future)
final <- cbind(future, testy, testx[, -c(4:6)])
final <- mutate(final, mape = abs(future - testy) / testy)
mean(final$mape)
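MAPE is scale-free; as a complement, the test RMSE can be computed from the same final frame built above; a minimal sketch:
# test-set RMSE of the tuned model
sqrt(mean((final$future - final$testy)^2))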