# 决策树 (decision tree)

library(naniar) 

# Load the built-in iris data and keep only two of the species, which turns
# the task into a two-class classification problem.
data("iris")
iris <- subset(iris, Species != "setosa")

# subset() keeps "setosa" as an empty factor level. Drop it so that downstream
# steps (model fitting, confusion matrix) only see the classes still present.
iris <- droplevels(iris)

# Report whether any value in the data is missing.
print(anyNA(iris))

n <- nrow(iris)

# Fix the seed so that the random train/test split is reproducible.
set.seed(1117)

# Draw 70% of the row positions at random for training; the remaining rows
# form the test set.
t_idx <- sample(seq_len(n), size = round(0.7 * n))

traindata <- iris[t_idx, ]
testdata <- iris[-t_idx, ]

library(rpart) 

library(rpart.plot)

# Fit a classification tree that predicts Species from all other columns.
# method = "class" is used for a categorical response; switch to
# method = "anova" for a numeric response.
dtreeM <- rpart(Species ~ ., data = traindata, method = "class")

# Plot the fitted tree.
rpart.plot(dtreeM, digits = 2, varlen = 20)

# Print the complexity-parameter (cp) table. The cp value chosen below is the
# one in the row with the smallest cross-validated error (xerror).
printcp(dtreeM)

# Prune the tree at the cp value that minimises xerror.
cp_table <- dtreeM$cptable
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
pruned_tree <- prune(dtreeM, cp = best_cp)

# Predict the class of every test row and put the predictions next to the
# test data, in a column named `future`.
future <- predict(pruned_tree, testdata, type = "class")
future <- as.data.frame(future)
final <- cbind(future, testdata)

# Confusion matrix: rows are the actual species, columns the predicted ones.
confusion <- table(final$Species, final$future, dnn = c("实际", "预测"))

# print() explicitly so the results are also shown when the script is run
# through source(), where top-level values are not auto-printed.
print(confusion)

# Accuracy = correctly classified rows (the diagonal) / all test rows.
accuracy <- sum(diag(confusion)) / sum(confusion)
print(accuracy)

 

# 评论

# 此博客中的热门博文

# V2ray websocket(ws)+tls+nginx分流

# Rstudio 使用代理