# 决策树 (Decision tree)
# Decision-tree classification on the iris data (versicolor vs. virginica):
# fit an rpart classification tree on a random 70/30 train/test split, prune it
# at the cp with the lowest cross-validated error, then report the confusion
# matrix and accuracy on the held-out test rows.
library(naniar)
library(rpart)
library(rpart.plot)

data("iris")

# Keep two species only. droplevels() removes the now-empty "setosa" factor
# level; without it the response still has three classes, so the fitted tree,
# the predictions and the confusion matrix all carry an empty "setosa" entry.
iris <- droplevels(subset(iris, Species != "setosa"))

# Check for missing values before modelling (any_na() comes from naniar).
any_na(iris)

# Random 70/30 split; the fixed seed makes the partition reproducible.
n <- nrow(iris)
set.seed(1117)
t_idx <- sample(seq_len(n), size = round(0.7 * n))
traindata <- iris[t_idx, ]
testdata <- iris[-t_idx, ]

# Classification tree on all predictors
# (use method = "anova" for a numeric response).
dtreeM <- rpart(Species ~ ., data = traindata, method = "class")
rpart.plot(dtreeM, digits = 2, varlen = 20) # draw the fitted tree
printcp(dtreeM) # complexity table: CP, number of splits, xerror, ...

# Prune at the complexity parameter whose cross-validated error (xerror) is
# the smallest row of the cp table.
best_cp <- dtreeM$cptable[which.min(dtreeM$cptable[, "xerror"]), "CP"]
剪枝 <- prune(dtreeM, cp = best_cp)

# Predict the class of every test row and place the prediction next to the
# original test data.
future <- predict(剪枝, testdata, type = "class")
future <- as.data.frame(future)
final <- cbind(future, testdata)

# Confusion matrix: rows are the actual species, columns the predicted one.
confusion <- table(final$Species, final$future, dnn = c("实际", "预测"))
confusion

# Accuracy = share of test rows on the diagonal (correctly classified).
accuracy <- sum(diag(confusion)) / sum(confusion)
accuracy
# 评论
# 发表评论