# Deep learning in R with h2o
library(MASS)
library(h2o)
# Seed R's RNG so the train/test sampling later in the script is reproducible.
set.seed(123)
# Work on a copy of the Boston housing data (provided by MASS).
DataFrame <- Boston
# Structure of the Boston data
str(DataFrame)
# Histogram of the response variable, medv
hist(DataFrame$medv)
# Check the dimensions of this data frame
dim(DataFrame)
head(DataFrame)
# Min (row 1) and max (row 2) of each variable. vapply() walks the columns
# directly instead of coercing the data frame to a matrix as apply() would.
vapply(DataFrame, range, numeric(2))
# Min-max normalisation: subtracting each column's minimum and dividing by
# its range maps every variable onto [0, 1]. This is NOT z-score
# standardisation, so the result does not have mean 0 / standard deviation 1.
# NOTE(review): the min and max are taken over all rows, including the rows
# that later become testDF, so test-set information leaks into the scaling.
maxValue <- vapply(DataFrame, max, numeric(1))
minValue <- vapply(DataFrame, min, numeric(1))
DataFrame <- as.data.frame(
  scale(DataFrame, center = minValue, scale = maxValue - minValue)
)
# Start (or connect to) a local H2O instance on port 54321, with its memory
# capped via max_mem_size.
h2o.init(ip = "localhost", port = 54321, max_mem_size = "3000m")
# Response column, and predictors (every column other than the response)
y <- "medv"
x <- setdiff(colnames(DataFrame), y)
# Train/test split: draw 400 row indices without replacement for training;
# the remaining rows form the test set.
# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) for an
# empty data frame.
ind <- sample(seq_len(nrow(DataFrame)), 400)
trainDF <- DataFrame[ind, ]
testDF <- DataFrame[-ind, ]
# Fit an H2O deep learning model for the response y on the predictors x,
# using the training rows, then plot the fitted model object.
model <- h2o.deeplearning(
  x = x,
  y = y,
  seed = 1234,
  training_frame = as.h2o(trainDF),  # upload the training rows to H2O
  nfolds = 3,                        # 3-fold cross-validation
  # Early stopping is driven by MSE; see the h2o.deeplearning documentation
  # for the exact rule applied with stopping_rounds.
  stopping_rounds = 7,
  stopping_metric = "MSE",
  epochs = 400,
  # Spelled TRUE rather than T: T is an ordinary variable that can be
  # reassigned, TRUE is a reserved word.
  overwrite_with_best_model = TRUE,
  activation = "Tanh",
  input_dropout_ratio = 0.1,
  hidden = c(10, 10),                # two hidden layers of 10 units each
  l1 = 6e-4,                         # L1 regularisation strength
  loss = "Automatic",
  distribution = "AUTO"
)
plot(model)
# Score the held-out rows and pull the predictions back into an R data frame.
predictions <- as.data.frame(predict(model, as.h2o(testDF)))
str(predictions)
# Test-set mean squared error. medv was min-max scaled earlier in the
# script, so this MSE is in scaled units, not the original units of medv.
mean((predictions$predict - testDF$medv)^2)
# Plot actual vs predicted values; points lying on the reference line y = x
# are exact predictions.
plot(
  testDF$medv, predictions$predict,
  col = "blue", main = "Real vs Predicted",
  pch = 1, cex = 0.9, type = "p", xlab = "Actual", ylab = "Predicted"
)
abline(0, 1, col = "black")
# Shut the H2O instance down without an interactive confirmation prompt.
# FALSE is spelled out because F is an ordinary, reassignable variable.
h2o.shutdown(prompt = FALSE)
library(h2o)
# Seed R's RNG so the train/test sampling later in the script is reproducible.
set.seed(123)
# Work on a copy of the Boston housing data (provided by MASS).
DataFrame <- Boston
# Structure of the Boston data
str(DataFrame)
# Histogram of the response variable, medv
hist(DataFrame$medv)
# Check the dimensions of this data frame
dim(DataFrame)
head(DataFrame)
# Min (row 1) and max (row 2) of each variable. vapply() walks the columns
# directly instead of coercing the data frame to a matrix as apply() would.
vapply(DataFrame, range, numeric(2))
# Min-max normalisation: subtracting each column's minimum and dividing by
# its range maps every variable onto [0, 1]. This is NOT z-score
# standardisation, so the result does not have mean 0 / standard deviation 1.
# NOTE(review): the min and max are taken over all rows, including the rows
# that later become testDF, so test-set information leaks into the scaling.
maxValue <- vapply(DataFrame, max, numeric(1))
minValue <- vapply(DataFrame, min, numeric(1))
DataFrame <- as.data.frame(
  scale(DataFrame, center = minValue, scale = maxValue - minValue)
)
# Start (or connect to) a local H2O instance on port 54321, with its memory
# capped via max_mem_size.
h2o.init(ip = "localhost", port = 54321, max_mem_size = "3000m")
# Response column, and predictors (every column other than the response)
y <- "medv"
x <- setdiff(colnames(DataFrame), y)
# Train/test split: draw 400 row indices without replacement for training;
# the remaining rows form the test set.
# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) for an
# empty data frame.
ind <- sample(seq_len(nrow(DataFrame)), 400)
trainDF <- DataFrame[ind, ]
testDF <- DataFrame[-ind, ]
# Fit an H2O deep learning model for the response y on the predictors x,
# using the training rows, then plot the fitted model object.
model <- h2o.deeplearning(
  x = x,
  y = y,
  seed = 1234,
  training_frame = as.h2o(trainDF),  # upload the training rows to H2O
  nfolds = 3,                        # 3-fold cross-validation
  # Early stopping is driven by MSE; see the h2o.deeplearning documentation
  # for the exact rule applied with stopping_rounds.
  stopping_rounds = 7,
  stopping_metric = "MSE",
  epochs = 400,
  # Spelled TRUE rather than T: T is an ordinary variable that can be
  # reassigned, TRUE is a reserved word.
  overwrite_with_best_model = TRUE,
  activation = "Tanh",
  input_dropout_ratio = 0.1,
  hidden = c(10, 10),                # two hidden layers of 10 units each
  l1 = 6e-4,                         # L1 regularisation strength
  loss = "Automatic",
  distribution = "AUTO"
)
plot(model)
# Score the held-out rows and pull the predictions back into an R data frame.
predictions <- as.data.frame(predict(model, as.h2o(testDF)))
str(predictions)
# Test-set mean squared error. medv was min-max scaled earlier in the
# script, so this MSE is in scaled units, not the original units of medv.
mean((predictions$predict - testDF$medv)^2)
# Plot actual vs predicted values; points lying on the reference line y = x
# are exact predictions.
plot(
  testDF$medv, predictions$predict,
  col = "blue", main = "Real vs Predicted",
  pch = 1, cex = 0.9, type = "p", xlab = "Actual", ylab = "Predicted"
)
abline(0, 1, col = "black")
# Shut the H2O instance down without an interactive confirmation prompt.
# FALSE is spelled out because F is an ordinary, reassignable variable.
h2o.shutdown(prompt = FALSE)
# Comments
# Post a comment