# Random Forests ----
# Demonstrates randomForest on a classification task (iris) and a regression
# task (airquality), and compares the latter against a single regression tree.

# Classification ----
library(randomForest)

data(iris)
help(iris)
nrow(iris)
head(iris)

set.seed(71)
# NOTE(review): ntree = 2 grows only two trees; kept as in the original,
# presumably to keep the demo output small -- confirm this is intended.
iris.rf <- randomForest(
  Species ~ .,
  data = iris,
  ntree = 2,
  importance = TRUE,
  proximity = TRUE
)
print(iris.rf)
iris.rf
predict(iris.rf)

# Variable importance, raw and rounded to two decimals.
importance(iris.rf)
round(importance(iris.rf), 2)

# Proximity matrix (available because proximity = TRUE above).
iris.rf$proximity
max(iris.rf$proximity)
min(iris.rf$proximity)
iris.rf$proximity[1:10, ]

# Inspect the third tree of a freshly fitted forest (x/y interface).
getTree(randomForest(iris[, -5], iris[, 5]), 3, labelVar = TRUE)

# Regression ----
data(airquality)
nrow(airquality)
head(airquality)

# Regression tree
# Install the 'tree' package only when it is missing, instead of
# reinstalling it on every run of the script.
if (!requireNamespace("tree", quietly = TRUE)) {
  install.packages("tree")
}
library(tree)
rt1 <- tree(Ozone ~ ., data = airquality)
rt1
plot(rt1)
text(rt1)

# Random forest
set.seed(131)
# Construct a new data set without the rows that contain missing values.
newdata <- na.omit(airquality)
# Fit on the complete cases so that the fitted values below line up
# row-for-row with newdata; na.action is kept for parity with the original
# call (newdata has no missing values left to omit).
ozone.rf <- randomForest(
  Ozone ~ .,
  data = newdata,
  mtry = 3,
  importance = TRUE,
  na.action = na.omit
)
print(ozone.rf)
predict(ozone.rf)

# Show "importance" of variables: a higher value means more important.
round(importance(ozone.rf), 2)

# Calculate MSE
# yhat: predictions stored on the fitted object (out-of-bag predictions).
yhat <- ozone.rf$predicted
# yhatnoob: predictions from passing the predictors back through the forest.
yhatnoob <- predict(ozone.rf, newdata[, -1])
# Residuals of the out-of-bag predictions; the original script used `res`
# without ever defining it.
res <- newdata$Ozone - yhat
mse <- sum(res^2) / nrow(newdata)

# Calculate R^2
# The column is named `Ozone`; `newdata$ozone` is NULL because `$` is
# case-sensitive.
ssto <- sum((newdata$Ozone - mean(newdata$Ozone))^2)
sse <- sum(res^2)
rsq <- (ssto - sse) / ssto
rsq