###############################################################################
###############################################################################
# Chapter 8 Supplemental
###############################################################################
###############################################################################

# Example 5 Using Random Forests
# Random Forests - Application to the Chiaretti (2004) data. With respect to...

library(randomForest)
library("hgu95av2.db")
library(ALL)
data(ALL)

# Keep only the samples (columns) whose BT label is B1, B2 or B3.
ALLB123 <- ALL[, ALL$BT %in% c("B1", "B2", "B3")]

# For every expression row, fit a one-way linear model against the BT label
# and take the first p-value of its ANOVA table. The "Pr(>F)" column is
# addressed by its full name instead of relying on `$` partial matching.
pano <- apply(
  exprs(ALLB123), 1,
  function(x) anova(lm(x ~ ALLB123$BT))[["Pr(>F)"]][1]
)

# Feature names whose p-value is below 1e-6. which() drops any NA p-value so
# that it cannot turn into an NA feature name in the lookup below.
# NOTE(review): `names` shadows base::names as a variable (function calls such
# as names(x) still resolve); the name is kept in case later code refers to it.
names <- featureNames(ALL)[which(pano < 0.000001)]

# Look the selected features up in the hgu95av2SYMBOL annotation map.
symb <- mget(names, envir = hgu95av2SYMBOL)

# Expression matrix restricted to the selected features, with the looked-up
# symbols used as row names.
ALLBTnames <- ALLB123[names, ]
probedat <- as.matrix(exprs(ALLBTnames))
row.names(probedat) <- unlist(symb)

# Response: BT label of each retained sample, with unused levels dropped.
diagnosed <- factor(ALLBTnames$BT)

# Predictors: one row per sample, one column per selected feature. Built once
# and reused by every forest below.
probe_df <- data.frame(t(probedat))

# norm.votes = FALSE keeps raw vote counts rather than fractions, which the
# randomForest documentation recommends when results of several runs are to be
# combined.
rForest <- randomForest(
  diagnosed ~ .,
  data = probe_df, ntree = 1000, norm.votes = FALSE
)
print(rForest)
importance(rForest)

# Error rate as a function of the number of trees; one legend entry per column
# of the error-rate matrix.
plot(rForest)
legend(
  "topright",
  legend = colnames(rForest$err.rate),
  col = seq_len(ncol(rForest$err.rate)),
  pch = 19
)

# Now try multiple forests fitted independently. Notice that no two random
# forests are exactly the same (no seed is set, so each fit differs).
# Can actually create multiple random forests and combine them.
rf1 <- randomForest(
  diagnosed ~ .,
  data = probe_df, ntree = 1000, norm.votes = FALSE
)
rf2 <- randomForest(
  diagnosed ~ .,
  data = probe_df, ntree = 1000, norm.votes = FALSE
)
rf3 <- randomForest(
  diagnosed ~ .,
  data = probe_df, ntree = 1000, norm.votes = FALSE
)

# Merge the three forests into a single ensemble.
rf.all <- randomForest::combine(rf1, rf2, rf3)
print(rf.all)