##############################################
# Looping in R
##############################################

# ALWAYS, ALWAYS, ALWAYS use braces to denote the blocks of code that are
# being looped over!!!
# Using braces to denote your code blocks makes your code more readable and
# less bug prone!

# for (variable in sequence) {
#   statements
# }
for (index in 1:5) {
  print(index)
}

# Print every index below stopCondition, then raise an error on the first
# index that is not. NOTE: stop() signals an error, so it ends the loop as
# soon as index reaches stopCondition; when this file is run with source()
# the error also halts the rest of the script.
stopCondition <- 4
for (index in 1:10) {
  if (index < stopCondition) {
    print(index)
  } else {
    stop("values need to be < ", stopCondition)
  }
}

# while (condition) {
#   statements
# }
# The condition is re-checked before each pass, so this prints 2, 4 and 6.
z <- 0
while (z < 5) {
  z <- z + 2
  print(z)
}

##############################################
# Heatmaps in R
##############################################

# multtest supplies the golub data set (golub, golub.cl).
# gplots supplies greenred() and heatmap.2(); it has to be attached before
# the first heatmap() call below because those calls already use greenred().
library(multtest)
library(gplots)
data(golub)

# No scaling and no dendrogram (clustering)
heatmap(golub, Rowv = NA, Colv = NA, scale = "none", col = greenred(75))

# Row scaling and no dendrogram (clustering)
heatmap(golub, Rowv = NA, Colv = NA, scale = "row", col = greenred(75))

# No scaling and row dendrogram (clustering)
heatmap(golub, Colv = NA, scale = "none", cexRow = 0.1)

# Order rows by decreasing difference between the mean expression of the
# ALL and AML patients.
# golub.cl codes each column's class as 0/1; relabel it as ALL/AML.
gol.fac <- factor(golub.cl, levels = 0:1, labels = c("ALL", "AML"))
mall <- apply(golub[, gol.fac == "ALL"], 1, mean)
maml <- apply(golub[, gol.fac == "AML"], 1, mean)
o <- order(abs(mall - maml), decreasing = TRUE)

# Create a set of biomarkers out of the top 50
biomarkers50 <- golub[o[1:50], ]
dim(biomarkers50)

# No scaling and no dendrogram (clustering)
heatmap(biomarkers50, Rowv = NA, Colv = NA, scale = "none")

# Row scaling and no dendrogram (clustering)
heatmap(biomarkers50, Rowv = NA, Colv = NA, scale = "row")

# Create a red-green heatmap
heatmap(biomarkers50, Rowv = NA, Colv = NA, scale = "none",
        col = greenred(75))

# Use heatmap.2 (from gplots, attached above) in order to add a legend or a
# histogram or other fancy stuff

# No scaling and no dendrogram (clustering)
heatmap.2(biomarkers50, Rowv = NA, Colv = NA, scale = "none",
          col = greenred(75), dendrogram = "none", key = TRUE,
          symkey = FALSE, density.info = "none", trace = "none",
          cexRow = 0.5)

# Row scaling and no dendrogram (clustering)
heatmap.2(biomarkers50, Rowv = NA, Colv = NA, scale = "row",
          col = greenred(75), dendrogram = "none", key = TRUE,
          symkey = FALSE, density.info = "none", trace = "none",
          cexRow = 0.5)

# Row scaling and row and column dendrograms (clustering)
heatmap.2(biomarkers50, scale = "row", col = greenred(75),
          dendrogram = "both", key = TRUE, symkey = FALSE,
          density.info = "none", trace = "none", cexRow = 0.5)

# Question 1: If we choose the default dendrogram distance function on the
#             top 50 difference-between-the-mean-expression values as our
#             classifier, what is our misclassification rate?
# Answer: 3 out of 11 - We misclassify patients 28, 32, 35.

# Create a new set of biomarkers out of the top 100
biomarkers100 <- golub[o[1:100], ]

# Row scaling and row and column dendrograms (clustering)
heatmap.2(biomarkers100, scale = "row", col = greenred(75),
          dendrogram = "both", key = TRUE, symkey = FALSE,
          density.info = "none", trace = "none", cexRow = 0.3)

# Question 2: What is our misclassification rate now?
# Question 3: For this data and our primitive classifier, is using 100
#             markers better than using 50?