########################################################### # Problem Set 0: Answering Dr. Kesseli's question # Please provide 1 or 2 sentences answer to the questions # below? ########################################################### library(multtest); data(golub) golubFactor <- factor(golub.cl,levels=0:1, labels= c("ALL","AML")) meanALL <- apply(golub[,golubFactor=="ALL"], 1, mean) meanAML <- apply(golub[,golubFactor=="AML"], 1, mean) geneCor = apply(golub, 1, function(x) {abs(cor(x,golub.cl))}) # pearson pearson = cor(abs(meanALL-meanAML), geneCor) # pearson pearson spearman = cor(abs(meanALL-meanAML), geneCor, method="spearman") spearman #################################################################### # 1. What am I doing here? #################################################################### cor(sample(abs(meanALL-meanAML)), geneCor) #################################################################### # 2. What am I doing here and why? #################################################################### cors = replicate(10000, {cor(sample(abs(meanALL-meanAML)), geneCor)}) probDensity = hist(cors) probDensity #################################################################### # 3. What am I doing here and why? #################################################################### t.test(cors, mu=pearson, alternative="less") # Extra credit! max(abs(meanALL-meanAML)) min(abs(meanALL-meanAML)) max(geneCor) min(geneCor) abs(meanALL-meanAML)[which.max(geneCor)] # the gene with the largest mean expression difference has the highest correlation!! #################################################################### # 4. What am I doing here? #################################################################### medianAll <- apply(golub[,golubFactor=="ALL"], 1, median) medianAml <- apply(golub[,golubFactor=="AML"], 1, median) cor(abs(medianAll-medianAml), geneCor) trimmedMeanAll <- apply(golub[,golubFactor=="ALL"], 1, function(x) mean(x, trim=.1)) trimmedMeanAml <- apply(golub[,golubFactor=="AML"], 1, function(x) mean(x, trim=.1)) cor(abs(trimmedMeanAll-trimmedMeanAml), geneCor) #################################################################### # 5. What am I doing here and why? #################################################################### o <- order(abs(meanALL-meanAML), decreasing=TRUE) cor(abs(meanALL-meanAML)[o[1:5]], geneCor[o[1:5]]) # pearson cor(abs(meanALL-meanAML)[o[1:10]], geneCor[o[1:10]]) # pearson cor(abs(meanALL-meanAML)[o[1:20]], geneCor[o[1:20]]) # pearson cor(abs(meanALL-meanAML)[o[1:50]], geneCor[o[1:50]]) # pearson cor(abs(meanALL-meanAML)[o[1:200]], geneCor[o[1:200]]) # pearson cor(abs(meanALL-meanAML)[o[1:500]], geneCor[o[1:500]]) # pearson cor(abs(meanALL-meanAML)[o[1:1000]], geneCor[o[1:1000]]) # pearson cor(abs(meanALL-meanAML)[o[1:2000]], geneCor[o[1:2000]]) # pearson cor(abs(meanALL-meanAML)[o[1:3000]], geneCor[o[1:3000]]) # pearson cor(abs(meanALL-meanAML), geneCor) o <- order(abs(meanALL-meanAML), decreasing=TRUE) cor(abs(meanALL-meanAML)[o[1:5]], geneCor[o[1:5]], method="spearman") cor(abs(meanALL-meanAML)[o[1:10]], geneCor[o[1:10]], method="spearman") cor(abs(meanALL-meanAML)[o[1:20]], geneCor[o[1:20]], method="spearman") cor(abs(meanALL-meanAML)[o[1:50]], geneCor[o[1:50]], method="spearman") cor(abs(meanALL-meanAML)[o[1:200]], geneCor[o[1:200]], method="spearman") cor(abs(meanALL-meanAML)[o[1:500]], geneCor[o[1:500]], method="spearman") cor(abs(meanALL-meanAML)[o[1:1000]], geneCor[o[1:1000]], method="spearman") cor(abs(meanALL-meanAML)[o[1:2000]], geneCor[o[1:2000]], method="spearman") cor(abs(meanALL-meanAML)[o[1:3000]], geneCor[o[1:3000]], method="spearman") cor(abs(meanALL-meanAML), geneCor, method="spearman") #################################################################### # 6. What am I doing here and why (think "outliers") ? #################################################################### o <- order(abs(trimmedMeanAll-trimmedMeanAml), decreasing=TRUE) cor(abs(trimmedMeanAll-trimmedMeanAml)[o[1:100]], geneCor[o[1:100]]) # pearson o <- order(abs(medianAll-medianAml), decreasing=TRUE) cor(abs(medianAll-medianAml)[o[1:100]], geneCor[o[1:100]]) # pearson o <- order(abs(meanALL-meanAML), decreasing=TRUE) cor(abs(meanALL-meanAML)[o[1:100]], geneCor[o[1:100]]) # pearson o <- order(abs(trimmedMeanAll-trimmedMeanAml), decreasing=TRUE) cor(abs(trimmedMeanAll-trimmedMeanAml)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson o <- order(abs(medianAll-medianAml), decreasing=TRUE) cor(abs(medianAll-medianAml)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson o <- order(abs(meanALL-meanAML), decreasing=TRUE) cor(abs(meanALL-meanAML)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson #################################################################### # 7. Now, you're ready to answer Dr. Kesseli's question. In general, # are the differences between the mean expression values between ALL # and AML patients equal to (i.e. perfectly correlated with) the # correlation weights? In general, are they well-correlated? Are # some of the genes well-correlated? If so, which ones? Is there a # simple relationship between (1) the differences between the mean # expression values between the AML and ALL samples and (2) the weights? # Does it appear that the magnitude of the expression difference has the # same "impact" for all the genes? Is the signal-to-noise ratio the # same for all the genes independent of the magnitude of the expression # difference between the AML and ALL genes? Is there a potential # artifact here? What could it be? #################################################################### #################################################################### # Extra Credit: Now we will exerience first-hand the awesome power of # good figures. What are these plots and what are they telling you? #################################################################### biocLite("edgeR") library(edgeR) maPlot(meanALL, meanAML, normalize=FALSE, lowess=TRUE, # plot Loess curve pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big logAbundance=meanAML+meanALL, # X-values logFC=meanAML/meanALL, # Y-values xlab="mean(AML) + mean(ALL)", ylab="mean(AML) / mean(ALL)") abline(h=1, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=3, lty=3, col="green") # vertical line maPlot(meanAML, meanALL, normalize=FALSE, lowess=TRUE, # plot Loess curve pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big logAbundance=meanAML+meanALL, # X-values logFC=meanALL/meanAML, # Y-values xlab="mean(AML) + mean(ALL)", ylab="mean(ALL) / mean(AML)") abline(h=1, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=3, lty=3, col="green") # vertical line x = meanAML-meanALL y = apply(golub, 1, function(x) {cor(x, golub.cl)} ) plot(x, # X-values y, # Y-values pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big xlab="mean(AML) - mean(ALL)", ylab="Pearson Correlation with Diagnosis") lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y) abline(h=0, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=2, lty=3, col="red") # vertical line x = abs(meanAML-meanALL) y = apply(golub, 1, function(x) {abs(cor(x, golub.cl))} ) plot(x, # X-values y, # Y-values pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big xlab="abs(mean(AML) - mean(ALL))", ylab="Absolute Value of Pearson Correlation with Diagnosis") lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y) abline(h=0, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=2, lty=3, col="red") # vertical line x = meanAML-meanALL y = apply(golub, 1, function(x) {cor(x, golub.cl, method="spearman")} ) plot(x, # X-values y, # Y-values pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big xlab="mean(AML) - mean(ALL)", ylab="Spearman Correlation with Diagnosis") lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y) abline(h=0, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=2, lty=3, col="red") # vertical line x = abs(meanAML-meanALL) y = apply(golub, 1, function(x) {abs(cor(x, golub.cl, method="spearman"))} ) plot(x, # X-values y, # Y-values pch=19, # plot solid circles cex=0.1, # make solid circles very small cex.lab=1.5, # make axis labels big xlab="abs(mean(AML) - mean(ALL))", ylab="Absolute Value of Spearman Correlation with Diagnosis") lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y) abline(h=0, lwd=2, lty=2, col="blue") # horizontal line abline(v=0, lwd=2, lty=3, col="red") # vertical line