###########################################################
# Problem Set 0: Answering Dr. Kesseli's question
# Please provide 1 or 2 sentences answer to the questions
# below?
###########################################################
library(multtest); data(golub)
golubFactor <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))

meanALL <- apply(golub[,golubFactor=="ALL"], 1, mean)
meanAML <- apply(golub[,golubFactor=="AML"], 1, mean)

geneCor = apply(golub, 1, function(x) {abs(cor(x,golub.cl))}) # pearson
pearson = cor(abs(meanALL-meanAML), geneCor) # pearson
pearson
spearman = cor(abs(meanALL-meanAML), geneCor, method="spearman")
spearman

####################################################################
# 1. What am I doing here?
####################################################################
cor(sample(abs(meanALL-meanAML)), geneCor)

####################################################################
# 2. What am I doing here and why?
####################################################################
cors = replicate(10000, {cor(sample(abs(meanALL-meanAML)), geneCor)})
probDensity = hist(cors)
probDensity

####################################################################
# 3. What am I doing here and why?
####################################################################
t.test(cors, mu=pearson, alternative="less") # Extra credit!

max(abs(meanALL-meanAML))
min(abs(meanALL-meanAML))
max(geneCor)
min(geneCor)
abs(meanALL-meanAML)[which.max(geneCor)] # the gene with the largest mean expression difference has the highest correlation!!

####################################################################
# 4. What am I doing here?
####################################################################
medianAll <- apply(golub[,golubFactor=="ALL"], 1, median)
medianAml <- apply(golub[,golubFactor=="AML"], 1, median)
cor(abs(medianAll-medianAml), geneCor)

trimmedMeanAll <- apply(golub[,golubFactor=="ALL"], 1, function(x) mean(x, trim=.1))
trimmedMeanAml <- apply(golub[,golubFactor=="AML"], 1, function(x) mean(x, trim=.1))
cor(abs(trimmedMeanAll-trimmedMeanAml), geneCor)

####################################################################
# 5. What am I doing here and why?
####################################################################
o <- order(abs(meanALL-meanAML), decreasing=TRUE)
cor(abs(meanALL-meanAML)[o[1:5]], geneCor[o[1:5]]) # pearson
cor(abs(meanALL-meanAML)[o[1:10]], geneCor[o[1:10]]) # pearson
cor(abs(meanALL-meanAML)[o[1:20]], geneCor[o[1:20]]) # pearson
cor(abs(meanALL-meanAML)[o[1:50]], geneCor[o[1:50]]) # pearson
cor(abs(meanALL-meanAML)[o[1:200]], geneCor[o[1:200]]) # pearson
cor(abs(meanALL-meanAML)[o[1:500]], geneCor[o[1:500]]) # pearson
cor(abs(meanALL-meanAML)[o[1:1000]], geneCor[o[1:1000]]) # pearson
cor(abs(meanALL-meanAML)[o[1:2000]], geneCor[o[1:2000]]) # pearson
cor(abs(meanALL-meanAML)[o[1:3000]], geneCor[o[1:3000]]) # pearson
cor(abs(meanALL-meanAML), geneCor)

o <- order(abs(meanALL-meanAML), decreasing=TRUE)
cor(abs(meanALL-meanAML)[o[1:5]], geneCor[o[1:5]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:10]], geneCor[o[1:10]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:20]], geneCor[o[1:20]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:50]], geneCor[o[1:50]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:200]], geneCor[o[1:200]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:500]], geneCor[o[1:500]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:1000]], geneCor[o[1:1000]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:2000]], geneCor[o[1:2000]], method="spearman")
cor(abs(meanALL-meanAML)[o[1:3000]], geneCor[o[1:3000]], method="spearman")
cor(abs(meanALL-meanAML), geneCor, method="spearman")

####################################################################
# 6. What am I doing here and why (think "outliers") ?
####################################################################
o <- order(abs(trimmedMeanAll-trimmedMeanAml), decreasing=TRUE)
cor(abs(trimmedMeanAll-trimmedMeanAml)[o[1:100]], geneCor[o[1:100]]) # pearson
o <- order(abs(medianAll-medianAml), decreasing=TRUE)
cor(abs(medianAll-medianAml)[o[1:100]], geneCor[o[1:100]]) # pearson
o <- order(abs(meanALL-meanAML), decreasing=TRUE)
cor(abs(meanALL-meanAML)[o[1:100]], geneCor[o[1:100]]) # pearson

o <- order(abs(trimmedMeanAll-trimmedMeanAml), decreasing=TRUE)
cor(abs(trimmedMeanAll-trimmedMeanAml)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson
o <- order(abs(medianAll-medianAml), decreasing=TRUE)
cor(abs(medianAll-medianAml)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson
o <- order(abs(meanALL-meanAML), decreasing=TRUE)
cor(abs(meanALL-meanAML)[o[2901:3000]], geneCor[o[2901:3000]]) # pearson

####################################################################
# 7. Now, you're ready to answer Dr. Kesseli's question. In general,
# are the differences between the mean expression values between ALL
# and AML patients equal to (i.e. perfectly correlated with) the
# correlation weights? In general, are they well-correlated? Are
# some of the genes well-correlated? If so, which ones? Is there a
# simple relationship between (1) the differences between the mean
# expression values between the AML and ALL samples and (2) the weights?
# Does it appear that the magnitude of the expression difference has the
# same "impact" for all the genes? Is the signal-to-noise ratio the
# same for all the genes independent of the magnitude of the expression
# difference between the AML and ALL genes? Is there a potential
# artifact here? What could it be?
####################################################################

####################################################################
# Extra Credit: Now we will exerience first-hand the awesome power of
# good figures. What are these plots and what are they telling you?
####################################################################
biocLite("edgeR")
library(edgeR)


maPlot(meanALL,
       meanAML,
       normalize=FALSE,
       lowess=TRUE,  # plot Loess curve
       pch=19,       # plot solid circles
       cex=0.1,      # make solid circles very small
       cex.lab=1.5,  # make axis labels big
       logAbundance=meanAML+meanALL,  # X-values
       logFC=meanAML/meanALL,         # Y-values
       xlab="mean(AML) + mean(ALL)",
       ylab="mean(AML) / mean(ALL)")
abline(h=1, lwd=2, lty=2, col="blue")    # horizontal line
abline(v=0, lwd=3, lty=3, col="green")   # vertical line

maPlot(meanAML,
       meanALL,
       normalize=FALSE,
       lowess=TRUE,  # plot Loess curve
       pch=19,       # plot solid circles
       cex=0.1,      # make solid circles very small
       cex.lab=1.5,  # make axis labels big
       logAbundance=meanAML+meanALL,  # X-values
       logFC=meanALL/meanAML,         # Y-values
       xlab="mean(AML) + mean(ALL)",
       ylab="mean(ALL) / mean(AML)")
abline(h=1, lwd=2, lty=2, col="blue")    # horizontal line
abline(v=0, lwd=3, lty=3, col="green")   # vertical line




x = meanAML-meanALL
y = apply(golub, 1, function(x) {cor(x, golub.cl)} )
plot(x,            # X-values
     y,            # Y-values
     pch=19,       # plot solid circles
     cex=0.1,      # make solid circles very small
     cex.lab=1.5,  # make axis labels big
     xlab="mean(AML) - mean(ALL)",
     ylab="Pearson Correlation with Diagnosis")
lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y)
abline(h=0, lwd=2, lty=2, col="blue")   # horizontal line
abline(v=0, lwd=2, lty=3, col="red")    # vertical line

x = abs(meanAML-meanALL)
y = apply(golub, 1, function(x) {abs(cor(x, golub.cl))} )
plot(x,            # X-values
     y,            # Y-values
     pch=19,       # plot solid circles
     cex=0.1,      # make solid circles very small
     cex.lab=1.5,  # make axis labels big
     xlab="abs(mean(AML) - mean(ALL))",
     ylab="Absolute Value of Pearson Correlation with Diagnosis")
lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y)
abline(h=0, lwd=2, lty=2, col="blue")   # horizontal line
abline(v=0, lwd=2, lty=3, col="red")    # vertical line




x = meanAML-meanALL
y = apply(golub, 1, function(x) {cor(x, golub.cl, method="spearman")} )
plot(x,            # X-values
     y,            # Y-values
     pch=19,       # plot solid circles
     cex=0.1,      # make solid circles very small
     cex.lab=1.5,  # make axis labels big
     xlab="mean(AML) - mean(ALL)",
     ylab="Spearman Correlation with Diagnosis")
lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y)
abline(h=0, lwd=2, lty=2, col="blue")   # horizontal line
abline(v=0, lwd=2, lty=3, col="red")    # vertical line

x = abs(meanAML-meanALL)
y = apply(golub, 1, function(x) {abs(cor(x, golub.cl, method="spearman"))} )
plot(x,            # X-values
     y,            # Y-values
     pch=19,       # plot solid circles
     cex=0.1,      # make solid circles very small
     cex.lab=1.5,  # make axis labels big
     xlab="abs(mean(AML) - mean(ALL))",
     ylab="Absolute Value of Spearman Correlation with Diagnosis")
lines(lowess(x,y), col="orange", lwd=4) # lowess line (x,y)
abline(h=0, lwd=2, lty=2, col="blue")   # horizontal line
abline(v=0, lwd=2, lty=3, col="red")    # vertical line