############################################################################### # Answers to exercises in Chapter 6: Micro Array Analysis. ############################################################################### # 1. Gene filtering on normality per group of B-cell ALL patients. #a library("ALL"); library("limma"); library("annaffy"); library(hgu95av2.db) library("genefilter") data(ALL, package = "ALL") ALLB <- ALL[,ALL$BT %in% c("B1","B2","B3","B4")] f1 <- function(x) (shapiro.test(x)$p.value > 0.05) sel1 <- genefilter(exprs(ALLB[,ALLB$BT=="B1"]), filterfun(f1)) sel2 <- genefilter(exprs(ALLB[,ALLB$BT=="B2"]), filterfun(f1)) sel3 <- genefilter(exprs(ALLB[,ALLB$BT=="B3"]), filterfun(f1)) sel4 <- genefilter(exprs(ALLB[,ALLB$BT=="B4"]), filterfun(f1)) #b selected <- sel1 & sel2 & sel3 & sel4 sum(selected) #c library(limma) selMatrix <- matrix(as.integer(c(sel2,sel3,sel4)),ncol = 3,byrow=FALSE) colnames(selMatrix) <- c("sel2","sel3","sel4") vc <- vennCounts(selMatrix, include="both") vennDiagram(vc) # 2. Analysis of gene expressions of B-cell ALL patients using Limma. data(ALL) ALLB <- ALL[,ALL$BT %in% c("B1","B2","B3","B4")] design.matrix <- model.matrix(~0 + factor(ALLB$BT)) colnames(design.matrix) <- c("B1","B2","B3","B4") cont.matrix <- makeContrasts(B2-B1,B3-B2,B4-B3,levels=factor(ALLB$BT)) fit <- lmFit(ALLB, design.matrix) fit1 <- contrasts.fit(fit, cont.matrix) fit1 <- eBayes(fit1) topTable(fit1, coef=2,number=5,adjust.method="fdr") tab <- topTable(fit1, coef=2, number=20, adjust.method="fdr") annTable <- aafTableAnn(as.character(tab$ID), "hgu95av2.db", aaf.handler()) saveHTML(annTable, "ALLB1234.html", title = "B-cell ALL of stage 1,2,3,4") # 3. Finding a row number: grep("1389_at",row.names(exprs(ALL))) # 4. Remission (genezing) from acute lymphocytic leukemia (ALL). #a library(ALL); data(ALL) table(pData(ALL)$remission) remis <- which(pData(ALL)$remission %in% c("CR","REF")) length(remis) #b ALLremis <- ALL[,remis] remisFactor <-factor(pData(ALLremis)$remission) tTest.pValues <- apply(exprs(ALLremis),1,function(x) t.test(x ~ remisFactor)$p.value) sum(tTest.pValues<0.001) #c library(hgu95av2.db) names <- featureNames(ALLremis)[tTest.pValues < .001] ALLremissel<- ALLremis[names,] symb <- mget(names, env = hgu95av2SYMBOL) genenames <- mget(names,hgu95av2GENENAME) listofgenenames <- as.list(hgu95av2GENENAME) unlistednames <- unlist(listofgenenames[names],use.names=F) #d grep("p53",unlistednames) #e length(unique(unlistednames)) # 5. Remission achieved. #a library(ALL); data(ALL) ALLCRREF <- ALL[,which(ALL$CR %in% c("CR","REF"))] #b tTest.pValues <- apply(exprs(ALLCRREF),1,function(x) t.test(x ~ ALLCRREF$CR)$p.value) sum(tTest.pValues < 0.0001) #c affynames <- featureNames(ALLCRREF)[tTest.pValues < .0001] affynames #d genenames <- mget(affynames, env = hgu95av2GENENAME) sig = length(genenames) sig #e sig.onco = length(grep("oncogene",genenames)) sig.onco #f affytot <- unique(featureNames(ALLCRREF)) genenamestot <- mget(affytot, env = hgu95av2GENENAME) total = length(genenamestot) total total.onco = length(grep("oncogene",genenamestot)) total.onco dat <- matrix(c(sig.onco,sig-sig.onco,total.onco,total-total.onco),2,byrow=TRUE) fisher.test(dat) # 6. Gene filtering of ALL data. library("ALL") data("ALL") table(ALL$BT) ALL.T23 <- ALL[,which(ALL$BT %in% c("T2","T3"))] library(genefilter) f1 <- function(x) (shapiro.test(x)$p.value > 0.05) f2 <- function(x) (t.test(x ~ ALL.T23$BT)$p.value < 0.05) sel1 <- genefilter(exprs(ALL.T23[,ALL.T23$BT=="T2"]), filterfun(f1)) sel2 <- genefilter(exprs(ALL.T23[,ALL.T23$BT=="T3"]), filterfun(f1)) sel3 <- genefilter(exprs(ALL.T23), filterfun(f2)) sum(sel1 & sel2 & sel3) sum(sel1 & sel2) # 7. Stages of B-cell ALL in the ALL data. library("ALL") library("limma") allB <- ALL[,which(ALL$BT %in% c("B1","B2","B3","B4"))] facB123 <- factor(allB$BT) cont.matrix <- makeContrasts(B2-B1,B3-B2,B4-B3, levels=facB123) design.matrix <- model.matrix(~ 0 + facB123) colnames(design.matrix) <- c("B1","B2","B3","B4") fit <- lmFit(allB, design.matrix) fit1 <- contrasts.fit(fit, cont.matrix) fit1 <- eBayes(fit1) topTable(fit1, coef=2,5,adjust.method="BH") sum(fit1$p.value < 0.05) # 8. Analysis of public micro array data. library(GEOquery); library(limma); library(hgu95av2.db); library(annaffy) gds486 <- getGEO("GDS486"); eset486 <- GDS2eSet(gds486,do.log2=TRUE) nrmissing <- apply(exprs(eset486), 1, function(x) sum(is.na(x)) ) eset486sel <- eset486[nrmissing < 1,] pval486sel <- apply(exprs(eset486sel), 1, function(x) t.test(x ~ eset486sel$cell.line)$p.value) pval486 <- nrmissing pval486[pval486==0] <- pval486sel pval486[pval486 > 1] <- 1 gds711 <- getGEO("GDS711"); eset711 <- GDS2eSet(gds711,do.log2=TRUE) nrmissing <- apply(exprs(eset711), 1, function(x) sum(is.na(x)) ) eset711sel <- eset711[nrmissing < 1,] panova711sel <- apply(exprs(eset711sel), 1, function(x) anova(lm(x ~ eset711sel$disease.state))$Pr[1]) pval711sel <- panova711sel pval711 <- nrmissing pval711[pval711==0] <- pval711sel pval711[pval711 > 1] <- 1 gds2126 <- getGEO("GDS2126"); eset2126 <- GDS2eSet(gds2126,do.log2=TRUE) nrmissing <- apply(exprs(eset2126), 1, function(x) sum(is.na(x)) ) eset2126sel <- eset2126[nrmissing < 1,] pval2126sel <- apply(exprs(eset2126sel), 1, function(x) anova(lm(x ~ eset2126sel$disease.state))$Pr[1]) pval2126 <- nrmissing pval2126[pval2126==0] <- pval2126sel pval2126[pval2126 > 1] <- 1 sumpval <- pval486 + pval711 + pval2126 o <- order(sumpval,decreasing=FALSE) genenames <- names(sumpval[o[1:20]]) symb <- "aap" for (i in 1:20) { symb[i] <- get(genenames[i], env = hgu95av2SYMBOL) } symb library("KEGG"); library("GO"); library("annaffy") atab <- aafTableAnn(genenames, "hgu95av2", aaf.handler() ) saveHTML(atab, file="ThreeExperiments.html") # 9. Analysis of genes from a GO search. library(ALL) data(ALL,package="ALL") ALLP <- ALL[,ALL$mol.biol %in% c("ALL1/AF4","BCR/ABL","NEG")] neg <- which(ALLP$mol.biol=="NEG") aal1 <- which(ALLP$mol.biol=="ALL1/AF4") bcr <- which(ALLP$mol.biol=="BCR/ABL") orderpat <- c(neg,aal1,bcr) ALLP <- ALL[,ALL$mol.biol %in% c("ALL1/AF4","BCR/ABL","NEG")] ALLPo <- ALLP[,c(neg,aal1,bcr)] facnr <- c(rep(1,74),rep(2,10),rep(3,37)) nab.factor <- factor(facnr,levels=1:3, labels= c("NEG","ALL1/AF4","BCR/ABL")) anova.pValues <- apply(exprs(ALLPo), 1, function(x) anova(lm(x ~ nab.factor))$Pr[1]) library(GO); library(annotate); library(hgu95av2.db) GOTerm2Tag <- function(term) { GTL <- eapply(GOTERM, function(x) {grep(term, x@Term, value=TRUE)}) Gl <- sapply(GTL, length) names(GTL[Gl > 0]) } GOTerm2Tag("protein-tyrosine kinase") probes <- hgu95av2GO2ALLPROBES$"GO:0004713" sum(anova.pValues[probes] < 0.05) sum(anova.pValues[probes] < 1) sum(anova.pValues < 0.05) sum(anova.pValues < 1) fisher.test(matrix(c(12625, 2581,320,86),2,byrow=TRUE)) # the odds ratio differs significantly from zero, they are more significant