###############################################################################
# Answers to exercises in Chapter 3: Important Distributions
###############################################################################
# Each exercise states the expected answers in a comment, followed by the
# expressions that print them when the script is run.

# 1. Binomial (size = 60, prob = 0.4 in every call below).
# (a) P(X = 24) = 0.1046692, P(X <= 24) = 0.5557756, and
#     P(X >= 30) = 0.0746237.
dbinom(24, 60, 0.4)
pbinom(24, 60, 0.4)
sum(dbinom(30:60, 60, 0.4))

# (b) P(20 <= X <= 30) = 0.83856, P(X >= 20) = 0.8830403.
sum(dbinom(20:30, 60, 0.4))
sum(dbinom(20:60, 60, 0.4))

# (c) P(20 <= X or X >= 40) = 0.8830403, and
#     P(20 <= X and X >= 10) = 0.999975.
# NOTE(review): the second expression below sums over 10:60, i.e. it computes
# P(X >= 10); the event "20 <= X and X >= 10" reduces to X >= 20. Confirm the
# intended event against the exercise statement.
sum(dbinom(20:60, 60, 0.4))
sum(dbinom(10:60, 60, 0.4))

# (d) E(X) = 24, sd(X) = 3.794733 (the variance is np(1 - p) = 14.4).
60 * 0.4 # np
sqrt(60 * 0.4 * (1 - 0.4)) # sd = sqrt(variance) = sqrt(np(1 - p))

# (e) x_0.025 = 17, x_0.5 = 24, and x_0.975 = 32.
qbinom(0.025, 60, 0.4)
qbinom(0.5, 60, 0.4)
qbinom(0.975, 60, 0.4)

# 2. Standard Normal.
# (a) P(1.6 < Z < 2.3) = 0.04408.
pnorm(2.3, 0, 1) - pnorm(1.6, 0, 1)

# (b) P(Z < 1.64) = 0.9495.
pnorm(1.64, 0, 1)

# (c) P(-1.64 < Z < -1.02) = 0.1034.
pnorm(-1.02, 0, 1) - pnorm(-1.64, 0, 1)

# (d) P(0 < Z < 1.96) = 0.4750.
pnorm(1.96, 0, 1) - pnorm(0, 0, 1)

# (e) P(-1.96 < Z < 1.96) = 0.9500.
pnorm(1.96, 0, 1) - pnorm(-1.96, 0, 1)

# (f) z_0.025 = -1.959964, z_0.05 = -1.644854, z_0.5 = 0, z_0.95 = 1.644854,
#     and z_0.975 = 1.959964.
qnorm(0.025, 0, 1)
qnorm(0.05, 0, 1)
qnorm(0.5, 0, 1)
qnorm(0.95, 0, 1)
qnorm(0.975, 0, 1)

# 3. Normal (mean = 10, sd = 2 in every call below).
# (a) P(X < 12) = 0.8413.
pnorm(12, 10, 2)

# (b) P(X > 8) = 0.8413.
1 - pnorm(8, 10, 2)

# (c) P(9 < X < 10.5) = 0.2917.
pnorm(10.5, 10, 2) - pnorm(9, 10, 2)

# (d) The quantiles x_0.025 = 6.080072, x_0.5 = 10, and x_0.975 = 13.91993.
qnorm(0.025, 10, 2)
qnorm(0.5, 10, 2)
qnorm(0.975, 10, 2)

# 4. T-distribution (6 degrees of freedom).
# (a) P(T_6 < 1) = 0.8220412.
pt(1, 6)

# (b) P(T_6 > 2) = 0.04621316.
1 - pt(2, 6)

# (c) P(-1 < T_6 < 1) = 0.6440823.
pt(1, 6) - pt(-1, 6)

# (d) P(-2 < T_6 < 2) = 0.9075737.
pt(2, 6) - pt(-2, 6)

# (e) t_0.025 = -2.446912, t_0.5 = 0, and t_0.975 = 2.446912.
qt(0.025, 6)
qt(0.5, 6)
qt(0.975, 6)

# 5. F-distribution (8 and 5 degrees of freedom).
# (a) P(F_8,5 < 3) = 0.8792198.
pf(3, 8, 5)

# (b) P(F_8,5 > 4) = 0.07169537.
1 - pf(4, 8, 5)

# (c) P(1 < F_8,5 < 6) = 0.4931282.
pf(6, 8, 5) - pf(1, 8, 5)

# (d) The quantiles f_0.025 = 0.2075862, f_0.5 = 1.054510, and
#     f_0.975 = 6.757172.
qf(0.025, 8, 5)
qf(0.5, 8, 5)
qf(0.975, 8, 5)

# 6. Chi-squared distribution (10 degrees of freedom).
# (a) P(X^2_10 < 3) = 0.01857594.
pchisq(3, 10)

# (b) P(X^2_10 > 4) = 0.947347.
1 - pchisq(4, 10)

# (c) P(1 < X^2_10 < 6) = 0.1845646.
pchisq(6, 10) - pchisq(1, 10)

# (d) The quantiles g_0.025 = 3.246973, g_0.5 = 9.341818, and
#     g_0.975 = 20.48318.
qchisq(0.025, 10)
qchisq(0.5, 10)
qchisq(0.975, 10)

# 7. MicroRNA (binomial with size = 20, prob = 0.7).
# (a) P(X = 14) = dbinom(14, 20, 0.7) = 0.191639.
dbinom(14, 20, 0.7)

# (b) P(X <= 14) = pbinom(14, 20, 0.7) = 0.5836292.
pbinom(14, 20, 0.7)

# (c) P(X > 10) = 1 - P(X <= 10) = 1 - pbinom(10, 20, 0.7) = 0.9520381.
1 - pbinom(10, 20, 0.7)

# (d) P(10 <= X <= 15) = P(X <= 15) - P(X <= 9)
#     = pbinom(15, 20, 0.7) - pbinom(9, 20, 0.7) = 0.7453474.
pbinom(15, 20, 0.7) - pbinom(9, 20, 0.7)

# (e) E(X) = np = 14.
20 * 0.7 # np

# (f) sd(X) = sqrt(np(1 - p)) = 2.04939.
sqrt(20 * 0.7 * (1 - 0.7)) # sd = sqrt(variance) = sqrt(np(1 - p))

# 8. Zyxin (normal with mean = 1.6, sd = 0.4).
# (a) P(X <= 1.2) = pnorm(1.2, 1.6, 0.4) = 0.1586553.
pnorm(1.2, 1.6, 0.4)

# (b) P(1.2 <= X <= 2.0) = pnorm(2.0, 1.6, 0.4) - pnorm(1.2, 1.6, 0.4)
#     = 0.6826895.
pnorm(2.0, 1.6, 0.4) - pnorm(1.2, 1.6, 0.4)

# (c) P(0.8 <= X <= 2.4) = pnorm(2.4, 1.6, 0.4) - pnorm(0.8, 1.6, 0.4)
#     = 0.9544997.
pnorm(2.4, 1.6, 0.4) - pnorm(0.8, 1.6, 0.4)

# (d) x0.025 = qnorm(0.025, 1.6, 0.4) = 0.8160144. Similarly,
#     x0.975 = qnorm(0.975, 1.6, 0.4) = 2.383986.
qnorm(0.025, 1.6, 0.4)
qnorm(0.975, 1.6, 0.4)

# (e) x <- rnorm(1000, 1.6, 0.4) gave mean(x) = 1.608401 and
#     sd(x) = 0.4022082 in one run. Both are close to the values in the
#     population. No seed is set, so the sample and therefore these two
#     numbers differ from run to run.
x <- rnorm(1000, 1.6, 0.4)
mean(x)
sd(x)

# 9. Some computations on Golub et al. (1999) data.
# (a) The three largest t-values 57.8, 55.2, and 47.5 are extremely large.
# data(golub, package="multtest")
# NOTE(review): the statements below read golub, golub.cl and golub.gnames,
# none of which is created in this script; presumably they come from the
# commented-out data() call above -- confirm and load them before running.
golubFactor <- factor(golub.cl, levels = 0:1, labels = c("ALL", "AML"))

# Row-wise statistic sqrt(27) * mean / sd over the ALL columns.
# NOTE(review): 27 is presumably the number of ALL columns -- confirm.
tValues <- apply(
  golub[, golubFactor == "ALL"], 1,
  function(x) sqrt(27) * mean(x) / sd(x)
)
# Indices of the rows ordered by decreasing absolute statistic.
o <- order(abs(tValues), decreasing = TRUE)
tValues[o[1:3]]
golub.gnames[o[1:3], 2]

# (b) The script below gives 1410 ratios between 0.5 and 1.5.
all.variances <- apply(golub[, golubFactor == "ALL"], 1, var)
aml.variances <- apply(golub[, golubFactor == "AML"], 1, var)
ratios.variances <- all.variances / aml.variances
sum(ratios.variances > 0.5 & ratios.variances < 1.5)

# 10. Extreme value investigation. The blue line (extreme value) fits to the
# black line (density of generated extreme data) much better than the
# red line (normal distribution).
# n must be assigned before an and bn, because both are computed from it.
n <- 10000
# Centering (an) and scaling (bn) constants; Serfling p.90.
an <- sqrt(2 * log(n)) -
  0.5 * (log(log(n)) + log(4 * pi)) * (2 * log(n))^(-1 / 2)
bn <- (2 * log(n))^(-1 / 2)
# Preallocate the 1000 normalized maxima instead of growing the vector.
e <- double(1000)
for (i in seq_along(e)) {
  e[i] <- (max(rnorm(n)) - an) / bn
}
plot(density(e), ylim = c(0, 0.5))
# Density curve drawn in blue for comparison with the simulated maxima.
f <- function(x) {
  exp(-x) * exp(-exp(-x))
}
# curve() expects scalar from/to, so pass the two ends of the range separately.
curve(
  f,
  from = min(density(e)$x), to = max(density(e)$x),
  add = TRUE, col = "blue"
)
curve(dnorm, add = TRUE, col = "red")