###############################################################################
###############################################################################
# Chapter 3
###############################################################################
###############################################################################

########################################
# 3.1 Discrete distributions
########################################

########################################
# 3.1.1 Bernoulli distribution
########################################

########################################
# 3.1.2 Binomial distribution
########################################

# Example 1. Load the TeachingDemos package...
# install.packages("TeachingDemos")
# Note that TeachingDemos requires XQuartz from www.xquartz.org on the Mac.
library(TeachingDemos)
vis.binom()

# Example 2. If we cross two heterozygous carriers...
choose(3,1)* 0.25^1* 0.75^2

for (k in 0:3) {
    print(dbinom(k,3,0.25))
}

pbinom(2,3,0.25)

# Example 3. After four coin tosses, we can....
pbinom(2,4,0.50)  # binomial cumulative distribution function (CDF)


# Example 4. microRNA consists of a sequence composed of nucleotides A, G, U, and C,...
dbinom(14, 22, 0.7)
pbinom(13, 22, 0.7)

sum(dbinom(11:22, 22, 0.7))
1 - pbinom(10, 22, 0.7)

x <- 0:22
plot(x,           # X-values
     dbinom(x,size=22,prob=.7), # binomial mass function
     type="h",      # histogram
     col="blue",
     lwd=4,         # make lines thicker
     xlab="x",
     ylab="Binomial probability mass function f(x)",
     cex.lab=1.5)    # make axis labels big

binomialCDF = stepfun(x, c(0,pbinom(x,size=22,prob=.7))) # create a step function from binomial CDF
plot(binomialCDF, # binomial cumulative function
     col="red",
     vertical=FALSE,
     lwd=4,         # make lines thicker
     xlab="x",
     ylab="Binomial cumulative distribution function F(x)",
     main=NULL,
     cex.lab=1.5)    # make axis labels big)

rbinom(1000, 22, 0.7)

########################################
# 3.1.3 Poisson distribution
########################################

# Poisson approximating the binomial
lambda=4
plot(x,              # X-values
     dpois(x,lambda),# Poisson density function
     type="h",       # histogram
     col=1,
     lwd=8,          # make lines thicker
     ylim=c(0,0.25), # range of the y-axis
     xlab="x",
     ylab="Density function f(x)",
     cex.lab=1.5)    # make axis labels big
for (counter in 1:10) {
    lines(x, dbinom(x, size=counter*10, prob=lambda/(counter*10)), col=counter+1, lwd=2)
}
legend("topright", lty=rep(1,11), lwd=rep(2,11), col=1:11,
       legend=c(expression(Poisson(lambda == 4)),
           as.expression(sapply(seq(10,100, 10), function(x) bquote(B(n ==.(x), p == 4/.(x)))))))

# Example 1. Teaching demo
vis.binom()

# Example 2. Daily Lottery
exp(-2)*2^0/factorial(0)
dpois(0,2)

exp(-2)*2^1/factorial(1)
dpois(1,2)

x <- 0:20
plot(x,           # X-values
     dpois(x,5),  # Poisson mass function
     type="h",    # histogram
     col="blue",
     lwd=4,        # make lines thicker
     xlab="x",
     ylab="Poisson probability mass function f(x)",
     cex.lab=1.5)    # make axis labels big

poissonCDF = stepfun(x, c(0,ppois(x,5))) # create a step function from poisson CDF
plot(poissonCDF, # poisson cumulative function
     col="red",
     vertical=FALSE,
     lwd=4,         # make lines thicker
     xlab="x",
     ylab="Poisson cumulative distribution function F(x)",
     main=NULL,
     cex.lab=1.5)    # make axis labels big)

# Example 3. Finding k-mers
exp(-3.9)*2^0/factorial(0)
dpois(0,3.9)

1 - dpois(0,3.9)

# Example 4. Gaps in an open reading frame
exp(-5)*5^2/factorial(2)
dpois(2,5)

ppois(3,5)

y <- rpois(10000,5)   # 1000 random samples with lambda=5
mean(y)
var(y)

qpois(c(0.025,0.975), lambda=5, lower.tail = TRUE)

sum(dpois(1:10,5))

########################################
# 3.2 Continuous distributions
########################################

########################################
# 3.2.1 Exponential distribution
########################################

# Example 1. Suppose that lambda=5
pexp(.5,5)

f <-function(x) { dexp(x,5) }
plot(f,  # function to plot
     0,  # first x-value
     2,  # last x-value
     xlab="x",
     ylab="Expontential mass function f(x)",
     cex.lab=1.5)  # make axis labels bigger
x1 <- seq(0,0.5,.1) # set of magenta x-values
x2 <- seq(0.5,2,.1) # set of blue x-values
polygon(c(0,x1,.5), c(0,f(x1),0), col="magenta")
polygon(c(.5,x2,2), c(0,f(x2),0), col="lightblue")
arrows(0.75,2.5,0.25,1.6, lwd=3, col="green")
text(0.86, 2.7 - c(0,0.7), cex = 1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 0.5)),
       expression(paste("  = ", integral(f(x) * dx, 0, 0.5)))))

F <- function(x) { pexp(x,5) }
plot(F,             # function
     0,             # start x
     2,             # end x
     cex.lab=1.5,   # make axis labels big
     col="red",
     lwd=6,         # make line thicker
     xlab="x",
     ylab="Exponential cumulative distribution function F(x)")
mtext("0.92",side=2,at=0.92, col="red")
arrows(0.8,0.6,0.5,0.9, lwd=3, col="green")
text(0.65, 0.5 - c(0,.11,.22,.33,.44), cex=1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 0.5)),
       expression(paste("  = ", integral(f(x) * dx, 0, 0.5))),
       expression(paste("  = ", bgroup("[", F(x) ,"]")[0]^0.5)),
       expression(paste("  = ", CDF(0.5))),
       expression(paste("  = ", 0.92))))

1-pexp(2,5)

pexp(2,5)-pexp(.25,5)

qexp(c(0.025,0.975), rate = 5, lower.tail = TRUE, log.p = FALSE)
pexp(0.737775891,5)-pexp(0.005063562,5)

########################################
# 3.2.2 Normal distribution
########################################

# Example 1 . ...
library(TeachingDemos)
vis.normal()

# Example 2. When population X is distributed as N(1.9, .25)...
x <- rnorm(10000,1.9,0.5)
mean(x)
sd(x)

# Example 3. Suppose that the CCND3 (Cyclin D3) gene expression...
pnorm(1.4, 1.9, 0.5)   # left-side tail of the Normal cumulative density function (CDF)

dnormFun <- function(x) { dnorm(x,1.9,0.5) }
x1<- seq(0,1.4,0.1)
x2<- seq(1.4,4,0.1)
plot(dnormFun,      # function
     0,             # start
     4,             # end
     cex.lab=1.5,   # make axis labels big
     xlab="x",
     ylab="Normal probability density function f(x)")
polygon(c(0.0,x1,1.4), c(0,dnormFun(x1),0), col="magenta")
polygon(c(1.4,x2,4.0), c(0,dnormFun(x2),0), col="lightblue")
mtext("1.4",side=1,at=1.4, col="red")
arrows(0.6,0.43,1.0,0.1, lwd=3, col="green")
text(0.3, 0.55 - c(0,.10), cex = 1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 1.4)),
       expression(paste("  = ", integral(f(x) * dx, -infinity, 1.4)))))


pnormFun <- function(x) { pnorm(x,1.9,0.5) }
plot(pnormFun,      # function
     0,             # start
     4,             # end
     cex.lab=1.5,   # make axis labels big
     col="red",
     lwd=6,         # make line thicker
     xlab="x",
     ylab="Normal cumulative distribution function F(x)")
mtext("1.4",side=1,at=1.4, col="red")
mtext("0.16",side=2,at=0.16, col="red")
arrows(0.9,0.45,1.4,0.16, lwd=3, col="green")
text(0.5, 0.9 - c(0,.1,.2,.3,.4), cex=1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 1.4)),
       expression(paste("  = ", integral(f(x) * dx, -infinity, 1.4))),
       expression(paste("  = ", bgroup("[", F(x) ,"]")[-infinity]^1.4)),
       expression(paste("  = ", CDF(1.4))),
       expression(paste("  = ", 0.16))))

1 - pnorm(2.4, 1.9, 0.5)   # right-side tail of normal cumulative density function (CDF)
pnorm(2.4, 1.9, 0.5) - pnorm(1.4, 1.9, 0.5)  # central area of the normal cumulative density function (CDF)

qnorm(0.025,1.9,0.5)   # Normal quantile function
pnorm(0.920018,1.9,0.5)    # left-side tail of the Normal cumulative density function (CDF)

# Normal approximation to the binomial.
x <- 0:20
np=11
xPolygon<- seq(0,20,0.1)
dnormFun <- function(x) { dnorm(x,np,sqrt(np)) }
plot(dnormFun,      # normal density function
     0,             # start
     20,            # end
     col="lightblue",
     ylim=c(0,0.4), # range of the y-axis
     xlab="x",
     ylab="Density function f(x)",
     cex.lab=1.5)   # make axis labels big
polygon(c(0.0,xPolygon,20), c(0,dnormFun(xPolygon),0), col="lightblue")
for (counter in 1:9) {
    lines(x, dbinom(x, size=counter*12, prob=np/(counter*12)), col=counter, lwd=2)
}
legend("topleft", lty=rep(1,10), lwd=rep(2,10), col=c("lightblue",1:9),
       legend=c(expression(Normal(mu == 11, sigma^2 == 11)),
           as.expression(sapply(seq(12,108, 12), function(x) bquote(B(n ==.(x), p == 11/.(x)))))))

# Normal approximation to the Poisson.
x <- 0:20
xPolygon<- seq(0,20,0.1)
dnormFun <- function(x) { dnorm(x,11,sqrt(11)) }
plot(dnormFun,      # normal density function
     0,             # start
     20,            # end
     col="lightblue",
     ylim=c(0,0.4), # range of the y-axis
     xlab="x",
     ylab="Density function f(x)",
     cex.lab=1.5)   # make axis labels big
polygon(c(0.0,xPolygon,20), c(0,dnormFun(xPolygon),0), col="lightblue")
for (counter in 1:11) {
    lines(x, dpois(x,counter), col=counter, lwd=2)
}
legend("topright", lty=rep(1,12), lwd=rep(2,12), col=c("lightblue",1:11),
       legend=c(expression(Normal(mu == 11, sigma^2 == 11)),
           as.expression(sapply(1:11, function(x) bquote(Poisson(lambda ==.(x)))))))

########################################
# 3.2.3 Chi-squared distribution
########################################

# Example 1. Load the TeachingDemos package to view...
library(TeachingDemos)
vis.gamma()

# Example 2. Let's consider the chi-squared variable with 5 degrees of freedom...
pchisq(8, 5)   # left-side tail of the Chi-squared cumulative density function (CDF)

dchisqFun<-function(x) { dchisq(x,5) }
plot(dchisqFun,     # function
     0,             # start
     25,            # end
     cex.lab=1.5,   # make axis labels big
     xlab="x",
     ylab="Chi-Squared probability density function f(x)")
x1 <- seq(0,8,0.1)
x2 <- seq(8,25,0.1)
polygon(c(0,x1,8),  c(0,dchisqFun(x1),0), col="magenta")
polygon(c(8,x2,25), c(0,dchisqFun(x2),0), col="lightblue")
mtext("8",side=1,at=8)
arrows(11,0.07,5,0.07, lwd=3, col="green")
text(13, 0.075 - c(0,.018), cex = 1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 8)),
       expression(paste("  = ", integral(f(x) * dx, 0, 8)))))

pchisqFun<-function(x){pchisq(x,5)}
plot(pchisqFun,     # function
     0,             # start
     20,            # end
     cex.lab=1.5,   # make axis labels big
     col="red",
     lwd=6,         # make line thicker
     xlab="x",
     ylab="Chi-Squared cumulative distribution function F(x)")
mtext("8",   side=1,at=8, col="red")
mtext("0.84",side=2,at=0.84, col="red")
arrows(11,0.74,8,0.84, lwd=3, col="green")
text(12, 0.67 - c(0,.11,.22,.33,.44), cex=1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 8)),
       expression(paste("  = ", integral(f(x) * dx, 0, 8))),
       expression(paste("  = ", bgroup("[", F(x) ,"]")[0]^8)),
       expression(paste("  = ", CDF(8))),
       expression(paste("  = ", 0.84))))

qchisq(0.025, 5, lower.tail=TRUE)

# Example 3. The chi-squared distribution is frequently used as a so-called goodness of fit measure...
library(multtest); data(golub)
golubFactor <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
ccnd3 = grep("CCND3",golub.gnames[ ,2], ignore.case = TRUE)
ccnd3
x <- golub[ccnd3,golubFactor=="ALL"]
z <- (x-1.90)/0.50
sum(z^2)
pchisq(sum(z^2),26, lower.tail=FALSE)

########################################
# 3.2.4 t-Distribution
########################################

# Example 1. Load the TeachingDemos package and execute...
library(TeachingDemos)
vis.t()

# Example 2. A quick NCBI scan makes it reasonable to assume that ...
n <- 11
gdf5 = grep("GDF5",golub.gnames[ ,2], ignore.case = TRUE)
gdf5
x <- golub[gdf5, golubFactor=="AML"]
t.value <- sqrt(n)*(mean(x)-0)/sd(x)
t.value

1 - pt(1.236324, 10)   # right-side tail of the t-distribution cumulative density function (CDF)

f<-function(x) { dt(x,10) }
plot(f,             # function
     -5,            # start
     5,             # end
     cex.lab=1.5,   # make axis labels big
     xlab="x",
     ylab="t-Distribution probability density function f(x)")
x1 <- seq(-5,1.24,0.01)
x2 <- seq(1.24,5,0.01)
polygon(c(-5,x1,1.24), c(0,f(x1),0), col="lightblue")
polygon(c(1.24,x2,5),  c(0,f(x2),0), col="magenta")
mtext("1.24",side=1,at=1.24, col="red")
arrows(2.7,0.20,2.7,0.01, lwd=3, col="green")
text(1.8, 0.3 - c(0,.043, .086), cex = 1.2, adj=c(0,0), col="blue",
     c(expression(P(X >= 1.24)),
       expression(paste("  = ", integral(f(x) * dx, 1.24, infinity))),
       expression(paste("  = ", 1 - integral(f(x) * dx, -infinity, 1.24)))))

F<-function(x) { pt(x,10) }
plot(F,             # function
     -5,            # start
     5,             # end
     cex.lab=1.5,   # make axis labels big
     col="red",
     lwd=6,         # make line thicker
     xlab="x",
     ylab="t-Distribution cumulative distribution function F(x)")
mtext("1.24",side=1,at=1.24, col="red")
mtext("0.88",side=2,at=0.88, col="red")
arrows(-1,0.88,1.24,0.88, lwd=3, col="green")
text(-3.7, 0.85 - c(0,.11,.22,.33,.44,.55), cex=1.2, adj=c(0,0), col="blue",
     c(expression(P(X >= 1.24)),
       expression(paste("  = ", 1 - integral(f(x) * dx, -infinity, 1.24))),
       expression(paste("  = ", 1 - bgroup("[", F(x) ,"]")[-infinity]^1.24)),
       expression(paste("  = ", 1 - CDF(1.24))),
       expression(paste("  = ", 1 - 0.88)),
       expression(paste("  = ", 0.12))))

pt(2, 10) - pt(-2, 10)
qt(0.025, n-1)

########################################
# 3.2.5 F-Distribution
########################################

# Example 1. When the two population variances are in fact equal...
var(golub[1042,golubFactor=="ALL"])/var(golub[1042,golubFactor=="AML"])

pf(0.7116441, 26, 10)   # left-side tail of the F-distribution cumulative density function (CDF)

f<-function(x) { df(x,26,10) }
plot(f,             # function
     0,             # start
     10,            # end
     cex.lab=1.5,   # make axis labels big
     xlab="x",
     ylab="F-Distribution probability density function f(x)")
mtext("0.71",side=1,at=.7,cex=1, col="red")
x1 <- seq(0,0.71,0.01)
x2 <- seq(0.71,10,0.01)
polygon(c(0,x1,.71),  c(0,f(x1),0), col="magenta")
polygon(c(.71,x2,10), c(0,f(x2),0), col="lightblue")
arrows(4.5,.50,0.55,.3, lwd=3, col="green")
text(5.0, 0.53 - c(0,.11), cex = 1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 0.71)),
       expression(paste("  = ", integral(f(x) * dx, 0, 0.71)))))

f<-function(x) { pf(x,26,10) }
plot(f,            # function
     0,            # start
     10,           # end
     cex.lab=1.5,  # make axis labels big
     col="red",
     lwd=6,         # make line thicker
     xlab="x",
     ylab="F-Distribution cumulative distribution function F(x)")
mtext("0.71",side=1,at=.7, cex=1, col="red")
mtext("0.23",side=2,at=.23,cex=1, col="red")
arrows(3,0.3,0.71,0.23, lwd=3, col="green")
text(4.0, 0.5 - c(0,.12,.24,.36,.48), cex=1.2, adj=c(0,0), col="blue",
     c(expression(P(X <= 0.71)),
       expression(paste("  = ", integral(f(x) * dx, 0, 0.71))),
       expression(paste("  = ", bgroup("[", F(x) ,"]")[0]^0.71)),
       expression(paste("  = ", CDF(0.71))),
       expression(paste("  = ", 0.23))))

qf(0.025, 26, 10)