###############################################################################
# Answers to exercises in Chapter 10: Markov Models.
###############################################################################
# 1. Visualize by a transition graph the following transition matrices. Consult your instructor.

# 2. Computing probabilities. The answers are provided by the following.
P <- matrix(c(3/4,1/4,1/2,1/2),2,2,byrow=TRUE)
pi0 <- c(1/2,1/2)
pi0 %*% P

P %*% P
P

# 3. Programming GTR.
#   (a) Program the rate matrix in such a manner that it is simple to
#   adapt for other values of the parameters.
library(Matrix)
piA <- 0.15; piG <- 0.35; piC <- 0.35; piT <- 0.15
alpha <- 4; beta <- 0.5; gamma <- 0.4; delta <- 0.3
epsilon <- 0.2; zeta <- 4
Q <- matrix(data=NA,4,4)
Q[1,2] <- alpha * piG; Q[1,3] <- beta * piC;
Q[1,4] <- gamma * piT
Q[2,1] <- alpha * piA; Q[2,3] <- delta * piC;
Q[2,4] <- epsilon * piT
Q[3,1] <- beta * piA; Q[3,2] <- delta * piG;
Q[3,4] <- delta* piT
Q[4,1] <- gamma * piA; Q[4,2] <- epsilon* piG; Q[4,3] <- zeta * piC
diag(Q) <- 0
diag(Q) <- -apply(Q,1,sum)
Q <- Matrix(Q)
Q
#   (b) The transversion rate is larger then the transition rate because
#   the blocks outside the main diagonal have lower values.
#   (c) The probability transition matrix is
P <- as.matrix(expm(Q))
P
rownames(P) <- colnames(P) <- StateSpace <- c("a","g","c","t")
pi0 <- c(1/4,1/4,1/4,1/4)

markov2 <- function(StateSpace,P,n){
    seq <- matrix(0,nr=n,nc=1)
    seq[1] <- sample(StateSpace,1,replace=T,pi0)

    for(k in 1:(n-1)) {
        seq[k+1] <- sample(StateSpace,1,replace=T,P[seq[k],])
    }

    return(seq)
}
#   (d)
#   (e)
seq <- markov2(StateSpace,P,99)

# 4. Distance according to JC69.
#   (a)
accnr <- paste("AJ5345",26:27,sep="")
seqbin <- read.GenBank(accnr, species.names = TRUE, as.character = FALSE)
#   (b) Two solutions for computing the proportions of different nucleotides are
dist.dna(seqbin, model = "raw")
p <- sum(seq$AJ534526 != seq$AJ534527)/1143
#   (c) Simply insert the obtained p in the formula
d <- -log(1-4*p/3)*3/4