############################################################################### # Answers to exercises in Chapter 10: Markov Models. ############################################################################### # 1. Visualize by a transition graph the following transition matrices. Consult your instructor. # 2. Computing probabilities. The answers are provided by the following. P <- matrix(c(3/4,1/4,1/2,1/2),2,2,byrow=TRUE) pi0 <- c(1/2,1/2) pi0 %*% P P %*% P P # 3. Programming GTR. # (a) Program the rate matrix in such a manner that it is simple to # adapt for other values of the parameters. library(Matrix) piA <- 0.15; piG <- 0.35; piC <- 0.35; piT <- 0.15 alpha <- 4; beta <- 0.5; gamma <- 0.4; delta <- 0.3 epsilon <- 0.2; zeta <- 4 Q <- matrix(data=NA,4,4) Q[1,2] <- alpha * piG; Q[1,3] <- beta * piC; Q[1,4] <- gamma * piT Q[2,1] <- alpha * piA; Q[2,3] <- delta * piC; Q[2,4] <- epsilon * piT Q[3,1] <- beta * piA; Q[3,2] <- delta * piG; Q[3,4] <- delta* piT Q[4,1] <- gamma * piA; Q[4,2] <- epsilon* piG; Q[4,3] <- zeta * piC diag(Q) <- 0 diag(Q) <- -apply(Q,1,sum) Q <- Matrix(Q) Q # (b) The transversion rate is larger then the transition rate because # the blocks outside the main diagonal have lower values. # (c) The probability transition matrix is P <- as.matrix(expm(Q)) P rownames(P) <- colnames(P) <- StateSpace <- c("a","g","c","t") pi0 <- c(1/4,1/4,1/4,1/4) markov2 <- function(StateSpace,P,n){ seq <- matrix(0,nr=n,nc=1) seq[1] <- sample(StateSpace,1,replace=T,pi0) for(k in 1:(n-1)) { seq[k+1] <- sample(StateSpace,1,replace=T,P[seq[k],]) } return(seq) } # (d) # (e) seq <- markov2(StateSpace,P,99) # 4. Distance according to JC69. # (a) accnr <- paste("AJ5345",26:27,sep="") seqbin <- read.GenBank(accnr, species.names = TRUE, as.character = FALSE) # (b) Two solutions for computing the proportions of different nucleotides are dist.dna(seqbin, model = "raw") p <- sum(seq$AJ534526 != seq$AJ534527)/1143 # (c) Simply insert the obtained p in the formula d <- -log(1-4*p/3)*3/4