1 ## mlphylo.R (2008-01-03)
3 ## Estimating Phylogenies by Maximum Likelihood
5 ## Copyright 2006-2008 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 logLik.phylo <- function(object, ...) attr(object, "loglik")
12 deviance.phylo <- function(object, ...) -2*attr(object, "loglik")
14 AIC.phylo <- function(object, ..., k = 2)
16 np <- length(object$edge.length) +
17 length(attr(object, "rates")) +
18 length(attr(object, "alpha")) +
19 length(attr(object, "invar")) +
20 length(attr(object, "xi"))
21 if (!attr(object, "model") %in% c("JC69", "F81"))
23 -2*attr(object, "loglik") + k*np
26 .subst.model <- structure(c(0, 1, 0, 1, 1, 1, 2, 5),
27 names = c("JC69", "K80", "F81", "F84",
28 "HKY85", "T92", "TN93", "GTR"))
31 function(x, phy, model = DNAmodel(), search.tree = FALSE,
32 quiet = FALSE, value = NULL, fixed = FALSE)
34 ## not yet generic....
35 if (class(x) != "DNAbin") stop("DNA sequences not in binary format")
36 if (!is.binary.tree(phy))
37 stop("the initial tree must be dichotomous.")
38 if (!quiet && is.rooted(phy)) {
39 warning("the initial tree is rooted: it will be unrooted.")
42 if (is.null(phy$edge.length))
43 stop("the initial tree must have branch lengths.")
44 if (any(phy$edge.length > 1))
45 stop("some branch lengths are greater than one.")
46 phy <- reorder(phy, "pruningwise")
47 if (!quiet) cat("Preparing the sequences...\n")
48 BF <- if (model$model %in% 1:2) rep(0.25, 4) else base.freq(x)
49 if (is.list(x)) x <- as.matrix(x)
50 if (is.null(rownames(x)))
51 stop("DNA sequences have no names") # safe...
52 if (!all(names(x) %in% phy$tip.label))
53 stop("the names of the DNA sequences and the tip labels
54 of the tree do not match") # safe here also
55 x <- x[phy$tip.label, ]
56 Y <- prepareDNA(x, model)
58 npart <- dim(Y$partition)[2] # the number of overall partitions
59 ## in case of negative branch lengths:
60 phy$edge.length <- abs(phy$edge.length)
61 nb.tip <- length(phy$tip.label)
62 para <- if (Y$npara) rep(1, Y$npara) else 0
63 alpha <- if (Y$nalpha) rep(.5, Y$nalpha) else 0
64 invar <- if (Y$ninvar) rep(0.5, Y$ninvar) else 0
66 if (!is.null(value)) {
67 if (para && !is.null(value$rates))
68 para <- value$rates[1:Y$npara]
69 if (alpha && !is.null(value$alpha))
70 alpha <- value$alpha[1:Y$nalpha]
71 if (invar && !is.null(value$invar))
72 invar <- value$invar[1:Y$ninvar]
76 if (!quiet) cat("Fitting in progress... ")
77 res <- .C("mlphylo_DNAmodel", as.integer(nb.tip), as.integer(S),
78 as.raw(Y$SEQ), as.double(Y$ANC), as.double(Y$w),
79 as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]),
80 as.double(phy$edge.length), as.integer(npart),
81 as.integer(Y$partition), as.integer(Y$model),
82 as.double(Y$xi), as.double(para), as.integer(Y$npara),
83 as.double(alpha), as.integer(Y$nalpha),
84 as.integer(Y$ncat), as.double(invar), as.integer(Y$ninvar),
85 as.double(BF), as.integer(search.tree), as.integer(fixed),
86 as.double(loglik), NAOK = TRUE, PACKAGE = "ape")
87 if (!quiet) cat("DONE!\n")
88 phy$edge.length = res[[8]]
89 attr(phy, "loglik") <- res[[23]]
90 attr(phy, "npart") <- npart
91 attr(phy, "model") <- names(Y$npara)
92 if (para) attr(phy, "rates") <- res[[13]]
93 if (alpha) attr(phy, "alpha") <- res[[15]]
94 if (invar) attr(phy, "invar") <- res[[18]]
95 if (npart > 1) attr(phy, "xi") <- res[[12]]
99 DNAmodel <- function(model = "K80", partition = 1,
100 ncat.isv = 1, invar = FALSE,
101 equal.isv = TRUE, equal.invar = 1)
104 stop("number of categories for inter-site variation cannot exceed 10")
105 structure(list(model = model, partition = partition,
106 ncat.isv = ncat.isv, invar = invar,
107 equal.isv = equal.isv, equal.invar = equal.invar),
111 prepareDNA <- function(X, DNAmodel)
113 L <- dim(X)[2] # already converted as a matrix in mlphylo()
115 npart <- length(unique(DNAmodel$partition))
117 ## find which substitution model:
118 mo <- which(names(.subst.model) == DNAmodel$model)
119 npara <- .subst.model[mo] # keeps the 'names'
121 ## inter-sites variation:
122 nalpha <- as.numeric(DNAmodel$ncat.isv > 1)
123 if (!DNAmodel$equal.isv) nalpha <- npart * nalpha
125 ## proportion of invariants:
126 ninvar <- as.numeric(DNAmodel$invar)
127 if (!DNAmodel$equal.invar) ninvar <- npart * ninvar
129 SEQ <- weight <- part <- NULL
131 ## For each partition...
133 ## extracts the sites in this partition:
134 M <- X[, DNAmodel$partition == i, drop = FALSE]
135 ## convert each column as a character string:
136 M <- apply(M, 2, rawToChar)
137 ## get their frequencies:
139 ## convert back to raw the unique(M):
140 M <- sapply(dimnames(w)[[1]], charToRaw)
141 ## remove useless attributes:
142 colnames(M) <- dimnames(w) <- NULL
146 weight <- c(weight, w)
147 part <- c(part, length(w)) # the length of each partition
150 class(SEQ) <- "DNAbin"
151 ANC <- array(1, c(nrow(SEQ) - 2, ncol(SEQ), 4))
153 ## 'partition' gives the start and end of each partition:
154 partition <- matrix(1, 2, npart)
155 partition[2, ] <- cumsum(part)
157 partition[1, 2:npart] <- partition[2, 1:(npart - 1)] + 1
158 partition[2, npart] <- length(weight)
159 xi <- rep(1, npart - 1)
161 list(SEQ = SEQ, ANC = ANC, weight = weight, partition = partition,
162 model = mo, xi = xi, npara = npara, nalpha = nalpha,
163 ncat = DNAmodel$ncat.isv, ninvar = ninvar)