3 ## Root of Phylogenetic Trees
5 ## Copyright 2004-2009 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 is.rooted <- function(phy)
12 if (!inherits(phy, "phylo"))
13 stop('object "phy" is not of class "phylo"')
14 if (!is.null(phy$root.edge)) return(TRUE)
16 if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
21 unroot <- function(phy)
23 if (!inherits(phy, "phylo"))
24 stop('object "phy" is not of class "phylo"')
25 if (dim(phy$edge)[1] < 3)
26 stop("cannot unroot a tree with two edges.")
27 ## delete FIRST the root.edge (in case this is sufficient to
28 ## unroot the tree, i.e. there is a multichotomy at the root)
29 if (!is.null(phy$root.edge)) phy$root.edge <- NULL
30 if (!is.rooted(phy)) return(phy)
31 ## We remove one of the edges coming from the root, and
32 ## eventually adding the branch length to the other one
33 ## also coming from the root.
34 ## In all cases, the node deleted is the 2nd one (numbered
35 ## nb.tip+2 in `edge'), so we simply need to renumber the
36 ## nodes by adding 1, except the root (this remains the
37 ## origin of the tree).
38 nb.tip <- length(phy$tip.label)
40 EDGEROOT <- which(phy$edge[, 1] == ROOT)
41 ## j: the target where to stick the edge
42 ## i: the edge to delete
43 if (phy$edge[EDGEROOT[1], 2] == ROOT + 1) {
50 ## This should work whether the tree is in pruningwise or
52 phy$edge <- phy$edge[-i, ]
53 nodes <- phy$edge > ROOT # renumber all nodes except the root
54 phy$edge[nodes] <- phy$edge[nodes] - 1
55 if (!is.null(phy$edge.length)) {
56 phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i]
57 phy$edge.length <- phy$edge.length[-i]
59 phy$Nnode <- phy$Nnode - 1
60 if (!is.null(phy$node.label))
61 phy$node.label <- phy$node.label[-2]
65 root <- function(phy, outgroup, node = NULL, resolve.root = FALSE)
67 if (!inherits(phy, "phylo"))
68 stop('object "phy" is not of class "phylo"')
69 ord <- attr(phy, "order")
70 if (!is.null(ord) && ord == "pruningwise") phy <- reorder(phy)
71 n <- length(phy$tip.label)
75 stop("incorrect node#: should be greater than the number of taxa")
79 if (is.numeric(outgroup)) {
80 if (any(outgroup > n))
81 stop("incorrect taxa#: should not be greater than the number of taxa")
82 outgroup <- sort(outgroup) # used below
84 if (is.character(outgroup))
85 outgroup <- which(phy$tip.label %in% outgroup)
86 if (length(outgroup) == n) return(phy)
88 ## First check that the outgroup is monophyletic--
89 ## unless there's only one tip specified of course
90 if (length(outgroup) > 1) {
91 msg <- "the specified outgroup is not monophyletic"
92 seq.nod <- .Call("seq_root2tip", phy$edge, n,
93 phy$Nnode, PACKAGE = "ape")
94 sn <- seq.nod[outgroup]
95 ## We go from the root to the tips: the sequence of nodes
96 ## is identical until the MRCA:
98 i <- 2 # we start at the 2nd position since the root
99 # of the tree is a common ancestor to all tips
101 x <- unique(unlist(lapply(sn, "[", i)))
102 if (length(x) != 1) break
106 ## Check that all descendants of this node
107 ## are included in the outgroup.
108 ## (below is slightly faster than calling "bipartition")
109 desc <- which(unlist(lapply(seq.nod,
110 function(x) any(x %in% newroot))))
111 if (length(outgroup) != length(desc)) stop(msg)
112 ## both vectors below are already sorted:
113 if (!all(outgroup == desc)) stop(msg)
114 } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
117 oldNnode <- phy$Nnode
118 if (newroot == ROOT) {
120 snw <- which(phy$edge[, 1] == newroot)
121 if (length(snw) > 2) {
122 a <- snw[1]:(snw[2] - 1)
124 newnod <- oldNnode + n + 1
125 phy$edge[snw[-1], 1] <- newnod
126 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
128 if (!is.null(phy$edge.length))
130 c(phy$edge.length[a], 0, phy$edge.length[b])
131 phy$Nnode <- phy$Nnode + 1
132 ## node renumbering (see comments below)
133 newNb <- integer(n + oldNnode)
134 newNb[newroot] <- n + 1L
135 sndcol <- phy$edge[, 2] > n
136 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
137 (n + 2):(n + phy$Nnode)
138 phy$edge[, 1] <- newNb[phy$edge[, 1]]
144 phy$root.edge <- NULL # just in case...
145 Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
146 ## if only 2 edges connect to the root, we have to fuse them:
147 fuseRoot <- Nclade == 2
149 start <- which(phy$edge[, 1] == ROOT)
150 end <- c(start[-1] - 1, N)
154 w <- which(phy$edge[, 2] == newroot)
155 nod <- phy$edge[w, 1]
159 ## The loop below starts from the new root and goes up in the edge
160 ## matrix reversing the edges that need to be as well as well
161 ## inverting their order. The other edges must not be changed, so
162 ## their indices are stored in `stack'.
163 ## We then bind the other edges in a straightforward way.
166 ## it is important that the 3 next lines
167 ## are inside this "if" statement
174 if (phy$edge[i, 2] == nod) {
176 o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
180 if (phy$edge[i, 1] == ROOT) break
184 nod <- phy$edge[i, 1]
185 } else stack <- stack + 1L
190 ## we keep the edge leading to the old root if needed:
197 endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
198 if (is.na(endOfOutgroup)) endOfOutgroup <- N
199 endOfClade <- end[end >= endOfOutgroup][1]
201 ## bind the other clades...
202 for (j in 1:Nclade) {
203 if (end[j] == endOfClade) next
204 ## do we have to fuse the two basal edges?
206 phy$edge[start[j], 1] <- phy$edge[i, 2]
207 if (!is.null(phy$edge.length))
208 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
211 # o[NEXT] <- i#start[j]
216 o[NEXT:(NEXT + ne - 1L)] <- s
220 ## possibly bind the edges below the outgroup till the end of the clade
221 if (all(endOfOutgroup != end)) {
222 j <- (endOfOutgroup + 1L):endOfClade
223 ## we must take care that the branch inversions done above
224 ## may have changed the hierarchy of clades here, so we
225 ## travel from the bottom of this series of edges
227 inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
229 if (any(phy$edge[k, 1] == inverted)) {
233 o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
237 } else stack <- stack + 1L
241 ## ... and the outgroup
242 s <- (w + 1L):endOfOutgroup
244 o[NEXT:(NEXT + ne - 1L)] <- s
247 phy$Nnode <- oldNnode - 1
250 phy$edge[INV, ] <- phy$edge[INV, 2:1]
251 phy$edge <- phy$edge[o, ]
252 if (!is.null(phy$edge.length))
253 phy$edge.length <- phy$edge.length[o]
256 newnod <- oldNnode + n + 1
257 if (length(outgroup) == 1L) {
258 wh <- which(phy$edge[, 2] == outgroup)
259 phy$edge[1] <- newnod
261 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
262 snw <- which(phy$edge[, 1] == newroot)
263 phy$edge[snw[length(snw) - 1], 1] <- newnod
264 if (!is.null(phy$edge.length)) {
266 c(0, phy$edge.length[-wh], phy$edge.length[wh])
269 wh <- which(phy$edge[, 1] == newroot)
270 phy$edge[wh[-1], 1] <- newnod
274 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
275 if (!is.null(phy$edge.length)) {
276 tmp <- phy$edge.length[1]
277 phy$edge.length[1] <- 0
279 c(phy$edge.length[s1], tmp, phy$edge.length[s2])
282 ## N <- N + 1L ... not needed
283 phy$Nnode <- phy$Nnode + 1
286 ## The block below renumbers the nodes so that they conform
287 ## to the "phylo" format
288 newNb <- integer(n + oldNnode)
289 newNb[newroot] <- n + 1L
290 sndcol <- phy$edge[, 2] > n
291 ## executed from right to left, so newNb is modified before phy$edge:
292 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
293 (n + 2):(n + phy$Nnode)
294 phy$edge[, 1] <- newNb[phy$edge[, 1]]
296 if (!is.null(phy$node.label)) {
297 newNb <- newNb[-(1:n)]
300 phy$node.label <- phy$node.label[-1]
302 phy$node.label <- phy$node.label[order(newNb)]
304 phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])