3 ## Root of Phylogenetic Trees
5 ## Copyright 2004-2011 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 is.rooted <- function(phy)
12 if (!inherits(phy, "phylo"))
13 stop('object "phy" is not of class "phylo"')
14 if (!is.null(phy$root.edge)) TRUE
16 if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
20 unroot <- function(phy)
22 if (!inherits(phy, "phylo"))
23 stop('object "phy" is not of class "phylo"')
26 stop("cannot unroot a tree with less than three edges.")
28 ## delete FIRST the root.edge (in case this is sufficient to
29 ## unroot the tree, i.e. there is a multichotomy at the root)
30 if (!is.null(phy$root.edge)) phy$root.edge <- NULL
31 if (!is.rooted(phy)) return(phy)
33 n <- length(phy$tip.label)
36 ### EDGEROOT[1]: the edge to delete
37 ### EDGEROOT[2]: the target where to stick the edge to delete
39 ### If the tree is in pruningwise order, then the last two edges
40 ### are those connected to the root; the node situated in
41 ### phy$edge[N - 2L, 1L] will be the new root...
43 ophy <- attr(phy, "order")
44 if (!is.null(ophy) && ophy == "pruningwise") {
45 NEWROOT <- phy$edge[N - 2L, 1L]
46 EDGEROOT <- c(N, N - 1L)
49 ### ... otherwise, we remove one of the edges coming from
50 ### the root, and eventually adding the branch length to
51 ### the other one also coming from the root.
52 ### In all cases, the node deleted is the 2nd one (numbered
53 ### nb.tip+2 in 'edge'), so we simply need to renumber the
54 ### nodes by adding 1, except the root (this remains the
55 ### origin of the tree).
57 EDGEROOT <- which(phy$edge[, 1L] == ROOT)
61 ## make sure EDGEROOT is ordered as described above:
62 if (phy$edge[EDGEROOT[1L], 2L] != NEWROOT)
63 EDGEROOT <- EDGEROOT[2:1]
65 phy$edge <- phy$edge[-EDGEROOT[1L], ]
67 s <- phy$edge == NEWROOT # renumber the new root
70 s <- phy$edge > NEWROOT # renumber all nodes greater than the new root
71 phy$edge[s] <- phy$edge[s] - 1L
73 if (!is.null(phy$edge.length)) {
74 phy$edge.length[EDGEROOT[2L]] <-
75 phy$edge.length[EDGEROOT[2L]] + phy$edge.length[EDGEROOT[1L]]
76 phy$edge.length <- phy$edge.length[-EDGEROOT[1L]]
79 phy$Nnode <- phy$Nnode - 1L
81 if (!is.null(phy$node.label)) {
82 if (NEWROOT == n + 2L)
83 phy$node.label <- phy$node.label[-1]
86 tmp <- lbs[NEWROOT - n]
87 lbs <- lbs[-c(1, NEWROOT)]
88 phy$node.label <- c(tmp, lbs)
94 root <- function(phy, outgroup, node = NULL,
95 resolve.root = FALSE, interactive = FALSE)
97 if (!inherits(phy, "phylo"))
98 stop('object "phy" is not of class "phylo"')
100 n <- length(phy$tip.label)
103 node <- identify(phy)$nodes
104 cat("You have set resolve.root =", resolve.root, "\n")
106 if (!is.null(node)) {
108 stop("incorrect node#: should be greater than the number of taxa")
112 if (is.numeric(outgroup)) {
113 if (any(outgroup > n))
114 stop("incorrect taxa#: should not be greater than the number of taxa")
115 outgroup <- sort(outgroup) # used below
117 if (is.character(outgroup))
118 outgroup <- which(phy$tip.label %in% outgroup)
119 if (length(outgroup) == n) return(phy)
121 ## First check that the outgroup is monophyletic--
122 ## unless there's only one tip specified of course
123 if (length(outgroup) > 1) {
125 ingroup <- (1:n)[-outgroup]
127 for (i in 2:phy$Nnode) {
128 if (identical(pp[[i]], ingroup)) {
132 if (identical(pp[[i]], outgroup)) {
133 newroot <- phy$edge[which(phy$edge[, 2] == i + n), 1]
138 stop("the specified outgroup is not monophyletic")
139 } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
142 oldNnode <- phy$Nnode
143 if (newroot == ROOT) {
145 snw <- which(phy$edge[, 1] == newroot)
146 if (length(snw) > 2) {
147 a <- snw[1]:(snw[2] - 1)
149 newnod <- oldNnode + n + 1
150 phy$edge[snw[-1], 1] <- newnod
151 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
153 if (!is.null(phy$edge.length))
155 c(phy$edge.length[a], 0, phy$edge.length[b])
156 phy$Nnode <- phy$Nnode + 1L
157 ## node renumbering (see comments below)
158 newNb <- integer(n + oldNnode)
159 newNb[newroot] <- n + 1L
160 sndcol <- phy$edge[, 2] > n
161 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
162 (n + 2):(n + phy$Nnode)
163 phy$edge[, 1] <- newNb[phy$edge[, 1]]
169 phy$root.edge <- NULL # just in case...
170 Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
171 ## if only 2 edges connect to the root, we have to fuse them:
172 fuseRoot <- Nclade == 2
174 start <- which(phy$edge[, 1] == ROOT)
175 end <- c(start[-1] - 1, N)
179 w <- which(phy$edge[, 2] == newroot)
180 nod <- phy$edge[w, 1]
184 ## The loop below starts from the new root and goes up in the edge
185 ## matrix reversing the edges that need to be as well as well
186 ## inverting their order. The other edges must not be changed, so
187 ## their indices are stored in `stack'.
188 ## We then bind the other edges in a straightforward way.
191 ## it is important that the 3 next lines
192 ## are inside this "if" statement
199 if (phy$edge[i, 2] == nod) {
201 o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
205 if (phy$edge[i, 1] == ROOT) break
209 nod <- phy$edge[i, 1]
210 } else stack <- stack + 1L
215 ## we keep the edge leading to the old root if needed:
222 endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
223 if (is.na(endOfOutgroup)) endOfOutgroup <- N
224 endOfClade <- end[end >= endOfOutgroup][1]
226 ## bind the other clades...
227 for (j in 1:Nclade) {
228 if (end[j] == endOfClade) next
229 ## do we have to fuse the two basal edges?
231 phy$edge[start[j], 1] <- phy$edge[i, 2]
232 if (!is.null(phy$edge.length))
233 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
236 # o[NEXT] <- i#start[j]
241 o[NEXT:(NEXT + ne - 1L)] <- s
245 ## possibly bind the edges below the outgroup till the end of the clade
246 if (all(endOfOutgroup != end)) {
247 j <- (endOfOutgroup + 1L):endOfClade
248 ## we must take care that the branch inversions done above
249 ## may have changed the hierarchy of clades here, so we
250 ## travel from the bottom of this series of edges
252 inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
254 if (any(phy$edge[k, 1] == inverted)) {
258 o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
262 } else stack <- stack + 1L
266 ## ... and the outgroup
267 s <- (w + 1L):endOfOutgroup
269 o[NEXT:(NEXT + ne - 1L)] <- s
272 phy$Nnode <- oldNnode - 1L
275 phy$edge[INV, ] <- phy$edge[INV, 2:1]
276 phy$edge <- phy$edge[o, ]
277 if (!is.null(phy$edge.length))
278 phy$edge.length <- phy$edge.length[o]
281 newnod <- oldNnode + n + 1
282 if (length(outgroup) == 1L) {
283 wh <- which(phy$edge[, 2] == outgroup)
284 phy$edge[1] <- newnod
286 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
287 snw <- which(phy$edge[, 1] == newroot)
288 phy$edge[snw[length(snw) - 1], 1] <- newnod
289 if (!is.null(phy$edge.length)) {
291 c(0, phy$edge.length[-wh], phy$edge.length[wh])
294 wh <- which(phy$edge[, 1] == newroot)
295 phy$edge[wh[-1], 1] <- newnod
299 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
300 if (!is.null(phy$edge.length)) {
301 tmp <- phy$edge.length[1]
302 phy$edge.length[1] <- 0
304 c(phy$edge.length[s1], tmp, phy$edge.length[s2])
307 ## N <- N + 1L ... not needed
308 phy$Nnode <- phy$Nnode + 1L
311 ## The block below renumbers the nodes so that they conform
312 ## to the "phylo" format
313 newNb <- integer(n + oldNnode)
314 newNb[newroot] <- n + 1L
315 sndcol <- phy$edge[, 2] > n
316 ## executed from right to left, so newNb is modified before phy$edge:
317 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- n + 2:phy$Nnode
318 phy$edge[, 1] <- newNb[phy$edge[, 1]]
320 if (!is.null(phy$node.label)) {
322 newNb <- newNb[-(1:n)]
325 phy$node.label <- phy$node.label[-1]
327 phy$node.label <- phy$node.label[order(newNb)]
329 phy$node.label[is.na(phy$node.label)] <- phy$node.label[1]
330 phy$node.label[1] <- NA
331 ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])
332 ##phy$node.label <- c("NA", phy$node.label)