3 ## Root of Phylogenetic Trees
5 ## Copyright 2004-2011 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 is.rooted <- function(phy)
12 if (!inherits(phy, "phylo"))
13 stop('object "phy" is not of class "phylo"')
14 if (!is.null(phy$root.edge)) TRUE
16 if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
20 unroot <- function(phy)
22 if (!inherits(phy, "phylo"))
23 stop('object "phy" is not of class "phylo"')
24 if (dim(phy$edge)[1] < 3)
25 stop("cannot unroot a tree with less than three edges.")
26 ## delete FIRST the root.edge (in case this is sufficient to
27 ## unroot the tree, i.e. there is a multichotomy at the root)
28 if (!is.null(phy$root.edge)) phy$root.edge <- NULL
29 if (!is.rooted(phy)) return(phy)
30 ## We remove one of the edges coming from the root, and
31 ## eventually adding the branch length to the other one
32 ## also coming from the root.
33 ## In all cases, the node deleted is the 2nd one (numbered
34 ## nb.tip+2 in 'edge'), so we simply need to renumber the
35 ## nodes by adding 1, except the root (this remains the
36 ## origin of the tree).
37 nb.tip <- length(phy$tip.label)
39 EDGEROOT <- which(phy$edge[, 1] == ROOT)
40 ## j: the target where to stick the edge
41 ## i: the edge to delete
42 if (phy$edge[EDGEROOT[1], 2] == ROOT + 1L) {
49 ## This should work whether the tree is in pruningwise or
51 phy$edge <- phy$edge[-i, ]
52 nodes <- phy$edge > ROOT # renumber all nodes except the root
53 phy$edge[nodes] <- phy$edge[nodes] - 1L
54 if (!is.null(phy$edge.length)) {
55 phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i]
56 phy$edge.length <- phy$edge.length[-i]
58 phy$Nnode <- phy$Nnode - 1L
59 if (!is.null(phy$node.label))
60 phy$node.label <- phy$node.label[-2]
64 root <- function(phy, outgroup, node = NULL,
65 resolve.root = FALSE, interactive = FALSE)
67 if (!inherits(phy, "phylo"))
68 stop('object "phy" is not of class "phylo"')
70 n <- length(phy$tip.label)
73 node <- identify(phy)$nodes
74 cat("You have set resolve.root =", resolve.root, "\n")
78 stop("incorrect node#: should be greater than the number of taxa")
82 if (is.numeric(outgroup)) {
83 if (any(outgroup > n))
84 stop("incorrect taxa#: should not be greater than the number of taxa")
85 outgroup <- sort(outgroup) # used below
87 if (is.character(outgroup))
88 outgroup <- which(phy$tip.label %in% outgroup)
89 if (length(outgroup) == n) return(phy)
91 ## First check that the outgroup is monophyletic--
92 ## unless there's only one tip specified of course
93 if (length(outgroup) > 1) {
95 ingroup <- (1:n)[-outgroup]
97 for (i in 2:phy$Nnode) {
98 if (identical(pp[[i]], ingroup)) {
102 if (identical(pp[[i]], outgroup)) {
103 newroot <- phy$edge[which(phy$edge[, 2] == i + n), 1]
108 stop("the specified outgroup is not monophyletic")
109 } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
112 oldNnode <- phy$Nnode
113 if (newroot == ROOT) {
115 snw <- which(phy$edge[, 1] == newroot)
116 if (length(snw) > 2) {
117 a <- snw[1]:(snw[2] - 1)
119 newnod <- oldNnode + n + 1
120 phy$edge[snw[-1], 1] <- newnod
121 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
123 if (!is.null(phy$edge.length))
125 c(phy$edge.length[a], 0, phy$edge.length[b])
126 phy$Nnode <- phy$Nnode + 1L
127 ## node renumbering (see comments below)
128 newNb <- integer(n + oldNnode)
129 newNb[newroot] <- n + 1L
130 sndcol <- phy$edge[, 2] > n
131 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
132 (n + 2):(n + phy$Nnode)
133 phy$edge[, 1] <- newNb[phy$edge[, 1]]
139 phy$root.edge <- NULL # just in case...
140 Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
141 ## if only 2 edges connect to the root, we have to fuse them:
142 fuseRoot <- Nclade == 2
144 start <- which(phy$edge[, 1] == ROOT)
145 end <- c(start[-1] - 1, N)
149 w <- which(phy$edge[, 2] == newroot)
150 nod <- phy$edge[w, 1]
154 ## The loop below starts from the new root and goes up in the edge
155 ## matrix reversing the edges that need to be as well as well
156 ## inverting their order. The other edges must not be changed, so
157 ## their indices are stored in `stack'.
158 ## We then bind the other edges in a straightforward way.
161 ## it is important that the 3 next lines
162 ## are inside this "if" statement
169 if (phy$edge[i, 2] == nod) {
171 o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
175 if (phy$edge[i, 1] == ROOT) break
179 nod <- phy$edge[i, 1]
180 } else stack <- stack + 1L
185 ## we keep the edge leading to the old root if needed:
192 endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
193 if (is.na(endOfOutgroup)) endOfOutgroup <- N
194 endOfClade <- end[end >= endOfOutgroup][1]
196 ## bind the other clades...
197 for (j in 1:Nclade) {
198 if (end[j] == endOfClade) next
199 ## do we have to fuse the two basal edges?
201 phy$edge[start[j], 1] <- phy$edge[i, 2]
202 if (!is.null(phy$edge.length))
203 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
206 # o[NEXT] <- i#start[j]
211 o[NEXT:(NEXT + ne - 1L)] <- s
215 ## possibly bind the edges below the outgroup till the end of the clade
216 if (all(endOfOutgroup != end)) {
217 j <- (endOfOutgroup + 1L):endOfClade
218 ## we must take care that the branch inversions done above
219 ## may have changed the hierarchy of clades here, so we
220 ## travel from the bottom of this series of edges
222 inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
224 if (any(phy$edge[k, 1] == inverted)) {
228 o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
232 } else stack <- stack + 1L
236 ## ... and the outgroup
237 s <- (w + 1L):endOfOutgroup
239 o[NEXT:(NEXT + ne - 1L)] <- s
242 phy$Nnode <- oldNnode - 1L
245 phy$edge[INV, ] <- phy$edge[INV, 2:1]
246 phy$edge <- phy$edge[o, ]
247 if (!is.null(phy$edge.length))
248 phy$edge.length <- phy$edge.length[o]
251 newnod <- oldNnode + n + 1
252 if (length(outgroup) == 1L) {
253 wh <- which(phy$edge[, 2] == outgroup)
254 phy$edge[1] <- newnod
256 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
257 snw <- which(phy$edge[, 1] == newroot)
258 phy$edge[snw[length(snw) - 1], 1] <- newnod
259 if (!is.null(phy$edge.length)) {
261 c(0, phy$edge.length[-wh], phy$edge.length[wh])
264 wh <- which(phy$edge[, 1] == newroot)
265 phy$edge[wh[-1], 1] <- newnod
269 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
270 if (!is.null(phy$edge.length)) {
271 tmp <- phy$edge.length[1]
272 phy$edge.length[1] <- 0
274 c(phy$edge.length[s1], tmp, phy$edge.length[s2])
277 ## N <- N + 1L ... not needed
278 phy$Nnode <- phy$Nnode + 1L
281 ## The block below renumbers the nodes so that they conform
282 ## to the "phylo" format
283 newNb <- integer(n + oldNnode)
284 newNb[newroot] <- n + 1L
285 sndcol <- phy$edge[, 2] > n
286 ## executed from right to left, so newNb is modified before phy$edge:
287 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
288 (n + 2):(n + phy$Nnode)
289 phy$edge[, 1] <- newNb[phy$edge[, 1]]
291 if (!is.null(phy$node.label)) {
293 newNb <- newNb[-(1:n)]
296 phy$node.label <- phy$node.label[-1]
298 phy$node.label <- phy$node.label[order(newNb)]
300 phy$node.label[is.na(phy$node.label)] <- phy$node.label[1]
301 phy$node.label[1] <- NA
302 ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])
303 ##phy$node.label <- c("NA", phy$node.label)