## root.R (2011-08-05) ## Root of Phylogenetic Trees ## Copyright 2004-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.rooted <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') if (!is.null(phy$root.edge)) TRUE else if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2) FALSE else TRUE } unroot <- function(phy) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') N <- dim(phy$edge)[1] if (N < 3) stop("cannot unroot a tree with less than three edges.") ## delete FIRST the root.edge (in case this is sufficient to ## unroot the tree, i.e. there is a multichotomy at the root) if (!is.null(phy$root.edge)) phy$root.edge <- NULL if (!is.rooted(phy)) return(phy) n <- length(phy$tip.label) ROOT <- n + 1L ### EDGEROOT[1]: the edge to delete ### EDGEROOT[2]: the target where to stick the edge to delete ### If the tree is in pruningwise order, then the last two edges ### are those connected to the root; the node situated in ### phy$edge[N - 2L, 1L] will be the new root... ophy <- attr(phy, "order") if (!is.null(ophy) && ophy == "pruningwise") { NEWROOT <- phy$edge[N - 2L, 1L] EDGEROOT <- c(N, N - 1L) } else { ### ... otherwise, we remove one of the edges coming from ### the root, and eventually adding the branch length to ### the other one also coming from the root. ### In all cases, the node deleted is the 2nd one (numbered ### nb.tip+2 in 'edge'), so we simply need to renumber the ### nodes by adding 1, except the root (this remains the ### origin of the tree). EDGEROOT <- which(phy$edge[, 1L] == ROOT) NEWROOT <- ROOT + 1L } ## make sure EDGEROOT is ordered as described above: if (phy$edge[EDGEROOT[1L], 2L] != NEWROOT) EDGEROOT <- EDGEROOT[2:1] phy$edge <- phy$edge[-EDGEROOT[1L], ] s <- phy$edge == NEWROOT # renumber the new root phy$edge[s] <- ROOT s <- phy$edge > NEWROOT # renumber all nodes greater than the new root phy$edge[s] <- phy$edge[s] - 1L if (!is.null(phy$edge.length)) { phy$edge.length[EDGEROOT[2L]] <- phy$edge.length[EDGEROOT[2L]] + phy$edge.length[EDGEROOT[1L]] phy$edge.length <- phy$edge.length[-EDGEROOT[1L]] } phy$Nnode <- phy$Nnode - 1L if (!is.null(phy$node.label)) { if (NEWROOT == n + 2L) phy$node.label <- phy$node.label[-1] else { lbs <- phy$node.label tmp <- lbs[NEWROOT - n] lbs <- lbs[-c(1, NEWROOT)] phy$node.label <- c(tmp, lbs) } } phy } root <- function(phy, outgroup, node = NULL, resolve.root = FALSE, interactive = FALSE) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') phy <- reorder(phy) n <- length(phy$tip.label) ROOT <- n + 1L if (interactive) { node <- identify(phy)$nodes cat("You have set resolve.root =", resolve.root, "\n") } if (!is.null(node)) { if (node <= n) stop("incorrect node#: should be greater than the number of taxa") outgroup <- NULL newroot <- node } else { if (is.numeric(outgroup)) { if (any(outgroup > n)) stop("incorrect taxa#: should not be greater than the number of taxa") outgroup <- sort(outgroup) # used below } if (is.character(outgroup)) outgroup <- which(phy$tip.label %in% outgroup) if (length(outgroup) == n) return(phy) ## First check that the outgroup is monophyletic-- ## unless there's only one tip specified of course if (length(outgroup) > 1) { pp <- prop.part(phy) ingroup <- (1:n)[-outgroup] newroot <- 0L for (i in 2:phy$Nnode) { if (identical(pp[[i]], ingroup)) { newroot <- i + n break } if (identical(pp[[i]], outgroup)) { newroot <- phy$edge[which(phy$edge[, 2] == i + n), 1] break } } if (!newroot) stop("the specified outgroup is not monophyletic") } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1] } N <- Nedge(phy) oldNnode <- phy$Nnode if (newroot == ROOT) { if (resolve.root) { snw <- which(phy$edge[, 1] == newroot) if (length(snw) > 2) { a <- snw[1]:(snw[2] - 1) b <- snw[2]:N newnod <- oldNnode + n + 1 phy$edge[snw[-1], 1] <- newnod phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod), phy$edge[b, ]) if (!is.null(phy$edge.length)) phy$edge.length <- c(phy$edge.length[a], 0, phy$edge.length[b]) phy$Nnode <- phy$Nnode + 1L ## node renumbering (see comments below) newNb <- integer(n + oldNnode) newNb[newroot] <- n + 1L sndcol <- phy$edge[, 2] > n phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- (n + 2):(n + phy$Nnode) phy$edge[, 1] <- newNb[phy$edge[, 1]] } } return(phy) } phy$root.edge <- NULL # just in case... Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node ## if only 2 edges connect to the root, we have to fuse them: fuseRoot <- Nclade == 2 start <- which(phy$edge[, 1] == ROOT) end <- c(start[-1] - 1, N) o <- integer(N) INV <- logical(N) w <- which(phy$edge[, 2] == newroot) nod <- phy$edge[w, 1] i <- w NEXT <- 1L ## The loop below starts from the new root and goes up in the edge ## matrix reversing the edges that need to be as well as well ## inverting their order. The other edges must not be changed, so ## their indices are stored in `stack'. ## We then bind the other edges in a straightforward way. if (nod != ROOT) { ## it is important that the 3 next lines ## are inside this "if" statement o[NEXT] <- w NEXT <- NEXT + 1L INV[w] <- TRUE i <- w - 1L stack <- 0L repeat { if (phy$edge[i, 2] == nod) { if (stack) { o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack) NEXT <- NEXT + stack stack <- 0L } if (phy$edge[i, 1] == ROOT) break o[NEXT] <- i NEXT <- NEXT + 1L INV[i] <- TRUE nod <- phy$edge[i, 1] } else stack <- stack + 1L i <- i - 1L } } ## we keep the edge leading to the old root if needed: if (!fuseRoot) { o[NEXT] <- i INV[i] <- TRUE NEXT <- NEXT + 1L } endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1 if (is.na(endOfOutgroup)) endOfOutgroup <- N endOfClade <- end[end >= endOfOutgroup][1] ## bind the other clades... for (j in 1:Nclade) { if (end[j] == endOfClade) next ## do we have to fuse the two basal edges? if (fuseRoot) { phy$edge[start[j], 1] <- phy$edge[i, 2] if (!is.null(phy$edge.length)) phy$edge.length[start[j]] <- phy$edge.length[start[j]] + phy$edge.length[i] } #else { # o[NEXT] <- i#start[j] # NEXT <- NEXT + 1L #} s <- start[j]:end[j] ne <- length(s) o[NEXT:(NEXT + ne - 1L)] <- s NEXT <- NEXT + ne } ## possibly bind the edges below the outgroup till the end of the clade if (all(endOfOutgroup != end)) { j <- (endOfOutgroup + 1L):endOfClade ## we must take care that the branch inversions done above ## may have changed the hierarchy of clades here, so we ## travel from the bottom of this series of edges stack <- 0L inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used for (k in rev(j)) { if (any(phy$edge[k, 1] == inverted)) { o[NEXT] <- k NEXT <- NEXT + 1L if (stack){ o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack) NEXT <- NEXT + stack stack <- 0L } } else stack <- stack + 1L } } ## ... and the outgroup s <- (w + 1L):endOfOutgroup ne <- length(s) o[NEXT:(NEXT + ne - 1L)] <- s if (fuseRoot) { phy$Nnode <- oldNnode - 1L N <- N - 1L } phy$edge[INV, ] <- phy$edge[INV, 2:1] phy$edge <- phy$edge[o, ] if (!is.null(phy$edge.length)) phy$edge.length <- phy$edge.length[o] if (resolve.root) { newnod <- oldNnode + n + 1 if (length(outgroup) == 1L) { wh <- which(phy$edge[, 2] == outgroup) phy$edge[1] <- newnod phy$edge <- rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ]) snw <- which(phy$edge[, 1] == newroot) phy$edge[snw[length(snw) - 1], 1] <- newnod if (!is.null(phy$edge.length)) { phy$edge.length <- c(0, phy$edge.length[-wh], phy$edge.length[wh]) } } else { wh <- which(phy$edge[, 1] == newroot) phy$edge[wh[-1], 1] <- newnod s1 <- 1:(wh[2] - 1) s2 <- wh[2]:N phy$edge <- rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ]) if (!is.null(phy$edge.length)) { tmp <- phy$edge.length[1] phy$edge.length[1] <- 0 phy$edge.length <- c(phy$edge.length[s1], tmp, phy$edge.length[s2]) } } ## N <- N + 1L ... not needed phy$Nnode <- phy$Nnode + 1L } ## The block below renumbers the nodes so that they conform ## to the "phylo" format newNb <- integer(n + oldNnode) newNb[newroot] <- n + 1L sndcol <- phy$edge[, 2] > n ## executed from right to left, so newNb is modified before phy$edge: phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- n + 2:phy$Nnode phy$edge[, 1] <- newNb[phy$edge[, 1]] if (!is.null(phy$node.label)) { #browser() newNb <- newNb[-(1:n)] if (fuseRoot) { newNb <- newNb[-1] phy$node.label <- phy$node.label[-1] } phy$node.label <- phy$node.label[order(newNb)] if (resolve.root) { phy$node.label[is.na(phy$node.label)] <- phy$node.label[1] phy$node.label[1] <- NA ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1]) ##phy$node.label <- c("NA", phy$node.label) } } phy }