X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Froot.R;h=ac5ec27b5b45d27470b8ca322adc787e86dc6193;hb=17d005bd203296df262aa76d75d165e4953494c6;hp=ba80e4a06dde46345423d8a5d3e6f58006f56849;hpb=c827059eeafc8cbe41c812b26979543ab287803e;p=ape.git diff --git a/R/root.R b/R/root.R index ba80e4a..ac5ec27 100644 --- a/R/root.R +++ b/R/root.R @@ -1,29 +1,28 @@ -## root.R (2007-12-21) +## root.R (2010-02-11) ## Root of Phylogenetic Trees -## Copyright 2004-2007 Emmanuel Paradis +## Copyright 2004-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. is.rooted <- function(phy) { - if (!"phylo" %in% class(phy)) - stop('object "phy" is not of class "phylo"') - if (!is.null(phy$root.edge)) return(TRUE) + if (!inherits(phy, "phylo")) + stop('object "phy" is not of class "phylo"') + if (!is.null(phy$root.edge)) TRUE else - if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2) - return(FALSE) - else return(TRUE) + if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2) + FALSE else TRUE } unroot <- function(phy) { - if (class(phy) != "phylo") - stop('object "phy" is not of class "phylo"') + if (!inherits(phy, "phylo")) + stop('object "phy" is not of class "phylo"') if (dim(phy$edge)[1] < 3) - stop("cannot unroot a tree with two edges.") + stop("cannot unroot a tree with less than three edges.") ## delete FIRST the root.edge (in case this is sufficient to ## unroot the tree, i.e. there is a multichotomy at the root) if (!is.null(phy$root.edge)) phy$root.edge <- NULL @@ -36,11 +35,11 @@ unroot <- function(phy) ## nodes by adding 1, except the root (this remains the ## origin of the tree). nb.tip <- length(phy$tip.label) - ROOT <- nb.tip + 1 + ROOT <- nb.tip + 1L EDGEROOT <- which(phy$edge[, 1] == ROOT) ## j: the target where to stick the edge ## i: the edge to delete - if (phy$edge[EDGEROOT[1], 2] == ROOT + 1) { + if (phy$edge[EDGEROOT[1], 2] == ROOT + 1L) { j <- EDGEROOT[2] i <- EDGEROOT[1] } else { @@ -51,98 +50,271 @@ unroot <- function(phy) ## cladewise order. phy$edge <- phy$edge[-i, ] nodes <- phy$edge > ROOT # renumber all nodes except the root - phy$edge[nodes] <- phy$edge[nodes] - 1 + phy$edge[nodes] <- phy$edge[nodes] - 1L if (!is.null(phy$edge.length)) { phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i] phy$edge.length <- phy$edge.length[-i] } - phy$Nnode <- phy$Nnode - 1 + phy$Nnode <- phy$Nnode - 1L if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[-2] phy } -root <- function(phy, outgroup) +root <- function(phy, outgroup, node = NULL, + resolve.root = FALSE, interactive = FALSE) { - if (class(phy) != "phylo") - stop('object "phy" is not of class "phylo"') - if (is.character(outgroup)) - outgroup <- which(phy$tip.label %in% outgroup) - nb.tip <- length(phy$tip.label) - if (length(outgroup) == nb.tip) return(phy) - - ## First check that the outgroup is monophyletic-- - ## unless there's only one tip specified of course - ROOT <- nb.tip + 1 - if (length(outgroup) > 1) { - msg <- "the specified outgroup is not monophyletic!" - ## If all tips in `tip' are not contiguous, then - ## no need to go further: - if (!all(diff(outgroup) == 1)) stop(msg) - seq.nod <- .Call("seq_root2tip", phy$edge, nb.tip, - phy$Nnode, PACKAGE = "ape") - sn <- seq.nod[outgroup] - ## We go from the root to the tips: the sequence of nodes - ## is identical until the MRCA: - newroot <- ROOT - i <- 2 # we start at the 2nd position since the root - # of the tree is a common ancestor to all tips + if (!inherits(phy, "phylo")) + stop('object "phy" is not of class "phylo"') + phy <- reorder(phy) + n <- length(phy$tip.label) + ROOT <- n + 1L + if (interactive) { + node <- identify(phy)$nodes + cat("You have set resolve.root =", resolve.root, "\n") + } + if (!is.null(node)) { + if (node <= n) + stop("incorrect node#: should be greater than the number of taxa") + outgroup <- NULL + newroot <- node + } else { + if (is.numeric(outgroup)) { + if (any(outgroup > n)) + stop("incorrect taxa#: should not be greater than the number of taxa") + outgroup <- sort(outgroup) # used below + } + if (is.character(outgroup)) + outgroup <- which(phy$tip.label %in% outgroup) + if (length(outgroup) == n) return(phy) + + ## First check that the outgroup is monophyletic-- + ## unless there's only one tip specified of course + if (length(outgroup) > 1) { + seq.nod <- .Call("seq_root2tip", phy$edge, n, + phy$Nnode, PACKAGE = "ape") + sn <- seq.nod[outgroup] + ## We go from the root to the tips: the sequence of nodes + ## is identical until the MRCA: + newroot <- ROOT + i <- 2 # we start at the 2nd position since the root + # of the tree is a common ancestor to all tips + repeat { + x <- unique(unlist(lapply(sn, "[", i))) + if (length(x) != 1) break + newroot <- x + i <- i + 1 + } + ## Check that all descendants of this node + ## are included in the outgroup. + ## (below is slightly faster than calling "bipartition") + desc <- which(unlist(lapply(seq.nod, + function(x) any(x %in% newroot)))) + msg <- "the specified outgroup is not monophyletic" + ingroup <- (1:n)[-outgroup] + ## 'outgroup' and 'desc' are already sorted: + if (newroot != ROOT) { + if (!identical(outgroup, desc) && !identical(ingroup, desc)) + stop(msg) + } else { # otherwise check monophyly of the ingroup + if (!is.monophyletic(phy, ingroup)) stop(msg) + } + } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1] + } + N <- Nedge(phy) + oldNnode <- phy$Nnode + if (newroot == ROOT) { + if (resolve.root) { + snw <- which(phy$edge[, 1] == newroot) + if (length(snw) > 2) { + a <- snw[1]:(snw[2] - 1) + b <- snw[2]:N + newnod <- oldNnode + n + 1 + phy$edge[snw[-1], 1] <- newnod + phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod), + phy$edge[b, ]) + if (!is.null(phy$edge.length)) + phy$edge.length <- + c(phy$edge.length[a], 0, phy$edge.length[b]) + phy$Nnode <- phy$Nnode + 1L + ## node renumbering (see comments below) + newNb <- integer(n + oldNnode) + newNb[newroot] <- n + 1L + sndcol <- phy$edge[, 2] > n + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (n + 2):(n + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + } + } + return(phy) + } + + phy$root.edge <- NULL # just in case... + Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node + ## if only 2 edges connect to the root, we have to fuse them: + fuseRoot <- Nclade == 2 + + start <- which(phy$edge[, 1] == ROOT) + end <- c(start[-1] - 1, N) + o <- integer(N) + INV <- logical(N) + + w <- which(phy$edge[, 2] == newroot) + nod <- phy$edge[w, 1] + i <- w + NEXT <- 1L + + ## The loop below starts from the new root and goes up in the edge + ## matrix reversing the edges that need to be as well as well + ## inverting their order. The other edges must not be changed, so + ## their indices are stored in `stack'. + ## We then bind the other edges in a straightforward way. + + if (nod != ROOT) { + ## it is important that the 3 next lines + ## are inside this "if" statement + o[NEXT] <- w + NEXT <- NEXT + 1L + INV[w] <- TRUE + i <- w - 1L + stack <- 0L repeat { - x <- unique(unlist(lapply(sn, "[", i))) - if (length(x) != 1) break - newroot <- x - i <- i + 1 + if (phy$edge[i, 2] == nod) { + if (stack) { + o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack) + NEXT <- NEXT + stack + stack <- 0L + } + if (phy$edge[i, 1] == ROOT) break + o[NEXT] <- i + NEXT <- NEXT + 1L + INV[i] <- TRUE + nod <- phy$edge[i, 1] + } else stack <- stack + 1L + i <- i - 1L + } + } + + ## we keep the edge leading to the old root if needed: + if (!fuseRoot) { + o[NEXT] <- i + INV[i] <- TRUE + NEXT <- NEXT + 1L + } + + endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1 + if (is.na(endOfOutgroup)) endOfOutgroup <- N + endOfClade <- end[end >= endOfOutgroup][1] + + ## bind the other clades... + for (j in 1:Nclade) { + if (end[j] == endOfClade) next + ## do we have to fuse the two basal edges? + if (fuseRoot) { + phy$edge[start[j], 1] <- phy$edge[i, 2] + if (!is.null(phy$edge.length)) + phy$edge.length[start[j]] <- phy$edge.length[start[j]] + + phy$edge.length[i] + } #else { + # o[NEXT] <- i#start[j] + # NEXT <- NEXT + 1L + #} + s <- start[j]:end[j] + ne <- length(s) + o[NEXT:(NEXT + ne - 1L)] <- s + NEXT <- NEXT + ne + } + + ## possibly bind the edges below the outgroup till the end of the clade + if (all(endOfOutgroup != end)) { + j <- (endOfOutgroup + 1L):endOfClade + ## we must take care that the branch inversions done above + ## may have changed the hierarchy of clades here, so we + ## travel from the bottom of this series of edges + stack <- 0L + inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used + for (k in rev(j)) { + if (any(phy$edge[k, 1] == inverted)) { + o[NEXT] <- k + NEXT <- NEXT + 1L + if (stack){ + o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack) + NEXT <- NEXT + stack + stack <- 0L + } + } else stack <- stack + 1L + } + } + + ## ... and the outgroup + s <- (w + 1L):endOfOutgroup + ne <- length(s) + o[NEXT:(NEXT + ne - 1L)] <- s + + if (fuseRoot) { + phy$Nnode <- oldNnode - 1 + N <- N - 1L + } + phy$edge[INV, ] <- phy$edge[INV, 2:1] + phy$edge <- phy$edge[o, ] + if (!is.null(phy$edge.length)) + phy$edge.length <- phy$edge.length[o] + + if (resolve.root) { + newnod <- oldNnode + n + 1 + if (length(outgroup) == 1L) { + wh <- which(phy$edge[, 2] == outgroup) + phy$edge[1] <- newnod + phy$edge <- + rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ]) + snw <- which(phy$edge[, 1] == newroot) + phy$edge[snw[length(snw) - 1], 1] <- newnod + if (!is.null(phy$edge.length)) { + phy$edge.length <- + c(0, phy$edge.length[-wh], phy$edge.length[wh]) + } + } else { + wh <- which(phy$edge[, 1] == newroot) + phy$edge[wh[-1], 1] <- newnod + s1 <- 1:(wh[2] - 1) + s2 <- wh[2]:N + phy$edge <- + rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ]) + if (!is.null(phy$edge.length)) { + tmp <- phy$edge.length[1] + phy$edge.length[1] <- 0 + phy$edge.length <- + c(phy$edge.length[s1], tmp, phy$edge.length[s2]) + } } - ## Check that all descendants of this node - ## are included in the outgroup - ## (1st solution... there may be something smarter) - desc <- which(unlist(lapply(seq.nod, - function(x) any(x %in% newroot)))) - if (length(outgroup) != length(desc)) stop(msg) - if (!all(sort(outgroup) == sort(desc))) stop(msg) - - } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1] - - if (newroot == ROOT) return(phy) - -### -### The remaining part of the code has not been improved; this -### does not seem obvious. This is delayed... (2006-09-23) -### - - ## Invert all branches from the new root to the old one - i <- which(phy$edge[, 2] == newroot) - nod <- phy$edge[i, 1] - while (nod != ROOT) { - j <- which(phy$edge[, 2] == nod) - phy$edge[i, 1] <- phy$edge[i, 2] - phy$edge[i, 2] <- nod - i <- j - nod <- phy$edge[i, 1] + ## N <- N + 1L ... not needed + phy$Nnode <- phy$Nnode + 1 } - i.oroot <- which(phy$edge[, 1] == ROOT) - ## Unroot the tree if there's a basal dichotomy... - if (length(i.oroot) == 2) { - j <- i.oroot[which(i.oroot != i)] - phy$edge[j, 1] <- phy$edge[i, 2] - phy$edge <- phy$edge[-i, ] - if (!is.null(phy$edge.length)) { - phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i] - phy$edge.length <- phy$edge.length[-i] + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format + newNb <- integer(n + oldNnode) + newNb[newroot] <- n + 1L + sndcol <- phy$edge[, 2] > n + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (n + 2):(n + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + + if (!is.null(phy$node.label)) { + #browser() + newNb <- newNb[-(1:n)] + if (fuseRoot) { + newNb <- newNb[-1] + phy$node.label <- phy$node.label[-1] + } + phy$node.label <- phy$node.label[order(newNb)] + if (resolve.root) { + phy$node.label[is.na(phy$node.label)] <- phy$node.label[1] + phy$node.label[1] <- NA + ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1]) + ##phy$node.label <- c("NA", phy$node.label) } - phy$edge[which(phy$edge == newroot)] <- ROOT - } else { - ## ... otherwise just invert the root with the newroot - phy$edge[which(phy$edge == newroot)] <- ROOT - phy$edge[i.oroot] <- newroot - ## ... and invert finally! (fixed 2005-11-07) - phy$edge[i, ] <- rev(phy$edge[i, ]) } - if (!is.null(phy$node.label)) - ## It's important to not delete the label of the newroot - ## to keep the positions of the other nodes - phy$node.label[1] <- phy$node.label[newroot - nb.tip] - ## Not needed: phy$Nnode <- phy$Nnode - 1 - read.tree(text = write.tree(phy)) + phy }