R/root.R

   1 ## root.R (2009-05-10)
   2
   3 ##   Root of Phylogenetic Trees
   4
   5 ## Copyright 2004-2009 Emmanuel Paradis
   6
   7 ## This file is part of the R-package `ape'.
   8 ## See the file ../COPYING for licensing issues.
   9
  10 is.rooted <- function(phy)
  11 {
  12     if (!inherits(phy, "phylo"))
  13       stop('object "phy" is not of class "phylo"')
  14     if (!is.null(phy$root.edge)) return(TRUE)
  15     else
  16       if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
  17         return(FALSE)
  18       else return(TRUE)
  19 }
  20
  21 unroot <- function(phy)
  22 {
  23     if (!inherits(phy, "phylo"))
  24       stop('object "phy" is not of class "phylo"')
  25     if (dim(phy$edge)[1] < 3)
  26       stop("cannot unroot a tree with two edges.")
  27     ## delete FIRST the root.edge (in case this is sufficient to
  28     ## unroot the tree, i.e. there is a multichotomy at the root)
  29     if (!is.null(phy$root.edge)) phy$root.edge <- NULL
  30     if (!is.rooted(phy)) return(phy)
  31     ## We remove one of the edges coming from the root, and
  32     ## eventually adding the branch length to the other one
  33     ## also coming from the root.
  34     ## In all cases, the node deleted is the 2nd one (numbered
  35     ## nb.tip+2 in `edge'), so we simply need to renumber the
  36     ## nodes by adding 1, except the root (this remains the
  37     ## origin of the tree).
  38     nb.tip <- length(phy$tip.label)
  39     ROOT <- nb.tip + 1
  40     EDGEROOT <- which(phy$edge[, 1] == ROOT)
  41     ## j: the target where to stick the edge
  42     ## i: the edge to delete
  43     if (phy$edge[EDGEROOT[1], 2] == ROOT + 1) {
  44         j <- EDGEROOT[2]
  45         i <- EDGEROOT[1]
  46     } else {
  47         j <- EDGEROOT[1]
  48         i <- EDGEROOT[2]
  49     }
  50     ## This should work whether the tree is in pruningwise or
  51     ## cladewise order.
  52     phy$edge <- phy$edge[-i, ]
  53     nodes <- phy$edge > ROOT # renumber all nodes except the root
  54     phy$edge[nodes] <- phy$edge[nodes] - 1
  55     if (!is.null(phy$edge.length)) {
  56         phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i]
  57         phy$edge.length <- phy$edge.length[-i]
  58     }
  59     phy$Nnode <- phy$Nnode - 1
  60     if (!is.null(phy$node.label))
  61       phy$node.label <- phy$node.label[-2]
  62     phy
  63 }
  64
  65 root <- function(phy, outgroup, node = NULL, resolve.root = FALSE)
  66 {
  67     if (!inherits(phy, "phylo"))
  68       stop('object "phy" is not of class "phylo"')
  69     ord <- attr(phy, "order")
  70     if (!is.null(ord) && ord == "pruningwise") phy <- reorder(phy)
  71     n <- length(phy$tip.label)
  72     ROOT <- n + 1
  73     if (!is.null(node)) {
  74         if (node <= n)
  75             stop("incorrect node#: should be greater than the number of taxa")
  76         outgroup <- NULL
  77         newroot <- node
  78     } else {
  79         if (is.numeric(outgroup)) {
  80             if (any(outgroup > n))
  81                 stop("incorrect taxa#: should not be greater than the number of taxa")
  82             outgroup <- sort(outgroup) # used below
  83         }
  84         if (is.character(outgroup))
  85             outgroup <- which(phy$tip.label %in% outgroup)
  86         if (length(outgroup) == n) return(phy)
  87
  88         ## First check that the outgroup is monophyletic--
  89         ## unless there's only one tip specified of course
  90         if (length(outgroup) > 1) {
  91             msg <- "the specified outgroup is not monophyletic"
  92             seq.nod <- .Call("seq_root2tip", phy$edge, n,
  93                              phy$Nnode, PACKAGE = "ape")
  94             sn <- seq.nod[outgroup]
  95             ## We go from the root to the tips: the sequence of nodes
  96             ## is identical until the MRCA:
  97             newroot <- ROOT
  98             i <- 2 # we start at the 2nd position since the root
  99                    # of the tree is a common ancestor to all tips
 100             repeat {
 101                 x <- unique(unlist(lapply(sn, "[", i)))
 102                 if (length(x) != 1) break
 103                 newroot <- x
 104                 i <- i + 1
 105             }
 106             ## Check that all descendants of this node
 107             ## are included in the outgroup.
 108             ## (below is slightly faster than calling "bipartition")
 109             desc <- which(unlist(lapply(seq.nod,
 110                                         function(x) any(x %in% newroot))))
 111             if (length(outgroup) != length(desc)) stop(msg)
 112             ## both vectors below are already sorted:
 113             if (!all(outgroup == desc)) stop(msg)
 114         } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
 115     }
 116     N <- Nedge(phy)
 117     oldNnode <- phy$Nnode
 118     if (newroot == ROOT) {
 119         if (resolve.root) {
 120             snw <- which(phy$edge[, 1] == newroot)
 121             if (length(snw) > 2) {
 122                 a <- snw[1]:(snw[2] - 1)
 123                 b <- snw[2]:N
 124                 newnod <- oldNnode + n + 1
 125                 phy$edge[snw[-1], 1] <- newnod
 126                 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
 127                                   phy$edge[b, ])
 128                 if (!is.null(phy$edge.length))
 129                 phy$edge.length <-
 130                     c(phy$edge.length[a], 0, phy$edge.length[b])
 131                 phy$Nnode <- phy$Nnode + 1
 132                 ## node renumbering (see comments below)
 133                 newNb <- integer(n + oldNnode)
 134                 newNb[newroot] <- n + 1L
 135                 sndcol <- phy$edge[, 2] > n
 136                 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
 137                     (n + 2):(n + phy$Nnode)
 138                 phy$edge[, 1] <- newNb[phy$edge[, 1]]
 139             }
 140         }
 141         return(phy)
 142     }
 143
 144     phy$root.edge <- NULL # just in case...
 145     Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
 146     ## if only 2 edges connect to the root, we have to fuse them:
 147     fuseRoot <- Nclade == 2
 148
 149     start <- which(phy$edge[, 1] == ROOT)
 150     end <- c(start[-1] - 1, N)
 151     o <- integer(N)
 152     INV <- logical(N)
 153
 154     w <- which(phy$edge[, 2] == newroot)
 155     nod <- phy$edge[w, 1]
 156     i <- w
 157     NEXT <- 1L
 158
 159     ## The loop below starts from the new root and goes up in the edge
 160     ## matrix reversing the edges that need to be as well as well
 161     ## inverting their order. The other edges must not be changed, so
 162     ## their indices are stored in `stack'.
 163     ## We then bind the other edges in a straightforward way.
 164
 165     if (nod != ROOT) {
 166         ## it is important that the 3 next lines
 167         ## are inside this "if" statement
 168         o[NEXT] <- w
 169         NEXT <- NEXT + 1L
 170         INV[w] <- TRUE
 171         i <- w - 1L
 172         stack <- 0L
 173         repeat {
 174             if (phy$edge[i, 2] == nod) {
 175                 if (stack) {
 176                     o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
 177                     NEXT <- NEXT + stack
 178                     stack <- 0L
 179                 }
 180                 if (phy$edge[i, 1] == ROOT) break
 181                 o[NEXT] <- i
 182                 NEXT <- NEXT + 1L
 183                 INV[i] <- TRUE
 184                 nod <- phy$edge[i, 1]
 185             } else stack <- stack + 1L
 186             i <- i - 1L
 187         }
 188     }
 189
 190     ## we keep the edge leading to the old root if needed:
 191     if (!fuseRoot) {
 192         o[NEXT] <- i
 193         INV[i] <- TRUE
 194         NEXT <- NEXT + 1L
 195     }
 196
 197     endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
 198     if (is.na(endOfOutgroup)) endOfOutgroup <- N
 199     endOfClade <- end[end >= endOfOutgroup][1]
 200
 201     ## bind the other clades...
 202     for (j in 1:Nclade) {
 203         if (end[j] == endOfClade) next
 204         ## do we have to fuse the two basal edges?
 205         if (fuseRoot) {
 206             phy$edge[start[j], 1] <- phy$edge[i, 2]
 207             if (!is.null(phy$edge.length))
 208                 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
 209                     phy$edge.length[i]
 210         } #else {
 211           #  o[NEXT] <- i#start[j]
 212           #  NEXT <- NEXT + 1L
 213         #}
 214         s <- start[j]:end[j]
 215         ne <- length(s)
 216         o[NEXT:(NEXT + ne - 1L)] <- s
 217         NEXT <- NEXT + ne
 218     }
 219
 220     ## possibly bind the edges below the outgroup till the end of the clade
 221     if (all(endOfOutgroup != end)) {
 222         j <- (endOfOutgroup + 1L):endOfClade
 223         ## we must take care that the branch inversions done above
 224         ## may have changed the hierarchy of clades here, so we
 225         ## travel from the bottom of this series of edges
 226         stack <- 0L
 227         inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
 228         for (k in rev(j)) {
 229             if (any(phy$edge[k, 1] == inverted)) {
 230                 o[NEXT] <- k
 231                 NEXT <- NEXT + 1L
 232                 if (stack){
 233                     o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
 234                     NEXT <- NEXT + stack
 235                     stack <- 0L
 236                 }
 237             } else stack <- stack + 1L
 238         }
 239     }
 240
 241     ## ... and the outgroup
 242     s <- (w + 1L):endOfOutgroup
 243     ne <- length(s)
 244     o[NEXT:(NEXT + ne - 1L)] <- s
 245
 246     if (fuseRoot) {
 247         phy$Nnode <- oldNnode - 1
 248         N <- N - 1L
 249     }
 250     phy$edge[INV, ] <- phy$edge[INV, 2:1]
 251     phy$edge <- phy$edge[o, ]
 252     if (!is.null(phy$edge.length))
 253         phy$edge.length <- phy$edge.length[o]
 254
 255     if (resolve.root) {
 256         newnod <- oldNnode + n + 1
 257         if (length(outgroup) == 1L) {
 258             wh <- which(phy$edge[, 2] == outgroup)
 259             phy$edge[1] <- newnod
 260             phy$edge <-
 261                 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
 262             snw <- which(phy$edge[, 1] == newroot)
 263             phy$edge[snw[length(snw) - 1], 1] <- newnod
 264             if (!is.null(phy$edge.length)) {
 265                 phy$edge.length <-
 266                     c(0, phy$edge.length[-wh], phy$edge.length[wh])
 267             }
 268         } else {
 269             wh <- which(phy$edge[, 1] == newroot)
 270             phy$edge[wh[-1], 1] <- newnod
 271             s1 <- 1:(wh[2] - 1)
 272             s2 <- wh[2]:N
 273             phy$edge <-
 274                 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
 275             if (!is.null(phy$edge.length)) {
 276                 tmp <- phy$edge.length[1]
 277                 phy$edge.length[1] <- 0
 278                 phy$edge.length <-
 279                     c(phy$edge.length[s1], tmp, phy$edge.length[s2])
 280             }
 281         }
 282         ## N <- N + 1L ... not needed
 283         phy$Nnode <- phy$Nnode + 1
 284     }
 285
 286     ## The block below renumbers the nodes so that they conform
 287     ## to the "phylo" format
 288     newNb <- integer(n + oldNnode)
 289     newNb[newroot] <- n + 1L
 290     sndcol <- phy$edge[, 2] > n
 291     ## executed from right to left, so newNb is modified before phy$edge:
 292     phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
 293         (n + 2):(n + phy$Nnode)
 294     phy$edge[, 1] <- newNb[phy$edge[, 1]]
 295
 296     if (!is.null(phy$node.label)) {
 297         newNb <- newNb[-(1:n)]
 298         if (fuseRoot) {
 299             newNb <- newNb[-1]
 300             phy$node.label <- phy$node.label[-1]
 301         }
 302         phy$node.label <- phy$node.label[order(newNb)]
 303         if (resolve.root)
 304              phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])
 305     }
 306     phy
 307 }