R/root.R

   1 ## root.R (2010-02-11)
   2
   3 ##   Root of Phylogenetic Trees
   4
   5 ## Copyright 2004-2010 Emmanuel Paradis
   6
   7 ## This file is part of the R-package `ape'.
   8 ## See the file ../COPYING for licensing issues.
   9
  10 is.rooted <- function(phy)
  11 {
  12     if (!inherits(phy, "phylo"))
  13         stop('object "phy" is not of class "phylo"')
  14     if (!is.null(phy$root.edge)) TRUE
  15     else
  16         if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
  17             FALSE else TRUE
  18 }
  19
  20 unroot <- function(phy)
  21 {
  22     if (!inherits(phy, "phylo"))
  23         stop('object "phy" is not of class "phylo"')
  24     if (dim(phy$edge)[1] < 3)
  25         stop("cannot unroot a tree with less than three edges.")
  26     ## delete FIRST the root.edge (in case this is sufficient to
  27     ## unroot the tree, i.e. there is a multichotomy at the root)
  28     if (!is.null(phy$root.edge)) phy$root.edge <- NULL
  29     if (!is.rooted(phy)) return(phy)
  30     ## We remove one of the edges coming from the root, and
  31     ## eventually adding the branch length to the other one
  32     ## also coming from the root.
  33     ## In all cases, the node deleted is the 2nd one (numbered
  34     ## nb.tip+2 in `edge'), so we simply need to renumber the
  35     ## nodes by adding 1, except the root (this remains the
  36     ## origin of the tree).
  37     nb.tip <- length(phy$tip.label)
  38     ROOT <- nb.tip + 1L
  39     EDGEROOT <- which(phy$edge[, 1] == ROOT)
  40     ## j: the target where to stick the edge
  41     ## i: the edge to delete
  42     if (phy$edge[EDGEROOT[1], 2] == ROOT + 1L) {
  43         j <- EDGEROOT[2]
  44         i <- EDGEROOT[1]
  45     } else {
  46         j <- EDGEROOT[1]
  47         i <- EDGEROOT[2]
  48     }
  49     ## This should work whether the tree is in pruningwise or
  50     ## cladewise order.
  51     phy$edge <- phy$edge[-i, ]
  52     nodes <- phy$edge > ROOT # renumber all nodes except the root
  53     phy$edge[nodes] <- phy$edge[nodes] - 1L
  54     if (!is.null(phy$edge.length)) {
  55         phy$edge.length[j] <- phy$edge.length[j] + phy$edge.length[i]
  56         phy$edge.length <- phy$edge.length[-i]
  57     }
  58     phy$Nnode <- phy$Nnode - 1L
  59     if (!is.null(phy$node.label))
  60       phy$node.label <- phy$node.label[-2]
  61     phy
  62 }
  63
  64 root <- function(phy, outgroup, node = NULL,
  65                  resolve.root = FALSE, interactive = FALSE)
  66 {
  67     if (!inherits(phy, "phylo"))
  68         stop('object "phy" is not of class "phylo"')
  69     phy <- reorder(phy)
  70     n <- length(phy$tip.label)
  71     ROOT <- n + 1L
  72     if (interactive) {
  73         node <- identify(phy)$nodes
  74         cat("You have set resolve.root =", resolve.root, "\n")
  75     }
  76     if (!is.null(node)) {
  77         if (node <= n)
  78             stop("incorrect node#: should be greater than the number of taxa")
  79         outgroup <- NULL
  80         newroot <- node
  81     } else {
  82         if (is.numeric(outgroup)) {
  83             if (any(outgroup > n))
  84                 stop("incorrect taxa#: should not be greater than the number of taxa")
  85             outgroup <- sort(outgroup) # used below
  86         }
  87         if (is.character(outgroup))
  88             outgroup <- which(phy$tip.label %in% outgroup)
  89         if (length(outgroup) == n) return(phy)
  90
  91         ## First check that the outgroup is monophyletic--
  92         ## unless there's only one tip specified of course
  93         if (length(outgroup) > 1) {
  94             seq.nod <- .Call("seq_root2tip", phy$edge, n,
  95                              phy$Nnode, PACKAGE = "ape")
  96             sn <- seq.nod[outgroup]
  97             ## We go from the root to the tips: the sequence of nodes
  98             ## is identical until the MRCA:
  99             newroot <- ROOT
 100             i <- 2 # we start at the 2nd position since the root
 101                    # of the tree is a common ancestor to all tips
 102             repeat {
 103                 x <- unique(unlist(lapply(sn, "[", i)))
 104                 if (length(x) != 1) break
 105                 newroot <- x
 106                 i <- i + 1
 107             }
 108             ## Check that all descendants of this node
 109             ## are included in the outgroup.
 110             ## (below is slightly faster than calling "bipartition")
 111             desc <- which(unlist(lapply(seq.nod,
 112                                         function(x) any(x %in% newroot))))
 113             msg <- "the specified outgroup is not monophyletic"
 114             ingroup <- (1:n)[-outgroup]
 115             ## 'outgroup' and 'desc' are already sorted:
 116             if (newroot != ROOT) {
 117                 if (!identical(outgroup, desc) && !identical(ingroup, desc))
 118                     stop(msg)
 119             } else { # otherwise check monophyly of the ingroup
 120                 if (!is.monophyletic(phy, ingroup)) stop(msg)
 121             }
 122         } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
 123     }
 124     N <- Nedge(phy)
 125     oldNnode <- phy$Nnode
 126     if (newroot == ROOT) {
 127         if (resolve.root) {
 128             snw <- which(phy$edge[, 1] == newroot)
 129             if (length(snw) > 2) {
 130                 a <- snw[1]:(snw[2] - 1)
 131                 b <- snw[2]:N
 132                 newnod <- oldNnode + n + 1
 133                 phy$edge[snw[-1], 1] <- newnod
 134                 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
 135                                   phy$edge[b, ])
 136                 if (!is.null(phy$edge.length))
 137                 phy$edge.length <-
 138                     c(phy$edge.length[a], 0, phy$edge.length[b])
 139                 phy$Nnode <- phy$Nnode + 1L
 140                 ## node renumbering (see comments below)
 141                 newNb <- integer(n + oldNnode)
 142                 newNb[newroot] <- n + 1L
 143                 sndcol <- phy$edge[, 2] > n
 144                 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
 145                     (n + 2):(n + phy$Nnode)
 146                 phy$edge[, 1] <- newNb[phy$edge[, 1]]
 147             }
 148         }
 149         return(phy)
 150     }
 151
 152     phy$root.edge <- NULL # just in case...
 153     Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
 154     ## if only 2 edges connect to the root, we have to fuse them:
 155     fuseRoot <- Nclade == 2
 156
 157     start <- which(phy$edge[, 1] == ROOT)
 158     end <- c(start[-1] - 1, N)
 159     o <- integer(N)
 160     INV <- logical(N)
 161
 162     w <- which(phy$edge[, 2] == newroot)
 163     nod <- phy$edge[w, 1]
 164     i <- w
 165     NEXT <- 1L
 166
 167     ## The loop below starts from the new root and goes up in the edge
 168     ## matrix reversing the edges that need to be as well as well
 169     ## inverting their order. The other edges must not be changed, so
 170     ## their indices are stored in `stack'.
 171     ## We then bind the other edges in a straightforward way.
 172
 173     if (nod != ROOT) {
 174         ## it is important that the 3 next lines
 175         ## are inside this "if" statement
 176         o[NEXT] <- w
 177         NEXT <- NEXT + 1L
 178         INV[w] <- TRUE
 179         i <- w - 1L
 180         stack <- 0L
 181         repeat {
 182             if (phy$edge[i, 2] == nod) {
 183                 if (stack) {
 184                     o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
 185                     NEXT <- NEXT + stack
 186                     stack <- 0L
 187                 }
 188                 if (phy$edge[i, 1] == ROOT) break
 189                 o[NEXT] <- i
 190                 NEXT <- NEXT + 1L
 191                 INV[i] <- TRUE
 192                 nod <- phy$edge[i, 1]
 193             } else stack <- stack + 1L
 194             i <- i - 1L
 195         }
 196     }
 197
 198     ## we keep the edge leading to the old root if needed:
 199     if (!fuseRoot) {
 200         o[NEXT] <- i
 201         INV[i] <- TRUE
 202         NEXT <- NEXT + 1L
 203     }
 204
 205     endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
 206     if (is.na(endOfOutgroup)) endOfOutgroup <- N
 207     endOfClade <- end[end >= endOfOutgroup][1]
 208
 209     ## bind the other clades...
 210     for (j in 1:Nclade) {
 211         if (end[j] == endOfClade) next
 212         ## do we have to fuse the two basal edges?
 213         if (fuseRoot) {
 214             phy$edge[start[j], 1] <- phy$edge[i, 2]
 215             if (!is.null(phy$edge.length))
 216                 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
 217                     phy$edge.length[i]
 218         } #else {
 219           #  o[NEXT] <- i#start[j]
 220           #  NEXT <- NEXT + 1L
 221         #}
 222         s <- start[j]:end[j]
 223         ne <- length(s)
 224         o[NEXT:(NEXT + ne - 1L)] <- s
 225         NEXT <- NEXT + ne
 226     }
 227
 228     ## possibly bind the edges below the outgroup till the end of the clade
 229     if (all(endOfOutgroup != end)) {
 230         j <- (endOfOutgroup + 1L):endOfClade
 231         ## we must take care that the branch inversions done above
 232         ## may have changed the hierarchy of clades here, so we
 233         ## travel from the bottom of this series of edges
 234         stack <- 0L
 235         inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
 236         for (k in rev(j)) {
 237             if (any(phy$edge[k, 1] == inverted)) {
 238                 o[NEXT] <- k
 239                 NEXT <- NEXT + 1L
 240                 if (stack){
 241                     o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
 242                     NEXT <- NEXT + stack
 243                     stack <- 0L
 244                 }
 245             } else stack <- stack + 1L
 246         }
 247     }
 248
 249     ## ... and the outgroup
 250     s <- (w + 1L):endOfOutgroup
 251     ne <- length(s)
 252     o[NEXT:(NEXT + ne - 1L)] <- s
 253
 254     if (fuseRoot) {
 255         phy$Nnode <- oldNnode - 1
 256         N <- N - 1L
 257     }
 258     phy$edge[INV, ] <- phy$edge[INV, 2:1]
 259     phy$edge <- phy$edge[o, ]
 260     if (!is.null(phy$edge.length))
 261         phy$edge.length <- phy$edge.length[o]
 262
 263     if (resolve.root) {
 264         newnod <- oldNnode + n + 1
 265         if (length(outgroup) == 1L) {
 266             wh <- which(phy$edge[, 2] == outgroup)
 267             phy$edge[1] <- newnod
 268             phy$edge <-
 269                 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
 270             snw <- which(phy$edge[, 1] == newroot)
 271             phy$edge[snw[length(snw) - 1], 1] <- newnod
 272             if (!is.null(phy$edge.length)) {
 273                 phy$edge.length <-
 274                     c(0, phy$edge.length[-wh], phy$edge.length[wh])
 275             }
 276         } else {
 277             wh <- which(phy$edge[, 1] == newroot)
 278             phy$edge[wh[-1], 1] <- newnod
 279             s1 <- 1:(wh[2] - 1)
 280             s2 <- wh[2]:N
 281             phy$edge <-
 282                 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
 283             if (!is.null(phy$edge.length)) {
 284                 tmp <- phy$edge.length[1]
 285                 phy$edge.length[1] <- 0
 286                 phy$edge.length <-
 287                     c(phy$edge.length[s1], tmp, phy$edge.length[s2])
 288             }
 289         }
 290         ## N <- N + 1L ... not needed
 291         phy$Nnode <- phy$Nnode + 1
 292     }
 293
 294     ## The block below renumbers the nodes so that they conform
 295     ## to the "phylo" format
 296     newNb <- integer(n + oldNnode)
 297     newNb[newroot] <- n + 1L
 298     sndcol <- phy$edge[, 2] > n
 299     ## executed from right to left, so newNb is modified before phy$edge:
 300     phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
 301         (n + 2):(n + phy$Nnode)
 302     phy$edge[, 1] <- newNb[phy$edge[, 1]]
 303
 304     if (!is.null(phy$node.label)) {
 305         #browser()
 306         newNb <- newNb[-(1:n)]
 307         if (fuseRoot) {
 308             newNb <- newNb[-1]
 309             phy$node.label <- phy$node.label[-1]
 310         }
 311         phy$node.label <- phy$node.label[order(newNb)]
 312         if (resolve.root) {
 313             phy$node.label[is.na(phy$node.label)] <- phy$node.label[1]
 314             phy$node.label[1] <- NA
 315             ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])
 316             ##phy$node.label <- c("NA", phy$node.label)
 317         }
 318     }
 319     phy
 320 }