R/root.R

   1 ## root.R (2011-08-05)
   2
   3 ##   Root of Phylogenetic Trees
   4
   5 ## Copyright 2004-2011 Emmanuel Paradis
   6
   7 ## This file is part of the R-package `ape'.
   8 ## See the file ../COPYING for licensing issues.
   9
  10 is.rooted <- function(phy)
  11 {
  12     if (!inherits(phy, "phylo"))
  13         stop('object "phy" is not of class "phylo"')
  14     if (!is.null(phy$root.edge)) TRUE
  15     else
  16         if (tabulate(phy$edge[, 1])[length(phy$tip.label) + 1] > 2)
  17             FALSE else TRUE
  18 }
  19
  20 unroot <- function(phy)
  21 {
  22     if (!inherits(phy, "phylo"))
  23         stop('object "phy" is not of class "phylo"')
  24     N <- dim(phy$edge)[1]
  25     if (N < 3)
  26         stop("cannot unroot a tree with less than three edges.")
  27
  28     ## delete FIRST the root.edge (in case this is sufficient to
  29     ## unroot the tree, i.e. there is a multichotomy at the root)
  30     if (!is.null(phy$root.edge)) phy$root.edge <- NULL
  31     if (!is.rooted(phy)) return(phy)
  32
  33     n <- length(phy$tip.label)
  34     ROOT <- n + 1L
  35
  36 ### EDGEROOT[1]: the edge to delete
  37 ### EDGEROOT[2]: the target where to stick the edge to delete
  38
  39 ### If the tree is in pruningwise order, then the last two edges
  40 ### are those connected to the root; the node situated in
  41 ### phy$edge[N - 2L, 1L] will be the new root...
  42
  43     ophy <- attr(phy, "order")
  44     if (!is.null(ophy) && ophy == "pruningwise") {
  45         NEWROOT <- phy$edge[N - 2L, 1L]
  46         EDGEROOT <- c(N, N - 1L)
  47     } else {
  48
  49 ### ... otherwise, we remove one of the edges coming from
  50 ### the root, and eventually adding the branch length to
  51 ### the other one also coming from the root.
  52 ### In all cases, the node deleted is the 2nd one (numbered
  53 ### nb.tip+2 in 'edge'), so we simply need to renumber the
  54 ### nodes by adding 1, except the root (this remains the
  55 ### origin of the tree).
  56
  57         EDGEROOT <- which(phy$edge[, 1L] == ROOT)
  58         NEWROOT <- ROOT + 1L
  59     }
  60
  61     ## make sure EDGEROOT is ordered as described above:
  62     if (phy$edge[EDGEROOT[1L], 2L] != NEWROOT)
  63         EDGEROOT <- EDGEROOT[2:1]
  64
  65     phy$edge <- phy$edge[-EDGEROOT[1L], ]
  66
  67     s <- phy$edge == NEWROOT # renumber the new root
  68     phy$edge[s] <- ROOT
  69
  70     s <- phy$edge > NEWROOT # renumber all nodes greater than the new root
  71     phy$edge[s] <- phy$edge[s] - 1L
  72
  73     if (!is.null(phy$edge.length)) {
  74         phy$edge.length[EDGEROOT[2L]] <-
  75             phy$edge.length[EDGEROOT[2L]] + phy$edge.length[EDGEROOT[1L]]
  76         phy$edge.length <- phy$edge.length[-EDGEROOT[1L]]
  77     }
  78
  79     phy$Nnode <- phy$Nnode - 1L
  80
  81     if (!is.null(phy$node.label)) {
  82         if (NEWROOT == n + 2L)
  83             phy$node.label <- phy$node.label[-1]
  84         else {
  85             lbs <- phy$node.label
  86             tmp <- lbs[NEWROOT - n]
  87             lbs <- lbs[-c(1, NEWROOT)]
  88             phy$node.label <- c(tmp, lbs)
  89         }
  90     }
  91     phy
  92 }
  93
  94 root <- function(phy, outgroup, node = NULL,
  95                  resolve.root = FALSE, interactive = FALSE)
  96 {
  97     if (!inherits(phy, "phylo"))
  98         stop('object "phy" is not of class "phylo"')
  99     phy <- reorder(phy)
 100     n <- length(phy$tip.label)
 101     ROOT <- n + 1L
 102     if (interactive) {
 103         node <- identify(phy)$nodes
 104         cat("You have set resolve.root =", resolve.root, "\n")
 105     }
 106     if (!is.null(node)) {
 107         if (node <= n)
 108             stop("incorrect node#: should be greater than the number of taxa")
 109         outgroup <- NULL
 110         newroot <- node
 111     } else {
 112         if (is.numeric(outgroup)) {
 113             if (any(outgroup > n))
 114                 stop("incorrect taxa#: should not be greater than the number of taxa")
 115             outgroup <- sort(outgroup) # used below
 116         }
 117         if (is.character(outgroup))
 118             outgroup <- which(phy$tip.label %in% outgroup)
 119         if (length(outgroup) == n) return(phy)
 120
 121         ## First check that the outgroup is monophyletic--
 122         ## unless there's only one tip specified of course
 123         if (length(outgroup) > 1) {
 124             pp <- prop.part(phy)
 125             ingroup <- (1:n)[-outgroup]
 126             newroot <- 0L
 127             for (i in 2:phy$Nnode) {
 128                 if (identical(pp[[i]], ingroup)) {
 129                     newroot <- i + n
 130                     break
 131                 }
 132                 if (identical(pp[[i]], outgroup)) {
 133                     newroot <- phy$edge[which(phy$edge[, 2] == i + n), 1]
 134                     break
 135                 }
 136             }
 137             if (!newroot)
 138                 stop("the specified outgroup is not monophyletic")
 139         } else newroot <- phy$edge[which(phy$edge[, 2] == outgroup), 1]
 140     }
 141     N <- Nedge(phy)
 142     oldNnode <- phy$Nnode
 143     if (newroot == ROOT) {
 144         if (resolve.root) {
 145             snw <- which(phy$edge[, 1] == newroot)
 146             if (length(snw) > 2) {
 147                 a <- snw[1]:(snw[2] - 1)
 148                 b <- snw[2]:N
 149                 newnod <- oldNnode + n + 1
 150                 phy$edge[snw[-1], 1] <- newnod
 151                 phy$edge <- rbind(phy$edge[a, ], c(ROOT, newnod),
 152                                   phy$edge[b, ])
 153                 if (!is.null(phy$edge.length))
 154                     phy$edge.length <-
 155                         c(phy$edge.length[a], 0, phy$edge.length[b])
 156                 phy$Nnode <- phy$Nnode + 1L
 157                 ## node renumbering (see comments below)
 158                 newNb <- integer(n + oldNnode)
 159                 newNb[newroot] <- n + 1L
 160                 sndcol <- phy$edge[, 2] > n
 161                 phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
 162                     (n + 2):(n + phy$Nnode)
 163                 phy$edge[, 1] <- newNb[phy$edge[, 1]]
 164             }
 165         }
 166         return(phy)
 167     }
 168
 169     phy$root.edge <- NULL # just in case...
 170     Nclade <- tabulate(phy$edge[, 1])[ROOT] # degree of the root node
 171     ## if only 2 edges connect to the root, we have to fuse them:
 172     fuseRoot <- Nclade == 2
 173
 174     start <- which(phy$edge[, 1] == ROOT)
 175     end <- c(start[-1] - 1, N)
 176     o <- integer(N)
 177     INV <- logical(N)
 178
 179     w <- which(phy$edge[, 2] == newroot)
 180     nod <- phy$edge[w, 1]
 181     i <- w
 182     NEXT <- 1L
 183
 184     ## The loop below starts from the new root and goes up in the edge
 185     ## matrix reversing the edges that need to be as well as well
 186     ## inverting their order. The other edges must not be changed, so
 187     ## their indices are stored in `stack'.
 188     ## We then bind the other edges in a straightforward way.
 189
 190     if (nod != ROOT) {
 191         ## it is important that the 3 next lines
 192         ## are inside this "if" statement
 193         o[NEXT] <- w
 194         NEXT <- NEXT + 1L
 195         INV[w] <- TRUE
 196         i <- w - 1L
 197         stack <- 0L
 198         repeat {
 199             if (phy$edge[i, 2] == nod) {
 200                 if (stack) {
 201                     o[NEXT:(NEXT + stack - 1L)] <- (i + 1L):(i + stack)
 202                     NEXT <- NEXT + stack
 203                     stack <- 0L
 204                 }
 205                 if (phy$edge[i, 1] == ROOT) break
 206                 o[NEXT] <- i
 207                 NEXT <- NEXT + 1L
 208                 INV[i] <- TRUE
 209                 nod <- phy$edge[i, 1]
 210             } else stack <- stack + 1L
 211             i <- i - 1L
 212         }
 213     }
 214
 215     ## we keep the edge leading to the old root if needed:
 216     if (!fuseRoot) {
 217         o[NEXT] <- i
 218         INV[i] <- TRUE
 219         NEXT <- NEXT + 1L
 220     }
 221
 222     endOfOutgroup <- which(phy$edge[(w+1):N, 1] < newroot)[1] + w - 1
 223     if (is.na(endOfOutgroup)) endOfOutgroup <- N
 224     endOfClade <- end[end >= endOfOutgroup][1]
 225
 226     ## bind the other clades...
 227     for (j in 1:Nclade) {
 228         if (end[j] == endOfClade) next
 229         ## do we have to fuse the two basal edges?
 230         if (fuseRoot) {
 231             phy$edge[start[j], 1] <- phy$edge[i, 2]
 232             if (!is.null(phy$edge.length))
 233                 phy$edge.length[start[j]] <- phy$edge.length[start[j]] +
 234                     phy$edge.length[i]
 235         } #else {
 236           #  o[NEXT] <- i#start[j]
 237           #  NEXT <- NEXT + 1L
 238         #}
 239         s <- start[j]:end[j]
 240         ne <- length(s)
 241         o[NEXT:(NEXT + ne - 1L)] <- s
 242         NEXT <- NEXT + ne
 243     }
 244
 245     ## possibly bind the edges below the outgroup till the end of the clade
 246     if (all(endOfOutgroup != end)) {
 247         j <- (endOfOutgroup + 1L):endOfClade
 248         ## we must take care that the branch inversions done above
 249         ## may have changed the hierarchy of clades here, so we
 250         ## travel from the bottom of this series of edges
 251         stack <- 0L
 252         inverted <- phy$edge[INV, 1] # <- fails if ', 2]' is used
 253         for (k in rev(j)) {
 254             if (any(phy$edge[k, 1] == inverted)) {
 255                 o[NEXT] <- k
 256                 NEXT <- NEXT + 1L
 257                 if (stack){
 258                     o[NEXT:(NEXT + stack - 1L)] <- (k + 1L):(k + stack)
 259                     NEXT <- NEXT + stack
 260                     stack <- 0L
 261                 }
 262             } else stack <- stack + 1L
 263         }
 264     }
 265
 266     ## ... and the outgroup
 267     s <- (w + 1L):endOfOutgroup
 268     ne <- length(s)
 269     o[NEXT:(NEXT + ne - 1L)] <- s
 270
 271     if (fuseRoot) {
 272         phy$Nnode <- oldNnode - 1L
 273         N <- N - 1L
 274     }
 275     phy$edge[INV, ] <- phy$edge[INV, 2:1]
 276     phy$edge <- phy$edge[o, ]
 277     if (!is.null(phy$edge.length))
 278         phy$edge.length <- phy$edge.length[o]
 279
 280     if (resolve.root) {
 281         newnod <- oldNnode + n + 1
 282         if (length(outgroup) == 1L) {
 283             wh <- which(phy$edge[, 2] == outgroup)
 284             phy$edge[1] <- newnod
 285             phy$edge <-
 286                 rbind(c(newroot, newnod), phy$edge[-wh, ], phy$edge[wh, ])
 287             snw <- which(phy$edge[, 1] == newroot)
 288             phy$edge[snw[length(snw) - 1], 1] <- newnod
 289             if (!is.null(phy$edge.length)) {
 290                 phy$edge.length <-
 291                     c(0, phy$edge.length[-wh], phy$edge.length[wh])
 292             }
 293         } else {
 294             wh <- which(phy$edge[, 1] == newroot)
 295             phy$edge[wh[-1], 1] <- newnod
 296             s1 <- 1:(wh[2] - 1)
 297             s2 <- wh[2]:N
 298             phy$edge <-
 299                 rbind(phy$edge[s1, ], c(newroot, newnod), phy$edge[s2, ])
 300             if (!is.null(phy$edge.length)) {
 301                 tmp <- phy$edge.length[1]
 302                 phy$edge.length[1] <- 0
 303                 phy$edge.length <-
 304                     c(phy$edge.length[s1], tmp, phy$edge.length[s2])
 305             }
 306         }
 307         ## N <- N + 1L ... not needed
 308         phy$Nnode <- phy$Nnode + 1L
 309     }
 310
 311     ## The block below renumbers the nodes so that they conform
 312     ## to the "phylo" format
 313     newNb <- integer(n + oldNnode)
 314     newNb[newroot] <- n + 1L
 315     sndcol <- phy$edge[, 2] > n
 316     ## executed from right to left, so newNb is modified before phy$edge:
 317     phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- n + 2:phy$Nnode
 318     phy$edge[, 1] <- newNb[phy$edge[, 1]]
 319
 320     if (!is.null(phy$node.label)) {
 321         #browser()
 322         newNb <- newNb[-(1:n)]
 323         if (fuseRoot) {
 324             newNb <- newNb[-1]
 325             phy$node.label <- phy$node.label[-1]
 326         }
 327         phy$node.label <- phy$node.label[order(newNb)]
 328         if (resolve.root) {
 329             phy$node.label[is.na(phy$node.label)] <- phy$node.label[1]
 330             phy$node.label[1] <- NA
 331             ##phy$node.label <- c(phy$node.label[1], NA, phy$node.label[-1])
 332             ##phy$node.label <- c("NA", phy$node.label)
 333         }
 334     }
 335     phy
 336 }