some small changes

[ape.git] / R / drop.tip.R
diff --git a/R/drop.tip.R b/R/drop.tip.R

index ee92d02fc8180b12bec505fbb3d4421bd6a489c5..5b635f680f4459a579242dfc4fdf2c549d9b5f22 100644 (file)
--- a/R/drop.tip.R
+++ b/R/drop.tip.R
@@ -1,158 +1,229 @@
-## drop.tip.R (2007-12-21)
+## drop.tip.R (2011-11-21)
  
  ##   Remove Tips in a Phylogenetic Tree
  
-## Copyright 2003-2007 Emmanuel Paradis
+## Copyright 2003-2011 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
-drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE,
-                     root.edge = 0)
+extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE)
  {
-    if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"")
-    phy <- new2old.phylo(phy)
-    if (subtree) {
-        trim.internal <- TRUE
-        edge.bak <- phy$edge
+    Ntip <- length(phy$tip.label)
+    ROOT <- Ntip + 1
+    Nedge <- dim(phy$edge)[1]
+    wbl <- !is.null(phy$edge.length)
+    if (interactive) node <- identify(phy)$nodes else {
+        if (length(node) > 1) {
+            node <- node[1]
+            warning("only the first value of 'node' has been considered")
+        }
+        if (is.character(node)) {
+            if (is.null(phy$node.label))
+                stop("the tree has no node labels")
+            node <- which(phy$node.label %in% node) + Ntip
+        }
+        if (node <= Ntip)
+            stop("node number must be greater than the number of tips")
      }
-    tmp <- as.numeric(phy$edge)
-    nb.tip <- max(tmp)
-    nb.node <- -min(tmp)
-    nobr <- is.null(phy$edge.length)
-    if (is.numeric(tip)) tip <- phy$tip.label[tip]
-    ## find the tips to drop...:
-    del <- phy$tip.label %in% tip
-    ## ... and the corresponding terminal branches:
-    ind <- which(phy$edge[, 2] %in% as.character(which(del)))
-    ## drop them...:
-    phy$edge <- phy$edge[-ind, ]
-    ## ... and the lengths if applies:
-    if (!nobr) phy$edge.length <- phy$edge.length[-ind]
-    ## drop the tip labels:
-    phy$tip.label <- phy$tip.label[!del]
-    if (trim.internal) {
-        if (root.edge) {
-            ## find the MRCA of the remaining tips:
-            seq.nod <- list()
-            ## This is modified since some tips were deleted!!
-            for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) {
-                vec <- i
-                j <- i
-                while (j != "-1") {
-                    ind <- which(phy$edge[, 2] == j)
-                    j <- phy$edge[ind, 1]
-                    vec <- c(vec, j)
-                }
-                seq.nod[[i]] <- vec
-            }
-            sn <- lapply(seq.nod, rev)
-            i <- 1
-            x <- unlist(lapply(sn, function(x) x[i]))
-            while (length(unique(x)) == 1) {
-                x <- unlist(lapply(sn, function(x) x[i]))
-                i <-  i + 1
-            }
-            MRCA <- sn[[1]][i - 2]
-            newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge
-            for (i in 1:root.edge) {
-                ind <- which(phy$edge[, 2] == MRCA)
-                newrootedge <- newrootedge + phy$edge.length[ind]
-                MRCA <- phy$edge[ind, 1]
-                if (MRCA == "-1" && i < root.edge) {
-                    newrootedge <- newrootedge
-                    break
-                }
-            }
-            phy$root.edge <- newrootedge
-        } else {
-            if (!is.null(phy$root.edge)) phy$root.edge <- NULL
+    if (node == ROOT) return(phy)
+    phy <- reorder(phy) # insure it is in cladewise order
+    root.node <- which(phy$edge[, 2] == node)
+    start <- root.node + 1 # start of the clade looked for
+    anc <- phy$edge[root.node, 1] # the ancestor of 'node'
+    next.anc <- which(phy$edge[-(1:start), 1] <= anc) # find the next occurence of 'anc' or an 'older' node
+
+    keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge
+
+    if (root.edge) {
+        NewRootEdge <- phy$edge.length[root.node]
+        root.edge <- root.edge - 1
+        while (root.edge) {
+            if (anc == ROOT) break
+            i <- which(phy$edge[, 2] ==  anc)
+            NewRootEdge <- NewRootEdge + phy$edge.length[i]
+            root.edge <- root.edge - 1
+            anc <- phy$edge[i, 1]
          }
-        while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) {
-            temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
-            k <- temp %in% phy$edge[, 1]
-            ind <- phy$edge[, 2] %in% temp[!k]
-            phy$edge <- phy$edge[!ind, ]
-            if (!nobr) phy$edge.length <- phy$edge.length[!ind]
+        if (root.edge && !is.null(phy$root.edge))
+            NewRootEdge <- NewRootEdge + phy$root.edge
+        phy$root.edge <- NewRootEdge
+    }
+
+    phy$edge <- phy$edge[keep, ]
+    if (wbl) phy$edge.length <- phy$edge.length[keep]
+    TIPS <- phy$edge[, 2] <= Ntip
+    tip <- phy$edge[TIPS, 2]
+    ## Fix by Ludovic Mallet and Mahendra Mariadassou (2011-11-21):
+    name <- vector("character", length(tip))
+    name[order(tip)] <- phy$tip.label[tip]
+    phy$tip.label <- name
+    ## End of fix
+    ## keep the ordering so no need to reorder tip.label:
+    phy$edge[TIPS, 2] <- order(tip)
+    if (!is.null(phy$node.label))
+        phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
+    Ntip <- length(phy$tip.label)
+    phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
+    ## The block below renumbers the nodes so that they conform
+    ## to the "phylo" format -- same as in root()
+    newNb <- integer(Ntip + phy$Nnode)
+    newNb[node] <- Ntip + 1L
+    sndcol <- phy$edge[, 2] > Ntip
+    ## executed from right to left, so newNb is modified before phy$edge:
+    phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+        (Ntip + 2):(Ntip + phy$Nnode)
+    phy$edge[, 1] <- newNb[phy$edge[, 1]]
+    phy
+}
+
+drop.tip <-
+    function(phy, tip, trim.internal = TRUE, subtree = FALSE,
+             root.edge = 0, rooted = is.rooted(phy), interactive = FALSE)
+{
+    if (!inherits(phy, "phylo"))
+        stop('object "phy" is not of class "phylo"')
+    if (!length(tip)) return(phy)
+
+    Ntip <- length(phy$tip.label)
+    ## find the tips to drop:
+    if (interactive) {
+        cat("Left-click close to the tips you want to drop; right-click when finished...\n")
+        xy <- locator()
+        nToDrop <- length(xy$x)
+        tip <- integer(nToDrop)
+        lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
+        for (i in 1:nToDrop) {
+            d <- sqrt((xy$x[i] - lastPP$xx)^2 + (xy$y[i] - lastPP$yy)^2)
+            tip[i] <- which.min(d)
          }
      } else {
-        temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
-        k <- temp %in% phy$edge[, 1]
-        ind <- phy$edge[, 2] %in% temp[!k]
-        phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind)))
-        if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) else {
-            new.tip.label <- phy$node.label[!k]
-            phy$node.label <- phy$node.label[k]
-        }
-        phy$tip.label <- c(phy$tip.label, new.tip.label)
+        if (is.character(tip))
+            tip <- which(phy$tip.label %in% tip)
+    }
+    if (any(tip > Ntip))
+        warning("some tip numbers were higher than the number of tips")
+
+    if (!rooted && subtree) {
+        phy <- root(phy, (1:Ntip)[-tip][1])
+        root.edge <- 0
      }
-    useless.nodes <- names(which(table(phy$edge[, 1]) == 1))
+
+    phy <- reorder(phy)
+    NEWROOT <- ROOT <- Ntip + 1
+    Nnode <- phy$Nnode
+    Nedge <- dim(phy$edge)[1]
      if (subtree) {
-        if (!nobr) mnbr <- mean(phy$edge.length)
-        if (length(useless.nodes) == 1) n <- length(tip) else {
-            seq.nod <- list()
-            wh <- numeric(0)
-            for (i in as.character(which(del))) { # it is not needed to loop through all tips!
-                vec <- i
-                j <- i
-                while (!(j %in% useless.nodes)) {
-                    ind <- which(edge.bak[, 2] == j)
-                    wh <- c(wh, ind)
-                    j <- edge.bak[ind, 1]
-                    vec <- c(vec, j)
-                }
-                seq.nod[[i]] <- vec
-            }
-            n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1])))
+        trim.internal <- TRUE
+        tr <- reorder(phy, "pruningwise")
+        N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode),
+                as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]),
+                as.integer(Nedge), double(Ntip + Nnode),
+                DUP = FALSE, PACKAGE = "ape")[[6]]
+    }
+    wbl <- !is.null(phy$edge.length)
+    edge1 <- phy$edge[, 1] # local copies
+    edge2 <- phy$edge[, 2] #
+    keep <- !logical(Nedge)
+
+    ## delete the terminal edges given by `tip':
+    keep[match(tip, edge2)] <- FALSE
+
+    if (trim.internal) {
+        ints <- edge2 > Ntip
+        ## delete the internal edges that do not have anymore
+        ## descendants (ie, they are in the 2nd col of `edge' but
+        ## not in the 1st one)
+        repeat {
+            sel <- !(edge2 %in% edge1[keep]) & ints & keep
+            if (!sum(sel)) break
+            keep[sel] <- FALSE
          }
-        new.lab <- paste("[", n, "_tips]", sep = "")
-        for (i in 1:length(useless.nodes)) {
-            wh <- which(phy$edge[, 1] == useless.nodes[i])
-            phy$tip.label <- c(phy$tip.label, new.lab[i])
-            if (wh == dim(phy$edge)[1]) {
-                phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i)))
-                if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr)
-            } else {
-                phy$edge <- rbind(phy$edge[1:wh, ],
-                                  c(useless.nodes[i], as.character(nb.tip + i)),
-                                  phy$edge[(wh + 1):dim(phy$edge)[1], ])
-                if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr,
-                                                phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)])
-            }
+        if (subtree) {
+            ## keep the subtending edge(s):
+            subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep]
+            keep[subt] <- TRUE
          }
-    } else {
-        for (i in useless.nodes) {
-            ind1 <- which(phy$edge[, 1] == i)
-            ind2 <- which(phy$edge[, 2] == i)
-            phy$edge[ind2, 2] <- phy$edge[ind1, 2]
-            phy$edge <- phy$edge[-ind1, ]
-            if (!nobr) {
-                phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1]
-                phy$edge.length <- phy$edge.length[-ind1]
+        if (root.edge && wbl) {
+            degree <- tabulate(edge1[keep])
+            if (degree[ROOT] == 1) {
+                j <- integer(0) # will store the indices of the edges below the new root
+                repeat {
+                    i <- which(edge1 == NEWROOT & keep)
+                    j <- c(i, j)
+                    NEWROOT <- edge2[i]
+                    degree <- tabulate(edge1[keep])
+                    if (degree[NEWROOT] > 1) break
+                }
+                keep[j] <- FALSE
+                if (length(j) > root.edge) j <- 1:root.edge
+                NewRootEdge <- sum(phy$edge.length[j])
+                if (length(j) < root.edge && !is.null(phy$root.edge))
+                    NewRootEdge <- NewRootEdge + phy$root.edge
+                phy$root.edge <- NewRootEdge
              }
          }
      }
-    tmp <- as.numeric(phy$edge)
-    if (!is.null(phy$node.label)) {
-        x <- unique(tmp)
-        x <- x[x < 0]
-        phy$node.label <- phy$node.label[-x]
+
+    if (!root.edge) phy$root.edge <- NULL
+
+    ## drop the edges
+    phy$edge <- phy$edge[keep, ]
+    if (wbl) phy$edge.length <- phy$edge.length[keep]
+
+    ## find the new terminal edges (works whatever 'subtree' and 'trim.internal'):
+    TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1])
+
+    ## get the old No. of the nodes and tips that become tips:
+    oldNo.ofNewTips <- phy$edge[TERMS, 2]
+
+    ## in case some tips are dropped but kept because of 'subtree = TRUE':
+    if (subtree) {
+        i <- which(tip %in% oldNo.ofNewTips)
+        if (length(i)) {
+            phy$tip.label[tip[i]] <- "[1_tip]"
+            tip <- tip[-i]
+        }
      }
-    n <- length(tmp)
-    nodes <- tmp < 0
-    ind.nodes <- (1:n)[nodes]
-    ind.tips <- (1:n)[!nodes]
-    new.nodes <- -as.numeric(factor(-tmp[nodes]))
-    new.tips <- as.numeric(factor(tmp[!nodes]))
-    tmp[ind.nodes] <- new.nodes
-    tmp[ind.tips] <- new.tips
-    dim(tmp) <- c(n / 2, 2)
-    mode(tmp) <- "character"
-    phy$edge <- tmp
-    phy <- old2new.phylo(phy)
-    if (!trim.internal || subtree) {
-        S <- write.tree(phy)
-        phy <- if (nobr) clado.build(S) else tree.build(S)
+
+    n <- length(oldNo.ofNewTips) # the new number of tips in the tree
+
+    ## the tips may not be sorted in increasing order in the
+    ## 2nd col of edge, so no need to reorder $tip.label
+    phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2])
+    phy$tip.label <- phy$tip.label[-tip]
+
+    ## make new tip labels if necessary:
+    if (subtree || !trim.internal) {
+        ## get the numbers of the nodes that become tips:
+        node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip]
+        new.tip.label <- if (subtree) {
+            paste("[", N[node2tip], "_tips]", sep = "")
+        } else {
+            if (is.null(phy$node.label)) rep("NA", length(node2tip))
+            else phy$node.label[node2tip - Ntip]
+        }
+        if (!is.null(phy$node.label))
+            phy$node.label <- phy$node.label[-(node2tip - Ntip)]
+        phy$tip.label <- c(phy$tip.label, new.tip.label)
      }
-    phy
+
+    ## update node.label if needed:
+    if (!is.null(phy$node.label))
+        phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
+
+    phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode
+
+    ## The block below renumbers the nodes so that they conform
+    ## to the "phylo" format -- same as in root()
+    newNb <- integer(n + phy$Nnode)
+    newNb[NEWROOT] <- n + 1L
+    sndcol <- phy$edge[, 2] > n
+    ## executed from right to left, so newNb is modified before phy$edge:
+    phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+        (n + 2):(n + phy$Nnode)
+    phy$edge[, 1] <- newNb[phy$edge[, 1]]
+    storage.mode(phy$edge) <- "integer"
+    collapse.singles(phy)
  }
+