-## drop.tip.R (2007-12-21)
+## drop.tip.R (2011-11-21)
## Remove Tips in a Phylogenetic Tree
-## Copyright 2003-2007 Emmanuel Paradis
+## Copyright 2003-2011 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE,
- root.edge = 0)
+extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE)
{
- if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"")
- phy <- new2old.phylo(phy)
- if (subtree) {
- trim.internal <- TRUE
- edge.bak <- phy$edge
+ Ntip <- length(phy$tip.label)
+ ROOT <- Ntip + 1
+ Nedge <- dim(phy$edge)[1]
+ wbl <- !is.null(phy$edge.length)
+ if (interactive) node <- identify(phy)$nodes else {
+ if (length(node) > 1) {
+ node <- node[1]
+ warning("only the first value of 'node' has been considered")
+ }
+ if (is.character(node)) {
+ if (is.null(phy$node.label))
+ stop("the tree has no node labels")
+ node <- which(phy$node.label %in% node) + Ntip
+ }
+ if (node <= Ntip)
+ stop("node number must be greater than the number of tips")
}
- tmp <- as.numeric(phy$edge)
- nb.tip <- max(tmp)
- nb.node <- -min(tmp)
- nobr <- is.null(phy$edge.length)
- if (is.numeric(tip)) tip <- phy$tip.label[tip]
- ## find the tips to drop...:
- del <- phy$tip.label %in% tip
- ## ... and the corresponding terminal branches:
- ind <- which(phy$edge[, 2] %in% as.character(which(del)))
- ## drop them...:
- phy$edge <- phy$edge[-ind, ]
- ## ... and the lengths if applies:
- if (!nobr) phy$edge.length <- phy$edge.length[-ind]
- ## drop the tip labels:
- phy$tip.label <- phy$tip.label[!del]
- if (trim.internal) {
- if (root.edge) {
- ## find the MRCA of the remaining tips:
- seq.nod <- list()
- ## This is modified since some tips were deleted!!
- for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) {
- vec <- i
- j <- i
- while (j != "-1") {
- ind <- which(phy$edge[, 2] == j)
- j <- phy$edge[ind, 1]
- vec <- c(vec, j)
- }
- seq.nod[[i]] <- vec
- }
- sn <- lapply(seq.nod, rev)
- i <- 1
- x <- unlist(lapply(sn, function(x) x[i]))
- while (length(unique(x)) == 1) {
- x <- unlist(lapply(sn, function(x) x[i]))
- i <- i + 1
- }
- MRCA <- sn[[1]][i - 2]
- newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge
- for (i in 1:root.edge) {
- ind <- which(phy$edge[, 2] == MRCA)
- newrootedge <- newrootedge + phy$edge.length[ind]
- MRCA <- phy$edge[ind, 1]
- if (MRCA == "-1" && i < root.edge) {
- newrootedge <- newrootedge
- break
- }
- }
- phy$root.edge <- newrootedge
- } else {
- if (!is.null(phy$root.edge)) phy$root.edge <- NULL
+ if (node == ROOT) return(phy)
+ phy <- reorder(phy) # insure it is in cladewise order
+ root.node <- which(phy$edge[, 2] == node)
+ start <- root.node + 1 # start of the clade looked for
+ anc <- phy$edge[root.node, 1] # the ancestor of 'node'
+ next.anc <- which(phy$edge[-(1:start), 1] <= anc) # find the next occurence of 'anc' or an 'older' node
+
+ keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge
+
+ if (root.edge) {
+ NewRootEdge <- phy$edge.length[root.node]
+ root.edge <- root.edge - 1
+ while (root.edge) {
+ if (anc == ROOT) break
+ i <- which(phy$edge[, 2] == anc)
+ NewRootEdge <- NewRootEdge + phy$edge.length[i]
+ root.edge <- root.edge - 1
+ anc <- phy$edge[i, 1]
}
- while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) {
- temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
- k <- temp %in% phy$edge[, 1]
- ind <- phy$edge[, 2] %in% temp[!k]
- phy$edge <- phy$edge[!ind, ]
- if (!nobr) phy$edge.length <- phy$edge.length[!ind]
+ if (root.edge && !is.null(phy$root.edge))
+ NewRootEdge <- NewRootEdge + phy$root.edge
+ phy$root.edge <- NewRootEdge
+ }
+
+ phy$edge <- phy$edge[keep, ]
+ if (wbl) phy$edge.length <- phy$edge.length[keep]
+ TIPS <- phy$edge[, 2] <= Ntip
+ tip <- phy$edge[TIPS, 2]
+ ## Fix by Ludovic Mallet and Mahendra Mariadassou (2011-11-21):
+ name <- vector("character", length(tip))
+ name[order(tip)] <- phy$tip.label[tip]
+ phy$tip.label <- name
+ ## End of fix
+ ## keep the ordering so no need to reorder tip.label:
+ phy$edge[TIPS, 2] <- order(tip)
+ if (!is.null(phy$node.label))
+ phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
+ Ntip <- length(phy$tip.label)
+ phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
+ ## The block below renumbers the nodes so that they conform
+ ## to the "phylo" format -- same as in root()
+ newNb <- integer(Ntip + phy$Nnode)
+ newNb[node] <- Ntip + 1L
+ sndcol <- phy$edge[, 2] > Ntip
+ ## executed from right to left, so newNb is modified before phy$edge:
+ phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+ (Ntip + 2):(Ntip + phy$Nnode)
+ phy$edge[, 1] <- newNb[phy$edge[, 1]]
+ phy
+}
+
+drop.tip <-
+ function(phy, tip, trim.internal = TRUE, subtree = FALSE,
+ root.edge = 0, rooted = is.rooted(phy), interactive = FALSE)
+{
+ if (!inherits(phy, "phylo"))
+ stop('object "phy" is not of class "phylo"')
+ if (!length(tip)) return(phy)
+
+ Ntip <- length(phy$tip.label)
+ ## find the tips to drop:
+ if (interactive) {
+ cat("Left-click close to the tips you want to drop; right-click when finished...\n")
+ xy <- locator()
+ nToDrop <- length(xy$x)
+ tip <- integer(nToDrop)
+ lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
+ for (i in 1:nToDrop) {
+ d <- sqrt((xy$x[i] - lastPP$xx)^2 + (xy$y[i] - lastPP$yy)^2)
+ tip[i] <- which.min(d)
}
} else {
- temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
- k <- temp %in% phy$edge[, 1]
- ind <- phy$edge[, 2] %in% temp[!k]
- phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind)))
- if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) else {
- new.tip.label <- phy$node.label[!k]
- phy$node.label <- phy$node.label[k]
- }
- phy$tip.label <- c(phy$tip.label, new.tip.label)
+ if (is.character(tip))
+ tip <- which(phy$tip.label %in% tip)
+ }
+ if (any(tip > Ntip))
+ warning("some tip numbers were higher than the number of tips")
+
+ if (!rooted && subtree) {
+ phy <- root(phy, (1:Ntip)[-tip][1])
+ root.edge <- 0
}
- useless.nodes <- names(which(table(phy$edge[, 1]) == 1))
+
+ phy <- reorder(phy)
+ NEWROOT <- ROOT <- Ntip + 1
+ Nnode <- phy$Nnode
+ Nedge <- dim(phy$edge)[1]
if (subtree) {
- if (!nobr) mnbr <- mean(phy$edge.length)
- if (length(useless.nodes) == 1) n <- length(tip) else {
- seq.nod <- list()
- wh <- numeric(0)
- for (i in as.character(which(del))) { # it is not needed to loop through all tips!
- vec <- i
- j <- i
- while (!(j %in% useless.nodes)) {
- ind <- which(edge.bak[, 2] == j)
- wh <- c(wh, ind)
- j <- edge.bak[ind, 1]
- vec <- c(vec, j)
- }
- seq.nod[[i]] <- vec
- }
- n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1])))
+ trim.internal <- TRUE
+ tr <- reorder(phy, "pruningwise")
+ N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode),
+ as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]),
+ as.integer(Nedge), double(Ntip + Nnode),
+ DUP = FALSE, PACKAGE = "ape")[[6]]
+ }
+ wbl <- !is.null(phy$edge.length)
+ edge1 <- phy$edge[, 1] # local copies
+ edge2 <- phy$edge[, 2] #
+ keep <- !logical(Nedge)
+
+ ## delete the terminal edges given by `tip':
+ keep[match(tip, edge2)] <- FALSE
+
+ if (trim.internal) {
+ ints <- edge2 > Ntip
+ ## delete the internal edges that do not have anymore
+ ## descendants (ie, they are in the 2nd col of `edge' but
+ ## not in the 1st one)
+ repeat {
+ sel <- !(edge2 %in% edge1[keep]) & ints & keep
+ if (!sum(sel)) break
+ keep[sel] <- FALSE
}
- new.lab <- paste("[", n, "_tips]", sep = "")
- for (i in 1:length(useless.nodes)) {
- wh <- which(phy$edge[, 1] == useless.nodes[i])
- phy$tip.label <- c(phy$tip.label, new.lab[i])
- if (wh == dim(phy$edge)[1]) {
- phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i)))
- if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr)
- } else {
- phy$edge <- rbind(phy$edge[1:wh, ],
- c(useless.nodes[i], as.character(nb.tip + i)),
- phy$edge[(wh + 1):dim(phy$edge)[1], ])
- if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr,
- phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)])
- }
+ if (subtree) {
+ ## keep the subtending edge(s):
+ subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep]
+ keep[subt] <- TRUE
}
- } else {
- for (i in useless.nodes) {
- ind1 <- which(phy$edge[, 1] == i)
- ind2 <- which(phy$edge[, 2] == i)
- phy$edge[ind2, 2] <- phy$edge[ind1, 2]
- phy$edge <- phy$edge[-ind1, ]
- if (!nobr) {
- phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1]
- phy$edge.length <- phy$edge.length[-ind1]
+ if (root.edge && wbl) {
+ degree <- tabulate(edge1[keep])
+ if (degree[ROOT] == 1) {
+ j <- integer(0) # will store the indices of the edges below the new root
+ repeat {
+ i <- which(edge1 == NEWROOT & keep)
+ j <- c(i, j)
+ NEWROOT <- edge2[i]
+ degree <- tabulate(edge1[keep])
+ if (degree[NEWROOT] > 1) break
+ }
+ keep[j] <- FALSE
+ if (length(j) > root.edge) j <- 1:root.edge
+ NewRootEdge <- sum(phy$edge.length[j])
+ if (length(j) < root.edge && !is.null(phy$root.edge))
+ NewRootEdge <- NewRootEdge + phy$root.edge
+ phy$root.edge <- NewRootEdge
}
}
}
- tmp <- as.numeric(phy$edge)
- if (!is.null(phy$node.label)) {
- x <- unique(tmp)
- x <- x[x < 0]
- phy$node.label <- phy$node.label[-x]
+
+ if (!root.edge) phy$root.edge <- NULL
+
+ ## drop the edges
+ phy$edge <- phy$edge[keep, ]
+ if (wbl) phy$edge.length <- phy$edge.length[keep]
+
+ ## find the new terminal edges (works whatever 'subtree' and 'trim.internal'):
+ TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1])
+
+ ## get the old No. of the nodes and tips that become tips:
+ oldNo.ofNewTips <- phy$edge[TERMS, 2]
+
+ ## in case some tips are dropped but kept because of 'subtree = TRUE':
+ if (subtree) {
+ i <- which(tip %in% oldNo.ofNewTips)
+ if (length(i)) {
+ phy$tip.label[tip[i]] <- "[1_tip]"
+ tip <- tip[-i]
+ }
}
- n <- length(tmp)
- nodes <- tmp < 0
- ind.nodes <- (1:n)[nodes]
- ind.tips <- (1:n)[!nodes]
- new.nodes <- -as.numeric(factor(-tmp[nodes]))
- new.tips <- as.numeric(factor(tmp[!nodes]))
- tmp[ind.nodes] <- new.nodes
- tmp[ind.tips] <- new.tips
- dim(tmp) <- c(n / 2, 2)
- mode(tmp) <- "character"
- phy$edge <- tmp
- phy <- old2new.phylo(phy)
- if (!trim.internal || subtree) {
- S <- write.tree(phy)
- phy <- if (nobr) clado.build(S) else tree.build(S)
+
+ n <- length(oldNo.ofNewTips) # the new number of tips in the tree
+
+ ## the tips may not be sorted in increasing order in the
+ ## 2nd col of edge, so no need to reorder $tip.label
+ phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2])
+ phy$tip.label <- phy$tip.label[-tip]
+
+ ## make new tip labels if necessary:
+ if (subtree || !trim.internal) {
+ ## get the numbers of the nodes that become tips:
+ node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip]
+ new.tip.label <- if (subtree) {
+ paste("[", N[node2tip], "_tips]", sep = "")
+ } else {
+ if (is.null(phy$node.label)) rep("NA", length(node2tip))
+ else phy$node.label[node2tip - Ntip]
+ }
+ if (!is.null(phy$node.label))
+ phy$node.label <- phy$node.label[-(node2tip - Ntip)]
+ phy$tip.label <- c(phy$tip.label, new.tip.label)
}
- phy
+
+ ## update node.label if needed:
+ if (!is.null(phy$node.label))
+ phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
+
+ phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode
+
+ ## The block below renumbers the nodes so that they conform
+ ## to the "phylo" format -- same as in root()
+ newNb <- integer(n + phy$Nnode)
+ newNb[NEWROOT] <- n + 1L
+ sndcol <- phy$edge[, 2] > n
+ ## executed from right to left, so newNb is modified before phy$edge:
+ phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+ (n + 2):(n + phy$Nnode)
+ phy$edge[, 1] <- newNb[phy$edge[, 1]]
+ storage.mode(phy$edge) <- "integer"
+ collapse.singles(phy)
}
+