X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fdrop.tip.R;h=5b635f680f4459a579242dfc4fdf2c549d9b5f22;hb=eccd02caa284c933ccabf54c2b52d2c871cbd8e8;hp=d65fd3616966e5e6ca95b86afc7acf0db7f3d76a;hpb=cc53d4b238132388c49561f647b8839d5bd6ef1e;p=ape.git diff --git a/R/drop.tip.R b/R/drop.tip.R index d65fd36..5b635f6 100644 --- a/R/drop.tip.R +++ b/R/drop.tip.R @@ -1,160 +1,229 @@ -## drop.tip.R (2008-04-17) +## drop.tip.R (2011-11-21) ## Remove Tips in a Phylogenetic Tree -## Copyright 2003-2008 Emmanuel Paradis +## Copyright 2003-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE, - root.edge = 0) +extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE) { - if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") - phy <- new2old.phylo(phy) - if (subtree) { - trim.internal <- TRUE - edge.bak <- phy$edge + Ntip <- length(phy$tip.label) + ROOT <- Ntip + 1 + Nedge <- dim(phy$edge)[1] + wbl <- !is.null(phy$edge.length) + if (interactive) node <- identify(phy)$nodes else { + if (length(node) > 1) { + node <- node[1] + warning("only the first value of 'node' has been considered") + } + if (is.character(node)) { + if (is.null(phy$node.label)) + stop("the tree has no node labels") + node <- which(phy$node.label %in% node) + Ntip + } + if (node <= Ntip) + stop("node number must be greater than the number of tips") } - tmp <- as.numeric(phy$edge) - nb.tip <- max(tmp) - ## fix by Yan Wong: - nodes <- setdiff(tmp,1:nb.tip) #not sure if this also needs sorting into order - ## end - nobr <- is.null(phy$edge.length) - if (is.numeric(tip)) tip <- phy$tip.label[tip] - ## find the tips to drop...: - del <- phy$tip.label %in% tip - ## ... and the corresponding terminal branches: - ind <- which(phy$edge[, 2] %in% as.character(which(del))) - ## drop them...: - phy$edge <- phy$edge[-ind, ] - ## ... and the lengths if applies: - if (!nobr) phy$edge.length <- phy$edge.length[-ind] - ## drop the tip labels: - phy$tip.label <- phy$tip.label[!del] - if (trim.internal) { - if (root.edge) { - ## find the MRCA of the remaining tips: - seq.nod <- list() - ## This is modified since some tips were deleted!! - for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) { - vec <- i - j <- i - while (j != "-1") { - ind <- which(phy$edge[, 2] == j) - j <- phy$edge[ind, 1] - vec <- c(vec, j) - } - seq.nod[[i]] <- vec - } - sn <- lapply(seq.nod, rev) - i <- 1 - x <- unlist(lapply(sn, function(x) x[i])) - while (length(unique(x)) == 1) { - x <- unlist(lapply(sn, function(x) x[i])) - i <- i + 1 - } - MRCA <- sn[[1]][i - 2] - newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge - for (i in 1:root.edge) { - ind <- which(phy$edge[, 2] == MRCA) - newrootedge <- newrootedge + phy$edge.length[ind] - MRCA <- phy$edge[ind, 1] - if (MRCA == "-1" && i < root.edge) { - newrootedge <- newrootedge - break - } - } - phy$root.edge <- newrootedge - } else { - if (!is.null(phy$root.edge)) phy$root.edge <- NULL + if (node == ROOT) return(phy) + phy <- reorder(phy) # insure it is in cladewise order + root.node <- which(phy$edge[, 2] == node) + start <- root.node + 1 # start of the clade looked for + anc <- phy$edge[root.node, 1] # the ancestor of 'node' + next.anc <- which(phy$edge[-(1:start), 1] <= anc) # find the next occurence of 'anc' or an 'older' node + + keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge + + if (root.edge) { + NewRootEdge <- phy$edge.length[root.node] + root.edge <- root.edge - 1 + while (root.edge) { + if (anc == ROOT) break + i <- which(phy$edge[, 2] == anc) + NewRootEdge <- NewRootEdge + phy$edge.length[i] + root.edge <- root.edge - 1 + anc <- phy$edge[i, 1] } - while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) { - temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] - k <- temp %in% phy$edge[, 1] - ind <- phy$edge[, 2] %in% temp[!k] - phy$edge <- phy$edge[!ind, ] - if (!nobr) phy$edge.length <- phy$edge.length[!ind] + if (root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge + } + + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + TIPS <- phy$edge[, 2] <= Ntip + tip <- phy$edge[TIPS, 2] + ## Fix by Ludovic Mallet and Mahendra Mariadassou (2011-11-21): + name <- vector("character", length(tip)) + name[order(tip)] <- phy$tip.label[tip] + phy$tip.label <- name + ## End of fix + ## keep the ordering so no need to reorder tip.label: + phy$edge[TIPS, 2] <- order(tip) + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] + Ntip <- length(phy$tip.label) + phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(Ntip + phy$Nnode) + newNb[node] <- Ntip + 1L + sndcol <- phy$edge[, 2] > Ntip + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (Ntip + 2):(Ntip + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + phy +} + +drop.tip <- + function(phy, tip, trim.internal = TRUE, subtree = FALSE, + root.edge = 0, rooted = is.rooted(phy), interactive = FALSE) +{ + if (!inherits(phy, "phylo")) + stop('object "phy" is not of class "phylo"') + if (!length(tip)) return(phy) + + Ntip <- length(phy$tip.label) + ## find the tips to drop: + if (interactive) { + cat("Left-click close to the tips you want to drop; right-click when finished...\n") + xy <- locator() + nToDrop <- length(xy$x) + tip <- integer(nToDrop) + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + for (i in 1:nToDrop) { + d <- sqrt((xy$x[i] - lastPP$xx)^2 + (xy$y[i] - lastPP$yy)^2) + tip[i] <- which.min(d) } } else { - ## fix by Yan Wong: - k <- nodes %in% phy$edge[, 1] #nodes that have descendants - ind <- phy$edge[, 2] %in% nodes[!k] - phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind))) - if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) - else new.tip.label <- phy$node.label[!k] - phy$tip.label <- c(phy$tip.label, new.tip.label) - #N.B. phy$node.label can be left: it is altered later - ## end + if (is.character(tip)) + tip <- which(phy$tip.label %in% tip) + } + if (any(tip > Ntip)) + warning("some tip numbers were higher than the number of tips") + + if (!rooted && subtree) { + phy <- root(phy, (1:Ntip)[-tip][1]) + root.edge <- 0 } - useless.nodes <- names(which(table(phy$edge[, 1]) == 1)) + + phy <- reorder(phy) + NEWROOT <- ROOT <- Ntip + 1 + Nnode <- phy$Nnode + Nedge <- dim(phy$edge)[1] if (subtree) { - if (!nobr) mnbr <- mean(phy$edge.length) - if (length(useless.nodes) == 1) n <- length(tip) else { - seq.nod <- list() - wh <- numeric(0) - for (i in as.character(which(del))) { # it is not needed to loop through all tips! - vec <- i - j <- i - while (!(j %in% useless.nodes)) { - ind <- which(edge.bak[, 2] == j) - wh <- c(wh, ind) - j <- edge.bak[ind, 1] - vec <- c(vec, j) - } - seq.nod[[i]] <- vec - } - n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1]))) + trim.internal <- TRUE + tr <- reorder(phy, "pruningwise") + N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode), + as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), + as.integer(Nedge), double(Ntip + Nnode), + DUP = FALSE, PACKAGE = "ape")[[6]] + } + wbl <- !is.null(phy$edge.length) + edge1 <- phy$edge[, 1] # local copies + edge2 <- phy$edge[, 2] # + keep <- !logical(Nedge) + + ## delete the terminal edges given by `tip': + keep[match(tip, edge2)] <- FALSE + + if (trim.internal) { + ints <- edge2 > Ntip + ## delete the internal edges that do not have anymore + ## descendants (ie, they are in the 2nd col of `edge' but + ## not in the 1st one) + repeat { + sel <- !(edge2 %in% edge1[keep]) & ints & keep + if (!sum(sel)) break + keep[sel] <- FALSE } - new.lab <- paste("[", n, "_tips]", sep = "") - for (i in 1:length(useless.nodes)) { - wh <- which(phy$edge[, 1] == useless.nodes[i]) - phy$tip.label <- c(phy$tip.label, new.lab[i]) - if (wh == dim(phy$edge)[1]) { - phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i))) - if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr) - } else { - phy$edge <- rbind(phy$edge[1:wh, ], - c(useless.nodes[i], as.character(nb.tip + i)), - phy$edge[(wh + 1):dim(phy$edge)[1], ]) - if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr, - phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)]) - } + if (subtree) { + ## keep the subtending edge(s): + subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep] + keep[subt] <- TRUE } - } else { - for (i in useless.nodes) { - ind1 <- which(phy$edge[, 1] == i) - ind2 <- which(phy$edge[, 2] == i) - phy$edge[ind2, 2] <- phy$edge[ind1, 2] - phy$edge <- phy$edge[-ind1, ] - if (!nobr) { - phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1] - phy$edge.length <- phy$edge.length[-ind1] + if (root.edge && wbl) { + degree <- tabulate(edge1[keep]) + if (degree[ROOT] == 1) { + j <- integer(0) # will store the indices of the edges below the new root + repeat { + i <- which(edge1 == NEWROOT & keep) + j <- c(i, j) + NEWROOT <- edge2[i] + degree <- tabulate(edge1[keep]) + if (degree[NEWROOT] > 1) break + } + keep[j] <- FALSE + if (length(j) > root.edge) j <- 1:root.edge + NewRootEdge <- sum(phy$edge.length[j]) + if (length(j) < root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge } } } - tmp <- as.numeric(phy$edge) - if (!is.null(phy$node.label)) { - x <- unique(tmp) - x <- x[x < 0] - phy$node.label <- phy$node.label[-x] + + if (!root.edge) phy$root.edge <- NULL + + ## drop the edges + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + + ## find the new terminal edges (works whatever 'subtree' and 'trim.internal'): + TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]) + + ## get the old No. of the nodes and tips that become tips: + oldNo.ofNewTips <- phy$edge[TERMS, 2] + + ## in case some tips are dropped but kept because of 'subtree = TRUE': + if (subtree) { + i <- which(tip %in% oldNo.ofNewTips) + if (length(i)) { + phy$tip.label[tip[i]] <- "[1_tip]" + tip <- tip[-i] + } } - n <- length(tmp) - nodes <- tmp < 0 - ind.nodes <- (1:n)[nodes] - ind.tips <- (1:n)[!nodes] - new.nodes <- -as.numeric(factor(-tmp[nodes])) - new.tips <- as.numeric(factor(tmp[!nodes])) - tmp[ind.nodes] <- new.nodes - tmp[ind.tips] <- new.tips - dim(tmp) <- c(n / 2, 2) - mode(tmp) <- "character" - phy$edge <- tmp - phy <- old2new.phylo(phy) - if (!trim.internal || subtree) { - S <- write.tree(phy) - phy <- if (nobr) clado.build(S) else tree.build(S) + + n <- length(oldNo.ofNewTips) # the new number of tips in the tree + + ## the tips may not be sorted in increasing order in the + ## 2nd col of edge, so no need to reorder $tip.label + phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]) + phy$tip.label <- phy$tip.label[-tip] + + ## make new tip labels if necessary: + if (subtree || !trim.internal) { + ## get the numbers of the nodes that become tips: + node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip] + new.tip.label <- if (subtree) { + paste("[", N[node2tip], "_tips]", sep = "") + } else { + if (is.null(phy$node.label)) rep("NA", length(node2tip)) + else phy$node.label[node2tip - Ntip] + } + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[-(node2tip - Ntip)] + phy$tip.label <- c(phy$tip.label, new.tip.label) } - phy + + ## update node.label if needed: + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] + + phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode + + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(n + phy$Nnode) + newNb[NEWROOT] <- n + 1L + sndcol <- phy$edge[, 2] > n + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (n + 2):(n + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + storage.mode(phy$edge) <- "integer" + collapse.singles(phy) } +