X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fdrop.tip.R;h=d819c757ce1c845d5165a555b9063668f5460962;hb=f3426364b40c7c0e6aadf6ea2690716425abdfc9;hp=d65fd3616966e5e6ca95b86afc7acf0db7f3d76a;hpb=3f528bd2c47301b64ed03bd28039bbf22a7510ad;p=ape.git diff --git a/R/drop.tip.R b/R/drop.tip.R index d65fd36..d819c75 100644 --- a/R/drop.tip.R +++ b/R/drop.tip.R @@ -1,160 +1,181 @@ -## drop.tip.R (2008-04-17) +## drop.tip.R (2009-01-07) ## Remove Tips in a Phylogenetic Tree -## Copyright 2003-2008 Emmanuel Paradis +## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE, - root.edge = 0) +extract.clade <- function(phy, node, root.edge = 0) { - if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") - phy <- new2old.phylo(phy) + Ntip <- length(phy$tip.label) + ROOT <- Ntip + 1 + Nedge <- dim(phy$edge)[1] + wbl <- !is.null(phy$edge.length) + if (length(node) > 1) { + node <- node[1] + warning("only the first value of 'node' has been considered") + } + if (is.character(node)) { + if (is.null(phy$node.label)) + stop("the tree has no node labels") + node <- which(phy$node.label %in% node) + Ntip + } + if (node <= Ntip) stop("node number must be greater than the number of tips") + if (node == ROOT) return(phy) + phy <- reorder(phy) # insure it is in cladewise order + root.node <- which(phy$edge[, 2] == node) + start <- root.node + 1 # start of the clade looked for + anc <- phy$edge[root.node, 1] # the ancestor of 'node' + next.anc <- which(phy$edge[-(1:start), 1] == anc) # find the next occurence of 'anc' + + keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge + + if (root.edge) { + NewRootEdge <- phy$edge.length[root.node] + root.edge <- root.edge - 1 + while (root.edge) { + if (anc == ROOT) break + i <- which(phy$edge[, 2] == anc) + NewRootEdge <- NewRootEdge + phy$edge.length[i] + root.edge <- root.edge - 1 + anc <- phy$edge[i, 1] + } + if (root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge + } + + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + TIPS <- phy$edge[, 2] <= Ntip + tip <- phy$edge[TIPS, 2] + phy$tip.label <- phy$tip.label[tip] + ## keep the ordering so no need to reorder tip.label: + phy$edge[TIPS, 2] <- order(tip) + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] + Ntip <- length(phy$tip.label) + phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(Ntip + phy$Nnode) + newNb[node] <- Ntip + 1L + sndcol <- phy$edge[, 2] > Ntip + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (Ntip + 2):(Ntip + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + phy +} + +drop.tip <- + function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0) +{ + if (class(phy) != "phylo") + stop('object "phy" is not of class "phylo"') + Ntip <- length(phy$tip.label) + NEWROOT <- ROOT <- Ntip + 1 + Nnode <- phy$Nnode + Nedge <- dim(phy$edge)[1] if (subtree) { trim.internal <- TRUE - edge.bak <- phy$edge + tr <- reorder(phy, "pruningwise") + N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode), + as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), + as.integer(Nedge), double(Ntip + Nnode), + DUP = FALSE, PACKAGE = "ape")[[6]] } - tmp <- as.numeric(phy$edge) - nb.tip <- max(tmp) - ## fix by Yan Wong: - nodes <- setdiff(tmp,1:nb.tip) #not sure if this also needs sorting into order - ## end - nobr <- is.null(phy$edge.length) - if (is.numeric(tip)) tip <- phy$tip.label[tip] - ## find the tips to drop...: - del <- phy$tip.label %in% tip - ## ... and the corresponding terminal branches: - ind <- which(phy$edge[, 2] %in% as.character(which(del))) - ## drop them...: - phy$edge <- phy$edge[-ind, ] - ## ... and the lengths if applies: - if (!nobr) phy$edge.length <- phy$edge.length[-ind] - ## drop the tip labels: - phy$tip.label <- phy$tip.label[!del] + wbl <- !is.null(phy$edge.length) + edge1 <- phy$edge[, 1] # local copies + edge2 <- phy$edge[, 2] # + keep <- !logical(Nedge) + ## find the tips to drop: + if (is.character(tip)) + tip <- which(phy$tip.label %in% tip) + trms <- edge2 <= Ntip + ## delete the terminal edges given by `tip': + keep[match(tip, edge2)] <- FALSE + if (trim.internal) { - if (root.edge) { - ## find the MRCA of the remaining tips: - seq.nod <- list() - ## This is modified since some tips were deleted!! - for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) { - vec <- i - j <- i - while (j != "-1") { - ind <- which(phy$edge[, 2] == j) - j <- phy$edge[ind, 1] - vec <- c(vec, j) - } - seq.nod[[i]] <- vec - } - sn <- lapply(seq.nod, rev) - i <- 1 - x <- unlist(lapply(sn, function(x) x[i])) - while (length(unique(x)) == 1) { - x <- unlist(lapply(sn, function(x) x[i])) - i <- i + 1 - } - MRCA <- sn[[1]][i - 2] - newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge - for (i in 1:root.edge) { - ind <- which(phy$edge[, 2] == MRCA) - newrootedge <- newrootedge + phy$edge.length[ind] - MRCA <- phy$edge[ind, 1] - if (MRCA == "-1" && i < root.edge) { - newrootedge <- newrootedge - break - } - } - phy$root.edge <- newrootedge - } else { - if (!is.null(phy$root.edge)) phy$root.edge <- NULL + ## delete the internal edges that do not have descendants + ## anymore (ie, they are in the 2nd column of `edge' but + ## not in the 1st one) + repeat { + sel <- !(edge2 %in% edge1[keep]) & !trms & keep + if (!sum(sel)) break + keep[sel] <- FALSE } - while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) { - temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] - k <- temp %in% phy$edge[, 1] - ind <- phy$edge[, 2] %in% temp[!k] - phy$edge <- phy$edge[!ind, ] - if (!nobr) phy$edge.length <- phy$edge.length[!ind] + if (subtree) { + ## keep the subtending edge(s): + subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep] + ## 'if (... ' needed below? + if (any(subt)) keep[which(subt)] <- TRUE } - } else { - ## fix by Yan Wong: - k <- nodes %in% phy$edge[, 1] #nodes that have descendants - ind <- phy$edge[, 2] %in% nodes[!k] - phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind))) - if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) - else new.tip.label <- phy$node.label[!k] - phy$tip.label <- c(phy$tip.label, new.tip.label) - #N.B. phy$node.label can be left: it is altered later - ## end - } - useless.nodes <- names(which(table(phy$edge[, 1]) == 1)) - if (subtree) { - if (!nobr) mnbr <- mean(phy$edge.length) - if (length(useless.nodes) == 1) n <- length(tip) else { - seq.nod <- list() - wh <- numeric(0) - for (i in as.character(which(del))) { # it is not needed to loop through all tips! - vec <- i - j <- i - while (!(j %in% useless.nodes)) { - ind <- which(edge.bak[, 2] == j) - wh <- c(wh, ind) - j <- edge.bak[ind, 1] - vec <- c(vec, j) + if (root.edge && wbl) { + degree <- tabulate(edge1[keep]) + if (degree[ROOT] == 1) { + j <- integer(0) # will store the indices of the edges below the new root + repeat { + i <- which(edge1 == NEWROOT & keep) + j <- c(i, j) + NEWROOT <- edge2[i] + degree <- tabulate(edge1[keep]) + if (degree[NEWROOT] > 1) break } - seq.nod[[i]] <- vec - } - n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1]))) - } - new.lab <- paste("[", n, "_tips]", sep = "") - for (i in 1:length(useless.nodes)) { - wh <- which(phy$edge[, 1] == useless.nodes[i]) - phy$tip.label <- c(phy$tip.label, new.lab[i]) - if (wh == dim(phy$edge)[1]) { - phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i))) - if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr) - } else { - phy$edge <- rbind(phy$edge[1:wh, ], - c(useless.nodes[i], as.character(nb.tip + i)), - phy$edge[(wh + 1):dim(phy$edge)[1], ]) - if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr, - phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)]) - } - } - } else { - for (i in useless.nodes) { - ind1 <- which(phy$edge[, 1] == i) - ind2 <- which(phy$edge[, 2] == i) - phy$edge[ind2, 2] <- phy$edge[ind1, 2] - phy$edge <- phy$edge[-ind1, ] - if (!nobr) { - phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1] - phy$edge.length <- phy$edge.length[-ind1] + keep[j] <- FALSE + if (length(j) > root.edge) j <- 1:root.edge + NewRootEdge <- sum(phy$edge.length[j]) + if (length(j) < root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge } } } - tmp <- as.numeric(phy$edge) - if (!is.null(phy$node.label)) { - x <- unique(tmp) - x <- x[x < 0] - phy$node.label <- phy$node.label[-x] - } - n <- length(tmp) - nodes <- tmp < 0 - ind.nodes <- (1:n)[nodes] - ind.tips <- (1:n)[!nodes] - new.nodes <- -as.numeric(factor(-tmp[nodes])) - new.tips <- as.numeric(factor(tmp[!nodes])) - tmp[ind.nodes] <- new.nodes - tmp[ind.tips] <- new.tips - dim(tmp) <- c(n / 2, 2) - mode(tmp) <- "character" - phy$edge <- tmp - phy <- old2new.phylo(phy) - if (!trim.internal || subtree) { - S <- write.tree(phy) - phy <- if (nobr) clado.build(S) else tree.build(S) + + if (!root.edge) phy$root.edge <- NULL + + ## upate the tree; 1) drop the edges and tip labels + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + phy$tip.label <- phy$tip.label[-tip] + ## 2) renumber the remaining tips now + TIPS <- phy$edge[, 2] <= Ntip + ## keep the ordering so no need to reorder tip.label: + phy$edge[TIPS, 2] <- order(phy$edge[TIPS, 2]) + Ntip <- length(phy$tip.label) # update Ntip + + ## make new tip labels if necessary + if (subtree || !trim.internal) { + new.trms <- !(phy$edge[, 2] %in% phy$edge[, 1]) & phy$edge[, 2] > Ntip + node2tip <- phy$edge[new.trms, 2] + if (subtree) + new.lab <- paste("[", N[node2tip], "_tips]", sep = "") + else { + new.lab <- + if (is.null(phy$node.label)) rep("NA", length(node2tip)) + else phy$node.label[node2tip - Ntip] + } + ## change the #'s in the edge matrix + new.tip <- Ntip + 1:length(node2tip) + phy$edge[new.trms, 2] <- new.tip + phy$tip.label[new.tip] <- new.lab + Ntip <- length(phy$tip.label) + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[-(node2tip - Ntip)] } - phy + phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L # 3) update phy$Nnode + + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(Ntip + phy$Nnode) + newNb[NEWROOT] <- Ntip + 1L + sndcol <- phy$edge[, 2] > Ntip + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (Ntip + 2):(Ntip + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + + collapse.singles(phy) }