## drop.tip.R (2007-12-21)

##   Remove Tips in a Phylogenetic Tree

## Copyright 2003-2007 Emmanuel Paradis

## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.

drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE,
                     root.edge = 0)
{
    if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"")
    phy <- new2old.phylo(phy)
    if (subtree) {
        trim.internal <- TRUE
        edge.bak <- phy$edge
    }
    tmp <- as.numeric(phy$edge)
    nb.tip <- max(tmp)
    nb.node <- -min(tmp)
    nobr <- is.null(phy$edge.length)
    if (is.numeric(tip)) tip <- phy$tip.label[tip]
    ## find the tips to drop...:
    del <- phy$tip.label %in% tip
    ## ... and the corresponding terminal branches:
    ind <- which(phy$edge[, 2] %in% as.character(which(del)))
    ## drop them...:
    phy$edge <- phy$edge[-ind, ]
    ## ... and the lengths if applies:
    if (!nobr) phy$edge.length <- phy$edge.length[-ind]
    ## drop the tip labels:
    phy$tip.label <- phy$tip.label[!del]
    if (trim.internal) {
        if (root.edge) {
            ## find the MRCA of the remaining tips:
            seq.nod <- list()
            ## This is modified since some tips were deleted!!
            for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) {
                vec <- i
                j <- i
                while (j != "-1") {
                    ind <- which(phy$edge[, 2] == j)
                    j <- phy$edge[ind, 1]
                    vec <- c(vec, j)
                }
                seq.nod[[i]] <- vec
            }
            sn <- lapply(seq.nod, rev)
            i <- 1
            x <- unlist(lapply(sn, function(x) x[i]))
            while (length(unique(x)) == 1) {
                x <- unlist(lapply(sn, function(x) x[i]))
                i <-  i + 1
            }
            MRCA <- sn[[1]][i - 2]
            newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge
            for (i in 1:root.edge) {
                ind <- which(phy$edge[, 2] == MRCA)
                newrootedge <- newrootedge + phy$edge.length[ind]
                MRCA <- phy$edge[ind, 1]
                if (MRCA == "-1" && i < root.edge) {
                    newrootedge <- newrootedge
                    break
                }
            }
            phy$root.edge <- newrootedge
        } else {
            if (!is.null(phy$root.edge)) phy$root.edge <- NULL
        }
        while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) {
            temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
            k <- temp %in% phy$edge[, 1]
            ind <- phy$edge[, 2] %in% temp[!k]
            phy$edge <- phy$edge[!ind, ]
            if (!nobr) phy$edge.length <- phy$edge.length[!ind]
        }
    } else {
        temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
        k <- temp %in% phy$edge[, 1]
        ind <- phy$edge[, 2] %in% temp[!k]
        phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind)))
        if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) else {
            new.tip.label <- phy$node.label[!k]
            phy$node.label <- phy$node.label[k]
        }
        phy$tip.label <- c(phy$tip.label, new.tip.label)
    }
    useless.nodes <- names(which(table(phy$edge[, 1]) == 1))
    if (subtree) {
        if (!nobr) mnbr <- mean(phy$edge.length)
        if (length(useless.nodes) == 1) n <- length(tip) else {
            seq.nod <- list()
            wh <- numeric(0)
            for (i in as.character(which(del))) { # it is not needed to loop through all tips!
                vec <- i
                j <- i
                while (!(j %in% useless.nodes)) {
                    ind <- which(edge.bak[, 2] == j)
                    wh <- c(wh, ind)
                    j <- edge.bak[ind, 1]
                    vec <- c(vec, j)
                }
                seq.nod[[i]] <- vec
            }
            n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1])))
        }
        new.lab <- paste("[", n, "_tips]", sep = "")
        for (i in 1:length(useless.nodes)) {
            wh <- which(phy$edge[, 1] == useless.nodes[i])
            phy$tip.label <- c(phy$tip.label, new.lab[i])
            if (wh == dim(phy$edge)[1]) {
                phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i)))
                if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr)
            } else {
                phy$edge <- rbind(phy$edge[1:wh, ],
                                  c(useless.nodes[i], as.character(nb.tip + i)),
                                  phy$edge[(wh + 1):dim(phy$edge)[1], ])
                if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr,
                                                phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)])
            }
        }
    } else {
        for (i in useless.nodes) {
            ind1 <- which(phy$edge[, 1] == i)
            ind2 <- which(phy$edge[, 2] == i)
            phy$edge[ind2, 2] <- phy$edge[ind1, 2]
            phy$edge <- phy$edge[-ind1, ]
            if (!nobr) {
                phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1]
                phy$edge.length <- phy$edge.length[-ind1]
            }
        }
    }
    tmp <- as.numeric(phy$edge)
    if (!is.null(phy$node.label)) {
        x <- unique(tmp)
        x <- x[x < 0]
        phy$node.label <- phy$node.label[-x]
    }
    n <- length(tmp)
    nodes <- tmp < 0
    ind.nodes <- (1:n)[nodes]
    ind.tips <- (1:n)[!nodes]
    new.nodes <- -as.numeric(factor(-tmp[nodes]))
    new.tips <- as.numeric(factor(tmp[!nodes]))
    tmp[ind.nodes] <- new.nodes
    tmp[ind.tips] <- new.tips
    dim(tmp) <- c(n / 2, 2)
    mode(tmp) <- "character"
    phy$edge <- tmp
    phy <- old2new.phylo(phy)
    if (!trim.internal || subtree) {
        S <- write.tree(phy)
        phy <- if (nobr) clado.build(S) else tree.build(S)
    }
    phy
}