## multi2di.R (2010-01-23) ## Collapse and Resolve Multichotomies ## Copyright 2005-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. multi2di <- function(phy, random = TRUE) { degree <- tabulate(phy$edge[, 1]) target <- which(degree > 2) if (!length(target)) return(phy) nb.edge <- dim(phy$edge)[1] n <- length(phy$tip.label) nextnode <- n + phy$Nnode + 1 new.edge <- edge2delete <- NULL wbl <- FALSE if (!is.null(phy$edge.length)) { wbl <- TRUE new.edge.length <- NULL } for (node in target) { ind <- which(phy$edge[, 1] == node) N <- length(ind) desc <- phy$edge[ind, 2] if (random) { ## if we shuffle the descendants, we need to eventually ## reorder the corresponding branch lenghts (see below) ## so we store the result of sample() tmp <- sample(length(desc)) desc <- desc[tmp] res <- rtree(N)$edge } else { res <- matrix(0, 2*N - 2, 2) res[, 1] <- N + rep(1:(N - 1), each = 2) res[, 2] <- N + rep(2:N, each = 2) res[seq(1, by = 2, length.out = N - 1), 2] <- 1:(N - 1) res[length(res)] <- N } if (wbl) { ## keep the branch lengths coming from `node' el <- numeric(dim(res)[1]) # initialized with 0's el[res[, 2] <= N] <- if (random) phy$edge.length[ind][tmp] else phy$edge.length[ind] } ## now substitute the nodes in `res' ## `node' stays at the "root" of these new ## edges whereas their "tips" are `desc' Nodes <- c(node, seq(from = nextnode, length.out = N - 2)) res[, 1] <- Nodes[res[, 1] - N] tmp <- res[, 2] > N res[tmp, 2] <- Nodes[res[tmp, 2] - N] res[!tmp, 2] <- desc[res[!tmp, 2]] new.edge <- rbind(new.edge, res) edge2delete <- c(edge2delete, ind) if (wbl) new.edge.length <- c(new.edge.length, el) nextnode <- nextnode + N - 2 phy$Nnode <- phy$Nnode + N - 2 } phy$edge <- rbind(phy$edge[-edge2delete, ], new.edge) if (wbl) phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length) if (!is.null(attr(phy, "order"))) attr(phy, "order") <- NULL if (!is.null(phy$node.label)) phy$node.label <- c(phy$node.label, rep("", phy$Nnode - length(phy$node.label))) phy <- reorder(phy) ## the node numbers are not in increasing order in edge[, 2]: this ## will confuse drop.tip and other functions (root), so renumber them newNb <- integer(phy$Nnode) newNb[1] <- n + 1L sndcol <- phy$edge[, 2] > n ## reorder node labels before changing edge: if (!is.null(phy$node.label)) { o <- 1 + rank(phy$edge[sndcol, 2]) ## the root's label is not changed: phy$node.label <- phy$node.label[c(1, o)] } ## executed from right to left, so newNb is modified before phy$edge: phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2] - n] <- n + 2:phy$Nnode phy$edge[, 1] <- newNb[phy$edge[, 1] - n] phy } di2multi <- function(phy, tol = 1e-8) { if (is.null(phy$edge.length)) stop("the tree has no branch length") ## We select only the internal branches which are ## significantly small: ind <- which(phy$edge.length < tol & phy$edge[, 2] > length(phy$tip.label)) n <- length(ind) if (!n) return(phy) ## recursive function to `propagate' node #'s in case ## there is a series of consecutive edges to remove foo <- function(ancestor, des2del) { wh <- which(phy$edge[, 1] == des2del) for (k in wh) { if (phy$edge[k, 2] %in% node2del) foo(ancestor, phy$edge[k, 2]) else phy$edge[k, 1] <<- ancestor } } node2del <- phy$edge[ind, 2] anc <- phy$edge[ind, 1] for (i in 1:n) { if (anc[i] %in% node2del) next foo(anc[i], node2del[i]) } phy$edge <- phy$edge[-ind, ] phy$edge.length <- phy$edge.length[-ind] phy$Nnode <- phy$Nnode - n ## Now we renumber the nodes that need to be: sel <- phy$edge > min(node2del) for (i in which(sel)) phy$edge[i] <- phy$edge[i] - sum(node2del < phy$edge[i]) if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[-(node2del - length(phy$tip.label))] phy }