From 15e231b55ef0be61c20bfc82efd2316e085122a9 Mon Sep 17 00:00:00 2001 From: paradis Date: Sat, 23 Jan 2010 15:17:12 +0000 Subject: [PATCH] new dist.topo (to be finished) and modified multi2di git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@106 6e262413-ae40-0410-9e79-b911bd7a66b7 --- ChangeLog | 5 +++++ R/dist.topo.R | 32 +++++++++++++++++--------------- R/multi2di.R | 31 +++++++++++++++++++++++++------ Thanks | 8 ++++---- man/dist.topo.Rd | 19 ++++++++++++++----- 5 files changed, 65 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3d55ac2..600e598 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,12 +16,17 @@ NEW FEATURES o add.scale.bar() has a new option 'ask' to draw interactively. + o The branch length score replaces the geodesic distance in dist.topo. + BUG FIXES o add.scale.bar() drew the bar outside the plotting region with the default options with unrooted or radial trees. + o dist.topo() made R stuck when the trees had different sizes (thanks + to Otto Cordero for the fix). + CHANGES IN APE VERSION 2.4-1 diff --git a/R/dist.topo.R b/R/dist.topo.R index f82dd65..f2a10ae 100644 --- a/R/dist.topo.R +++ b/R/dist.topo.R @@ -1,24 +1,25 @@ -## dist.topo.R (2009-07-06) +## dist.topo.R (2010-01-22) ## Topological Distances, Tree Bipartitions, ## Consensus Trees, and Bootstrapping Phylogenies -## Copyright 2005-2009 Emmanuel Paradis +## Copyright 2005-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. dist.topo <- function(x, y, method = "PH85") { - if (method == "BHV01" && (is.null(x$edge.length) || is.null(y$edge.length))) + if (method == "score" && (is.null(x$edge.length) || is.null(y$edge.length))) stop("trees must have branch lengths for Billera et al.'s distance.") - n <- length(x$tip.label) - bp1 <- .Call("bipartition", x$edge, n, x$Nnode, PACKAGE = "ape") + nx <- length(x$tip.label) + bp1 <- .Call("bipartition", x$edge, nx, x$Nnode, PACKAGE = "ape") bp1 <- lapply(bp1, function(xx) sort(x$tip.label[xx])) + ny <- length(y$tip.label) # fix by Otto Cordero ## fix by Tim Wallstrom: - bp2.tmp <- .Call("bipartition", y$edge, n, y$Nnode, PACKAGE = "ape") + bp2.tmp <- .Call("bipartition", y$edge, ny, y$Nnode, PACKAGE = "ape") bp2 <- lapply(bp2.tmp, function(xx) sort(y$tip.label[xx])) - bp2.comp <- lapply(bp2.tmp, function(xx) setdiff(1:n, xx)) + bp2.comp <- lapply(bp2.tmp, function(xx) setdiff(1:ny, xx)) bp2.comp <- lapply(bp2.comp, function(xx) sort(y$tip.label[xx])) ## End q1 <- length(bp1) @@ -27,8 +28,7 @@ dist.topo <- function(x, y, method = "PH85") p <- 0 for (i in 1:q1) { for (j in 1:q2) { - if (identical(bp1[[i]], bp2[[j]]) | - identical(bp1[[i]], bp2.comp[[j]])) { + if (identical(bp1[[i]], bp2[[j]]) | identical(bp1[[i]], bp2.comp[[j]])) { p <- p + 1 break } @@ -37,25 +37,27 @@ dist.topo <- function(x, y, method = "PH85") dT <- q1 + q2 - 2 * p # same than: ##dT <- if (q1 == q2) 2*(q1 - p) else 2*(min(q1, q2) - p) + abs(q1 - q2) } - if (method == "BHV01") { + if (method == "score") { dT <- 0 found1 <- FALSE found2 <- logical(q2) found2[1] <- TRUE for (i in 2:q1) { for (j in 2:q2) { - if (identical(bp1[[i]], bp2[[j]])) { - dT <- dT + abs(x$edge.length[which(x$edge[, 2] == n + i)] - - y$edge.length[which(y$edge[, 2] == n + j)]) + if (identical(bp1[[i]], bp2[[j]]) | identical(bp1[[i]], bp2.comp[[j]])) { + if (i == 19) browser() + dT <- dT + (x$edge.length[which(x$edge[, 2] == nx + i)] - + y$edge.length[which(y$edge[, 2] == ny + j)])^2 found1 <- found2[j] <- TRUE break } } if (found1) found1 <- FALSE - else dT <- dT + x$edge.length[which(x$edge[, 2] == n + i)] + else dT <- dT + (x$edge.length[which(x$edge[, 2] == nx + i)])^2 } if (!all(found2)) - dT <- dT + sum(y$edge.length[y$edge[, 2] %in% (n + which(!found2))]) + dT <- dT + sum((y$edge.length[y$edge[, 2] %in% (ny + which(!found2))])^2) + dT <- sqrt(dT) } dT } diff --git a/R/multi2di.R b/R/multi2di.R index 5099fd1..2f9b62b 100644 --- a/R/multi2di.R +++ b/R/multi2di.R @@ -1,8 +1,8 @@ -## multi2di.R (2008-04-09) +## multi2di.R (2010-01-23) ## Collapse and Resolve Multichotomies -## Copyright 2005-2008 Emmanuel Paradis +## Copyright 2005-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -13,7 +13,8 @@ multi2di <- function(phy, random = TRUE) target <- which(degree > 2) if (!length(target)) return(phy) nb.edge <- dim(phy$edge)[1] - nextnode <- length(phy$tip.label) + phy$Nnode + 1 + n <- length(phy$tip.label) + nextnode <- n + phy$Nnode + 1 new.edge <- edge2delete <- NULL wbl <- FALSE if (!is.null(phy$edge.length)) { @@ -61,13 +62,31 @@ multi2di <- function(phy, random = TRUE) } phy$edge <- rbind(phy$edge[-edge2delete, ], new.edge) if (wbl) - phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length) + phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length) if (!is.null(attr(phy, "order"))) attr(phy, "order") <- NULL if (!is.null(phy$node.label)) phy$node.label <- c(phy$node.label, rep("", phy$Nnode - length(phy$node.label))) - reorder(phy) - ##read.tree(text = write.tree(phy)) + phy <- reorder(phy) + + ## the node numbers are not in increasing order in edge[, 2]: this + ## will confuse drop.tip and other functions (root), so renumber them + newNb <- integer(phy$Nnode) + newNb[1] <- n + 1L + sndcol <- phy$edge[, 2] > n + + ## reorder node labels before changing edge: + if (!is.null(phy$node.label)) { + o <- 1 + rank(phy$edge[sndcol, 2]) + ## the root's label is not changed: + phy$node.label <- phy$node.label[c(1, o)] + } + + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2] - n] <- + n + 2:phy$Nnode + phy$edge[, 1] <- newNb[phy$edge[, 1] - n] + phy } di2multi <- function(phy, tol = 1e-8) diff --git a/Thanks b/Thanks index 5d1b3f9..3a8555a 100644 --- a/Thanks +++ b/Thanks @@ -5,10 +5,10 @@ Many users gave important feed-back with their encouragements, comments, or bug reports: thanks to all of you! Significant bug fixes were provided by Cécile Ané, James Bullard, -Éric Durand, Olivier François, Rich FitzJohn, Bret Larget, Nick Matzke, -Michael Phelan, Elizabeth Purdom, Dan Rabosky, Klaus Schliep, Tim -Wallstrom, Li-San Wang, Yan Wong, Peter Wragg, and Janet Young. Contact -me if I forgot someone. +Otto Cordero, Éric Durand, Olivier François, Rich FitzJohn, Bret +Larget, Nick Matzke, Michael Phelan, Elizabeth Purdom, Dan Rabosky, +Klaus Schliep, Tim Wallstrom, Li-San Wang, Yan Wong, Peter Wragg, +and Janet Young. Contact me if I forgot someone. Kurt Hornik, of the R Core Team, helped in several occasions to fix some problems and bugs. diff --git a/man/dist.topo.Rd b/man/dist.topo.Rd index d10817c..3fbedcd 100644 --- a/man/dist.topo.Rd +++ b/man/dist.topo.Rd @@ -8,7 +8,7 @@ dist.topo(x, y, method = "PH85") \item{x}{an object of class \code{"phylo"}.} \item{y}{an object of class \code{"phylo"}.} \item{method}{a character string giving the method to be used: either - \code{"PH85"}, or \code{"BHV01"}.} + \code{"PH85"}, or \code{"score"}.} } \description{ This function computes the topological distance between two @@ -19,22 +19,31 @@ dist.topo(x, y, method = "PH85") } \details{ Two methods are available: the one by Penny and Hendy (1985), and the - one by Billera et al. (2001). + branch length score by Kuhner and Felsenstein (1994). The topological distance is defined as twice the number of internal branches defining different bipartitions of the tips (Penny and Hendy 1985). Rzhetsky and Nei (1992) proposed a modification of the original formula to take multifurcations into account. - Billera et al. (2001) developed a distance from the geometry of a tree - space. The distance between two trees can be seen as the sum of the - branch lengths that need be erased to have two similar trees. + The branch length score may be seen as similar to the previous + distance but taking branch lengths into account. Kuhner and + Felsenstein (1994) proposed to calculate the square root of the sum of + the squared differences of the (internal) branch lengths defining + similar bipartitions (or splits) in both trees. +} +\note{ + The geodesic distance of Billera et al. (2001) has been disabled. } \references{ Billera, L. J., Holmes, S. P. and Vogtmann, K. (2001) Geometry of the space of phylogenetic trees. \emph{Advances in Applied Mathematics}, \bold{27}, 733--767. + Kuhner, M. K. and Felsenstein, J. (1994) Simulation comparison of + phylogeny algorithms under equal and unequal evolutionary rates. + \emph{Molecular Biology and Evolution}, \bold{11}, 459--468. + Nei, M. and Kumar, S. (2000) \emph{Molecular evolution and phylogenetics}. Oxford: Oxford University Press. -- 2.39.2