new dist.topo (to be finished) and modified multi2di

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)
diff --git a/ChangeLog b/ChangeLog

index 3d55ac2e92a2c68f6d1a42692b7467d39eb3d740..600e598bc667f1603115ef305a440cdc8900ca2f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -16,12 +16,17 @@ NEW FEATURES
  
      o add.scale.bar() has a new option 'ask' to draw interactively.
  
+    o The branch length score replaces the geodesic distance in dist.topo.
+
  
  BUG FIXES
  
      o add.scale.bar() drew the bar outside the plotting region with the
        default options with unrooted or radial trees.
  
+    o dist.topo() made R stuck when the trees had different sizes (thanks
+      to Otto Cordero for the fix).
+
  
  
                 CHANGES IN APE VERSION 2.4-1
diff --git a/R/dist.topo.R b/R/dist.topo.R

index f82dd65193695e63301d706ac71bd63395212a5c..f2a10ae26352e16325763cea2cfda19fa5bcad46 100644 (file)
--- a/R/dist.topo.R
+++ b/R/dist.topo.R
@@ -1,24 +1,25 @@
-## dist.topo.R (2009-07-06)
+## dist.topo.R (2010-01-22)
  
  ##      Topological Distances, Tree Bipartitions,
  ##   Consensus Trees, and Bootstrapping Phylogenies
  
-## Copyright 2005-2009 Emmanuel Paradis
+## Copyright 2005-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
  dist.topo <- function(x, y, method = "PH85")
  {
-    if (method == "BHV01" && (is.null(x$edge.length) || is.null(y$edge.length)))
+    if (method == "score" && (is.null(x$edge.length) || is.null(y$edge.length)))
          stop("trees must have branch lengths for Billera et al.'s distance.")
-    n <- length(x$tip.label)
-    bp1 <- .Call("bipartition", x$edge, n, x$Nnode, PACKAGE = "ape")
+    nx <- length(x$tip.label)
+    bp1 <- .Call("bipartition", x$edge, nx, x$Nnode, PACKAGE = "ape")
      bp1 <- lapply(bp1, function(xx) sort(x$tip.label[xx]))
+    ny <- length(y$tip.label) # fix by Otto Cordero
      ## fix by Tim Wallstrom:
-    bp2.tmp <- .Call("bipartition", y$edge, n, y$Nnode, PACKAGE = "ape")
+    bp2.tmp <- .Call("bipartition", y$edge, ny, y$Nnode, PACKAGE = "ape")
      bp2 <- lapply(bp2.tmp, function(xx) sort(y$tip.label[xx]))
-    bp2.comp <- lapply(bp2.tmp, function(xx) setdiff(1:n, xx))
+    bp2.comp <- lapply(bp2.tmp, function(xx) setdiff(1:ny, xx))
      bp2.comp <- lapply(bp2.comp, function(xx) sort(y$tip.label[xx]))
      ## End
      q1 <- length(bp1)
@@ -27,8 +28,7 @@ dist.topo <- function(x, y, method = "PH85")
          p <- 0
          for (i in 1:q1) {
              for (j in 1:q2) {
-                if (identical(bp1[[i]], bp2[[j]]) |
-                    identical(bp1[[i]], bp2.comp[[j]])) {
+                if (identical(bp1[[i]], bp2[[j]]) | identical(bp1[[i]], bp2.comp[[j]])) {
                      p <- p + 1
                      break
                  }
@@ -37,25 +37,27 @@ dist.topo <- function(x, y, method = "PH85")
          dT <- q1 + q2 - 2 * p # same than:
          ##dT <- if (q1 == q2) 2*(q1 - p) else 2*(min(q1, q2) - p) + abs(q1 - q2)
      }
-    if (method == "BHV01") {
+    if (method == "score") {
          dT <- 0
          found1 <- FALSE
          found2 <- logical(q2)
          found2[1] <- TRUE
          for (i in 2:q1) {
              for (j in 2:q2) {
-                if (identical(bp1[[i]], bp2[[j]])) {
-                    dT <- dT + abs(x$edge.length[which(x$edge[, 2] == n + i)] -
-                                   y$edge.length[which(y$edge[, 2] == n + j)])
+                if (identical(bp1[[i]], bp2[[j]]) | identical(bp1[[i]], bp2.comp[[j]])) {
+                    if (i == 19) browser()
+                    dT <- dT + (x$edge.length[which(x$edge[, 2] == nx + i)] -
+                                y$edge.length[which(y$edge[, 2] == ny + j)])^2
                      found1 <- found2[j] <- TRUE
                      break
                  }
              }
              if (found1) found1 <- FALSE
-            else dT <- dT + x$edge.length[which(x$edge[, 2] == n + i)]
+            else dT <- dT + (x$edge.length[which(x$edge[, 2] == nx + i)])^2
          }
          if (!all(found2))
-          dT <- dT + sum(y$edge.length[y$edge[, 2] %in% (n + which(!found2))])
+            dT <- dT + sum((y$edge.length[y$edge[, 2] %in% (ny + which(!found2))])^2)
+        dT <- sqrt(dT)
      }
      dT
  }
diff --git a/R/multi2di.R b/R/multi2di.R

index 5099fd12a60c6b796268aaefb911ea6421bbcf67..2f9b62b59618e12a9eeab6aaef65854c2c6382ec 100644 (file)
--- a/R/multi2di.R
+++ b/R/multi2di.R
@@ -1,8 +1,8 @@
-## multi2di.R (2008-04-09)
+## multi2di.R (2010-01-23)
  
  ##   Collapse and Resolve Multichotomies
  
-## Copyright 2005-2008 Emmanuel Paradis
+## Copyright 2005-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
@@ -13,7 +13,8 @@ multi2di <- function(phy, random = TRUE)
      target <- which(degree > 2)
      if (!length(target)) return(phy)
      nb.edge <- dim(phy$edge)[1]
-    nextnode <- length(phy$tip.label) + phy$Nnode + 1
+    n <- length(phy$tip.label)
+    nextnode <- n + phy$Nnode + 1
      new.edge <- edge2delete <- NULL
      wbl <- FALSE
      if (!is.null(phy$edge.length)) {
@@ -61,13 +62,31 @@ multi2di <- function(phy, random = TRUE)
      }
      phy$edge <- rbind(phy$edge[-edge2delete, ], new.edge)
      if (wbl)
-      phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length)
+        phy$edge.length <- c(phy$edge.length[-edge2delete], new.edge.length)
      if (!is.null(attr(phy, "order"))) attr(phy, "order") <- NULL
      if (!is.null(phy$node.label))
          phy$node.label <-
              c(phy$node.label, rep("", phy$Nnode - length(phy$node.label)))
-    reorder(phy)
-    ##read.tree(text = write.tree(phy))
+    phy <- reorder(phy)
+
+    ## the node numbers are not in increasing order in edge[, 2]: this
+    ## will confuse drop.tip and other functions (root), so renumber them
+    newNb <- integer(phy$Nnode)
+    newNb[1] <- n + 1L
+    sndcol <- phy$edge[, 2] > n
+
+    ## reorder node labels before changing edge:
+    if (!is.null(phy$node.label)) {
+        o <- 1 + rank(phy$edge[sndcol, 2])
+        ## the root's label is not changed:
+        phy$node.label <- phy$node.label[c(1, o)]
+    }
+
+    ## executed from right to left, so newNb is modified before phy$edge:
+    phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2] - n] <-
+        n + 2:phy$Nnode
+    phy$edge[, 1] <- newNb[phy$edge[, 1] - n]
+    phy
  }
  
  di2multi <- function(phy, tol = 1e-8)
diff --git a/Thanks b/Thanks

index 5d1b3f972020e72de44f9cd3fdfae07b0337d679..3a8555ac37aec2a77f77560c8bab3b6efc97f3ca 100644 (file)
--- a/Thanks
+++ b/Thanks
@@ -5,10 +5,10 @@ Many users gave important feed-back with their encouragements,
  comments, or bug reports: thanks to all of you!
  
  Significant bug fixes were provided by Cécile Ané, James Bullard,
-Éric Durand, Olivier François, Rich FitzJohn, Bret Larget, Nick Matzke,
-Michael Phelan, Elizabeth Purdom, Dan Rabosky, Klaus Schliep, Tim
-Wallstrom, Li-San Wang, Yan Wong, Peter Wragg, and Janet Young. Contact
-me if I forgot someone.
+Otto Cordero, Éric Durand, Olivier François, Rich FitzJohn, Bret
+Larget, Nick Matzke, Michael Phelan, Elizabeth Purdom, Dan Rabosky,
+Klaus Schliep, Tim Wallstrom, Li-San Wang, Yan Wong, Peter Wragg,
+and Janet Young. Contact me if I forgot someone.
  
  Kurt Hornik, of the R Core Team, helped in several occasions to
  fix some problems and bugs.
diff --git a/man/dist.topo.Rd b/man/dist.topo.Rd

index d10817c57fe82576a7dc867e618018b67821fb13..3fbedcd0761dc0649bf62775394ecb6a373d9f21 100644 (file)
--- a/man/dist.topo.Rd
+++ b/man/dist.topo.Rd
@@ -8,7 +8,7 @@ dist.topo(x, y, method = "PH85")
    \item{x}{an object of class \code{"phylo"}.}
    \item{y}{an object of class \code{"phylo"}.}
    \item{method}{a character string giving the method to be used: either
-    \code{"PH85"}, or \code{"BHV01"}.}
+    \code{"PH85"}, or \code{"score"}.}
  }
  \description{
    This function computes the topological distance between two
@@ -19,22 +19,31 @@ dist.topo(x, y, method = "PH85")
  }
  \details{
    Two methods are available: the one by Penny and Hendy (1985), and the
-  one by Billera et al. (2001).
+  branch length score by Kuhner and Felsenstein (1994).
  
    The topological distance is defined as twice the number of internal
    branches defining different bipartitions of the tips (Penny and Hendy
    1985). Rzhetsky and Nei (1992) proposed a modification of the original
    formula to take multifurcations into account.
  
-  Billera et al. (2001) developed a distance from the geometry of a tree
-  space. The distance between two trees can be seen as the sum of the
-  branch lengths that need be erased to have two similar trees.
+  The branch length score may be seen as similar to the previous
+  distance but taking branch lengths into account. Kuhner and
+  Felsenstein (1994) proposed to calculate the square root of the sum of
+  the squared differences of the (internal) branch lengths defining
+  similar bipartitions (or splits) in both trees.
+}
+\note{
+  The geodesic distance of Billera et al. (2001) has been disabled.
  }
  \references{
    Billera, L. J., Holmes, S. P. and Vogtmann, K. (2001) Geometry of the
    space of phylogenetic trees. \emph{Advances in Applied Mathematics},
    \bold{27}, 733--767.
  
+  Kuhner, M. K. and Felsenstein, J. (1994) Simulation comparison of
+  phylogeny algorithms under equal and unequal evolutionary rates.
+  \emph{Molecular Biology and Evolution}, \bold{11}, 459--468.
+
    Nei, M. and Kumar, S. (2000) \emph{Molecular evolution and
    phylogenetics}. Oxford: Oxford University Press.
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Sat, 23 Jan 2010 15:17:12 +0000 (15:17 +0000)
ChangeLog		patch \| blob \| history
R/dist.topo.R		patch \| blob \| history
R/multi2di.R		patch \| blob \| history
Thanks		patch \| blob \| history
man/dist.topo.Rd		patch \| blob \| history