From 984f527e672e911c74f5c2f09ad98a934312fe2f Mon Sep 17 00:00:00 2001 From: paradis Date: Mon, 2 Apr 2012 02:35:39 +0000 Subject: [PATCH] bunch of fixes for ape 3.0-2 git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@184 6e262413-ae40-0410-9e79-b911bd7a66b7 --- DESCRIPTION | 18 +-- NEWS | 9 +- R/SDM.R | 216 ++++++++++++++++++--------------- man/DNAbin.Rd | 2 +- man/SDM.Rd | 11 +- man/ape-package.Rd | 8 +- man/bionj.Rd | 5 +- man/clustal.Rd | 2 +- man/diversity.contrast.test.Rd | 11 +- man/image.DNAbin.Rd | 4 +- man/mcconwaysims.test.Rd | 4 + man/mvr.Rd | 11 ++ man/nj.Rd | 2 +- man/njs.Rd | 12 ++ man/richness.yule.test.Rd | 7 +- man/slowinskiguyer.test.Rd | 10 +- man/trex.Rd | 2 +- man/triangMtd.Rd | 6 +- man/yule.time.Rd | 2 +- src/bionjs.c | 19 ++- src/mvr.c | 13 +- src/mvrs.c | 7 +- src/njs.c | 52 +++++--- src/triangMtd.c | 6 +- src/triangMtds.c | 6 +- 25 files changed, 265 insertions(+), 180 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a048969..db205a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 3.0-2 -Date: 2012-03-30 +Date: 2012-04-02 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis @@ -8,20 +8,6 @@ Depends: R (>= 2.6.0) Suggests: gee Imports: gee, nlme, lattice ZipData: no -Description: ape provides functions for reading, writing, plotting, - and manipulating phylogenetic trees, analyses of comparative data in - a phylogenetic framework, ancestral character analyses, analyses of - diversification and macroevolution, computing distances from allelic - and nucleotide data, reading and writing nucleotide sequences, and - several tools such as Mantel's test, minimum spanning tree, - generalized skyline plots, graphical exploration of phylogenetic - data (alex, trex, kronoviz), estimation of absolute evolutionary - rates and clock-like trees using mean path lengths and penalized - likelihood. Phylogeny estimation can be done with the NJ, BIONJ, ME, - MVR, SDM, and triangle methods, and several methods handling - incomplete distance matrices (NJ*, BIONJ*, MVR*, and the - corresponding triangle method). Some functions call external - applications (PhyML, Clustal, T-Coffee, Muscle) whose results are - returned into R. +Description: ape provides functions for reading, writing, plotting, and manipulating phylogenetic trees, analyses of comparative data in a phylogenetic framework, ancestral character analyses, analyses of diversification and macroevolution, computing distances from allelic and nucleotide data, reading and writing nucleotide sequences, and several tools such as Mantel's test, minimum spanning tree, generalized skyline plots, graphical exploration of phylogenetic data (alex, trex, kronoviz), estimation of absolute evolutionary rates and clock-like trees using mean path lengths and penalized likelihood. Phylogeny estimation can be done with the NJ, BIONJ, ME, MVR, SDM, and triangle methods, and several methods handling incomplete distance matrices (NJ*, BIONJ*, MVR*, and the corresponding triangle method). Some functions call external applications (PhyML, Clustal, T-Coffee, Muscle) whose results are returned into R. License: GPL (>= 2) URL: http://ape.mpl.ird.fr/ diff --git a/NEWS b/NEWS index acbbbf9..409322f 100644 --- a/NEWS +++ b/NEWS @@ -18,7 +18,7 @@ BUG FIXES o mltt.plot(, backward = FALSE) did not set the x-axis correctly. o A bug was introduced in prop.clades() with ape 3.0. The help page - has been clarified relative to the use of the 'rooted' option. + has been clarified relative to the use of the option 'rooted'. o mantel.test() printed a useless warning message. @@ -32,6 +32,9 @@ BUG FIXES o njs(), bionjs(), and mvrs() now return an error if 'fs < 1'. + o SDM() did not work correctly. The code has also been generally + improved. + OTHER CHANGES @@ -39,6 +42,10 @@ OTHER CHANGES o The option 'original.data' of write.nexus() has been removed. + o The files bionjs.c, mvr.c, mvrs.c, njs.c, triangMtd.c, and + triangMtds.c have been improved which should fix some bugs in + the corresponding functions. + CHANGES IN APE VERSION 3.0-1 diff --git a/R/SDM.R b/R/SDM.R index faa263e..d2a6686 100644 --- a/R/SDM.R +++ b/R/SDM.R @@ -1,8 +1,8 @@ -## SDM.R (2011-10-11) +## SDM.R (2012-04-02) ## Construction of Consensus Distance Matrix With SDM -## Copyright 2011 Andrei-Alin Popescu +## Copyright 2011-2012 Andrei-Alin Popescu ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -11,23 +11,30 @@ SDM <- function(...) { st <- list(...) # first half contains matrices, second half s_p k <- length(st)/2 - labels <- attr(as.dist(st[[1]]), "Labels") - tot <- length(rownames(st[[1]])) - for (i in 2:k) { - labels <- union(labels, attr(as.dist(st[[i]]), "Labels")) - tot <- tot + length(rownames(st[[i]])) - } - sp <- mat.or.vec(1,k) - for (i in c(k + 1:k)) - sp[i - k] <- st[[i]] + ONEtoK <- seq_len(k) + + ## make sure we have only matrices: + for (i in ONEtoK) st[[i]] <- as.matrix(st[[i]]) + + ## store the rownames of each matrix in a list because they are often called: + ROWNAMES <- lapply(st[ONEtoK], rownames) + + ## the number of rows of each matrix: + NROWS <- sapply(ROWNAMES, length) + tot <- sum(NROWS) + + labels <- unique(unlist(ROWNAMES)) + sp <- unlist(st[k + ONEtoK]) - astart <- mat.or.vec(1, tot) # start of aip, astart[p] is start of aip + astart <- numeric(tot) # start of aip, astart[p] is start of aip astart[1] <- k for (i in 2:k) - astart[i] <- astart[i - 1] + length(rownames(st[[i - 1]])) + astart[i] <- astart[i - 1] + NROWS[i - 1] - a <- mat.or.vec(1, k + tot + k + length(labels)) - ## first k are alphas, subseqeunt ones aip + ## apparently erased by the operation below so no need to initialize: + ## a <- mat.or.vec(1, k + tot + k + length(labels)) + + ## first k are alphas, subsequent ones aip ## each matrix p starting at astart[p], next are ## Lagrange multipliers, miu, niu, lambda in that order n <- length(labels) @@ -35,60 +42,66 @@ SDM <- function(...) niustart <- miustart + n lambstart <- niustart + k - 1 - X <- mat.or.vec(n, n) - V <- mat.or.vec(n, n) - w <- mat.or.vec(n, n) - - col <- mat.or.vec(k + tot + k + length(labels), 1) # free terms of system + X <- matrix(0, n, n, dimnames = list(labels, labels)) + V <- w <- X - dimnames(X) <- list(labels, labels) - dimnames(V) <- list(labels, labels) - dimnames(W) <- list(labels, labels) + tmp <- 2 * k + tot + n + col <- numeric(tmp) # free terms of system for (i in 1:(n - 1)) { for (j in (i + 1):n) { - for (p in 1:k) { - d <- st[[p]] - if (is.element(rownames(X)[i], rownames(d)) & is.element(colnames(X)[j], colnames(d))) { + for (p in ONEtoK) { + ## d <- st[[p]] # not needed anymore + if (is.element(labels[i], ROWNAMES[[p]]) && is.element(labels[j], ROWNAMES[[p]])) { w[i, j] <- w[j, i] <- w[i, j] + sp[p] } } } } - Q <- mat.or.vec(length(labels) + k + k + tot, length(labels) + k + k + tot) + ONEtoN <- seq_len(n) + + Q <- matrix(0, tmp, tmp) ## first decompose first sum in paper - for (p in 1:k) { + for (p in ONEtoK) { d_p <- st[[p]] - for (l in 1:k) { # first compute coeficients of alphas + for (l in ONEtoK) { # first compute coefficients of alphas + d <- st[[l]] sum <- 0 dijp <- -1 if (l == p) { # calculate alpha_p - for (i in 1:n) { - for (j in 1:n) { # check if {i,j}\subset L_l - d <- st[[l]] - if (i != j & is.element(labels[i],rownames(d)) & is.element(labels[j],colnames(d))) { - dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]] - sum <- sum + ((dij*dij) - sp[l]*dij*dij/w[i,j]) - ipos <- which(rownames(d) == labels[i]) - jpos <- which(rownames(d) == labels[j]) - Q[p, astart[l] + ipos] <- Q[p, astart[l] + ipos] + (dij - (sp[l]*dij/w[i,j])) - Q[p, astart[l] + jpos] <- Q[p, astart[l] + jpos] + (dij - (sp[l]*dij/w[i,j])) + for (i in ONEtoN) { + for (j in ONEtoN) { # check if {i,j}\subset L_l + if (i == j) next # make sure i != j + ## d <- st[[l]] # <- moved-up + pos <- match(labels[c(i, j)], ROWNAMES[[l]]) # <- returns NA if not in this matrix + if (all(!is.na(pos))) { + ipos <- pos[1L] + jpos <- pos[2L] + dij <- d[ipos, jpos] + sum <- sum + dij * dij - sp[l] * dij * dij / w[i,j] + tmp2 <- dij - sp[l] * dij / w[i,j] + Q[p, astart[l] + ipos] <- Q[p, astart[l] + ipos] + tmp2 + Q[p, astart[l] + jpos] <- Q[p, astart[l] + jpos] + tmp2 } } } } else { - for (i in 1:n) { - for (j in 1:n) { # check if {i,j}\subset L_l - d <- st[[l]] - if (i != j & is.element(labels[i], rownames(d)) & is.element(labels[j], colnames(d)) & is.element(labels[i], rownames(d_p)) & is.element(labels[j], colnames(d_p))) { - dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]] - dijp <- d_p[rownames(d_p) == labels[i], colnames(d_p) == labels[j]] - sum <- sum - sp[l]*dij*dijp/w[i, j] - ipos <- which(rownames(d) == labels[i]) - jpos <- which(rownames(d) == labels[j]) - Q[p,astart[l] + ipos] <- Q[p, astart[l] + ipos] - sp[l]*dijp/w[i, j] - Q[p,astart[l] + jpos] <- Q[p, astart[l] + jpos] - sp[l]*dijp/w[i, j] + for (i in ONEtoN) { + for (j in ONEtoN) { # check if {i,j}\subset L_l + if (i == j) next + ## d <- st[[l]] # <- moved-up + pos <- match(labels[c(i, j)], ROWNAMES[[l]]) + posp <- match(labels[c(i, j)], ROWNAMES[[p]]) + if (all(!is.na(pos)) && all(!is.na(posp))) { + ipos <- pos[1L] + jpos <- pos[2L] + dij <- d[ipos, jpos] + dijp <- d_p[posp[1L], posp[2L]] + sum <- sum - sp[l] * dij * dijp / w[i, j] + tmp2 <- sp[l] * dijp / w[i, j] + Q[p,astart[l] + ipos] <- Q[p, astart[l] + ipos] - tmp2 + Q[p,astart[l] + jpos] <- Q[p, astart[l] + jpos] - tmp2 } } } @@ -97,34 +110,42 @@ SDM <- function(...) } Q[p, lambstart + 1] <- 1 } + r <- k - for (p in 1:k) { + + for (p in ONEtoK) { dp <- st[[p]] - for (i in 1:n) { - if (is.element(labels[i], rownames(dp))) { + for (i in ONEtoN) { + if (is.element(labels[i], ROWNAMES[[p]])) { r <- r + 1 - for (l in 1:k) { + for (l in ONEtoK) { d <- st[[l]] if (l == p) { - for (j in 1:n) { - if (i != j & is.element(labels[j], rownames(dp))) { - dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]] - Q[r, l] <- Q[r, l] + (dij - sp[l]*dij/w[i, j]) - ipos <- which(rownames(d) == labels[i]) - jpos <- which(rownames(d) == labels[j]) - Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] + (1 - sp[l]/w[i, j]) - Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] + (1 - sp[l]/w[i, j]) + ipos <- match(labels[i], ROWNAMES[[p]]) + for (j in ONEtoN) { + if (i == j) next + jpos <- match(labels[j], ROWNAMES[[p]]) + if (!is.na(jpos)) { + dij <- d[ipos, jpos] + Q[r, l] <- Q[r, l] + dij - sp[l] * dij / w[i, j] + tmp2 <- 1 - sp[l] / w[i, j] + Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] + tmp2 + Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] + tmp2 } } } else { - for (j in 1:n) { - if (i != j & is.element(labels[j], rownames(dp)) & is.element(labels[i], rownames(d)) & is.element(labels[j], colnames(d))) { - dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]] - Q[r,l] <- Q[r,l] - sp[l]*dij/w[i, j] - ipos <- which(rownames(d) == labels[i]) - jpos <- which(rownames(d) == labels[j]) - Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] - sp[l]/w[i, j] - Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] - sp[l]/w[i, j] + for (j in ONEtoN) { + if (i == j) next + if (!is.element(labels[j], rownames(dp))) next + pos <- match(labels[c(i, j)], ROWNAMES[[l]]) + if (all(!is.na(pos))) { + ipos <- pos[1L] + jpos <- pos[2L] + dij <- d[ipos, jpos] + Q[r, l] <- Q[r, l] - sp[l] * dij / w[i, j] + tmp2 <- sp[l]/w[i, j] + Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] - tmp2 + Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] - tmp2 } } } @@ -135,48 +156,51 @@ SDM <- function(...) } } } + r <- r + 1 col[r] <- k - for (i in 1:k) Q[r,i] <- 1 + Q[r, ONEtoK] <- 1 + ## for (i in 1:k) Q[r, i] <- 1 - for (i in 1:n) { + for (i in ONEtoN) { r <- r + 1 - for (p in 1:k) { - d <- st[[p]] - if (is.element(labels[i], rownames(d))) { - ipos <- which(rownames(d) == labels[i]) - Q[r, astart[p] + ipos] <- 1 - } + for (p in ONEtoK) { + ## d <- st[[p]] # not needed + ipos <- match(labels[i], ROWNAMES[[p]]) + if (!is.na(ipos)) Q[r, astart[p] + ipos] <- 1 } } + for (p in 1:(k - 1)) { r <- r + 1 - for (i in 1:n) { - d <- st[[p]] - if (is.element(labels[i], rownames(d))) { - ipos <- which(rownames(d) == labels[i]) - Q[r, astart[p] + ipos] <- 1 - } + for (i in ONEtoN) { + ## d <- st[[p]] + ipos <- match(labels[i], ROWNAMES[[p]]) + if (!is.na(ipos)) Q[r, astart[p] + ipos] <- 1 } } a <- solve(Q, col, 1e-19) - for (i in 1:n) { - for (j in 1:n) { + for (i in ONEtoN) { + for (j in ONEtoN) { + if (i == j) { + X[i, j] <- V[i, j] <- 0 + next + } sum <- 0 sumv <- 0 - for (p in 1:k) { + for (p in ONEtoK) { d <- st[[p]] - if (is.element(labels[i], rownames(d)) & is.element(labels[j], rownames(d))) { - ipos <- which(rownames(d) == labels[i]) - jpos <- which(rownames(d) == labels[j]) - sum <- sum + sp[p]*(a[p]*d[ipos, jpos] + a[astart[p] + ipos] + a[astart[p] + jpos]) - sumv <- sumv + sp[p]*(a[p]*d[ipos, jpos])*(a[p]*d[ipos, jpos]) + pos <- match(labels[c(i, j)], ROWNAMES[[p]]) + if (all(!is.na(pos))) { + ipos <- pos[1L] + jpos <- pos[2L] + dij <- d[ipos, jpos] + sum <- sum + sp[p] * (a[p] * dij + a[astart[p] + ipos] + a[astart[p] + jpos]) + sumv <- sumv + sp[p] * (a[p] * dij)^2 } } - X[i, j] <- sum/w[i, j] - V[i, j] <- sumv/(w[i, j] * w[i, j]) - if (i == j) - X[i, j] <- V[i, j] <- 0 + X[i, j] <- sum / w[i, j] + V[i, j] <- sumv / (w[i, j])^2 } } list(X, V) diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd index bd9abcc..020f6f7 100644 --- a/man/DNAbin.Rd +++ b/man/DNAbin.Rd @@ -88,7 +88,7 @@ \author{Emmanuel Paradis} \seealso{ \code{\link{as.DNAbin}}, \code{\link{read.dna}}, - \code{\link{read.GenBank}}, \code{\link{write.dna}}, , + \code{\link{read.GenBank}}, \code{\link{write.dna}}, \code{\link{image.DNAbin}} The corresponding generic functions are documented in the package diff --git a/man/SDM.Rd b/man/SDM.Rd index bceedf4..e3c0028 100644 --- a/man/SDM.Rd +++ b/man/SDM.Rd @@ -10,9 +10,10 @@ SDM(...) } \arguments{ \item{\dots}{2n elements (with n > 1), the first n elements are the - distance matrices, the next n elements are the sequence length from - which the matrices have been estimated (can be seen as a degree of - confidence in matrices).} + distance matrices: these can be (symmetric) matrices, objects of + class \code{"dist"}, or a mix of both. The next n elements are the + sequence length from which the matrices have been estimated (can be + seen as a degree of confidence in matrices).} } \details{ Reconstructs a consensus distance matrix from a set of input distance @@ -31,4 +32,8 @@ SDM(...) phylogenomics. \emph{Systematic Biology}, \bold{55}, 740--755. } \author{Andrei Popescu \email{niteloserpopescu@gmail.com}} +\seealso{ + \code{\link{bionj}}, \code{\link{fastme}}, \code{\link{njs}}, + \code{\link{mvrs}}, \code{\link{triangMtd}} +} \keyword{models} diff --git a/man/ape-package.Rd b/man/ape-package.Rd index 30fa3cd..920cda3 100644 --- a/man/ape-package.Rd +++ b/man/ape-package.Rd @@ -22,14 +22,14 @@ Analyses of Phylogenetics and Evolution Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, - Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, - Damien de Vienne + Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin + Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis } \references{ - Paradis, E. (2006) \emph{Analyses of Phylogenetics and Evolution with - R.} New York: Springer. + Paradis, E. (2012) \emph{Analysis of Phylogenetics and Evolution with + R (Second Edition).} New York: Springer. Paradis, E., Claude, J. and Strimmer, K. (2004) APE: analyses of phylogenetics and evolution in R language. \emph{Bioinformatics}, diff --git a/man/bionj.Rd b/man/bionj.Rd index 18b06b4..724d52f 100644 --- a/man/bionj.Rd +++ b/man/bionj.Rd @@ -25,9 +25,8 @@ bionj(X) ported to \R by Vincent Lefort \email{vincent.lefort@lirmm.fr} } \seealso{ - \code{\link{nj}}, \code{\link{fastme}}, - \code{\link{write.tree}}, \code{\link{read.tree}}, - \code{\link{dist.dna}} + \code{\link{nj}}, \code{\link{fastme}}, \code{\link{mvr}}, + \code{\link{SDM}}, \code{\link{dist.dna}} } \examples{ ### From Saitou and Nei (1987, Table 1): diff --git a/man/clustal.Rd b/man/clustal.Rd index 78f07fc..0e00198 100644 --- a/man/clustal.Rd +++ b/man/clustal.Rd @@ -62,7 +62,7 @@ tcoffee(x, exec = "t_coffee", MoreArgs = "", quiet = TRUE) } \author{Emmanuel Paradis} \seealso{ - \code{\link{image.DNAbin}}, \code{\link{del.gaps}} + \code{\link{image.DNAbin}}, \code{\link{del.gaps}}, \code{\link{alex}} The package \pkg{phyloch} which has similar functions for the MAFFT and Prank. diff --git a/man/diversity.contrast.test.Rd b/man/diversity.contrast.test.Rd index dd6e7ca..c55acc7 100644 --- a/man/diversity.contrast.test.Rd +++ b/man/diversity.contrast.test.Rd @@ -31,10 +31,11 @@ diversity.contrast.test(x, method = "ratiolog", If \code{method = "ratiolog"}, the test described in Barraclough et al. (1996) is performed. If \code{method = "proportion"}, the version in Barraclough et al. (1995) is used. If \code{method = "difference"}, - the signed difference is used (Sargent 2004). If \code{method = - "logratio"}, then this is Wiegmann et al.'s (1993) version. These + the signed difference is used (Sargent 2004). If \code{method = "logratio"}, + then this is Wiegmann et al.'s (1993) version. These four tests are essentially different versions of the same test (Vamosi - and Vamosi 2005, Vamosi 2007). + and Vamosi 2005, Vamosi 2007). See Paradis (2012) for a comparison of + their statistical performance with other tests. If \code{nrep = 0}, a Wilcoxon test is done on the species diversity contrasts with the null hypothesis is that they are distributed around @@ -57,6 +58,10 @@ diversity.contrast.test(x, method = "ratiolog", flowering plants (angiosperms). \emph{Proceedings of the Royal Society of London. Series B. Biological Sciences}, \bold{263}, 589--591. + Paradis, E. (2012) Shift in diversification in sister-clade + comparisons: a more powerful test. \emph{Evolution}, \bold{66}, + 288--295. + Sargent, R. D. (2004) Floral symmetry affects speciation rates in angiosperms. \emph{Proceedings of the Royal Society of London. Series B. Biological Sciences}, \bold{271}, 603--608. diff --git a/man/image.DNAbin.Rd b/man/image.DNAbin.Rd index 9bb04a1..a0eaa13 100644 --- a/man/image.DNAbin.Rd +++ b/man/image.DNAbin.Rd @@ -43,8 +43,8 @@ } \author{Emmanuel Paradis} \seealso{ - \code{\link{DNAbin}}, \code{\link{del.gaps}}, \code{\link{clustal}}, - \code{\link[graphics]{grid}} + \code{\link{DNAbin}}, \code{\link{del.gaps}}, \code{\link{alex}}, + \code{\link{clustal}}, \code{\link[graphics]{grid}} } \examples{ data(woodmouse) diff --git a/man/mcconwaysims.test.Rd b/man/mcconwaysims.test.Rd index 1fb3c6b..35549b2 100644 --- a/man/mcconwaysims.test.Rd +++ b/man/mcconwaysims.test.Rd @@ -32,6 +32,10 @@ mcconwaysims.test(x) McConway, K. J. and Sims, H. J. (2004) A likelihood-based method for testing for nonstochastic variation of diversification rates in phylogenies. \emph{Evolution}, \bold{58}, 12--23. + + Paradis, E. (2012) Shift in diversification in sister-clade + comparisons: a more powerful test. \emph{Evolution}, \bold{66}, + 288--295. } \author{Emmanuel Paradis} \seealso{ diff --git a/man/mvr.Rd b/man/mvr.Rd index b6984fd..fbebb99 100644 --- a/man/mvr.Rd +++ b/man/mvr.Rd @@ -31,4 +31,15 @@ mvrs(X, V, fs = 15) Classification}, \bold{17}, 67--99. } \author{Andrei Popescu \email{niteloserpopescu@gmail.com}} +\seealso{ + \code{\link{bionj}}, \code{\link{fastme}}, \code{\link{njs}}, + \code{\link{SDM}} +} +\examples{ +data(woodmouse) +rt <- dist.dna(woodmouse, variance = TRUE) +v <- attr(rt, "variance") +tr <- mvr(rt, v) +plot(tr, "u") +} \keyword{models} diff --git a/man/nj.Rd b/man/nj.Rd index 88d2258..3bd7f2f 100644 --- a/man/nj.Rd +++ b/man/nj.Rd @@ -27,7 +27,7 @@ nj(X) \seealso{ \code{\link{write.tree}}, \code{\link{read.tree}}, \code{\link{dist.dna}}, \code{\link{bionj}}, - \code{\link{fastme}} + \code{\link{fastme}}, \code{\link{njs}} } \examples{ ### From Saitou and Nei (1987, Table 1): diff --git a/man/njs.Rd b/man/njs.Rd index 04bafb9..b400e26 100644 --- a/man/njs.Rd +++ b/man/njs.Rd @@ -30,4 +30,16 @@ bionjs(X, fs = 15) \url{http://www.biomedcentral.com/1471-2105/9/166} } \author{Andrei Popescu \email{niteloserpopescu@gmail.com}} +\seealso{ + \code{\link{nj}}, \code{\link{bionj}}, \code{\link{triangMtds}} +} +\examples{ +data(woodmouse) +d <- dist.dna(woodmouse) +dm <- d +dm[sample(length(dm), size = 3)] <- NA +dist.topo(njs(dm), nj(d)) # often 0 +dm[sample(length(dm), size = 10)] <- NA +dist.topo(njs(dm), nj(d)) # sometimes 0 +} \keyword{models} diff --git a/man/richness.yule.test.Rd b/man/richness.yule.test.Rd index 21df590..91177f6 100644 --- a/man/richness.yule.test.Rd +++ b/man/richness.yule.test.Rd @@ -22,12 +22,13 @@ richness.yule.test(x, t) freedom (= 1), and the \emph{P}-value. } \references{ - Paradis, E. Shift in diversification in sister-clade comparisons: a - more powerful test. (manuscript submitted) + Paradis, E. (2012) Shift in diversification in sister-clade + comparisons: a more powerful test. \emph{Evolution}, \bold{66}, + 288--295. } \author{Emmanuel Paradis} \seealso{ - \code{\link{slowinskiguyer.test}}, \code{\link{mcconwaysims.test}} + \code{\link{slowinskiguyer.test}}, \code{\link{mcconwaysims.test}}, \code{\link{diversity.contrast.test}} } \examples{ diff --git a/man/slowinskiguyer.test.Rd b/man/slowinskiguyer.test.Rd index 67877e2..0d2c90c 100644 --- a/man/slowinskiguyer.test.Rd +++ b/man/slowinskiguyer.test.Rd @@ -24,8 +24,8 @@ slowinskiguyer.test(x, detail = FALSE) then the null hypothesis cannot be rejected. The present function has mainly a historical interest. The - Slowinski--Guyer test generally performs poorly: see McConway and Sims - (2004) for an alternative and the functions cited below. + Slowinski--Guyer test generally performs poorly: see Paradis (2012) + alternatives and the functions cited below. } \value{ a data frame with the \eqn{\chi^2}{chi2}, the number of degrees of @@ -34,9 +34,9 @@ slowinskiguyer.test(x, detail = FALSE) \emph{P}-values for each pair of sister-clades. } \references{ - McConway, K. J. and Sims, H. J. (2004) A likelihood-based method for - testing for nonstochastic variation of diversification rates in - phylogenies. \emph{Evolution}, \bold{58}, 12--23. + Paradis, E. (2012) Shift in diversification in sister-clade + comparisons: a more powerful test. \emph{Evolution}, \bold{66}, + 288--295. Slowinski, J. B. and Guyer, C. (1993) Testing whether certain traits have caused amplified diversification: an improved method based on a diff --git a/man/trex.Rd b/man/trex.Rd index 7b06b6b..d1c7752 100644 --- a/man/trex.Rd +++ b/man/trex.Rd @@ -40,7 +40,7 @@ trex(phy, title = TRUE, subbg = "lightyellow3", If \code{title = TRUE}, a default title is printed on the new window using the node label, or the node number if there are no node labels in the tree. If \code{title = FALSE}, no title is printed. If - \code{title} is a character string, this is used for the title. + \code{title} is a character string, it is used for the title. } \value{ an object of class \code{"phylo"} if \code{return.tree = TRUE} diff --git a/man/triangMtd.Rd b/man/triangMtd.Rd index d90ff1e..af58c51 100644 --- a/man/triangMtd.Rd +++ b/man/triangMtd.Rd @@ -10,7 +10,7 @@ triangMtds(X) \item{X}{a distance matrix}. } \description{ - Fast distance-based costruction method. Should only be used when + Fast distance-based construction method. Should only be used when distance measures are fairly reliable. } \value{ @@ -20,4 +20,8 @@ triangMtds(X) \url{http://archive.numdam.org/ARCHIVE/RO/RO_2001__35_2/RO_2001__35_2_283_0/RO_2001__35_2_283_0.pdf} } \author{Andrei Popescu \email{niteloserpopescu@gmail.com}} +\seealso{ + \code{\link{nj}}, \code{\link{bionj}}, \code{\link{fastme}}, + \code{\link{njs}}, \code{\link{mvrs}}, \code{\link{SDM}} +} \keyword{models} diff --git a/man/yule.time.Rd b/man/yule.time.Rd index ccc0d5a..be82f70 100644 --- a/man/yule.time.Rd +++ b/man/yule.time.Rd @@ -48,7 +48,7 @@ yule.time(phy, birth, BIRTH = NULL, root.time = 0, opti = "nlm", start = 0.01) } \author{Emmanuel Paradis} \references{ - Hubert, N., Paradis, E., Bruggemann, H. & Planes, S. (2011) Community + Hubert, N., Paradis, E., Bruggemann, H. and Planes, S. (2011) Community assembly and diversification in Indo-Pacific coral reef fishes. \emph{Ecology and Evolution}, \bold{1}, 229--277. } diff --git a/src/bionjs.c b/src/bionjs.c index b3f599b..2ec13d4 100644 --- a/src/bionjs.c +++ b/src/bionjs.c @@ -1,6 +1,6 @@ -/* bionjs.c 2011-10-11 */ +/* bionjs.c 2012-04-02 */ -/* Copyright 2011 Andrei-Alin Popescu */ +/* Copyright 2011-2012 Andrei-Alin Popescu */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -65,6 +65,10 @@ void bionjs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int* if(k==i || k==j) { + /* added 2012-04-02: */ + if(i!=k)R[give_index(i,j,n)]+=D[give_index(i,k,n)]; + if(j!=k)R[give_index(i,j,n)]+=D[give_index(j,k,n)]; + /* end of addition */ s[give_index(i,j,n)]++; continue; } @@ -141,9 +145,9 @@ void bionjs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int* } } - /*Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S); + //Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S); - for(i=1;i= 0) return 1; + if (t >= 0 - 1e-10) return 1; return 0; } @@ -30,6 +30,7 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS { for (j = i + 1; j <= n; j++) {if(D[give_index(i,j,n)]==-1){sww=1;continue;} + if(s[give_index(i,j,n)]<=2)continue; //Rprintf("R[%i,%i]=%f\n",i,j,R[give_index(i,j,n)]); //Rprintf("s[%i,%i]=%i\n",i,j,s[give_index(i,j,n)]); //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]); @@ -63,6 +64,7 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS //calculate N*(x,y) for(i=0;imax){max=nb;} cFS[i]=nb; @@ -97,9 +99,10 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS Rprintf("\n"); }*/ max=-1e50; - //on the fron of the array containing max N*xy values compute cxy + //on the front of the array containing max N*xy values compute cxy for(i=0;imax){max=nb;} cFS[i]=nb; @@ -137,6 +140,7 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS //on the front of the array containing max C*xy compute m*xy for(i=0;imax){max=nb;} cFS[i]=nb; @@ -173,9 +177,10 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS //and calculate cnxy on these values, but this time we do not shift, but simply //return the pair realising the maximum, stored at iPos max=-1e50; - int iPos=-1; + int iPos=0; for(i=0;imax){max=nb;iPos=i;} cFS[i]=nb; @@ -185,6 +190,10 @@ void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS Rprintf("i=%i ",iFS[0]); Rprintf("j=%i ",jFS[0]); Rprintf("\n");*/ + if(iFS[iPos]==0 || jFS[iPos]==0) + { + error("distance information insufficient to construct a tree, cannot calculate agglomeration criterion"); + } *x=iFS[iPos];*y=jFS[iPos]; } @@ -195,14 +204,14 @@ double cnxy(int x, int y, int n,double* D) double nMeanXY=0; //Rprintf("cN[%i,%i]\n",x,y); for(i=1;i<=n;i++) - {if(i==x || i==y)continue; + { for(j=1;j<=n;j++) {if(i==j)continue; - if(j==y || j==x)continue; + if((i==x && j==y) || (j==x && i==y))continue; double n1=0; double n2=0; - n1=D[give_index(i,x,n)]; - n2=D[give_index(j,y,n)]; + if(i!=x)n1=D[give_index(i,x,n)]; + if(j!=y)n2=D[give_index(j,y,n)]; if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue; nMeanXY+=(n1+n2-D[give_index(x,y,n)]-D[give_index(i,j,n)]); //Rprintf("cnMeanXY after (%i,%i)=%f\n",i,j,nMeanXY); @@ -247,11 +256,11 @@ int mxy(int x,int y,int n,double* D) int ymx=0; for(i=1;i<=n;i++) { - if(mx[i]==1 && my[i]==0) + if(i!=x && mx[i]==1 && my[i]==0) { xmy++; } - if(my[i]==1 && mx[i]==0) + if(i!=y && my[i]==1 && mx[i]==0) { ymx++; } @@ -267,14 +276,14 @@ double nxy(int x, int y, int n,double* D) double nMeanXY=0; //Rprintf("N[%i,%i]\n",x,y); for(i=1;i<=n;i++) - {if(i==x || i==y)continue; + { for(j=1;j<=n;j++) {if(i==j)continue; - if(j==x || j==y)continue; + if((i==x && j==y) || (j==x && i==y))continue; double n1=0; double n2=0; - n1=D[give_index(i,x,n)]; - n2=D[give_index(j,y,n)]; + if(i!=x)n1=D[give_index(i,x,n)]; + if(j!=y)n2=D[give_index(j,y,n)]; if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue; sCXY++; //Rprintf("considered pair (%i,%i)\n",i,j); @@ -283,6 +292,7 @@ double nxy(int x, int y, int n,double* D) } } //Rprintf("sCXY=%i",sCXY); + if(sCXY==0) return 0; return nMeanXY/sCXY; } @@ -293,10 +303,10 @@ int cxy(int x, int y, int n,double* D) int j=0; for(i=1;i<=n;i++) - {if(i==x || i==y)continue; + { for(j=1;j<=n;j++) {if(i==j)continue; - if(j==x || j==y)continue; + if((i==x && j==y) || (j==x && i==y))continue; double n1=0; double n2=0; if(i!=x)n1=D[give_index(i,x,n)]; @@ -360,6 +370,8 @@ void njs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int *fsS if(k==i || k==j) { s[give_index(i,j,n)]++; + if(i!=k)R[give_index(i,j,n)]+=D[give_index(i,k,n)]; + if(j!=k)R[give_index(i,j,n)]+=D[give_index(j,k,n)]; continue; } if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue; @@ -502,7 +514,7 @@ void njs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int *fsS //if complete distanes, use N-2, else use S int down=B; if(sw==1){down=s[give_index(OTU1,OTU2,n)]-2;} - if(down==0) + if(down<=0) {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2); } //Rprintf("down=%i\n",down); @@ -566,6 +578,8 @@ void njs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int *fsS for(j=1;j