From 1df144a18356d9b329324324bc2f78cfdf1cea3d Mon Sep 17 00:00:00 2001 From: paradis Date: Tue, 1 May 2012 02:25:53 +0000 Subject: [PATCH] various fixes in C files git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@187 6e262413-ae40-0410-9e79-b911bd7a66b7 --- DESCRIPTION | 4 +- NEWS | 35 +- R/ape-defunct.R | 35 -- R/me.R | 68 ++-- R/nj.R | 2 +- R/read.dna.R | 10 +- Thanks | 4 +- man/CADM.global.Rd | 40 +- man/GC.content.Rd | 28 -- man/ace.Rd | 2 +- man/ape-defunct.Rd | 58 --- man/base.freq.Rd | 19 +- man/bionj.Rd | 2 +- man/del.gaps.Rd | 3 +- man/fastme.Rd | 1 + man/rTraitCont.Rd | 6 +- man/seg.sites.Rd | 4 +- src/BIONJ.c | 888 +++++++++++++-------------------------------- src/NNI.c | 7 + src/SPR.c | 12 +- src/TBR.c | 12 +- src/bNNI.c | 7 + src/delta_plot.c | 27 +- src/dist_dna.c | 46 --- src/heap.c | 7 + src/me.c | 61 ++-- src/me.h | 31 +- src/me_balanced.c | 14 +- src/me_ols.c | 12 +- src/newick.c | 232 ------------ src/tree_phylo.c | 32 +- 31 files changed, 498 insertions(+), 1211 deletions(-) delete mode 100644 R/ape-defunct.R delete mode 100644 man/GC.content.Rd delete mode 100644 man/ape-defunct.Rd delete mode 100644 src/newick.c diff --git a/DESCRIPTION b/DESCRIPTION index 2877fec..36e0a8e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape -Version: 3.0-3 -Date: 2012-04-20 +Version: 3.0-4 +Date: 2012-04-30 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/NEWS b/NEWS index 3660ac4..6d74722 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,29 @@ - CHANGES IN APE VERSION 3.0-2 + CHANGES IN APE VERSION 3.0-4 + + +BUG FIXES + + o read.dna() failed to read interleaved files if the first line + uses tabulations instead of white spaces. + +OTHER CHANGES + + o The files ape-defunct.R and ape-defunct.Rd, which have not been + modified for almost two years, have been removed. + + o The C code of bionj() has been reworked: it is more stable (by + avoiding passing character strings), slightly faster (by about + 20%), and more accurate numerically. + + o The C code of fastme.*() has been slightly modified and should + be more stable by avoiding passing character strings (the + results are identical to the previous versions). + + o The file src/newick.c has been removed. + + + + CHANGES IN APE VERSION 3.0-3 BUG FIXES @@ -7,6 +32,14 @@ BUG FIXES a result is returned in most cases. +OTHER CHANGES + + o Because of problems with character string manipulation in C, the + examples in ?bionj and in ?fastme have been disallowed. In the + meantime, these functions might be unstable. This will be solved + for the next release. + + CHANGES IN APE VERSION 3.0-2 diff --git a/R/ape-defunct.R b/R/ape-defunct.R deleted file mode 100644 index c89dc06..0000000 --- a/R/ape-defunct.R +++ /dev/null @@ -1,35 +0,0 @@ -klastorin <- function(phy) - .Defunct(msg = '\'klastorin\' has been removed from ape, - see help("ape-defunct") for details.') - -mlphylo <- function(...) - .Defunct(msg = '\'mlphylo\' has been removed from ape, - see help("ape-defunct") for details.') - -DNAmodel <- function(...) - .Defunct(msg = '\'DNAmodel\' has been removed from ape, - see help("ape-defunct") for details.') - -sh.test <- function(...) - .Defunct(msg = '\'sh.test\' has been removed from ape, - see help("ape-defunct") for details.') - -evolve.phylo <- function(phy, value, var) - .Defunct(msg = '\'evolve.phylo\' has been removed from ape, - see help("ape-defunct") for details.') - -plot.ancestral <- function(...) - .Defunct(msg = '\'plot.ancestral\' has been removed from ape, - see help("ape-defunct") for details.') - -chronogram <- function(...) - .Defunct(msg = '\'chronogram\' has been removed from ape, - see help("ape-defunct") for details.') - -ratogram <- function(...) - .Defunct(msg = '\'ratogram\' has been removed from ape, - see help("ape-defunct") for details.') - -NPRS.criterion <- function(...) - .Defunct(msg = '\'NPRS.criterion\' has been removed from ape, - see help("ape-defunct") for details.') diff --git a/R/me.R b/R/me.R index 96c7e65..1006c64 100644 --- a/R/me.R +++ b/R/me.R @@ -1,9 +1,9 @@ -## me.R (2011-05-12) +## me.R (2012-04-30) ## Tree Estimation Based on Minimum Evolution Algorithm ## Copyright 2007 Vincent Lefort with modifications by -## Emmanuel Paradis (2008-2011) +## Emmanuel Paradis (2008-2012) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -12,18 +12,16 @@ fastme.bal <- function(X, nni = TRUE, spr = TRUE, tbr = TRUE) { if (is.matrix(X)) X <- as.dist(X) N <- as.integer(attr(X, "Size")) - labels <- sprintf("%6s", 1:N) - edge1 <- edge2 <- integer(2*N - 3) - - ans <- .C("me_b", as.double(X), N, labels, as.integer(nni), - as.integer(spr), as.integer(tbr), edge1, edge2, - double(2*N - 3), character(N), PACKAGE = "ape") - - labels <- substr(ans[[10]], 1, 6) - LABS <- attr(X, "Labels") - labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] - else gsub("^ ", "", labels) - structure(list(edge = cbind(ans[[7]], ans[[8]]), edge.length = ans[[9]], + nedge <- 2L * N - 3L + ans <- .C("me_b", as.double(X), N, 1:N, as.integer(nni), + as.integer(spr), as.integer(tbr), integer(nedge), + integer(nedge), double(nedge), + DUP = FALSE, NAOK = TRUE, PACKAGE = "ape") + labels <- attr(X, "Labels") + if (is.null(labels)) labels <- as.character(1:N) + labels <- labels[ans[[3]]] + structure(list(edge = cbind(ans[[7]], ans[[8]]), + edge.length = ans[[9]], tip.label = labels, Nnode = N - 2L), class = "phylo") } @@ -32,16 +30,15 @@ fastme.ols <- function(X, nni = TRUE) { if (is.matrix(X)) X <- as.dist(X) N <- as.integer(attr(X, "Size")) - labels <- sprintf("%6s", 1:N) - edge1 <- edge2 <- integer(2*N - 3) - ans <- .C("me_o", as.double(X), N, labels, as.integer(nni), - edge1, edge2, double(2*N - 3), character(N), - PACKAGE = "ape") - labels <- substr(ans[[8]], 1, 6) - LABS <- attr(X, "Labels") - labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] - else gsub("^ ", "", labels) - structure(list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]], + nedge <- 2L * N - 3L + ans <- .C("me_o", as.double(X), N, 1:N, as.integer(nni), + integer(nedge), integer(nedge), double(nedge), + DUP = FALSE, NAOK = TRUE, PACKAGE = "ape") + labels <- attr(X, "Labels") + if (is.null(labels)) labels <- as.character(1:N) + labels <- labels[ans[[3]]] + structure(list(edge = cbind(ans[[5]], ans[[6]]), + edge.length = ans[[7]], tip.label = labels, Nnode = N - 2L), class = "phylo") } @@ -49,18 +46,19 @@ fastme.ols <- function(X, nni = TRUE) bionj <- function(X) { if (is.matrix(X)) X <- as.dist(X) + if (any(is.na(X))) + stop("missing values are not allowed in the distance matrix.\nConsider using bionjs()") if (any(X > 100)) stop("at least one distance was greater than 100") N <- as.integer(attr(X, "Size")) - labels <- sprintf("%6s", 1:N) - edge1 <- edge2 <- integer(2*N - 3) - ans <- .C("bionj", as.double(X), N, labels, edge1, edge2, - double(2*N - 3), character(N), PACKAGE = "ape") - labels <- substr(ans[[7]], 1, 6) - LABS <- attr(X, "Labels") - labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] - else gsub("^ ", "", labels) - structure(list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]], - tip.label = labels, Nnode = N - 2L), - class = "phylo") + + ans <- .C("bionj", as.double(X), N, integer(2 * N - 3), + integer(2 * N - 3), double(2*N - 3), + DUP = FALSE, NAOK = TRUE, PACKAGE = "ape") + labels <- attr(X, "Labels") + if (is.null(labels)) labels <- as.character(1:N) + obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]], + tip.label = labels, Nnode = N - 2L) + class(obj) <- "phylo" + reorder(obj) } diff --git a/R/nj.R b/R/nj.R index dada70e..b12bd42 100644 --- a/R/nj.R +++ b/R/nj.R @@ -11,7 +11,7 @@ nj <- function(X) { if (is.matrix(X)) X <- as.dist(X) if (any(is.na(X))) - stop("missing values are not allowed in the distance matrix") + stop("missing values are not allowed in the distance matrix\nConsider using njs()") N <- attr(X, "Size") labels <- attr(X, "Labels") if (is.null(labels)) labels <- as.character(1:N) diff --git a/R/read.dna.R b/R/read.dna.R index 88f3d61..3e2b980 100644 --- a/R/read.dna.R +++ b/R/read.dna.R @@ -1,8 +1,8 @@ -## read.dna.R (2011-02-01) +## read.dna.R (2012-04-26) ## Read DNA Sequences in a File -## Copyright 2003-2011 Emmanuel Paradis +## Copyright 2003-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -28,9 +28,9 @@ read.dna <- function(file, format = "interleaved", skip = 0, skip = skip, nlines = nlines, comment.char = comment.char) pat.base <- "[-AaCcGgTtUuMmRrWwSsYyKkVvHhDdBbNn?]{10}" if (phylip) { - ## need to remove the possible leading spaces in the first line - fl <- gsub("^ +", "", X[1]) - fl <- as.numeric(unlist(strsplit(fl, " +"))) + ## need to remove the possible leading spaces and/or tabs in the first line + fl <- gsub("^[[:blank:]]+", "", X[1]) + fl <- as.numeric(unlist(strsplit(fl, "[[:blank:]]+"))) if (length(fl) != 2 || any(is.na(fl))) stop("the first line of the file must contain the dimensions of the data") n <- fl[1] diff --git a/Thanks b/Thanks index f6c513d..75d8199 100644 --- a/Thanks +++ b/Thanks @@ -12,8 +12,8 @@ Elizabeth Purdom, Dan Rabosky, Filipe Vieira, Tim Wallstrom, Li-San Wang, Yan Wong, Peter Wragg, Janet Young, and Jinlong Zhang. Contact me if I forgot someone. -Kurt Hornik, of the R Core Team, helped in several occasions to -fix some problems and bugs. +Kurt Hornik and Brian Ripley, of the R Core Team, helped on several +occasions to fix some problems and bugs. Financial support was provided in the early development of APE by the French "Programme inter-EPST Bioinformatique" (2001-2003). diff --git a/man/CADM.global.Rd b/man/CADM.global.Rd index 05bf426..007918b 100644 --- a/man/CADM.global.Rd +++ b/man/CADM.global.Rd @@ -68,35 +68,35 @@ If parameter \code{mantel} is TRUE, tables of Mantel statistics and P-values are \item{Mantel.prob }{One-tailed P-values associated with the Mantel correlations of the previous table. The probabilities are computed in the right-hand tail. H0 is tested against the alternative one-tailed hypothesis that the Mantel correlation under test is positive. No correction is made for multiple testing. } } -\references{ -Campbell, V., P. Legendre and F.-J. Lapointe. 2009. Assessing congruence among ultrametric distance matrices. Journal of Classification 26: 103-117. +\references{ +Campbell, V., Legendre, P. and Lapointe, F.-J. (2009) Assessing congruence among ultrametric distance matrices. \emph{Journal of Classification}, \bold{26}, 103--117. -Campbell, V., P. Legendre and F.-J. Lapointe. 2011. The performance of the Congruence Among Distance Matrices (CADM) test in phylogenetic analysis. BMC Evolutionary Biology 11: 64. http://www.biomedcentral.com/1471-2148/11/64. +Campbell, V., Legendre, P. and Lapointe, F.-J. (2011) The performance of the Congruence Among Distance Matrices (CADM) test in phylogenetic analysis. \emph{BMC Evolutionary Biology}, \bold{11}, 64. \url{http://www.biomedcentral.com/1471-2148/11/64}. -Friedman, M. 1937. The use of ranks to avoid the assumption of normality implicit in the analysis of variance. Journal of the American Statistical Association 32: 675-701. +Friedman, M. (1937) The use of ranks to avoid the assumption of normality implicit in the analysis of variance. \emph{Journal of the American Statistical Association}, \bold{32}, 675--701. -Kendall, M. G. and B. Babington Smith. 1939. The problem of m rankings. Annals of Mathematical Statistics 10: 275-287. +Kendall, M. G. and Babington Smith, B. (1939) The problem of m rankings. \emph{Annals of Mathematical Statistics}, \bold{10}, 275--287. -Lapointe, F.-J., J. A. W. Kirsch and J. M. Hutcheon. 1999. Total evidence, consensus, and bat phylogeny: a distance-based approach. Molecular Phylogenetics and Evolution 11: 55-66. +Lapointe, F.-J., Kirsch, J. A. W. and Hutcheon, J. M. (1999) Total evidence, consensus, and bat phylogeny: a distance-based approach. \emph{Molecular Phylogenetics and Evolution}, \bold{11}, 55--66. -Legendre, P. 2010. Coefficient of concordance. Pp. 164-169 in: Encyclopedia of Research Design, Vol. 1. N. J. Salkind, ed. SAGE Publications, Inc., Los Angeles. +Legendre, P. (2010) Coefficient of concordance. Pp. 164-169 in: Encyclopedia of Research Design, Vol. 1. N. J. Salkind, ed. SAGE Publications, Inc., Los Angeles. -Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance matrices: single malt Scotch whiskies revisited. Australian and New Zealand Journal of Statistics 46: 615-629. +Legendre, P. and Lapointe, F.-J. (2004) Assessing congruence among distance matrices: single malt Scotch whiskies +revisited. \emph{Australian and New Zealand Journal of Statistics}, \bold{46}, 615--629. -Legendre, P. et F.-J. Lapointe. 2005. Congruence entre matrices de distance. P. 178-181 in: Makarenkov, V., G. Cucumel et F.-J. Lapointe [eds] Comptes rendus des 12emes Rencontres de la Societe Francophone de Classification, Montreal, 30 mai - 1er juin 2005. +Legendre, P. and Lapointe, F.-J. (2005) Congruence entre matrices de distance. P. 178-181 in: Makarenkov, V., G. Cucumel et F.-J. Lapointe [eds] Comptes rendus des 12emes Rencontres de la Societe Francophone de Classification, Montreal, 30 mai - 1er juin 2005. -Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for the behavioral sciences. 2nd edition. McGraw-Hill, New York. +Siegel, S. and Castellan, N. J., Jr. (1988) \emph{Nonparametric statistics for the behavioral sciences. 2nd edition}. New York: McGraw-Hill. } -\author{ Pierre Legendre, Universite de Montreal } +\author{Pierre Legendre, Universite de Montreal} \examples{ - -# Examples 1 and 2: 5 genetic distance matrices computed from simulated DNA -# sequences representing 50 taxa having evolved along additive trees with -# identical evolutionary parameters (GTR+ Gamma + I). Distance matrices were -# computed from the DNA sequence matrices using a p distance corrected with the -# same parameters as those used to simulate the DNA sequences. See Campbell et +# Examples 1 and 2: 5 genetic distance matrices computed from simulated DNA +# sequences representing 50 taxa having evolved along additive trees with +# identical evolutionary parameters (GTR+ Gamma + I). Distance matrices were +# computed from the DNA sequence matrices using a p distance corrected with the +# same parameters as those used to simulate the DNA sequences. See Campbell et # al. (2009) for details. # Example 1: five independent additive trees. Data provided by V. Campbell. @@ -104,15 +104,15 @@ Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for the behav data(mat5Mrand) res.global <- CADM.global(mat5Mrand, 5, 50) -# Example 2: three partly similar trees, two independent trees. +# Example 2: three partly similar trees, two independent trees. # Data provided by V. Campbell. data(mat5M3ID) res.global <- CADM.global(mat5M3ID, 5, 50) res.post <- CADM.post(mat5M3ID, 5, 50, mantel=TRUE) -# Example 3: three matrices respectively representing Serological -# (asymmetric), DNA hybridization (asymmetric) and Anatomical (symmetric) +# Example 3: three matrices respectively representing Serological +# (asymmetric), DNA hybridization (asymmetric) and Anatomical (symmetric) # distances among 9 families. Data from Lapointe et al. (1999). data(mat3) diff --git a/man/GC.content.Rd b/man/GC.content.Rd deleted file mode 100644 index dc59d07..0000000 --- a/man/GC.content.Rd +++ /dev/null @@ -1,28 +0,0 @@ -\name{GC.content} -\alias{GC.content} -\title{Content in GC from DNA Sequences} -\usage{ -GC.content(x) -} -\arguments{ - \item{x}{a vector, a matrix, a data frame, or a list which contains - the DNA sequences.} -} -\description{ - This function computes the percentage of G+C in a sample of DNA sequences. -} -\details{ - The percentage of G+C is computed over all sequences in the - sample. All missing or unknown sites are discarded from the - computations. The present function actually uses the function - \code{base.freq}. -} -\value{ - A single numeric value is returned. -} -\author{Emmanuel Paradis} -\seealso{ - \code{\link{base.freq}}, \code{\link{seg.sites}}, - \code{\link{nuc.div}} -} -\keyword{univar} diff --git a/man/ace.Rd b/man/ace.Rd index 1fc5d01..9686022 100644 --- a/man/ace.Rd +++ b/man/ace.Rd @@ -56,7 +56,7 @@ ace(x, phy, type = "continuous", method = "ML", CI = TRUE, Akaike information criterion of a tree. If no such values are available, \code{NULL} is returned. - \code{anova} is another generic function that is used to compare + \code{anova} is another generic function which is used to compare nested models: the significance of the additional parameter(s) is tested with likelihood ratio tests. You must ensure that the models are effectively nested (if they are not, the results will be diff --git a/man/ape-defunct.Rd b/man/ape-defunct.Rd deleted file mode 100644 index 95dc2ec..0000000 --- a/man/ape-defunct.Rd +++ /dev/null @@ -1,58 +0,0 @@ -\name{ape-defunct} -\alias{ape-defunct} -\alias{klastorin} -\alias{mlphylo} -\alias{DNAmodel} -\alias{sh.test} -\alias{heterozygosity} -\alias{H} -\alias{nuc.div} -\alias{theta.h} -\alias{theta.k} -\alias{theta.s} -\alias{evolve.phylo} -\alias{plot.ancestral} -\alias{chronogram} -\alias{ratogram} -\alias{NPRS.criterion} -\title{Defunct Ape Functions} -\description{ - These functions have been removed from \pkg{ape} or moved to another - package. -} -\usage{ -klastorin(phy) -mlphylo(...) -DNAmodel(...) -sh.test(...) -heterozygosity(x, variance = FALSE) -H(x, variance = FALSE) -nuc.div(x, variance = FALSE, pairwise.deletion = FALSE) -theta.h(x, standard.error = FALSE) -theta.k(x, n = NULL, k = NULL) -theta.s(s, n, variance = FALSE) -evolve.phylo(phy, value, var) -plot.ancestral(...) -chronogram(...) -ratogram(...) -NPRS.criterion(...) -} -\details{ - \code{klastorin} has been removed because it does not seem to be used - and this helped to clear some internal C code (this function may be - put back with a different coding). - - \code{mlphylo}, \code{DNAmodel} and \code{sh.test} have been removed: - see the package \pkg{phangorn} for a much better implementation of - these methods (and others). - - \code{heterozygosity}, \code{nuc.div}, \code{theta.h}, \code{theta.k} - and \code{theta.s} have been moved to \pkg{pegas}. - - \code{evolve.phylo} and \code{plot.ancestral} have been deprecated by - the new function \code{\link{rTraitCont}}. - - \code{chronogram}, \code{ratogram}, and \code{NPRS.criterion} have - ceased to be maintained: consider using \code{\link{chronopl}}. -} -\keyword{internal} diff --git a/man/base.freq.Rd b/man/base.freq.Rd index 56a5e2e..8832fcc 100644 --- a/man/base.freq.Rd +++ b/man/base.freq.Rd @@ -1,5 +1,6 @@ \name{base.freq} \alias{base.freq} +\alias{GC.content} \alias{Ftab} \title{Base frequencies from DNA Sequences} \description{ @@ -7,11 +8,15 @@ the four DNA bases (adenine, cytosine, guanine, and thymidine) from a sample of sequences. + \code{GC.content} computes the proportion of G+C (using the previous + function). All missing or unknown sites are ignored. + \code{Ftab} computes the contingency table with the absolute frequencies of the DNA bases from a pair of sequences. } \usage{ base.freq(x, freq = FALSE, all = FALSE) +GC.content(x) Ftab(x, y = NULL) } \arguments{ @@ -26,8 +31,7 @@ Ftab(x, y = NULL) } \details{ The base frequencies are computed over all sequences in the - sample. All missing or unknown sites are discarded from the - computations. + sample. For \code{Ftab}, if the argument \code{y} is given then both \code{x} and \code{y} are coerced as vectors and must be of equal length. If @@ -35,18 +39,21 @@ Ftab(x, y = NULL) the two first sequences are used. } \value{ - A numeric vector with names \code{c("a", "c", "g", "t")}, or a four by - four matrix with similar dimnames. + A numeric vector with names \code{c("a", "c", "g", "t")} (and possibly + \code{"r", "m", ...}, a single numeric value, or a four by four matrix + with similar dimnames. } \author{Emmanuel Paradis} \seealso{ - \code{\link{GC.content}}, \code{\link{seg.sites}}, - \code{\link{nuc.div}}, \code{\link{DNAbin}} + \code{\link{seg.sites}}, \code{\link[pegas]{nuc.div}}, + \code{\link{DNAbin}} } \examples{ data(woodmouse) base.freq(woodmouse) base.freq(woodmouse, TRUE) +base.freq(woodmouse, TRUE, TRUE) +GC.content(woodmouse) Ftab(woodmouse) Ftab(woodmouse[1, ], woodmouse[2, ]) # same than above Ftab(woodmouse[14:15, ]) # between the last two diff --git a/man/bionj.Rd b/man/bionj.Rd index 724d52f..3fe1a79 100644 --- a/man/bionj.Rd +++ b/man/bionj.Rd @@ -26,7 +26,7 @@ bionj(X) } \seealso{ \code{\link{nj}}, \code{\link{fastme}}, \code{\link{mvr}}, - \code{\link{SDM}}, \code{\link{dist.dna}} + \code{\link{bionjs}}, \code{\link{SDM}}, \code{\link{dist.dna}} } \examples{ ### From Saitou and Nei (1987, Table 1): diff --git a/man/del.gaps.Rd b/man/del.gaps.Rd index d4ef6a3..7a9ff18 100644 --- a/man/del.gaps.Rd +++ b/man/del.gaps.Rd @@ -26,7 +26,6 @@ del.gaps(x) \author{Emmanuel Paradis} \seealso{ \code{\link{base.freq}}, \code{\link{GC.content}}, - \code{\link{theta.s}}, \code{\link{nuc.div}}, \code{\link{seg.sites}}, - \code{\link{image.DNAbin}} + \code{\link{seg.sites}}, \code{\link{image.DNAbin}} } \keyword{univar} diff --git a/man/fastme.Rd b/man/fastme.Rd index 9174b48..cd7af9e 100644 --- a/man/fastme.Rd +++ b/man/fastme.Rd @@ -1,4 +1,5 @@ \name{FastME} +\alias{FastME} \alias{fastme} \alias{fastme.bal} \alias{fastme.ols} diff --git a/man/rTraitCont.Rd b/man/rTraitCont.Rd index 50fd073..9e96f9e 100644 --- a/man/rTraitCont.Rd +++ b/man/rTraitCont.Rd @@ -26,7 +26,7 @@ rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0, \description{ This function simulates the evolution of a continuous character along a phylogeny. The calculation is done recursively from the root. See - Paradis (2006, p. 151) for a brief introduction. + Paradis (2012, pp. 232 and 324) for an introduction. } \details{ There are three possibilities to specify \code{model}: @@ -62,8 +62,8 @@ rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0, Ornstein-Uhlenbeck process and its integral. \emph{Physical Review E}, \bold{54}, 2084--2091. - Paradis, E. (2006) \emph{Analyses of Phylogenetics and Evolution with - R.} New York: Springer. + Paradis, E. (2012) \emph{Analysis of Phylogenetics and Evolution with + R (Second Edition).} New York: Springer. } \author{Emmanuel Paradis} \seealso{ diff --git a/man/seg.sites.Rd b/man/seg.sites.Rd index d03b56a..81f046a 100644 --- a/man/seg.sites.Rd +++ b/man/seg.sites.Rd @@ -28,8 +28,8 @@ seg.sites(x) results if there are ambiguous bases in the data. } \seealso{ - \code{\link{base.freq}}, \code{\link{GC.content}}, - \code{\link{theta.s}}, \code{\link{nuc.div}} + \code{\link{base.freq}}, \code{\link[pegas]{theta.s}}, + \code{\link[pegas]{nuc.div}} } \examples{ data(woodmouse) diff --git a/src/BIONJ.c b/src/BIONJ.c index d08d69d..56f2e51 100644 --- a/src/BIONJ.c +++ b/src/BIONJ.c @@ -1,727 +1,341 @@ -/* BIONJ.c 2012-02-09 */ +/* BIONJ.c 2012-04-30 */ /* Copyright 2007-2008 Olivier Gascuel, Hoa Sien Cuong, - R port by Vincent Lefort, bionj() below modified by Emmanuel Paradis */ + R port by Vincent Lefort and Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; BIONJ program ; -; ; -; Olivier Gascuel ; -; ; -; GERAD - Montreal- Canada ; -; olivierg@crt.umontreal.ca ; -; ; -; LIRMM - Montpellier- France ; -; gascuel@lirmm.fr ; -; ; -; UNIX version, written in C ; -; by Hoa Sien Cuong (Univ. Montreal) ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ +/* BIONJ program + + Olivier Gascuel + + GERAD - Montreal- Canada + olivierg@crt.umontreal.ca + + LIRMM - Montpellier- France + gascuel@lirmm.fr + + UNIX version, written in C + by Hoa Sien Cuong (Univ. Montreal) */ #include "me.h" -void Initialize(float **delta, double *X, char **labels, int n, POINTERS *trees); -void Print_outputChar(int i, POINTERS *trees, char *output); -void bionj(double *X, int *N, char **labels, int *edge1, int *edge2, double *el, char **tl); -int Symmetrize(float **delta, int n); -void Concatenate(char chain1[MAX_LABEL_LENGTH], int ind, POINTERS *trees, int post); +void Initialize(float **delta, double *X, int n); +void bionj(double *X, int *N, int *edge1, int *edge2, double *el); float Distance(int i, int j, float **delta); float Variance(int i, int j, float **delta); int Emptied(int i, float **delta); float Sum_S(int i, float **delta); void Compute_sums_Sx(float **delta, int n); void Best_pair(float **delta, int r, int *a, int *b, int n); -float Finish_branch_length(int i, int j, int k, float **delta); -void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree); float Agglomerative_criterion(int i, int j, float **delta, int r); float Branch_length(int a, int b, float **delta, int r); float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta); float Reduction10(int a, int b, int i, float lamda, float vab, float **delta); float Lamda(int a, int b, float vab, float **delta, int n, int r); -/* void printMat(float **delta, int n); */ - -/*;;;;;;;;;;; INPUT, OUTPUT, INITIALIZATION ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; ; -; The delta matrix is read from the input-file. ; -; It is recommended to put it and the executable in ; -; a special directory. The input-file and output-file ; -; can be given as arguments to the executable by ; -; typing them after the executable (Bionj input-file ; -; output-file) or by typing them when asked by the ; -; program. The input-file has to be formated according ; -; the PHYLIP standard. The output file is formated ; -; according to the NEWWICK standard. ; -; ; -; The lower-half of the delta matrix is occupied by ; -; dissimilarities. The upper-half of the matrix is ; -; occupied by variances. The first column ; -; is initialized as 0; during the algorithm some ; -; indices are no more used, and the corresponding ; -; positions in the first column are set to 1. ; -; ; -; This delta matix is made symmetrical using the rule: ; -; Dij = Dji <- (Dij + Dji)/2. The diagonal is set to 0; ; -; during the further steps of the algorithm, it is used ; -; to store the sums Sx. ; -; ; -; A second array, trees, is used to store taxon names. ; -; During the further steps of the algoritm, some ; -; positions in this array are emptied while the others ; -; are used to store subtrees. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - -/*;;;;;;;;;;;;;;;;;;;;;;;;;; Initialize ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function reads an input file and return the ; -; delta matrix and trees: the list of taxa. ; -; ; -; input : ; -; float **delta : delta matrix ; -; FILE *input : pointer to input file ; -; int n : number of taxa ; -; char **trees : list of taxa ; -; ; -; return value: ; -; float **delta : delta matrix ; -; char *trees : list of taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -//void Initialize(float **delta, FILE *input, int n, POINTERS *trees) -void Initialize(float **delta, double *X, char **labels, int n, POINTERS *trees) -{ - int lig; /* matrix line */ - int col; /* matrix column */ -// float distance; - //char name_taxon[LEN]; /* taxon name */ - char name_taxon[MAX_LABEL_LENGTH]; - char format[MAX_DIGITS]; - WORD *name; - - snprintf (format, MAX_DIGITS, "%%%ds", MAX_LABEL_LENGTH); - - for(lig=1; lig <= n; lig++) - { - //fscanf(input,"%s",name_taxon); /* read taxon name */ - //fscanf (input, format, name_taxon); /* read taxon name */ - strncpy (name_taxon, labels[lig-1], MAX_LABEL_LENGTH); - name=(WORD *)calloc(1,sizeof(WORD)); /* taxon name is */ - if(name == NULL) /* put in trees */ - { - error("out of memory"); - } - else - { - strncpy (name->name, name_taxon, MAX_LABEL_LENGTH); - name->suiv=NULL; - trees[lig].head=name; - trees[lig].tail=name; - for(col=lig; col <= n; col++) - { - //fscanf(input,"%f",&distance); /* read the distance */ -// &distance = X[XINDEX(lig,col)]; - delta[col][lig]=X[XINDEX(lig,col)]; - delta[lig][col]=X[XINDEX(lig,col)]; - if (lig==col) - delta[lig][col]=0; - } - } - } - return; -} +/* INPUT, OUTPUT, INITIALIZATION -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Print_output;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; ; -; Description : This function prints out the tree in the output file. ; -; ; -; input : ; -; POINTERS *trees : pointer to the subtrees. ; -; int i : indicate the subtree i to be printed. ; -: FILE *output : pointer to the output file. ; -; ; -; return value: The phylogenetic tree in the output file. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -void Print_outputChar(int i, POINTERS *trees, char *output) + The lower-half of the delta matrix is occupied by + dissimilarities. The upper-half of the matrix is + occupied by variances. The first column + is initialized as 0; during the algorithm some + indices are no more used, and the corresponding + positions in the first column are set to 1. */ + +/* -- Initialize -- + + This function reads an input data and returns the delta matrix + + input: float **delta : delta matrix + double *X : distances sent from R as a lower triangle matrix + int n : number of taxa + + output: float **delta : delta matrix initialized */ + +void Initialize(float **delta, double *X, int n) { - WORD *parcour; - parcour=trees[i].head; - while (parcour != NULL && (strlen (output) + strlen (parcour->name) < MAX_INPUT_SIZE)) - { - output = strncat (output, parcour->name, strlen (parcour->name)); - parcour=parcour->suiv; - } - return; + int i, j; /* matrix line and column indices */ + int k = 0; /* index along X */ + + for (i = 1; i < n; i++) + for (j = i + 1; j <= n; j++) + delta[i][j] = delta[j][i] = X[k++]; + + for (i = 1; i <= n; i++) + delta[i][i] = delta[i][0] = 0; } -//tree *bionj (FILE *input, boolean isNJ) -void bionj(double *X, int *N, char **labels, - int *edge1, int *edge2, double *el, char **tl) +void bionj(double *X, int *N, int *edge1, int *edge2, double *el) { - POINTERS *trees; /* list of subtrees */ - tree *T; /* the returned tree */ - char *chain1; /* stringized branch-length */ - char *str; /* the string containing final tree */ - int *a, *b; /* pair to be agglomerated */ - float **delta; /* delta matrix */ - float la; /* first taxon branch-length */ - float lb; /* second taxon branch-length */ - float vab; /* variance of Dab */ - float lamda = 0.5; - int i; -// int ok; - int r; /* number of subtrees */ - int n; /* number of taxa */ - int x, y; -// double t; - a=(int*)calloc(1,sizeof(int)); - b=(int*)calloc(1,sizeof(int)); - chain1=(char *)R_alloc(MAX_LABEL_LENGTH, sizeof(char)); - str = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(chain1)) - strncpy(chain1, "", strlen(chain1)); - if (strlen(str)) - strncpy(str, "", strlen(str)); - /* end */ - -// fscanf(input,"%d",&n); - n = *N; - /* Create the delta matrix */ - delta=(float **)calloc(n+1,sizeof(float*)); - for(i=1; i<= n; i++) - { - delta[i]=(float *)calloc(n+1, sizeof(float)); - if(delta[i] == NULL) - { - error("out of memory"); + int *a, *b; /* pair to be agglomerated */ + float **delta; /* delta matrix */ + float la; /* first taxon branch-length */ + float lb; /* second taxon branch-length */ + float vab; /* variance of Dab */ + float lamda = 0.5; + + int r; /* number of subtrees */ + int n; /* number of taxa */ + int i, x, y, curnod, k; + + int *ilab; /* indices of the tips (used as "labels") */ + + a = (int*)R_alloc(1, sizeof(int)); + b = (int*)R_alloc(1, sizeof(int)); + + n = *N; + + /* Create the delta matrix */ + delta = (float **)R_alloc(n + 1, sizeof(float*)); + for (i = 1; i <= n; i++) + delta[i] = (float *)R_alloc(n + 1, sizeof(float)); + + /* initialise */ + r = n; + *a = *b = 0; + Initialize(delta, X, n); + + ilab = (int *)R_alloc(n + 1, sizeof(int)); + for (i = 1; i <= n; i++) ilab[i] = i; + + curnod = 2 * n - 2; + k = 0; + + while (r > 3) { + + Compute_sums_Sx(delta, n); /* compute the sum Sx */ + Best_pair(delta, r, a, b, n); /* find the best pair by */ + vab = Variance(*a, *b, delta); /* minimizing (1) */ + la = Branch_length(*a, *b, delta, r); /* compute branch-lengths */ + lb = Branch_length(*b, *a, delta, r); /* using formula (2) */ + + lamda = Lamda(*a, *b, vab, delta, n, r); /* compute lambda* using (9)*/ + + edge1[k] = edge1[k + 1] = curnod; + edge2[k] = ilab[*a]; + edge2[k + 1] = ilab[*b]; + el[k] = la; + el[k + 1] = lb; + k = k + 2; + + for (i = 1; i <= n; i++) { + if (Emptied(i,delta) || (i == *a) || (i == *b)) continue; + if(*a > i) { + x = *a; + y = i; + } else { + x = i; + y = *a; + } + + /* apply reduction formulae 4 and 10 to delta */ + delta[x][y] = Reduction4(*a, la, *b, lb, i, lamda, delta); + delta[y][x] = Reduction10(*a, *b, i, lamda, vab, delta); + } + + delta[*b][0] = 1.0; /* make the b line empty */ + ilab[*a] = curnod; + curnod--; + r = r - 1; } - } - trees=(POINTERS *)calloc(n+1,sizeof(POINTERS)); - if(trees == NULL) - { - error("out of memory"); - } - /* initialise and symmetrize the running delta matrix */ - r=n; - *a=0; - *b=0; - Initialize(delta, X, labels, n, trees); -// ok=Symmetrize(delta, n); - -// if(!ok) -// Rprintf("\n The matrix is not symmetric.\n "); - while (r > 3) /* until r=3 */ - { - Compute_sums_Sx(delta, n); /* compute the sum Sx */ - Best_pair(delta, r, a, b, n); /* find the best pair by */ - vab=Variance(*a, *b, delta); /* minimizing (1) */ - la=Branch_length(*a, *b, delta, r); /* compute branch-lengths */ - lb=Branch_length(*b, *a, delta, r); /* using formula (2) */ -// if (!isNJ) - lamda=Lamda(*a, *b, vab, delta, n, r); /* compute lambda* using (9)*/ - for(i=1; i <= n; i++) - { - if(!Emptied(i,delta) && (i != *a) && (i != *b)) - { - if(*a > i) - { - x=*a; - y=i; - } - else - { - x=i; - y=*a; /* apply reduction formulae */ - } /* 4 and 10 to delta */ - delta[x][y]=Reduction4(*a, la, *b, lb, i, lamda, delta); - delta[y][x]=Reduction10(*a, *b, i, lamda, vab, delta); - } - } - strncpy(chain1,"",1); /* agglomerate the subtrees */ - strncat(chain1,"(",1); /* a and b together with the*/ - Concatenate(chain1, *a, trees, 0); /* branch-lengths according */ - strncpy(chain1,"",1); /* to the NEWWICK format */ - strncat(chain1,":",1); - snprintf(chain1+strlen(chain1),MAX_LABEL_LENGTH,"%f",la); - - strncat(chain1,",",1); - Concatenate(chain1,*a, trees, 1); - trees[*a].tail->suiv=trees[*b].head; - trees[*a].tail=trees[*b].tail; - strncpy(chain1,"",1); - strncat(chain1,":",1); - snprintf(chain1+strlen(chain1),MAX_LABEL_LENGTH,"%f",lb); - - strncat(chain1,")",1); - Concatenate(chain1, *a, trees, 1); - delta[*b][0]=1.0; /* make the b line empty */ - trees[*b].head=NULL; - trees[*b].tail=NULL; - r=r-1; - } - - FinishStr (delta, n, trees, str); /* compute the branch-lengths*/ - /* of the last three subtrees*/ - /* and print the tree in the */ - /* output-file */ - T = readNewickString (str, n); - T = detrifurcate(T); -// compareSets(T,species); - partitionSizes(T); - - tree2phylo(T, edge1, edge2, el, tl, n); /* by EP */ - - for(i=1; i<=n; i++) - { - delta[i][0]=0.0; - trees[i].head=NULL; - trees[i].tail=NULL; - } - free(delta); - free(trees); - /* free (str); */ - /* free (chain1); */ - free(a); - free(b); - freeTree(T); - T = NULL; -} -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Utilities ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Symmetrize ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function verifies if the delta matrix is symmetric; ; -; if not the matrix is made symmetric. ; -; ; -; input : ; -; float **delta : delta matrix ; -; int n : number of taxa ; -; ; -; return value: ; -; int symmetric : indicate if the matrix has been made ; -; symmetric or not ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -int Symmetrize(float **delta, int n) -{ - int lig; /* matrix line */ - int col; /* matrix column */ - float value; /* symmetrized value */ - int symmetric; - - symmetric=1; - for(lig=1; lig <= n; lig++) - { - for(col=1; col < lig; col++) - { - if(delta[lig][col] != delta[col][lig]) - { - value= (delta[lig][col]+delta[col][lig])/2; - delta[lig][col]=value; - delta[col][lig]=value; - symmetric=0; - } - } - } - return(symmetric); -} + /* finalise the three basal edges */ + int last[3]; + i = 1; + k = 0; + while (k < 3) { + if (!Emptied(i, delta)) last[k++] = i; + i++; + } + + for (i = 0, k = 2 * n - 4; i < 3; i++, k--) { + edge1[k] = curnod; /* <- the root at this stage */ + edge2[k] = ilab[last[i]]; + } + double D[3]; + D[0] = Distance(last[0], last[1], delta); + D[1] = Distance(last[0], last[2], delta); + D[2] = Distance(last[1], last[2], delta); -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Concatenate ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; ; -; Description : This function concatenates a string to another. ; -; ; -; input : ; -; char *chain1 : the string to be concatenated. ; -; int ind : indicate the subtree to which concatenate the ; -; string ; -; POINTERS *trees : pointer to subtrees. ; -; int post : position to which concatenate (front (0) or ; -; end (1)) ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -//void Concatenate(char chain1[LEN], int ind, POINTERS *trees, int post) -void Concatenate(char chain1[MAX_LABEL_LENGTH], int ind, POINTERS *trees, int post) -{ - WORD *bran; - - bran=(WORD *)calloc(1,sizeof(WORD)); - if(bran == NULL) - { - error("out of memory"); - } - else - { - strncpy(bran->name,chain1,MAX_LABEL_LENGTH); - bran->suiv=NULL; - } - if(post == 0) - { - bran->suiv=trees[ind].head; - trees[ind].head=bran; - } - else - { - trees[ind].tail->suiv=bran; - trees[ind].tail=trees[ind].tail->suiv; - } + el[2 * n - 4] = (D[0] + D[1] - D[2])/2; + el[2 * n - 5] = (D[0] + D[2] - D[1])/2; + el[2 * n - 6] = (D[2] + D[1] - D[0])/2; } +/* -- Distance -- + + This function retrieves and returns the distance between taxa + i and j from the delta matrix. + + input: int i : taxon i + int j : taxon j + float **delta : the delta matrix -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Distance;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieve ant return de distance between taxa ; -; i and j from the delta matrix. ; -; ; -; input : ; -; int i : taxon i ; -; int j : taxon j ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float distance : dissimilarity between the two taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + output: float distance : dissimilarity between the two taxa */ float Distance(int i, int j, float **delta) { - if(i > j) - return(delta[i][j]); - else - return(delta[j][i]); + if (i > j) return(delta[i][j]); + else return(delta[j][i]); } +/* -- Variance -- -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Variance;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieve and return the variance of the ; -; distance between i and j, from the delta matrix. ; -; ; -; input : ; -; int i : taxon i ; -; int j : taxon j ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float distance : the variance of Dij ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + This function retrieves and returns the variance of the + distance between i and j, from the delta matrix. + + input: int i : taxon i + int j : taxon j + float **delta : the delta matrix + + output: float distance : the variance of Dij */ float Variance(int i, int j, float **delta) { - if(i > j) - return(delta[j][i]); - else - return(delta[i][j]); + if (i > j) return(delta[j][i]); + else return(delta[i][j]); } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Emptied ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function verifie if a line is emptied or not. ; -; ; -; input : ; -; int i : subtree (or line) i ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; 0 : if not emptied. ; -; 1 : if emptied. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -int Emptied(int i, float **delta) /* test if the ith line is emptied */ +/* -- Emptied -- + + This function tests if a line is emptied or not. + + input: int i : subtree (or line) i + float **delta : the delta matrix + + output: 0 : if not emptied + 1 : if emptied */ + +int Emptied(int i, float **delta) { - return((int)delta[i][0]); + return((int)delta[i][0]); } +/* -- Sum_S -- + + This function retrieves the sum Sx from the diagonal + of the delta matrix -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Sum_S;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieves the sum Sx from the diagonal ; -; of the delta matrix. ; -; ; -; input : ; -; int i : subtree i ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float delta[i][i] : sum Si ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -float Sum_S(int i, float **delta) /* get sum Si form the diagonal */ + input: int i : subtree i + float **delta : the delta matrix + + output: float delta[i][i] : sum Si */ + +float Sum_S(int i, float **delta) { - return(delta[i][i]); + return(delta[i][i]); } +/* -- Compute_sums_Sx -- + + This function computes the sums Sx and stores them in the + diagonal the delta matrix. + + input: float **delta : the delta matrix + int n : the number of taxa */ -/*;;;;;;;;;;;;;;;;;;;;;;;Compute_sums_Sx;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function computes the sums Sx and store them in the ; -; diagonal the delta matrix. ; -; ; -; input : ; -; float **delta : the delta matrix. ; -; int n : the number of taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ void Compute_sums_Sx(float **delta, int n) { - float sum; - int i; - int j; - - for(i= 1; i <= n ; i++) - { - if(!Emptied(i,delta)) - { - sum=0; - for(j=1; j <=n; j++) - { - if(i != j && !Emptied(j,delta)) /* compute the sum Si */ - sum=sum + Distance(i,j,delta); - } + float sum; + int i, j; + + for (i = 1; i <= n ; i++) { + if (Emptied(i, delta)) continue; + sum = 0; + for (j = 1; j <= n; j++) { + if (i == j || Emptied(j, delta)) continue; + sum += Distance(i, j, delta); /* compute the sum Si */ + } + delta[i][i] = sum; } - delta[i][i]=sum; /* store the sum Si in */ - } /* delta’s diagonal */ } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Best_pair;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function finds the best pair to be agglomerated by ; -; minimizing the agglomerative criterion (1). ; -; ; -; input : ; -; float **delta : the delta matrix ; -; int r : number of subtrees ; -; int *a : contain the first taxon of the pair ; -; int *b : contain the second taxon of the pair ; -; int n : number of taxa ; -; ; -; return value: ; -; int *a : the first taxon of the pair ; -; int *b : the second taxon of the pair ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ +/* -- Best_pair -- -void Best_pair(float **delta, int r, int *a, int *b, int n) -{ - float Qxy; /* value of the criterion calculated*/ - int x,y; /* the pair which is tested */ - float Qmin; /* current minimun of the criterion */ - - Qmin=1.0e300; - for(x=1; x <= n; x++) - { - if(!Emptied(x,delta)) - { - for(y=1; y < x; y++) - { - if(!Emptied(y,delta)) - { - Qxy=Agglomerative_criterion(x,y,delta,r); - if(Qxy < Qmin-0.000001) - { - Qmin=Qxy; - *a=x; - *b=y; - } - } - } - } - } -} + This function finds the best pair to be agglomerated by + minimizing the agglomerative criterion (1). + input: float **delta : the delta matrix + int r : number of subtrees + int *a : contain the first taxon of the pair + int *b : contain the second taxon of the pair + int n : number of taxa + + output: int *a : the first taxon of the pair + int *b : the second taxon of the pair */ -/*;;;;;;;;;;;;;;;;;;;;;;Finish_branch_length;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : Compute the length of the branch attached ; -; to the subtree i, during the final step ; -; ; -; input : ; -; int i : position of subtree i ; -; int j : position of subtree j ; -; int k : position of subtree k ; -; float **delta : ; -; ; -; return value: ; -; float length : The length of the branch ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -float Finish_branch_length(int i, int j, int k, float **delta) -{ - float length; - length=0.5*(Distance(i,j,delta) + Distance(i,k,delta) - -Distance(j,k,delta)); - return(length); -} -void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree) +void Best_pair(float **delta, int r, int *a, int *b, int n) { - int l=1; - int i=0; - float length; - char *tmp; - WORD *bidon; - WORD *ele; - int last[3]; /* the last three subtrees */ - - while(l <= n) - { /* find the last tree subtree */ - if(!Emptied(l, delta)) - { - last[i]=l; - i++; - } - l++; - } - tmp = (char*) calloc (12, sizeof(char)); - StrTree[0]='('; - - length=Finish_branch_length(last[0],last[1],last[2],delta); - Print_outputChar (last[0], trees, StrTree); - snprintf (tmp, 12, "%f,", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - length=Finish_branch_length(last[1],last[0],last[2],delta); - Print_outputChar (last[1], trees, StrTree); - snprintf (tmp, 12, "%f,", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - length=Finish_branch_length(last[2],last[1],last[0],delta); - Print_outputChar (last[2], trees, StrTree); - snprintf (tmp, 12, "%f", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - if (strlen (StrTree) < MAX_INPUT_SIZE-3) - strncat (StrTree, ");", 3); - - free (tmp); - for(i=0; i < 3; i++) - { - bidon=trees[last[i]].head; - ele=bidon; - while(bidon!=NULL) - { - ele=ele->suiv; - free(bidon); - bidon=ele; + float Qxy; /* value of the criterion calculated */ + int x, y; /* the pair which is tested */ + float Qmin; /* current minimun of the criterion */ + + Qmin = 1.0e300; + for (x = 1; x <= n; x++) { + if (Emptied(x, delta)) continue; + for (y = 1; y < x; y++) { + if (Emptied(y, delta)) continue; + Qxy = Agglomerative_criterion(x, y, delta, r); + if (Qxy < Qmin - 0.000001) { + Qmin = Qxy; + *a = x; + *b = y; + } + } } - } - return; } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Formulae ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - +/* Formulae */ +/* Formula (1) */ float Agglomerative_criterion(int i, int j, float **delta, int r) { - float Qij; - Qij=(r-2)*Distance(i,j,delta) /* Formula (1) */ - -Sum_S(i,delta) - -Sum_S(j,delta); - - return(Qij); + return((r - 2) * Distance(i, j, delta) - + Sum_S(i, delta) - Sum_S(j, delta)); } - +/* Formula (2) */ float Branch_length(int a, int b, float **delta, int r) { - float length; - length=0.5*(Distance(a,b,delta) /* Formula (2) */ - +(Sum_S(a,delta) - -Sum_S(b,delta))/(r-2)); - return(length); + return(0.5 * (Distance(a, b, delta) + + (Sum_S(a, delta) - Sum_S(b, delta))/(r - 2))); } - +/* Formula (4) */ float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta) { - float Dui; - Dui=lamda*(Distance(a,i,delta)-la) - +(1-lamda)*(Distance(b,i,delta)-lb); /* Formula (4) */ - return(Dui); + return(lamda * (Distance(a, i, delta) - la) + + (1 - lamda) * (Distance(b, i, delta) - lb)); } - +/* Formula (10) */ float Reduction10(int a, int b, int i, float lamda, float vab, float **delta) { - float Vci; - Vci=lamda*Variance(a,i,delta)+(1-lamda)*Variance(b,i,delta) - -lamda*(1-lamda)*vab; /*Formula (10) */ - return(Vci); + return(lamda * Variance(a, i, delta) + (1 - lamda) * Variance(b, i, delta) + - lamda * (1 - lamda) * vab); } float Lamda(int a, int b, float vab, float **delta, int n, int r) { - float lamda=0.0; - int i; - - if(vab==0.0) - lamda=0.5; - else - { - for(i=1; i <= n ; i++) - { - if(a != i && b != i && !Emptied(i,delta)) - lamda=lamda + (Variance(b,i,delta) - Variance(a,i,delta)); + float lamda = 0.0; + int i; + + if (vab == 0.0) lamda = 0.5; else { + for (i = 1; i <= n ; i++) { + if (a == i || b == i || Emptied(i, delta)) continue; + lamda += (Variance(b, i, delta) - Variance(a, i, delta)); + } + lamda = 0.5 + lamda/(2 * (r - 2) * vab); /* Formula (9) */ } - lamda=0.5 + lamda/(2*(r-2)*vab); - } /* Formula (9) and the */ - if(lamda > 1.0) /* constraint that lamda*/ - lamda = 1.0; /* belongs to [0,1] */ - if(lamda < 0.0) - lamda=0.0; - return(lamda); + if (lamda > 1.0) lamda = 1.0; /* force 0 < lamda < 1 */ + if (lamda < 0.0) lamda = 0.0; + return(lamda); } -/* -void printMat(float **delta, int n) -{ - int i, j; - for (i=1; i<=n; i++) { - for (j=1; j<=n; j++) - Rprintf ("%f ", delta[i][j]); - Rprintf("\n"); - } - Rprintf("\n"); - return; -} -*/ diff --git a/src/NNI.c b/src/NNI.c index a63c725..08df539 100644 --- a/src/NNI.c +++ b/src/NNI.c @@ -1,3 +1,10 @@ +/* NNI.c 2007-09-05 */ + +/* Copyright 2007 Vincent Lefort */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" //boolean leaf(node *v); diff --git a/src/SPR.c b/src/SPR.c index 82f0a67..881cd8f 100644 --- a/src/SPR.c +++ b/src/SPR.c @@ -1,8 +1,10 @@ -/* #include */ -/* #include */ -/* #include */ -/* #include "graph.h" */ -/* #include "fastme.h" */ +/* SPR.c 2009-01-12 */ + +/* Copyright 2009 Richard Desper */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" /*functions from bNNI.c*/ diff --git a/src/TBR.c b/src/TBR.c index d52f4b2..80050f2 100644 --- a/src/TBR.c +++ b/src/TBR.c @@ -1,8 +1,10 @@ -/* #include */ -/* #include */ -/* #include */ -/* #include "graph.h" */ -/* #include */ +/* TBR.c 2009-01-12 */ + +/* Copyright 2009 Richard Desper */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" /*functions from me_balanced.c*/ diff --git a/src/bNNI.c b/src/bNNI.c index 9363ea5..3f101ed 100644 --- a/src/bNNI.c +++ b/src/bNNI.c @@ -1,3 +1,10 @@ +/* bNNI.c 2007-09-05 */ + +/* Copyright 2007 Vincent Lefort */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" /*boolean leaf(node *v); diff --git a/src/delta_plot.c b/src/delta_plot.c index 73afbd4..193c109 100644 --- a/src/delta_plot.c +++ b/src/delta_plot.c @@ -22,31 +22,11 @@ void delta_plot(double *D, int *size, int *nbins, int *counts, double *deltabar) xu = xy + 1; for (u = y + 1; u < n - 1; u++, xu++, yu++) { uv = u*n - u*(u + 1)/2; /* do NOT factorize */ - dxu = D[xu]; - dyu = D[yu]; - xv = xu + 1; - yv = yu + 1; + dxu = D[xu]; dyu = D[yu]; + xv = xu + 1; yv = yu + 1; for (v = u + 1; v < n; v++, xv++, yv++, uv++) { - dxv = D[xv]; - dyv = D[yv]; - duv = D[uv]; - /* if (dxy <= dxu && dxy <= dxv && dxy <= dyu && dxy <= dyv && dxy <= duv) k=1; */ -/* else if (duv <= dxy && duv <= dxu && duv <= dxv && duv <= dyu && duv <= dyv) k=1; */ -/* else if (dxu <= dxy && dxu <= dxv && dxu <= dyu && dxu <= dyv && dxu <= duv) k=2; */ -/* else if (dyv <= dxy && dyv <= dxu && dyv <= dxv && dyv <= dyu && dyv <= duv) k=2; */ -/* else if (dxv <= dxy && dxv <= dxu && dxv <= dyu && dxv <= dyv && dxv <= duv) k=3; */ -/* else if (dyu <= dxy && dyu <= dxu && dyu <= dxv && dyu <= dyv && dyu <= duv) k=3; */ - //Rprintf("%d\t%d\t%d\t%d\t%d\t%d\t\n", xy, xu, xv, yu, yv, uv); - //Rprintf("dxy = %f\tdxu = %f\tdyu = %f\n", dxy, dxu, dyu); - //Rprintf("D[xv] = %f\tD[yv] = %f\tD[uv] = %f\n", D[xv], D[yv], D[uv]); - /* switch (k) { */ -/* case 1 : A = dxv + dyu; B = dxu + dyv; C = dxy + duv; break; */ -/* case 2 : A = dxv + dyu; B = dxy + duv; C = dxu + dyv; break; */ -/* case 3 : A = dxu + dyv; B = dxy + duv; C = dxv + dyu; break; */ -/* } */ - //Rprintf("A = %f\tB = %f\tC = %f\n", A, B, C); + dxv = D[xv]; dyv = D[yv]; duv = D[uv]; A = dxv + dyu; B = dxu + dyv; C = dxy + duv; - //Rprintf("A = %f\tB = %f\tC = %f\n", A, B, C); if (A == B && B == C) delta = 0; else while (1) { if (C <= B && B <= A) {delta = (A - B)/(A - C); break;} if (B <= C && C <= A) {delta = (A - C)/(A - B); break;} @@ -57,7 +37,6 @@ void delta_plot(double *D, int *size, int *nbins, int *counts, double *deltabar) } /* if (delta == 1) i = nb - 1; else */ i = delta * nb; - //Rprintf("delta = %f\ti = %d\n", delta, i); counts[i] += 1; deltabar[x] += delta; deltabar[y] += delta; diff --git a/src/dist_dna.c b/src/dist_dna.c index ab630a9..67522ff 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1040,52 +1040,6 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -/* void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) */ -/* { */ -/* int i, m; */ - -/* m = 0; */ -/* for (i = 0; i < *n; i++) { */ -/* if (KnownBase(x[i])) { */ -/* m++; */ -/* switch (x[i]) { */ -/* case 136 : BF[0]++; break; */ -/* case 40 : BF[1]++; break; */ -/* case 72 : BF[2]++; break; */ -/* case 24 : BF[3]++; break; */ -/* } */ -/* } */ -/* } */ -/* if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; */ -/* } */ - -/* void BaseProportion(unsigned char *x, int *n, double *BF) */ -/* { */ -/* int i; */ - -/* for (i = 0; i < *n; i++) { */ -/* switch (x[i]) { */ -/* case 136 : BF[0]++; break; */ -/* case 40 : BF[1]++; break; */ -/* case 72 : BF[2]++; break; */ -/* case 24 : BF[3]++; break; */ -/* case 192 : BF[4]++; break; */ -/* case 160 : BF[5]++; break; */ -/* case 144 : BF[6]++; break; */ -/* case 96 : BF[7]++; break; */ -/* case 80 : BF[8]++; break; */ -/* case 48 : BF[9]++; break; */ -/* case 224 : BF[10]++; break; */ -/* case 176 : BF[11]++; break; */ -/* case 208 : BF[12]++; break; */ -/* case 112 : BF[13]++; break; */ -/* case 240 : BF[14]++; break; */ -/* case 4 : BF[15]++; break; */ -/* case 2 : BF[16]++; break; */ -/* } */ -/* } */ -/* } */ - /* a hash table is much faster than switch (2012-01-10) */ void BaseProportion(unsigned char *x, int *n, double *BF) { diff --git a/src/heap.c b/src/heap.c index fa007f3..ddc1e84 100644 --- a/src/heap.c +++ b/src/heap.c @@ -1,3 +1,10 @@ +/* heap.c 2007-09-05 */ + +/* Copyright 2007 Vincent Lefort */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" int *initPerm(int size) diff --git a/src/me.c b/src/me.c index 6841802..8a00591 100644 --- a/src/me.c +++ b/src/me.c @@ -1,7 +1,7 @@ -/* me.c 2012-02-09 */ +/* me.c 2012-04-30 */ /* Copyright 2007-2008 Olivier Gascuel, Rick Desper, - R port by Vincent Lefort, me_*() below modified by Emmanuel Paradis */ + R port by Vincent Lefort and Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -26,9 +26,9 @@ void SPR(tree *T, double **D, double **A, int *count); void TBR(tree *T, double **D, double **A); -void me_b(double *X, int *N, char **labels, +void me_b(double *X, int *N, int *labels, int *nni, int *spr, int *tbr, - int *edge1, int *edge2, double *el, char **tl) + int *edge1, int *edge2, double *el) { double **D, **A; set *species, *slooper; @@ -57,7 +57,7 @@ void me_b(double *X, int *N, char **labels, if (*spr) SPR(T, D, A, &nniCount); if (*tbr) TBR(T, D, A); - tree2phylo(T, edge1, edge2, el, tl, n); + tree2phylo(T, edge1, edge2, el, labels, n); freeMatrix(D,n); freeMatrix(A,2*n - 2); @@ -66,8 +66,8 @@ void me_b(double *X, int *N, char **labels, T = NULL; } -void me_o(double *X, int *N, char **labels, int *nni, - int *edge1, int *edge2, double *el, char **tl) +void me_o(double *X, int *N, int *labels, int *nni, + int *edge1, int *edge2, double *el) { double **D, **A; set *species, *slooper; @@ -96,7 +96,7 @@ void me_o(double *X, int *N, char **labels, int *nni, NNI(T,A,&nniCount,D,n); assignOLSWeights(T,A); - tree2phylo(T, edge1, edge2, el, tl, n); + tree2phylo(T, edge1, edge2, el, labels, n); freeMatrix(D,n); freeMatrix(A,2*n - 2); @@ -105,11 +105,11 @@ void me_o(double *X, int *N, char **labels, int *nni, T = NULL; } -/************************************************************************* +/* - MATRIX FUNCTIONS + -- MATRIX FUNCTIONS -- -*************************************************************************/ +*/ double **initDoubleMatrix(int d) { @@ -125,9 +125,10 @@ double **initDoubleMatrix(int d) return(A); } -double **loadMatrix (double *X, char **labels, int n, set *S) +//double **loadMatrix (double *X, char **labels, int n, set *S) +double **loadMatrix (double *X, int *labels, int n, set *S) { - char nextString[MAX_LABEL_LENGTH]; +// char nextString[MAX_LABEL_LENGTH]; node *v; double **table; int i, j, a, b; @@ -138,9 +139,10 @@ double **loadMatrix (double *X, char **labels, int n, set *S) for(i=0; iindex2 = i; S = addToSet(v,S); for (j=i; jlabel,label,NODE_LABEL_LENGTH); +// strncpy(newNode->label,label,NODE_LABEL_LENGTH); + newNode->label = label; newNode->index = index; newNode->index2 = -1; newNode->parentEdge = parentEdge; @@ -274,7 +279,7 @@ tree *detrifurcate(tree *T) return(T); if (NULL != v->parentEdge) { - error("root %s is poorly rooted.", v->label); + error("root %d is poorly rooted.", v->label); } for(e = v->middleEdge, v->middleEdge = NULL; NULL != e; e = f ) { @@ -303,7 +308,8 @@ void compareSets(tree *T, set *S) for(X = S; NULL != X; X = X->secondNode) { w = X->firstNode; - if (0 == strcmp(v->label,w->label)) +// if (0 == strcmp(v->label,w->label)) + if (v->label == w->label) { v->index2 = w->index2; w->index2 = -1; @@ -316,7 +322,8 @@ void compareSets(tree *T, set *S) for(X = S; NULL != X; X = X->secondNode) { w = X->firstNode; - if (0 == strcmp(v->label,w->label)) +// if (0 == strcmp(v->label,w->label)) + if (v->label == w->label) { v->index2 = w->index2; w->index2 = -1; @@ -325,7 +332,7 @@ void compareSets(tree *T, set *S) } if (-1 == v->index2) { - error("leaf %s in tree not in distance matrix.", v->label); + error("leaf %d in tree not in distance matrix.", v->label); } e = depthFirstTraverse(T,NULL); while (NULL != e) @@ -333,14 +340,14 @@ void compareSets(tree *T, set *S) v = e->head; if ((leaf(v)) && (-1 == v->index2)) { - error("leaf %s in tree not in distance matrix.", v->label); + error("leaf %d in tree not in distance matrix.", v->label); } e = depthFirstTraverse(T,e); } for(X = S; NULL != X; X = X->secondNode) if (X->firstNode->index2 > -1) { - error("node %s in matrix but not a leaf in tree.", X->firstNode->label); + error("node %d in matrix but not a leaf in tree.", X->firstNode->label); } return; } diff --git a/src/me.h b/src/me.h index 0616592..dc5e3ee 100644 --- a/src/me.h +++ b/src/me.h @@ -1,4 +1,4 @@ -/* me.h 2008-01-14 */ +/* me.h 2012-04-30 */ /* Copyright 2007-2008 Vincent Lefort, modified by Emmanuel Paradis */ @@ -28,9 +28,9 @@ #ifndef MAX_LABEL_LENGTH #define MAX_LABEL_LENGTH 30 #endif -#ifndef NODE_LABEL_LENGTH -#define NODE_LABEL_LENGTH 30 -#endif +//#ifndef NODE_LABEL_LENGTH +//#define NODE_LABEL_LENGTH 30 +//#endif #ifndef EDGE_LABEL_LENGTH #define EDGE_LABEL_LENGTH 30 #endif @@ -77,7 +77,7 @@ typedef struct pointers } POINTERS; typedef struct node { - char label[NODE_LABEL_LENGTH]; + int label; /* char label[NODE_LABEL_LENGTH]; */ struct edge *parentEdge; struct edge *leftEdge; struct edge *middleEdge; @@ -109,14 +109,13 @@ typedef struct set struct set *secondNode; } set; -void me_b(double *X, int *N, char **labels, int *nni, int *spr, int *tbr, int *edge1, int *edge2, double *el, char **tl); -void me_o(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl); -//int whiteSpace(char c); +void me_b(double *X, int *N, int *labels, int *nni, int *spr, int *tbr, int *edge1, int *edge2, double *el); +void me_o(double *X, int *N, int *labels, int *nni, int *edge1, int *edge2, double *el); double **initDoubleMatrix(int d); -double **loadMatrix (double *X, char **labels, int n, set *S); +double **loadMatrix (double *X, int *labels, int n, set *S); set *addToSet(node *v, set *X); -node *makeNewNode(char *label, int i); -node *makeNode(char *label, edge *parentEdge, int index); +node *makeNewNode(int label, int i); +node *makeNode(int label, edge *parentEdge, int index); node *copyNode(node *v); edge *siblingEdge(edge *e); edge *makeEdge(char *label, node *tail, node *head, double weight); @@ -134,9 +133,9 @@ void freeMatrix(double **D, int size); void freeSet(set *S); void freeTree(tree *T); void freeSubTree(edge *e); -int whiteSpace(char c); int leaf(node *v); -node *decodeNewickSubtree(char *treeString, tree *T, int *uCount); -tree *readNewickString (char *str, int numLeaves); -void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, char **tl); -void tree2phylo(tree *T, int *edge1, int *edge2, double *el, char **tl, int n); +/* int whiteSpace(char c); */ +/* node *decodeNewickSubtree(char *treeString, tree *T, int *uCount); */ +/* tree *readNewickString (char *str, int numLeaves); */ +void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, int *ilab); +void tree2phylo(tree *T, int *edge1, int *edge2, double *el, int *ilab, int n); diff --git a/src/me_balanced.c b/src/me_balanced.c index 4484d18..cd13569 100644 --- a/src/me_balanced.c +++ b/src/me_balanced.c @@ -1,4 +1,12 @@ -#include "me.h" +/* me_balanced.c 2012-04-30 */ + +/* Copyright 2007 Vincent Lefort + BMEsplitEdge() modified by Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + +##include "me.h" void BalWFext(edge *e, double **A) /*works except when e is the one edge inserted to new vertex v by firstInsert*/ @@ -248,10 +256,10 @@ void BMEsplitEdge(tree *T, node *v, edge *e, double **A) edge *newPendantEdge; edge *newInternalEdge; node *newNode; - char nodeLabel[NODE_LABEL_LENGTH]; + int nodeLabel = 0;//char nodeLabel[NODE_LABEL_LENGTH]; char edgeLabel1[EDGE_LABEL_LENGTH]; char edgeLabel2[EDGE_LABEL_LENGTH]; - snprintf(nodeLabel,1,""); + //snprintf(nodeLabel,1,""); //sprintf(edgeLabel1,"E%d",T->size); //sprintf(edgeLabel2,"E%d",T->size+1); snprintf(edgeLabel1,EDGE_LABEL_LENGTH,"E%d",T->size); diff --git a/src/me_ols.c b/src/me_ols.c index d9ce297..cd15644 100644 --- a/src/me_ols.c +++ b/src/me_ols.c @@ -1,3 +1,11 @@ +/* me_ols.c 2012-04-30 */ + +/* Copyright 2007 Vincent Lefort + GMEsplitEdge() modified by Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" /*from NNI.c*/ @@ -396,13 +404,13 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) void GMEsplitEdge(tree *T, node *v, edge *e, double **A) { - char nodelabel[NODE_LABEL_LENGTH]; + int nodelabel = 0;//char nodelabel[NODE_LABEL_LENGTH]; char edgelabel[EDGE_LABEL_LENGTH]; edge *newPendantEdge; edge *newInternalEdge; node *newNode; - snprintf(nodelabel,1,""); + //snprintf(nodelabel,1,""); newNode = makeNewNode(nodelabel,T->size + 1); //sprintf(edgelabel,"E%d",T->size); diff --git a/src/newick.c b/src/newick.c deleted file mode 100644 index 09380da..0000000 --- a/src/newick.c +++ /dev/null @@ -1,232 +0,0 @@ -/* newick.c 2012-02-09 */ - -/* Copyright 2007-2008 Vincent Lefort */ - -/* This file is part of the R-package `ape'. */ -/* See the file ../COPYING for licensing issues. */ - -#include "me.h" - -int nodeCount; -int edgeCount; - -int whiteSpace(char c) -{ - if ((' ' == c) || ('\t' == c) || ('\n' == c)) - return(1); - else return(0); -} - -int leaf(node *v) -{ - int count = 0; - if (NULL != v->parentEdge) - count++; - if (NULL != v->leftEdge) - count++; - if (NULL != v->rightEdge) - count++; - if (NULL != v->middleEdge) - count++; - if (count > 1) - return(0); - return(1); -} - -/*decodeNewickSubtree is used to turn a string of the form - "(v1:d1,v2:d2,(subtree) v3:d3....vk:dk) subroot:d," into a subtree - rooted at subrooted, with corresponding subtrees and leaves at v1 - through vk. It is called recursively on subtrees*/ - -node *decodeNewickSubtree(char *treeString, tree *T, int *uCount) -{ - node *thisNode; - node *centerNode; - double thisWeight; - edge *thisEdge; -// char label[MAX_LABEL_LENGTH]; - char stringWeight[MAX_LABEL_LENGTH]; - int state; - int i = 0; - int j; - int left,right; - int parcount; -// snprintf(label,14,"Default_Label\0"); - left = right = 0; - parcount = 0; - state = ReadOpenParenthesis; - if('(' == treeString[0]) - parcount++; - //centerNode = makeNode(label,NULL,nodeCount++); - centerNode = makeNode("",NULL,nodeCount++); - T->size++; - while(parcount > 0) - { - while(whiteSpace(treeString[i])) - i++; - switch(state) - { - case(ReadOpenParenthesis): - if('(' != treeString[0]) - { - error("error reading subtree"); - } - i++; - state = ReadSubTree; - break; - case(ReadSubTree): - if('(' == treeString[i]) /*if treeString[i] is a left parenthesis, - we scan down the string until we find its partner. - the relative number of '('s vs. ')'s is counted - by the variable parcount*/ - { - left = i++; - parcount++; - while(parcount > 1) - { - while (('(' != treeString[i]) && (')' != treeString[i])) - i++; /*skip over characters which are not parentheses*/ - if('(' == treeString[i]) - parcount++; - else if (')' == treeString[i]) - parcount--; - i++; - } /*end while */ - right = i; /*at this point, the subtree string goes from - treeString[left] to treeString[right - 1]*/ - thisNode = decodeNewickSubtree(treeString + left,T,uCount); /*note that this - step will put - thisNode in T*/ - i = right; /*having created the node for the subtree, we move - to assigning the label for the new node. - treeString[right] will be the start of this label */ - } /* end if ('(' == treeString[i]) condition */ - else - { - //thisNode = makeNode(label,NULL,nodeCount++); - thisNode = makeNode("",NULL,nodeCount++); - T->size++; - } - state = ReadLabel; - break; - case(ReadLabel): - left = i; /*recall "left" is the left marker for the substring, "right" the right*/ - if (':' == treeString[i]) /*this means an internal node?*/ - { - //sprintf(thisNode->label,"I%d",(*uCount)++); - //snprintf(thisNode->label,MAX_LABEL_LENGTH,"I%d",(*uCount)++); - (*uCount)++; - right = i; - } - else - { - while((':' != treeString[i]) && (',' != treeString[i]) && (')' != treeString[i])) - i++; - right = i; - j = 0; - for(i = left; i < right;i++) - if(!(whiteSpace(treeString[i]))) - thisNode->label[j++] = treeString[i]; - thisNode->label[j] = '\0'; - } - if(':' == treeString[right]) - state = ReadWeight; - else - { - state = AddEdge; - thisWeight = 0.0; - } - i = right + 1; - break; - case(ReadWeight): - left = i; - while - ((',' != treeString[i]) && (')' != treeString[i])) - i++; - right = i; - j = 0; - for(i = left; i < right; i++) - stringWeight[j++] = treeString[i]; - stringWeight[j] = '\0'; - thisWeight = atof(stringWeight); - state=AddEdge; - break; - case(AddEdge): - //thisEdge = makeEdge(label,centerNode,thisNode,thisWeight); - thisEdge = makeEdge("",centerNode,thisNode,thisWeight); - thisNode->parentEdge = thisEdge; - if (NULL == centerNode->leftEdge) - centerNode->leftEdge = thisEdge; - else if (NULL == centerNode->rightEdge) - centerNode->rightEdge = thisEdge; - else if (NULL == centerNode->middleEdge) - centerNode->middleEdge = thisEdge; - else - { - error("node %s has too many (>3) children.", centerNode->label); - } - //sprintf(thisEdge->label,"E%d",edgeCount++); - //snprintf(thisEdge->label,MAX_LABEL_LENGTH,"E%d",edgeCount++); - edgeCount++; - i = right + 1; - if (',' == treeString[right]) - state = ReadSubTree; - else - parcount--; - break; - } - } - return(centerNode); -} - -tree *readNewickString (char *str, int numLeaves) -{ - tree *T; - node *centerNode; - int i = 0; - int j = 0; - int inputLength; - int uCount = 0; - int parCount = 0; - char rootLabel[MAX_LABEL_LENGTH]; - nodeCount = edgeCount = 0; - - T = newTree(); - - if ('(' != str[0]) - { - error("generated tree does not start with '('"); - } - inputLength = strlen (str)+1; - for(i = 0; i < inputLength; i++) - { - if ('(' == str[i]) - parCount++; - else if (')' == str[i]) - parCount--; - if (parCount > 0) - ; - else if (0 == parCount) - { - i++; -/* if(';' == str[i]) - sprintf(rootLabel,"URoot"); - else - {*/ - while(';' != str[i]) - if (!(whiteSpace (str[i++]))) - rootLabel[j++] = str[i-1]; /*be careful here */ - rootLabel[j] = '\0'; -// } - i = inputLength; - } - else if (parCount < 0) - { - error("generated tree has too many right parentheses"); - } - } - centerNode = decodeNewickSubtree (str, T, &uCount); - snprintf (centerNode->label, MAX_LABEL_LENGTH, "%s", rootLabel); /* added "%s" following Jos Kafer's suggestion (2010-11-23) */ - T->root = centerNode; - return(T); -} diff --git a/src/tree_phylo.c b/src/tree_phylo.c index 63f677d..17badca 100644 --- a/src/tree_phylo.c +++ b/src/tree_phylo.c @@ -1,31 +1,39 @@ -/* tree_phylo.c 2008-01-14 */ +/* tree_phylo.c 2012-04-30 */ -/* Copyright 2008 Emmanuel Paradis */ +/* Copyright 2008-2012 Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ #include "me.h" -/* from newick.c */ -int leaf(node *v); - static int curnod, curtip, iedge; #define DO_EDGE\ el[iedge] = EDGE->distance;\ if (leaf(EDGE->head)) {\ edge2[iedge] = curtip;\ - strncpy(tl[curtip - 1], EDGE->head->label, 6);\ + ilab[curtip - 1] = EDGE->head->label;\ iedge++;\ curtip++;\ } else {\ edge2[iedge] = curnod;\ iedge++;\ - subtree2phylo(EDGE->head, edge1, edge2, el, tl);\ + subtree2phylo(EDGE->head, edge1, edge2, el, ilab);\ } -void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, char **tl) +int leaf(node *v) +{ + int count = 0; + if (NULL != v->parentEdge) count++; + if (NULL != v->leftEdge) count++; + if (NULL != v->rightEdge) count++; + if (NULL != v->middleEdge) count++; + if (count > 1) return(0); + return(1); +} + +void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, int *ilab) { edge *EDGE; int localnode; @@ -44,9 +52,9 @@ void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, char **tl) /* transforms a 'tree' struc of pointers into an object of class "phylo" assumes the tree is unrooted and binary, so there are 2n - 3 edges -assumes labels are 6-char long +assumes labels are int */ -void tree2phylo(tree *T, int *edge1, int *edge2, double *el, char **tl, int n) +void tree2phylo(tree *T, int *edge1, int *edge2, double *el, int *ilab, int n) { edge *EDGE; curnod = n + 1; /* the root for ape */ @@ -57,7 +65,7 @@ void tree2phylo(tree *T, int *edge1, int *edge2, double *el, char **tl, int n) EDGE = T->root->leftEdge; edge1[0] = curnod; edge2[0] = 1; /* <- the 1st tip */ - strncpy(tl[0], T->root->label, 6); + ilab[0] = T->root->label; el[0] = EDGE->distance; /* now can initialize these two: */ curtip = 2; /* <- the 2nd tip */ @@ -67,5 +75,5 @@ void tree2phylo(tree *T, int *edge1, int *edge2, double *el, char **tl, int n) /* 'T->root->leftEdge->head' is the root for ape, so don't need to test if it's a leaf */ - subtree2phylo(EDGE->head, edge1, edge2, el, tl); + subtree2phylo(EDGE->head, edge1, edge2, el, ilab); } -- 2.39.2