From d88302b4735b5b7c9132387090bb592d906fe1cb Mon Sep 17 00:00:00 2001 From: paradis Date: Mon, 14 Jan 2008 13:33:26 +0000 Subject: [PATCH] stabler and faster C code for ME and BIONJ package loading now silent and/or when needed fix small bug in identify.phylo git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@13 6e262413-ae40-0410-9e79-b911bd7a66b7 --- Changes | 12 ++ DESCRIPTION | 2 +- R/MoranI.R | 16 +-- R/compar.gee.R | 5 +- R/identify.phylo.R | 6 +- R/me.R | 59 +++++---- R/nodelabels.R | 2 +- R/plot.phylo.R | 5 +- R/rtree.R | 8 +- R/varcomp.R | 2 +- R/zzz.R | 8 +- man/fastme.Rd | 2 - man/rtree.Rd | 17 +-- src/BIONJ.c | 227 +++++++++++----------------------- src/bipartition.c | 3 +- src/me.c | 69 +++-------- src/me.h | 41 +++---- src/newick.c | 295 ++------------------------------------------- 18 files changed, 201 insertions(+), 578 deletions(-) diff --git a/Changes b/Changes index adeee97..af1c218 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,18 @@ NEW FEATURES o The new function rmtree generates lists of random trees. + o rcoal() now generates a genuine coalescent tree by default + (thanks to Vladimir Minin for the code). + + +OTHER CHANGES + + o The internal codes of bionj(), fastme.bal(), and fastme.ols() + have been improved so that they are stabler and faster. + + o R packages used by ape are now loaded silently; lattice and gee + are loaded only when needed. + CHANGES IN APE VERSION 2.1 diff --git a/DESCRIPTION b/DESCRIPTION index 60c2368..b4aa6ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.1-1 -Date: 2008-01-08 +Date: 2008-01-14 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, diff --git a/R/MoranI.R b/R/MoranI.R index b049e08..d71574b 100644 --- a/R/MoranI.R +++ b/R/MoranI.R @@ -1,8 +1,8 @@ -## MoranI.R (2007-12-26) +## MoranI.R (2008-01-14) ## Moran's I Autocorrelation Index -## Copyright 2004 Julien Dutheil, 2007 Emmanuel Paradis +## Copyright 2004 Julien Dutheil, 2007-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -180,12 +180,12 @@ plot.correlogramList <- BG <- col[(pval < test.level) + 1] if (lattice) { ## trellis.par.set(list(plot.symbol=list(pch=19))) - xyplot(obs ~ gr | vars, xlab = xlab, ylab = ylab, - panel = function(x, y) { - panel.lines(x, y, lty = 2) - panel.points(x, y, cex = cex, pch = 19, col = BG) - #panel.abline(h = 0, lty = 3) - }) + lattice::xyplot(obs ~ gr | vars, xlab = xlab, ylab = ylab, + panel = function(x, y) { + lattice::panel.lines(x, y, lty = 2) + lattice::panel.points(x, y, cex = cex, pch = 19, col = BG) + ##lattice::panel.abline(h = 0, lty = 3) + }) } else { if (pch > 20 && pch < 26) { bg <- col diff --git a/R/compar.gee.R b/R/compar.gee.R index ecaa79a..f8a7b6a 100644 --- a/R/compar.gee.R +++ b/R/compar.gee.R @@ -1,8 +1,8 @@ -## compar.gee.R (2006-10-11) +## compar.gee.R (2008-01-14) ## Comparative Analysis with GEEs -## Copyright 2002-2006 Emmanuel Paradis +## Copyright 2002-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -10,6 +10,7 @@ compar.gee <- function(formula, data = NULL, family = "gaussian", phy, scale.fix = FALSE, scale.value = 1) { + require(gee, quietly = TRUE) if (is.null(data)) data <- parent.frame() else { if(!any(is.na(match(rownames(data), phy$tip.label)))) data <- data[phy$tip.label, ] diff --git a/R/identify.phylo.R b/R/identify.phylo.R index 46ab427..62476db 100644 --- a/R/identify.phylo.R +++ b/R/identify.phylo.R @@ -1,8 +1,8 @@ -## identify.phylo.R (2007-12-14) +## identify.phylo.R (2008-01-14) ## Graphical Identification of Nodes and Tips -## Copyright 2007 Emmanuel Paradis +## Copyright 2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -10,12 +10,12 @@ identify.phylo <- function(x, nodes = TRUE, tips = FALSE, labels = FALSE, ...) { + cat("Click close to a node of the tree...\n") xy <- locator(1) Ntip <- .last_plot.phylo$Ntip d <- sqrt((xy$x - .last_plot.phylo$xx)^2 + (xy$y - .last_plot.phylo$yy)^2) NODE <- which.min(d) - print(NODE) res <- list() if (NODE <= Ntip) { res$tips <- if (labels) x$tip.label[NODE] else NODE diff --git a/R/me.R b/R/me.R index 25cd4ab..93cebd3 100644 --- a/R/me.R +++ b/R/me.R @@ -1,41 +1,48 @@ -## me.R (2007-07-12) +## me.R (2008-01-12) -## Tree Estimation Based on Minimum Evolution Algorithm +## Tree Estimation Based on Minimum Evolution Algorithm -## Copyright 2007 Vincent Lefort +## Copyright 2007 Vincent Lefort with modifications by +## Emmanuel Paradis (2008) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -fastme.bal <- function(X, nni = TRUE) +.fastme <- function(X, nni, lib) { if (is.matrix(X)) X <- as.dist(X) - N <- attr(X, "Size") - labels <- attr(X, "Labels") - if (is.null(labels)) labels <- as.character(1:N) - ans <- .C("me_b", as.double(X), as.integer(N), as.character(labels), - "", as.integer(nni), PACKAGE = "ape") - read.tree(text = ans[[4]]) + N <- as.integer(attr(X, "Size")) + labels <- sprintf("%6s", 1:N) + edge1 <- edge2 <- integer(2*N - 3) + ans <- .C(lib, as.double(X), N, labels, as.integer(nni), + edge1, edge2, double(2*N - 3), character(N), + PACKAGE = "ape") + labels <- substr(ans[[8]], 1, 6) + LABS <- attr(X, "Labels") + labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] + else gsub("^ ", "", labels) + structure(list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]], + tip.label = labels, Nnode = N - 2), + class = "phylo") } -fastme.ols <- function(X, nni = TRUE) -{ - if (is.matrix(X)) X <- as.dist(X) - N <- attr(X, "Size") - labels <- attr(X, "Labels") - if (is.null(labels)) labels <- as.character(1:N) - ans <- .C("me_o", as.double(X), as.integer(N), as.character(labels), - "", as.integer(nni), PACKAGE = "ape") - read.tree(text = ans[[4]]) -} +fastme.bal <- function(X, nni = TRUE) .fastme(X, nni, "me_b") + +fastme.ols <- function(X, nni = TRUE) .fastme(X, nni, "me_o") bionj <- function(X) { if (is.matrix(X)) X <- as.dist(X) - N <- attr(X, "Size") - labels <- attr(X, "Labels") - if (is.null(labels)) labels <- as.character(1:N) - ans <- .C("bionj", as.double(X), as.integer(N), - as.character(labels), "", PACKAGE = "ape") - read.tree(text = ans[[4]]) + N <- as.integer(attr(X, "Size")) + labels <- sprintf("%6s", 1:N) + edge1 <- edge2 <- integer(2*N - 3) + ans <- .C("bionj", as.double(X), N, labels, edge1, edge2, + double(2*N - 3), character(N), PACKAGE = "ape") + labels <- substr(ans[[7]], 1, 6) + LABS <- attr(X, "Labels") + labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] + else gsub("^ ", "", labels) + structure(list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]], + tip.label = labels, Nnode = N - 2), + class = "phylo") } diff --git a/R/nodelabels.R b/R/nodelabels.R index aba9286..674d691 100644 --- a/R/nodelabels.R +++ b/R/nodelabels.R @@ -1,6 +1,6 @@ ## nodelabels.R (2007-03-05) -## Labelling the Nodes and the Tips of a Tree +## Labelling Trees ## Copyright 2004-2007 Emmanuel Paradis, 2006 Ben Bolker, and 2006 Jim Lemon diff --git a/R/plot.phylo.R b/R/plot.phylo.R index 76a1d6b..ff2317c 100644 --- a/R/plot.phylo.R +++ b/R/plot.phylo.R @@ -1,8 +1,8 @@ -## plot.phylo.R (2007-12-22) +## plot.phylo.R (2008-01-12) ## Plot Phylogenies -## Copyright 2002-2007 Emmanuel Paradis +## Copyright 2002-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -509,6 +509,7 @@ plot.multiPhylo <- function(x, layout = 1, ...) { if (layout > 1) layout(matrix(1:layout, ceiling(sqrt(layout)), byrow = TRUE)) + else layout(matrix(1)) if (!par("ask")) { par(ask = TRUE) on.exit(par(ask = FALSE)) diff --git a/R/rtree.R b/R/rtree.R index 6d0172c..cba5d4e 100644 --- a/R/rtree.R +++ b/R/rtree.R @@ -1,4 +1,4 @@ -## rtree.R (2008-01-08) +## rtree.R (2008-01-13) ## Generates Random Trees @@ -100,11 +100,13 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...) phy } -rcoal <- function(n, tip.label = NULL, br = rexp, ...) +rcoal <- function(n, tip.label = NULL, br = "coalescent", ...) { nbr <- 2*n - 2 edge <- matrix(NA, nbr, 2) - x <- br(n - 1, ...) # coalescence times + ## coalescence times by default: + x <- if (is.character(br)) 2*rexp(n - 1)/(n:2 * (n - 1):1) + else br(n - 1, ...) if (n == 2) { edge[] <- c(3, 3, 1:2) edge.length <- rep(x, 2) diff --git a/R/varcomp.R b/R/varcomp.R index 81a709a..1cdcb5d 100644 --- a/R/varcomp.R +++ b/R/varcomp.R @@ -26,7 +26,7 @@ varcomp <- function(x, scale = FALSE, cum = FALSE) plot.varcomp <- function(x, xlab = "Levels", ylab = "Variance", type = "b", ...) { if (!("varcomp" %in% class(x))) stop("Object \"x\" is not of class \"varcomp\"") - return(xyplot(x ~ ordered(names(x), levels=rev(names(x))), xlab=xlab, ylab=ylab, type=type, ...)) + return(lattice::xyplot(x ~ ordered(names(x), levels=rev(names(x))), xlab=xlab, ylab=ylab, type=type, ...)) } # For debuging: diff --git a/R/zzz.R b/R/zzz.R index 67c781d..b860273 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,15 +1,13 @@ -## zzz.R (2003-05-05) +## zzz.R (2008-01-14) ## Library Loading -## Copyright 2003 Emmanuel Paradis +## Copyright 2003-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .First.lib <- function(lib, pkg) { - require(gee) - require(nlme) - require(lattice) + require(nlme, quietly = TRUE) library.dynam("ape", pkg, lib) } diff --git a/man/fastme.Rd b/man/fastme.Rd index cd8b4ed..897ffa0 100644 --- a/man/fastme.Rd +++ b/man/fastme.Rd @@ -35,7 +35,6 @@ \code{\link{dist.dna}}, \code{\link{mlphylo}} } \examples{ -\dontrun{ ### From Saitou and Nei (1987, Table 1): x <- c(7, 8, 11, 13, 16, 13, 17, 5, 8, 10, 13, 10, 14, 5, 7, 10, 7, 11, 8, 11, 8, 12, @@ -51,6 +50,5 @@ data(woodmouse) trw <- fastme.bal(dist.dna(woodmouse)) plot(trw) } -} \keyword{models} diff --git a/man/rtree.Rd b/man/rtree.Rd index 63cbf79..264ba5d 100644 --- a/man/rtree.Rd +++ b/man/rtree.Rd @@ -1,10 +1,11 @@ \name{rtree} \alias{rtree} \alias{rcoal} +\alias{rmtree} \title{Generates Random Trees} \usage{ rtree(n, rooted = TRUE, tip.label = NULL, br = runif, ...) -rcoal(n, tip.label = NULL, br = rexp, ...) +rcoal(n, tip.label = NULL, br = "coalescent", ...) rmtree(N, n, rooted = TRUE, tip.label = NULL, br = runif, ...) } \arguments{ @@ -14,8 +15,9 @@ rmtree(N, n, rooted = TRUE, tip.label = NULL, br = runif, ...) \item{tip.label}{a character vector giving the tip labels; if not specified, the tips "t1", "t2", ..., are given.} \item{br}{either an R function used to generate the branch lengths - (\code{rtree}) or the coalescence times (\code{rcoal}), or - \code{NULL} to give no branch lengths in the tree.} + (\code{rtree} or \code{NULL} to give no branch lengths) or the + coalescence times (\code{rcoal}). For the latter, a genuine + coalescent tree is simulated by default.} \item{...}{further argument(s) to be passed to \code{br}.} \item{N}{an integer giving the number of trees to generate.} } @@ -27,14 +29,13 @@ rmtree(N, n, rooted = TRUE, tip.label = NULL, br = runif, ...) } \details{ The trees generated are bifurcating. If \code{rooted = FALSE} in - (\code{rtree}), the tree is trifurcating at its `root'. + (\code{rtree}), the tree is trifurcating at its root. The default function to generate branch lengths in \code{rtree} is - \code{runif}. In \code{rcoal} \code{rexp} is used to generate the - inter-node distances. If further arguments are passed to \code{br}, - they need to be tagged (e.g., \code{min = 0, max = 10}). + \code{runif}. If further arguments are passed to \code{br}, they need + to be tagged (e.g., \code{min = 0, max = 10}). - \code{rmtree} calls successively \code{rmtree} and set the class of + \code{rmtree} calls successively \code{rtree} and set the class of the returned object appropriately. } \value{ diff --git a/src/BIONJ.c b/src/BIONJ.c index d615c91..f8bf683 100644 --- a/src/BIONJ.c +++ b/src/BIONJ.c @@ -1,3 +1,11 @@ +/* BIONJ.c 2008-01-14 */ + +/* Copyright 2007-2008 Olivier Gascuel, Hoa Sien Cuong, + R port by Vincent Lefort, bionj() below modified by Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; ; BIONJ program ; @@ -15,12 +23,11 @@ ; ; \*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ -//#include "BIONJ.h" #include "me.h" void Initialize(float **delta, double *X, char **labels, int n, POINTERS *trees); void Print_outputChar(int i, POINTERS *trees, char *output); -void bionj (double *X, int *N, char **labels, char **treeStr); +void bionj(double *X, int *N, char **labels, int *edge1, int *edge2, double *el, char **tl); int Symmetrize(float **delta, int n); void Concatenate(char chain1[MAX_LABEL_LENGTH], int ind, POINTERS *trees, int post); float Distance(int i, int j, float **delta); @@ -159,46 +166,33 @@ void Print_outputChar(int i, POINTERS *trees, char *output) return; } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; Main program ; -; ; -; argc is the number of arguments ; -; **argv contains the arguments: ; -; the first argument has to be BIONJ; ; -; the second is the inptu-file; ; -; the third is the output-file. ; -; When the input and output files are ; -; not given, the user is asked for them. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - //tree *bionj (FILE *input, boolean isNJ) -void bionj (double *X, int *N, char **labels, char **treeStr) +void bionj(double *X, int *N, char **labels, + int *edge1, int *edge2, double *el, char **tl) { - POINTERS *trees; /* list of subtrees */ - tree *T; /* the returned tree */ - char *chain1; /* stringized branch-length */ -// char *chain2; /* idem */ - char *str; /* the string containing final tree */ - int *a, *b; /* pair to be agglomerated */ - float **delta; /* delta matrix */ - float la; /* first taxon branch-length */ - float lb; /* second taxon branch-length */ - float vab; /* variance of Dab */ + POINTERS *trees; /* list of subtrees */ + tree *T; /* the returned tree */ + char *chain1; /* stringized branch-length */ + char *str; /* the string containing final tree */ + int *a, *b; /* pair to be agglomerated */ + float **delta; /* delta matrix */ + float la; /* first taxon branch-length */ + float lb; /* second taxon branch-length */ + float vab; /* variance of Dab */ float lamda = 0.5; int i; // int ok; - int r; /* number of subtrees */ - int n; /* number of taxa */ + int r; /* number of subtrees */ + int n; /* number of taxa */ int x, y; // double t; a=(int*)calloc(1,sizeof(int)); b=(int*)calloc(1,sizeof(int)); - chain1=(char *)calloc(MAX_LABEL_LENGTH,sizeof(char)); + chain1=(char *)R_alloc(MAX_LABEL_LENGTH, sizeof(char)); str = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); /* added by EP */ + if (strlen(chain1)) + strncpy(chain1, "", strlen(chain1)); if (strlen(str)) strncpy(str, "", strlen(str)); /* end */ @@ -223,15 +217,15 @@ void bionj (double *X, int *N, char **labels, char **treeStr) exit(0); } /* initialise and symmetrize the running delta matrix */ - r=n; - *a=0; - *b=0; - Initialize(delta, X, labels, n, trees); -// ok=Symmetrize(delta, n); - -// if(!ok) -// Rprintf("\n The matrix is not symmetric.\n "); - while (r > 3) /* until r=3 */ + r=n; + *a=0; + *b=0; + Initialize(delta, X, labels, n, trees); +// ok=Symmetrize(delta, n); + +// if(!ok) +// Rprintf("\n The matrix is not symmetric.\n "); + while (r > 3) /* until r=3 */ { Compute_sums_Sx(delta, n); /* compute the sum Sx */ Best_pair(delta, r, a, b, n); /* find the best pair by */ @@ -281,37 +275,31 @@ void bionj (double *X, int *N, char **labels, char **treeStr) r=r-1; } - FinishStr (delta, n, trees, str); /* compute the branch-lengths*/ - /* of the last three subtrees*/ - /* and print the tree in the */ - /* output-file */ - T = readNewickString (str, n); - T = detrifurcate(T); -// compareSets(T,species); - partitionSizes(T); - *treeStr = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(*treeStr)) - strncpy(*treeStr, "", strlen(*treeStr)); - /* end */ - NewickPrintTreeStr (T, *treeStr); - - for(i=1; i<=n; i++) - { - delta[i][0]=0.0; - trees[i].head=NULL; - trees[i].tail=NULL; - } + FinishStr (delta, n, trees, str); /* compute the branch-lengths*/ + /* of the last three subtrees*/ + /* and print the tree in the */ + /* output-file */ + T = readNewickString (str, n); + T = detrifurcate(T); +// compareSets(T,species); + partitionSizes(T); + + tree2phylo(T, edge1, edge2, el, tl, n); /* by EP */ + + for(i=1; i<=n; i++) + { + delta[i][0]=0.0; + trees[i].head=NULL; + trees[i].tail=NULL; + } free(delta); free(trees); /* free (str); */ - free (chain1); - free (a); - free (b); + /* free (chain1); */ + free(a); + free(b); freeTree(T); T = NULL; -// return (ret); - return; } /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ @@ -321,7 +309,6 @@ void bionj (double *X, int *N, char **labels, char **treeStr) \*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - /*;;;;;;;;;;;;;;;;;;;;;;;;;;; Symmetrize ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ ; ; ; Description : This function verifies if the delta matrix is symmetric; ; @@ -362,8 +349,6 @@ int Symmetrize(float **delta, int n) } - - /*;;;;;;;;;;;;;;;;;;;;;;;;;;; Concatenate ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ ; ; ; ; @@ -599,77 +584,6 @@ float Finish_branch_length(int i, int j, int k, float **delta) return(length); } - -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Finish;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function compute the length of the lasts three ; -; subtrees and write the tree in the output file. ; -; ; -; input : ; -; float **delta : the delta matrix ; -; int n : the number of taxa ; -; WORD *trees : list of subtrees ; -; ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ -/* -void Finish(float **delta, int n, POINTERS *trees, FILE *output) -{ - int l=1; - int i=0; - float length; - WORD *bidon; - WORD *ele; - int last[3]; // the last three subtrees - - while(l <= n) - { // find the last tree subtree - if(!Emptied(l, delta)) - { - last[i]=l; - i++; - } - l++; - } - - length=Finish_branch_length(last[0],last[1],last[2],delta); - fprintf(output,"("); - Print_output(last[0],trees,output); - fprintf(output,":"); -// gcvt(length,PREC, str); -// fprintf(output,"%s,",str); - fprintf(output,"%f,",length); - - length=Finish_branch_length(last[1],last[0],last[2],delta); - Print_output(last[1],trees,output); - fprintf(output,":"); -// gcvt(length,PREC, str); -// fprintf(output,"%s,",str); - fprintf(output,"%f,",length); - - length=Finish_branch_length(last[2],last[1],last[0],delta); - Print_output(last[2],trees,output); - fprintf(output,":"); -// gcvt(length,PREC,str); -// fprintf(output,"%s",str); - fprintf(output,"%f",length); - fprintf(output,");"); - fprintf(output,"\n"); - - for(i=0; i < 3; i++) - { - bidon=trees[last[i]].head; - ele=bidon; - while(bidon!=NULL) - { - ele=ele->suiv; - free(bidon); - bidon=ele; - } - } -} -*/ - void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree) { int l=1; @@ -717,7 +631,7 @@ void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree) } if (strlen (StrTree) < MAX_INPUT_SIZE-3) - strncat (StrTree, ");\n", 3); + strncat (StrTree, ");", 3); free (tmp); for(i=0; i < 3; i++) @@ -794,23 +708,24 @@ float Lamda(int a, int b, float vab, float **delta, int n, int r) lamda=lamda + (Variance(b,i,delta) - Variance(a,i,delta)); } lamda=0.5 + lamda/(2*(r-2)*vab); - } /* Formula (9) and the */ - if(lamda > 1.0) /* constraint that lamda*/ - lamda = 1.0; /* belongs to [0,1] */ + } /* Formula (9) and the */ + if(lamda > 1.0) /* constraint that lamda*/ + lamda = 1.0; /* belongs to [0,1] */ if(lamda < 0.0) lamda=0.0; return(lamda); } - -/* void printMat(float **delta, int n) */ -/* { */ -/* int i, j; */ -/* for (i=1; i<=n; i++) { */ -/* for (j=1; j<=n; j++) */ -/* Rprintf ("%f ", delta[i][j]); */ -/* Rprintf("\n"); */ -/* } */ -/* Rprintf("\n"); */ -/* return; */ -/* } */ +/* +void printMat(float **delta, int n) +{ + int i, j; + for (i=1; i<=n; i++) { + for (j=1; j<=n; j++) + Rprintf ("%f ", delta[i][j]); + Rprintf("\n"); + } + Rprintf("\n"); + return; +} +*/ diff --git a/src/bipartition.c b/src/bipartition.c index 86d5bbd..aeb6977 100644 --- a/src/bipartition.c +++ b/src/bipartition.c @@ -112,7 +112,8 @@ SEXP bipartition(SEXP edge, SEXP nbtip, SEXP nbnode) return ans; } /* bipartition */ -/* From R-ext: */ +/* From R-ext manual + (not the same than in library/stats/src/nls.c) */ SEXP getListElement(SEXP list, char *str) { SEXP elmt = R_NilValue, names = getAttrib(list, R_NamesSymbol); diff --git a/src/me.c b/src/me.c index 5b383f3..69d1cb2 100644 --- a/src/me.c +++ b/src/me.c @@ -1,11 +1,19 @@ +/* me.c 2008-01-14 */ + +/* Copyright 2007-2008 Olivier Gascuel, Rick Desper, + R port by Vincent Lefort, me_*() below modified by Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" //functions from me_balanced.c -tree *BMEaddSpecies(tree *T,node *v, double **D, double **A); +tree *BMEaddSpecies(tree *T, node *v, double **D, double **A); void assignBMEWeights(tree *T, double **A); void makeBMEAveragesTable(tree *T, double **D, double **A); //functions from me_ols.c -tree *GMEaddSpecies(tree *T,node *v, double **D, double **A); +tree *GMEaddSpecies(tree *T, node *v, double **D, double **A); void assignOLSWeights(tree *T, double **A); void makeOLSAveragesTable(tree *T, double **D, double **A); //functions from bNNI.c @@ -14,13 +22,13 @@ void bNNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies void NNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies); -void me_b(double *X, int *N, char **labels, char **treeStr, int *nni) +void me_b(double *X, int *N, char **labels, int *nni, + int *edge1, int *edge2, double *el, char **tl) { double **D, **A; set *species, *slooper; node *addNode; tree *T; - char *str; int n, nniCount; n = *N; @@ -29,53 +37,35 @@ void me_b(double *X, int *N, char **labels, char **treeStr, int *nni) species = (set *) malloc(sizeof(set)); species->firstNode = NULL; species->secondNode = NULL; - str = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(str)) - strncpy(str, "", strlen(str)); - /* end */ - D = loadMatrix (X, labels, n, species); - A = initDoubleMatrix(2 * n - 2); + D = loadMatrix(X, labels, n, species); + A = initDoubleMatrix(2*n - 2); for(slooper = species; NULL != slooper; slooper = slooper->secondNode) { addNode = copyNode(slooper->firstNode); - T = BMEaddSpecies(T,addNode,D,A); + T = BMEaddSpecies(T, addNode, D, A); } // Compute bNNI if (*nni == 1) - bNNI(T,A,&nniCount,D,n); + bNNI(T, A, &nniCount, D, n); assignBMEWeights(T,A); - NewickPrintTreeStr(T,str); - - if (strlen (str) < MAX_INPUT_SIZE -1) - { - *treeStr = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(*treeStr)) - strncpy(*treeStr, "", strlen(*treeStr)); - /* end */ - strncpy (*treeStr, str, strlen(str)); - } + tree2phylo(T, edge1, edge2, el, tl, n); -/* free (str); */ freeMatrix(D,n); freeMatrix(A,2*n - 2); freeSet(species); freeTree(T); T = NULL; - - /* return; */ } -void me_o(double *X, int *N, char **labels, char **treeStr, int *nni) +void me_o(double *X, int *N, char **labels, int *nni, + int *edge1, int *edge2, double *el, char **tl) { double **D, **A; set *species, *slooper; node *addNode; tree *T; - char *str; int n, nniCount; n = *N; @@ -84,11 +74,6 @@ void me_o(double *X, int *N, char **labels, char **treeStr, int *nni) species = (set *) malloc(sizeof(set)); species->firstNode = NULL; species->secondNode = NULL; - str = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(str)) - strncpy(str, "", strlen(str)); - /* end */ D = loadMatrix (X, labels, n, species); A = initDoubleMatrix(2 * n - 2); @@ -104,26 +89,13 @@ void me_o(double *X, int *N, char **labels, char **treeStr, int *nni) NNI(T,A,&nniCount,D,n); assignOLSWeights(T,A); - NewickPrintTreeStr(T,str); - - if (strlen (str) < MAX_INPUT_SIZE -1) - { - *treeStr = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(*treeStr)) - strncpy(*treeStr, "", strlen(*treeStr)); - /* end */ - strncpy (*treeStr, str, strlen (str)); - } + tree2phylo(T, edge1, edge2, el, tl, n); - /* free (str); */ freeMatrix(D,n); freeMatrix(A,2*n - 2); freeSet(species); freeTree(T); T = NULL; - - return; } /************************************************************************* @@ -516,4 +488,3 @@ void freeSubTree(edge *e) e->head = NULL; free(e); } - diff --git a/src/me.h b/src/me.h index 3ceb771..f049ef6 100644 --- a/src/me.h +++ b/src/me.h @@ -1,11 +1,10 @@ -//#include -//#include -//#include -//#include -//#include -//#include -//#include "main.h" -//#include "graph.h" +/* me.h 2008-01-14 */ + +/* Copyright 2007-2008 Vincent Lefort, modified by Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include #ifndef NONE @@ -38,9 +37,9 @@ #ifndef MAX_DIGITS #define MAX_DIGITS 20 #endif -#ifndef INPUT_SIZE -#define INPUT_SIZE 100 -#endif +/* #ifndef INPUT_SIZE */ +/* #define INPUT_SIZE 100 */ +/* #endif */ #ifndef MAX_INPUT_SIZE #define MAX_INPUT_SIZE 100000 #endif @@ -69,13 +68,13 @@ typedef struct word { char name[MAX_LABEL_LENGTH]; struct word *suiv; -}WORD; +} WORD; typedef struct pointers { WORD *head; WORD *tail; -}POINTERS; +} POINTERS; typedef struct node { char label[NODE_LABEL_LENGTH]; @@ -110,9 +109,9 @@ typedef struct set struct set *secondNode; } set; -void me_b(double *X, int *N, char **labels, char **treeStr, int *nni); -void me_o(double *X, int *N, char **labels, char **treeStr, int *nni); -int whiteSpace(char c); +void me_b(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl); +void me_o(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl); +//int whiteSpace(char c); double **initDoubleMatrix(int d); double **loadMatrix (double *X, char **labels, int n, set *S); set *addToSet(node *v, set *X); @@ -135,11 +134,9 @@ void freeMatrix(double **D, int size); void freeSet(set *S); void freeTree(tree *T); void freeSubTree(edge *e); +int whiteSpace(char c); int leaf(node *v); -tree *readNewickString (char *str, int numLeaves); node *decodeNewickSubtree(char *treeString, tree *T, int *uCount); -void NewickPrintSubtree(tree *T, edge *e, char *str); -void NewickPrintBinaryTree(tree *T, char *str); -void NewickPrintTrinaryTree(tree *T, char *str); -void NewickPrintTreeStr(tree *T, char *str); - +tree *readNewickString (char *str, int numLeaves); +void subtree2phylo(node *parent, int *edge1, int *edge2, double *el, char **tl); +void tree2phylo(tree *T, int *edge1, int *edge2, double *el, char **tl, int n); diff --git a/src/newick.c b/src/newick.c index 07cd96d..d0a4f15 100644 --- a/src/newick.c +++ b/src/newick.c @@ -1,11 +1,10 @@ -/*#include -#include -#include -#include -#include "graph.h" -#include "newick.h" -#include "main.h" -*/ +/* newick.c 2008-01-14 */ + +/* Copyright 2007-2008 Vincent Lefort */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + #include "me.h" int nodeCount; @@ -233,285 +232,5 @@ tree *readNewickString (char *str, int numLeaves) centerNode = decodeNewickSubtree (str, T, &uCount); snprintf (centerNode->label, MAX_LABEL_LENGTH, rootLabel); T->root = centerNode; - return (T); -} - -tree *loadNewickTree(FILE *ifile, int numLeaves) -{ -// char label[] = "EmptyEdge"; - tree *T; - node *centerNode; - int i = 0; - int j = 0; - int inputLength; - int uCount = 0; - int parCount = 0; - char c; - int Comment; - char *nextString; - char rootLabel[MAX_LABEL_LENGTH]; - nodeCount = edgeCount = 0; - T = newTree(); - nextString = (char *) malloc(numLeaves*INPUT_SIZE*sizeof(char)); - if (NULL == nextString) - nextString = (char *) malloc(MAX_INPUT_SIZE*sizeof(char)); - Comment = 0; - while(1 == fscanf(ifile,"%c",&c)) - { - if('[' == c) - Comment = 1; - else if (']' == c) - Comment = 0; - else if (!(Comment)) - { - if(whiteSpace(c)) - { - if (i > 0) - nextString[i++] = ' '; - } - else /*note that this else goes with if(whiteSpace(c))*/ - nextString[i++] = c; - if (';' == c) - break; - } - } - if ('(' != nextString[0]) - { - fprintf(stderr,"Error reading input file - does not start with '('.\n"); - exit(EXIT_FAILURE); - } - inputLength = i; - for(i = 0; i < inputLength;i++) - { - if ('(' == nextString[i]) - parCount++; - else if (')' == nextString[i]) - parCount--; - if (parCount > 0) - ; - else if (0 == parCount) - { - i++; -/* if(';' == nextString[i]) - sprintf(rootLabel,"URoot"); - else - {*/ - while(';' != nextString[i]) - if(!(whiteSpace(nextString[i++]))) - rootLabel[j++] = nextString[i-1]; /*be careful here */ - rootLabel[j] = '\0'; -// } - i = inputLength; - } - else if (parCount < 0) - { - fprintf(stderr,"Error reading tree input file. Too many right parentheses.\n"); - exit(EXIT_FAILURE); - } - } - centerNode = decodeNewickSubtree(nextString,T,&uCount); - snprintf(centerNode->label, MAX_LABEL_LENGTH, rootLabel); - T->root = centerNode; - free(nextString); return(T); } - -double GetSubTreeLength (tree *T, edge *e) -{ - double ret = 0; - - if ( (NULL != e) && (! leaf(e->head) )) { - ret += GetSubTreeLength (T, e->head->leftEdge); - ret += GetSubTreeLength (T, e->head->rightEdge); - } - ret += e->distance; - return ret; -} - -void NewickPrintSubtree(tree *T, edge *e, char *str) -{ - char *tmp; - if (NULL == e) - { - Rprintf("Error with Newick Printing routine.\n"); - exit(0); - } - if(!(leaf(e->head))) - { - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, "(", 1); - NewickPrintSubtree(T,e->head->leftEdge,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ",", 1); - NewickPrintSubtree(T,e->head->rightEdge,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ")", 1); - } - if (strlen (str) < MAX_INPUT_SIZE - strlen (e->head->label) -1) - strncat (str, e->head->label, strlen (e->head->label)); - - if (strlen (str) < MAX_INPUT_SIZE - 2) - strncat (str, ":", 1); - - tmp = (char *)R_alloc(INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(tmp)) - strncpy(tmp, "", strlen(tmp)); - /* end */ - snprintf (tmp, INPUT_SIZE, "%lf", e->distance); - if (strlen (str) < MAX_INPUT_SIZE - strlen (tmp) -1) - strncat (str, tmp, strlen (tmp)); - - /* free (tmp); */ - return; -} - -double GetBinaryTreeLength (tree *T) -{ - double ret = 0; - edge *e, *f; - node *rootchild; - e = T->root->leftEdge; - rootchild = e->head; - - f = rootchild->leftEdge; - if (NULL != f) - ret += GetSubTreeLength (T, f); - f = rootchild->rightEdge; - if (NULL != f) - ret += GetSubTreeLength (T, f); - ret += e->distance; - return ret; -} - -void NewickPrintBinaryTree(tree *T, char *str) -{ - edge *e, *f; - node *rootchild; - char *tmp; - e = T->root->leftEdge; - rootchild = e->head; - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, "(", 1); - f = rootchild->leftEdge; - if (NULL != f) - { - NewickPrintSubtree(T,f,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ",", 1); - } - f = rootchild->rightEdge; - if (NULL != f) - { - NewickPrintSubtree(T,f,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ",", 1); - } - if (strlen (str) < MAX_INPUT_SIZE - strlen (T->root->label) -1) - strncat (str, T->root->label, strlen (T->root->label)); - - if (strlen (str) < MAX_INPUT_SIZE - 2) - strncat (str, ":", 1); - - tmp = (char *)R_alloc(INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(tmp)) - strncpy(tmp, "", strlen(tmp)); - /* end */ - snprintf (tmp, INPUT_SIZE, "%lf", e->distance); - if (strlen (str) < MAX_INPUT_SIZE - strlen (tmp) -1) - strncat (str, tmp, strlen (tmp)); - - if (strlen (str) < MAX_INPUT_SIZE - 2) - strncat (str, ")", 1); - - if (NULL != rootchild->label) - if (strlen (str) < MAX_INPUT_SIZE - strlen (rootchild->label) -1) - strncat (str, T->root->label, strlen (rootchild->label)); - - if (strlen (str) < MAX_INPUT_SIZE - 3) - strncat (str, ";\n", 2); - - /* free (tmp); */ - return; -} - -double GetTrinaryTreeLength (tree *T) -{ - double ret = 0; - edge *f; - f = T->root->leftEdge; - if (NULL != f) - ret += GetSubTreeLength (T, f); - f = T->root->rightEdge; - if (NULL != f) - ret += GetSubTreeLength (T, f); - f = T->root->middleEdge; - if (NULL != f) - ret += GetSubTreeLength (T, f); - - return ret; -} - -void NewickPrintTrinaryTree(tree *T, char *str) -{ - edge *f; - f = T->root->leftEdge; - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, "(", 1); - if (NULL != f) - { - NewickPrintSubtree(T,f,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ",", 1); - } - f = T->root->rightEdge; - if (NULL != f) - { - NewickPrintSubtree(T,f,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ",", 1); - } - f = T->root->middleEdge; - if (NULL != f) - { - NewickPrintSubtree(T,f,str); - if (strlen (str) < MAX_INPUT_SIZE -2) - strncat (str, ")", 1); - } - if (NULL != T->root->label) - if (strlen (str) < MAX_INPUT_SIZE - strlen (T->root->label) -1) - strncat (str, T->root->label, strlen (T->root->label)); - if (strlen (str) < MAX_INPUT_SIZE - 3) - strncat (str, ";\n", 2); - return; -} - -void NewickPrintTreeStr(tree *T, char *str) -{ - if (leaf(T->root)) - NewickPrintBinaryTree(T,str); - else - NewickPrintTrinaryTree(T,str); -} - -double GetTreeLength (tree *T) -{ - double ret = 0; - if (leaf(T->root)) - ret = GetBinaryTreeLength (T); - else - ret = GetTrinaryTreeLength (T); - return ret; -} -/* -void NewickPrintTree(tree *T, FILE *ofile) -{ - if (leaf(T->root)) - NewickPrintBinaryTree(T,ofile); - else - NewickPrintTrinaryTree(T,ofile); -} -*/ -//edge *depthFirstTraverse(tree *T, edge *e); - -- 2.39.2