From bfaeca35ec130110327a3bb7a1f0fe3b66076a95 Mon Sep 17 00:00:00 2001 From: paradis Date: Tue, 1 Mar 2011 10:02:08 +0000 Subject: [PATCH] bug fixing in read.nexus() + Others.... git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@147 6e262413-ae40-0410-9e79-b911bd7a66b7 --- ChangeLog | 31 ++++++++++ DESCRIPTION | 4 +- R/DNA.R | 19 ++++-- R/as.matching.R | 18 +++--- R/dist.topo.R | 6 +- R/read.nexus.R | 20 ++----- R/write.nexus.R | 18 ++++-- man/as.matching.Rd | 5 +- man/bd.ext.Rd | 2 +- man/rTraitDisc.Rd | 2 +- src/dist_dna.c | 45 +++++++++++--- src/tree_build.c | 144 ++++++++++++++++++++++++++++++--------------- 12 files changed, 214 insertions(+), 100 deletions(-) diff --git a/ChangeLog b/ChangeLog index 72198d9..c88556f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ + CHANGES IN APE VERSION 2.6-4 + + +NEW FEATURES + + o base.freq() gains an option 'all' to count all the possible bases + including the ambiguous ones (defaults to FALSE). + + o read.nexus() now writes tree names in the NEXUS file if given a list + of trees with names. + + +BUG FIXES + + o prop.part() failed in some situations with unrooted trees. + + o read.nexus() shuffled node labels when a TRANSLATE block was + present + + +OTHER CHANGES + + o BaseProportion in src/dist_dna.c has been modified. + + o A number of functions in src/tree_build.c have been modified. + + o The matching representation has now only two columns as the third + column was repetitive. + + + CHANGES IN APE VERSION 2.6-3 diff --git a/DESCRIPTION b/DESCRIPTION index 1bf01d2..7a2aa94 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape -Version: 2.6-3 -Date: 2011-02-17 +Version: 2.6-4 +Date: 2011-02-28 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/DNA.R b/R/DNA.R index 722f7b4..e71f142 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,8 +1,8 @@ -## DNA.R (2010-10-19) +## DNA.R (2011-02-18) ## Manipulations and Comparisons of DNA Sequences -## Copyright 2002-2010 Emmanuel Paradis +## Copyright 2002-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -281,13 +281,20 @@ as.character.DNAbin <- function(x, ...) if (is.list(x)) lapply(x, f) else f(x) } -base.freq <- function(x, freq = FALSE) +base.freq <- function(x, freq = FALSE, all = FALSE) { if (is.list(x)) x <- unlist(x) n <- length(x) - BF <- .C("BaseProportion", x, n, double(4), freq, - DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]] - names(BF) <- letters[c(1, 3, 7, 20)] + BF <-.C("BaseProportion", x, n, double(17), + DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]] + names(BF) <- c("a", "c", "g", "t", "r", "m", "w", "s", + "k", "y", "v", "h", "d", "b", "n", "-", "?") + if (all) { + if (!freq) BF <- BF / n + } else { + BF <- BF[1:4] + if (!freq) BF <- BF / sum(BF) + } BF } diff --git a/R/as.matching.R b/R/as.matching.R index 15eb0e7..25b8ca3 100644 --- a/R/as.matching.R +++ b/R/as.matching.R @@ -1,8 +1,8 @@ -## as.matching.R (2010-09-29) +## as.matching.R (2011-02-26) ## Conversion Between Phylo and Matching Objects -## Copyright 2005-2010 Emmanuel Paradis +## Copyright 2005-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -14,7 +14,7 @@ as.matching.phylo <- function(x, labels = TRUE, ...) nb.tip <- length(x$tip.label) nb.node <- x$Nnode if (nb.tip != nb.node + 1) - stop("the tree must be dichotomous AND rooted.") + stop("the tree must be dichotomous AND rooted.") x <- reorder(x, "pruningwise") mat <- matrix(x$edge[, 2], ncol = 2, byrow = TRUE) nodes <- x$edge[seq(by = 2, length.out = nb.node), 1] @@ -23,7 +23,7 @@ as.matching.phylo <- function(x, labels = TRUE, ...) new.nodes <- 1:nb.node + nb.tip sel <- !is.na(O) mat[sel] <- new.nodes[O[sel]] - mat <- cbind(t(apply(mat, 1, sort)), new.nodes, deparse.level = 0) + mat <- t(apply(mat, 1, sort)) obj <- list(matching = mat) if (!is.null(x$edge.length)) @@ -31,7 +31,7 @@ as.matching.phylo <- function(x, labels = TRUE, ...) if (labels) { obj$tip.label <- x$tip.label if (!is.null(x$node.label)) - obj$node.label <- x$node.label[match(new.nodes, nodes)] + obj$node.label <- x$node.label[match(new.nodes, nodes)] } class(obj) <- "matching" obj @@ -39,16 +39,16 @@ as.matching.phylo <- function(x, labels = TRUE, ...) as.phylo.matching <- function(x, ...) { - N <- 2*dim(x$matching)[1] + nb.node <- dim(x$matching)[1] + nb.tip <- nb.node + 1 + N <- 2 * nb.node edge <- matrix(NA, N, 2) - nb.tip <- (N + 2)/2 - nb.node <- nb.tip - 1 new.nodes <- numeric(N + 1) new.nodes[N + 1] <- nb.tip + 1 nextnode <- nb.tip + 2 j <- 1 for (i in nb.node:1) { - edge[j:(j + 1), 1] <- new.nodes[x$matching[i, 3]] + edge[j:(j + 1), 1] <- new.nodes[i + nb.tip] for (k in 1:2) { if (x$matching[i, k] > nb.tip) { edge[j + k - 1, 2] <- new.nodes[x$matching[i, k]] <- nextnode diff --git a/R/dist.topo.R b/R/dist.topo.R index fa9e869..d94866a 100644 --- a/R/dist.topo.R +++ b/R/dist.topo.R @@ -1,9 +1,9 @@ -## dist.topo.R (2010-11-18) +## dist.topo.R (2011-02-21) ## Topological Distances, Tree Bipartitions, ## Consensus Trees, and Bootstrapping Phylogenies -## Copyright 2005-2010 Emmanuel Paradis +## Copyright 2005-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -104,7 +104,7 @@ prop.part <- function(..., check.labels = TRUE) ## ntree <- length(obj) if (ntree == 1) check.labels <- FALSE - if (check.labels) .compressTipLabel(obj) # no need to store cause uncompress later + if (check.labels) obj <- .compressTipLabel(obj) # fix by Klaus Schliep (2011-02-21) for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer" ## ## The 1st must have tip labels diff --git a/R/read.nexus.R b/R/read.nexus.R index 77d5333..03aa34b 100644 --- a/R/read.nexus.R +++ b/R/read.nexus.R @@ -1,28 +1,20 @@ -## read.nexus.R (2010-09-27) +## read.nexus.R (2011-02-28) ## Read Tree File in Nexus Format -## Copyright 2003-2009 Emmanuel Paradis and 2010 Klaus Schliep +## Copyright 2003-2011 Emmanuel Paradis and 2010 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .treeBuildWithTokens <- function(x) { - ## remove potential node labels; see ?read.nexus for justification - node.label <- gsub("[:;].*$", "", strsplit(x, ")")[[1]][-1]) - has.node.labels <- FALSE - if (any(node.label != "")) { - x <- gsub(")[^:]*:", "):", x) - x <- gsub(")[^:]*;", ");", x) # if there's no root edge - has.node.labels <- TRUE - } - phy <- .Call("treeBuildWithTokens", x, PACKAGE = "ape") + phy <- .Call("treeBuildWithTokens", x, PACKAGE = "apex") dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) - nms <- c("edge", "edge.length", "Nnode", "root.edge") - if (length(phy) == 3) nms <- nms[-4] + nms <- c("edge", "edge.length", "Nnode", "node.label", "root.edge") + if (length(phy) == 4) nms <- nms[-5] names(phy) <- nms - if (has.node.labels) phy$node.label <- node.label + if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" phy } diff --git a/R/write.nexus.R b/R/write.nexus.R index 14f7734..2a3c32a 100644 --- a/R/write.nexus.R +++ b/R/write.nexus.R @@ -1,8 +1,8 @@ -## write.nexus.R (2009-10-27) +## write.nexus.R (2011-02-26) ## Write Tree File in Nexus Format -## Copyright 2003-2009 Emmanuel Paradis +## Copyright 2003-2011x Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -76,13 +76,21 @@ the original data won't be written with the tree.")) } } else { for (i in 1:ntree) - obj[[i]]$tip.label <- checkLabel(obj[[i]]$tip.label) + obj[[i]]$tip.label <- checkLabel(obj[[i]]$tip.label) } + + title <- names(obj) + if (is.null(title)) + title <- rep("UNTITLED", ntree) + else { + if (any(s <- title == "")) title[s] <- "UNTITLED" + } + for (i in 1:ntree) { if (class(obj[[i]]) != "phylo") next if (is.rooted(obj[[i]])) - cat("\tTREE * UNTITLED = [&R] ", file = file, append = TRUE) - else cat("\tTREE * UNTITLED = [&U] ", file = file, append = TRUE) + cat("\tTREE *,", title[i], "= [&R] ", file = file, append = TRUE) + else cat("\tTREE *", title[i], "= [&U] ", file = file, append = TRUE) cat(write.tree(obj[[i]], file = ""), "\n", sep = "", file = file, append = TRUE) } diff --git a/man/as.matching.Rd b/man/as.matching.Rd index ece8f32..29d665c 100644 --- a/man/as.matching.Rd +++ b/man/as.matching.Rd @@ -34,9 +34,8 @@ as.matching(x, ...) \code{as.matching} returns an object of class \code{"matching"} with the following component: - \item{matching}{a three-column numeric matrix where the first two - columns represent the sibling pairs, and the third one the - corresponding ancestor.} + \item{matching}{a two-column numeric matrix where the columns + represent the sibling pairs.} \item{tip.label}{(optional) a character vector giving the tip labels where the ith element is the label of the tip numbered i in \code{matching}.} diff --git a/man/bd.ext.Rd b/man/bd.ext.Rd index f35236d..1cccd8c 100644 --- a/man/bd.ext.Rd +++ b/man/bd.ext.Rd @@ -56,7 +56,7 @@ bd.ext(phy, S, conditional = TRUE) I. J. (2007) Exceptional among-lineage variation in diversification rates during the radiation of Australia's most diverse vertebrate clade. \emph{Proceedings of the Royal Society of London. Series - B. Biological Sciences}, \bold{274}, 2915-2923. + B. Biological Sciences}, \bold{274}, 2915--2923. } \author{Emmanuel Paradis} \seealso{ diff --git a/man/rTraitDisc.Rd b/man/rTraitDisc.Rd index e65a54b..b6b619c 100644 --- a/man/rTraitDisc.Rd +++ b/man/rTraitDisc.Rd @@ -4,7 +4,7 @@ \usage{ rTraitDisc(phy, model = "ER", k = if (is.matrix(model)) ncol(model) else 2, rate = 0.1, states = LETTERS[1:k], freq = rep(1/k, k), - ancestor = FALSE, root.value = 1) + ancestor = FALSE, root.value = 1, ...) } \arguments{ \item{phy}{an object of class \code{"phylo"}.} diff --git a/src/dist_dna.c b/src/dist_dna.c index b8797e9..62f8f64 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,6 +1,6 @@ -/* dist_dna.c 2010-09-16 */ +/* dist_dna.c 2011-02-18 */ -/* Copyright 2005-2010 Emmanuel Paradis +/* Copyright 2005-2011 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -970,23 +970,50 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) +/* void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) */ +/* { */ +/* int i, m; */ + +/* m = 0; */ +/* for (i = 0; i < *n; i++) { */ +/* if (KnownBase(x[i])) { */ +/* m++; */ +/* switch (x[i]) { */ +/* case 136 : BF[0]++; break; */ +/* case 40 : BF[1]++; break; */ +/* case 72 : BF[2]++; break; */ +/* case 24 : BF[3]++; break; */ +/* } */ +/* } */ +/* } */ +/* if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; */ +} + +void BaseProportion(unsigned char *x, int *n, double *BF) { - int i, m; + int i; - m = 0; for (i = 0; i < *n; i++) { - if (KnownBase(x[i])) { - m++; switch (x[i]) { case 136 : BF[0]++; break; case 40 : BF[1]++; break; case 72 : BF[2]++; break; case 24 : BF[3]++; break; + case 192 : BF[4]++; break; + case 160 : BF[5]++; break; + case 144 : BF[6]++; break; + case 96 : BF[7]++; break; + case 80 : BF[8]++; break; + case 48 : BF[9]++; break; + case 224 : BF[10]++; break; + case 176 : BF[11]++; break; + case 208 : BF[12]++; break; + case 112 : BF[13]++; break; + case 240 : BF[14]++; break; + case 4 : BF[15]++; break; + case 2 : BF[16]++; break; } - } } - if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; } void SegSites(unsigned char *x, int *n, int *s, int *seg) diff --git a/src/tree_build.c b/src/tree_build.c index aeb917c..eb47337 100644 --- a/src/tree_build.c +++ b/src/tree_build.c @@ -1,6 +1,6 @@ -/* tree_build.c 2009-11-21 */ +/* tree_build.c 2011-02-28 */ -/* Copyright 2008-2009 Emmanuel Paradis */ +/* Copyright 2008-2011 Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -12,86 +12,126 @@ static int str2int(char *x, int n) { int i, k = 1, ans = 0; - for (i = n - 2; i >= 0; i--, k *= 10) + for (i = n - 1; i >= 0; i--, k *= 10) ans += ((int)x[i] - 48) * k; return ans; } -void decode_edge(const char *x, int a, int b, int *node, double *w) +void extract_portion_Newick(const char *x, int a, int b, char *y) +{ + int i, j; + + for (i = a, j = 0; i <= b; i++, j++) y[j] = x[i]; + + y[j] = '\0'; +} + +void decode_terminal_edge_token(const char *x, int a, int b, int *node, double *w) +{ + int co = a; + char *endstr, str[100]; + + while (x[co] != ':') co++; + + extract_portion_Newick(x, a, co - 1, str); + *node = str2int(str, co - a); + extract_portion_Newick(x, co + 1, b, str); + *w = R_strtod(str, &endstr); +} + +void decode_internal_edge(const char *x, int a, int b, char *lab, double *w) { int i, k, co = a; char *endstr, str[100]; while (x[co] != ':') co++; - if (a == co) *node = 0; - else { - for (i = a, k = 0; i < co; i++, k++) str[k] = x[i]; - str[k] = '\0'; - *node = str2int(str, k + 1); - } - for (i = co + 1, k = 0; i <= b; i++, k++) str[k] = x[i]; - str[k] = '\0'; + + if (a == co) lab[0] = '\0'; /* if no node label */ + else extract_portion_Newick(x, a, co - 1, lab); + + extract_portion_Newick(x, co + 1, b, str); *w = R_strtod(str, &endstr); } -#define ADD_INTERNAL_EDGE\ - e[j] = curnode;\ - e[j + nedge] = curnode = ++node;\ +#define ADD_INTERNAL_EDGE \ + e[j] = curnode; \ + e[j + nedge] = curnode = ++node; \ + stack_internal[k++] = j; \ j++ -#define ADD_TERMINAL_EDGE\ - e[j] = curnode;\ - decode_edge(x, pr + 1, ps - 1, &tmpi, &tmpd);\ - e[j + nedge] = tmpi;\ - el[j] = tmpd;\ +#define ADD_TERMINAL_EDGE \ + e[j] = curnode; \ + decode_terminal_edge_token(x, pr + 1, ps - 1, &tmpi, &tmpd); \ + e[j + nedge] = tmpi; \ + el[j] = tmpd; \ j++ -#define GO_DOWN\ - l = j - 1;\ - while (e[l + nedge] != curnode) l--;\ - decode_edge(x, ps + 1, pt - 1, &tmpi, &tmpd);\ - el[l] = tmpd;\ +#define GO_DOWN \ + decode_internal_edge(x, ps + 1, pt - 1, lab, &tmpd); \ + SET_STRING_ELT(node_label, curnode - 1 - ntip, mkChar(lab)); \ + l = stack_internal[--k]; \ + el[l] = tmpd; \ curnode = e[l] SEXP treeBuildWithTokens(SEXP nwk) { const char *x; - int n, i, ntip = 1, nnode = 0, nedge, *e, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l; + int n, i, ntip = 1, nnode = 0, nedge, *e, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l, k, stack_internal[10000]; double *el, tmpd; - SEXP edge, edge_length, Nnode, phy; + char lab[512]; + SEXP edge, edge_length, Nnode, node_label, phy; PROTECT(nwk = coerceVector(nwk, STRSXP)); x = CHAR(STRING_ELT(nwk, 0)); n = strlen(x); skeleton = (int *)R_alloc(n, sizeof(int *)); + +/* first pass on the Newick string to localize parentheses and commas */ for (i = 0; i < n; i++) { - if (x[i] == '(' || x[i] == ',' || x[i] == ')') { + if (x[i] == '(') { skeleton[nsk] = i; nsk++; - switch(x[i]) { - case '(': break; - case ',': ntip++; break; - case ')': nnode++; break; - } + continue; + } + if (x[i] == ',') { + skeleton[nsk] = i; + nsk++; + ntip++; + continue; + } + if (x[i] == ')') { + skeleton[nsk] = i; + nsk++; + nnode++; } } + nedge = ntip + nnode - 1; PROTECT(Nnode = allocVector(INTSXP, 1)); PROTECT(edge = allocVector(INTSXP, nedge*2)); PROTECT(edge_length = allocVector(REALSXP, nedge)); + PROTECT(node_label = allocVector(STRSXP, nnode)); INTEGER(Nnode)[0] = nnode; e = INTEGER(edge); el = REAL(edge_length); curnode = node = ntip + 1; - j = 0; - + k = j = 0; +/* j: index of the current position in the edge matrix */ +/* k: index of the current position in stack_internal */ +/* stack_internal is a simple array storing the indices of the + successive internal edges from the root; it's a stack so it is + incremented every time an internal edge is added, and decremented + every GO_DOWN step. This makes easy to the index of the + subtending edge. */ + +/* second pass on the Newick string to build the "phylo" object elements */ for (i = 1; i < nsk - 1; i++) { ps = skeleton[i]; - Rprintf(""); /* <- again !!!! */ +// Rprintf(""); /* <- again !!!! */ if (x[ps] == '(') { ADD_INTERNAL_EDGE; continue; @@ -105,7 +145,7 @@ SEXP treeBuildWithTokens(SEXP nwk) continue; } if (x[ps] == ')') { - pt = skeleton[i + 1]; + pt = skeleton[i + 1]; // <- utile ??? if (x[pr] == ',') { ADD_TERMINAL_EDGE; GO_DOWN; @@ -124,22 +164,32 @@ SEXP treeBuildWithTokens(SEXP nwk) ADD_TERMINAL_EDGE; } -/* is there a root edge? */ +/* is there a root edge and/or root label? */ if (ps < n - 2) { - PROTECT(phy = allocVector(VECSXP, 4)); - SEXP root_edge; - decode_edge(x, ps + 1, n - 2, &tmpi, &tmpd); - PROTECT(root_edge = allocVector(REALSXP, 1)); - REAL(root_edge)[0] = tmpd; - SET_VECTOR_ELT(phy, 3, root_edge); - UNPROTECT(1); - } else PROTECT(phy = allocVector(VECSXP, 3)); + i = ps + 1; + while (i < n - 2 && x[i] != ':') i++; + if (i < n - 2) { + PROTECT(phy = allocVector(VECSXP, 5)); + SEXP root_edge; + decode_internal_edge(x, ps + 1, n - 2, lab, &tmpd); + PROTECT(root_edge = allocVector(REALSXP, 1)); + REAL(root_edge)[0] = tmpd; + SET_VECTOR_ELT(phy, 4, root_edge); + UNPROTECT(1); + SET_STRING_ELT(node_label, 0, mkChar(lab)); + } else { + extract_portion_Newick(x, ps + 1, n - 2, lab); + SET_STRING_ELT(node_label, 0, mkChar(lab)); + PROTECT(phy = allocVector(VECSXP, 4)); + } + } else PROTECT(phy = allocVector(VECSXP, 4)); SET_VECTOR_ELT(phy, 0, edge); SET_VECTOR_ELT(phy, 1, edge_length); SET_VECTOR_ELT(phy, 2, Nnode); + SET_VECTOR_ELT(phy, 3, node_label); - UNPROTECT(5); + UNPROTECT(6); return phy; } -- 2.39.2