From: paradis Date: Wed, 16 Sep 2009 13:02:36 +0000 (+0000) Subject: bug fix in seg.sites + new option in base.freq X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=8883719a9139685f26d4c0c4cb26872d5f6d0d96 bug fix in seg.sites + new option in base.freq git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@89 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/ChangeLog b/ChangeLog index 5028e76..b9e9e59 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ + CHANGES IN APE VERSION 2.3-4 + + +NEW FEATURES + + o base.freq() has a new option 'freq' to return the counts; the + default is still to return the proportions. + + +BUG FIXES + + o seg.sites() did not handle ambiguous nucleotides correctly: they are + now ignored. + + CHANGES IN APE VERSION 2.3-3 diff --git a/DESCRIPTION b/DESCRIPTION index 85aee0b..4aa608c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape -Version: 2.3-3 -Date: 2009-09-09 +Version: 2.3-4 +Date: 2009-09-16 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Gangolf Jobb, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/DNA.R b/R/DNA.R index dd9c60b..919f0ec 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -259,11 +259,11 @@ as.character.DNAbin <- function(x, ...) if (is.list(x)) lapply(x, f) else f(x) } -base.freq <- function(x) +base.freq <- function(x, freq = FALSE) { if (is.list(x)) x <- unlist(x) n <- length(x) - BF <- .C("BaseProportion", x, n, double(4), + BF <- .C("BaseProportion", x, n, double(4), freq, DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]] names(BF) <- letters[c(1, 3, 7, 20)] BF diff --git a/man/base.freq.Rd b/man/base.freq.Rd index 3847d9c..96414a5 100644 --- a/man/base.freq.Rd +++ b/man/base.freq.Rd @@ -2,14 +2,16 @@ \alias{base.freq} \title{Base frequencies from DNA Sequences} \usage{ -base.freq(x) +base.freq(x, freq = FALSE) } \arguments{ \item{x}{a vector, a matrix, or a list which contains the DNA sequences.} + \item{freq}{a logical specifying whether to return the proportions + (the default) or the absolute frequencies (counts).} } \description{ - This function computes the relative frequencies (i.e. percentages) of + This function computes the relative frequencies (i.e. proportions) of the four DNA bases (adenine, cytosine, guanine, and thymidine) from a sample of sequences. } @@ -19,7 +21,7 @@ base.freq(x) computations. } \value{ - A numeric vector stoting the relative frequencies with names + A numeric vector storing the relative frequencies with names \code{c("a", "c", "g", "t")}. } \author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}} diff --git a/man/seg.sites.Rd b/man/seg.sites.Rd index 19efd2b..8f97c52 100644 --- a/man/seg.sites.Rd +++ b/man/seg.sites.Rd @@ -15,10 +15,11 @@ seg.sites(x) } \details{ If the sequences are in a list, all the sequences must be of the same - length. + length. Ambiguous nucleotides are ignored. } \value{ - A numeric vector giving the indices of the segregating sites. + A numeric (integer) vector giving the indices of the segregating + sites. } \author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}} \note{ diff --git a/src/dist_dna.c b/src/dist_dna.c index d56c794..2aa6da6 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,4 +1,4 @@ -/* dist_dna.c 2008-12-22 */ +/* dist_dna.c 2009-09-16 */ /* Copyright 2005-2008 Emmanuel Paradis @@ -12,7 +12,7 @@ #define LN4 1.386294361119890572454 /* returns 8 if the base is known surely, 0 otherwise */ -#define KnownBase(a) a & 8 +#define KnownBase(a) (a & 8) /* returns 1 if the base is adenine surely, 0 otherwise */ #define IsAdenine(a) a == 136 @@ -951,7 +951,7 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -void BaseProportion(unsigned char *x, int *n, double *BF) +void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) { int i, m; @@ -967,7 +967,7 @@ void BaseProportion(unsigned char *x, int *n, double *BF) } } } - for (i = 0; i < 4; i++) BF[i] /= m; + if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; } void SegSites(unsigned char *x, int *n, int *s, int *seg) @@ -981,7 +981,7 @@ void SegSites(unsigned char *x, int *n, int *s, int *seg) basis = x[i]; i++; while (i < *n * (j + 1)) { - if (x[i] == basis) i++; + if (!KnownBase(x[i]) || x[i] == basis) i++; else { seg[j] = 1; break;