From 06e83c6878153f8e7999c0470263c40aad4db258 Mon Sep 17 00:00:00 2001 From: paradis Date: Fri, 17 Feb 2012 14:14:39 +0000 Subject: [PATCH] various bug fixes since the release of ape 3.0 git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@179 6e262413-ae40-0410-9e79-b911bd7a66b7 --- DESCRIPTION | 2 +- NEWS | 6 +++- R/DNA.R | 15 ++++++--- R/read.GenBank.R | 6 ++-- man/dist.dna.Rd | 15 +++++++-- src/dist_dna.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++-- src/mvr.c | 6 ++-- src/mvrs.c | 9 ++---- 8 files changed, 114 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6ad0dbc..8d34b18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 3.0-1 -Date: 2012-02-14 +Date: 2012-02-17 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/NEWS b/NEWS index 53eb924..57a418c 100644 --- a/NEWS +++ b/NEWS @@ -3,8 +3,10 @@ NEW FEATURES + o dist.dna() has two new models: "indel" and "indelblock". + o bind.tree() now accepts 'position' > 0 when the trees have no - banch lengths permitting to create a node in 'x' when grafting + banch length permitting to create a node in 'x' when grafting 'y' (see ?bind.tree for details). @@ -13,6 +15,8 @@ BUG FIXES o cophyloplot( , rotate = TRUE) made R hanged after a few clicks. Also the tree is no more plotted twice. + o read.GenBank() has been updated to work with EFetch 2.0. + CHANGES IN APE VERSION 3.0 diff --git a/R/DNA.R b/R/DNA.R index 54898cf..abf3a43 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2012-01-10) +## DNA.R (2012-02-14) ## Manipulations and Comparisons of DNA Sequences @@ -227,7 +227,7 @@ print.DNAbin <- function(x, printlen = 6, digits = 3, ...) as.DNAbin <- function(x, ...) UseMethod("as.DNAbin") ._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k", - "y", "v", "h", "d", "b", "n", "-", "?") + "y", "v", "h", "d", "b", "n", "-", "?") ._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80, 48, 224, 176, 208, 112, 240, 4, 2) @@ -353,16 +353,17 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE, as.matrix = FALSE) { MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93", - "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV") + "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV", + "INDEL", "INDELBLOCK") imod <- pmatch(toupper(model), MODELS) if (is.na(imod)) stop(paste("'model' must be one of:", paste("\"", MODELS, "\"", sep = "", collapse = " "))) if (imod == 11 && variance) { - warning("computing variance not available for model BH87.") + warning("computing variance not available for model BH87") variance <- FALSE } - if (gamma && imod %in% c(1, 5:7, 9:15)) { + if (gamma && imod %in% c(1, 5:7, 9:17)) { warning(paste("gamma-correction not available for model", model)) gamma <- FALSE } @@ -371,9 +372,13 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE, n <- dim(x) s <- n[2] n <- n[1] + if (imod %in% c(4, 6:8)) { BF <- if (is.null(base.freq)) base.freq(x) else base.freq } else BF <- 0 + + if (imod %in% 16:17) pairwise.deletion <- TRUE + if (!pairwise.deletion) { keep <- .C("GlobalDeletionDNA", x, n, s, rep(1L, s), PACKAGE = "ape")[[4]] diff --git a/R/read.GenBank.R b/R/read.GenBank.R index 527f2dd..23bf4db 100644 --- a/R/read.GenBank.R +++ b/R/read.GenBank.R @@ -1,8 +1,8 @@ -## read.GenBank.R (2010-07-22) +## read.GenBank.R (2012-02-17) ## Read DNA Sequences from GenBank via Internet -## Copyright 2002-2010 Emmanuel Paradis +## Copyright 2002-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -22,7 +22,7 @@ read.GenBank <- if (i == nrequest) b <- N URL <- paste("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=", paste(access.nb[a:b], collapse = ","), - "&rettype=gb", sep = "") + "&rettype=gb&retmode=text", sep = "") X <- c(X, scan(file = URL, what = "", sep = "\n", quiet = TRUE)) } FI <- grep("^ {0,}ORIGIN", X) + 1 diff --git a/man/dist.dna.Rd b/man/dist.dna.Rd index 83211d0..bc5d485 100644 --- a/man/dist.dna.Rd +++ b/man/dist.dna.Rd @@ -14,8 +14,8 @@ dist.dna(x, model = "K80", variance = FALSE, used; must be one of \code{"raw"}, \code{"N"}, \code{"TS"}, \code{"TV"}, \code{"JC69"}, \code{"K80"} (the default), \code{"F81"}, \code{"K81"}, \code{"F84"}, \code{"BH87"}, - \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, or - \code{"paralin"}.} + \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, + \code{"paralin"}, \code{"indel"}, or \code{"indelblock"}.} \item{variance}{a logical indicating whether to compute the variances of the distances; defaults to \code{FALSE} so the variances are not computed.} @@ -24,7 +24,8 @@ dist.dna(x, model = "K80", variance = FALSE, FALSE} so no correction is applied).} \item{pairwise.deletion}{a logical indicating whether to delete the sites with missing data in a pairwise way. The default is to delete - the sites with at least one missing data for all sequences.} + the sites with at least one missing data for all sequences (ignored + if \code{model = "indel"} or \code{"indelblock"}).} \item{base.freq}{the base frequencies to be used in the computations (if applicable, i.e. if \code{method = "F84"}). By default, the base frequencies are computed from the whole sample of sequences.} @@ -121,6 +122,14 @@ dist.dna(x, model = "K80", variance = FALSE, \item{\code{paralin}: }{Lake (1994) developed the paralinear distance which can be viewed as another variant of the Barry--Hartigan distance.} + + \item{\code{indel}: }{this counts the number of sites where there an + insertion/deletion gap in one sequence and not in the other.} + + \item{\code{indelblock}: }{same than before but contiguous gaps are + counted as a single unit. Note that the distance between `-A-' and + `A--' is 3 because there are three different blocks of gaps, whereas + the ``indel'' distance will be 2.} }} \value{ an object of class \link[stats]{dist} (by default), or a numeric diff --git a/src/dist_dna.c b/src/dist_dna.c index 61f32d2..289be08 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,12 +1,12 @@ -/* dist_dna.c 2012-02-13 */ +/* dist_dna.c 2012-02-14 */ /* Copyright 2005-2012 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ -#include #include +#include "ape.h" /* from R: print(log(4), d = 22) */ #define LN4 1.386294361119890572454 @@ -73,8 +73,83 @@ double detFourByFour(double *x) }\ if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; +void distDNA_indel(unsigned char *x, int *n, int *s, double *d) +{ + int i1, i2, s1, s2, target, N; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + N = 0; + + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1 += *n, s2 += *n) + if ((x[s1] ^ x[s2]) & 4) N++; + + d[target] = ((double) N); + target++; + } + } +} + +#define X(i, j) i - 1 + *n * (j - 1) + +#define DINDEX(i, j) n * (i - 1) - i*(i - 1)/2 + j - i - 1 + +int give_index(int i, int j, int n) +{ + if (i > j) return(DINDEX(j, i)); + else return(DINDEX(i, j)); +} +void distDNA_indelblock(unsigned char *x, int *n, int *s, double *d) +{ + int i1, i2, s1, s2, target, N, start_block, end_block; + + for (i1 = 1; i1 <= *n; i1++) { + +/* find a block of one or more '-' */ + + s1 = 1; + while (s1 < *s) { + if (x[X(i1, s1)] == 4) { + start_block = s1; + while (x[X(i1, s1)] == 4) s1++; + end_block = s1 - 1; + +/* then look if the same block is present in all the other sequences */ + + for (i2 = 1; i2 <= *n; i2++) { + if (i2 == i1) continue; + + target = give_index(i1, i2, *n); + + if (start_block > 1) { + if (x[X(i2, start_block - 1)] == 4) { + d[target]++; + continue; + } + } + if (end_block < *s) { + if (x[X(i2, end_block + 1)] == 4) { + d[target]++; + continue; + } + } + for (s2 = start_block; s2 <= end_block; s2++) { + if (x[X(i2, s2)] != 4) { + d[target]++; + continue; + } + } + } + s1 = end_block + 1; + } + s1++; + } + } +} +#undef X void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel) { diff --git a/src/mvr.c b/src/mvr.c index 5f9fc0c..829a839 100644 --- a/src/mvr.c +++ b/src/mvr.c @@ -1,6 +1,6 @@ -/* mvr.c 2011-10-11 */ +/* mvr.c 2012-02-17 */ -/* Copyright 2011 Andrei-Alin Popescu */ +/* Copyright 2011-2012 Andrei-Alin Popescu */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -142,7 +142,7 @@ void mvr(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_lengt double xi = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */ double yi = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */ double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]); - new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k]); + new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k+1]); new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]); ij++; } diff --git a/src/mvrs.c b/src/mvrs.c index d2aa342..14b5fac 100644 --- a/src/mvrs.c +++ b/src/mvrs.c @@ -1,6 +1,6 @@ -/* mvrs.c 2011-10-11 */ +/* mvrs.c 2012-02-17 */ -/* Copyright 2011 Andrei-Alin Popescu */ +/* Copyright 2011-2012 Andrei-Alin Popescu */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -129,7 +129,7 @@ void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_leng for (j = i + 1; j <= n; j++) { //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B); A=S[i]+S[j]-B*D[give_index(i,j,n)]; - Rprintf("Q[%i,%i]=%f\n",i,j,A); + //Rprintf("Q[%i,%i]=%f\n",i,j,A); if (A > smallest_S) { OTU1 = i; OTU2 = j; @@ -392,6 +392,3 @@ void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_leng edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2; edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2; } - - - -- 2.39.2