various bug fixes since the release of ape 3.0

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)
diff --git a/DESCRIPTION b/DESCRIPTION

index 6ad0dbccdabaf6a5411bd723fa6a974dde67736e..8d34b1865ec83e2b59fe20a3445a82166bff2c67 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
  Version: 3.0-1
-Date: 2012-02-14
+Date: 2012-02-17
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/NEWS b/NEWS

index 53eb924bb2734247fdcdc6ac271698869bb7d741..57a418c6ccc6900c31d67a1e6dab4f121120cda2 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,8 +3,10 @@
  
  NEW FEATURES
  
+    o dist.dna() has two new models: "indel" and "indelblock".
+
      o bind.tree() now accepts 'position' > 0 when the trees have no
-      banch lengths permitting to create a node in 'x' when grafting
+      banch length permitting to create a node in 'x' when grafting
        'y' (see ?bind.tree for details).
  
  
@@ -13,6 +15,8 @@ BUG FIXES
      o cophyloplot( , rotate = TRUE) made R hanged after a few clicks.
        Also the tree is no more plotted twice.
  
+    o read.GenBank() has been updated to work with EFetch 2.0.
+
  
  
                 CHANGES IN APE VERSION 3.0
diff --git a/R/DNA.R b/R/DNA.R

index 54898cf4413358c94edf245d056baf1ab46b22b6..abf3a4343a33b4d456a6d1edfe549855249d55d0 100644 (file)
--- a/R/DNA.R
+++ b/R/DNA.R
@@ -1,4 +1,4 @@
-## DNA.R (2012-01-10)
+## DNA.R (2012-02-14)
  
  ##   Manipulations and Comparisons of DNA Sequences
  
@@ -227,7 +227,7 @@ print.DNAbin <- function(x, printlen = 6, digits = 3, ...)
  as.DNAbin <- function(x, ...) UseMethod("as.DNAbin")
  
  ._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k",
-           "y", "v", "h",  "d", "b", "n", "-", "?")
+           "y", "v", "h", "d", "b", "n", "-", "?")
  
  ._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80,
             48, 224, 176, 208, 112, 240, 4, 2)
@@ -353,16 +353,17 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE,
                       as.matrix = FALSE)
  {
      MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93",
-                "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV")
+                "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV",
+                "INDEL", "INDELBLOCK")
      imod <- pmatch(toupper(model), MODELS)
      if (is.na(imod))
          stop(paste("'model' must be one of:",
                     paste("\"", MODELS, "\"", sep = "", collapse = " ")))
      if (imod == 11 && variance) {
-        warning("computing variance not available for model BH87.")
+        warning("computing variance not available for model BH87")
          variance <- FALSE
      }
-    if (gamma && imod %in% c(1, 5:7, 9:15)) {
+    if (gamma && imod %in% c(1, 5:7, 9:17)) {
          warning(paste("gamma-correction not available for model", model))
          gamma <- FALSE
      }
@@ -371,9 +372,13 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE,
      n <- dim(x)
      s <- n[2]
      n <- n[1]
+
      if (imod %in% c(4, 6:8)) {
          BF <- if (is.null(base.freq)) base.freq(x) else base.freq
      } else BF <- 0
+
+    if (imod %in% 16:17) pairwise.deletion <- TRUE
+
      if (!pairwise.deletion) {
          keep <- .C("GlobalDeletionDNA", x, n, s,
                     rep(1L, s), PACKAGE = "ape")[[4]]
diff --git a/R/read.GenBank.R b/R/read.GenBank.R

index 527f2dd51474cae6ef9b29a5bb9fcdfd2042b54c..23bf4db16773a0878373f11e73ef0046c8ab8c1e 100644 (file)
--- a/R/read.GenBank.R
+++ b/R/read.GenBank.R
@@ -1,8 +1,8 @@
-## read.GenBank.R (2010-07-22)
+## read.GenBank.R (2012-02-17)
  
  ##   Read DNA Sequences from GenBank via Internet
  
-## Copyright 2002-2010 Emmanuel Paradis
+## Copyright 2002-2012 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
@@ -22,7 +22,7 @@ read.GenBank <-
          if (i == nrequest) b <- N
          URL <- paste("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=",
                       paste(access.nb[a:b], collapse = ","),
-                     "&rettype=gb", sep = "")
+                     "&rettype=gb&retmode=text", sep = "")
          X <- c(X, scan(file = URL, what = "", sep = "\n", quiet = TRUE))
      }
      FI <- grep("^ {0,}ORIGIN", X) + 1
diff --git a/man/dist.dna.Rd b/man/dist.dna.Rd

index 83211d0bf96e002b119d4c3487b3a852f633f25e..bc5d4858a46ad3cf6a28fb1ce043731f25926d73 100644 (file)
--- a/man/dist.dna.Rd
+++ b/man/dist.dna.Rd
@@ -14,8 +14,8 @@ dist.dna(x, model = "K80", variance = FALSE,
      used; must be one of \code{"raw"}, \code{"N"}, \code{"TS"},
      \code{"TV"}, \code{"JC69"}, \code{"K80"} (the default),
      \code{"F81"}, \code{"K81"}, \code{"F84"}, \code{"BH87"},
-    \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, or
-    \code{"paralin"}.}
+    \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"},
+    \code{"paralin"}, \code{"indel"}, or \code{"indelblock"}.}
    \item{variance}{a logical indicating whether to compute the variances
      of the distances; defaults to \code{FALSE} so the variances are not
      computed.}
@@ -24,7 +24,8 @@ dist.dna(x, model = "K80", variance = FALSE,
        FALSE} so no correction is applied).}
    \item{pairwise.deletion}{a logical indicating whether to delete the
      sites with missing data in a pairwise way. The default is to delete
-    the sites with at least one missing data for all sequences.}
+    the sites with at least one missing data for all sequences (ignored
+    if \code{model = "indel"} or \code{"indelblock"}).}
    \item{base.freq}{the base frequencies to be used in the computations
      (if applicable, i.e. if \code{method = "F84"}). By default, the
      base frequencies are computed from the whole sample of sequences.}
@@ -121,6 +122,14 @@ dist.dna(x, model = "K80", variance = FALSE,
  
    \item{\code{paralin}: }{Lake (1994) developed the paralinear distance which
      can be viewed as another variant of the Barry--Hartigan distance.}
+
+  \item{\code{indel}: }{this counts the number of sites where there an
+    insertion/deletion gap in one sequence and not in the other.}
+
+  \item{\code{indelblock}: }{same than before but contiguous gaps are
+    counted as a single unit. Note that the distance between `-A-' and
+    `A--' is 3 because there are three different blocks of gaps, whereas
+    the ``indel'' distance will be 2.}
  }}
  \value{
    an object of class \link[stats]{dist} (by default), or a numeric
diff --git a/src/dist_dna.c b/src/dist_dna.c

index 61f32d29ca3a55e5d526fe3b3de8a09d7e3f1c95..289be088b37878de150195605ce39241be6bb539 100644 (file)
--- a/src/dist_dna.c
+++ b/src/dist_dna.c
@@ -1,12 +1,12 @@
-/* dist_dna.c       2012-02-13 */
+/* dist_dna.c       2012-02-14 */
  
  /* Copyright 2005-2012 Emmanuel Paradis
  
  /* This file is part of the R-package `ape'. */
  /* See the file ../COPYING for licensing issues. */
  
-#include <R.h>
  #include <R_ext/Lapack.h>
+#include "ape.h"
  
  /* from R: print(log(4), d = 22) */
  #define LN4 1.386294361119890572454
@@ -73,8 +73,83 @@ double detFourByFour(double *x)
      }\
      if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++;
  
+void distDNA_indel(unsigned char *x, int *n, int *s, double *d)
+{
+       int i1, i2, s1, s2, target, N;
+
+       target = 0;
+       for (i1 = 1; i1 < *n; i1++) {
+               for (i2 = i1 + 1; i2 <= *n; i2++) {
+                       N = 0;
+
+                       for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1 += *n, s2 += *n)
+                               if ((x[s1] ^ x[s2]) & 4) N++;
+
+                       d[target] = ((double) N);
+                       target++;
+               }
+       }
+}
+
+#define X(i, j) i - 1 + *n * (j - 1)
+
+#define DINDEX(i, j) n * (i - 1) - i*(i - 1)/2 + j - i - 1
+
+int give_index(int i, int j, int n)
+{
+       if (i > j) return(DINDEX(j, i));
+       else return(DINDEX(i, j));
+}
  
+void distDNA_indelblock(unsigned char *x, int *n, int *s, double *d)
+{
+       int i1, i2, s1, s2, target, N, start_block, end_block;
+
+       for (i1 = 1; i1 <= *n; i1++) {
+
+/* find a block of one or more '-' */
+
+               s1 = 1;
+               while (s1 < *s) {
+                       if (x[X(i1, s1)] == 4) {
+                               start_block = s1;
+                               while (x[X(i1, s1)] == 4) s1++;
+                               end_block = s1 - 1;
+
+/* then look if the same block is present in all the other sequences */
+
+                               for (i2 = 1; i2 <= *n; i2++) {
+                                       if (i2 == i1) continue;
+
+                                       target = give_index(i1, i2, *n);
+
+                                       if (start_block > 1) {
+                                               if (x[X(i2, start_block - 1)] == 4) {
+                                                       d[target]++;
+                                                       continue;
+                                               }
+                                       }
+                                       if (end_block < *s) {
+                                               if (x[X(i2, end_block + 1)] == 4) {
+                                                       d[target]++;
+                                                       continue;
+                                               }
+                                       }
+                                       for (s2 = start_block; s2 <= end_block; s2++) {
+                                               if (x[X(i2, s2)] != 4) {
+                                                       d[target]++;
+                                                       continue;
+                                               }
+                                       }
+                               }
+                               s1 = end_block + 1;
+                       }
+                       s1++;
+               }
+       }
+}
  
+#undef X
  
  void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel)
  {
diff --git a/src/mvr.c b/src/mvr.c

index 5f9fc0cd7e5b795758e2344a7b9b846275dea5fa..829a839c1706efcdb0322c2e0b35543a12a65cd6 100644 (file)
--- a/src/mvr.c
+++ b/src/mvr.c
@@ -1,6 +1,6 @@
-/* mvr.c    2011-10-11 */
+/* mvr.c    2012-02-17 */
  
-/* Copyright 2011 Andrei-Alin Popescu */
+/* Copyright 2011-2012 Andrei-Alin Popescu */
  
  /* This file is part of the R-package `ape'. */
  /* See the file ../COPYING for licensing issues. */
@@ -142,7 +142,7 @@ void mvr(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_lengt
                         double xi = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
                         double yi = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
                          double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
-                       new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k]);
+                       new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k+1]);
                          new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
                         ij++;
                 }
diff --git a/src/mvrs.c b/src/mvrs.c

index d2aa342135a68536e407bfdeb62b3446a6f849e4..14b5fac267fed528b82d8fcd1a9d24c24f2ec4e4 100644 (file)
--- a/src/mvrs.c
+++ b/src/mvrs.c
@@ -1,6 +1,6 @@
-/* mvrs.c    2011-10-11 */
+/* mvrs.c    2012-02-17 */
  
-/* Copyright 2011 Andrei-Alin Popescu */
+/* Copyright 2011-2012 Andrei-Alin Popescu */
  
  /* This file is part of the R-package `ape'. */
  /* See the file ../COPYING for licensing issues. */
@@ -129,7 +129,7 @@ void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_leng
                          for (j = i + 1; j <= n; j++) {
                                  //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
                                  A=S[i]+S[j]-B*D[give_index(i,j,n)];
-                                Rprintf("Q[%i,%i]=%f\n",i,j,A);
+                                //Rprintf("Q[%i,%i]=%f\n",i,j,A);
                                 if (A > smallest_S) {
                                         OTU1 = i;
                                         OTU2 = j;
@@ -392,6 +392,3 @@ void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_leng
         edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
         edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
  }
-
-
-
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 17 Feb 2012 14:14:39 +0000 (14:14 +0000)
DESCRIPTION		patch \| blob \| history
NEWS		patch \| blob \| history
R/DNA.R		patch \| blob \| history
R/read.GenBank.R		patch \| blob \| history
man/dist.dna.Rd		patch \| blob \| history
src/dist_dna.c		patch \| blob \| history
src/mvr.c		patch \| blob \| history
src/mvrs.c		patch \| blob \| history