Package: ape
Version: 3.0-1
-Date: 2012-02-14
+Date: 2012-02-17
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
NEW FEATURES
+ o dist.dna() has two new models: "indel" and "indelblock".
+
o bind.tree() now accepts 'position' > 0 when the trees have no
- banch lengths permitting to create a node in 'x' when grafting
+ banch length permitting to create a node in 'x' when grafting
'y' (see ?bind.tree for details).
o cophyloplot( , rotate = TRUE) made R hanged after a few clicks.
Also the tree is no more plotted twice.
+ o read.GenBank() has been updated to work with EFetch 2.0.
+
CHANGES IN APE VERSION 3.0
-## DNA.R (2012-01-10)
+## DNA.R (2012-02-14)
## Manipulations and Comparisons of DNA Sequences
as.DNAbin <- function(x, ...) UseMethod("as.DNAbin")
._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k",
- "y", "v", "h", "d", "b", "n", "-", "?")
+ "y", "v", "h", "d", "b", "n", "-", "?")
._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80,
48, 224, 176, 208, 112, 240, 4, 2)
as.matrix = FALSE)
{
MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93",
- "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV")
+ "GG95", "LOGDET", "BH87", "PARALIN", "N", "TS", "TV",
+ "INDEL", "INDELBLOCK")
imod <- pmatch(toupper(model), MODELS)
if (is.na(imod))
stop(paste("'model' must be one of:",
paste("\"", MODELS, "\"", sep = "", collapse = " ")))
if (imod == 11 && variance) {
- warning("computing variance not available for model BH87.")
+ warning("computing variance not available for model BH87")
variance <- FALSE
}
- if (gamma && imod %in% c(1, 5:7, 9:15)) {
+ if (gamma && imod %in% c(1, 5:7, 9:17)) {
warning(paste("gamma-correction not available for model", model))
gamma <- FALSE
}
n <- dim(x)
s <- n[2]
n <- n[1]
+
if (imod %in% c(4, 6:8)) {
BF <- if (is.null(base.freq)) base.freq(x) else base.freq
} else BF <- 0
+
+ if (imod %in% 16:17) pairwise.deletion <- TRUE
+
if (!pairwise.deletion) {
keep <- .C("GlobalDeletionDNA", x, n, s,
rep(1L, s), PACKAGE = "ape")[[4]]
-## read.GenBank.R (2010-07-22)
+## read.GenBank.R (2012-02-17)
## Read DNA Sequences from GenBank via Internet
-## Copyright 2002-2010 Emmanuel Paradis
+## Copyright 2002-2012 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
if (i == nrequest) b <- N
URL <- paste("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=",
paste(access.nb[a:b], collapse = ","),
- "&rettype=gb", sep = "")
+ "&rettype=gb&retmode=text", sep = "")
X <- c(X, scan(file = URL, what = "", sep = "\n", quiet = TRUE))
}
FI <- grep("^ {0,}ORIGIN", X) + 1
used; must be one of \code{"raw"}, \code{"N"}, \code{"TS"},
\code{"TV"}, \code{"JC69"}, \code{"K80"} (the default),
\code{"F81"}, \code{"K81"}, \code{"F84"}, \code{"BH87"},
- \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, or
- \code{"paralin"}.}
+ \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"},
+ \code{"paralin"}, \code{"indel"}, or \code{"indelblock"}.}
\item{variance}{a logical indicating whether to compute the variances
of the distances; defaults to \code{FALSE} so the variances are not
computed.}
FALSE} so no correction is applied).}
\item{pairwise.deletion}{a logical indicating whether to delete the
sites with missing data in a pairwise way. The default is to delete
- the sites with at least one missing data for all sequences.}
+ the sites with at least one missing data for all sequences (ignored
+ if \code{model = "indel"} or \code{"indelblock"}).}
\item{base.freq}{the base frequencies to be used in the computations
(if applicable, i.e. if \code{method = "F84"}). By default, the
base frequencies are computed from the whole sample of sequences.}
\item{\code{paralin}: }{Lake (1994) developed the paralinear distance which
can be viewed as another variant of the Barry--Hartigan distance.}
+
+ \item{\code{indel}: }{this counts the number of sites where there an
+ insertion/deletion gap in one sequence and not in the other.}
+
+ \item{\code{indelblock}: }{same than before but contiguous gaps are
+ counted as a single unit. Note that the distance between `-A-' and
+ `A--' is 3 because there are three different blocks of gaps, whereas
+ the ``indel'' distance will be 2.}
}}
\value{
an object of class \link[stats]{dist} (by default), or a numeric
-/* dist_dna.c 2012-02-13 */
+/* dist_dna.c 2012-02-14 */
/* Copyright 2005-2012 Emmanuel Paradis
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
-#include <R.h>
#include <R_ext/Lapack.h>
+#include "ape.h"
/* from R: print(log(4), d = 22) */
#define LN4 1.386294361119890572454
}\
if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++;
+void distDNA_indel(unsigned char *x, int *n, int *s, double *d)
+{
+ int i1, i2, s1, s2, target, N;
+
+ target = 0;
+ for (i1 = 1; i1 < *n; i1++) {
+ for (i2 = i1 + 1; i2 <= *n; i2++) {
+ N = 0;
+
+ for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1 += *n, s2 += *n)
+ if ((x[s1] ^ x[s2]) & 4) N++;
+
+ d[target] = ((double) N);
+ target++;
+ }
+ }
+}
+
+#define X(i, j) i - 1 + *n * (j - 1)
+
+#define DINDEX(i, j) n * (i - 1) - i*(i - 1)/2 + j - i - 1
+
+int give_index(int i, int j, int n)
+{
+ if (i > j) return(DINDEX(j, i));
+ else return(DINDEX(i, j));
+}
+void distDNA_indelblock(unsigned char *x, int *n, int *s, double *d)
+{
+ int i1, i2, s1, s2, target, N, start_block, end_block;
+
+ for (i1 = 1; i1 <= *n; i1++) {
+
+/* find a block of one or more '-' */
+
+ s1 = 1;
+ while (s1 < *s) {
+ if (x[X(i1, s1)] == 4) {
+ start_block = s1;
+ while (x[X(i1, s1)] == 4) s1++;
+ end_block = s1 - 1;
+
+/* then look if the same block is present in all the other sequences */
+
+ for (i2 = 1; i2 <= *n; i2++) {
+ if (i2 == i1) continue;
+
+ target = give_index(i1, i2, *n);
+
+ if (start_block > 1) {
+ if (x[X(i2, start_block - 1)] == 4) {
+ d[target]++;
+ continue;
+ }
+ }
+ if (end_block < *s) {
+ if (x[X(i2, end_block + 1)] == 4) {
+ d[target]++;
+ continue;
+ }
+ }
+ for (s2 = start_block; s2 <= end_block; s2++) {
+ if (x[X(i2, s2)] != 4) {
+ d[target]++;
+ continue;
+ }
+ }
+ }
+ s1 = end_block + 1;
+ }
+ s1++;
+ }
+ }
+}
+#undef X
void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel)
{
-/* mvr.c 2011-10-11 */
+/* mvr.c 2012-02-17 */
-/* Copyright 2011 Andrei-Alin Popescu */
+/* Copyright 2011-2012 Andrei-Alin Popescu */
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
double xi = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
double yi = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
- new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k]);
+ new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k+1]);
new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
ij++;
}
-/* mvrs.c 2011-10-11 */
+/* mvrs.c 2012-02-17 */
-/* Copyright 2011 Andrei-Alin Popescu */
+/* Copyright 2011-2012 Andrei-Alin Popescu */
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
for (j = i + 1; j <= n; j++) {
//Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
A=S[i]+S[j]-B*D[give_index(i,j,n)];
- Rprintf("Q[%i,%i]=%f\n",i,j,A);
+ //Rprintf("Q[%i,%i]=%f\n",i,j,A);
if (A > smallest_S) {
OTU1 = i;
OTU2 = j;
edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
}
-
-
-