From dff741171e7afe3f9aaa2d9cb19c2f91995e8623 Mon Sep 17 00:00:00 2001 From: paradis Date: Wed, 28 Nov 2012 10:02:30 +0000 Subject: [PATCH] adding the new function 'where' git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@200 6e262413-ae40-0410-9e79-b911bd7a66b7 --- DESCRIPTION | 2 +- NEWS | 2 ++ R/DNA.R | 28 +++++++++++++++++++++++++++- man/consensus.Rd | 4 ++++ man/where.Rd | 35 +++++++++++++++++++++++++++++++++++ src/dist_dna.c | 23 ++++++++++++++++++++++- 6 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 man/where.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 83eab43..a81d208 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 3.0-7 -Date: 2012-11-22 +Date: 2012-11-28 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/NEWS b/NEWS index 4425348..9d57433 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,8 @@ NEW FEATURES + o The new function 'where' searches patterns in DNA sequences. + o pic() gains an option 'rescaled.tree = FALSE' to return the tree with its branch lengths rescaled for the PICs calculation. diff --git a/R/DNA.R b/R/DNA.R index b426dbd..1d0ce79 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2012-09-13) +## DNA.R (2012-11-28) ## Manipulations and Comparisons of DNA Sequences @@ -471,3 +471,29 @@ image.DNAbin <- function(x, what, col, bg = "white", xlab = "", ylab = "", horiz = TRUE, xpd = TRUE) } } + +where <- function(x, pattern) +{ + pat <- as.DNAbin(strsplit(pattern, NULL)[[1]]) + p <- as.integer(length(pat)) + + foo <- function(x, pat, p) { + s <- as.integer(length(x)) + if (s < p) stop("sequence shorter than the pattern") + ans <- .C("where", x, pat, s, p, integer(s), 0L, + DUP = FALSE, NAOK = TRUE, PACKAGE = "apex") + n <- ans[[6]] + if (n) ans[[5]][seq_len(n)] - p + 2L else integer() + } + + if (is.list(x)) return(lapply(x, foo, pat = pat, p = p)) + if (is.matrix(x)) { + n <- nrow(x) + res <- vector("list", n) + for (i in seq_along(n)) + res[[i]] <- foo(x[i, , drop = TRUE], pat, p) + names(res) <- rownames(x) + return(res) + } + foo(x, pat, p) # if x is a vector +} diff --git a/man/consensus.Rd b/man/consensus.Rd index 2f7bdeb..ffaa58f 100644 --- a/man/consensus.Rd +++ b/man/consensus.Rd @@ -31,6 +31,10 @@ consensus(..., p = 1, check.labels = TRUE) \value{ an object of class \code{"phylo"}. } +\references{ + Felsenstein, J. (2004) \emph{Inferring Phylogenies}. Sunderland: + Sinauer Associates. +} \author{Emmanuel Paradis} \seealso{ \code{\link{prop.part}}, \code{\link{dist.topo}} diff --git a/man/where.Rd b/man/where.Rd new file mode 100644 index 0000000..faddeba --- /dev/null +++ b/man/where.Rd @@ -0,0 +1,35 @@ +\names{where} +\alias{where} +\title{Find Patterns in DNA Sequences} +\description{ + This function finds patterns in a single or a set of DNA sequences. +} +\usage{ +where(x, pattern) +} +\arguments{ + \item{x}{an object of class \code{"DNAbin"}.} + \item{pattern}{a character string to be searched in \code{x}.} +} +\details{ + If \code{x} is a vector, the function returns a single vector giving + the position(s) where the pattern was found. If \code{x} is a matrix + or a list, it returns a list with the positions of the pattern for + each sequence. + + Patterns may be overlapping. For instance, if \code{pattern = "tata"} + and the sequence starts with `tatata', then the vector returned will + be c(1, 3). +} +\value{ + a vector of integers or a list of such vectors. +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{DNAbin}}, \code{\link{image.DNAbin}} +} +\examples{ +data(woodmouse) +where(woodmouse, "tata") +} +\keyword{manip} diff --git a/src/dist_dna.c b/src/dist_dna.c index 67522ff..c087212 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,4 +1,4 @@ -/* dist_dna.c 2012-02-14 */ +/* dist_dna.c 2012-11-28 */ /* Copyright 2005-2012 Emmanuel Paradis @@ -1143,3 +1143,24 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, case 17 : distDNA_indelblock(x, n, s, d); break; } } + +void where(unsigned char *x, unsigned char *pat, int *s, int *p, + int *ans, int *n) +{ + int i, j, k, ln; + + ln = 0; /* local n */ + + for (i = 0; i <= *s - *p; i++) { + k = i; j = 0; + while (1) { + if (x[k] != pat[j]) break; + j++; k++; + if (j == *p) { + ans[ln++] = k - 1; + break; + } + } + } + *n = ln; +} -- 2.39.2