-## DNA.R (2012-02-14)
+## DNA.R (2013-01-04)
## Manipulations and Comparisons of DNA Sequences
-## Copyright 2002-2012 Emmanuel Paradis
+## Copyright 2002-2013 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
"[.DNAbin" <- function(x, i, j, drop = FALSE)
{
- oc <- oldClass(x)
- class(x) <- NULL
- if (is.matrix(x)) {
- if (nargs() == 2 && !missing(i)) ans <- x[i]
- else {
- nd <- dim(x)
- if (missing(i)) i <- 1:nd[1]
- if (missing(j)) j <- 1:nd[2]
- ans <- x[i, j, drop = drop]
- }
- } else {
- if (missing(i)) i <- 1:length(x)
- ans <- x[i]
- }
- class(ans) <- oc
+ ans <- NextMethod("[", drop = drop)
+ class(ans) <- "DNAbin"
ans
}
as.matrix.DNAbin <- function(x, ...)
{
- if (is.list(x)) {
- if (length(unique(unlist(lapply(x, length)))) != 1)
- stop("DNA sequences in list not of the same length.")
- nms <- names(x)
- n <- length(x)
- s <- length(x[[1]])
- x <- matrix(unlist(x), n, s, byrow = TRUE)
- rownames(x) <- nms
- class(x) <- "DNAbin"
+ if (is.matrix(x)) return(x)
+ if (is.vector(x)) {
+ dim(x) <- c(1, length(x))
+ return(x)
}
- x
+ s <- unique(unlist(lapply(x, length)))
+ if (length(s) != 1)
+ stop("DNA sequences in list not of the same length.")
+ nms <- names(x)
+ n <- length(x)
+ y <- matrix(as.raw(0), n, s)
+ for (i in seq_len(n)) y[i, ] <- x[[i]]
+ rownames(y) <- nms
+ class(y) <- "DNAbin"
+ y
}
as.list.DNAbin <- function(x, ...)
nms <- names(x)
if (n == 1) {
cat("1 DNA sequence in binary format stored in a list.\n\n")
- cat("Sequence length:", length(x[[1]]), "\n\n")
+ nTot <- length(x[[1]])
+ cat("Sequence length:", nTot, "\n\n")
cat("Label:", nms, "\n\n")
} else {
cat(n, "DNA sequences in binary format stored in a list.\n\n")
tmp <- unlist(lapply(x, length))
- mini <- min(tmp)
- maxi <- max(tmp)
+ nTot <- sum(tmp)
+ mini <- range(tmp)
+ maxi <- mini[2]
+ mini <- mini[1]
if (mini == maxi)
cat("All sequences of same length:", maxi, "\n")
else {
}
cat("\nLabels:", paste(nms, collapse = " "), TAIL)
}
- } else if (is.matrix(x)) {
- nd <- dim(x)
- nms <- rownames(x)
- cat(nd[1], "DNA sequences in binary format stored in a matrix.\n\n")
- cat("All sequences of same length:", nd[2], "\n")
- TAIL <- "\n\n"
- if (printlen < nd[1]) {
- nms <- nms[1:printlen]
- TAIL <- "...\n\n"
- }
- cat("\nLabels:", paste(nms, collapse = " "), TAIL)
} else {
- cat("1 DNA sequence in binary format stored in a vector.\n\n")
- cat("Sequence length:", length(x), "\n\n")
+ nTot <- length(x)
+ if (is.matrix(x)) {
+ nd <- dim(x)
+ nms <- rownames(x)
+ cat(nd[1], "DNA sequences in binary format stored in a matrix.\n\n")
+ cat("All sequences of same length:", nd[2], "\n")
+ TAIL <- "\n\n"
+ if (printlen < nd[1]) {
+ nms <- nms[1:printlen]
+ TAIL <- "...\n\n"
+ }
+ cat("\nLabels:", paste(nms, collapse = " "), TAIL)
+ } else {
+ cat("1 DNA sequence in binary format stored in a vector.\n\n")
+ cat("Sequence length:", nTot, "\n\n")
+ }
}
- cat("Base composition:\n")
- print(round(base.freq(x), digits))
+ if (nTot <= 1e6) {
+ cat("Base composition:\n")
+ print(round(base.freq(x), digits))
+ } else cat("More than 1 million nucleotides: not printing base composition\n")
}
as.DNAbin <- function(x, ...) UseMethod("as.DNAbin")
base.freq <- function(x, freq = FALSE, all = FALSE)
{
- if (is.list(x)) x <- unlist(x)
- n <- length(x)
- BF <-.C("BaseProportion", x, n, double(17),
- DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]]
+ f <- function(x)
+ .C("BaseProportion", x, as.integer(length(x)), double(17),
+ DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]]
+
+ if (is.list(x)) {
+ BF <- rowSums(sapply(x, f))
+ n <- sum(sapply(x, length))
+ } else {
+ n <- length(x)
+ BF <-.C("BaseProportion", x, as.integer(n), double(17),
+ DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[3]]
+ }
+
names(BF) <- c("a", "c", "g", "t", "r", "m", "w", "s",
"k", "y", "v", "h", "d", "b", "n", "-", "?")
if (all) {
n <- n[1]
}
if (n == 1) return(integer(0))
- ans <- .C("SegSites", x, n, s, integer(s),
- DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+ ans <- .C("SegSites", x, as.integer(n), as.integer(s),
+ integer(s), DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
which(as.logical(ans[[4]]))
}
Ndist <- if (imod == 11) n*n else n*(n - 1)/2
var <- if (variance) double(Ndist) else 0
if (!gamma) gamma <- alpha <- 0
- else alpha <- gamma <- 1
- d <- .C("dist_dna", x, n, s, imod, double(Ndist), BF,
- as.integer(pairwise.deletion), as.integer(variance),
- var, as.integer(gamma), alpha, DUP = FALSE, NAOK = TRUE,
- PACKAGE = "ape")
+ else {
+ alpha <- gamma
+ gamma <- 1
+ }
+ d <- .C("dist_dna", x, as.integer(n), as.integer(s), imod,
+ double(Ndist), BF, as.integer(pairwise.deletion),
+ as.integer(variance), var, as.integer(gamma),
+ alpha, DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
if (variance) var <- d[[9]]
d <- d[[5]]
if (imod == 11) {
horiz = TRUE, xpd = TRUE)
}
}
+
+where <- function(x, pattern)
+{
+ pat <- as.DNAbin(strsplit(pattern, NULL)[[1]])
+ p <- as.integer(length(pat))
+
+ foo <- function(x, pat, p) {
+ s <- as.integer(length(x))
+ if (s < p) stop("sequence shorter than the pattern")
+ ans <- .C("where", x, pat, s, p, integer(s), 0L,
+ DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+ n <- ans[[6]]
+ if (n) ans[[5]][seq_len(n)] - p + 2L else integer()
+ }
+
+ if (is.list(x)) return(lapply(x, foo, pat = pat, p = p))
+ if (is.matrix(x)) {
+ n <- nrow(x)
+ res <- vector("list", n)
+ for (i in seq_along(n))
+ res[[i]] <- foo(x[i, , drop = TRUE], pat, p)
+ names(res) <- rownames(x)
+ return(res)
+ }
+ foo(x, pat, p) # if x is a vector
+}