o There is now a print method for results from ace().
+ o There is a labels() method for objects of class "DNAbin".
+
+ o read.dna() has a new option 'as.matrix' to possibly force sequences
+ in a FASTA file to be stored in a matrix (see ?read.dna for details).
+
BUG FIXES
o nj() has been improved and is now about 30% faster.
+ o The default option 'drop' of [.DNAbin has been changed to FALSE to
+ avoid dropping rownames when selecting a single sequence.
+
+ o print.DNAbin() now does what summary.DNAbin() used to do; the
+ latter has been removed.
+
CHANGES IN APE VERSION 2.5-1
Package: ape
Version: 2.5-2
-Date: 2010-05-14
+Date: 2010-05-17
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
-## DNA.R (2010-03-16)
+## DNA.R (2010-05-17)
## Manipulations and Comparisons of DNA Sequences
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
+labels.DNAbin <- function(object, ...)
+{
+ ## Labels of the sequences in 'object': the names if it is a list,
+ ## the rownames if it is a matrix, and NULL in any other case.
+ if (is.list(object)) {
+     names(object)
+ } else if (is.matrix(object)) {
+     rownames(object)
+ } else {
+     NULL
+ }
+}
+
del.gaps <- function(x)
{
deleteGaps <- function(x) {
obj
}
-"[.DNAbin" <- function(x, i, j, drop = TRUE)
+"[.DNAbin" <- function(x, i, j, drop = FALSE)
{
oc <- oldClass(x)
class(x) <- NULL
structure(NextMethod("c"), class = "DNAbin")
}
-print.DNAbin <- function(x, ...)
-{
- n <- 1 # <- if is.vector(x)
- if (is.list(x)) n <- length(x)
- else if (is.matrix(x)) n <- dim(x)[1]
- if (n > 1) cat(n, "DNA sequences in binary format.\n")
- else cat("1 DNA sequence in binary format.\n")
-}
-
-summary.DNAbin <- function(object, printlen = 6, digits = 3, ...)
+print.DNAbin <- function(x, printlen = 6, digits = 3, ...)
{
- if (is.list(object)) {
- n <- length(object)
- nms <- names(object)
+ if (is.list(x)) {
+ n <- length(x)
+ nms <- names(x)
if (n == 1) {
cat("1 DNA sequence in binary format stored in a list.\n\n")
- cat("Sequence length:", length(object[[1]]), "\n\n")
+ cat("Sequence length:", length(x[[1]]), "\n\n")
cat("Label:", nms, "\n\n")
} else {
cat(n, "DNA sequences in binary format stored in a list.\n\n")
- tmp <- unlist(lapply(object, length))
+ tmp <- unlist(lapply(x, length))
mini <- min(tmp)
maxi <- max(tmp)
if (mini == maxi)
}
cat("\nLabels:", paste(nms, collapse = " "), TAIL)
}
- } else if (is.matrix(object)) {
- nd <- dim(object)
- nms <- rownames(object)
+ } else if (is.matrix(x)) {
+ nd <- dim(x)
+ nms <- rownames(x)
cat(nd[1], "DNA sequences in binary format stored in a matrix.\n\n")
cat("All sequences of same length:", nd[2], "\n")
TAIL <- "\n\n"
cat("\nLabels:", paste(nms, collapse = " "), TAIL)
} else {
cat("1 DNA sequence in binary format stored in a vector.\n\n")
- cat("Sequence length:", length(object), "\n\n")
+ cat("Sequence length:", length(x), "\n\n")
}
cat("Base composition:\n")
- print(round(base.freq(object), digits))
+ print(round(base.freq(x), digits))
}
## S3 generic: dispatches to the as.DNAbin method for the class of 'x'.
as.DNAbin <- function(x, ...)
{
    UseMethod("as.DNAbin")
}
-## read.dna.R (2008-07-03)
+## read.dna.R (2010-05-17)
## Read DNA Sequences in a File
-## Copyright 2003-2008 Emmanuel Paradis
+## Copyright 2003-2010 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
read.dna <- function(file, format = "interleaved", skip = 0,
nlines = 0, comment.char = "#", seq.names = NULL,
- as.character = FALSE)
+ as.character = FALSE, as.matrix = NULL)
{
getTaxaNames <- function(x) {
x <- sub("^['\" ]+", "", x) # remove the leading quotes and spaces
} else {
names(obj) <- seq.names
obj <- lapply(obj, tolower)
+ LENGTHS <- unique(unlist(lapply(obj, length)))
+ allSameLength <- length(LENGTHS) == 1
+ ## as.matrix = NULL means "decide from the data": return a matrix
+ ## only if all sequences have the same length, otherwise keep the
+ ## list. NULL must be resolved to a logical before it is tested,
+ ## because 'if (NULL)' stops with "argument is of length zero"
+ ## (this happened with the default and sequences of unequal length).
+ if (is.null(as.matrix))
+     as.matrix <- allSameLength
+ ## as.matrix = TRUE was requested explicitly but cannot be honoured
+ if (as.matrix && !allSameLength)
+     stop("sequences in FASTA file not of the same length")
+ if (as.matrix) {
+     ## one sequence per row; LENGTHS is a single value here
+     obj <- matrix(unlist(obj), ncol = LENGTHS, byrow = TRUE)
+     rownames(obj) <- seq.names
+ }
}
if (!as.character) obj <- as.DNAbin(obj)
obj
\name{DNAbin}
\alias{DNAbin}
\alias{print.DNAbin}
-\alias{summary.DNAbin}
\alias{[.DNAbin}
\alias{rbind.DNAbin}
\alias{cbind.DNAbin}
\alias{as.matrix.DNAbin}
\alias{c.DNAbin}
+\alias{labels.DNAbin}
\title{Manipulate DNA Sequences in Bit-Level Format}
\description{
These functions help to manipulate DNA sequences coded in the
bit-level coding scheme.
}
\usage{
-\method{print}{DNAbin}(x, \dots)
-\method{summary}{DNAbin}(object, printlen = 6, digits = 3, \dots)
+\method{print}{DNAbin}(x, printlen = 6, digits = 3, \dots)
\method{rbind}{DNAbin}(\dots)
\method{cbind}{DNAbin}(\dots, check.names = TRUE, fill.with.gaps = FALSE,
quiet = FALSE)
-\method{[}{DNAbin}(x, i, j, drop = TRUE)
+\method{[}{DNAbin}(x, i, j, drop = FALSE)
\method{as.matrix}{DNAbin}(x, \dots)
\method{c}{DNAbin}(\dots, recursive = FALSE)
+\method{labels}{DNAbin}(object, \dots)
}
\arguments{
\item{x, object}{an object of class \code{"DNAbin"}.}
\item{\dots}{either further arguments to be passed to or from other
- methods in the case of \code{print}, \code{summary}, and
- \code{as.matrix}, or a series of objects of class \code{"DNAbin"} in
- the case of \code{rbind}, \code{cbind}, and \code{c}.}
+ methods in the case of \code{print}, \code{as.matrix}, and
+ \code{labels}, or a series of objects of class \code{"DNAbin"} in the
+ case of \code{rbind}, \code{cbind}, and \code{c}.}
\item{printlen}{the number of labels to print (6 by default).}
\item{digits}{the number of digits to print (3 by default).}
\item{check.names}{a logical specifying whether to check the rownames
\item{i, j}{indices of the rows and/or columns to select or to drop.
They may be numeric, logical, or character (in the same way as for
standard R objects).}
- \item{drop}{logical; if \code{TRUE} (the default), the returned object
- is of the lowest possible dimension.}
+ \item{drop}{logical; if \code{TRUE}, the returned object is of the
+ lowest possible dimension.}
\item{recursive}{for compatibility with the generic (unused).}
}
\details{
DNA sequences stored as objects of class \code{"DNAbin"}. They are
used in the same way as the standard R functions to manipulate
vectors, matrices, and lists. Additionally, the operators \code{[[}
- and \code{$} may be used to extract a vector from a list.
+ and \code{$} may be used to extract a vector from a list. Note that
+ the default of \code{drop} differs from that of the generic operator:
+ this is to avoid dropping rownames when selecting a single sequence.
These functions are provided to manipulate easily DNA sequences coded
with the bit-level coding scheme. The latter allows much faster
\usage{
read.dna(file, format = "interleaved", skip = 0,
nlines = 0, comment.char = "#", seq.names = NULL,
- as.character = FALSE)
+ as.character = FALSE, as.matrix = NULL)
}
\arguments{
\item{file}{a file name specified by either a variable of mode character,
names read in the file are used.}
\item{as.character}{a logical controlling whether to return the
sequences as an object of class \code{"DNAbin"} (the default).}
+ \item{as.matrix}{(used if \code{format = "fasta"}) one of the
+ following three: (i) \code{NULL}: returns the sequences in a matrix if
+ they are of the same length, otherwise in a list; (ii) \code{TRUE}:
+ returns the sequences in a matrix, or stops with an error if they
+ are of different lengths; (iii) \code{FALSE}: always returns the
+ sequences in a list.}
}
\description{
This function reads DNA sequences in a file, and returns a matrix or a