From dfabbb4721bc499ee9e5ee9aadcfd59ff3d4f223 Mon Sep 17 00:00:00 2001 From: paradis Date: Mon, 17 May 2010 08:38:00 +0000 Subject: [PATCH] various changes to DNAbin functions + new labels.DNAbin() git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@121 6e262413-ae40-0410-9e79-b911bd7a66b7 --- ChangeLog | 11 +++++++++++ DESCRIPTION | 2 +- R/DNA.R | 42 ++++++++++++++++++++---------------------- R/read.dna.R | 16 +++++++++++++--- man/DNAbin.Rd | 22 ++++++++++++---------- man/read.dna.Rd | 8 +++++++- 6 files changed, 64 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index aa93c53..229acbe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,11 @@ NEW FEATURES o There is now a print method for results from ace(). + o There is a labels() method for objects of class "DNAbin". + + o read.dna() has a new option 'as.matrix' to possibly force sequences + in a FASTA file to be stored in a matrix (see ?read.dna for details). + BUG FIXES @@ -27,6 +32,12 @@ OTHER CHANGES o nj() has been improved and is now about 30% faster. + o The default option 'drop' of [.DNAbin has been changed to FALSE to + avoid dropping rownames when selecting a single sequence. + + o print.DNAbin() now does what summary.DNAbin() used to do; the latter + has been removed.
+ CHANGES IN APE VERSION 2.5-1 diff --git a/DESCRIPTION b/DESCRIPTION index 7f0b445..ebf8a80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.5-2 -Date: 2010-05-14 +Date: 2010-05-17 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/DNA.R b/R/DNA.R index e7c0e3a..dea2d60 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2010-03-16) +## DNA.R (2010-05-17) ## Manipulations and Comparisons of DNA Sequences @@ -7,6 +7,13 @@ ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. +labels.DNAbin <- function(object, ...) +{ + if (is.list(object)) return(names(object)) + if (is.matrix(object)) return(rownames(object)) + NULL +} + del.gaps <- function(x) { deleteGaps <- function(x) { @@ -49,7 +56,7 @@ as.alignment <- function(x) obj } -"[.DNAbin" <- function(x, i, j, drop = TRUE) +"[.DNAbin" <- function(x, i, j, drop = FALSE) { oc <- oldClass(x) class(x) <- NULL @@ -156,27 +163,18 @@ c.DNAbin <- function(..., recursive = FALSE) structure(NextMethod("c"), class = "DNAbin") } -print.DNAbin <- function(x, ...) -{ - n <- 1 # <- if is.vector(x) - if (is.list(x)) n <- length(x) - else if (is.matrix(x)) n <- dim(x)[1] - if (n > 1) cat(n, "DNA sequences in binary format.\n") - else cat("1 DNA sequence in binary format.\n") -} - -summary.DNAbin <- function(object, printlen = 6, digits = 3, ...) +print.DNAbin <- function(x, printlen = 6, digits = 3, ...) 
{ - if (is.list(object)) { - n <- length(object) - nms <- names(object) + if (is.list(x)) { + n <- length(x) + nms <- names(x) if (n == 1) { cat("1 DNA sequence in binary format stored in a list.\n\n") - cat("Sequence length:", length(object[[1]]), "\n\n") + cat("Sequence length:", length(x[[1]]), "\n\n") cat("Label:", nms, "\n\n") } else { cat(n, "DNA sequences in binary format stored in a list.\n\n") - tmp <- unlist(lapply(object, length)) + tmp <- unlist(lapply(x, length)) mini <- min(tmp) maxi <- max(tmp) if (mini == maxi) @@ -193,9 +191,9 @@ summary.DNAbin <- function(object, printlen = 6, digits = 3, ...) } cat("\nLabels:", paste(nms, collapse = " "), TAIL) } - } else if (is.matrix(object)) { - nd <- dim(object) - nms <- rownames(object) + } else if (is.matrix(x)) { + nd <- dim(x) + nms <- rownames(x) cat(nd[1], "DNA sequences in binary format stored in a matrix.\n\n") cat("All sequences of same length:", nd[2], "\n") TAIL <- "\n\n" @@ -206,10 +204,10 @@ summary.DNAbin <- function(object, printlen = 6, digits = 3, ...) cat("\nLabels:", paste(nms, collapse = " "), TAIL) } else { cat("1 DNA sequence in binary format stored in a vector.\n\n") - cat("Sequence length:", length(object), "\n\n") + cat("Sequence length:", length(x), "\n\n") } cat("Base composition:\n") - print(round(base.freq(object), digits)) + print(round(base.freq(x), digits)) } as.DNAbin <- function(x, ...) UseMethod("as.DNAbin") diff --git a/R/read.dna.R b/R/read.dna.R index 8117906..98ebd65 100644 --- a/R/read.dna.R +++ b/R/read.dna.R @@ -1,15 +1,15 @@ -## read.dna.R (2008-07-03) +## read.dna.R (2010-05-17) ## Read DNA Sequences in a File -## Copyright 2003-2008 Emmanuel Paradis +## Copyright 2003-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. 
read.dna <- function(file, format = "interleaved", skip = 0, nlines = 0, comment.char = "#", seq.names = NULL, - as.character = FALSE) + as.character = FALSE, as.matrix = NULL) { getTaxaNames <- function(x) { x <- sub("^['\" ]+", "", x) # remove the leading quotes and spaces @@ -105,6 +105,16 @@ read.dna <- function(file, format = "interleaved", skip = 0, } else { names(obj) <- seq.names obj <- lapply(obj, tolower) + LENGTHS <- unique(unlist(lapply(obj, length))) + allSameLength <- length(LENGTHS) == 1 + if (is.logical(as.matrix) && as.matrix && !allSameLength) + stop("sequences in FASTA file not of the same length") + if (is.null(as.matrix) && allSameLength) + as.matrix <- TRUE + if (as.matrix) { + obj <- matrix(unlist(obj), ncol = LENGTHS, byrow = TRUE) + rownames(obj) <- seq.names + } } if (!as.character) obj <- as.DNAbin(obj) obj diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd index 701fab6..1b0ad70 100644 --- a/man/DNAbin.Rd +++ b/man/DNAbin.Rd @@ -1,33 +1,33 @@ \name{DNAbin} \alias{DNAbin} \alias{print.DNAbin} -\alias{summary.DNAbin} \alias{[.DNAbin} \alias{rbind.DNAbin} \alias{cbind.DNAbin} \alias{as.matrix.DNAbin} \alias{c.DNAbin} +\alias{labels.DNAbin} \title{Manipulate DNA Sequences in Bit-Level Format} \description{ These functions help to manipulate DNA sequences coded in the bit-level coding scheme. 
} \usage{ -\method{print}{DNAbin}(x, \dots) -\method{summary}{DNAbin}(object, printlen = 6, digits = 3, \dots) +\method{print}{DNAbin}(x, printlen = 6, digits = 3, \dots) \method{rbind}{DNAbin}(\dots) \method{cbind}{DNAbin}(\dots, check.names = TRUE, fill.with.gaps = FALSE, quiet = FALSE) -\method{[}{DNAbin}(x, i, j, drop = TRUE) +\method{[}{DNAbin}(x, i, j, drop = FALSE) \method{as.matrix}{DNAbin}(x, \dots) \method{c}{DNAbin}(\dots, recursive = FALSE) +\method{labels}{DNAbin}(object, \dots) } \arguments{ \item{x, object}{an object of class \code{"DNAbin"}.} \item{\dots}{either further arguments to be passed to or from other - methods in the case of \code{print}, \code{summary}, and - \code{as.matrix}, or a series of objects of class \code{"DNAbin"} in - the case of \code{rbind}, \code{cbind}, and \code{c}.} + methods in the case of \code{print}, \code{as.matrix}, and + \code{labels}, or a series of objects of class \code{"DNAbin"} in the + case of \code{rbind}, \code{cbind}, and \code{c}.} \item{printlen}{the number of labels to print (6 by default).} \item{digits}{the number of digits to print (3 by default).} \item{check.names}{a logical specifying whether to check the rownames @@ -41,8 +41,8 @@ \item{i, j}{indices of the rows and/or columns to select or to drop. They may be numeric, logical, or character (in the same way than for standard R objects).} - \item{drop}{logical; if \code{TRUE} (the default), the returned object - is of the lowest possible dimension.} + \item{drop}{logical; if \code{TRUE}, the returned object is of the + lowest possible dimension.} \item{recursive}{for compatibility with the generic (unused).} } \details{ @@ -50,7 +50,9 @@ DNA sequences stored as objects of class \code{"DNAbin"}. They are used in the same way than the standard R functions to manipulate vectors, matrices, and lists. Additionally, the operators \code{[[} - and \code{$} may be used to extract a vector from a list. 
+ and \code{$} may be used to extract a vector from a list. Note that + the default of \code{drop} differs from that of the generic operator: + this is to avoid dropping rownames when selecting a single sequence. These functions are provided to manipulate easily DNA sequences coded with the bit-level coding scheme. The latter allows much faster diff --git a/man/read.dna.Rd b/man/read.dna.Rd index 4c02232..3f35031 100644 --- a/man/read.dna.Rd +++ b/man/read.dna.Rd @@ -4,7 +4,7 @@ \usage{ read.dna(file, format = "interleaved", skip = 0, nlines = 0, comment.char = "#", seq.names = NULL, - as.character = FALSE) + as.character = FALSE, as.matrix = NULL) } \arguments{ \item{file}{a file name specified by either a variable of mode character, @@ -23,6 +23,12 @@ read.dna(file, format = "interleaved", skip = 0, names read in the file are used.} \item{as.character}{a logical controlling whether to return the sequences as an object of class \code{"DNAbin"} (the default).} + \item{as.matrix}{(used if \code{format = "fasta"}) one of the three + following values: (i) \code{NULL}: returns the sequences in a matrix if + they are of the same length, otherwise in a list; (ii) \code{TRUE}: + returns the sequences in a matrix, or stops with an error if they + are of different lengths; (iii) \code{FALSE}: always returns the + sequences in a list.} } \description{ This function reads DNA sequences in a file, and returns a matrix or a -- 2.39.2