various changes to DNAbin functions + new labels.DNAbin()

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)
diff --git a/ChangeLog b/ChangeLog

index aa93c53dcbea22fccc55b7a439a96f91c28381f2..229acbec29ae826afeb53fdf749dcc7e8540fe84 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,6 +5,11 @@ NEW FEATURES
  
      o There is now a print method for results from ace().
  
+    o There is a labels() method for objects of class "DNAbin".
+
+    o read.dna() has a new option 'as.matrix' to possibly force sequences
+      in a FASTA file to be stored in a matrix (see ?read.dna for details).
+
  
  BUG FIXES
  
@@ -27,6 +32,12 @@ OTHER CHANGES
  
      o nj() has been improved and is now about 30% faster.
  
+    o The default option 'drop' of [.DNAbin has been changed to FALSE to
+      avoid dropping rownames when selecting a single sequence.
+
+    o print.DNAbin() now does what summary.DNAbin() did; the latter has
+      been removed.
+
  
  
                 CHANGES IN APE VERSION 2.5-1
diff --git a/DESCRIPTION b/DESCRIPTION

index 7f0b4455b3ac9bdcb955ca21b276eef0160723e4..ebf8a808681095fe8bfdd17a4062a60c1a10a07c 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
  Version: 2.5-2
-Date: 2010-05-14
+Date: 2010-05-17
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/R/DNA.R b/R/DNA.R

index e7c0e3a85d74a51518b2a12b1a6593614877d6dc..dea2d60ddffbb907e575fa2596de76b83ab7ef48 100644 (file)
--- a/R/DNA.R
+++ b/R/DNA.R
@@ -1,4 +1,4 @@
-## DNA.R (2010-03-16)
+## DNA.R (2010-05-17)
  
  ##   Manipulations and Comparisons of DNA Sequences
  
@@ -7,6 +7,13 @@
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
+labels.DNAbin <- function(object, ...)
+{
+    if (is.list(object)) return(names(object))
+    if (is.matrix(object)) return(rownames(object))
+    NULL
+}
+
  del.gaps <- function(x)
  {
      deleteGaps <- function(x) {
@@ -49,7 +56,7 @@ as.alignment <- function(x)
      obj
  }
  
-"[.DNAbin" <- function(x, i, j, drop = TRUE)
+"[.DNAbin" <- function(x, i, j, drop = FALSE)
  {
      oc <- oldClass(x)
      class(x) <- NULL
@@ -156,27 +163,18 @@ c.DNAbin <- function(..., recursive = FALSE)
      structure(NextMethod("c"), class = "DNAbin")
  }
  
-print.DNAbin <- function(x, ...)
-{
-    n <- 1 # <- if is.vector(x)
-    if (is.list(x)) n <- length(x)
-    else if (is.matrix(x)) n <- dim(x)[1]
-    if (n > 1) cat(n, "DNA sequences in binary format.\n")
-    else cat("1 DNA sequence in binary format.\n")
-}
-
-summary.DNAbin <- function(object, printlen = 6, digits = 3, ...)
+print.DNAbin <- function(x, printlen = 6, digits = 3, ...)
  {
-    if (is.list(object)) {
-        n <- length(object)
-        nms <- names(object)
+    if (is.list(x)) {
+        n <- length(x)
+        nms <- names(x)
          if (n == 1) {
              cat("1 DNA sequence in binary format stored in a list.\n\n")
-            cat("Sequence length:", length(object[[1]]), "\n\n")
+            cat("Sequence length:", length(x[[1]]), "\n\n")
              cat("Label:", nms, "\n\n")
          } else {
              cat(n, "DNA sequences in binary format stored in a list.\n\n")
-            tmp <- unlist(lapply(object, length))
+            tmp <- unlist(lapply(x, length))
              mini <- min(tmp)
              maxi <- max(tmp)
              if (mini == maxi)
@@ -193,9 +191,9 @@ summary.DNAbin <- function(object, printlen = 6, digits = 3, ...)
              }
              cat("\nLabels:", paste(nms, collapse = " "), TAIL)
          }
-    } else if (is.matrix(object)) {
-        nd <- dim(object)
-        nms <- rownames(object)
+    } else if (is.matrix(x)) {
+        nd <- dim(x)
+        nms <- rownames(x)
          cat(nd[1], "DNA sequences in binary format stored in a matrix.\n\n")
          cat("All sequences of same length:", nd[2], "\n")
          TAIL <- "\n\n"
@@ -206,10 +204,10 @@ summary.DNAbin <- function(object, printlen = 6, digits = 3, ...)
          cat("\nLabels:", paste(nms, collapse = " "), TAIL)
      } else {
          cat("1 DNA sequence in binary format stored in a vector.\n\n")
-        cat("Sequence length:", length(object), "\n\n")
+        cat("Sequence length:", length(x), "\n\n")
      }
      cat("Base composition:\n")
-    print(round(base.freq(object), digits))
+    print(round(base.freq(x), digits))
  }
  
  as.DNAbin <- function(x, ...) UseMethod("as.DNAbin")
diff --git a/R/read.dna.R b/R/read.dna.R

index 8117906a5bdc54cdc9901f78b21f7b2f8b8e386b..98ebd65ca71c67e2812a50dec6bf118e3b023f5e 100644 (file)
--- a/R/read.dna.R
+++ b/R/read.dna.R
@@ -1,15 +1,15 @@
-## read.dna.R (2008-07-03)
+## read.dna.R (2010-05-17)
  
  ##   Read DNA Sequences in a File
  
-## Copyright 2003-2008 Emmanuel Paradis
+## Copyright 2003-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
  read.dna <- function(file, format = "interleaved", skip = 0,
                       nlines = 0, comment.char = "#", seq.names = NULL,
-                     as.character = FALSE)
+                     as.character = FALSE, as.matrix = NULL)
  {
      getTaxaNames <- function(x) {
          x <- sub("^['\" ]+", "", x) # remove the leading quotes and spaces
@@ -105,6 +105,16 @@ read.dna <- function(file, format = "interleaved", skip = 0,
      } else {
          names(obj) <- seq.names
          obj <- lapply(obj, tolower)
+        LENGTHS <- unique(unlist(lapply(obj, length)))
+        allSameLength <- length(LENGTHS) == 1
+        if (is.logical(as.matrix) && as.matrix && !allSameLength)
+            stop("sequences in FASTA file not of the same length")
+        if (is.null(as.matrix) && allSameLength)
+            as.matrix <- TRUE
+        if (as.matrix) {
+            obj <- matrix(unlist(obj), ncol = LENGTHS, byrow = TRUE)
+            rownames(obj) <- seq.names
+        }
      }
      if (!as.character) obj <- as.DNAbin(obj)
      obj
diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd

index 701fab66d213e039827b15171452ba03361382a8..1b0ad705bccbc7c3a31da72528d462719bec7a62 100644 (file)
--- a/man/DNAbin.Rd
+++ b/man/DNAbin.Rd
@@ -1,33 +1,33 @@
  \name{DNAbin}
  \alias{DNAbin}
  \alias{print.DNAbin}
-\alias{summary.DNAbin}
  \alias{[.DNAbin}
  \alias{rbind.DNAbin}
  \alias{cbind.DNAbin}
  \alias{as.matrix.DNAbin}
  \alias{c.DNAbin}
+\alias{labels.DNAbin}
  \title{Manipulate DNA Sequences in Bit-Level Format}
  \description{
    These functions help to manipulate DNA sequences coded in the
    bit-level coding scheme.
  }
  \usage{
-\method{print}{DNAbin}(x, \dots)
-\method{summary}{DNAbin}(object, printlen = 6, digits = 3, \dots)
+\method{print}{DNAbin}(x, printlen = 6, digits = 3, \dots)
  \method{rbind}{DNAbin}(\dots)
  \method{cbind}{DNAbin}(\dots, check.names = TRUE, fill.with.gaps = FALSE,
               quiet = FALSE)
-\method{[}{DNAbin}(x, i, j, drop = TRUE)
+\method{[}{DNAbin}(x, i, j, drop = FALSE)
  \method{as.matrix}{DNAbin}(x, \dots)
  \method{c}{DNAbin}(\dots, recursive = FALSE)
+\method{labels}{DNAbin}(object, \dots)
  }
  \arguments{
    \item{x, object}{an object of class \code{"DNAbin"}.}
    \item{\dots}{either further arguments to be passed to or from other
-    methods in the case of \code{print}, \code{summary}, and
-    \code{as.matrix}, or a series of objects of class \code{"DNAbin"} in
-    the case of \code{rbind}, \code{cbind}, and \code{c}.}
+    methods in the case of \code{print}, \code{as.matrix}, and
+    \code{labels}, or a series of objects of class \code{"DNAbin"} in the
+    case of \code{rbind}, \code{cbind}, and \code{c}.}
    \item{printlen}{the number of labels to print (6 by default).}
    \item{digits}{the number of digits to print (3 by default).}
    \item{check.names}{a logical specifying whether to check the rownames
@@ -41,8 +41,8 @@
    \item{i, j}{indices of the rows and/or columns to select or to drop.
      They may be numeric, logical, or character (in the same way than for
      standard R objects).}
-  \item{drop}{logical; if \code{TRUE} (the default), the returned object
-    is of the lowest possible dimension.}
+  \item{drop}{logical; if \code{TRUE}, the returned object is of the
+    lowest possible dimension.}
    \item{recursive}{for compatibility with the generic (unused).}
  }
  \details{
@@ -50,7 +50,9 @@
    DNA sequences stored as objects of class \code{"DNAbin"}. They are
    used in the same way than the standard R functions to manipulate
    vectors, matrices, and lists. Additionally, the operators \code{[[}
-  and \code{$} may be used to extract a vector from a list.
+  and \code{$} may be used to extract a vector from a list. Note that
+  the default of \code{drop} differs from that of the generic operator:
+  this is to avoid dropping rownames when selecting a single sequence.
  
    These functions are provided to manipulate easily DNA sequences coded
    with the bit-level coding scheme. The latter allows much faster
diff --git a/man/read.dna.Rd b/man/read.dna.Rd

index 4c02232ba8d332708162c35d257fdac66066415b..3f350314155250032c3e5d461a1e4c13567bb337 100644 (file)
--- a/man/read.dna.Rd
+++ b/man/read.dna.Rd
@@ -4,7 +4,7 @@
  \usage{
  read.dna(file, format = "interleaved", skip = 0,
           nlines = 0, comment.char = "#", seq.names = NULL,
-         as.character = FALSE)
+         as.character = FALSE, as.matrix = NULL)
  }
  \arguments{
    \item{file}{a file name specified by either a variable of mode character,
@@ -23,6 +23,12 @@ read.dna(file, format = "interleaved", skip = 0,
      names read in the file are used.}
    \item{as.character}{a logical controlling whether to return the
      sequences as an object of class \code{"DNAbin"} (the default).}
+  \item{as.matrix}{(used if \code{format = "fasta"}) one of the
+    following three values: (i) \code{NULL}: returns the sequences in a matrix if
+    they are of the same length, otherwise in a list; (ii) \code{TRUE}:
+    returns the sequences in a matrix, or stops with an error if they
+    are of different lengths; (iii) \code{FALSE}: always returns the
+    sequences in a list.}
  }
  \description{
    This function reads DNA sequences in a file, and returns a matrix or a
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 17 May 2010 08:38:00 +0000 (08:38 +0000)
ChangeLog		patch \| blob \| history
DESCRIPTION		patch \| blob \| history
R/DNA.R		patch \| blob \| history
R/read.dna.R		patch \| blob \| history
man/DNAbin.Rd		patch \| blob \| history
man/read.dna.Rd		patch \| blob \| history