From f820fadc890587f8a3499f96b07b99fa80fc62ab Mon Sep 17 00:00:00 2001 From: paradis Date: Thu, 9 Oct 2008 08:39:56 +0000 Subject: [PATCH] improved cbind.DNAbin for ape 2.2-2 git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@53 6e262413-ae40-0410-9e79-b911bd7a66b7 --- ChangeLog | 7 +++++-- R/DNA.R | 46 ++++++++++++++++++++++++++++++++++------------ man/DNAbin.Rd | 30 ++++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index a3923a6..e2fc4ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,9 @@ NEW FEATURES o dist.gene() has been substantially improved and gains an option 'pairwise.deletion'. + o cbind.DNAbin() has a new option 'fill.with.gaps' and is now + more flexible. + BUG FIXES @@ -28,8 +31,8 @@ OTHER CHANGES o phymltest() has been updated for PhyML 3.0 and gains an option 'append', whereas the option 'path2exec' has been removed. - o rbind.DNAbin() now accepts a single matrix which is returned - unchanged instead of an error. + o rbind.DNAbin() and cbind.DNAbin() now accept a single matrix + which is returned unchanged (instead of an error). o The data sets bird.orders and bird.families are now stored as Newick strings; i.e., the command data(bird.orders) calls diff --git a/R/DNA.R b/R/DNA.R index 9bcc285..7bc0431 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -96,27 +96,49 @@ rbind.DNAbin <- function(...) structure(obj[[1]], class = "DNAbin") } -cbind.DNAbin <- function(..., check.names = TRUE, fill.with.gaps = FALSE, - quiet = TRUE) +cbind.DNAbin <- + function(..., check.names = TRUE, fill.with.gaps = FALSE, + quiet = FALSE) ### works only with matrices for the moment { obj <- list(...) n <- length(obj) if (n == 1) return(obj[[1]]) NR <- unlist(lapply(obj, nrow)) - if (length(unique(NR)) > 1) - stop("matrices do not have the same number of rows.") for (i in 1:n) class(obj[[i]]) <- NULL - nms <- rownames(obj[[1]]) if (check.names) { - for (i in 2:n) - if (all(rownames(obj[[i]]) %in% nms)) - obj[[i]] <- obj[[i]][nms, ] - else stop("rownames do not match among matrices.") + nms <- unlist(lapply(obj, rownames)) + if (fill.with.gaps) { + NC <- unlist(lapply(obj, ncol)) + nms <- unique(nms) + ans <- matrix(as.raw(4), length(nms), sum(NC)) + rownames(ans) <- nms + from <- 1 + for (i in 1:n) { + to <- from + NC[i] - 1 + tmp <- rownames(obj[[i]]) + nmsi <- tmp[tmp %in% nms] + ans[nmsi, from:to] <- obj[[i]][nmsi, , drop = FALSE] + from <- to + 1 + } + } else { + tab <- table(nms) + ubi <- tab == n + nms <- names(tab)[which(ubi)] + ans <- obj[[1]][nms, , drop = FALSE] + for (i in 2:n) + ans <- cbind(ans, obj[[i]][nms, , drop = FALSE]) + if (!quiet && !all(ubi)) + warning("some rows were dropped.") + } + } else { + if (length(unique(NR)) > 1) + stop("matrices do not have the same number of rows.") + ans <- matrix(unlist(obj), NR) + rownames(ans) <- rownames(obj[[1]]) } - ans <- matrix(unlist(obj), NR) - rownames(ans) <- nms - structure(ans, class = "DNAbin") + class(ans) <- "DNAbin" + ans } print.DNAbin <- function(x, ...) diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd index ca1b23e..87ec965 100644 --- a/man/DNAbin.Rd +++ b/man/DNAbin.Rd @@ -15,7 +15,8 @@ \method{print}{DNAbin}(x, \dots) \method{summary}{DNAbin}(object, printlen = 6, digits = 3, \dots) \method{rbind}{DNAbin}(\dots) -\method{cbind}{DNAbin}(\dots, check.names = TRUE) +\method{cbind}{DNAbin}(\dots, check.names = TRUE, fill.with.gaps = FALSE, + quiet = FALSE) \method{[}{DNAbin}(x, i, j, drop = TRUE) \method{as.matrix}{DNAbin}(x, \dots) } @@ -29,6 +30,12 @@ \item{digits}{the number of digits to print (3 by default).} \item{check.names}{a logical specifying whether to check the rownames before binding the columns (see details).} + \item{fill.with.gaps}{a logical indicating whether to keep all + possible individuals as indicating by the rownames, and eventually + filling the missing data with insertion gaps (ignored if + \code{check.names = FALSE}).} + \item{quiet}{a logical to switch off warning messages when some rows + are dropped.} \item{i, j}{indices of the rows and/or columns to select or to drop. They may be numeric, logical, or character (in the same way than for standard R objects).} @@ -47,11 +54,14 @@ comparisons of sequences, as well as storing them in less memory compared to the format used before \pkg{ape} 1.10. - For \code{cbind}, if \code{"check.names = TRUE"}, the rownames of each - matrix are checked, and the rows are reordered if necessary. If the - rownames differ among matrices, an error occurs. If - \code{"check.names = FALSE"}, the matrices are simply binded and the - rownames of the first matrix are used. + For \code{cbind}, the default behaviour is to keep only individuals + (as indicated by the rownames) for which there are no missing data. If + \code{fill.with.gaps = TRUE}, a `complete' matrix is returned, + enventually with insertion gaps as missing data. If \code{check.names + = TRUE} (the default), the rownames of each matrix are checked, and + the rows are reordered if necessary. If \code{check.names = FALSE}, + the matrices must all have the same number of rows, and are simply + binded; the rownames of the first matrix are used. See the examples. \code{as.matrix} may be used to convert DNA sequences (of the same length) stored in a list into a matrix while keeping the names and the @@ -82,5 +92,13 @@ summary(woodmouse[1:5, 1:300], 15, 6) ### Just to show how distances could be influenced by sampling: dist.dna(woodmouse[1:2, ]) dist.dna(woodmouse[1:3, ]) +### cbind and its options: +x <- woodmouse[1:2, 1:5] +y <- woodmouse[2:4, 6:10] +as.character(cbind(x, y)) # gives warning +as.character(cbind(x, y, fill.with.gaps = TRUE)) +\dontrun{ +as.character(cbind(x, y, check.names = FALSE)) # gives an error +} } \keyword{manip} -- 2.39.2