From: paradis Date: Wed, 16 Mar 2011 02:47:44 +0000 (+0000) Subject: new image.DNAbin() X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=8fa54a671f763f10f68bfe660b6a5949123d3d41 new image.DNAbin() git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@149 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/ChangeLog b/ChangeLog index e163a85..32f99c1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ NEW FEATURES + o There is a new image() method for "DNAbin" objects: it plots DNA + alignments in a flexible and efficient way. + o base.freq() gains an option 'all' to count all the possible bases including the ambiguous ones (defaults to FALSE). @@ -30,7 +33,7 @@ OTHER CHANGES o A number of functions in src/tree_build.c have been modified. o The matching representation has now only two columns as the third - column was repetitive. + column was redundant. diff --git a/DESCRIPTION b/DESCRIPTION index e364002..97b7a0b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.6-4 -Date: 2011-03-02 +Date: 2011-03-16 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/DNA.R b/R/DNA.R index e71f142..bec37d3 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2011-02-18) +## DNA.R (2011-03-15) ## Manipulations and Comparisons of DNA Sequences @@ -226,9 +226,11 @@ print.DNAbin <- function(x, printlen = 6, digits = 3, ...) as.DNAbin <- function(x, ...) 
UseMethod("as.DNAbin") -._cs_<- letters[c(1, 7, 3, 20, 18, 13, 23, 19, 11, 25, 22, 8, 4, 2, 14)] +._cs_ <- c("a", "g", "c", "t", "r", "m", "w", "s", "k", + "y", "v", "h", "d", "b", "n", "-", "?") -._bs_<- c(136, 72, 40, 24, 192, 160, 144, 96, 80, 48, 224, 176, 208, 112, 240) +._bs_ <- c(136, 72, 40, 24, 192, 160, 144, 96, 80, + 48, 224, 176, 208, 112, 240, 4, 2) as.DNAbin.character <- function(x, ...) { @@ -403,3 +405,53 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE, if (variance) attr(d, "variance") <- var d } + +image.DNAbin <- function(x, what, col, bg = "white", xlab = "", ylab = "", + show.labels = TRUE, cex.lab = 1, legend = TRUE, ...) +{ + what <- + if (missing(what)) c("a", "g", "c", "t", "n", "-") else tolower(what) + if (missing(col)) + col <- c("red", "yellow", "green", "blue", "grey", "black") + n <- (dx <- dim(x))[1] # number of sequences + s <- dx[2] # number of sites + y <- integer(N <- length(x)) + ncl <- length(what) + col <- rep(col, length.out = ncl) + sm <- 0L + for (i in ncl:1) { + k <- ._bs_[._cs_ == what[i]] + sel <- which(x == k) + if (ll <- length(sel)) { + y[sel] <- i + sm <- sm + ll + } else { + what <- what[-i] + col <- col[-i] + } + } + dim(y) <- dx + ## if there's no 0 in y, must drop 'bg' from the cols passed to image: + if (sm == N) { + leg.co <- co <- col + leg.txt <- toupper(what) + } else { + co <- c(bg, col) + leg.txt <- c(toupper(what), "others") + leg.co <- c(col, bg) + } + yaxt <- if (show.labels) "n" else "s" + image(1:s, 1:n, t(y), col = co, xlab = xlab, + ylab = ylab, yaxt = yaxt, ...) 
+ if (show.labels) + mtext(rownames(x), side = 2, line = 0.1, at = 1:n, + cex = cex.lab, adj = 1, las = 1) + if (legend) { + psr <- par("usr") + xx <- psr[2]/2 + yy <- psr[4] * (0.5 + 0.5/par("plt")[4]) + legend(xx, yy, legend = leg.txt, pch = 22, pt.bg = leg.co, + pt.cex = 2, bty = "n", xjust = 0.5, yjust = 0.5, + horiz = TRUE, xpd = TRUE) + } +} diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd index 18d112a..8892853 100644 --- a/man/DNAbin.Rd +++ b/man/DNAbin.Rd @@ -85,7 +85,8 @@ \author{Emmanuel Paradis} \seealso{ \code{\link{as.DNAbin}}, \code{\link{read.dna}}, - \code{\link{read.GenBank}}, \code{\link{write.dna}} + \code{\link{read.GenBank}}, \code{\link{write.dna}}, + \code{\link{image.DNAbin}} The corresponding generic functions are documented in the package \pkg{base}. diff --git a/man/boot.phylo.Rd b/man/boot.phylo.Rd index f0a650b..448a4d8 100644 --- a/man/boot.phylo.Rd +++ b/man/boot.phylo.Rd @@ -44,7 +44,9 @@ prop.clades(phy, ..., part = NULL) These functions analyse bipartitions found in a series of trees. \code{prop.part} counts the number of bipartitions found in a series - of trees given as \code{\dots}. + of trees given as \code{\dots}. If a single tree is passed, the + returned object is a list of vectors with the tips descending from + each node (i.e., clade compositions indexed by node number). 
\code{prop.clades} counts the number of times the bipartitions present in \code{phy} are present in a series of trees given as \code{\dots} or diff --git a/man/del.gaps.Rd b/man/del.gaps.Rd index d25d7ba..d4ef6a3 100644 --- a/man/del.gaps.Rd +++ b/man/del.gaps.Rd @@ -26,6 +26,7 @@ del.gaps(x) \author{Emmanuel Paradis} \seealso{ \code{\link{base.freq}}, \code{\link{GC.content}}, - \code{\link{theta.s}}, \code{\link{nuc.div}}, \code{\link{seg.sites}} + \code{\link{theta.s}}, \code{\link{nuc.div}}, \code{\link{seg.sites}}, + \code{\link{image.DNAbin}} } \keyword{univar} diff --git a/man/image.DNAbin.Rd b/man/image.DNAbin.Rd new file mode 100644 index 0000000..7ad62df --- /dev/null +++ b/man/image.DNAbin.Rd @@ -0,0 +1,64 @@ +\name{image.DNAbin} +\alias{image.DNAbin} +\title{Plot of DNA Sequence Alignment} +\description{ + This function plots an image of an alignment of nucleotide sequences. +} +\usage{ +\method{image}{DNAbin}(x, what, col, bg = "white", xlab = "", ylab = "", + show.labels = TRUE, cex.lab = 1, legend = TRUE, ...) +} +\arguments{ + \item{x}{a matrix of DNA sequences (class \code{"DNAbin"}).} + \item{what}{a vector of characters specifying the bases to + visualize. If missing, this is set to ``a'', ``g'', ``c'', ``t'', + ``n'', and ``-'' (in this order).} + \item{col}{a vector of colours. If missing, this is set to ``red'', + ``yellow'', ``green'', ``blue'', ``grey'', and ``black''. If it is + shorter (or longer) than \code{what}, it is recycled (or shortened).} + \item{bg}{the colour used for nucleotides whose base is not among + \code{what}.} + \item{xlab}{the label for the \emph{x}-axis; none by default.} + \item{ylab}{Idem for the \emph{y}-axis. Note that by default, the + labels of the sequences are printed on the \emph{y}-axis (see next option).} + \item{show.labels}{a logical controlling whether the sequence labels + are printed (\code{TRUE} by default).} + \item{cex.lab}{a single numeric controlling the size of the sequence labels. 
+ Use \code{cex.axis} to control the size of the annotations on the \emph{x}-axis.} + \item{legend}{a logical controlling whether the legend is plotted + (\code{TRUE} by default).} + \item{\dots}{further arguments passed to + \code{\link[graphics]{image.default}} (e.g., \code{cex.axis}).} +} +\details{ + The idea of this function is to allow flexible plotting and colouring + of a nucleotide alignment. By default, the most common bases (a, g, c, + t, and n) and alignment gap are plotted using a standard colour + scheme. + + It is possible to plot only one base specified as \code{what} with a + chosen colour: this might be useful to check, for instance, the + distribution of alignment gaps (\code{image(x, "-")}) or missing data + (see examples). +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{DNAbin}}, \code{\link{del.gaps}}, \code{\link{clustal}} +} +\examples{ +data(woodmouse) +image(woodmouse) +image(woodmouse, "n", "blue") # show missing data +image(woodmouse, c("g", "c"), "green") # G+C +par(mfcol = c(2, 2)) +### barcoding style: +for (x in c("a", "g", "c", "t")) + image(woodmouse, x, "black", cex.lab = 0.5, cex.axis = 0.7) +par(mfcol = c(1, 1)) +### zoom on a portion of the data: +image(woodmouse[11:15, 1:50], c("a", "n"), c("blue", "grey")) +### see the guanines on a black background: +image(woodmouse, "g", "yellow", "black") +} +\keyword{hplot} +