From: paradis Date: Wed, 16 Mar 2011 08:50:32 +0000 (+0000) Subject: a bunch of new stuff (see ChangeLog) X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=453ad4ce9e573998f28185d92c8d71367dd32f23 a bunch of new stuff (see ChangeLog) git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@150 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/ChangeLog b/ChangeLog index 32f99c1..a635615 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,11 +6,19 @@ NEW FEATURES o There is a new image() method for "DNAbin" objects: it plots DNA alignments in a flexible and efficient way. + o Two new functions as.network.phylo and as.igraph.phylo convert + trees of class "phylo" into these respective network classes + defined in the packages of the same names. + + o The three new functions clustal, muscle, and tcoffee perform + nucleotide sequence alignment by calling the external programs + of the same names. + o base.freq() gains an option 'all' to count all the possible bases including the ambiguous ones (defaults to FALSE). - o read.nexus() now writes tree names in the NEXUS file if given a list - of trees with names. + o read.nexus() now writes tree names in the NEXUS file if given a + list of trees with names. BUG FIXES diff --git a/R/DNA.R b/R/DNA.R index bec37d3..747117d 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2011-03-15) +## DNA.R (2011-03-16) ## Manipulations and Comparisons of DNA Sequences diff --git a/R/as.phylo.R b/R/as.phylo.R index e8d0f66..8ebd540 100644 --- a/R/as.phylo.R +++ b/R/as.phylo.R @@ -1,8 +1,8 @@ -## as.phylo.R (2010-12-15) +## as.phylo.R (2011-03-16) ## Conversion Among Tree Objects -## Copyright 2005-2010 Emmanuel Paradis +## Copyright 2005-2011 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -110,3 +110,24 @@ as.hclust.phylo <- function(x, ...) class(obj) <- "hclust" obj } + +as.network.phylo <- function(x, directed = is.rooted(x), ...) 
+{ + if (is.null(x$node.label)) x <- makeNodeLabel(x) + res <- network(x$edge, directed = directed, ...) + network.vertex.names(res) <- c(x$tip.label, x$node.label) + res +} + +as.igraph <- function(x, ...) UseMethod("as.igraph") + +as.igraph.phylo <- function(x, directed = is.rooted(x), use.labels = TRUE, ...) +{ + ## local copy because x will be changed before evaluating is.rooted(x): + directed <- directed + if (use.labels) { + if (is.null(x$node.label)) x <- makeNodeLabel(x) + x$edge <- matrix(c(x$tip.label, x$node.label)[x$edge], ncol = 2) + } else x$edge <- x$edge - 1L + graph.edgelist(x$edge, directed = directed, ...) +} diff --git a/R/clustal.R b/R/clustal.R new file mode 100644 index 0000000..3cfd1c0 --- /dev/null +++ b/R/clustal.R @@ -0,0 +1,72 @@ +## clustal.R (2011-03-16) + +## Multiple Sequence Alignment with External Applications + +## Copyright 2011 Emmanuel Paradis + +## This file is part of the R-package `ape'. +## See the file ../COPYING for licensing issues. + +clustal <- function(x, pw.gapopen = 10, pw.gapext = 0.1, + gapopen = 10, gapext = 0.2, exec = NULL, + MoreArgs = "", quiet = TRUE) +{ + os <- Sys.info()[1] + if (is.null(exec)) { + if (os == "Linux") exec <- "clustalw" + if (os == "Darwin") exec <- "clustalw2" + if (os == "Windows") exec <- "C:/Program Files/ClustalW2/clustalw2" + } + + if (missing(x)) { + system(paste(exec, "-help")) + return(invisible(NULL)) + } + + d <- tempdir() + inf <- paste(d, "input_clustal.fas", sep = "/") + outf <- paste(d, "input_clustal.aln", sep = "/") + write.dna(x, inf, "fasta") + prefix <- c("-INFILE", "-PWGAPOPEN", "-PWGAPEXT", "-GAPOPEN", "-GAPEXT") + suffix <- c(inf, pw.gapopen, pw.gapext, gapopen, gapext) + opts <- paste(prefix, suffix, sep = "=", collapse = " ") + opts <- paste(opts, MoreArgs) + system(paste(exec, opts), ignore.stdout = quiet) + read.dna(outf, "clustal") +} + +muscle <- function(x, exec = "muscle", MoreArgs = "", quiet = TRUE) +{ + if (missing(x)) { + system(exec) + 
return(invisible(NULL)) + } + + d <- tempdir() + inf <- paste(d, "input_muscle.fas", sep = "/") + outf <- paste(d, "output_muscle.fas", sep = "/") + write.dna(x, inf, "fasta") + opts <- paste("-in", inf, "-out", outf) + if (quiet) opts <- paste(opts, "-quiet") + opts <- paste(opts, MoreArgs) + system(paste(exec, opts)) + read.dna(outf, "fasta") +} + +tcoffee <- function(x, exec = "t_coffee", MoreArgs = "", quiet = TRUE) +{ + if (missing(x)) { + system(exec) + return(invisible(NULL)) + } + + d <- tempdir() + od <- setwd(d) + on.exit(setwd(od)) + inf <- "input_tcoffee.fas" + write.dna(x, inf, "fasta") + opts <- paste(inf, MoreArgs) + if (quiet) opts <- paste(opts, "-quiet=nothing") + system(paste(exec, opts)) + read.dna("input_tcoffee.aln", "clustal") +} diff --git a/man/as.phylo.Rd b/man/as.phylo.Rd index eb34dc3..86b48ba 100644 --- a/man/as.phylo.Rd +++ b/man/as.phylo.Rd @@ -5,7 +5,31 @@ \alias{as.hclust.phylo} \alias{old2new.phylo} \alias{new2old.phylo} -\title{Conversion Among Tree Objects} +\alias{as.network.phylo} +\alias{as.igraph} +\alias{as.igraph.phylo} +\title{Conversion Among Tree and Network Objects} +\description{ + \code{as.phylo} is a generic function which converts an object into a + tree of class \code{"phylo"}. There are currently two methods for + objects of class \code{"hclust"} and of class \code{"phylog"} + (implemented in the package ade4). + + \code{as.hclust.phylo} is a method of the generic + \code{\link[stats]{as.hclust}} which converts an object of class + \code{"phylo"} into one of class \code{"hclust"}. This can used to + convert an object of class \code{"phylo"} into one of class + \code{"dendrogram"} (see examples). + + \code{as.network} and \code{as.igraph} convert trees of class + \code{"phylo"} into these respective classes defined in the packages + of the same names. Note that the generic of the former is defined in + \pkg{network}. 
+ + \code{old2new.phylo} and \code{new2old.phylo} are utility functions + for converting between the old and new coding of the class + \code{"phylo"}. +} \usage{ as.phylo(x, ...) \method{as.phylo}{hclust}(x, ...) @@ -13,29 +37,24 @@ as.phylo(x, ...) \method{as.hclust}{phylo}(x, ...) old2new.phylo(phy) new2old.phylo(phy) +\method{as.network}{phylo}(x, directed = is.rooted(x), ...) +as.igraph(x, ...) +\method{as.igraph}{phylo}(x, directed = is.rooted(x), use.labels = TRUE, ...) } \arguments{ \item{x}{an object to be converted into another class.} + \item{directed}{a logical value: should the network be directed? By + default, this depends on whether the tree is rooted or not.} + \item{use.labels}{a logical specifying whether to use labels to build + the network of class \code{"igraph"}. If \code{TRUE} and the tree + has no node labels, then some default labels are created first. If + \code{FALSE}, the network is built with integers.} \item{\dots}{further arguments to be passed to or from other methods.} \item{phy}{an object of class \code{"phylo"}.} } -\description{ - \code{as.phylo} is a generic function which converts an object into a - tree of class \code{"phylo"}. There are currently two methods for this - generic for objects of class \code{"hclust"} and of class - \code{"phylog"} (implemented in the package ade4). - \code{as.hclust.phylo} is a method of the generic - \code{\link[stats]{as.hclust}} which converts an object of class - \code{"phylo"} into one of class \code{"hclust"}. This can used to - convert an object of class \code{"phylo"} into one of class - \code{"dendrogram"} (see examples). - - \code{old2new.phylo} and \code{new2old.phylo} are utility functions - for converting between the old and new coding of the class - \code{"phylo"}. -} \value{ - An object of class \code{"hclust"} or \code{"phylo"}. + An object of class \code{"hclust"}, \code{"phylo"}, \code{"network"}, + or \code{"igraph"}. 
} \author{Emmanuel Paradis} \seealso{ @@ -63,7 +82,22 @@ plot(dend) layout(matrix(1:2, 2, 1)) plot(bird.orders, font = 1, no.margin = TRUE) par(mar = c(0, 0, 0, 8)) -plot((dend), horiz = TRUE) +plot(dend, horiz = TRUE) layout(matrix(1, 1, 1)) + +### convert into networks: +if (require(network)) { + x <- as.network(rtree(10)) + print(x) + plot(x, vertex.cex = 1:4) + plot(x, displaylabels = TRUE) +} +tr <- rtree(5) +if (require(igraph)) { + print((x <- as.igraph(tr))) + plot(x) + print(as.igraph(tr, TRUE, FALSE)) + print(as.igraph(tr, FALSE, FALSE)) +} } \keyword{manip} diff --git a/man/clustal.Rd b/man/clustal.Rd new file mode 100644 index 0000000..8bb5cfb --- /dev/null +++ b/man/clustal.Rd @@ -0,0 +1,86 @@ +\name{clustal} +\alias{clustal} +\alias{muscle} +\alias{tcoffee} +\title{Multiple Sequence Alignment with External Applications} +\description{ + These functions call their respective program from \R to align a set of + nucleotide sequences of class \code{"DNAbin"}. +} +\usage{ +clustal(x, pw.gapopen = 10, pw.gapext = 0.1, + gapopen = 10, gapext = 0.2, exec = NULL, + MoreArgs = "", quiet = TRUE) +muscle(x, exec = "muscle", MoreArgs = "", quiet = TRUE) +tcoffee(x, exec = "t_coffee", MoreArgs = "", quiet = TRUE) +} +\arguments{ + \item{x}{an object of class \code{"DNAbin"}.} + \item{pw.gapopen, pw.gapext}{gap opening and gap extension penalties + used by Clustal during pairwise alignments.} + \item{gapopen, gapext}{idem for global alignment.} + \item{exec}{a character string giving the name of the program, with + its path if necessary. \code{clustal} tries to guess it depending on + the operating system (see details).} + \item{MoreArgs}{a character string giving additional options.} + \item{quiet}{a logical: the default is to not print on \R's console the + messages from the external program.} +} +\details{ + \code{clustal} tries to guess the name of the executable program + depending on the operating system. 
Specifically, the following are + used: ``clustalw'' under Linux, ``clustalw2'' under MacOS, or + ``C:/Program Files/ClustalW2/clustalw2'' under Windows. + + The calculations are done in a temporary directory which is deleted + when \R is quit. Until then, the files created by the last call can + be found in the directory printed by \code{tempdir()}. + + When called without arguments (i.e., \code{clustal()}, \dots), the + function prints the options of the program which may be passed to + \code{MoreArgs}. +} +\value{ + an object of class \code{"DNAbin"} with the aligned sequences. +} +\references{ + Chenna, R., Sugawara, H., Koike, T., Lopez, R., Gibson, T. J., + Higgins, D. G. and Thompson, J. D. (2003) Multiple sequence alignment + with the Clustal series of programs. \emph{Nucleic Acids Research}, + \bold{31}, 3497--3500. + + \url{http://www.clustal.org/} + + Edgar, R. C. (2004) MUSCLE: Multiple sequence alignment with high + accuracy and high throughput. \emph{Nucleic Acids Research}, + \bold{32}, 1792--1797. + + \url{http://www.drive5.com/muscle/muscle_userguide3.8.html} + + Notredame, C., Higgins, D. and Heringa, J. (2000) T-Coffee: A novel + method for multiple sequence alignments. \emph{Journal of Molecular + Biology}, \bold{302}, 205--217. + + \url{http://www.tcoffee.org/Documentation/t_coffee/t_coffee_technical.htm} +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{image.DNAbin}}, \code{\link{del.gaps}} + + The package \pkg{phyloch} has similar functions for the MAFFT + and Prank programs. 
+} +\examples{ +\dontrun{ +### display the options: +clustal() +muscle() +tcoffee() + +data(woodmouse) +### open gaps more easily: +clustal(woodmouse, pw.gapopen = 1, pw.gapext = 1) +### T-Coffee requires negative values (quite slow; muscle is much faster): +tcoffee(woodmouse, MoreArgs = "-gapopen=-10 -gapext=-2") +}} +\keyword{manip} diff --git a/man/image.DNAbin.Rd b/man/image.DNAbin.Rd index 7ad62df..26059de 100644 --- a/man/image.DNAbin.Rd +++ b/man/image.DNAbin.Rd @@ -43,7 +43,8 @@ } \author{Emmanuel Paradis} \seealso{ - \code{\link{DNAbin}}, \code{\link{del.gaps}}, \code{\link{clustal}} + \code{\link{DNAbin}}, \code{\link{del.gaps}}, \code{\link{clustal}}, + \code{\link[graphics]{grid}} } \examples{ data(woodmouse) @@ -57,6 +58,7 @@ for (x in c("a", "g", "c", "t")) par(mfcol = c(1, 1)) ### zoom on a portion of the data: image(woodmouse[11:15, 1:50], c("a", "n"), c("blue", "grey")) +grid(50, 5, col = "black") ### see the guanines on a black background: image(woodmouse, "g", "yellow", "black") }