From 285d0e6700c8381d66b5c671aacd9494fa8ad444 Mon Sep 17 00:00:00 2001 From: paradis Date: Wed, 8 Oct 2008 14:42:35 +0000 Subject: [PATCH] updated phymltest() for PhyML 3.0 + changed rbind.DNAbin() git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@52 6e262413-ae40-0410-9e79-b911bd7a66b7 --- Changes => ChangeLog | 6 +++ DESCRIPTION | 4 +- R/DNA.R | 34 ++++++------ R/phymltest.R | 121 ++++++++++++++++++------------------------- man/phymltest.Rd | 56 ++++++++++---------- 5 files changed, 102 insertions(+), 119 deletions(-) rename Changes => ChangeLog (99%) diff --git a/Changes b/ChangeLog similarity index 99% rename from Changes rename to ChangeLog index 854ae19..a3923a6 100644 --- a/Changes +++ b/ChangeLog @@ -25,6 +25,12 @@ BUG FIXES OTHER CHANGES + o phymltest() has been updated for PhyML 3.0 and gains an option + 'append', whereas the option 'path2exec' has been removed. + + o rbind.DNAbin() now accepts a single matrix which is returned + unchanged instead of an error. + o The data sets bird.orders and bird.families are now stored as Newick strings; i.e., the command data(bird.orders) calls read.tree(). diff --git a/DESCRIPTION b/DESCRIPTION index 6debdb3..ba82d6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Package: ape Version: 2.2-2 -Date: 2008-10-02 +Date: 2008-10-08 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Gangolf Jobb, Christoph Heibl, Vincent Lefort, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne -Maintainer: Emmanuel Paradis +Maintainer: Emmanuel Paradis Depends: R (>= 2.6.0) Suggests: gee, nlme, lattice ZipData: no diff --git a/R/DNA.R b/R/DNA.R index 6704bbe..9bcc285 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2008-10-02) +## DNA.R (2008-10-08) ## Manipulations and Comparisons of DNA Sequences @@ -86,32 +86,30 @@ rbind.DNAbin <- function(...) ### works only with matrices for the moment { obj <- list(...) - nobj <- length(obj) - if (nobj == 1) stop("only one matrix to bind.") - NC <- ncol(obj[[1]]) - for (i in 2:nobj) - if(ncol(obj[[i]]) != NC) + n <- length(obj) + if (n == 1) return(obj[[1]]) + NC <- unlist(lapply(obj, ncol)) + if (length(unique(NC)) > 1) stop("matrices do not have the same number of columns.") - for (i in 1:nobj) class(obj[[i]]) <- NULL - ans <- obj[[1]] - for (i in 2:nobj) ans <- rbind(ans, obj[[i]]) - structure(ans, class = "DNAbin") + for (i in 1:n) class(obj[[i]]) <- NULL + for (i in 2:n) obj[[1]] <- rbind(obj[[1]], obj[[i]]) + structure(obj[[1]], class = "DNAbin") } -cbind.DNAbin <- function(..., check.names = TRUE) +cbind.DNAbin <- function(..., check.names = TRUE, fill.with.gaps = FALSE, + quiet = TRUE) ### works only with matrices for the moment { obj <- list(...) - nobj <- length(obj) - if (nobj == 1) stop("only one matrix to bind.") - NR <- nrow(obj[[1]]) - for (i in 2:nobj) - if(nrow(obj[[i]]) != NR) + n <- length(obj) + if (n == 1) return(obj[[1]]) + NR <- unlist(lapply(obj, nrow)) + if (length(unique(NR)) > 1) stop("matrices do not have the same number of rows.") - for (i in 1:nobj) class(obj[[i]]) <- NULL + for (i in 1:n) class(obj[[i]]) <- NULL nms <- rownames(obj[[1]]) if (check.names) { - for (i in 2:nobj) + for (i in 2:n) if (all(rownames(obj[[i]]) %in% nms)) obj[[i]] <- obj[[i]][nms, ] else stop("rownames do not match among matrices.") diff --git a/R/phymltest.R b/R/phymltest.R index 0cbbc49..a354507 100644 --- a/R/phymltest.R +++ b/R/phymltest.R @@ -1,86 +1,67 @@ -## phymltest.R (2005-11-10) +## phymltest.R (2008-10-08) -## Fits a Bunch of Models with PHYML +## Fits a Bunch of Models with PhyML -## Copyright 2004-2005 Emmanuel Paradis +## Copyright 2004-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -.phymltest.model <- c("JC69", "JC69+I", "JC69+G", "JC69+I+G", - "K80", "K80+I", "K80+G", "K80+I+G", - "F81", "F81+I", "F81+G", "F81+I+G", - "F84", "F84+I", "F84+G", "F84+I+G", - "HKY85", "HKY85+I", "HKY85+G", "HKY85+I+G", - "TN93", "TN93+I", "TN93+G", "TN93+I+G", - "GTR", "GTR+I", "GTR+G", "GTR+I+G") +.phymltest.model <- + c("JC69", "JC69+I", "JC69+G", "JC69+I+G", + "K80", "K80+I", "K80+G", "K80+I+G", + "F81", "F81+I", "F81+G", "F81+I+G", + "F84", "F84+I", "F84+G", "F84+I+G", + "HKY85", "HKY85+I", "HKY85+G", "HKY85+I+G", + "TN93", "TN93+I", "TN93+G", "TN93+I+G", + "GTR", "GTR+I", "GTR+G", "GTR+I+G") -.phymltest.nfp <- c(1, 2, 2, 3, 2, 3, 3, 4, 4, 5, 5, 6, 5, 6, 6, 7, - 5, 6, 6, 7, 6, 7, 7, 8, 9, 10, 10, 11) +.phymltest.nfp <- + c(1, 2, 2, 3, 2, 3, 3, 4, 4, 5, 5, 6, 5, 6, 6, 7, + 5, 6, 6, 7, 6, 7, 7, 8, 9, 10, 10, 11) phymltest <- function(seqfile, format = "interleaved", itree = NULL, - exclude = NULL, execname, path2exec = NULL) + exclude = NULL, execname = NULL, append = TRUE) { - windoz <- .Platform$OS.type == "windows" - if (missing(execname)) { - if (windoz) execname <- "phyml_w32" - else stop("you must give an executable file name for PHYML") + os <- Sys.info()[1] + ## default names of PhyML: + if (is.null(execname)) { + if (os == "Linux") execname <- "phyml_3.0_linux32" + if (os == "Darwin") execname <- "phyml_3.0_macintel" + if (os == "Windows") execname <- "phyml_3.0_win32" } - outfile <- paste(seqfile, "_phyml_stat.txt", sep = "") - inp <- seqfile - if (file.exists(outfile)) inp <- c(inp, "A") - if (file.exists(paste(seqfile, "_phyml_tree.txt", sep = ""))) - inp <- c(inp, "A") - if (format != "interleaved") inp <- c(inp, "I") - if (!is.null(itree)) inp <- c(inp, "U", itree) + if (is.null(execname)) + stop("you must give an executable file name for PHYML") N <- length(.phymltest.model) - input.model <- list(c(rep("M", 5), "Y"), - c(rep("M", 5), "V", rep("Y", 2)), - c(rep("M", 5), "R", "A", rep("Y", 2)), - c(rep("M", 5), "R", "A", "Y", "V", rep("Y", 2)), - c(rep("M", 6), "T", rep("Y", 2)), - c(rep("M", 6), "T", "Y", "V", rep("Y", 2)), - c(rep("M", 6), "T", "Y", "R", "A", rep("Y", 2)), - c(rep("M", 6), "T", "Y", "R", "A", "Y", "V", rep("Y", 2)), - c(rep("M", 7), "Y"), - c(rep("M", 7), "V", rep("Y", 2)), - c(rep("M", 7), "R", "A", rep("Y", 2)), - c(rep("M", 7), "V", "Y", "R", "A", rep("Y", 2)), - c("M", "T", rep("Y", 2)), - c("M", "T", "Y", "V", rep("Y", 2)), - c("M", "T", "Y", "R", "A", rep("Y", 2)), - c("M", "T", "Y", "V", "Y", "R", "A", rep("Y", 2)), - c("T", rep("Y", 2)), - c("T", "Y", "V", rep("Y", 2)), - c("T", "Y", "R", "A", rep("Y", 2)), - c("T", "Y", "V", "Y", "R", "A", rep("Y", 2)), - c(rep("M", 2), "T", rep("Y", 2)), - c(rep("M", 2), "T", "Y", "V", rep("Y", 2)), - c(rep("M", 2), "T", "Y", "R", "A", rep("Y", 2)), - c(rep("M", 2), "T", "Y", "R", "A", "Y", "V", rep("Y", 2)), - c(rep("M", 3), "Y"), - c(rep("M", 3), "V", rep("Y", 2)), - c(rep("M", 3), "R", "A", rep("Y", 2)), - c(rep("M", 3), "V", "Y", "R", "A", rep("Y", 2))) - loglik <- numeric(N) - names(input.model) <- names(loglik) <- .phymltest.model - if (is.null(path2exec)) exec <- execname - else exec <- paste(path2exec, execname, sep = "/") - imod <- if (is.null(exclude)) 1:N else (1:N)[!.phymltest.model %in% exclude] - for (i in imod) { - if (i == 2) { - if (length(inp) == 1) inp <- c(inp, rep("A", 2)) - else if (inp[2] != "A") inp <- c(inp[1], rep("A", 2), inp[2:length(inp)]) - } - if (windoz) system(exec, input = c(inp, input.model[[i]])) - else { - cat(c(inp, input.model[[i]]), file = "f", sep = "\n") - system(paste(exec, "f", sep = " < ")) - } - loglik[i] <- scan(paste(seqfile, "_phyml_lk.txt", sep = ""), quiet = TRUE) + format <- match.arg(format, c("interleaved", "sequential")) + fmt <- rep("", N) + if (format != "interleaved") fmt[] <- "-q" + boot <- rep("-b 0", N) # to avoid any testing + mdl <- paste("-m", rep(c("JC69", "K80", "F81", "HKY85", "F84", "TN93", "GTR"), each = 4)) + tstv <- rep("-t e", N) # ignored by PhyML with JC69 or F81 + inv <- rep(c("", "-v e"), length.out = N) + ## no need to use the -c option of PhyML (4 categories by default if '-a e' is set): + alpha <- rep(rep(c("", "-a e"), each = 2), length.out = N) + tree <- rep("", N) + if (!is.null(itree)) tree[] <- paste("-u ", itree) + + cmd <- paste(execname, "-i", seqfile, fmt, boot, mdl, tstv, inv, alpha, tree, "--append ") + outfile <- paste(seqfile, "_phyml_stats.txt", sep = "") + if (!append) { + unlink(outfile) + unlink(paste(seqfile, "_phyml_tree.txt", sep = "")) } - unlink("f") - loglik <- loglik[imod] + imod <- 1:N + if (!is.null(exclude)) imod <- imod[!.phymltest.model %in% exclude] + + for (i in imod) system(cmd[i]) + + l <- readLines(outfile) + l <- grep("Log-likelihood:", l, value = TRUE) + ## in case there were already some results in the output file: + if (dd <- length(l) - length(imod)) l <- l[-(1:dd)] + loglik <- as.numeric(sub(". Log-likelihood:", "", l)) + names(loglik) <- .phymltest.model[imod] class(loglik) <- "phymltest" loglik } diff --git a/man/phymltest.Rd b/man/phymltest.Rd index 6381954..737beed 100644 --- a/man/phymltest.Rd +++ b/man/phymltest.Rd @@ -3,32 +3,33 @@ \alias{print.phymltest} \alias{summary.phymltest} \alias{plot.phymltest} -\title{Fits a Bunch of Models with PHYML} +\title{Fits a Bunch of Models with PhyML} \usage{ phymltest(seqfile, format = "interleaved", itree = NULL, - exclude = NULL, execname, path2exec = NULL) + exclude = NULL, execname = NULL, append = TRUE) \method{print}{phymltest}(x, ...) \method{summary}{phymltest}(object, ...) \method{plot}{phymltest}(x, main = NULL, col = "blue", ...) } \arguments{ \item{seqfile}{a character string giving the name of the file that - contains the DNA sequences to be analysed by PHYML.} + contains the DNA sequences to be analysed by PhyML.} \item{format}{a character string specifying the format of the DNA sequences: either \code{"interleaved"} (the default), or \code{"sequential"}.} \item{itree}{a character string giving the name of a file with a tree - in Newick format to be used as an initial tree by PHYML. If - \code{NULL} (the default), PHYML uses a ``BIONJ'' tree.} + in Newick format to be used as an initial tree by PhyML. If + \code{NULL} (the default), PhyML uses a ``BIONJ'' tree.} \item{exclude}{a vector of mode character giving the models to be excluded from the analysis. These must be among those below, and follow the same syntax.} - \item{execname}{a character string specifying the name of the PHYML - binary file. This argument can be left missing under Windows: the - default name \code{"phyml_w32"} will then be used.} - \item{path2exec}{a character string giving the path to the PHYML - binary file. If \code{NULL} the file must be accessible to R (either - it is in the computer path, or it is in R's working directory).} + \item{execname}{a character string specifying the name of the PhyML + executable. This argument can be left as \code{NULL} if PhyML's + default names are used: \code{"phyml_3.0_linux32"}, + \code{"phyml_3.0_macintel"}, or \code{"phyml_3.0_win32.exe"}, under + Linux, MacOS, or Windows respectively.} + \item{append}{a logical indicating whether to erase previous PhyML + output files if present; the default is to not erase.} \item{x}{an object of class \code{"phymltest"}.} \item{object}{an object of class \code{"phymltest"}.} \item{main}{a title for the plot; if left \code{NULL}, a title is made @@ -39,27 +40,20 @@ phymltest(seqfile, format = "interleaved", itree = NULL, \item{...}{further arguments passed to or from other methods.} } \description{ - This function calls the software PHYML and fits successively 28 models - of DNA evolution. The results are saved on disk, as PHYML usually - does, and returned in R as a vector with the log-likelihood value of - each model. + This function calls PhyML and fits successively 28 models of DNA + evolution. The results are saved on disk, as PhyML usually does, and + returned in R as a vector with the log-likelihood value of each model. } \details{ - The present function has been tested with version 2.4 of PHYML; it - should also work with version 2.3, but it won't work with version 2.1. - - Under unix-like systems, it seems necessary to run R from csh or a - similar shell (sh might not work). + The present function requires version 3.0 of PhyML; it won't work with + older versions. The user must take care to set correctly the three different paths - involved here: the path to PHYML's binary, the path to the sequence + involved here: the path to PhyML's binary, the path to the sequence file, and the path to R's working directory. The function should work if all three paths are different. Obviously, there should be no problem if they are all the same. - If the usual output files of PHYML already exist, they are not - deleted and PHYML's results are appended. - The following syntax is used for the models: "X[Y][Z]00[+I][+G]" @@ -79,16 +73,20 @@ phymltest(seqfile, format = "interleaved", itree = NULL, are described in the help page of \code{\link{dist.dna}}. When a gamma distribution of substitution rates is specified, four - categories are used (which is PHYML's default behaviour), and the + categories are used (which is PhyML's default behaviour), and the ``alpha'' parameter is estimated from the data. For the models with a different substition rate for transitions and transversions, these rates are left free and estimated from the data - (and not constrained with a ratio of 4 as in PHYML's default). + (and not constrained with a ratio of 4 as in PhyML's default). + + The option \code{path2exec} has been removed in the present version: + the path to PhyML's executable can be specified with the option + \code{execname}. } \note{ It is important to note that the models fitted by this function is - only a small fraction of the models possible with PHYML. For instance, + only a small fraction of the models possible with PhyML. For instance, it is possible to vary the number of categories in the (discretized) gamma distribution of substitution rates, and many parameters can be fixed by the user. The results from the present function should rather @@ -126,7 +124,7 @@ phymltest(seqfile, format = "interleaved", itree = NULL, } \examples{ ### A `fake' example with random likelihood values: it does not -### make sense, but does not need PHYML and gives you a flavour +### make sense, but does not need PhyML and gives you a flavour ### of what the output looks like: x <- runif(28, -100, -50) names(x) <- .phymltest.model @@ -135,7 +133,7 @@ x summary(x) plot(x) plot(x, main = "", col = "red") -### This example needs PHYML, copy/paste or type the +### This example needs PhyML, copy/paste or type the ### following commands if you want to try them, eventually ### changing setwd() and the options of phymltest() \dontrun{ -- 2.39.5