NEW FEATURES
+ o The new function makeLabel() helps to modify labels of trees,
+ lists of trees, or DNA sequences, with several utilities to
+ truncate and/or make them unique, substituting some
+ characters, and so on.
+
o The new function del.gaps() removes insertion gaps ("-") in a
set of DNA sequences.
+ o read.dna() can now read Clustal files (*.aln).
+
BUG FIXES
Minin.
o The option 'check.labels' of consensus() and prop.part() is now
- TRUE.
+ TRUE by default.
+
+ o write.dna() now does not truncate names to 10 characters with
+ the Phylip formats.
Package: ape
Version: 2.2-1
-Date: 2008-06-28
+Date: 2008-07-03
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong,
Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel,
--- /dev/null
+## makeLabel.R (2008-07-03)
+
+## Label Management
+
+## Copyright 2008 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+makeLabel <- function(x, ...) UseMethod("makeLabel")
+
+makeLabel.character <- function(x, len = 99, space = "_",
+ make.unique = TRUE, illegal = "():;,[]", quote = FALSE, ...)
+{
+ x <- gsub("[[:space:]]", space, x)
+ if (illegal != "") {
+ illegal <- unlist(strsplit(illegal, NULL))
+ for (i in illegal) x <- gsub(i, "", x, fixed = TRUE)
+ }
+ if (quote) len <- len - 2
+ nc <- nchar(x) > len
+ if (any(nc)) x[nc] <- substr(x[nc], 1, len)
+ tab <- table(x)
+ if (all(tab == 1)) make.unique <- FALSE
+ if (make.unique) {
+ dup <- tab[which(tab > 1)]
+ nms <- names(dup)
+ for (i in 1:length(dup)) {
+ j <- which(x == nms[i])
+ end <- nchar(x[j][1])
+ ## w: number of characters to be added as suffix
+ w <- floor(log10(dup[i])) + 1
+ suffix <- formatC(1:dup[i], width = w, flag = "0")
+ if (end + w > len) {
+ start <- end - w + 1
+ substr(x[j], start, end) <- suffix
+ } else x[j] <- paste(x[j], suffix, sep = "")
+ }
+ }
+ if (quote) x <- paste('"', x, '"', sep = "")
+ x
+}
+
+makeLabel.phylo <- function(x, tips = TRUE, nodes = TRUE, ...)
+{
+ if (tips)
+ x$tip.label <- makeLabel.character(x$tip.label, ...)
+ if (!is.null(x$node.label) && nodes)
+ x$node.label <- makeLabel.character(x$node.label, ...)
+ x
+}
+
+makeLabel.multiPhylo <- function(x, tips = TRUE, nodes = TRUE, ...)
+{
+ y <- attr("TipLabel", x)
+ if (is.null(y)) {
+ for (i in 1:length(x))
+ x[[i]] <- makeLabel.phylo(x[[i]], tips = tips, nodes = nodes, ...)
+ } else {
+ attr("TipLabel", x) <- makeLabel.character(y, ...)
+ }
+ x
+}
+
+makeLabel.DNAbin <- function(x, ...)
+{
+ if (is.vector(x) || is.list(x))
+ names(x) <- makeLabel.character(names(x), ...)
+ else rownames(x) <- makeLabel.character(rownames(x), ...)
+ x
+}
-## read.dna.R (2008-03-28)
+## read.dna.R (2008-07-03)
## Read DNA Sequences in a File
as.character = FALSE)
{
getTaxaNames <- function(x) {
- x <- sub("^ +", "", x) # remove the leading spaces
- x <- sub(" +$", "", x) # remove the trailing spaces
- x <- sub("^['\"]", "", x) # remove the leading quotes
- x <- sub("['\"]$", "", x) # remove the trailing quotes
+ x <- sub("^['\" ]+", "", x) # remove the leading quotes and spaces
+ x <- sub("['\" ]+$", "", x) # " " trailing " " "
x
}
- format <- match.arg(format, c("interleaved", "sequential", "fasta"))
- phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
- X <- scan(file = file, what = character(), sep = "\n", quiet = TRUE,
+ getNucleotide <- function(x) {
+ x <- gsub(" ", "", x)
+ x <- strsplit(x, NULL)
+ unlist(x)
+ }
+ formats <- c("interleaved", "sequential", "fasta", "clustal")
+ format <- match.arg(format, formats)
+ phylip <- if (format %in% formats[1:2]) TRUE else FALSE
+ X <- scan(file = file, what = "", sep = "\n", quiet = TRUE,
skip = skip, nlines = nlines, comment.char = comment.char)
+ pat.base <- "[-AaCcGgTtUuMmRrWwSsYyKkVvHhDdBbNn?]{10}"
if (phylip) {
- fl <- X[1]
- oop <- options(warn = -1)
## need to remove the possible leading spaces in the first line
- fl.num <- as.numeric(unlist(strsplit(gsub("^ +", "", fl), " +")))
- options(oop)
- if (all(is.na(fl.num)))
- stop("the first line of the file must contain the dimensions of the data")
- if (length(fl.num) != 2)
- stop("the first line of the file must contain TWO numbers")
- else {
- n <- fl.num[1]
- s <- fl.num[2]
- }
+ fl <- gsub("^ +", "", X[1])
+ fl <- as.numeric(unlist(strsplit(fl, " +")))
+ if (length(fl) != 2 || any(is.na(fl)))
+ stop("the first line of the file must contain the dimensions of the data")
+ n <- fl[1]
+ s <- fl[2]
+ obj <- matrix("", n, s)
X <- X[-1]
- obj <- vector("character", n*s)
- dim(obj) <- c(n, s)
}
if (format == "interleaved") {
- fl <- X[1]
- fl <- unlist(strsplit(fl, NULL))
- bases <- grep("[-AaCcGgTtUuMmRrWwSsYyKkVvHhDdBbNn?]", fl)
- z <- diff(bases)
- for (i in 1:length(z)) if (all(z[i:(i + 8)] == 1)) break
- start.seq <- bases[i]
+ start.seq <- regexpr(pat.base, X[1])[1]
if (is.null(seq.names))
- seq.names <- getTaxaNames(substr(X[1:n], 1, start.seq - 1))
+ seq.names <- getTaxaNames(substr(X[1:n], 1, start.seq - 1))
X[1:n] <- substr(X[1:n], start.seq, nchar(X[1:n]))
- X <- gsub(" ", "", X)
nl <- length(X)
for (i in 1:n)
- obj[i, ] <- unlist(strsplit(X[seq(i, nl, n)], NULL))
+ obj[i, ] <- getNucleotide(X[seq(i, nl, n)])
}
if (format == "sequential") {
- fl <- X[1]
taxa <- character(n)
j <- 1
for (i in 1:n) {
- bases <- grep("[-AaCcGgTtUuMmRrWwSsYyKkVvHhDdBbNn?]",
- unlist(strsplit(X[j], NULL)))
- z <- diff(bases)
- for (k in 1:length(z)) if (all(z[k:(k + 8)] == 1)) break
- start.seq <- bases[k]
+ start.seq <- regexpr(pat.base, X[j])[1]
taxa[i] <- substr(X[j], 1, start.seq - 1)
- sequ <- substr(X[j], start.seq, nchar(X[j]))
- sequ <- gsub(" ", "", sequ)
+ sequ <- getNucleotide(substr(X[j], start.seq, nchar(X[j])))
j <- j + 1
- while (nchar(sequ) < s) {
- sequ <- paste(sequ, gsub(" " , "", X[j]), sep = "")
+ while (length(sequ) < s) {
+ sequ <- c(sequ, getNucleotide(X[j]))
j <- j + 1
}
- obj[i, ] <- unlist(strsplit(sequ, NULL))
+ obj[i, ] <- sequ
}
if (is.null(seq.names)) seq.names <- getTaxaNames(taxa)
}
}
start <- c(start, length(X) + 1) # this avoids the following to crash when `i = n'
for (i in 1:n)
- obj[[i]] <- unlist(strsplit(gsub(" ", "",
- X[(start[i] + 1):(start[i + 1] - 1)]),
- NULL))
+ obj[[i]] <- getNucleotide(X[(start[i] + 1):(start[i + 1] - 1)])
}
- if (phylip) {
+ if (format == "clustal") {
+ X <- X[-1]
+ ## find where the 1st sequence starts
+ start.seq <- regexpr(pat.base, X[1])[1]
+ ## find the lines with *********....
+ nspaces <- paste("^ {", start.seq - 1, "}", sep = "", collapse = "")
+ stars <- grep(nspaces, X)
+ ## we now know how many sequences in the file:
+ n <- stars[1] - 1
+ ## get the sequence names in the same way than "interleaved":
+ if (is.null(seq.names))
+ seq.names <- getTaxaNames(substr(X[1:n], 1, start.seq - 1))
+ ## need to remove the sequence names before getting the sequences:
+ X <- substr(X, start.seq, nchar(X))
+ nl <- length(X)
+ ## find the length of the 1st sequence:
+ tmp <- getNucleotide(X[seq(1, nl, n + 1)])
+ s <- length(tmp)
+ obj <- matrix("", n, s)
+ obj[1, ] <- tmp
+ for (i in 2:n)
+ obj[i, ] <- getNucleotide(X[seq(i, nl, n + 1)])
+ }
+ if (format != "fasta") {
rownames(obj) <- seq.names
obj <- tolower(obj)
} else {
-## write.dna.R (2003-12-29)
+## write.dna.R (2008-07-03)
## Write DNA Sequences in a File
-## Copyright 2003-2006 Emmanuel Paradis
+## Copyright 2003-2008 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
format <- match.arg(format, c("interleaved", "sequential", "fasta"))
phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
if (class(x) == "DNAbin") x <- as.character(x)
+ aligned <- TRUE
if (is.matrix(x)) {
- N <- dim(x)[1]
+ N <- dim(x)
+ S <- N[2]
+ N <- N[1]
xx <- vector("list", N)
for (i in 1:N) xx[[i]] <- x[i, ]
names(xx) <- rownames(x)
x <- xx
rm(xx)
- } else N <- length(x)
+ } else {
+ N <- length(x)
+ S <- unique(unlist(lapply(x, length)))
+ if (length(S) > 1) aligned <- FALSE
+ }
if (is.null(names(x))) names(x) <- as.character(1:N)
if (is.null(indent))
indent <- if (phylip) 10 else 0
- if (indent == "") indent <- 0
- if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "")
+ if (is.numeric(indent))
+ indent <- paste(rep(" ", indent), collapse = "")
if (format == "interleaved") {
- if (blocksep) {
- blockseparation <- TRUE
- blocksep <- paste(rep("\n", blocksep), collapse = "")
- } else blockseparation <- FALSE
+ blocksep <- paste(rep("\n", blocksep), collapse = "")
if (nbcol < 0) format <- "sequential"
}
zz <- if (append) file(file, "a") else file(file, "w")
+ on.exit(close(zz))
if (phylip) {
- S <- unique(unlist(lapply(x, length)))
- ## check that all sequences have the same length
- if (length(S) != 1)
- stop("sequences must have the same length for interleaved or sequential format.")
- ## truncate names if necessary
- if (any(nchar(names(x)) > 10)) {
- warning("at least one name was longer than 10 characters;\nthey will be truncated which may lead to some redundancy.\n")
- names(x) <- substr(names(x), 1, 10)
- }
- ## left justify
- names(x) <- sprintf("%-10s", names(x))
+ if (!aligned)
+ stop("sequences must have the same length for
+ interleaved or sequential format.")
cat(N, " ", S, "\n", sep = "", file = zz)
if (nbcol < 0) {
nb.block <- 1
- nbcol <- totalcol <- ceiling(S / colw)
+ nbcol <- totalcol <- ceiling(S/colw)
} else {
- nb.block <- ceiling(S / (colw * nbcol))
- totalcol <- ceiling(S / colw)
+ nb.block <- ceiling(S/(colw * nbcol))
+ totalcol <- ceiling(S/colw)
}
## Prepare the sequences in a matrix whose elements are
## strings with `colw' characters.
- SEQ <- matrix(NA, N, totalcol)
- mode(SEQ) <- "character"
+ SEQ <- matrix("", N, totalcol)
for (i in 1:N) {
- X <- paste(x[[i]], collapse= "")
- for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
+ X <- paste(x[[i]], collapse = "")
+ for (j in 1:totalcol)
+ SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
}
+ ## Prepare the names so that they all have the same nb of chars
+ max.nc <- max(nchar(names(x)))
+ ## in case all names are 10 char long or less, sequences are
+ ## always started on the 11th column of the file
+ if (max.nc < 11) max.nc <- 9
+ fmt <- paste("%-", max.nc + 1, "s", sep = "")
+ names(x) <- sprintf(fmt, names(x))
}
if (format == "interleaved") {
## Write the first block with the taxon names
- if (nb.block == 1) {
- for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, ], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
- } else {
- for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ colsel <- if (nb.block == 1) 1:totalcol else 1:nbcol
+ for (i in 1:N) {
+ cat(names(x)[i], file = zz)
+ cat(SEQ[i, colsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
- ## Write the other blocks
+ ## Write eventually the other blocks
if (nb.block > 1) {
for (k in 2:nb.block) {
- if (blockseparation) cat(blocksep, file = zz)
- if (k == nb.block) {
- for (i in 1:N) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
- } else {
- for (i in 1:N) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ cat(blocksep, file = zz)
+ endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
+ for (i in 1:N) {
+ cat(indent, file = zz)
+ cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
+
}
if (format == "sequential") {
if (nb.block == 1) {
for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ cat(names(x)[i], file = zz)
+ cat(SEQ[i, ], sep = colsep, file = zz)
+ cat("\n", file = zz)
+ }
} else {
for (i in 1:N) {
cat(names(x)[i], file = zz)
cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
cat("\n", file = zz)
for (k in 2:nb.block) {
- if (k == nb.block) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- } else {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
+ cat(indent, file = zz)
+ cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
for (i in 1:N) {
cat(">", names(x)[i], file = zz)
cat("\n", file = zz)
- X <- paste(x[[i]], collapse= "")
+ X <- paste(x[[i]], collapse = "")
S <- length(x[[i]])
- if (nbcol < 0) {
- nb.block <- 1
- nbcol <- totalcol <- ceiling(S / colw)
- } else {
- totalcol <- ceiling(S / colw)
- nb.block <- ceiling(totalcol / nbcol)
- }
+ totalcol <- ceiling(S/colw)
+ if (nbcol < 0) nbcol <- totalcol
+ nb.lines <- ceiling(totalcol/nbcol)
SEQ <- character(totalcol)
- for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
- for (k in 1:nb.block) {
- if (k == nb.block) {
- cat(indent, file = zz)
- cat(SEQ[(1 + (k - 1)*nbcol):length(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- } else {
- cat(indent, file = zz)
- cat(SEQ[(1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ for (j in 1:totalcol)
+ SEQ[j] <- substr(X, 1 + (j - 1) * colw, colw + (j - 1) * colw)
+ for (k in 1:nb.lines) {
+ endsel <-
+ if (k == nb.lines) length(SEQ) else nbcol + (k - 1)*nbcol
+ cat(indent, file = zz)
+ cat(SEQ[(1 + (k - 1)*nbcol):endsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
- close(zz)
}
--- /dev/null
+\name{makeLabel}
+\alias{makeLabel}
+\title{Label Management}
+\usage{
+makeLabel(x, ...)
+\method{makeLabel}{character}(x, len = 99, space = "_", make.unique = TRUE,
+ illegal = "():;,[]", quote = FALSE, ...)
+\method{makeLabel}{phylo}(x, tips = TRUE, nodes = TRUE, ...)
+\method{makeLabel}{multiPhylo}(x, tips = TRUE, nodes = TRUE, ...)
+\method{makeLabel}{DNAbin}(x, ...)
+}
+\arguments{
+ \item{x}{a vector of mode character or an object for which labels are
+ to be changed.}
+ \item{len}{the maximum length of the labels: those longer than `len'
+ will be truncated.}
+ \item{space}{the character to replace spaces, tabulations, and
+ linebreaks.}
+ \item{make.unique}{a logical specifying whether duplicate labels
+ should be made unique by appending numerals; \code{TRUE} by
+ default.}
+ \item{illegal}{a string specifying the characters to be deleted.}
+ \item{quote}{a logical specifying whether to quote the labels;
+ \code{FALSE} by default.}
+ \item{tips}{a logical specifying whether tip labels are to be
+ modified; \code{TRUE} by default.}
+ \item{nodes}{a logical specifying whether node labels are to be
+ modified; \code{TRUE} by default.}
+ \item{...}{further arguments to be passed to or from other methods.}
+}
+\description{
+ This is a generic function with methods for character vectors, trees
+ of class \code{"phylo"}, lists of trees of class \code{"multiPhylo"},
+ and DNA sequences of class \code{"DNAbin"}. All options for the class
+ character may be used in the other methods.
+}
+\details{
+ The option \code{make.unique} does not work exactly in the same way
+ then the function of the same name: numbers are suffixed to all labels
+ that are identical (without separator). See the examples.
+
+ If there are 10--99 identical labels, the labels returned are "xxx01",
+ "xxx02", etc, or "xxx001", "xxx002", etc, if they are 100--999, and so
+ on. The number of digits added preserves the option `len'.
+
+ The default for `len' makes labels short enough to be read by
+ PHYML. Clustal accepts labels up to 30 character long.
+}
+\note{
+ The current version does not perform well when trying to make very
+ short unique labels (e.g., less than 5 character long).
+}
+\value{
+ An object of the appropriate class.
+}
+\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
+\seealso{
+ \code{\link[base]{make.unique}}, \code{\link[base]{make.names}},
+ \code{\link[base]{abbreviate}}
+}
+\examples{
+x <- rep("a", 3)
+makeLabel(x)
+make.unique(x) # <- from R's base
+x <- rep("aaaaa", 2)
+makeLabel(x, len = 3) # made unique and of length 3
+makeLabel(x, len = 3, make.unique = FALSE)
+}
\item{file}{a file name specified by either a variable of mode character,
or a double-quoted string.}
\item{format}{a character string specifying the format of the DNA
- sequences. Three choices are possible: \code{"interleaved"},
- \code{"sequential"}, or \code{"fasta"}, or any unambiguous
- abbreviation of these.}
+ sequences. Four choices are possible: \code{"interleaved"},
+ \code{"sequential"}, \code{"clustal"}, or \code{"fasta"}, or any
+ unambiguous abbreviation of these.}
\item{skip}{the number of lines of the input file to skip before
beginning to read data.}
\item{nlines}{the number of lines to be read (by default the file is
\details{
This function follows the interleaved and sequential formats defined
in PHYLIP (Felsenstein, 1993) but with the original feature than there
- is no restriction on the lengths of the taxa names (though a data file
- with 10-characters-long taxa names is fine as well). For these two
+ is no restriction on the lengths of the taxa names. For these two
formats, the first line of the file must contain the dimensions of the
data (the numbers of taxa and the numbers of nucleotides); the
sequences are considered as aligned and thus must be of the same
sequences are then read until the number of nucleotides specified in
the first line of the file is reached. This is repeated for each taxa.}
+ \item{Clustal:}{this is the format output by the Clustal programs
+ (.aln). It is somehow similar to the interleaved format: the
+ differences being that the dimensions of the data are not indicated
+ in the file, and the names of the sequences are repeated in each block.}
+
\item{FASTA:}{This looks like the sequential format but the taxa names
(or rather a description of the sequence) are on separate lines
beginning with a `greater than' character ``>'' (there may be
" ACTAAAAATT ATCAATCACT",
file = "exdna.txt", sep = "\n")
ex.dna2 <- read.dna("exdna.txt")
+### ... in clustal format...
+cat("CLUSTAL (ape) multiple sequence alignment", "",
+"No305 NTTCGAAAAACACACCCACTACTAAAANTTATCAGTCACT",
+"No304 ATTCGAAAAACACACCCACTACTAAAAATTATCAACCACT",
+"No306 ATTCGAAAAACACACCCACTACTAAAAATTATCAATCACT",
+" ************************** ****** ****",
+file = "exdna.txt", sep = "\n")
+ex.dna3 <- read.dna("exdna.txt", format = "clustal")
### ... and in FASTA format
cat("> No305",
"NTTCGAAAAACACACCCACTACTAAAANTTATCAGTCACT",
"> No306",
"ATTCGAAAAACACACCCACTACTAAAAATTATCAATCACT",
file = "exdna.txt", sep = "\n")
-ex.dna3 <- read.dna("exdna.txt", format = "fasta")
-### These are the same!
+ex.dna4 <- read.dna("exdna.txt", format = "fasta")
+### The first three are the same!
identical(ex.dna, ex.dna2)
identical(ex.dna, ex.dna3)
+identical(ex.dna, ex.dna4)
unlink("exdna.txt") # clean-up
}
\keyword{IO}
}
\description{
This function writes in a file a list of DNA sequences in sequential,
- interleaved, or FASTA format. The names of the vectors of the list are
- used as taxa names.
+ interleaved, or FASTA format.
}
\details{
- The same three formats are supported in the present function than in
- \code{\link{read.dna}}: see its help page and the references below for
- a description of these formats.
+ Three formats are supported in the present function: see the help page
+ of \code{\link{read.dna}} and the references below for a description.
If the sequences have no names, then they are given "1", "2", ... as
names in the file.
With the interleaved and sequential formats, the sequences must be all
- of the same length; if the taxon names are longer than 10 characters,
- they are truncated and a warning message is issued.
+ of the same length. The names of the sequences are not truncated.
The argument `indent' specifies how the rows of nucleotides are
indented. In the interleaved and sequential formats, the rows with
it is a numeric). For example, specifying `indent = " "' or `indent
= 3' will have exactly the same effect (use `indent = "\t"' for a
tabulation).
+
+ The different options are intended to give flexibility in formatting
+ the sequences. For instance, if the sequences are very long it may be
+ judicious to remove all the spaces beween columns (colsep = ""), in
+ the margins (indent = 0), and between the blocks (blocksep = 0) to
+ produce a smaller file.
}
\note{
Specifying a negative value for `nbcol' (meaning that the nucleotides
are printed on a single line) gives the same result for the
interleaved and sequential formats.
- The different options are intended to give flexibility in formatting
- the sequences. For instance, if the sequences are very long it may be
- judicious to remove all the spaces beween columns `(colsep = ""), in
- the margins (indent = 0), and between the blocks (blocksep = 0) to
- produce a smaller file.
+ The names of the sequences may be truncated with the function
+ \code{\link{makeLabel}}. In particular, Clustal is limited to 30
+ characters, whereas PHYML seems limited to 99 characters.
+
+ The FASTA format may be used as input for Clustal.
}
\value{
None (invisible `NULL').
\url{http://evolution.genetics.washington.edu/phylip/phylip.html}
}
\seealso{
- \code{\link{read.dna}}, \code{\link{read.GenBank}}
+ \code{\link{read.dna}}, \code{\link{read.GenBank}},
+ \code{\link{makeLabel}}
}
\keyword{IO}