-## write.dna.R (2003-12-29)
+## write.dna.R (2009-05-10)
## Write DNA Sequences in a File
-## Copyright 2003-2006 Emmanuel Paradis
+## Copyright 2003-2009 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
{
format <- match.arg(format, c("interleaved", "sequential", "fasta"))
phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
- if (class(x) == "DNAbin") x <- as.character(x)
+ if (inherits(x, "DNAbin")) x <- as.character(x)
+ aligned <- TRUE
if (is.matrix(x)) {
- N <- dim(x)[1]
+ N <- dim(x)
+ S <- N[2]
+ N <- N[1]
xx <- vector("list", N)
for (i in 1:N) xx[[i]] <- x[i, ]
names(xx) <- rownames(x)
x <- xx
rm(xx)
- } else N <- length(x)
+ } else {
+ N <- length(x)
+ S <- unique(unlist(lapply(x, length)))
+ if (length(S) > 1) aligned <- FALSE
+ }
if (is.null(names(x))) names(x) <- as.character(1:N)
if (is.null(indent))
indent <- if (phylip) 10 else 0
- if (indent == "") indent <- 0
- if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "")
+ if (is.numeric(indent))
+ indent <- paste(rep(" ", indent), collapse = "")
if (format == "interleaved") {
- if (blocksep) {
- blockseparation <- TRUE
- blocksep <- paste(rep("\n", blocksep), collapse = "")
- } else blockseparation <- FALSE
+ blocksep <- paste(rep("\n", blocksep), collapse = "")
if (nbcol < 0) format <- "sequential"
}
zz <- if (append) file(file, "a") else file(file, "w")
+ on.exit(close(zz))
if (phylip) {
- S <- unique(unlist(lapply(x, length)))
- ## check that all sequences have the same length
- if (length(S) != 1)
- stop("sequences must have the same length for interleaved or sequential format.")
- ## truncate names if necessary
- if (any(nchar(names(x)) > 10)) {
- warning("at least one name was longer than 10 characters;\nthey will be truncated which may lead to some redundancy.\n")
- names(x) <- substr(names(x), 1, 10)
- }
- ## left justify
- names(x) <- sprintf("%-10s", names(x))
+ if (!aligned)
+ stop("sequences must have the same length for
+ interleaved or sequential format.")
cat(N, " ", S, "\n", sep = "", file = zz)
if (nbcol < 0) {
nb.block <- 1
- nbcol <- totalcol <- ceiling(S / colw)
+ nbcol <- totalcol <- ceiling(S/colw)
} else {
- nb.block <- ceiling(S / (colw * nbcol))
- totalcol <- ceiling(S / colw)
+ nb.block <- ceiling(S/(colw * nbcol))
+ totalcol <- ceiling(S/colw)
}
## Prepare the sequences in a matrix whose elements are
## strings with `colw' characters.
- SEQ <- matrix(NA, N, totalcol)
- mode(SEQ) <- "character"
+ SEQ <- matrix("", N, totalcol)
for (i in 1:N) {
- X <- paste(x[[i]], collapse= "")
- for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
+ X <- paste(x[[i]], collapse = "")
+ for (j in 1:totalcol)
+ SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
}
+ ## Prepare the names so that they all have the same nb of chars
+ max.nc <- max(nchar(names(x)))
+ ## in case all names are 10 char long or less, sequences are
+ ## always started on the 11th column of the file
+ if (max.nc < 11) max.nc <- 9
+ fmt <- paste("%-", max.nc + 1, "s", sep = "")
+ names(x) <- sprintf(fmt, names(x))
}
if (format == "interleaved") {
## Write the first block with the taxon names
- if (nb.block == 1) {
- for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, ], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
- } else {
- for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ colsel <- if (nb.block == 1) 1:totalcol else 1:nbcol
+ for (i in 1:N) {
+ cat(names(x)[i], file = zz)
+ cat(SEQ[i, colsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
- ## Write the other blocks
+ ## Write eventually the other blocks
if (nb.block > 1) {
for (k in 2:nb.block) {
- if (blockseparation) cat(blocksep, file = zz)
- if (k == nb.block) {
- for (i in 1:N) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
- } else {
- for (i in 1:N) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ cat(blocksep, file = zz)
+ endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
+ for (i in 1:N) {
+ cat(indent, file = zz)
+ cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
+
}
if (format == "sequential") {
if (nb.block == 1) {
for (i in 1:N) {
- cat(names(x)[i], file = zz)
- cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ cat(names(x)[i], file = zz)
+ cat(SEQ[i, ], sep = colsep, file = zz)
+ cat("\n", file = zz)
+ }
} else {
for (i in 1:N) {
cat(names(x)[i], file = zz)
cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
cat("\n", file = zz)
for (k in 2:nb.block) {
- if (k == nb.block) {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- } else {
- cat(indent, file = zz)
- cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
+ cat(indent, file = zz)
+ cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
for (i in 1:N) {
cat(">", names(x)[i], file = zz)
cat("\n", file = zz)
- X <- paste(x[[i]], collapse= "")
+ X <- paste(x[[i]], collapse = "")
S <- length(x[[i]])
- if (nbcol < 0) {
- nb.block <- 1
- nbcol <- totalcol <- ceiling(S / colw)
- } else {
- totalcol <- ceiling(S / colw)
- nb.block <- ceiling(totalcol / nbcol)
- }
+ totalcol <- ceiling(S/colw)
+ if (nbcol < 0) nbcol <- totalcol
+ nb.lines <- ceiling(totalcol/nbcol)
SEQ <- character(totalcol)
- for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
- for (k in 1:nb.block) {
- if (k == nb.block) {
- cat(indent, file = zz)
- cat(SEQ[(1 + (k - 1)*nbcol):length(SEQ)], sep = colsep, file = zz)
- cat("\n", file = zz)
- } else {
- cat(indent, file = zz)
- cat(SEQ[(1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
- cat("\n", file = zz)
- }
+ for (j in 1:totalcol)
+ SEQ[j] <- substr(X, 1 + (j - 1) * colw, colw + (j - 1) * colw)
+ for (k in 1:nb.lines) {
+ endsel <-
+ if (k == nb.lines) length(SEQ) else nbcol + (k - 1)*nbcol
+ cat(indent, file = zz)
+ cat(SEQ[(1 + (k - 1)*nbcol):endsel], sep = colsep, file = zz)
+ cat("\n", file = zz)
}
}
}
- close(zz)
}