1 ## write.dna.R (2003-12-29)
3 ## Write DNA Sequences in a File
5 ## Copyright 2003-2006 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 write.dna <- function(x, file, format = "interleaved", append = FALSE,
11 nbcol = 6, colsep = " ", colw = 10, indent = NULL,
14 format <- match.arg(format, c("interleaved", "sequential", "fasta"))
15 phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
16 if (class(x) == "DNAbin") x <- as.character(x)
19 xx <- vector("list", N)
20 for (i in 1:N) xx[[i]] <- x[i, ]
21 names(xx) <- rownames(x)
25 if (is.null(names(x))) names(x) <- as.character(1:N)
27 indent <- if (phylip) 10 else 0
28 if (indent == "") indent <- 0
29 if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "")
30 if (format == "interleaved") {
32 blockseparation <- TRUE
33 blocksep <- paste(rep("\n", blocksep), collapse = "")
34 } else blockseparation <- FALSE
35 if (nbcol < 0) format <- "sequential"
37 zz <- if (append) file(file, "a") else file(file, "w")
39 S <- unique(unlist(lapply(x, length)))
40 ## check that all sequences have the same length
42 stop("sequences must have the same length for interleaved or sequential format.")
43 ## truncate names if necessary
44 if (any(nchar(names(x)) > 10)) {
45 warning("at least one name was longer than 10 characters;\nthey will be truncated which may lead to some redundancy.\n")
46 names(x) <- substr(names(x), 1, 10)
49 names(x) <- sprintf("%-10s", names(x))
50 cat(N, " ", S, "\n", sep = "", file = zz)
53 nbcol <- totalcol <- ceiling(S / colw)
55 nb.block <- ceiling(S / (colw * nbcol))
56 totalcol <- ceiling(S / colw)
58 ## Prepare the sequences in a matrix whose elements are
59 ## strings with `colw' characters.
60 SEQ <- matrix(NA, N, totalcol)
61 mode(SEQ) <- "character"
63 X <- paste(x[[i]], collapse= "")
64 for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
67 if (format == "interleaved") {
68 ## Write the first block with the taxon names
71 cat(names(x)[i], file = zz)
72 cat(SEQ[i, ], sep = colsep, file = zz)
77 cat(names(x)[i], file = zz)
78 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
82 ## Write the other blocks
84 for (k in 2:nb.block) {
85 if (blockseparation) cat(blocksep, file = zz)
88 cat(indent, file = zz)
89 cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
94 cat(indent, file = zz)
95 cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
102 if (format == "sequential") {
105 cat(names(x)[i], file = zz)
106 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
111 cat(names(x)[i], file = zz)
112 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
114 for (k in 2:nb.block) {
116 cat(indent, file = zz)
117 cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
120 cat(indent, file = zz)
121 cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
128 if (format == "fasta") {
130 cat(">", names(x)[i], file = zz)
132 X <- paste(x[[i]], collapse= "")
136 nbcol <- totalcol <- ceiling(S / colw)
138 totalcol <- ceiling(S / colw)
139 nb.block <- ceiling(totalcol / nbcol)
141 SEQ <- character(totalcol)
142 for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
143 for (k in 1:nb.block) {
145 cat(indent, file = zz)
146 cat(SEQ[(1 + (k - 1)*nbcol):length(SEQ)], sep = colsep, file = zz)
149 cat(indent, file = zz)
150 cat(SEQ[(1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)