1 ## write.dna.R (2009-05-10)
3 ## Write DNA Sequences in a File
5 ## Copyright 2003-2009 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 write.dna <- function(x, file, format = "interleaved", append = FALSE,
11 nbcol = 6, colsep = " ", colw = 10, indent = NULL,
14 format <- match.arg(format, c("interleaved", "sequential", "fasta"))
15 phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
16 if (inherits(x, "DNAbin")) x <- as.character(x)
22 xx <- vector("list", N)
23 for (i in 1:N) xx[[i]] <- x[i, ]
24 names(xx) <- rownames(x)
29 S <- unique(unlist(lapply(x, length)))
30 if (length(S) > 1) aligned <- FALSE
32 if (is.null(names(x))) names(x) <- as.character(1:N)
34 indent <- if (phylip) 10 else 0
35 if (is.numeric(indent))
36 indent <- paste(rep(" ", indent), collapse = "")
37 if (format == "interleaved") {
38 blocksep <- paste(rep("\n", blocksep), collapse = "")
39 if (nbcol < 0) format <- "sequential"
41 zz <- if (append) file(file, "a") else file(file, "w")
45 stop("sequences must have the same length for
46 interleaved or sequential format.")
47 cat(N, " ", S, "\n", sep = "", file = zz)
50 nbcol <- totalcol <- ceiling(S/colw)
52 nb.block <- ceiling(S/(colw * nbcol))
53 totalcol <- ceiling(S/colw)
55 ## Prepare the sequences in a matrix whose elements are
56 ## strings with `colw' characters.
57 SEQ <- matrix("", N, totalcol)
59 X <- paste(x[[i]], collapse = "")
61 SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
63 ## Prepare the names so that they all have the same nb of chars
64 max.nc <- max(nchar(names(x)))
65 ## in case all names are 10 char long or less, sequences are
66 ## always started on the 11th column of the file
67 if (max.nc < 11) max.nc <- 9
68 fmt <- paste("%-", max.nc + 1, "s", sep = "")
69 names(x) <- sprintf(fmt, names(x))
71 if (format == "interleaved") {
72 ## Write the first block with the taxon names
73 colsel <- if (nb.block == 1) 1:totalcol else 1:nbcol
75 cat(names(x)[i], file = zz)
76 cat(SEQ[i, colsel], sep = colsep, file = zz)
79 ## Write eventually the other blocks
81 for (k in 2:nb.block) {
82 cat(blocksep, file = zz)
83 endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
85 cat(indent, file = zz)
86 cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
93 if (format == "sequential") {
96 cat(names(x)[i], file = zz)
97 cat(SEQ[i, ], sep = colsep, file = zz)
102 cat(names(x)[i], file = zz)
103 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
105 for (k in 2:nb.block) {
106 endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol
107 cat(indent, file = zz)
108 cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz)
114 if (format == "fasta") {
116 cat(">", names(x)[i], file = zz)
118 X <- paste(x[[i]], collapse = "")
120 totalcol <- ceiling(S/colw)
121 if (nbcol < 0) nbcol <- totalcol
122 nb.lines <- ceiling(totalcol/nbcol)
123 SEQ <- character(totalcol)
124 for (j in 1:totalcol)
125 SEQ[j] <- substr(X, 1 + (j - 1) * colw, colw + (j - 1) * colw)
126 for (k in 1:nb.lines) {
128 if (k == nb.lines) length(SEQ) else nbcol + (k - 1)*nbcol
129 cat(indent, file = zz)
130 cat(SEQ[(1 + (k - 1)*nbcol):endsel], sep = colsep, file = zz)