R/write.dna.R

   1 ## write.dna.R (2003-12-29)
   2
   3 ##   Write DNA Sequences in a File
   4
   5 ## Copyright 2003-2006 Emmanuel Paradis
   6
   7 ## This file is part of the R-package `ape'.
   8 ## See the file ../COPYING for licensing issues.
   9
  10 write.dna <- function(x, file, format = "interleaved", append = FALSE,
  11                       nbcol = 6, colsep = " ", colw = 10, indent = NULL,
  12                       blocksep = 1)
  13 {
  14     format <- match.arg(format, c("interleaved", "sequential", "fasta"))
  15     phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE
  16     if (class(x) == "DNAbin") x <- as.character(x)
  17     if (is.matrix(x)) {
  18         N <- dim(x)[1]
  19         xx <- vector("list", N)
  20         for (i in 1:N) xx[[i]] <- x[i, ]
  21         names(xx) <- rownames(x)
  22         x <- xx
  23         rm(xx)
  24     } else N <- length(x)
  25     if (is.null(names(x))) names(x) <- as.character(1:N)
  26     if (is.null(indent))
  27       indent <- if (phylip) 10 else  0
  28     if (indent == "") indent <- 0
  29     if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "")
  30     if (format == "interleaved") {
  31         if (blocksep) {
  32             blockseparation <- TRUE
  33             blocksep <- paste(rep("\n", blocksep), collapse = "")
  34         } else blockseparation <- FALSE
  35         if (nbcol < 0) format <- "sequential"
  36     }
  37     zz <- if (append) file(file, "a") else file(file, "w")
  38     if (phylip) {
  39         S <- unique(unlist(lapply(x, length)))
  40         ## check that all sequences have the same length
  41         if (length(S) != 1)
  42           stop("sequences must have the same length for interleaved or sequential format.")
  43         ## truncate names if necessary
  44         if (any(nchar(names(x)) > 10)) {
  45             warning("at least one name was longer than 10 characters;\nthey will be truncated which may lead to some redundancy.\n")
  46             names(x) <- substr(names(x), 1, 10)
  47         }
  48         ## left justify
  49         names(x) <- sprintf("%-10s", names(x))
  50         cat(N, " ", S, "\n", sep = "", file = zz)
  51         if (nbcol < 0) {
  52             nb.block <- 1
  53             nbcol <- totalcol <- ceiling(S / colw)
  54         } else {
  55             nb.block <- ceiling(S / (colw * nbcol))
  56             totalcol <- ceiling(S / colw)
  57         }
  58         ## Prepare the sequences in a matrix whose elements are
  59         ## strings with `colw' characters.
  60         SEQ <- matrix(NA, N, totalcol)
  61         mode(SEQ) <- "character"
  62         for (i in 1:N) {
  63             X <- paste(x[[i]], collapse= "")
  64             for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
  65         }
  66     }
  67     if (format == "interleaved") {
  68         ## Write the first block with the taxon names
  69         if (nb.block == 1) {
  70             for (i in 1:N) {
  71                 cat(names(x)[i], file = zz)
  72                 cat(SEQ[i, ], sep = colsep, file = zz)
  73                 cat("\n", file = zz)
  74             }
  75         } else {
  76             for (i in 1:N) {
  77                 cat(names(x)[i], file = zz)
  78                 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
  79                 cat("\n", file = zz)
  80             }
  81         }
  82         ## Write the other blocks
  83         if (nb.block > 1) {
  84             for (k in 2:nb.block) {
  85                 if (blockseparation) cat(blocksep, file = zz)
  86                 if (k == nb.block) {
  87                     for (i in 1:N) {
  88                         cat(indent, file = zz)
  89                         cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
  90                         cat("\n", file = zz)
  91                     }
  92                 } else {
  93                     for (i in 1:N) {
  94                         cat(indent, file = zz)
  95                         cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
  96                         cat("\n", file = zz)
  97                     }
  98                 }
  99             }
 100         }
 101     }
 102     if (format == "sequential") {
 103         if (nb.block == 1) {
 104             for (i in 1:N) {
 105                cat(names(x)[i], file = zz)
 106                cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
 107                cat("\n", file = zz)
 108            }
 109         } else {
 110             for (i in 1:N) {
 111                 cat(names(x)[i], file = zz)
 112                 cat(SEQ[i, 1:nbcol], sep = colsep, file = zz)
 113                 cat("\n", file = zz)
 114                 for (k in 2:nb.block) {
 115                     if (k == nb.block) {
 116                         cat(indent, file = zz)
 117                         cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz)
 118                         cat("\n", file = zz)
 119                     } else {
 120                         cat(indent, file = zz)
 121                         cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
 122                         cat("\n", file = zz)
 123                     }
 124                 }
 125             }
 126         }
 127     }
 128     if (format == "fasta") {
 129         for (i in 1:N) {
 130             cat(">", names(x)[i], file = zz)
 131             cat("\n", file = zz)
 132             X <- paste(x[[i]], collapse= "")
 133             S <- length(x[[i]])
 134             if (nbcol < 0) {
 135                 nb.block <- 1
 136                 nbcol <- totalcol <- ceiling(S / colw)
 137             } else {
 138                 totalcol <- ceiling(S / colw)
 139                 nb.block <- ceiling(totalcol / nbcol)
 140             }
 141             SEQ <- character(totalcol)
 142             for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw)
 143             for (k in 1:nb.block) {
 144                 if (k == nb.block) {
 145                     cat(indent, file = zz)
 146                     cat(SEQ[(1 + (k - 1)*nbcol):length(SEQ)], sep = colsep, file = zz)
 147                     cat("\n", file = zz)
 148                 } else {
 149                     cat(indent, file = zz)
 150                     cat(SEQ[(1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz)
 151                     cat("\n", file = zz)
 152                 }
 153             }
 154         }
 155     }
 156     close(zz)
 157 }