X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fwrite.dna.R;h=eeedab5732219fa09d469c9495f9ed201747080e;hb=477a8f1b7e5841202ef29d3d8af3c93acd35c043;hp=68070b8623308fa54cf386f1f2398e1266029b08;hpb=c827059eeafc8cbe41c812b26979543ab287803e;p=ape.git diff --git a/R/write.dna.R b/R/write.dna.R index 68070b8..eeedab5 100644 --- a/R/write.dna.R +++ b/R/write.dna.R @@ -1,8 +1,8 @@ -## write.dna.R (2003-12-29) +## write.dna.R (2009-05-10) ## Write DNA Sequences in a File -## Copyright 2003-2006 Emmanuel Paradis +## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -13,114 +13,100 @@ write.dna <- function(x, file, format = "interleaved", append = FALSE, { format <- match.arg(format, c("interleaved", "sequential", "fasta")) phylip <- if (format %in% c("interleaved", "sequential")) TRUE else FALSE - if (class(x) == "DNAbin") x <- as.character(x) + if (inherits(x, "DNAbin")) x <- as.character(x) + aligned <- TRUE if (is.matrix(x)) { - N <- dim(x)[1] + N <- dim(x) + S <- N[2] + N <- N[1] xx <- vector("list", N) for (i in 1:N) xx[[i]] <- x[i, ] names(xx) <- rownames(x) x <- xx rm(xx) - } else N <- length(x) + } else { + N <- length(x) + S <- unique(unlist(lapply(x, length))) + if (length(S) > 1) aligned <- FALSE + } if (is.null(names(x))) names(x) <- as.character(1:N) if (is.null(indent)) indent <- if (phylip) 10 else 0 - if (indent == "") indent <- 0 - if (is.numeric(indent)) indent <- paste(rep(" ", indent), collapse = "") + if (is.numeric(indent)) + indent <- paste(rep(" ", indent), collapse = "") if (format == "interleaved") { - if (blocksep) { - blockseparation <- TRUE - blocksep <- paste(rep("\n", blocksep), collapse = "") - } else blockseparation <- FALSE + blocksep <- paste(rep("\n", blocksep), collapse = "") if (nbcol < 0) format <- "sequential" } zz <- if (append) file(file, "a") else file(file, "w") + on.exit(close(zz)) if (phylip) { - S <- unique(unlist(lapply(x, length))) - ## check that all sequences have the same length - if (length(S) != 1) - stop("sequences must have the same length for interleaved or sequential format.") - ## truncate names if necessary - if (any(nchar(names(x)) > 10)) { - warning("at least one name was longer than 10 characters;\nthey will be truncated which may lead to some redundancy.\n") - names(x) <- substr(names(x), 1, 10) - } - ## left justify - names(x) <- sprintf("%-10s", names(x)) + if (!aligned) + stop("sequences must have the same length for + interleaved or sequential format.") cat(N, " ", S, "\n", sep = "", file = zz) if (nbcol < 0) { nb.block <- 1 - nbcol <- totalcol <- ceiling(S / colw) + nbcol <- totalcol <- ceiling(S/colw) } else { - nb.block <- ceiling(S / (colw * nbcol)) - totalcol <- ceiling(S / colw) + nb.block <- ceiling(S/(colw * nbcol)) + totalcol <- ceiling(S/colw) } ## Prepare the sequences in a matrix whose elements are ## strings with `colw' characters. - SEQ <- matrix(NA, N, totalcol) - mode(SEQ) <- "character" + SEQ <- matrix("", N, totalcol) for (i in 1:N) { - X <- paste(x[[i]], collapse= "") - for (j in 1:totalcol) SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw) + X <- paste(x[[i]], collapse = "") + for (j in 1:totalcol) + SEQ[i, j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw) } + ## Prepare the names so that they all have the same nb of chars + max.nc <- max(nchar(names(x))) + ## in case all names are 10 char long or less, sequences are + ## always started on the 11th column of the file + if (max.nc < 11) max.nc <- 9 + fmt <- paste("%-", max.nc + 1, "s", sep = "") + names(x) <- sprintf(fmt, names(x)) } if (format == "interleaved") { ## Write the first block with the taxon names - if (nb.block == 1) { - for (i in 1:N) { - cat(names(x)[i], file = zz) - cat(SEQ[i, ], sep = colsep, file = zz) - cat("\n", file = zz) - } - } else { - for (i in 1:N) { - cat(names(x)[i], file = zz) - cat(SEQ[i, 1:nbcol], sep = colsep, file = zz) - cat("\n", file = zz) - } + colsel <- if (nb.block == 1) 1:totalcol else 1:nbcol + for (i in 1:N) { + cat(names(x)[i], file = zz) + cat(SEQ[i, colsel], sep = colsep, file = zz) + cat("\n", file = zz) } - ## Write the other blocks + ## Write eventually the other blocks if (nb.block > 1) { for (k in 2:nb.block) { - if (blockseparation) cat(blocksep, file = zz) - if (k == nb.block) { - for (i in 1:N) { - cat(indent, file = zz) - cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz) - cat("\n", file = zz) - } - } else { - for (i in 1:N) { - cat(indent, file = zz) - cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz) - cat("\n", file = zz) - } + cat(blocksep, file = zz) + endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol + for (i in 1:N) { + cat(indent, file = zz) + cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz) + cat("\n", file = zz) } } } + } if (format == "sequential") { if (nb.block == 1) { for (i in 1:N) { - cat(names(x)[i], file = zz) - cat(SEQ[i, 1:nbcol], sep = colsep, file = zz) - cat("\n", file = zz) - } + cat(names(x)[i], file = zz) + cat(SEQ[i, ], sep = colsep, file = zz) + cat("\n", file = zz) + } } else { for (i in 1:N) { cat(names(x)[i], file = zz) cat(SEQ[i, 1:nbcol], sep = colsep, file = zz) cat("\n", file = zz) for (k in 2:nb.block) { - if (k == nb.block) { - cat(indent, file = zz) - cat(SEQ[i, (1 + (k - 1)*nbcol):ncol(SEQ)], sep = colsep, file = zz) - cat("\n", file = zz) - } else { - cat(indent, file = zz) - cat(SEQ[i, (1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz) - cat("\n", file = zz) - } + endcolsel <- if (k == nb.block) totalcol else nbcol + (k - 1)*nbcol + cat(indent, file = zz) + cat(SEQ[i, (1 + (k - 1)*nbcol):endcolsel], sep = colsep, file = zz) + cat("\n", file = zz) } } } @@ -129,29 +115,21 @@ write.dna <- function(x, file, format = "interleaved", append = FALSE, for (i in 1:N) { cat(">", names(x)[i], file = zz) cat("\n", file = zz) - X <- paste(x[[i]], collapse= "") + X <- paste(x[[i]], collapse = "") S <- length(x[[i]]) - if (nbcol < 0) { - nb.block <- 1 - nbcol <- totalcol <- ceiling(S / colw) - } else { - totalcol <- ceiling(S / colw) - nb.block <- ceiling(totalcol / nbcol) - } + totalcol <- ceiling(S/colw) + if (nbcol < 0) nbcol <- totalcol + nb.lines <- ceiling(totalcol/nbcol) SEQ <- character(totalcol) - for (j in 1:totalcol) SEQ[j] <- substr(X, 1 + (j - 1)*colw, colw + (j - 1)*colw) - for (k in 1:nb.block) { - if (k == nb.block) { - cat(indent, file = zz) - cat(SEQ[(1 + (k - 1)*nbcol):length(SEQ)], sep = colsep, file = zz) - cat("\n", file = zz) - } else { - cat(indent, file = zz) - cat(SEQ[(1 + (k - 1)*nbcol):(nbcol + (k - 1)*nbcol)], sep = colsep, file = zz) - cat("\n", file = zz) - } + for (j in 1:totalcol) + SEQ[j] <- substr(X, 1 + (j - 1) * colw, colw + (j - 1) * colw) + for (k in 1:nb.lines) { + endsel <- + if (k == nb.lines) length(SEQ) else nbcol + (k - 1)*nbcol + cat(indent, file = zz) + cat(SEQ[(1 + (k - 1)*nbcol):endsel], sep = colsep, file = zz) + cat("\n", file = zz) } } } - close(zz) }