- start <- c(start, length(X) + 1) # this avoids the following to crash when `i = n'
- for (i in 1:n)
- obj[[i]] <- getNucleotide(X[(start[i] + 1):(start[i + 1] - 1)])
- }
- if (format == "clustal") {
- X <- X[-1]
- ## find where the 1st sequence starts
- start.seq <- regexpr(pat.base, X[1])[1]
- ## find the lines with *********....
- nspaces <- paste("^ {", start.seq - 1, "}", sep = "", collapse = "")
- stars <- grep(nspaces, X)
- ## we now know how many sequences in the file:
- n <- stars[1] - 1
- ## get the sequence names in the same way than "interleaved":
- if (is.null(seq.names))
- seq.names <- getTaxaNames(substr(X[1:n], 1, start.seq - 1))
- ## need to remove the sequence names before getting the sequences:
- X <- substr(X, start.seq, nchar(X))
- nl <- length(X)
- ## find the length of the 1st sequence:
- tmp <- getNucleotide(X[seq(1, nl, n + 1)])
- s <- length(tmp)
- obj <- matrix("", n, s)
- obj[1, ] <- tmp
- for (i in 2:n)
- obj[i, ] <- getNucleotide(X[seq(i, nl, n + 1)])
+ switch(format, # parse the lines in X into `obj` and `taxa` according to `format`
+   "interleaved" = {
+     start.seq <- findFirstNucleotide(X[1]) # position in line 1 where the bases begin
+     one2n <- seq_len(n) # only the first n lines are treated as carrying a name
+     taxa <- getTaxaNames(substr(X[one2n], 1, start.seq - 1))
+     X[one2n] <- substr(X[one2n], start.seq, nchar(X[one2n]))
+     nl <- length(X)
+     for (i in one2n) # sequence i is read from lines i, i + n, i + 2n, ...
+       obj[i, ] <- getNucleotide(X[seq(i, nl, n)])
+   },
+   "sequential" = {
+     taxa <- character(n)
+     j <- 1L # line number
+     for (i in seq_len(n)) { # seq_len() so that nothing is read when n is 0
+       start.seq <- findFirstNucleotide(X[j])
+       taxa[i] <- getTaxaNames(substr(X[j], 1, start.seq - 1))
+       sequ <- getNucleotide(substr(X[j], start.seq, nchar(X[j])))
+       j <- j + 1L
+       while (length(sequ) < s) { # keep appending lines until s bases are collected
+         sequ <- c(sequ, getNucleotide(X[j]))
+         j <- j + 1L
+       }
+       obj[i, ] <- sequ
+     }
+     taxa <- getTaxaNames(taxa) # NOTE(review): names were already cleaned one by one above
+   },
+   "clustal" = {
+     X <- X[-1] # drop the first line (the "Clustal bla bla..." header)
+     ## find where the 1st sequence starts
+     start.seq <- findFirstNucleotide(X[1])
+     ## find the lines with *********.... (they begin with start.seq - 1 spaces)
+     nspaces <- paste0("^ {", start.seq - 1, "}", collapse = "")
+     stars <- grep(nspaces, X)
+     ## the lines before the first such line hold one sequence each:
+     n <- stars[1] - 1
+     taxa <- getTaxaNames(substr(X[seq_len(n)], 1, start.seq - 1))
+     ## need to remove the sequence names before getting the sequences:
+     X <- substr(X, start.seq, nchar(X))
+     nl <- length(X)
+     ## the 1st sequence (every (n + 1)-th line from line 1) gives the length s:
+     tmp <- getNucleotide(X[seq(1, nl, n + 1)])
+     s <- length(tmp)
+     obj <- matrix("", n, s)
+     obj[1, ] <- tmp
+     for (i in seq_len(n)[-1L]) # rows 2..n; empty when n == 1, where 2:n is c(2, 1)
+       obj[i, ] <- getNucleotide(X[seq(i, nl, n + 1)])
+   })