## read.GenBank.R (2007-06-27)
## Read DNA Sequences from GenBank via Internet
## Copyright 2002-2007 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
## Read DNA sequences from GenBank through the NCBI E-utilities `efetch'
## service.
##
## Arguments:
##   access.nb      vector of GenBank accession numbers to download.
##   seq.names      names given to the returned sequences (defaults to the
##                  accession numbers themselves).
##   species.names  if TRUE, the text following each ORGANISM field is
##                  attached to the result as the attribute "species",
##                  with spaces replaced by underscores.
##   as.character   if TRUE, the sequences are returned as a plain list of
##                  single-character vectors; otherwise the list is passed
##                  through as.DNAbin().
##
## Value: a list with one element per accession number, named by
## `seq.names', optionally carrying the "species" attribute.
##
## An error is raised if GenBank returns fewer sequence records than
## accession numbers were requested.
read.GenBank <- function(access.nb, seq.names = access.nb,
                         species.names = TRUE, as.character = FALSE)
{
    N <- length(access.nb)
    ## If there are more than 400 sequences, we need to break down the
    ## requests, otherwise there is a segmentation fault.
    batch.size <- 400L
    nrequest <- ceiling(N / batch.size)

    ## Collect each batch in a pre-sized list instead of growing a vector
    ## inside the loop; seq_len() makes an empty `access.nb' issue no
    ## request at all (1:0 would iterate over c(1, 0)).
    chunks <- vector("list", nrequest)
    for (i in seq_len(nrequest)) {
        a <- (i - 1L) * batch.size + 1L
        b <- min(i * batch.size, N) # the last batch may be shorter
        ## NCBI E-utilities is served over HTTPS; retmode=text explicitly
        ## requests the GenBank flat-file text that is parsed below.
        URL <- paste0("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=",
                      paste(access.nb[a:b], collapse = ","),
                      "&rettype=gb&retmode=text")
        chunks[[i]] <- scan(file = URL, what = "", sep = "\n", quiet = TRUE)
    }
    X <- as.character(unlist(chunks, use.names = FALSE))

    ## In each record the sequence lines sit between the ORIGIN line and
    ## the "//" terminator line.
    FI <- grep("^ {0,}ORIGIN", X) + 1
    LA <- which(X == "//") - 1
    if (length(FI) < N || length(LA) < N)
        stop("GenBank returned fewer sequence records than the number of accession numbers requested; check 'access.nb'",
             call. = FALSE)

    obj <- vector("list", N)
    for (i in seq_len(N)) {
        ## remove all spaces and digits
        tmp <- gsub("[[:digit:] ]", "", X[FI[i]:LA[i]])
        obj[[i]] <- unlist(strsplit(tmp, NULL))
    }
    names(obj) <- seq.names
    if (!as.character) obj <- as.DNAbin(obj)

    if (species.names) {
        sp <- grep("ORGANISM", X)
        ## Keep what follows the ORGANISM keyword on each matching line;
        ## the result is NA when there is no such line for a record.
        tmp <- vapply(X[sp[seq_len(N)]],
                      function(line) strsplit(line, " +ORGANISM +")[[1L]][2L],
                      character(1), USE.NAMES = FALSE)
        attr(obj, "species") <- gsub(" ", "_", tmp)
    }
    obj
}