1 "read.nexus.data" <- function (file)
3 # Simplified NEXUS data parser.
5 # Version: 09/13/2006 01:01:59 PM CEST
7 # By: Johan Nylander, nylander @ scs.fsu.edu
9 # WARNING: This is parser reads a restricted nexus format,
10 # see README for details.
12 # Argument (x) is a nexus formatted data file.
14 # Returns (Value) a list of data sequences each made of a single
15 # vector of mode character where each element is a character.
17 # TODO: Error checking, gap/missing, find.datatype, etc.
18 #------------------------------------------------------------------
20 "find.ntax" <- function (x)
22 for (i in 1:NROW(x)) {
23 if(any(f <- grep("\\bntax", x[i], ignore.case = TRUE))) {
24 ntax <- as.numeric(sub("(.+?)(ntax\\s*\\=\\s*)(\\d+)(.+)",
25 "\\3", x[i], perl = TRUE, ignore.case = TRUE))
32 "find.nchar" <- function (x)
34 for (i in 1:NROW(x)) {
35 if(any(f <- grep("\\bnchar", x[i], ignore.case = TRUE))) {
36 nchar <- as.numeric(sub("(.+?)(nchar\\s*\\=\\s*)(\\d+)(.+)",
37 "\\3", x[i], perl = TRUE, ignore.case = TRUE))
44 "find.matrix.line" <- function (x)
46 for (i in 1:NROW(x)) {
47 if(any(f <- grep("\\bmatrix\\b", x[i], ignore.case = TRUE))) {
48 matrix.line <- as.numeric(i)
55 "trim.whitespace" <- function (x)
60 "trim.semicolon" <- function (x)
65 if(file.access(file, mode = 4)) {
66 stop("file could not be found")
69 X <- scan(file = file, what = character(), sep = "\n",
70 quiet = TRUE, comment.char = "[", strip.white = TRUE)
72 nchar <- find.nchar(X)
73 matrix.line <- find.matrix.line(X)
74 start.reading <- matrix.line + 1
82 for (j in start.reading:NROW(X)) {
83 Xj <- trim.semicolon(X[j])
87 if(any(jtmp <- grep("\\bend\\b", X[j], perl = TRUE, ignore.case = TRUE))) {
90 ts <- unlist(strsplit(Xj, "(?<=\\S)(\\s+)(?=\\S)", perl = TRUE))
92 stop("nexus parser does not handle spaces in sequences or taxon names (ts>2)")
95 stop("nexus parser failed to read the sequences (ts!=2)")
97 Seq <- trim.whitespace(ts[2])
98 Name <- trim.whitespace(ts[1])
99 nAME <- paste(c("\\b", Name, "\\b"), collapse = "")
100 if (any(l <- grep(nAME, names(Obj)))) {
101 tsp <- strsplit(Seq, NULL)[[1]]
102 for (k in 1:length(tsp)) {
104 Obj[[l]][p] <- tsp[k]
109 names(Obj)[i] <- Name
110 tsp <- strsplit(Seq, NULL)[[1]]
111 for (k in 1:length(tsp)) {
113 Obj[[i]][p] <- tsp[k]
117 tot.ntax <- tot.ntax + 1
118 if (tot.ntax == ntax) {
121 tot.nchar <- tot.nchar + chars.done
122 if (tot.nchar == nchar*ntax) {
123 print("ntot was more than nchar*ntax")
133 cat("ntax:",ntax,"differ from actual number of taxa in file?\n")
134 stop("nexus parser did not read names correctly (tot.ntax!=0)")
136 for (i in 1:length(Obj)) {
137 if (length(Obj[[i]]) != nchar) {
138 cat(names(Obj[i]),"has",length(Obj[[i]]),"characters\n")
139 stop("nchar differ from sequence length (length(Obj[[i]])!=nchar)")
142 Obj <- lapply(Obj, tolower)