1 "read.nexus.data" <- function (file)
3 # Simplified NEXUS data parser.
5 # Version: 09/13/2006 01:01:59 PM CEST
6 # (modified by EP 2011-06-01)
8 # By: Johan Nylander, nylander @ scs.fsu.edu
10 # WARNING: This is parser reads a restricted nexus format,
11 # see README for details.
13 # Argument (x) is a nexus formatted data file.
15 # Returns (Value) a list of data sequences each made of a single
16 # vector of mode character where each element is a character.
18 # TODO: Error checking, gap/missing, find.datatype, etc.
19 #------------------------------------------------------------------
21 "find.ntax" <- function (x)
23 for (i in 1:NROW(x)) {
24 if(any(f <- grep("\\bntax", x[i], ignore.case = TRUE))) {
25 ntax <- as.numeric(sub("(.+?)(ntax\\s*\\=\\s*)(\\d+)(.+)",
26 "\\3", x[i], perl = TRUE, ignore.case = TRUE))
33 "find.nchar" <- function (x)
35 for (i in 1:NROW(x)) {
36 if(any(f <- grep("\\bnchar", x[i], ignore.case = TRUE))) {
37 nchar <- as.numeric(sub("(.+?)(nchar\\s*\\=\\s*)(\\d+)(.+)",
38 "\\3", x[i], perl = TRUE, ignore.case = TRUE))
45 "find.matrix.line" <- function (x)
47 for (i in 1:NROW(x)) {
48 if(any(f <- grep("\\bmatrix\\b", x[i], ignore.case = TRUE))) {
49 matrix.line <- as.numeric(i)
56 "trim.whitespace" <- function (x)
61 "trim.semicolon" <- function (x)
66 X <- scan(file = file, what = character(), sep = "\n",
67 quiet = TRUE, comment.char = "[", strip.white = TRUE)
69 nchar <- find.nchar(X)
70 matrix.line <- find.matrix.line(X)
71 start.reading <- matrix.line + 1
79 for (j in start.reading:NROW(X)) {
80 Xj <- trim.semicolon(X[j])
84 if(any(jtmp <- grep("\\bend\\b", X[j], perl = TRUE, ignore.case = TRUE))) {
87 ts <- unlist(strsplit(Xj, "(?<=\\S)(\\s+)(?=\\S)", perl = TRUE))
89 stop("nexus parser does not handle spaces in sequences or taxon names (ts>2)")
92 stop("nexus parser failed to read the sequences (ts!=2)")
94 Seq <- trim.whitespace(ts[2])
95 Name <- trim.whitespace(ts[1])
96 nAME <- paste(c("\\b", Name, "\\b"), collapse = "")
97 if (any(l <- grep(nAME, names(Obj)))) {
98 tsp <- strsplit(Seq, NULL)[[1]]
99 for (k in 1:length(tsp)) {
101 Obj[[l]][p] <- tsp[k]
106 names(Obj)[i] <- Name
107 tsp <- strsplit(Seq, NULL)[[1]]
108 for (k in 1:length(tsp)) {
110 Obj[[i]][p] <- tsp[k]
114 tot.ntax <- tot.ntax + 1
115 if (tot.ntax == ntax) {
118 tot.nchar <- tot.nchar + chars.done
119 if (tot.nchar == nchar*ntax) {
120 print("ntot was more than nchar*ntax")
130 cat("ntax:",ntax,"differ from actual number of taxa in file?\n")
131 stop("nexus parser did not read names correctly (tot.ntax!=0)")
133 for (i in 1:length(Obj)) {
134 if (length(Obj[[i]]) != nchar) {
135 cat(names(Obj[i]),"has",length(Obj[[i]]),"characters\n")
136 stop("nchar differ from sequence length (length(Obj[[i]])!=nchar)")
139 Obj <- lapply(Obj, tolower)