-## read.nexus.R (2008-02-28)
+## read.nexus.R (2012-02-09)
## Read Tree File in Nexus Format
-## Copyright 2003-2008 Emmanuel Paradis
+## Copyright 2003-2012 Emmanuel Paradis and 2010 Klaus Schliep
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
{
phy <- .Call("treeBuildWithTokens", x, PACKAGE = "ape")
dim(phy[[1]]) <- c(length(phy[[1]])/2, 2)
- nms <- c("edge", "edge.length", "Nnode", "node.label")
- if (length(phy) == 5) nms <- c(nms, "root.edge")
+ nms <- c("edge", "edge.length", "Nnode", "node.label", "root.edge")
+ if (length(phy) == 4) nms <- nms[-5]
names(phy) <- nms
- if (!sum(phy[[4]])) phy[[4]] <- NULL
+ if (all(phy$node.label == "")) phy$node.label <- NULL
class(phy) <- "phylo"
phy
}
edge[j, 1] <<- current.node
node <<- node + 1
edge[j, 2] <<- current.node <<- node
+ index[node] <<- j # set index
j <<- j + 1
}
add.terminal <- function() {
edge[j, 1] <<- current.node
edge[j, 2] <<- tip
+ index[tip] <<- j # set index
tip.label[tip] <<- tpc[k]
k <<- k + 1
tip <<- tip + 1
j <<- j + 1
}
go.down <- function() {
- l <- which(edge[, 2] == current.node)
+ l <- index[current.node]
node.label[current.node - nb.tip] <<- tpc[k]
k <<- k + 1
current.node <<- edge[l, 1]
obj <- list(edge = matrix(c(2, 1), 1, 2), Nnode = 1)
tp <- unlist(strsplit(tp, "[\\(\\);]"))
obj$tip.label <- tp[2]
- if (length(tp) == 3) obj$node.label <- tp[3]
+ if (tp[3] != "") obj$node.label <- tp[3]
class(obj) <- "phylo"
return(obj)
}
edge[nb.edge, 1] <- 0 # see comment above
edge[nb.edge, 2] <- node #
+ index <- numeric(nb.edge + 1)
+ index[node] <- nb.edge
## j: index of the line number of edge
## k: index of the line number of tpc
## tip: tip number
j <- k <- tip <- 1
-
for (i in 2:nsk) {
if (skeleton[i] == "(") add.internal() # add an internal branch (on top)
if (skeleton[i] == ",") {
if (skeleton[i - 1] == ")") go.down() # go down one level
}
}
-# if(node.label[1] == "NA") node.label[1] <- ""
edge <- edge[-nb.edge, ]
obj <- list(edge = edge, tip.label = tip.label,
Nnode = nb.node, node.label = node.label)
w <- LEFT == RIGHT
if (any(w)) { # in case all comments use at least 2 lines
s <- LEFT[w]
- X[s] <- gsub("\\[.*\\]", "", X[s])
+ X[s] <- gsub("\\[[^]]*\\]", "", X[s])
+ ## The above regexp was quite tough to find: it makes
+ ## possible to delete series of comments on the same line:
+ ## ...[...]xxx[...]...
+ ## without deleting the "xxx". This regexp is in three parts:
+ ## \\[ [^]]* \\]
+ ## where [^]]* means "any character, except "]", repeated zero
+ ## or more times" (note that the ']' is not escaped here).
+ ## The previous version was:
+ ## X[s] <- gsub("\\[.*\\]", "", X[s])
+ ## which deleted the "xxx". (EP 2008-06-24)
}
w <- !w
if (any(w)) {
end <- endblock[endblock > i1][1] - 1
tree <- X[start:end]
rm(X)
- tree <- gsub("^.*= *", "", tree)
+### tree <- gsub("^.*= *", "", tree)
+ ## check whether there are empty lines from the above manips:
+ tree <- tree[tree != ""]
semico <- grep(";", tree)
- Ntree <- length(semico)
+ Ntree <- length(semico) # provisional -- some ";" may actually mark end of commands
## are some trees on several lines?
- if (any(diff(semico) != 1)) {
- STRING <- character(Ntree)
- s <- c(1, semico[-Ntree] + 1)
- j <- mapply(":", s, semico)
- for (i in 1:Ntree)
- STRING[i] <- paste(tree[j[, i]], collapse = "")
- } else STRING <- tree
+ ## -- this actually 'packs' all characters ending with a ";" in a single string --
+ if (Ntree == 1 && length(tree) > 1) STRING <- paste(tree, collapse = "") else {
+ if (any(diff(semico) != 1)) {
+ STRING <- character(Ntree)
+ s <- c(1, semico[-Ntree] + 1)
+ j <- mapply(":", s, semico)
+ if (is.list(j)) {
+ for (i in 1:Ntree)
+ STRING[i] <- paste(tree[j[[i]]], collapse = "")
+ } else {
+ for (i in 1:Ntree)
+ STRING[i] <- paste(tree[j[, i]], collapse = "")
+ }
+ } else STRING <- tree
+ }
rm(tree)
- STRING <- gsub(" ", "", STRING)
+ ## exclude the possible command lines ending with ";":
+ STRING <- STRING[grep("^[[:blank:]]*tree.*= *", STRING, ignore.case = TRUE)]
+ Ntree <- length(STRING) # update Ntree
+ ## get the tree names:
+ nms.trees <- sub(" *= *.*", "", STRING) # only the first occurence of "="
+ nms.trees <- sub("^ *tree *", "", nms.trees, ignore.case = TRUE)
+ STRING <- sub("^.*= *", "", STRING) # delete title and 'TREE' command with 'sub'
+ STRING <- gsub(" ", "", STRING) # delete all white spaces
colon <- grep(":", STRING)
if (!length(colon)) {
trees <- lapply(STRING, clado.build)
if (!translation) n <- length(tr$tip.label)
ROOT <- n + 1
if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) {
- stop(paste("There is apparently two root edges in your file: cannot read tree file.\n Reading NEXUS file aborted at tree no.", i, sep = ""))
+ stop(paste("The tree has apparently singleton node(s): cannot read tree file.\n Reading NEXUS file aborted at tree no.", i, sep = ""))
}
}
- if (Ntree == 1) trees <- trees[[1]] else {
+ if (Ntree == 1) {
+ trees <- trees[[1]]
+ if (translation) {
+ trees$tip.label <-
+ if (length(colon)) TRANS[, 2] else
+ TRANS[, 2][as.numeric(trees$tip.label)]
+ }
+ } else {
if (!is.null(tree.names)) names(trees) <- tree.names
- if (translation) attr(trees, "TipLabel") <- TRANS[, 2]
+ if (translation) {
+ if (length(colon) == Ntree) # .treeBuildWithTokens() was used
+ attr(trees, "TipLabel") <- TRANS[, 2]
+ else { # reassign the tip labels then compress
+ for (i in 1:Ntree)
+ trees[[i]]$tip.label <-
+ TRANS[, 2][as.numeric(trees[[i]]$tip.label)]
+ trees <- .compressTipLabel(trees)
+ }
+ }
class(trees) <- "multiPhylo"
+ if (!all(nms.trees == "")) names(trees) <- nms.trees
}
- if (length(grep("[\\/]", file)) == 1)
- file <- paste(getwd(), file, sep = "/")
- attr(trees, "origin") <- file
trees
}