X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fread.nexus.R;h=642bee7d15c3dcc6c2066a138f7f4e05cfa1534a;hb=1ad48c7a70983375138a6500372db588c8a3a134;hp=abcd1314b8d3b7b91c7449a580446f453577c7b7;hpb=21eb56120c84786502f24ff9c27b39d5badfe1f7;p=ape.git diff --git a/R/read.nexus.R b/R/read.nexus.R index abcd131..642bee7 100644 --- a/R/read.nexus.R +++ b/R/read.nexus.R @@ -1,28 +1,20 @@ -## read.nexus.R (2009-11-21) +## read.nexus.R (2011-03-26) ## Read Tree File in Nexus Format -## Copyright 2003-2009 Emmanuel Paradis +## Copyright 2003-2011 Emmanuel Paradis and 2010 Klaus Schliep ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .treeBuildWithTokens <- function(x) { - ## remove potential node labels; see ?read.nexus for justification - node.label <- gsub("[:;].*$", "", strsplit(x, ")")[[1]][-1]) - has.node.labels <- FALSE - if (any(node.label != "")) { - x <- gsub(")[^:]*:", "):", x) - x <- gsub(")[^:]*;", ");", x) # if there's no root edge - has.node.labels <- TRUE - } phy <- .Call("treeBuildWithTokens", x, PACKAGE = "ape") dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) - nms <- c("edge", "edge.length", "Nnode", "root.edge") - if (length(phy) == 3) nms <- nms[-4] + nms <- c("edge", "edge.length", "Nnode", "node.label", "root.edge") + if (length(phy) == 4) nms <- nms[-5] names(phy) <- nms - if (has.node.labels) phy$node.label <- node.label + if (all(phy$node.label == "")) phy$node.label <- NULL class(phy) <- "phylo" phy } @@ -33,18 +25,20 @@ clado.build <- function(tp) edge[j, 1] <<- current.node node <<- node + 1 edge[j, 2] <<- current.node <<- node + index[node] <<- j # set index j <<- j + 1 } add.terminal <- function() { edge[j, 1] <<- current.node edge[j, 2] <<- tip + index[tip] <<- j # set index tip.label[tip] <<- tpc[k] k <<- k + 1 tip <<- tip + 1 j <<- j + 1 } go.down <- function() { - l <- which(edge[, 2] == current.node) + l <- index[current.node] node.label[current.node - nb.tip] <<- tpc[k] k <<- k + 1 current.node <<- edge[l, 1] @@ -77,11 +71,12 @@ clado.build <- function(tp) edge[nb.edge, 1] <- 0 # see comment above edge[nb.edge, 2] <- node # + index <- numeric(nb.edge + 1) + index[node] <- nb.edge ## j: index of the line number of edge ## k: index of the line number of tpc ## tip: tip number j <- k <- tip <- 1 - for (i in 2:nsk) { if (skeleton[i] == "(") add.internal() # add an internal branch (on top) if (skeleton[i] == ",") { @@ -95,7 +90,6 @@ clado.build <- function(tp) if (skeleton[i - 1] == ")") go.down() # go down one level } } -# if(node.label[1] == "NA") node.label[1] <- "" edge <- edge[-nb.edge, ] obj <- list(edge = edge, tip.label = tip.label, Nnode = nb.node, node.label = node.label) @@ -160,12 +154,13 @@ read.nexus <- function(file, tree.names = NULL) end <- endblock[endblock > i1][1] - 1 tree <- X[start:end] rm(X) - tree <- gsub("^.*= *", "", tree) +### tree <- gsub("^.*= *", "", tree) ## check whether there are empty lines from the above manips: tree <- tree[tree != ""] semico <- grep(";", tree) - Ntree <- length(semico) + Ntree <- length(semico) # provisional -- some ";" may actually mark end of commands ## are some trees on several lines? + ## -- this actually 'packs' all characters ending with a ";" in a single string -- if (Ntree == 1 && length(tree) > 1) STRING <- paste(tree, collapse = "") else { if (any(diff(semico) != 1)) { STRING <- character(Ntree) @@ -181,7 +176,14 @@ read.nexus <- function(file, tree.names = NULL) } else STRING <- tree } rm(tree) - STRING <- gsub(" ", "", STRING) + ## exclude the possible command lines ending with ";": + STRING <- STRING[grep("^[[:blank:]]*tree.*= *", STRING, ignore.case = TRUE)] + Ntree <- length(STRING) # update Ntree + ## get the tree names: + nms.trees <- sub(" *= *.*", "", STRING) # only the first occurence of "=" + nms.trees <- sub("^ *tree *", "", nms.trees, ignore.case = TRUE) + STRING <- sub("^.*= *", "", STRING) # delete title and 'TREE' command with 'sub' + STRING <- gsub(" ", "", STRING) # delete all white spaces colon <- grep(":", STRING) if (!length(colon)) { trees <- lapply(STRING, clado.build) @@ -220,7 +222,7 @@ read.nexus <- function(file, tree.names = NULL) if (!translation) n <- length(tr$tip.label) ROOT <- n + 1 if (sum(tr$edge[, 1] == ROOT) == 1 && dim(tr$edge)[1] > 1) { - stop(paste("There is apparently two root edges in your file: cannot read tree file.\n Reading NEXUS file aborted at tree no.", i, sep = "")) + stop(paste("The tree has apparently singleton node(s): cannot read tree file.\n Reading NEXUS file aborted at tree no.", i, sep = "")) } } if (Ntree == 1) { @@ -243,6 +245,7 @@ read.nexus <- function(file, tree.names = NULL) } } class(trees) <- "multiPhylo" + if (!all(nms.trees == "")) names(trees) <- nms.trees } if (length(grep("[\\/]", file)) == 1) if (!file.exists(file)) # suggestion by Francois Michonneau