X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fread.nexus.R;h=969d42c0e403c23c28bb459ecdc41dc1d2a3305c;hb=b41db3e6053bfb0e4b9000d5de678d65a1af9670;hp=54384561166cad5e3e272b36cc01d7e23a70bb3e;hpb=dd6a471eb1c239c03a082a67573603a20e5256ad;p=ape.git diff --git a/R/read.nexus.R b/R/read.nexus.R index 5438456..969d42c 100644 --- a/R/read.nexus.R +++ b/R/read.nexus.R @@ -1,20 +1,28 @@ -## read.nexus.R (2008-07-04) +## read.nexus.R (2009-11-21) ## Read Tree File in Nexus Format -## Copyright 2003-2008 Emmanuel Paradis +## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. .treeBuildWithTokens <- function(x) { + ## remove potential node labels; see ?read.nexus for justification + node.label <- gsub("[:;].*$", "", strsplit(x, ")")[[1]][-1]) + has.node.labels <- FALSE + if (any(node.label != "")) { + x <- gsub(")[^:]*:", "):", x) + x <- gsub(")[^:]*;", ");", x) # if there's no root edge + has.node.labels <- TRUE + } phy <- .Call("treeBuildWithTokens", x, PACKAGE = "ape") dim(phy[[1]]) <- c(length(phy[[1]])/2, 2) - nms <- c("edge", "edge.length", "Nnode", "node.label") - if (length(phy) == 5) nms <- c(nms, "root.edge") + nms <- c("edge", "edge.length", "Nnode", "root.edge") + if (length(phy) == 3) nms <- nms[-4] names(phy) <- nms - if (!sum(phy[[4]])) phy[[4]] <- NULL + if (has.node.labels) phy$node.label <- node.label class(phy) <- "phylo" phy } @@ -110,7 +118,7 @@ read.nexus <- function(file, tree.names = NULL) if (any(w)) { # in case all comments use at least 2 lines s <- LEFT[w] X[s] <- gsub("\\[[^]]*\\]", "", X[s]) - ## The previous regexp was quite tough to find: it makes + ## The above regexp was quite tough to find: it makes ## possible to delete series of comments on the same line: ## ...[...]xxx[...]... ## without deleting the "xxx". This regexp is in three parts: @@ -153,16 +161,25 @@ read.nexus <- function(file, tree.names = NULL) tree <- X[start:end] rm(X) tree <- gsub("^.*= *", "", tree) + ## check whether there are empty lines from the above manips: + tree <- tree[tree != ""] semico <- grep(";", tree) Ntree <- length(semico) ## are some trees on several lines? - if (any(diff(semico) != 1)) { - STRING <- character(Ntree) - s <- c(1, semico[-Ntree] + 1) - j <- mapply(":", s, semico) - for (i in 1:Ntree) - STRING[i] <- paste(tree[j[, i]], collapse = "") - } else STRING <- tree + if (Ntree == 1 && length(tree) > 1) STRING <- paste(tree, collapse = "") else { + if (any(diff(semico) != 1)) { + STRING <- character(Ntree) + s <- c(1, semico[-Ntree] + 1) + j <- mapply(":", s, semico) + if (is.list(j)) { + for (i in 1:Ntree) + STRING[i] <- paste(tree[j[[i]]], collapse = "") + } else { + for (i in 1:Ntree) + STRING[i] <- paste(tree[j[, i]], collapse = "") + } + } else STRING <- tree + } rm(tree) STRING <- gsub(" ", "", STRING) colon <- grep(":", STRING) @@ -208,14 +225,28 @@ read.nexus <- function(file, tree.names = NULL) } if (Ntree == 1) { trees <- trees[[1]] - if (translation) trees$tip.label <- TRANS[, 2] + if (translation) { + trees$tip.label <- + if (length(colon)) TRANS[, 2] else + TRANS[, 2][as.numeric(trees$tip.label)] + } } else { if (!is.null(tree.names)) names(trees) <- tree.names - if (translation) attr(trees, "TipLabel") <- TRANS[, 2] + if (translation) { + if (length(colon) == Ntree) # .treeBuildWithTokens() was used + attr(trees, "TipLabel") <- TRANS[, 2] + else { # reassign the tip labels then compress + for (i in 1:Ntree) + trees[[i]]$tip.label <- + TRANS[, 2][as.numeric(trees[[i]]$tip.label)] + trees <- .compressTipLabel(trees) + } + } class(trees) <- "multiPhylo" } if (length(grep("[\\/]", file)) == 1) - file <- paste(getwd(), file, sep = "/") + if (!file.exists(file)) # suggestion by Francois Michonneau + file <- paste(getwd(), file, sep = "/") attr(trees, "origin") <- file trees }