1 ## read.tree.R (2012-09-14)
3 ## Read Tree Files in Parenthetic Format
5 ## Copyright 2002-2012 Emmanuel Paradis, Daniel Lawson and Klaus Schliep
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 tree.build <- function(tp)
12 add.internal <- function() {
13 edge[j, 1] <<- current.node
14 edge[j, 2] <<- current.node <<- node <<- node + 1L
15 index[node] <<- j # set index
18 add.terminal <- function() {
19 edge[j, 1] <<- current.node
21 index[tip] <<- j # set index
22 X <- unlist(strsplit(tpc[k], ":"))
23 tip.label[tip] <<- X[1]
24 edge.length[j] <<- as.numeric(X[2])
29 go.down <- function() {
30 l <- index[current.node]
31 X <- unlist(strsplit(tpc[k], ":"))
32 node.label[current.node - nb.tip] <<- X[1]
33 edge.length[l] <<- as.numeric(X[2])
35 current.node <<- edge[l, 1]
37 if (!length(grep(",", tp))) {
38 obj <- list(edge = matrix(c(2L, 1L), 1, 2))
39 tp <- unlist(strsplit(tp, "[\\(\\):;]"))
40 obj$edge.length <- as.numeric(tp[3])
42 obj$tip.label <- tp[2]
43 if (tp[4] != "") obj$node.label <- tp[4]
48 tpc <- unlist(strsplit(tp, "[\\(\\),;]"))
49 tpc <- tpc[nzchar(tpc)]
50 ## the following 2 lines are (slightly) faster than using gsub()
51 tsp <- unlist(strsplit(tp, NULL))
52 skeleton <- tsp[tsp %in% c("(", ")", ",", ";")]
53 nsk <- length(skeleton)
54 nb.node <- sum(skeleton == ")")
55 nb.tip <- sum(skeleton == ",") + 1
56 ## We will assume there is an edge at the root;
57 ## if so, it will be removed and put into a vector
58 nb.edge <- nb.node + nb.tip
59 node.label <- character(nb.node)
60 tip.label <- character(nb.tip)
62 edge.length <- numeric(nb.edge)
63 edge <- matrix(0L, nb.edge, 2)
64 current.node <- node <- as.integer(nb.tip + 1) # node number
65 edge[nb.edge, 2] <- node
66 index <- numeric(nb.edge + 1) # hash index to avoid which
67 index[node] <- nb.edge
69 ## j: index of the line number of edge
70 ## k: index of the line number of tpc
75 if (skeleton[i] == "(") add.internal() # add an internal branch (on top)
76 if (skeleton[i] == ",") {
77 if (skeleton[i - 1] != ")") add.terminal() # add a terminal branch
79 if (skeleton[i] == ")") {
80 if (skeleton[i - 1] == ",") { # add a terminal branch and go down one level
84 if (skeleton[i - 1] == ")") go.down() # go down one level
88 edge <- edge[-nb.edge, ]
89 obj <- list(edge = edge, Nnode = nb.node, tip.label = tip.label)
90 root.edge <- edge.length[nb.edge]
91 edge.length <- edge.length[-nb.edge]
92 if (!all(is.na(edge.length))) # added 2005-08-18
93 obj$edge.length <- edge.length
94 if (is.na(node.label[1])) node.label[1] <- ""
95 if (any(nzchar(node.label))) obj$node.label <- node.label
96 if (!is.na(root.edge)) obj$root.edge <- root.edge
98 attr(obj, "order") <- "cladewise"
102 read.tree <- function(file = "", text = NULL, tree.names = NULL, skip = 0,
103 comment.char = "#", keep.multi = FALSE, ...)
105 unname <- function(treetext) {
106 nc <- nchar(treetext)
108 while (substr(treetext, tstart, tstart) != "(" && tstart <= nc)
111 return(c(substr(treetext, 1, tstart - 1),
112 substr(treetext, tstart, nc)))
113 return(c("", treetext))
115 if (!is.null(text)) {
116 if (!is.character(text))
117 stop("argument `text' must be of mode character")
120 tree <- scan(file = file, what = "", sep = "\n", quiet = TRUE,
121 skip = skip, comment.char = comment.char, ...)
123 ## Suggestion from Eric Durand and Nicolas Bortolussi (added 2005-08-17):
124 if (identical(tree, character(0))) {
125 warning("empty character string.")
128 tree <- gsub("[ \t]", "", tree)
129 tree <- unlist(strsplit(tree, NULL))
130 y <- which(tree == ";")
132 x <- c(1, y[-Ntree] + 1)
133 ## Suggestion from Olivier François (added 2006-07-15):
134 if (is.na(y[1])) return(NULL)
135 STRING <- character(Ntree)
137 STRING[i] <- paste(tree[x[i]:y[i]], sep = "", collapse = "")
139 tmp <- unlist(lapply(STRING, unname))
140 tmpnames <- tmp[c(TRUE, FALSE)]
141 STRING <- tmp[c(FALSE, TRUE)]
142 if (is.null(tree.names) && any(nzchar(tmpnames)))
143 tree.names <- tmpnames
145 colon <- grep(":", STRING)
146 if (!length(colon)) {
147 obj <- lapply(STRING, clado.build)
148 } else if (length(colon) == Ntree) {
149 obj <- lapply(STRING, tree.build)
151 obj <- vector("list", Ntree)
152 obj[colon] <- lapply(STRING[colon], tree.build)
153 nocolon <- (1:Ntree)[!1:Ntree %in% colon]
154 obj[nocolon] <- lapply(STRING[nocolon], clado.build)
157 ## Check here that the root edge is not incorrectly represented
158 ## in the object of class "phylo" by simply checking that there
159 ## is a bifurcation at the root
160 ROOT <- length(obj[[i]]$tip.label) + 1
161 if(sum(obj[[i]]$edge[, 1] == ROOT) == 1 && dim(obj[[i]]$edge)[1] > 1)
162 stop(paste("The tree has apparently singleton node(s): cannot read tree file.\n Reading Newick file aborted at tree no.", i))
164 if (Ntree == 1 && !keep.multi) obj <- obj[[1]] else {
165 if (!is.null(tree.names)) names(obj) <- tree.names
166 class(obj) <- "multiPhylo"