1 ## read.tree.R (2009-03-09)
3 ## Read Tree Files in Parenthetic Format
5 ## Copyright 2002-2009 Emmanuel Paradis and Daniel Lawson
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 tree.build <- function(tp)
12 add.internal <- function() {
13 edge[j, 1] <<- current.node
14 edge[j, 2] <<- current.node <<- node <<- node + 1L
17 add.terminal <- function() {
18 edge[j, 1] <<- current.node
20 X <- unlist(strsplit(tpc[k], ":"))
21 tip.label[tip] <<- X[1]
22 edge.length[j] <<- as.numeric(X[2])
27 go.down <- function() {
28 l <- which(edge[, 2] == current.node)
29 X <- unlist(strsplit(tpc[k], ":"))
30 node.label[current.node - nb.tip] <<- X[1]
31 edge.length[l] <<- as.numeric(X[2])
33 current.node <<- edge[l, 1]
35 if (!length(grep(",", tp))) {
36 obj <- list(edge = matrix(c(2L, 1L), 1, 2))
37 tp <- unlist(strsplit(tp, "[\\(\\):;]"))
38 obj$edge.length <- as.numeric(tp[3])
40 obj$tip.label <- tp[2]
41 if (length(tp) == 4) obj$node.label <- tp[4]
46 tpc <- unlist(strsplit(tp, "[\\(\\),;]"))
47 tpc <- tpc[nzchar(tpc)]
48 ## the following 2 lines are (slightly) faster than using gsub()
49 tsp <- unlist(strsplit(tp, NULL))
50 skeleton <- tsp[tsp %in% c("(", ")", ",", ";")]
51 nsk <- length(skeleton)
52 nb.node <- sum(skeleton == ")")
53 nb.tip <- sum(skeleton == ",") + 1
54 ## We will assume there is an edge at the root;
55 ## if so, it will be removed and put into a vector
56 nb.edge <- nb.node + nb.tip
57 node.label <- character(nb.node)
58 tip.label <- character(nb.tip)
60 edge.length <- numeric(nb.edge)
61 edge <- matrix(0L, nb.edge, 2)
62 current.node <- node <- as.integer(nb.tip + 1) # node number
63 edge[nb.edge, 2] <- node #
65 ## j: index of the line number of edge
66 ## k: index of the line number of tpc
71 if (skeleton[i] == "(") add.internal() # add an internal branch (on top)
72 if (skeleton[i] == ",") {
73 if (skeleton[i - 1] != ")") add.terminal() # add a terminal branch
75 if (skeleton[i] == ")") {
76 if (skeleton[i - 1] == ",") { # add a terminal branch and go down one level
80 if (skeleton[i - 1] == ")") go.down() # go down one level
84 edge <- edge[-nb.edge, ]
85 obj <- list(edge = edge, Nnode = nb.node, tip.label = tip.label)
86 root.edge <- edge.length[nb.edge]
87 edge.length <- edge.length[-nb.edge]
88 if (!all(is.na(edge.length))) # added 2005-08-18
89 obj$edge.length <- edge.length
90 if (is.na(node.label[1])) node.label[1] <- ""
91 if (any(nzchar(node.label))) obj$node.label <- node.label
92 if (!is.na(root.edge)) obj$root.edge <- root.edge
97 read.tree <- function(file = "", text = NULL, tree.names = NULL, skip = 0,
98 comment.char = "#", keep.multi = FALSE, ...)
100 unname <- function(treetext) {
102 while (substr(treetext, tstart, tstart) != "(" && tstart <= nchar(treetext))
105 return(c(substr(treetext, 1, tstart - 1),
106 substr(treetext, tstart, nchar(treetext))))
107 return(c("", treetext))
109 if (!is.null(text)) {
110 if (!is.character(text))
111 stop("argument `text' must be of mode character")
114 tree <- scan(file = file, what = "", sep = "\n", quiet = TRUE,
115 skip = skip, comment.char = comment.char, ...)
117 tmp <- lapply(tree, unname)
118 tmpnames <- sapply(tmp, function(x) x[1])
119 tree <- sapply(tmp, function(x) x[2])
120 if (is.null(tree.names) && any(nzchar(tmpnames))) tree.names <- tmpnames
121 ## Suggestion from Eric Durand and Nicolas Bortolussi (added 2005-08-17):
122 if (identical(tree, character(0))) {
123 warning("empty character string.")
126 tree <- gsub("[ \t]", "", tree)
127 tree <- unlist(strsplit(tree, NULL))
128 y <- which(tree == ";")
130 x <- c(1, y[-Ntree] + 1)
131 ## Suggestion from Olivier François (added 2006-07-15):
132 if (is.na(y[1])) return(NULL)
133 STRING <- character(Ntree)
135 STRING[i] <- paste(tree[x[i]:y[i]], sep = "", collapse = "")
136 colon <- grep(":", STRING)
137 if (!length(colon)) {
138 obj <- lapply(STRING, clado.build)
139 } else if (length(colon) == Ntree) {
140 obj <- lapply(STRING, tree.build)
142 obj <- vector("list", Ntree)
143 obj[colon] <- lapply(STRING[colon], tree.build)
144 nocolon <- (1:Ntree)[!1:Ntree %in% colon]
145 obj[nocolon] <- lapply(STRING[nocolon], clado.build)
148 ## Check here that the root edge is not incorrectly represented
149 ## in the object of class "phylo" by simply checking that there
150 ## is a bifurcation at the root
151 ROOT <- length(obj[[i]]$tip.label) + 1
152 if(sum(obj[[i]]$edge[, 1] == ROOT) == 1 && dim(obj[[i]]$edge)[1] > 1)
153 stop(paste("There is apparently two root edges in your file: cannot read tree file.\n Reading Newick file aborted at tree no.", i, sep = ""))
155 if (Ntree == 1 && !keep.multi) obj <- obj[[1]] else {
156 if (!is.null(tree.names)) names(obj) <- tree.names
157 class(obj) <- "multiPhylo"