1 ## read.tree.R (2007-12-22)
3 ## Read Tree Files in Parenthetic Format
5 ## Copyright 2002-2007 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 tree.build <- function(tp)
12 add.internal <- function() {
13 edge[j, 1] <<- current.node
14 edge[j, 2] <<- current.node <<- node <<- node + 1
17 add.terminal <- function() {
18 edge[j, 1] <<- current.node
20 X <- unlist(strsplit(tpc[k], ":"))
21 tip.label[tip] <<- X[1]
22 edge.length[j] <<- as.numeric(X[2])
27 go.down <- function() {
28 l <- which(edge[, 2] == current.node)
29 X <- unlist(strsplit(tpc[k], ":"))
30 node.label[current.node - nb.tip] <<- X[1]
31 edge.length[l] <<- as.numeric(X[2])
33 current.node <<- edge[l, 1]
35 if (!length(grep(",", tp))) {
36 obj <- list(edge = matrix(c(2, 1), 1, 2))
37 tp <- unlist(strsplit(tp, "[\\(\\):;]"))
38 obj$edge.length <- as.numeric(tp[3])
40 obj$tip.label <- tp[2]
41 if (length(tp) == 4) obj$node.label <- tp[4]
45 tsp <- unlist(strsplit(tp, NULL))
46 tpc <- unlist(strsplit(tp, "[\\(\\),;]"))
48 skeleton <- tsp[tsp == "(" | tsp == ")" | tsp == "," | tsp == ";"]
49 nsk <- length(skeleton)
50 nb.node <- sum(skeleton == ")")
51 nb.tip <- sum(skeleton == ",") + 1
52 ## We will assume there is an edge at the root;
53 ## if so, it will be removed and put into a vector
54 nb.edge <- nb.node + nb.tip
55 node.label <- character(nb.node)
56 tip.label <- character(nb.tip)
58 edge.length <- numeric(nb.edge)
59 edge <- matrix(NA, nb.edge, 2)
60 current.node <- node <- nb.tip + 1 # node number
61 edge[nb.edge, 1] <- 0 # see comment above
62 edge[nb.edge, 2] <- node #
64 ## j: index of the line number of edge
65 ## k: index of the line number of tpc
70 if (skeleton[i] == "(") add.internal() # add an internal branch (on top)
71 if (skeleton[i] == ",") {
72 if (skeleton[i - 1] != ")") add.terminal() # add a terminal branch
74 if (skeleton[i] == ")") {
75 if (skeleton[i - 1] == ",") { # add a terminal branch and go down one level
79 if (skeleton[i - 1] == ")") go.down() # go down one level
82 if (is.na(node.label[1])) node.label[1] <- ""
83 edge <- edge[-nb.edge, ]
84 root.edge <- edge.length[nb.edge]
85 edge.length <- edge.length[-nb.edge]
86 obj <- list(edge = edge, edge.length = edge.length, Nnode = nb.node,
87 tip.label = tip.label, node.label = node.label,
88 root.edge = root.edge)
89 if (all(obj$node.label == "")) obj$node.label <- NULL
90 if (is.na(obj$root.edge)) obj$root.edge <- NULL
91 if (all(is.na(obj$edge.length))) obj$edge.length <- NULL # added 2005-08-18
96 read.tree <- function(file = "", text = NULL, tree.names = NULL,
97 skip = 0, comment.char = "#", ...)
100 if (!is.character(text))
101 stop("argument `text' must be of mode character")
104 tree <- scan(file = file, what = character(), sep = "\n", quiet = TRUE,
105 skip = skip, comment.char = comment.char, ...)
107 ## Suggestion from Eric Durand and Nicolas Bortolussi (added 2005-08-17):
108 if (identical(tree, character(0))) {
109 warning("empty character string.")
112 tree <- gsub("[ \t]", "", tree)
113 tsp <- unlist(strsplit(tree, NULL))
114 ind <- which(tsp == ";")
115 nb.tree <- length(ind)
116 x <- c(1, ind[-nb.tree] + 1)
118 ## Suggestion from Olivier François (added 2006-07-15):
119 if (is.na(y[1])) return(NULL)
121 STRING <- vector("list", nb.tree)
123 STRING[[i]] <- paste(tsp[x[i]:y[i]], sep = "", collapse = "")
125 obj <- vector("list", nb.tree)
126 for (i in 1:nb.tree) {
127 obj[[i]] <- if (length(grep(":", STRING[[i]]))) tree.build(STRING[[i]]) else clado.build(STRING[[i]])
128 ## Check here that the root edge is not incorrectly represented
129 ## in the object of class "phylo" by simply checking that there
130 ## is a bifurcation at the root
131 ROOT <- length(obj[[i]]$tip.label) + 1
132 if(sum(obj[[i]]$edge[, 1] == ROOT) == 1 && dim(obj[[i]]$edge)[1] > 1) {
133 stop(paste("There is apparently two root edges in your file: cannot read tree file.\n Reading Newick file aborted at tree no.", i, sep = ""))
136 if (nb.tree == 1) obj <- obj[[1]] else {
137 if (is.null(tree.names))
138 tree.names <- paste("tree", 1:nb.tree, sep = "")
139 names(obj) <- tree.names
140 class(obj) <- "multiPhylo"