]> git.donarmstrong.com Git - ape.git/blob - R/read.caic.R
final warp for ape 3.0-2
[ape.git] / R / read.caic.R
1 ## read.caic.R (2005-09-21)
2
3 ##   Read Tree File in CAIC Format
4
5 ## Copyright 2005 Julien Dutheil
6
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
9
10 read.caic <- function(file, brlen=NULL, skip = 0, comment.char="#", ...)
11 {
12   text <- scan(file = file, what = character(), sep="\n", skip = skip, comment.char = comment.char, ...)
13
14         # Parse the whole file:
15   n <- length(text) / 2
16   nodes <- 1:n;
17         leaf.names <- character(n)
18         patterns   <- character(n)
19         lengths    <- numeric(n)
20         for(i in 1:n)
21         {
22                 leaf.names[i] <- text[2*i]
23                 patterns[i]   <- text[2*i-1]
24                 lengths[i]    <- nchar(patterns[i])
25         }
26         # Sort all patterns if not done:
27         i <- order(patterns);
28         leaf.names <- leaf.names[i]
29         patterns   <- patterns[i]
30         lengths    <- lengths[i]
31
32         # This inner function compares two patterns:
33         test.patterns <- function(p1, p2)
34         {
35                 t1 <- strsplit(p1, split="")[[1]]
36                 t2 <- strsplit(p2, split="")[[1]]
37                 if(length(t1) == length(t2))
38                 {
39                         l <- length(t1)
40                         if(l==1) return(TRUE)
41                         return(all(t1[1:(l-1)]==t2[1:(l-1)]) & t1[l] != t2[l])
42                 }
43                 return(FALSE)
44         }
45
46         # The main loop:
47         while(length(nodes) > 1)
48         {
49                 # Recompute indexes:
50                 index <- logical(length(nodes))
51                 maxi  <- max(lengths)
52                 for(i in 1:length(nodes))
53                 {
54                         index[i] <- lengths[i] == maxi
55                 }
56                 i <- 1
57                 while(i <= length(nodes))
58                 {
59                         if(index[i])
60                         {
61                                 p <- paste("(",nodes[i],sep="")
62                                 c <- i+1
63                                 while(c <= length(nodes) && index[c] && test.patterns(patterns[i], patterns[c]))
64                                 {
65                                         p <- paste(p, nodes[c], sep=",")
66                                         c <- c+1
67                                 }
68                                 if(c-i < 2) stop("Unvalid format.")
69                                 p <- paste(p, ")", sep="")
70                                 nodes[i]   <- p
71                                 patterns[i]<- substr(patterns[i],1,nchar(patterns[i])-1)
72                                 lengths[i] <- lengths[i]-1
73                                 nodes      <- nodes   [-((i+1):(c-1))]
74                                 lengths    <- lengths [-((i+1):(c-1))]
75                                 patterns   <- patterns[-((i+1):(c-1))]
76                                 index      <- index   [-((i+1):(c-1))]
77                         }
78                         i <- i+1
79                 }
80         }
81
82         # Create a 'phylo' object and return it:
83         phy <- read.tree(text=paste(nodes[1],";", sep=""))
84         phy$tip.label <- leaf.names;
85         if(!is.null(brlen))
86         {
87                 br <- read.table(file=brlen)
88                 phy$edge.length <- br[,1]
89         }
90         return(phy)
91 }