]> git.donarmstrong.com Git - ape.git/blob - src/newick.c
e94ded5e612ed08acf9d29d9ac9dada4c9537727
[ape.git] / src / newick.c
1 /* newick.c    2010-11-23 */
2
3 /* Copyright 2007-2008 Vincent Lefort */
4
5 /* This file is part of the R-package `ape'. */
6 /* See the file ../COPYING for licensing issues. */
7
8 #include "me.h"
9
10 int nodeCount;
11 int edgeCount;
12
13 int whiteSpace(char c)
14 {
15   if ((' ' == c) || ('\t' == c) || ('\n' == c))
16     return(1);
17   else return(0);
18 }
19
20 int leaf(node *v)
21 {
22   int count = 0;
23   if (NULL != v->parentEdge)
24     count++;
25   if (NULL != v->leftEdge)
26     count++;
27   if (NULL != v->rightEdge)
28     count++;
29   if (NULL != v->middleEdge)
30     count++;
31   if (count > 1)
32     return(0);
33   return(1);
34 }
35
36 /*decodeNewickSubtree is used to turn a string of the form
37   "(v1:d1,v2:d2,(subtree) v3:d3....vk:dk) subroot:d," into a subtree
38   rooted at subrooted, with corresponding subtrees and leaves at v1
39   through vk.  It is called recursively on subtrees*/
40
41 node *decodeNewickSubtree(char *treeString, tree *T, int *uCount)
42 {
43   node *thisNode;
44   node *centerNode;
45   double thisWeight;
46   edge *thisEdge;
47 //  char label[MAX_LABEL_LENGTH];
48   char stringWeight[MAX_LABEL_LENGTH];
49   int state;
50   int i = 0;
51   int j;
52   int left,right;
53   int parcount;
54 //  snprintf(label,14,"Default_Label\0");
55   left = right = 0;
56   parcount = 0;
57   state = ReadOpenParenthesis;
58   if('(' == treeString[0])
59     parcount++;
60   //centerNode = makeNode(label,NULL,nodeCount++);
61   centerNode = makeNode("",NULL,nodeCount++);
62   T->size++;
63   while(parcount > 0)
64     {
65       while(whiteSpace(treeString[i]))
66         i++;
67       switch(state)
68         {
69         case(ReadOpenParenthesis):
70           if('(' != treeString[0])
71             {
72               Rprintf("Error reading subtree.\n");
73               exit(0);
74             }
75           i++;
76           state = ReadSubTree;
77           break;
78         case(ReadSubTree):
79           if('(' == treeString[i])  /*if treeString[i] is a left parenthesis,
80                                       we scan down the string until we find its partner.
81                                       the relative number of '('s vs. ')'s is counted
82                                       by the variable parcount*/
83             {
84               left = i++;
85               parcount++;
86               while(parcount > 1)
87                 {
88                   while (('(' != treeString[i]) && (')' != treeString[i]))
89                     i++;  /*skip over characters which are not parentheses*/
90                   if('(' == treeString[i])
91                     parcount++;
92                   else if (')' == treeString[i])
93                     parcount--;
94                   i++;
95                 }  /*end while */
96               right = i;  /*at this point, the subtree string goes from
97                             treeString[left] to treeString[right - 1]*/
98               thisNode = decodeNewickSubtree(treeString + left,T,uCount);  /*note that this
99                                                                       step will put
100                                                                       thisNode in T*/
101               i = right;  /*having created the node for the subtree, we move
102                             to assigning the label for the new node.
103                             treeString[right] will be the start of this label */
104             } /* end if ('(' == treeString[i]) condition */
105           else
106             {
107               //thisNode = makeNode(label,NULL,nodeCount++);
108               thisNode = makeNode("",NULL,nodeCount++);
109               T->size++;
110             }
111           state = ReadLabel;
112           break;
113         case(ReadLabel):
114           left = i;  /*recall "left" is the left marker for the substring, "right" the right*/
115           if (':' == treeString[i])   /*this means an internal node?*/
116             {
117               //sprintf(thisNode->label,"I%d",(*uCount)++);
118               //snprintf(thisNode->label,MAX_LABEL_LENGTH,"I%d",(*uCount)++);
119               (*uCount)++;
120               right = i;
121             }
122           else
123             {
124               while((':' != treeString[i]) && (',' != treeString[i]) && (')' != treeString[i]))
125                 i++;
126               right = i;
127               j = 0;
128               for(i = left; i < right;i++)
129                 if(!(whiteSpace(treeString[i])))
130                   thisNode->label[j++] = treeString[i];
131               thisNode->label[j] = '\0';
132             }
133           if(':' == treeString[right])
134             state = ReadWeight;
135           else
136             {
137               state = AddEdge;
138               thisWeight = 0.0;
139             }
140           i = right + 1;
141           break;
142         case(ReadWeight):
143           left = i;
144           while
145             ((',' != treeString[i]) && (')' != treeString[i]))
146             i++;
147           right = i;
148           j = 0;
149           for(i = left; i < right; i++)
150             stringWeight[j++] = treeString[i];
151           stringWeight[j] = '\0';
152           thisWeight = atof(stringWeight);
153           state=AddEdge;
154           break;
155         case(AddEdge):
156           //thisEdge = makeEdge(label,centerNode,thisNode,thisWeight);
157           thisEdge = makeEdge("",centerNode,thisNode,thisWeight);
158           thisNode->parentEdge = thisEdge;
159           if (NULL == centerNode->leftEdge)
160             centerNode->leftEdge = thisEdge;
161           else if (NULL == centerNode->rightEdge)
162             centerNode->rightEdge = thisEdge;
163           else if (NULL == centerNode->middleEdge)
164             centerNode->middleEdge = thisEdge;
165           else
166             {
167               Rprintf("Error: node %s has too many (>3) children.\n",centerNode->label);
168               exit(0);
169             }
170           //sprintf(thisEdge->label,"E%d",edgeCount++);
171           //snprintf(thisEdge->label,MAX_LABEL_LENGTH,"E%d",edgeCount++);
172           edgeCount++;
173           i = right + 1;
174           if (',' == treeString[right])
175             state = ReadSubTree;
176           else
177             parcount--;
178           break;
179         }
180     }
181   return(centerNode);
182 }
183
184 tree *readNewickString (char *str, int numLeaves)
185 {
186   tree *T;
187   node *centerNode;
188   int i = 0;
189   int j = 0;
190   int inputLength;
191   int uCount = 0;
192   int parCount = 0;
193   char rootLabel[MAX_LABEL_LENGTH];
194   nodeCount = edgeCount = 0;
195
196   T = newTree();
197
198   if ('(' != str[0])
199     {
200       Rprintf("Error reading generated tree - does not start with '('.\n");
201       exit(0);
202     }
203   inputLength = strlen (str)+1;
204   for(i = 0; i < inputLength; i++)
205     {
206       if ('(' == str[i])
207         parCount++;
208       else if (')' == str[i])
209         parCount--;
210       if (parCount > 0)
211         ;
212       else if (0 == parCount)
213         {
214           i++;
215 /*        if(';' == str[i])
216             sprintf(rootLabel,"URoot");
217           else
218             {*/
219               while(';' != str[i])
220                 if (!(whiteSpace (str[i++])))
221                   rootLabel[j++] = str[i-1];  /*be careful here */
222                 rootLabel[j] = '\0';
223 //          }
224           i = inputLength;
225         }
226       else if (parCount < 0)
227         {
228           Rprintf("Error reading generated tree. Too many right parentheses.\n");
229           exit(0);
230         }
231     }
232   centerNode = decodeNewickSubtree (str, T, &uCount);
233   snprintf (centerNode->label, MAX_LABEL_LENGTH, "%s", rootLabel); /* added "%s" following Jos Kafer's suggestion (2010-11-23) */
234   T->root = centerNode;
235   return(T);
236 }