-/* tree_build.c 2008-03-09 */
+/* tree_build.c 2011-02-28 */
-/* Copyright 2008 Emmanuel Paradis */
+/* Copyright 2008-2011 Emmanuel Paradis */
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
{
int i, k = 1, ans = 0;
- for (i = n - 2; i >= 0; i--, k *= 10)
+ for (i = n - 1; i >= 0; i--, k *= 10)
ans += ((int)x[i] - 48) * k;
return ans;
}
-void decode_edge(const char *x, int a, int b, int *node, double *w)
+void extract_portion_Newick(const char *x, int a, int b, char *y)
+{
+ int i, j;
+
+ for (i = a, j = 0; i <= b; i++, j++) y[j] = x[i];
+
+ y[j] = '\0';
+}
+
+void decode_terminal_edge_token(const char *x, int a, int b, int *node, double *w)
+{
+ int co = a;
+ char *endstr, str[100];
+
+ while (x[co] != ':') co++;
+
+ extract_portion_Newick(x, a, co - 1, str);
+ *node = str2int(str, co - a);
+ extract_portion_Newick(x, co + 1, b, str);
+ *w = R_strtod(str, &endstr);
+}
+
+void decode_internal_edge(const char *x, int a, int b, char *lab, double *w)
{
int i, k, co = a;
char *endstr, str[100];
while (x[co] != ':') co++;
- if (a == co) *node = 0;
- else {
- for (i = a, k = 0; i < co; i++, k++) str[k] = x[i];
- str[k] = '\0';
- *node = str2int(str, k + 1);
- }
- for (i = co + 1, k = 0; i <= b; i++, k++) str[k] = x[i];
- str[k] = '\0';
+
+ if (a == co) lab[0] = '\0'; /* if no node label */
+ else extract_portion_Newick(x, a, co - 1, lab);
+
+ extract_portion_Newick(x, co + 1, b, str);
*w = R_strtod(str, &endstr);
}
-#define ADD_INTERNAL_EDGE\
- e[j] = curnode;\
- e[j + nedge] = curnode = ++node;\
+#define ADD_INTERNAL_EDGE \
+ e[j] = curnode; \
+ e[j + nedge] = curnode = ++node; \
+ stack_internal[k++] = j; \
j++
-#define ADD_TERMINAL_EDGE\
- e[j] = curnode;\
- decode_edge(x, pr + 1, ps - 1, &tmpi, &tmpd);\
- e[j + nedge] = tmpi;\
- el[j] = tmpd;\
+#define ADD_TERMINAL_EDGE \
+ e[j] = curnode; \
+ decode_terminal_edge_token(x, pr + 1, ps - 1, &tmpi, &tmpd); \
+ e[j + nedge] = tmpi; \
+ el[j] = tmpd; \
j++
-#define GO_DOWN\
- l = j - 1;\
- while (e[l + nedge] != curnode) l--;\
- decode_edge(x, ps + 1, pt - 1, &tmpi, &tmpd);\
- nl[curnode - ntip - 1] = tmpi;\
- el[l] = tmpd;\
+#define GO_DOWN \
+ decode_internal_edge(x, ps + 1, pt - 1, lab, &tmpd); \
+ SET_STRING_ELT(node_label, curnode - 1 - ntip, mkChar(lab)); \
+ l = stack_internal[--k]; \
+ el[l] = tmpd; \
curnode = e[l]
SEXP treeBuildWithTokens(SEXP nwk)
{
const char *x;
- int n, i, ntip = 1, nnode = 0, nedge, *e, *nl, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l;
+ int n, i, ntip = 1, nnode = 0, nedge, *e, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l, k, stack_internal[10000];
double *el, tmpd;
- SEXP node_label, edge, edge_length, Nnode, phy;
+ char lab[512];
+ SEXP edge, edge_length, Nnode, node_label, phy;
PROTECT(nwk = coerceVector(nwk, STRSXP));
x = CHAR(STRING_ELT(nwk, 0));
n = strlen(x);
skeleton = (int *)R_alloc(n, sizeof(int *));
+
+/* first pass on the Newick string to localize parentheses and commas */
for (i = 0; i < n; i++) {
- if (x[i] == '(' || x[i] == ',' || x[i] == ')') {
+ if (x[i] == '(') {
skeleton[nsk] = i;
nsk++;
- switch(x[i]) {
- case '(': break;
- case ',': ntip++; break;
- case ')': nnode++; break;
- }
+ continue;
+ }
+ if (x[i] == ',') {
+ skeleton[nsk] = i;
+ nsk++;
+ ntip++;
+ continue;
+ }
+ if (x[i] == ')') {
+ skeleton[nsk] = i;
+ nsk++;
+ nnode++;
}
}
+
nedge = ntip + nnode - 1;
- PROTECT(node_label = allocVector(INTSXP, nnode));
PROTECT(Nnode = allocVector(INTSXP, 1));
PROTECT(edge = allocVector(INTSXP, nedge*2));
PROTECT(edge_length = allocVector(REALSXP, nedge));
+ PROTECT(node_label = allocVector(STRSXP, nnode));
INTEGER(Nnode)[0] = nnode;
- nl = INTEGER(node_label);
- memset(nl, 0, nnode*sizeof(int));
e = INTEGER(edge);
el = REAL(edge_length);
curnode = node = ntip + 1;
- j = 0;
-
+ k = j = 0;
+/* j: index of the current position in the edge matrix */
+/* k: index of the current position in stack_internal */
+/* stack_internal is a simple array storing the indices of the
+ successive internal edges from the root; it's a stack so it is
+ incremented every time an internal edge is added, and decremented
+ every GO_DOWN step. This makes easy to the index of the
+ subtending edge. */
+
+/* second pass on the Newick string to build the "phylo" object elements */
for (i = 1; i < nsk - 1; i++) {
ps = skeleton[i];
- Rprintf(""); /* <- again !!!! */
+// Rprintf(""); /* <- again !!!! */
if (x[ps] == '(') {
ADD_INTERNAL_EDGE;
continue;
continue;
}
if (x[ps] == ')') {
- pt = skeleton[i + 1];
+ pt = skeleton[i + 1]; // <- utile ???
if (x[pr] == ',') {
ADD_TERMINAL_EDGE;
GO_DOWN;
ADD_TERMINAL_EDGE;
}
-/* is there a root edge? */
+/* is there a root edge and/or root label? */
if (ps < n - 2) {
- PROTECT(phy = allocVector(VECSXP, 5));
- SEXP root_edge;
- decode_edge(x, ps + 1, n - 2, &tmpi, &tmpd);
- PROTECT(root_edge = allocVector(REALSXP, 1));
- nl[0] = tmpi;
- REAL(root_edge)[0] = tmpd;
- SET_VECTOR_ELT(phy, 4, root_edge);
- UNPROTECT(1);
+ i = ps + 1;
+ while (i < n - 2 && x[i] != ':') i++;
+ if (i < n - 2) {
+ PROTECT(phy = allocVector(VECSXP, 5));
+ SEXP root_edge;
+ decode_internal_edge(x, ps + 1, n - 2, lab, &tmpd);
+ PROTECT(root_edge = allocVector(REALSXP, 1));
+ REAL(root_edge)[0] = tmpd;
+ SET_VECTOR_ELT(phy, 4, root_edge);
+ UNPROTECT(1);
+ SET_STRING_ELT(node_label, 0, mkChar(lab));
+ } else {
+ extract_portion_Newick(x, ps + 1, n - 2, lab);
+ SET_STRING_ELT(node_label, 0, mkChar(lab));
+ PROTECT(phy = allocVector(VECSXP, 4));
+ }
} else PROTECT(phy = allocVector(VECSXP, 4));
SET_VECTOR_ELT(phy, 0, edge);