X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ftree_build.c;h=5ac3f860f0cf4115cf682284459abe3e1cadde22;hb=6ee9e0a4e1e6bbc09187382bfdef57fafe3844c7;hp=aeb917cccee97c41593f31a7da03db17bd76f713;hpb=1090d5990d4b6f7feb10c87638f4229f53891eb7;p=ape.git diff --git a/src/tree_build.c b/src/tree_build.c index aeb917c..5ac3f86 100644 --- a/src/tree_build.c +++ b/src/tree_build.c @@ -1,6 +1,6 @@ -/* tree_build.c 2009-11-21 */ +/* tree_build.c 2011-06-23 */ -/* Copyright 2008-2009 Emmanuel Paradis */ +/* Copyright 2008-2011 Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -12,86 +12,126 @@ static int str2int(char *x, int n) { int i, k = 1, ans = 0; - for (i = n - 2; i >= 0; i--, k *= 10) + for (i = n - 1; i >= 0; i--, k *= 10) ans += ((int)x[i] - 48) * k; return ans; } -void decode_edge(const char *x, int a, int b, int *node, double *w) +void extract_portion_Newick(const char *x, int a, int b, char *y) { - int i, k, co = a; + int i, j; + + for (i = a, j = 0; i <= b; i++, j++) y[j] = x[i]; + + y[j] = '\0'; +} + +void decode_terminal_edge_token(const char *x, int a, int b, int *node, double *w) +{ + int co = a; char *endstr, str[100]; while (x[co] != ':') co++; - if (a == co) *node = 0; - else { - for (i = a, k = 0; i < co; i++, k++) str[k] = x[i]; - str[k] = '\0'; - *node = str2int(str, k + 1); - } - for (i = co + 1, k = 0; i <= b; i++, k++) str[k] = x[i]; - str[k] = '\0'; + + extract_portion_Newick(x, a, co - 1, str); + *node = str2int(str, co - a); + extract_portion_Newick(x, co + 1, b, str); *w = R_strtod(str, &endstr); } -#define ADD_INTERNAL_EDGE\ - e[j] = curnode;\ - e[j + nedge] = curnode = ++node;\ +void decode_internal_edge(const char *x, int a, int b, char *lab, double *w) +{ + int co = a; + char *endstr, str[100]; + + while (x[co] != ':') co++; + + if (a == co) lab[0] = '\0'; /* if no node label */ + else extract_portion_Newick(x, a, co - 1, lab); + + extract_portion_Newick(x, co + 1, b, str); + *w = R_strtod(str, &endstr); +} + +#define ADD_INTERNAL_EDGE \ + e[j] = curnode; \ + e[j + nedge] = curnode = ++node; \ + stack_internal[k++] = j; \ j++ -#define ADD_TERMINAL_EDGE\ - e[j] = curnode;\ - decode_edge(x, pr + 1, ps - 1, &tmpi, &tmpd);\ - e[j + nedge] = tmpi;\ - el[j] = tmpd;\ +#define ADD_TERMINAL_EDGE \ + e[j] = curnode; \ + decode_terminal_edge_token(x, pr + 1, ps - 1, &tmpi, &tmpd); \ + e[j + nedge] = tmpi; \ + el[j] = tmpd; \ j++ -#define GO_DOWN\ - l = j - 1;\ - while (e[l + nedge] != curnode) l--;\ - decode_edge(x, ps + 1, pt - 1, &tmpi, &tmpd);\ - el[l] = tmpd;\ +#define GO_DOWN \ + decode_internal_edge(x, ps + 1, pt - 1, lab, &tmpd); \ + SET_STRING_ELT(node_label, curnode - 1 - ntip, mkChar(lab)); \ + l = stack_internal[--k]; \ + el[l] = tmpd; \ curnode = e[l] SEXP treeBuildWithTokens(SEXP nwk) { const char *x; - int n, i, ntip = 1, nnode = 0, nedge, *e, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l; + int n, i, ntip = 1, nnode = 0, nedge, *e, curnode, node, j, *skeleton, nsk = 0, ps, pr, pt, tmpi, l, k, stack_internal[10000]; double *el, tmpd; - SEXP edge, edge_length, Nnode, phy; + char lab[512]; + SEXP edge, edge_length, Nnode, node_label, phy; PROTECT(nwk = coerceVector(nwk, STRSXP)); x = CHAR(STRING_ELT(nwk, 0)); n = strlen(x); skeleton = (int *)R_alloc(n, sizeof(int *)); + +/* first pass on the Newick string to localize parentheses and commas */ for (i = 0; i < n; i++) { - if (x[i] == '(' || x[i] == ',' || x[i] == ')') { + if (x[i] == '(') { skeleton[nsk] = i; nsk++; - switch(x[i]) { - case '(': break; - case ',': ntip++; break; - case ')': nnode++; break; - } + continue; + } + if (x[i] == ',') { + skeleton[nsk] = i; + nsk++; + ntip++; + continue; + } + if (x[i] == ')') { + skeleton[nsk] = i; + nsk++; + nnode++; } } + nedge = ntip + nnode - 1; PROTECT(Nnode = allocVector(INTSXP, 1)); PROTECT(edge = allocVector(INTSXP, nedge*2)); PROTECT(edge_length = allocVector(REALSXP, nedge)); + PROTECT(node_label = allocVector(STRSXP, nnode)); INTEGER(Nnode)[0] = nnode; e = INTEGER(edge); el = REAL(edge_length); curnode = node = ntip + 1; - j = 0; - + k = j = 0; +/* j: index of the current position in the edge matrix */ +/* k: index of the current position in stack_internal */ +/* stack_internal is a simple array storing the indices of the + successive internal edges from the root; it's a stack so it is + incremented every time an internal edge is added, and decremented + every GO_DOWN step. This makes easy to the index of the + subtending edge. */ + +/* second pass on the Newick string to build the "phylo" object elements */ for (i = 1; i < nsk - 1; i++) { ps = skeleton[i]; - Rprintf(""); /* <- again !!!! */ +// Rprintf(""); /* <- again !!!! */ if (x[ps] == '(') { ADD_INTERNAL_EDGE; continue; @@ -105,7 +145,7 @@ SEXP treeBuildWithTokens(SEXP nwk) continue; } if (x[ps] == ')') { - pt = skeleton[i + 1]; + pt = skeleton[i + 1]; // <- utile ??? if (x[pr] == ',') { ADD_TERMINAL_EDGE; GO_DOWN; @@ -124,22 +164,32 @@ SEXP treeBuildWithTokens(SEXP nwk) ADD_TERMINAL_EDGE; } -/* is there a root edge? */ +/* is there a root edge and/or root label? */ if (ps < n - 2) { - PROTECT(phy = allocVector(VECSXP, 4)); - SEXP root_edge; - decode_edge(x, ps + 1, n - 2, &tmpi, &tmpd); - PROTECT(root_edge = allocVector(REALSXP, 1)); - REAL(root_edge)[0] = tmpd; - SET_VECTOR_ELT(phy, 3, root_edge); - UNPROTECT(1); - } else PROTECT(phy = allocVector(VECSXP, 3)); + i = ps + 1; + while (i < n - 2 && x[i] != ':') i++; + if (i < n - 2) { + PROTECT(phy = allocVector(VECSXP, 5)); + SEXP root_edge; + decode_internal_edge(x, ps + 1, n - 2, lab, &tmpd); + PROTECT(root_edge = allocVector(REALSXP, 1)); + REAL(root_edge)[0] = tmpd; + SET_VECTOR_ELT(phy, 4, root_edge); + UNPROTECT(1); + SET_STRING_ELT(node_label, 0, mkChar(lab)); + } else { + extract_portion_Newick(x, ps + 1, n - 2, lab); + SET_STRING_ELT(node_label, 0, mkChar(lab)); + PROTECT(phy = allocVector(VECSXP, 4)); + } + } else PROTECT(phy = allocVector(VECSXP, 4)); SET_VECTOR_ELT(phy, 0, edge); SET_VECTOR_ELT(phy, 1, edge_length); SET_VECTOR_ELT(phy, 2, Nnode); + SET_VECTOR_ELT(phy, 3, node_label); - UNPROTECT(5); + UNPROTECT(6); return phy; }