X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fread_dna.c;h=01a4db32c5d2b1de517d2f659496a73c1d1b9c13;hb=a0436318d70829a2d16134be7ca1d6d454613a20;hp=6d44c6665603d19c0bcd0f6c8aea2409c94e671b;hpb=be6a044200152fd83d0b72f348012a0adc2593c5;p=ape.git diff --git a/src/read_dna.c b/src/read_dna.c index 6d44c66..01a4db3 100644 --- a/src/read_dna.c +++ b/src/read_dna.c @@ -1,6 +1,6 @@ -/* read_dna.c 2012-12-27 */ +/* read_dna.c 2013-01-04 */ -/* Copyright 2008-2012 Emmanuel Paradis */ +/* Copyright 2013 Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -81,7 +81,7 @@ static const unsigned char lineFeed = 0x0a; SEXP rawStreamToDNAbin(SEXP x) { - int N, i, j, k, n; + int N, i, j, k, n, startOfSeq; unsigned char *xr, *rseq, *buffer, tmp; SEXP obj, nms, seq; @@ -89,15 +89,25 @@ SEXP rawStreamToDNAbin(SEXP x) N = LENGTH(x); xr = RAW(x); -/* do a 1st pass to find the number of sequences */ +/* do a 1st pass to find the number of sequences - n = 0; - j = 0; /* use j as a flag */ - for (i = 0; i < N; i++) { + this code should be robust to '>' present inside + a label or in the header text before the sequences */ + + n = j = 0; /* use j as a flag */ + if (xr[0] == hook) { + j = 1; + startOfSeq = 0; + } + i = 1; + for (i = 1; i < N; i++) { if (j && xr[i] == lineFeed) { n++; j = 0; - } else if (xr[i] == hook) j = 1; + } else if (xr[i] == hook) { + if (!n) startOfSeq = i; + j = 1; + } } PROTECT(obj = allocVector(VECSXP, n)); @@ -106,16 +116,15 @@ SEXP rawStreamToDNAbin(SEXP x) /* Refine the way the size of the buffer is set? */ buffer = (unsigned char *)R_alloc(N, sizeof(unsigned char *)); - i = j = 0; + i = startOfSeq; + j = 0; /* gives the index of the sequence */ while (i < N) { /* 1st read the label... */ - if (xr[i] == hook) { - i++; - k = 0; - while (xr[i] != lineFeed) buffer[k++] = xr[i++]; - buffer[k] = '\0'; - SET_STRING_ELT(nms, j, mkChar(buffer)); - } + i++; + k = 0; + while (xr[i] != lineFeed) buffer[k++] = xr[i++]; + buffer[k] = '\0'; + SET_STRING_ELT(nms, j, mkChar(buffer)); /* ... then read the sequence */ n = 0; while (xr[i] != hook && i < N) { @@ -125,7 +134,7 @@ SEXP rawStreamToDNAbin(SEXP x) then the following check would not be needed; additionally the size of tab_trans could be restriced to 0-121. This check has the advantage that all invalid characters are simply ignored without - causing error. */ + causing error -- except if '>' occurs in the middle of a sequence. */ if (tmp) buffer[n++] = tmp; } PROTECT(seq = allocVector(RAWSXP, n));