-/* read_dna.c 2012-12-27 */
+/* read_dna.c 2013-01-19 */
-/* Copyright 2008-2012 Emmanuel Paradis */
+/* Copyright 2013 Emmanuel Paradis */
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, /* 40-49 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 50-59 */
0x00, 0x00, 0x00, 0x02, 0x00, 0x88, 0x70, 0x28, 0xd0, 0x00, /* 60-69 */
- 0x48, 0x00, 0xb0, 0x00, 0x00, 0x50, 0x00, 0xa0, 0xf0, 0x00, /* 70-79 */
+ 0x00, 0x48, 0xb0, 0x00, 0x00, 0x50, 0x00, 0xa0, 0xf0, 0x00, /* 70-79 */
0x00, 0x00, 0xc0, 0x60, 0x18, 0x00, 0xe0, 0x90, 0x00, 0x30, /* 80-89 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x70, 0x28, /* 90-99 */
0xd0, 0x00, 0x00, 0x48, 0xb0, 0x00, 0x00, 0x50, 0x00, 0xa0, /* 100-109 */
SEXP rawStreamToDNAbin(SEXP x)
{
- int N, i, j, k, n;
+ int N, i, j, k, n, startOfSeq;
unsigned char *xr, *rseq, *buffer, tmp;
SEXP obj, nms, seq;
N = LENGTH(x);
xr = RAW(x);
-/* do a 1st pass to find the number of sequences */
+/* do a 1st pass to find the number of sequences
- n = 0;
- j = 0; /* use j as a flag */
- for (i = 0; i < N; i++) {
+ this code should be robust to '>' present inside
+ a label or in the header text before the sequences */
+
+ n = j = 0; /* use j as a flag */
+ if (xr[0] == hook) {
+ j = 1;
+ startOfSeq = 0;
+ }
+ i = 1;
+ for (i = 1; i < N; i++) {
if (j && xr[i] == lineFeed) {
n++;
j = 0;
- } else if (xr[i] == hook) j = 1;
+ } else if (xr[i] == hook) {
+ if (!n) startOfSeq = i;
+ j = 1;
+ }
}
PROTECT(obj = allocVector(VECSXP, n));
/* Refine the way the size of the buffer is set? */
buffer = (unsigned char *)R_alloc(N, sizeof(unsigned char *));
- i = j = 0;
+ i = startOfSeq;
+ j = 0; /* gives the index of the sequence */
while (i < N) {
/* 1st read the label... */
- if (xr[i] == hook) {
- i++;
- k = 0;
- while (xr[i] != lineFeed) buffer[k++] = xr[i++];
- buffer[k] = '\0';
- SET_STRING_ELT(nms, j, mkChar(buffer));
- }
+ i++;
+ k = 0;
+ while (xr[i] != lineFeed) buffer[k++] = xr[i++];
+ buffer[k] = '\0';
+ SET_STRING_ELT(nms, j, mkChar(buffer));
/* ... then read the sequence */
n = 0;
while (xr[i] != hook && i < N) {
then the following check would not be needed; additionally the size
of tab_trans could be restricted to 0-121. This check has the
advantage that all invalid characters are simply ignored without
- causing error. */
+ causing error -- except if '>' occurs in the middle of a sequence. */
if (tmp) buffer[n++] = tmp;
}
PROTECT(seq = allocVector(RAWSXP, n));