X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=blobdiff_plain;f=kseq.h;h=a5cec7c289aeda5fe36edb2a828a9f669741d3a3;hp=0bbc7dc9370e910b19286167bd0862c64855387c;hb=307c147168f7154e3755712797078c513e0b242a;hpb=4efe4317c377019fbb64bcfae519403a3a9d1f5f diff --git a/kseq.h b/kseq.h index 0bbc7dc..a5cec7c 100644 --- a/kseq.h +++ b/kseq.h @@ -23,7 +23,7 @@ SOFTWARE. */ -/* Last Modified: 18AUG2011 */ +/* Last Modified: 05MAR2012 */ #ifndef AC_KSEQ_H #define AC_KSEQ_H @@ -34,7 +34,8 @@ #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r #define KS_SEP_TAB 1 // isspace() && !' ' -#define KS_SEP_MAX 1 +#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) +#define KS_SEP_MAX 2 #define __KS_TYPE(type_t) \ typedef struct __kstream_t { \ @@ -51,7 +52,7 @@ { \ kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ ks->f = f; \ - ks->buf = malloc(__bufsize); \ + ks->buf = (unsigned char*)malloc(__bufsize); \ return ks; \ } \ static inline void ks_destroy(kstream_t *ks) \ @@ -103,7 +104,10 @@ typedef struct __kstring_t { if (ks->end == 0) break; \ } else break; \ } \ - if (delimiter > KS_SEP_MAX) { \ + if (delimiter == KS_SEP_LINE) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (ks->buf[i] == '\n') break; \ + } else if (delimiter > KS_SEP_MAX) { \ for (i = ks->begin; i < ks->end; ++i) \ if (ks->buf[i] == delimiter) break; \ } else if (delimiter == KS_SEP_SPACE) { \ @@ -113,7 +117,7 @@ typedef struct __kstring_t { for (i = ks->begin; i < ks->end; ++i) \ if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ } else i = 0; /* never come to here! */ \ - if (str->m - str->l < i - ks->begin + 1) { \ + if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ str->m = str->l + (i - ks->begin) + 1; \ kroundup32(str->m); \ str->s = (char*)realloc(str->s, str->m); \ @@ -129,7 +133,7 @@ typedef struct __kstring_t { if (str->s == 0) { \ str->m = 1; \ str->s = (char*)calloc(1, 1); \ - } \ + } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ str->s[str->l] = '\0'; \ return str->l; \ } \ @@ -142,19 +146,16 @@ typedef struct __kstring_t { __KS_GETC(__read, __bufsize) \ __KS_GETUNTIL(__read, __bufsize) -#define __KSEQ_BASIC(type_t) \ - static inline kseq_t *kseq_init(type_t fd) \ +#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) + +#define __KSEQ_BASIC(SCOPE, type_t) \ + SCOPE kseq_t *kseq_init(type_t fd) \ { \ kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ s->f = ks_init(fd); \ return s; \ } \ - static inline void kseq_rewind(kseq_t *ks) \ - { \ - ks->last_char = 0; \ - ks->f->is_eof = ks->f->begin = ks->f->end = 0; \ - } \ - static inline void kseq_destroy(kseq_t *ks) \ + SCOPE void kseq_destroy(kseq_t *ks) \ { \ if (!ks) return; \ free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ @@ -167,8 +168,8 @@ typedef struct __kstring_t { -1 end-of-file -2 truncated quality string */ -#define __KSEQ_READ \ - static int kseq_read(kseq_t *seq) \ +#define __KSEQ_READ(SCOPE) \ + SCOPE int kseq_read(kseq_t *seq) \ { \ int c; \ kstream_t *ks = seq->f; \ @@ -179,14 +180,15 @@ typedef struct __kstring_t { } /* else: the first header char has been read in the previous call */ \ seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \ - if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* read FASTA/Q comment */ \ + if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ seq->seq.m = 256; \ seq->seq.s = (char*)malloc(seq->seq.m); \ } \ while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ + if (c == '\n') continue; /* skip empty lines */ \ seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ - ks_getuntil2(ks, '\n', &seq->seq, 0, 1); /* read the rest of the line */ \ + ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ } \ if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ @@ -202,7 +204,7 @@ typedef struct __kstring_t { } \ while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ if (c == -1) return -2; /* error: no quality string */ \ - while (ks_getuntil2(ks, '\n', &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ + while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ seq->last_char = 0; /* we have not come to the next header line */ \ if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ return seq->seq.l; \ @@ -215,10 +217,19 @@ typedef struct __kstring_t { kstream_t *f; \ } kseq_t; -#define KSEQ_INIT(type_t, __read) \ +#define KSEQ_INIT2(SCOPE, type_t, __read) \ KSTREAM_INIT(type_t, __read, 16384) \ __KSEQ_TYPE(type_t) \ - __KSEQ_BASIC(type_t) \ - __KSEQ_READ + __KSEQ_BASIC(SCOPE, type_t) \ + __KSEQ_READ(SCOPE) + +#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) + +#define KSEQ_DECLARE(type_t) \ + __KS_TYPE(type_t) \ + __KSEQ_TYPE(type_t) \ + extern kseq_t *kseq_init(type_t fd); \ + void kseq_destroy(kseq_t *ks); \ + int kseq_read(kseq_t *seq); #endif