3 Copyright (c) 2008 Genome Research Ltd (GRL).
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
29 2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"
32 /* Last Modified: 16JUL2009 */
/* Special delimiter codes accepted by ks_getuntil() in place of a literal character. */
41 #define KS_SEP_SPACE 0 // stop at any isspace() character: ' ', \t, \n, \v, \f, \r
42 #define KS_SEP_TAB 1 // stop at any isspace() character except ' '
/* Declares struct __kstream_t, the buffered-reader state behind the ks_* routines
   (used as kstream_t elsewhere in this file). type_t is the type of the input
   handle that ks_init() receives. */
45 #define __KS_TYPE(type_t) \
46 typedef struct __kstream_t { \
48 int begin, end, is_eof; /* begin: index of the next unread byte in buf; end: byte count returned by the last __read; is_eof: set once a read returns fewer than __bufsize bytes */ \
/* Non-zero once every buffered byte has been consumed and the stream has reported end of input. */
#define ks_eof(ks) ((ks)->begin >= (ks)->end && (ks)->is_eof)
/* Empties the buffer window and clears the EOF flag; the expression itself evaluates to 0. */
#define ks_rewind(ks) ((ks)->begin = (ks)->end = (ks)->is_eof = 0)
/* Generates ks_init() and ks_destroy() for a stream over handles of type type_t,
   using a read buffer of __bufsize bytes. */
55 #define __KS_BASIC(type_t, __bufsize) \
/* ks_init(): creates the reader state for handle f. */ \
56 static inline kstream_t *ks_init(type_t f) \
58 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); /* zero-filled, so begin, end and is_eof all start at 0 */ \
60 ks->buf = malloc(__bufsize); /* NOTE(review): neither this result nor the calloc above is checked in the lines visible here */ \
/* ks_destroy(): takes the stream to dispose of; its body lies outside the lines shown here. */ \
63 static inline void ks_destroy(kstream_t *ks) \
/* Generates ks_getc(): returns the next byte of the stream as an int, or -1 when
   no more data is available. __read is called as __read(handle, buf, __bufsize)
   and its result is used as the number of valid bytes in buf. */
71 #define __KS_GETC(__read, __bufsize) \
72 static inline int ks_getc(kstream_t *ks) \
74 if (ks->is_eof && ks->begin >= ks->end) return -1; /* buffer drained and end of input already seen */ \
75 if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
77 ks->end = __read(ks->f, ks->buf, __bufsize); \
78 if (ks->end < __bufsize) ks->is_eof = 1; /* a short read is taken to mean end of input */ \
79 if (ks->end == 0) return -1; /* nothing was read */ \
/* NOTE(review): a negative __read result (an error) sets is_eof but does not take the == 0 exit above, so control appears to reach the return below with no fresh data - confirm that __read cannot return < 0 */ \
81 return (int)ks->buf[ks->begin++]; /* hand out one byte and advance the read position */ \
/* Growable string buffer. The code in this file uses three of its fields:
   l (bytes currently stored), m (allocated capacity) and s (the character data).
   KSTRING_T is defined next to the typedef - presumably so that other headers can
   detect that kstring_t already exists; the matching guard is not visible here. */
85 #define KSTRING_T kstring_t
86 typedef struct __kstring_t {
/* Rounds x up, in place, to the nearest power of two (a value that already is a
   power of two is left unchanged). x must be a modifiable integer lvalue and is
   evaluated several times; the bit smearing covers 32 bits. The expression yields
   the rounded value. */
#define kroundup32(x) \
	(--(x), \
	 (x) = (x) | ((x) >> 1), \
	 (x) = (x) | ((x) >> 2), \
	 (x) = (x) | ((x) >> 4), \
	 (x) = (x) | ((x) >> 8), \
	 (x) = (x) | ((x) >> 16), \
	 ++(x))
/* Generates ks_getuntil(): copies bytes from the stream into str until a delimiter
   is met.
     delimiter  KS_SEP_SPACE, KS_SEP_TAB, or a character code greater than KS_SEP_MAX
     str        destination buffer; grown with realloc as needed and NUL-terminated
     dret       optional; cleared to 0 on entry, later set to the byte at which the
                scan stopped
   Returns -1 when called with the buffer empty and end of input already reached;
   the remaining return paths lie outside the lines shown here. */
96 #define __KS_GETUNTIL(__read, __bufsize) \
97 static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
99 if (dret) *dret = 0; \
101 if (ks->begin >= ks->end && ks->is_eof) return -1; /* nothing buffered and nothing left to read */ \
104 if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
107 ks->end = __read(ks->f, ks->buf, __bufsize); \
108 if (ks->end < __bufsize) ks->is_eof = 1; /* a short read is taken to mean end of input */ \
109 if (ks->end == 0) break; /* NOTE(review): only 0 is handled; a negative __read result would fall through to the scan below - confirm __read cannot return < 0 */ \
/* Scan the buffered bytes for the first delimiter; i ends at its index, or at ks->end if none is found. */ \
112 if (delimiter > KS_SEP_MAX) { /* literal delimiter character */ \
113 for (i = ks->begin; i < ks->end; ++i) \
114 if (ks->buf[i] == delimiter) break; \
115 } else if (delimiter == KS_SEP_SPACE) { /* any whitespace */ \
116 for (i = ks->begin; i < ks->end; ++i) \
117 if (isspace(ks->buf[i])) break; \
118 } else if (delimiter == KS_SEP_TAB) { /* any whitespace other than ' ' */ \
119 for (i = ks->begin; i < ks->end; ++i) \
120 if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
121 } else i = 0; /* only reached for a delimiter <= KS_SEP_MAX that is neither named code; not expected to happen */ \
122 if (str->m - str->l < i - ks->begin + 1) { /* not enough room for the new bytes plus a terminator */ \
123 str->m = str->l + (i - ks->begin) + 1; \
124 kroundup32(str->m); /* grow the capacity to a power of two */ \
125 str->s = (char*)realloc(str->s, str->m); /* NOTE(review): result is not checked; on failure the old buffer is lost and the memcpy below writes through NULL */ \
127 memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); /* append the bytes in [begin, i) */ \
128 str->l = str->l + (i - ks->begin); \
131 if (dret) *dret = ks->buf[i]; /* report the byte at the stop position */ \
137 str->s = (char*)calloc(1, 1); /* one zeroed byte; the condition guarding this line is not visible here (presumably str->s still being NULL - TODO confirm) */ \
139 str->s[str->l] = '\0'; /* keep str usable as a C string */ \
/* Instantiates the buffered stream reader for one kind of input handle by
   expanding the helper macros listed below.
     type_t     type of the input handle
     __read     reader, called as __read(handle, buf, __bufsize)
     __bufsize  size in bytes of the internal read buffer */
143 #define KSTREAM_INIT(type_t, __read, __bufsize) \
145 __KS_BASIC(type_t, __bufsize) \
146 __KS_GETC(__read, __bufsize) \
147 __KS_GETUNTIL(__read, __bufsize)
/* Generates the lifecycle helpers of the sequence reader: kseq_init(),
   kseq_rewind() and kseq_destroy(). */
149 #define __KSEQ_BASIC(type_t) \
/* kseq_init(): creates a reader over handle fd. */ \
150 static inline kseq_t *kseq_init(type_t fd) \
152 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); /* zero-filled; NOTE(review): result is not checked in the lines visible here */ \
153 s->f = ks_init(fd); /* attach a buffered stream to the handle */ \
/* kseq_rewind(): resets the reader's buffering state; other statements of this function are not visible here. */ \
156 static inline void kseq_rewind(kseq_t *ks) \
159 ks->f->is_eof = ks->f->begin = ks->f->end = 0; /* same reset that ks_rewind() applies to a stream */ \
/* kseq_destroy(): releases the reader; only the line freeing the four string buffers is visible here. */ \
161 static inline void kseq_destroy(kseq_t *ks) \
164 free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
170 >=0 length of the sequence (normal)
172 -2 truncated quality string
/* Generates kseq_read(): reads the next record into seq->name, seq->comment,
   seq->seq and, when a '+' line follows the sequence, seq->qual.
   Return values set by the lines visible here:
     seq->seq.l  a record without a quality section was read
     -1          end of input before a header marker, or the name could not be read
     -2          input ended inside the '+' line, or fewer quality characters than
                 sequence characters were found
   A header starts at '>' or '@'; seq->last_char remembers a header marker that
   was already consumed while reading the previous record. */
174 #define __KSEQ_READ \
175 static int kseq_read(kseq_t *seq) \
178 kstream_t *ks = seq->f; \
179 if (seq->last_char == 0) { /* then jump to the next header line */ \
180 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
181 if (c == -1) return -1; /* end of file */ \
182 seq->last_char = c; \
183 } /* the first header char has been read */ \
184 seq->comment.l = seq->seq.l = seq->qual.l = 0; /* empty the per-record fields; the buffers themselves are reused */ \
185 if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* name = text up to the first whitespace (0 is KS_SEP_SPACE); c receives the stopping byte */ \
186 if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* the rest of the header line is the comment */ \
/* Collect sequence characters until end of input or the next '>', '+' or '@', wherever on a line it occurs. */ \
187 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
188 if (isgraph(c)) { /* printable non-space character */ \
189 if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
190 seq->seq.m = seq->seq.l + 2; \
191 kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
192 seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); /* NOTE(review): result is not checked */ \
194 seq->seq.s[seq->seq.l++] = (char)c; \
197 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
/* NOTE(review): seq.s is only allocated inside the loop above; if no sequence character has ever been stored (e.g. the first record has an empty sequence) it may still be NULL at the next line - confirm */ \
198 seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
199 if (c != '+') return seq->seq.l; /* FASTA */ \
200 if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
201 seq->qual.m = seq->seq.m; \
202 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); /* NOTE(review): result is not checked */ \
204 while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
205 if (c == -1) return -2; /* we should not stop here */ \
/* Read quality characters until as many as the sequence length are stored. ks_getc() runs before the length test, so one more character is consumed after the last quality value. */ \
206 while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
207 if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; /* other bytes (e.g. line breaks) are skipped; NOTE(review): 127 is DEL, not a printable character - the upper bound was probably meant to be 126 */ \
208 seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
209 seq->last_char = 0; /* we have not come to the next header line */ \
210 if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
/* Declares the record/reader type that the kseq_* functions operate on (kseq_t);
   only its string fields are visible in this view. */
214 #define __KSEQ_TYPE(type_t) \
216 kstring_t name, comment, seq, qual; /* text fields of the current record; kseq_read() fills them and kseq_destroy() frees them */ \
221 #define KSEQ_INIT(type_t, __read) \
222 KSTREAM_INIT(type_t, __read, 4096) \
223 __KSEQ_TYPE(type_t) \
224 __KSEQ_BASIC(type_t) \