3 Copyright (c) 2008 Genome Research Ltd (GRL).
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
28 /* Last Modified: 12APR2009 */
/* Delimiter classes understood by ks_getuntil(). A delimiter argument greater
   than KS_SEP_MAX is instead compared against each byte as a literal character. */
37 #define KS_SEP_SPACE 0 // stop at any byte for which isspace() is true (' ', \t, \n, \v, \f, \r in the "C" locale)
38 #define KS_SEP_TAB 1 // stop at any isspace() byte except ' '
/* Declares the buffered-reader state struct (__kstream_t) used by the ks_*
   helpers; type_t is the handle type that ks_init() receives.
   begin/end delimit the unread bytes of the buffer as the half-open range
   [begin, end); is_eof is set once a read returned fewer bytes than requested.
   The remaining members used elsewhere (buf, f) are not shown in this listing. */
41 #define __KS_TYPE(type_t) \
42 typedef struct __kstream_t { \
44 int begin, end, is_eof; \
/* Non-zero once the input is exhausted: the end-of-input flag is set and no
   unread bytes remain in the buffer. The argument is evaluated several times. */
48 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
/* Resets is_eof, begin and end to 0 so the next read refills the buffer.
   Only these three fields are touched; the handle ks->f is not repositioned
   by this macro. */
49 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
/* Defines ks_init() and ks_destroy() for a stream over handle type type_t.
   ks_init() zero-allocates a kstream_t and gives it a buffer of __bufsize bytes.
   NOTE(review): no NULL check on either allocation is visible in this listing -
   confirm against the full source. */
51 #define __KS_BASIC(type_t, __bufsize) \
52 static inline kstream_t *ks_init(type_t f) \
54 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
56 ks->buf = (char*)malloc(__bufsize); \
59 static inline void ks_destroy(kstream_t *ks) \
/* Defines ks_getc(): returns the next byte of the stream as an int, or -1 when
   no more input is available. __read(f, buf, n) is called to refill the buffer
   whenever the unread window [begin, end) is empty.
   NOTE(review): the byte is returned through a plain (int) conversion. If buf is
   a char buffer (ks_init casts the allocation to char*) and char is signed,
   bytes >= 0x80 come back negative and 0xFF is indistinguishable from the -1
   end-of-input value - confirm the declared type of buf.
   NOTE(review): the __read result is only tested with "< __bufsize" and "== 0",
   so a negative (error) return is not handled separately - confirm against the
   __read function in use. */
67 #define __KS_GETC(__read, __bufsize) \
68 static inline int ks_getc(kstream_t *ks) \
70 if (ks->is_eof && ks->begin >= ks->end) return -1; \
71 if (ks->begin >= ks->end) { \
73 ks->end = __read(ks->f, ks->buf, __bufsize); \
74 if (ks->end < __bufsize) ks->is_eof = 1; \
75 if (ks->end == 0) return -1; \
77 return (int)ks->buf[ks->begin++]; \
/* kstring_t: growable string used as the output of ks_getuntil() and for the
   text fields of kseq_t. As used elsewhere in this file, s is the character
   buffer, l the current length (s[l] receives the terminating NUL) and m the
   allocated size passed to realloc(). The member declarations themselves are
   not shown in this listing. */
81 #define KSTRING_T kstring_t
82 typedef struct __kstring_t {
/* Rounds x up, in place, to the nearest power of two that is >= x: the highest
   set bit of x-1 is smeared into every lower bit (shifts up to 16, i.e. 32 bits
   of coverage) and one is added back. A value that is already a power of two is
   left unchanged. x must be a modifiable lvalue and is evaluated several times. */
89 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
/* Defines ks_getuntil(): copies bytes from the stream into str, starting at
   offset str->l, until a delimiter byte is found, then NUL-terminates str->s.
   (Whether str->l is reset on entry is not visible in this listing.)
     delimiter  KS_SEP_SPACE, KS_SEP_TAB, or - when greater than KS_SEP_MAX -
                a literal character to stop at.
     dret       if non-NULL, receives the delimiter byte that ended the scan;
                it is cleared to 0 on entry.
   Returns -1 when called with the buffer drained and end of input already seen.
   The buffer is refilled through __read(f, buf, __bufsize) as needed, so a field
   may span several refills; str grows via realloc() in power-of-two steps.
   NOTE(review): isspace() is passed ks->buf[i] directly. If buf is a plain char
   buffer and char is signed, bytes >= 0x80 are negative, which isspace() does
   not accept - consider casting to unsigned char.
   NOTE(review): the realloc() result overwrites str->s without a NULL check. */
92 #define __KS_GETUNTIL(__read, __bufsize) \
93 static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
95 if (dret) *dret = 0; \
97 if (ks->begin >= ks->end && ks->is_eof) return -1; \
100 if (ks->begin >= ks->end) { \
103 ks->end = __read(ks->f, ks->buf, __bufsize); \
104 if (ks->end < __bufsize) ks->is_eof = 1; \
105 if (ks->end == 0) break; \
108 if (delimiter > KS_SEP_MAX) { \
109 for (i = ks->begin; i < ks->end; ++i) \
110 if (ks->buf[i] == delimiter) break; \
111 } else if (delimiter == KS_SEP_SPACE) { \
112 for (i = ks->begin; i < ks->end; ++i) \
113 if (isspace(ks->buf[i])) break; \
114 } else if (delimiter == KS_SEP_TAB) { \
115 for (i = ks->begin; i < ks->end; ++i) \
116 if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
117 } else i = 0; /* never come to here! */ \
118 if (str->m - str->l < i - ks->begin + 1) { \
119 str->m = str->l + (i - ks->begin) + 1; \
120 kroundup32(str->m); \
121 str->s = (char*)realloc(str->s, str->m); \
123 memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
124 str->l = str->l + (i - ks->begin); \
127 if (dret) *dret = ks->buf[i]; \
131 str->s[str->l] = '\0'; \
/* Instantiates the stream layer for one handle type.
     type_t     handle type stored in the stream and passed to __read
     __read     called as __read(handle, buf, __bufsize); its result is stored
                as the new end of the unread window, i.e. the byte count read
     __bufsize  size in bytes of the internal buffer
   Expands to the definitions of ks_init/ks_destroy, ks_getc and ks_getuntil. */
135 #define KSTREAM_INIT(type_t, __read, __bufsize) \
137 __KS_BASIC(type_t, __bufsize) \
138 __KS_GETC(__read, __bufsize) \
139 __KS_GETUNTIL(__read, __bufsize)
/* Defines the kseq_t lifecycle helpers for handle type type_t:
     kseq_init()    zero-allocates a kseq_t and attaches a stream from ks_init(fd);
     kseq_rewind()  clears the stream's is_eof/begin/end so buffering restarts;
     kseq_destroy() frees the name, comment, seq and qual buffers.
   NOTE(review): the calloc() result in kseq_init() is dereferenced on the next
   line without a NULL check. */
141 #define __KSEQ_BASIC(type_t) \
142 static inline kseq_t *kseq_init(type_t fd) \
144 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
145 s->f = ks_init(fd); \
148 static inline void kseq_rewind(kseq_t *ks) \
151 ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
153 static inline void kseq_destroy(kseq_t *ks) \
156 free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
162 >=0 length of the sequence (normal)
164 -2 truncated quality string
/* Defines kseq_read(): reads the next sequence record from seq->f into seq.
   Steps visible below:
     1. unless the previous call already consumed a header marker
        (seq->last_char != 0), skip input up to the next '>' or '@';
     2. read the name up to the first whitespace byte and, if that byte was not
        a newline, the rest of the line as the comment;
     3. append every isgraph() character to seq->seq until '>', '+', '@' or end
        of input; a '>' or '@' is remembered in last_char for the next call;
     4. if the stop character was not '+', return the sequence length (FASTA);
     5. otherwise skip the rest of the '+' line and store quality characters
        (codes 33..127) until as many as sequence characters are held.
   Returns seq->seq.l for a record without a '+' section, -1 at end of input,
   and -2 if input ends on the '+' line or the quality string is shorter than
   the sequence.
   NOTE(review): the terminator write "seq->seq.s[seq->seq.l] = 0" is not
   preceded by a capacity check; if no sequence character was ever stored,
   seq.s may still be NULL (kseq_t is zero-allocated by kseq_init) - confirm.
   NOTE(review): the quality loop calls ks_getc() before testing
   qual.l < seq.l, so the one character following the last stored quality
   value is consumed and discarded.
   NOTE(review): realloc() results are assigned without a NULL check. */
166 #define __KSEQ_READ \
167 static int kseq_read(kseq_t *seq) \
170 kstream_t *ks = seq->f; \
171 if (seq->last_char == 0) { /* then jump to the next header line */ \
172 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
173 if (c == -1) return -1; /* end of file */ \
174 seq->last_char = c; \
175 } /* the first header char has been read */ \
176 seq->comment.l = seq->seq.l = seq->qual.l = 0; \
177 if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
178 if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
179 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
180 if (isgraph(c)) { /* printable non-space character */ \
181 if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
182 seq->seq.m = seq->seq.l + 2; \
183 kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
184 seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
186 seq->seq.s[seq->seq.l++] = (char)c; \
189 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
190 seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
191 if (c != '+') return seq->seq.l; /* FASTA */ \
192 if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
193 seq->qual.m = seq->seq.m; \
194 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
196 while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
197 if (c == -1) return -2; /* we should not stop here */ \
198 while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
199 if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
200 seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
201 seq->last_char = 0; /* we have not come to the next header line */ \
202 if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
/* Declares the per-record parser state (presumably the kseq_t used by the
   kseq_* helpers - the typedef lines are not shown in this listing). It holds
   four kstring_t buffers: record name, comment, sequence and quality. The
   other members the helpers use (last_char, f) are likewise not shown here. */
206 #define __KSEQ_TYPE(type_t) \
208 kstring_t name, comment, seq, qual; \
213 #define KSEQ_INIT(type_t, __read) \
214 KSTREAM_INIT(type_t, __read, 4096) \
215 __KSEQ_TYPE(type_t) \
216 __KSEQ_BASIC(type_t) \