3 Copyright (c) 2008 Genome Research Ltd (GRL).
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
/*
 * __KS_TYPE(type_t): expands to the definition of the buffered-stream
 * record `struct __kstream_t`.
 *
 * The members visible in this excerpt are the integers begin, end and
 * is_eof.  Elsewhere in this file they are used as the read cursor into the
 * buffer (begin), the number of valid bytes stored by the last read (end),
 * and a flag set once a read returns fewer bytes than the buffer size
 * (is_eof).
 *
 * NOTE(review): the remaining members and the end of the typedef are not
 * visible in this excerpt -- confirm against the full header.
 */
35 #define __KS_TYPE(type_t) \
36 typedef struct __kstream_t { \
38 int begin, end, is_eof; \
/*
 * ks_eof(ks): non-zero when the stream has been flagged as EOF and every
 * buffered byte has already been consumed (begin >= end).
 * `ks` is expanded three times, so pass an expression without side effects.
 */
42 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
/*
 * ks_rewind(ks): sets is_eof, begin and end to 0, so the buffer is treated
 * as empty and no longer flagged as EOF.  The macro only resets these three
 * bookkeeping fields; it does not touch the underlying handle.
 * `ks` is expanded three times, so pass an expression without side effects.
 */
43 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
/*
 * __KS_BASIC(type_t, __bufsize): expands to the stream constructor and
 * destructor.
 *
 *   ks_init(f)     allocates a zero-filled kstream_t with calloc() and a
 *                  read buffer of __bufsize bytes with malloc(), stored in
 *                  ks->buf.
 *   ks_destroy(ks) takes the stream pointer; its body is not visible in
 *                  this excerpt (presumably releases what ks_init
 *                  allocated -- confirm).
 *
 * NOTE(review): no check of the calloc()/malloc() results is visible here;
 * if there is none, a failed calloc() is dereferenced by the assignment to
 * ks->buf.
 */
45 #define __KS_BASIC(type_t, __bufsize) \
46 static inline kstream_t *ks_init(type_t f) \
48 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
50 ks->buf = (char*)malloc(__bufsize); \
53 static inline void ks_destroy(kstream_t *ks) \
/*
 * __KS_GETC(__read, __bufsize): expands to ks_getc(ks), which returns the
 * next buffered byte as an int, or -1 when no more data is available.
 *
 * Behaviour shown by the visible lines:
 *   - returns -1 at once when the stream is flagged EOF and the buffer has
 *     been fully consumed;
 *   - when the buffer is consumed (begin >= end) it is refilled with
 *     __read(ks->f, ks->buf, __bufsize) and the result is stored in end;
 *   - a result smaller than __bufsize sets is_eof, and a result of 0
 *     returns -1;
 *   - otherwise the byte at buf[begin] is returned and begin is advanced.
 *
 * NOTE(review): buf is assigned from a (char*) cast elsewhere in this file.
 * If it is a plain char buffer on a platform where char is signed, a 0xFF
 * input byte is returned as -1, the same value used for end of input --
 * confirm.
 * NOTE(review): only a zero result from __read is tested in the visible
 * lines; a negative (error) result sets is_eof but is not turned into -1.
 */
61 #define __KS_GETC(__read, __bufsize) \
62 static inline int ks_getc(kstream_t *ks) \
64 if (ks->is_eof && ks->begin >= ks->end) return -1; \
65 if (ks->begin >= ks->end) { \
67 ks->end = __read(ks->f, ks->buf, __bufsize); \
68 if (ks->end < __bufsize) ks->is_eof = 1; \
69 if (ks->end == 0) return -1; \
71 return (int)ks->buf[ks->begin++]; \
/*
 * kstring_t: growable string record used by the stream and sequence
 * readers.  KSTRING_T is defined alongside it (presumably so a guard can
 * detect that the type already exists -- the guard is not visible in this
 * excerpt).
 *
 * The members are not visible here; code in this file uses `s` as the
 * character buffer, `l` as the current length and `m` as the allocated
 * capacity passed to realloc().
 */
75 #define KSTRING_T kstring_t
76 typedef struct __kstring_t {
/*
 * kroundup32(x): rounds x up, in place, to the next power of two; a value
 * that is already a power of two is left unchanged.  It decrements x,
 * copies the highest set bit into every lower bit with shifts of 1, 2, 4, 8
 * and 16, then increments.  The expression evaluates to the new value of x.
 *
 * x must be a modifiable integer lvalue without side effects, because it is
 * expanded many times.  Only 32 bits are smeared, so values that need more
 * than 32 bits are not rounded correctly.  With an unsigned x, an input of
 * 0 (or one whose rounded value does not fit the type) wraps around to 0.
 */
83 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
/*
 * __KS_GETUNTIL(__read, __bufsize): expands to
 * ks_getuntil(ks, delimiter, str, dret), which appends bytes from the
 * stream to `str` up to a stop character.
 *
 * Behaviour shown by the visible lines:
 *   - *dret is cleared to 0 first when dret is non-NULL;
 *   - returns -1 when the buffer is consumed and the stream is flagged EOF;
 *   - an empty buffer is refilled with __read(ks->f, ks->buf, __bufsize);
 *     a short read sets is_eof and a read of 0 bytes ends the scan;
 *   - the buffer is scanned from begin either for a byte equal to
 *     `delimiter` or for the first isspace() byte (the condition choosing
 *     between the two scans is not visible in this excerpt);
 *   - str is grown when needed to a power-of-two capacity (kroundup32 then
 *     realloc) and the scanned bytes, excluding the stop byte, are appended
 *     with memcpy();
 *   - the stop byte is reported through *dret when dret is non-NULL;
 *   - str->s is NUL-terminated at str->l.
 * The success return value is not visible in this excerpt.
 *
 * NOTE(review): the realloc() result is assigned straight to str->s and no
 * check is visible before the memcpy(); on failure the old buffer would be
 * lost and a null pointer written through.
 * NOTE(review): isspace() is called on ks->buf[i]; if buf is plain char and
 * char is signed, bytes >= 0x80 are passed as negative values, which
 * <ctype.h> does not permit -- confirm.
 */
86 #define __KS_GETUNTIL(__read, __bufsize) \
87 static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
89 if (dret) *dret = 0; \
91 if (ks->begin >= ks->end && ks->is_eof) return -1; \
94 if (ks->begin >= ks->end) { \
97 ks->end = __read(ks->f, ks->buf, __bufsize); \
98 if (ks->end < __bufsize) ks->is_eof = 1; \
99 if (ks->end == 0) break; \
103 for (i = ks->begin; i < ks->end; ++i) \
104 if (ks->buf[i] == delimiter) break; \
106 for (i = ks->begin; i < ks->end; ++i) \
107 if (isspace(ks->buf[i])) break; \
109 if (str->m - str->l < i - ks->begin + 1) { \
110 str->m = str->l + (i - ks->begin) + 1; \
111 kroundup32(str->m); \
112 str->s = (char*)realloc(str->s, str->m); \
114 memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
115 str->l = str->l + (i - ks->begin); \
118 if (dret) *dret = ks->buf[i]; \
122 str->s[str->l] = '\0'; \
/*
 * KSTREAM_INIT(type_t, __read, __bufsize): instantiates the buffered-stream
 * layer for one handle type.
 *
 *   type_t     type of the handle given to ks_init() and passed as the
 *              first argument of __read.
 *   __read     callable used as __read(handle, buf, __bufsize); its result
 *              is stored as the number of valid bytes in the buffer.
 *   __bufsize  size in bytes of the read buffer.
 *
 * The visible lines expand __KS_BASIC, __KS_GETC and __KS_GETUNTIL; any
 * other expansion belonging to this macro is not visible in this excerpt.
 */
126 #define KSTREAM_INIT(type_t, __read, __bufsize) \
128 __KS_BASIC(type_t, __bufsize) \
129 __KS_GETC(__read, __bufsize) \
130 __KS_GETUNTIL(__read, __bufsize)
/*
 * __KSEQ_BASIC(type_t): expands to the lifecycle helpers of the sequence
 * reader.
 *
 *   kseq_init(fd)    allocates a zero-filled kseq_t with calloc() and
 *                    creates its stream with ks_init(fd).
 *   kseq_rewind(ks)  resets the stream's is_eof, begin and end to 0 (other
 *                    statements of this function, if any, are not visible
 *                    in this excerpt).
 *   kseq_destroy(ks) frees the name, comment, seq and qual buffers (the
 *                    rest of the function is not visible in this excerpt).
 *
 * NOTE(review): no check of the calloc() result is visible before s->f is
 * assigned.
 */
132 #define __KSEQ_BASIC(type_t) \
133 static inline kseq_t *kseq_init(type_t fd) \
135 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
136 s->f = ks_init(fd); \
139 static inline void kseq_rewind(kseq_t *ks) \
142 ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
144 static inline void kseq_destroy(kseq_t *ks) \
147 free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
153 >=0 length of the sequence (normal)
155 -2 truncated quality string
/*
 * __KSEQ_READ: expands to kseq_read(seq), which parses the next FASTA or
 * FASTQ record from seq->f into seq->name, seq->comment, seq->seq and
 * seq->qual.
 *
 * Steps, as far as the lines in this excerpt show:
 *   1. If last_char is 0, bytes are discarded until a '>' or '@' header
 *      marker is read; reaching end of input first returns -1.
 *   2. The comment, seq and qual lengths are reset.  The name is read with
 *      ks_getuntil() using delimiter 0 (-1 is returned if that fails), and
 *      when the character that ended the name is not a newline the rest of
 *      the line is read as the comment.
 *   3. Bytes are appended to seq->seq until '>', '+', '@' or end of input.
 *      Only isgraph() characters are stored; the buffer is grown to a
 *      power-of-two capacity with kroundup32 and realloc().
 *   4. A stop character of '>' or '@' is remembered in last_char so the
 *      next call does not search for a header again.  If the stop
 *      character is not '+', the record is FASTA and the sequence length is
 *      returned.
 *   5. Otherwise qual is given at least the capacity of seq, the rest of
 *      the '+' line is skipped (-2 if input ends there), and bytes in the
 *      range 33..127 are stored as qualities until qual is as long as seq
 *      or input ends.  last_char is cleared, and -2 is returned when the
 *      two lengths differ.
 * The return statement for a complete FASTQ record is not visible in this
 * excerpt.
 *
 * NOTE(review): seq->seq.s is only allocated inside the isgraph() branch,
 * yet it is NUL-terminated unconditionally afterwards; a first record with
 * no sequence characters would write through a null pointer unless this is
 * handled on a line not shown -- confirm.
 * NOTE(review): both realloc() results are assigned directly with no check
 * visible.
 * NOTE(review): the quality loop calls ks_getc() before testing the length,
 * so one byte following the last quality character is consumed and dropped.
 */
157 #define __KSEQ_READ \
158 static int kseq_read(kseq_t *seq) \
161 kstream_t *ks = seq->f; \
162 if (seq->last_char == 0) { /* then jump to the next header line */ \
163 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
164 if (c == -1) return -1; /* end of file */ \
165 seq->last_char = c; \
166 } /* the first header char has been read */ \
167 seq->comment.l = seq->seq.l = seq->qual.l = 0; \
168 if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
169 if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
170 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
171 if (isgraph(c)) { /* printable non-space character */ \
172 if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
173 seq->seq.m = seq->seq.l + 2; \
174 kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
175 seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
177 seq->seq.s[seq->seq.l++] = (char)c; \
180 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
181 seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
182 if (c != '+') return seq->seq.l; /* FASTA */ \
183 if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
184 seq->qual.m = seq->seq.m; \
185 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
187 while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
188 if (c == -1) return -2; /* we should not stop here */ \
189 while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
190 if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
191 seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
192 seq->last_char = 0; /* we have not come to the next header line */ \
193 if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
/*
 * __KSEQ_TYPE(type_t): expands to the definition of the sequence-record
 * type used by the kseq_* functions.
 *
 * The only members visible in this excerpt are the four kstring_t buffers
 * name, comment, seq and qual.  Other code in this file also uses a
 * last_char member and a stream pointer f; their declarations are not
 * visible here.
 */
197 #define __KSEQ_TYPE(type_t) \
199 kstring_t name, comment, seq, qual; \
204 #define KSEQ_INIT(type_t, __read) \
205 KSTREAM_INIT(type_t, __read, 4096) \
206 __KSEQ_TYPE(type_t) \
207 __KSEQ_BASIC(type_t) \