3 Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* Last Modified: 18AUG2011 */
35 #define KS_SEP_SPACE 0 // any isspace() character: ' ', \t, \n, \v, \f, \r
36 #define KS_SEP_TAB 1 // any isspace() character except ' '
/* Opens the declaration of the buffered-stream record (struct __kstream_t) for a handle of type type_t. */
/* NOTE(review): only part of the member list is visible here; confirm the remaining members against the full file. */
39 #define __KS_TYPE(type_t) \
40 typedef struct __kstream_t { \
42 int begin, end, is_eof; /* readers consume buf[begin..end); is_eof is set after a short read */ \
46 #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) /* non-zero once the source is exhausted and the buffer is fully consumed */
47 #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) /* clears the stream bookkeeping only; it does not touch the underlying handle */
/* Defines ks_init() and ks_destroy() for streams over a type_t handle with a __bufsize-byte buffer. */
/* NOTE(review): the body of ks_destroy() is not visible here; presumably it releases what ks_init() allocated -- confirm. */
49 #define __KS_BASIC(type_t, __bufsize) \
50 static inline kstream_t *ks_init(type_t f) \
52 kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); /* zeroed, so begin, end and is_eof start at 0 */ \
54 ks->buf = malloc(__bufsize); /* NOTE(review): no check of either allocation result is visible here */ \
57 static inline void ks_destroy(kstream_t *ks) \
65 #define __KS_GETC(__read, __bufsize) \
/* ks_getc(): return the next buffered byte as an int, or -1 when no more data is available. */ \
/* Assumes __read(f, buf, n) returns the number of bytes stored in buf -- TODO confirm against the reader passed in. */ \
66 static inline int ks_getc(kstream_t *ks) \
68 if (ks->is_eof && ks->begin >= ks->end) return -1; /* source exhausted and buffer drained */ \
69 if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
71 ks->end = __read(ks->f, ks->buf, __bufsize); \
72 if (ks->end < __bufsize) ks->is_eof = 1; /* a short read is treated as end of input */ \
73 if (ks->end == 0) return -1; /* NOTE(review): a negative __read result is not handled separately here */ \
75 return (int)ks->buf[ks->begin++]; \
79 #define KSTRING_T kstring_t
/* Growable string. The readers below use s as the heap buffer, l as the current length and m as the allocated size. */
80 typedef struct __kstring_t {
/* Round x up, in place, to the next power of two; a value that is already a power of two is left unchanged. */
/* x is evaluated several times and assigned to, so it must be a side-effect-free modifiable lvalue. */
/* The shifts smear set bits across 32 positions only, so values that need more than 32 bits may not be rounded correctly. */
87 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
/* ks_getuntil2(): copy bytes from ks into str up to, but not including, the next delimiter. */
/* delimiter is KS_SEP_SPACE, KS_SEP_TAB, or (when greater than KS_SEP_MAX) a literal byte value. */
/* If dret is non-NULL it is first set to 0 and later receives the byte that ended the field. */
/* A non-zero append keeps the current contents of str; otherwise str is overwritten. */
/* Returns -1 when the stream is already exhausted on entry. */
90 #define __KS_GETUNTIL(__read, __bufsize) \
91 static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
93 if (dret) *dret = 0; \
94 str->l = append? str->l : 0; /* continue at the current end when appending, else start from scratch */ \
95 if (ks->begin >= ks->end && ks->is_eof) return -1; \
98 if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
101 ks->end = __read(ks->f, ks->buf, __bufsize); \
102 if (ks->end < __bufsize) ks->is_eof = 1; /* a short read is treated as end of input */ \
103 if (ks->end == 0) break; \
106 if (delimiter > KS_SEP_MAX) { /* literal delimiter byte */ \
107 for (i = ks->begin; i < ks->end; ++i) \
108 if (ks->buf[i] == delimiter) break; \
109 } else if (delimiter == KS_SEP_SPACE) { /* stop at any isspace() character */ \
110 for (i = ks->begin; i < ks->end; ++i) \
111 if (isspace(ks->buf[i])) break; \
112 } else if (delimiter == KS_SEP_TAB) { /* stop at any isspace() character except a plain space */ \
113 for (i = ks->begin; i < ks->end; ++i) \
114 if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
115 } else i = 0; /* reached only when delimiter matches none of the branches above */ \
116 if (str->m - str->l < i - ks->begin + 1) { /* not enough room for this chunk plus a terminator */ \
117 str->m = str->l + (i - ks->begin) + 1; \
118 kroundup32(str->m); \
119 str->s = (char*)realloc(str->s, str->m); /* NOTE(review): no check of the realloc result is visible here */ \
121 memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
122 str->l = str->l + (i - ks->begin); \
125 if (dret) *dret = ks->buf[i]; /* report which byte ended the field */ \
131 str->s = (char*)calloc(1, 1); \
133 str->s[str->l] = '\0'; /* keep str NUL-terminated */ \
/* ks_getuntil(): ks_getuntil2() with append fixed to 0, so str is overwritten rather than extended. */ \
136 static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
137 { return ks_getuntil2(ks, delimiter, str, dret, 0); }
/* Instantiates the stream helpers for handle type type_t, read function __read and buffer size __bufsize. */
139 #define KSTREAM_INIT(type_t, __read, __bufsize) \
141 __KS_BASIC(type_t, __bufsize) \
142 __KS_GETC(__read, __bufsize) \
143 __KS_GETUNTIL(__read, __bufsize)
/* Defines kseq_init(), kseq_rewind() and kseq_destroy() for sequence readers over a type_t handle. */
145 #define __KSEQ_BASIC(type_t) \
146 static inline kseq_t *kseq_init(type_t fd) \
148 kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); /* zeroed, so every member starts at 0 */ \
149 s->f = ks_init(fd); /* wrap the handle in a buffered stream */ \
152 static inline void kseq_rewind(kseq_t *ks) \
155 ks->f->is_eof = ks->f->begin = ks->f->end = 0; /* same reset as ks_rewind() on the underlying stream */ \
157 static inline void kseq_destroy(kseq_t *ks) \
160 free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); /* release the four string buffers */ \
166 >=0 length of the sequence (normal)
168 -2 quality string missing, truncated, or of a different length than the sequence
170 #define __KSEQ_READ \
/* kseq_read(): parse the next FASTA/FASTQ record from seq->f into seq->name, seq->comment, seq->seq and seq->qual. */ \
/* Returns the sequence length on success, -1 at end of input, and -2 when the quality string is missing or its length differs. */ \
/* seq->last_char remembers a header character ('>' or '@') already consumed while reading the previous record. */ \
171 static int kseq_read(kseq_t *seq) \
174 kstream_t *ks = seq->f; \
175 if (seq->last_char == 0) { /* no header char pending: skip ahead to the next header line */ \
176 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
177 if (c == -1) return -1; /* end of file */ \
178 seq->last_char = c; \
179 } /* else: the first header char has been read in the previous call */ \
180 seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset the lengths; the buffers themselves are reused */ \
181 if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* delimiter 0 is KS_SEP_SPACE; normal exit: EOF */ \
182 if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* the rest of the header line is the FASTA/Q comment */ \
183 if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
185 seq->seq.s = (char*)malloc(seq->seq.m); /* NOTE(review): no check of the allocation result is visible here */ \
187 while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { /* one iteration per sequence line */ \
188 seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
189 ks_getuntil2(ks, '\n', &seq->seq, 0, 1); /* append the rest of the line */ \
191 if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
192 if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
193 seq->seq.m = seq->seq.l + 2; \
194 kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
195 seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
197 seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
198 if (c != '+') return seq->seq.l; /* FASTA: no quality section follows */ \
199 if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
200 seq->qual.m = seq->seq.m; \
201 seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
203 while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
204 if (c == -1) return -2; /* error: no quality string */ \
205 while (ks_getuntil2(ks, '\n', &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); /* append quality lines until the sequence length is reached or input ends */ \
206 seq->last_char = 0; /* we have not come to the next header line */ \
207 if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
211 #define __KSEQ_TYPE(type_t) \
/* NOTE(review): presumably declares the kseq_t record used by kseq_read(); the surrounding typedef lines are not visible here. */ \
213 kstring_t name, comment, seq, qual; /* header name, header comment, sequence and quality string of the current record */ \
218 #define KSEQ_INIT(type_t, __read) \
219 KSTREAM_INIT(type_t, __read, 16384) \
220 __KSEQ_TYPE(type_t) \
221 __KSEQ_BASIC(type_t) \