git.donarmstrong.com Git - samtools.git/blob - kseq.h
Create trunk copy
[samtools.git] / kseq.h
1 /* The MIT License
2
3    Copyright (c) 2008 Genome Research Ltd (GRL).
4
5    Permission is hereby granted, free of charge, to any person obtaining
6    a copy of this software and associated documentation files (the
7    "Software"), to deal in the Software without restriction, including
8    without limitation the rights to use, copy, modify, merge, publish,
9    distribute, sublicense, and/or sell copies of the Software, and to
10    permit persons to whom the Software is furnished to do so, subject to
11    the following conditions:
12
13    The above copyright notice and this permission notice shall be
14    included in all copies or substantial portions of the Software.
15
16    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23    SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
28 #ifndef AC_KSEQ_H
29 #define AC_KSEQ_H
30
31 #include <ctype.h>
32 #include <string.h>
33 #include <stdlib.h>
34
/* Declare kstream_t, a buffered reader wrapped around a handle of type type_t.
 *   buf     buffer holding the most recently read chunk (allocated by ks_init)
 *   begin   index in buf of the next unread byte
 *   end     number of valid bytes currently in buf
 *   is_eof  non-zero once the underlying source is considered exhausted
 *   f       caller-supplied handle that is handed to the read function
 */
#define __KS_TYPE(type_t)            \
	typedef struct __kstream_t { \
		char *buf;           \
		int begin;           \
		int end;             \
		int is_eof;          \
		type_t f;            \
	} kstream_t;
41
/* Non-zero (1) when the source is flagged as exhausted and every buffered byte
   has been consumed; 0 otherwise. */
#define ks_eof(ks) ((ks)->is_eof ? (ks)->begin >= (ks)->end : 0)
/* Forget all buffered data and clear the end-of-file flag.  Only the three
   bookkeeping fields are touched; the expression evaluates to 0. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
44
/* Define the constructor and destructor of kstream_t.
 *
 * ks_init(f)      allocates a zero-initialised stream around handle f together
 *                 with a read buffer of __bufsize bytes.  Returns NULL when
 *                 either allocation fails; nothing is leaked in that case.
 *                 The handle is only stored, it is not opened or closed here.
 * ks_destroy(ks)  releases the buffer and the stream object; a NULL ks is
 *                 ignored.  The handle itself is left untouched.
 */
#define __KS_BASIC(type_t, __bufsize)                                         \
	static inline kstream_t *ks_init(type_t f)                            \
	{                                                                     \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));     \
		if (ks == NULL) return NULL;                                  \
		ks->buf = (char*)malloc(__bufsize);                           \
		if (ks->buf == NULL) { /* a stream without a buffer is unusable */ \
			free(ks);                                             \
			return NULL;                                          \
		}                                                             \
		ks->f = f;                                                    \
		return ks;                                                    \
	}                                                                     \
	static inline void ks_destroy(kstream_t *ks)                          \
	{                                                                     \
		if (ks == NULL) return;                                       \
		free(ks->buf);                                                \
		free(ks);                                                     \
	}
60
/* Define ks_getc(ks): return the next byte of the stream as a value in
 * 0..255, or -1 when no further byte is available.
 *
 * The buffer is refilled with __read(ks->f, ks->buf, __bufsize) once it has
 * been drained.  A refill that yields fewer than __bufsize bytes marks the
 * stream as exhausted.  A refill that yields nothing, or a negative result
 * (the reader reported an error), ends the stream: -1 is returned from then on.
 *
 * The byte is converted through unsigned char so that bytes >= 0x80 are never
 * negative and 0xFF cannot be mistaken for the -1 end-of-stream value.
 */
#define __KS_GETC(__read, __bufsize)                                          \
	static inline int ks_getc(kstream_t *ks)                              \
	{                                                                     \
		if (ks->begin >= ks->end) {                                   \
			if (ks->is_eof) return -1;                            \
			ks->begin = 0;                                        \
			ks->end = __read(ks->f, ks->buf, __bufsize);          \
			if (ks->end < __bufsize) ks->is_eof = 1;              \
			if (ks->end <= 0) { /* nothing read, or read error */ \
				ks->is_eof = 1;                               \
				ks->end = 0; /* keep begin/end consistent */  \
				return -1;                                    \
			}                                                     \
		}                                                             \
		return (int)(unsigned char)ks->buf[ks->begin++];              \
	}
73
/* Growable, NUL-terminated character buffer used for every parsed field. */
typedef struct __kstring_t {
	size_t l;  /* number of characters stored, not counting the terminating NUL */
	size_t m;  /* number of bytes allocated for s */
	char *s;   /* heap buffer, grown with realloc() as needed */
} kstring_t;
78
/* Round x up, in place, to the next power of two (an exact power of two is
   left unchanged); the expression evaluates to the new value.  x must be a
   modifiable lvalue and is evaluated several times.  The shift cascade only
   propagates bits across 32 positions. */
#ifndef kroundup32
#define kroundup32(x)                                       \
	(--(x),                                             \
	 (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, \
	 (x) |= (x) >> 8, (x) |= (x) >> 16,                 \
	 ++(x))
#endif
82
/* Define ks_getuntil(ks, delimiter, str, dret): read one token from the stream.
 *
 *   delimiter  byte that ends the token; 0 means "any isspace() character"
 *   str        receives the token (without the delimiter), NUL-terminated;
 *              its buffer is grown with realloc() as required
 *   dret       if not NULL, receives the delimiter that ended the token, or 0
 *              when the token was ended by the end of the stream
 *
 * Returns the token length (>= 0), -1 when the stream was already exhausted
 * and no data could be examined, or -3 when the buffer could not be grown
 * (str then keeps the characters gathered so far).  A negative result from
 * __read is handled like end of stream.
 *
 * Bytes are compared as unsigned char: isspace() is undefined for negative
 * arguments and a delimiter >= 0x80 would otherwise never match a signed char.
 */
#define __KS_GETUNTIL(__read, __bufsize)                                                \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{                                                                               \
		int got_any = 0; /* set once at least one buffer has been scanned */    \
		if (dret) *dret = 0;                                                    \
		str->l = 0;                                                             \
		for (;;) {                                                              \
			int i;                                                          \
			size_t chunk;                                                   \
			if (ks->begin >= ks->end) { /* buffer drained: refill or stop */ \
				if (ks->is_eof) break;                                  \
				ks->begin = 0;                                          \
				ks->end = __read(ks->f, ks->buf, __bufsize);            \
				if (ks->end < __bufsize) ks->is_eof = 1;                \
				if (ks->end <= 0) { /* nothing read, or read error */   \
					ks->is_eof = 1;                                 \
					ks->end = 0;                                    \
					break;                                          \
				}                                                       \
			}                                                               \
			if (delimiter) {                                                \
				for (i = ks->begin; i < ks->end; ++i)                   \
					if ((unsigned char)ks->buf[i] == (unsigned char)delimiter) break; \
			} else {                                                        \
				for (i = ks->begin; i < ks->end; ++i)                   \
					if (isspace((unsigned char)ks->buf[i])) break;  \
			}                                                               \
			got_any = 1;                                                    \
			chunk = (size_t)(i - ks->begin);                                \
			if (str->m - str->l < chunk + 1) { /* room for chunk plus NUL */ \
				size_t need = str->l + chunk + 1, cap = need;           \
				char *grown;                                            \
				kroundup32(cap);                                        \
				if (cap < need) cap = need; /* rounding wrapped around */ \
				grown = (char*)realloc(str->s, cap);                    \
				if (grown == NULL) { /* old buffer is still valid */    \
					if (str->s && str->l < str->m) str->s[str->l] = '\0'; \
					return -3;                                      \
				}                                                       \
				str->s = grown;                                         \
				str->m = cap;                                           \
			}                                                               \
			memcpy(str->s + str->l, ks->buf + ks->begin, chunk);            \
			str->l += chunk;                                                \
			ks->begin = i + 1; /* step over the delimiter, if any */        \
			if (i < ks->end) { /* stopped on a delimiter, token complete */ \
				if (dret) *dret = (unsigned char)ks->buf[i];            \
				break;                                                  \
			}                                                               \
		}                                                                       \
		if (!got_any) { /* end of stream before any byte was examined */        \
			if (str->s && str->m > 0) str->s[0] = '\0';                     \
			return -1;                                                      \
		}                                                                       \
		str->s[str->l] = '\0'; /* buffer exists: got_any implies it was sized */ \
		return (int)str->l;                                                     \
	}
122
/* Instantiate the buffered-stream API for one handle type.
 *   type_t     type of the handle stored in kstream_t
 *   __read     reader invoked as __read(handle, buffer, __bufsize); its result
 *              is used as the number of bytes placed in the buffer
 *   __bufsize  size in bytes of the internal read buffer
 * Expands to the kstream_t type plus ks_init, ks_destroy, ks_getc and
 * ks_getuntil.
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t)                       \
	__KS_BASIC(type_t, __bufsize)           \
	__KS_GETC(__read, __bufsize)            \
	__KS_GETUNTIL(__read, __bufsize)
128
/* Define the life-cycle helpers of kseq_t.
 *
 * kseq_init(fd)     allocates a zeroed reader and a stream around handle fd.
 *                   Returns NULL if the reader or its stream cannot be
 *                   allocated; nothing is leaked in that case.
 * kseq_rewind(ks)   forgets the pending header character and all buffered
 *                   data.  Only in-memory state is reset here.
 * kseq_destroy(ks)  frees the four string buffers, the stream and the reader;
 *                   a NULL ks is ignored.
 */
#define __KSEQ_BASIC(type_t)                                            \
	static inline kseq_t *kseq_init(type_t fd)                      \
	{                                                               \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));         \
		if (s == NULL) return NULL;                             \
		s->f = ks_init(fd);                                     \
		if (s->f == NULL) { /* stream allocation failed */      \
			free(s);                                        \
			return NULL;                                    \
		}                                                       \
		return s;                                               \
	}                                                               \
	static inline void kseq_rewind(kseq_t *ks)                      \
	{                                                               \
		ks->last_char = 0;                                      \
		ks_rewind(ks->f);                                       \
	}                                                               \
	static inline void kseq_destroy(kseq_t *ks)                     \
	{                                                               \
		if (!ks) return;                                        \
		free(ks->name.s);                                       \
		free(ks->comment.s);                                    \
		free(ks->seq.s);                                        \
		free(ks->qual.s);                                       \
		ks_destroy(ks->f);                                      \
		free(ks);                                               \
	}
148
/* Define kseq_read(seq): read the next FASTA/FASTQ record from seq->f into
   seq->name, seq->comment, seq->seq and seq->qual.  Fields that are absent
   from the record are left with length 0 and, if a buffer exists, as an empty
   string.  seq->seq.s is always a valid NUL-terminated string on a
   non-negative return, even for an empty sequence.

   Return value:
   >=0  length of the sequence (normal)
   -1   end-of-file
   -2   truncated quality string
   -3   memory allocation failure (the current record is incomplete)
 */
#define __KSEQ_READ                                                                     \
	static int kseq_read(kseq_t *seq)                                               \
	{                                                                               \
		int c, r;                                                               \
		kstream_t *ks = seq->f;                                                 \
		if (seq->last_char == 0) { /* then jump to the next header line */      \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');        \
			if (c == -1) return -1; /* end of file */                       \
			seq->last_char = c;                                             \
		} /* else the header char was already consumed by the previous call */  \
		seq->comment.l = seq->seq.l = seq->qual.l = 0;                          \
		/* do not expose text left over from the previous record */             \
		if (seq->comment.s && seq->comment.m > 0) seq->comment.s[0] = 0;        \
		if (seq->qual.s && seq->qual.m > 0) seq->qual.s[0] = 0;                 \
		r = ks_getuntil(ks, 0, &seq->name, &c);                                 \
		if (r < 0) return r == -3 ? -3 : -1;                                    \
		if (c != '\n' && ks_getuntil(ks, '\n', &seq->comment, 0) == -3)         \
			return -3;                                                      \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') {   \
			/* keep printable non-space characters only; the cast keeps   \
			   isgraph() defined even if c is negative */                   \
			if (!isgraph((unsigned char)c)) continue;                       \
			if (seq->seq.l + 1 >= seq->seq.m) { /* room for c and the NUL */ \
				size_t need = seq->seq.l + 2, cap = need;               \
				char *grown;                                            \
				kroundup32(cap); /* rounded to next closest 2^k */      \
				if (cap < need) cap = need; /* rounding wrapped around */ \
				grown = (char*)realloc(seq->seq.s, cap);                \
				if (grown == NULL) { /* old buffer is still valid */    \
					if (seq->seq.s && seq->seq.l < seq->seq.m)      \
						seq->seq.s[seq->seq.l] = 0;             \
					return -3;                                      \
				}                                                       \
				seq->seq.s = grown;                                     \
				seq->seq.m = cap;                                       \
			}                                                               \
			seq->seq.s[seq->seq.l++] = (char)c;                             \
		}                                                                       \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.s == NULL) { /* record without sequence characters */      \
			seq->seq.s = (char*)malloc(256);                                \
			if (seq->seq.s == NULL) return -3;                              \
			seq->seq.m = 256;                                               \
		}                                                                       \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */                \
		if (c != '+') return (int)seq->seq.l; /* FASTA */                       \
		if (seq->qual.m < seq->seq.m) { /* qual needs as much room as seq */    \
			char *grown = (char*)realloc(seq->qual.s, seq->seq.m);          \
			if (grown == NULL) return -3;                                   \
			seq->qual.s = grown;                                            \
			seq->qual.m = seq->seq.m;                                       \
		}                                                                       \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */                   \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)             \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (char)c;  \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */              \
		seq->last_char = 0; /* we have not come to the next header line */      \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return (int)seq->seq.l;                                                 \
	}
193
/* Declare kseq_t, the state of one FASTA/FASTQ reader.
 *   name, comment  the two parts of a record's header line
 *   seq, qual      sequence and quality strings of the record
 *   last_char      header character ('>' or '@') that was already consumed
 *                  while finishing the previous record, or 0 if none
 *   f              the buffered stream the records are read from
 * The type_t argument is accepted but does not appear in the expansion.
 */
#define __KSEQ_TYPE(type_t)        \
	typedef struct {           \
		kstring_t name;    \
		kstring_t comment; \
		kstring_t seq;     \
		kstring_t qual;    \
		int last_char;     \
		kstream_t *f;      \
	} kseq_t;
200
/* Instantiate the complete FASTA/FASTQ reader for handle type type_t, reading
 * through __read with a 4096-byte stream buffer.  Expands to everything
 * KSTREAM_INIT provides plus kseq_t, kseq_init, kseq_rewind, kseq_destroy and
 * kseq_read.
 */
#define KSEQ_INIT(type_t, __read)          \
	KSTREAM_INIT(type_t, __read, 4096) \
	__KSEQ_TYPE(type_t)                \
	__KSEQ_BASIC(type_t)               \
	__KSEQ_READ
206
207 #endif