]> git.donarmstrong.com Git - samtools.git/blob - kseq.h
* samtools-0.1.2-16
[samtools.git] / kseq.h
1 /* The MIT License
2
3    Copyright (c) 2008 Genome Research Ltd (GRL).
4
5    Permission is hereby granted, free of charge, to any person obtaining
6    a copy of this software and associated documentation files (the
7    "Software"), to deal in the Software without restriction, including
8    without limitation the rights to use, copy, modify, merge, publish,
9    distribute, sublicense, and/or sell copies of the Software, and to
10    permit persons to whom the Software is furnished to do so, subject to
11    the following conditions:
12
13    The above copyright notice and this permission notice shall be
14    included in all copies or substantial portions of the Software.
15
16    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23    SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
28 #ifndef AC_KSEQ_H
29 #define AC_KSEQ_H
30
31 #include <ctype.h>
32 #include <string.h>
33 #include <stdlib.h>
34
/*
 * Declares kstream_t, the state of a buffered reader over a handle of
 * type `type_t`.  The member order is part of the layout and is kept
 * exactly as: buf, begin, end, is_eof, f.
 */
#define __KS_TYPE(type_t)                                               \
    typedef struct __kstream_t {                                        \
        char *buf;      /* read buffer, allocated by ks_init() */       \
        int begin;      /* index in buf of the next unread byte */      \
        int end;        /* value returned by the last __read() call */  \
        int is_eof;     /* set once a read returned less than asked */  \
        type_t f;       /* handle handed to the __read callback */      \
    } kstream_t;
41
/* Non-zero when the buffer is fully consumed and the end-of-data flag is
 * set.  `ks` is evaluated more than once. */
#define ks_eof(ks) ((ks)->begin >= (ks)->end && (ks)->is_eof)
/* Clears the buffer position and the end-of-data flag; evaluates to 0.
 * Only these three fields are touched: the underlying handle is not
 * repositioned here.  `ks` is evaluated more than once. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
44
/*
 * Generates ks_init() and ks_destroy() for a stream over `type_t`.
 *
 * ks_init(f)      Allocates a zero-initialised kstream_t together with a
 *                 read buffer of `__bufsize` bytes and stores `f` in it.
 *                 Returns NULL if either allocation fails (nothing is
 *                 leaked in that case).  `f` itself is only stored.
 * ks_destroy(ks)  Frees the buffer and the stream object.  NULL is
 *                 accepted.  The handle `f` is not closed.
 */
#define __KS_BASIC(type_t, __bufsize)                                   \
    static inline kstream_t *ks_init(type_t f)                          \
    {                                                                   \
        kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));       \
        if (ks == NULL) return NULL;                                    \
        ks->buf = (char*)malloc(__bufsize);                             \
        if (ks->buf == NULL) { /* never hand out a stream without a buffer */ \
            free(ks);                                                   \
            return NULL;                                                \
        }                                                               \
        ks->f = f;                                                      \
        return ks;                                                      \
    }                                                                   \
    static inline void ks_destroy(kstream_t *ks)                        \
    {                                                                   \
        if (ks) {                                                       \
            free(ks->buf);                                              \
            free(ks);                                                   \
        }                                                               \
    }
60
/*
 * Generates ks_getc(): returns the next byte of the stream as a value in
 * the range 0..255, or -1 when no more data is available.
 *
 * The buffer is refilled with __read(ks->f, ks->buf, __bufsize) whenever
 * it has been consumed.  A read that returns fewer than `__bufsize` bytes
 * marks the stream as finished, so the bytes of that chunk are the last
 * ones delivered.  A zero or negative result from __read (end of data or
 * read error) yields -1 immediately.
 *
 * The byte is converted through unsigned char so that values >= 0x80 are
 * not sign-extended; in particular 0xFF cannot be mistaken for the -1
 * end-of-data sentinel.
 */
#define __KS_GETC(__read, __bufsize)                                    \
    static inline int ks_getc(kstream_t *ks)                            \
    {                                                                   \
        if (ks->begin >= ks->end) { /* buffer consumed */               \
            if (ks->is_eof) return -1;                                  \
            ks->begin = 0;                                              \
            ks->end = __read(ks->f, ks->buf, __bufsize);                \
            if (ks->end < __bufsize) ks->is_eof = 1;                    \
            if (ks->end <= 0) { /* end of data or read error */         \
                ks->end = 0;                                            \
                return -1;                                              \
            }                                                           \
        }                                                               \
        return (int)(unsigned char)ks->buf[ks->begin++];                \
    }
73
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* Growable character buffer used for the output of the readers in this
 * header.  The guard lets another header provide the same type first. */
typedef struct __kstring_t {
    size_t l;   /* number of characters currently stored in s */
    size_t m;   /* number of bytes allocated for s */
    char *s;    /* heap buffer, grown with realloc(); NULL until first use */
} kstring_t;
#endif
81
#ifndef kroundup32
/* Rounds `x` up, in place, to the next power of two by smearing the top
 * set bit of x-1 over the lower 32 bits and adding one; a value that is
 * already a power of two is left unchanged.  `x` must be a modifiable
 * integer lvalue and is evaluated several times.  The expression yields
 * the rounded value. */
#define kroundup32(x)      \
    (--(x),                \
     (x) |= (x) >> 1,      \
     (x) |= (x) >> 2,      \
     (x) |= (x) >> 4,      \
     (x) |= (x) >> 8,      \
     (x) |= (x) >> 16,     \
     ++(x))
#endif
85
/*
 * Generates ks_getuntil(): reads one delimited token from the stream.
 *
 *   ks         stream to read from
 *   delimiter  character that ends the token; 0 means "any character for
 *              which isspace() is true"
 *   str        receives the token; its previous contents are discarded
 *              (str->l is reset to 0), the delimiter is not stored and
 *              the result is always NUL-terminated
 *   dret       if non-NULL, receives the delimiter that ended the token,
 *              or 0 when the data ran out before a delimiter was seen
 *
 * Returns the token length (>= 0), -1 if the stream was already exhausted
 * on entry, or -2 if memory for `str` could not be obtained (the previous
 * buffer of `str` is kept, its contents are unspecified).
 *
 * A zero or negative result from __read (end of data or read error) ends
 * the token.  A read shorter than `__bufsize` marks the stream finished.
 */
#define __KS_GETUNTIL(__read, __bufsize)                                \
    static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
    {                                                                   \
        if (dret) *dret = 0;                                            \
        str->l = 0;                                                     \
        if (ks->begin >= ks->end && ks->is_eof) return -1;              \
        for (;;) {                                                      \
            int i;                                                      \
            size_t n;                                                   \
            if (ks->begin >= ks->end) { /* buffer consumed: refill */   \
                if (ks->is_eof) break;                                  \
                ks->begin = 0;                                          \
                ks->end = __read(ks->f, ks->buf, __bufsize);            \
                if (ks->end < __bufsize) ks->is_eof = 1;                \
                if (ks->end <= 0) { /* end of data or read error */     \
                    ks->end = 0;                                        \
                    break;                                              \
                }                                                       \
            }                                                           \
            if (delimiter) {                                            \
                for (i = ks->begin; i < ks->end; ++i)                   \
                    if (ks->buf[i] == delimiter) break;                 \
            } else {                                                    \
                /* <ctype.h> needs an unsigned char value, not a plain char */ \
                for (i = ks->begin; i < ks->end; ++i)                   \
                    if (isspace((unsigned char)ks->buf[i])) break;      \
            }                                                           \
            n = (size_t)(i - ks->begin);                                \
            if (str->m - str->l < n + 1) { /* room for data plus NUL */ \
                size_t m = str->l + n + 1;                              \
                char *tmp;                                              \
                kroundup32(m);                                          \
                tmp = (char*)realloc(str->s, m);                        \
                if (tmp == NULL) return -2; /* str->s is still owned by str */ \
                str->s = tmp;                                           \
                str->m = m;                                             \
            }                                                           \
            memcpy(str->s + str->l, ks->buf + ks->begin, n);            \
            str->l += n;                                                \
            ks->begin = i + 1; /* also steps over the delimiter */      \
            if (i < ks->end) {                                          \
                if (dret) *dret = ks->buf[i];                           \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (str->s == NULL) { /* nothing was ever stored: return "" */  \
            str->s = (char*)calloc(1, 1);                               \
            if (str->s == NULL) return -2;                              \
            str->m = 1;                                                 \
        }                                                               \
        str->s[str->l] = '\0';                                          \
        return (int)str->l;                                             \
    }
125
/*
 * Instantiates the complete buffered-stream API for one handle type by
 * expanding, in order, __KS_TYPE, __KS_BASIC, __KS_GETC and __KS_GETUNTIL.
 *
 *   type_t     type of the handle stored in the stream and passed as the
 *              first argument of __read
 *   __read     invoked as __read(handle, buffer, __bufsize); its result is
 *              taken as the number of bytes placed in the buffer
 *   __bufsize  size in bytes of the stream's read buffer
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
        __KS_TYPE(type_t)                                                       \
        __KS_BASIC(type_t, __bufsize)                           \
        __KS_GETC(__read, __bufsize)                            \
        __KS_GETUNTIL(__read, __bufsize)
131
/*
 * Generates the lifetime helpers of the sequence reader.
 *
 * kseq_init(fd)     Allocates a zero-initialised kseq_t and a stream over
 *                   `fd`.  Returns NULL if memory cannot be obtained or
 *                   ks_init() returns NULL.
 * kseq_rewind(ks)   Forgets the pending header character and clears the
 *                   stream's buffer state.  The handle itself is not
 *                   repositioned here.
 * kseq_destroy(ks)  Frees the four string buffers, the stream and the
 *                   reader.  NULL is accepted.
 */
#define __KSEQ_BASIC(type_t)                                            \
    static inline kseq_t *kseq_init(type_t fd)                          \
    {                                                                   \
        kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));                 \
        if (s == NULL) return NULL;                                     \
        s->f = ks_init(fd);                                             \
        if (s->f == NULL) { /* a reader without a stream is unusable */ \
            free(s);                                                    \
            return NULL;                                                \
        }                                                               \
        return s;                                                       \
    }                                                                   \
    static inline void kseq_rewind(kseq_t *ks)                          \
    {                                                                   \
        ks->last_char = 0;                                              \
        ks_rewind(ks->f);                                               \
    }                                                                   \
    static inline void kseq_destroy(kseq_t *ks)                         \
    {                                                                   \
        if (!ks) return;                                                \
        free(ks->name.s);                                               \
        free(ks->comment.s);                                            \
        free(ks->seq.s);                                                \
        free(ks->qual.s);                                               \
        ks_destroy(ks->f);                                              \
        free(ks);                                                       \
    }
151
/* kseq_read(): reads the next FASTA ('>') or FASTQ ('@') record into
   seq->name, seq->comment, seq->seq and seq->qual.

   Return value:
   >=0  length of the sequence (normal)
   -1   end-of-file
   -2   truncated quality string
   -3   memory allocation failure

   On a non-negative return seq->seq.s is always a valid NUL-terminated
   string, even for a record that has no residues.
 */
#define __KSEQ_READ                                                     \
    static int kseq_read(kseq_t *seq)                                   \
    {                                                                   \
        int c;                                                          \
        kstream_t *ks = seq->f;                                         \
        if (seq->last_char == 0) { /* then jump to the next header line */ \
            while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');    \
            if (c == -1) return -1; /* end of file */                   \
            seq->last_char = c;                                         \
        } /* the first header char has been read */                     \
        seq->comment.l = seq->seq.l = seq->qual.l = 0;                  \
        if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;          \
        if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);         \
        while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
            /* <ctype.h> needs an unsigned char value; skip blanks/controls */ \
            if (!isgraph((unsigned char)c)) continue;                   \
            if (seq->seq.l + 1 >= seq->seq.m) { /* grow: char plus NUL */ \
                size_t m = seq->seq.l + 2;                              \
                char *tmp;                                              \
                kroundup32(m); /* rounded to next closest 2^k */        \
                tmp = (char*)realloc(seq->seq.s, m);                    \
                if (tmp == NULL) return -3;                             \
                seq->seq.s = tmp;                                       \
                seq->seq.m = m;                                         \
            }                                                           \
            seq->seq.s[seq->seq.l++] = (char)c;                         \
        }                                                               \
        if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
        if (seq->seq.s == NULL) { /* no residue was ever stored: return "" */ \
            seq->seq.s = (char*)calloc(1, 1);                           \
            if (seq->seq.s == NULL) return -3;                          \
            seq->seq.m = 1;                                             \
        }                                                               \
        seq->seq.s[seq->seq.l] = 0; /* null terminated string */        \
        if (c != '+') return (int)seq->seq.l; /* FASTA */               \
        if (seq->qual.m < seq->seq.m) { /* allocate enough memory */    \
            char *tmp = (char*)realloc(seq->qual.s, seq->seq.m);        \
            if (tmp == NULL) return -3;                                 \
            seq->qual.s = tmp;                                          \
            seq->qual.m = seq->seq.m;                                   \
        }                                                               \
        while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
        if (c == -1) return -2; /* we should not stop here */           \
        /* the character is fetched before the length test, so one character \
           following the last quality value is consumed as well */      \
        while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)     \
            if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
        seq->qual.s[seq->qual.l] = 0; /* null terminated string */      \
        seq->last_char = 0; /* we have not come to the next header line */ \
        if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
        return (int)seq->seq.l;                                         \
    }
196
/*
 * Declares kseq_t, the state of the FASTA/FASTQ reader.  `type_t` is
 * accepted for symmetry with the other generator macros but is not used
 * in the expansion.
 */
#define __KSEQ_TYPE(type_t)                                             \
    typedef struct {                                                    \
        kstring_t name;     /* first word of the header line */         \
        kstring_t comment;  /* remainder of the header line */          \
        kstring_t seq;      /* sequence characters */                   \
        kstring_t qual;     /* quality characters (FASTQ records) */    \
        int last_char;      /* header character already consumed, or 0 */ \
        kstream_t *f;       /* stream the records are read from */      \
    } kseq_t;
203
/*
 * Instantiates the whole reader for one handle type: the buffered stream
 * (KSTREAM_INIT with a 4096-byte buffer) followed by kseq_t and the
 * functions generated by __KSEQ_BASIC and __KSEQ_READ.
 *
 *   type_t  type of the handle passed to kseq_init() and to __read
 *   __read  read callback, forwarded unchanged to KSTREAM_INIT
 */
#define KSEQ_INIT(type_t, __read)                               \
        KSTREAM_INIT(type_t, __read, 4096)                      \
        __KSEQ_TYPE(type_t)                                                     \
        __KSEQ_BASIC(type_t)                                            \
        __KSEQ_READ
209
210 #endif