2 * This file is part of fastq-tools.
4 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
/* Initial buffer size requested from malloc_or_die for every str_t set up by
 * fastq_alloc_str; the same value is recorded as the string's starting size. */
14 static const size_t init_str_size = 128;
/* Size of the input buffer allocated in fastq_open. fastq_refill asks gzread
 * for at most fastq_buf_size - 1 bytes per call. */
15 static const size_t fastq_buf_size = 4096;
/* Give a str_t its initial storage: s->s receives a buffer of init_str_size
 * obtained from malloc_or_die, and s->size records that capacity.
 * Other statements of this function are not visible in this excerpt. */
17 static void fastq_alloc_str(str_t* s)
19 s->s = malloc_or_die(init_str_size);
22 s->size = init_str_size;
/* Resize the buffer of a str_t to s->size via realloc_or_die. The statement
 * that changes s->size beforehand is not visible in this excerpt. */
26 static void fastq_expand_str(str_t* s)
/* NOTE(review): the result of realloc_or_die is discarded here. If it is a
 * function that returns the (possibly relocated) block the way realloc does,
 * s->s is left pointing at freed memory whenever the block moves. If it is a
 * macro that reassigns its first argument this is fine. TODO confirm against
 * the definition of realloc_or_die. */
29 realloc_or_die(s->s, s->size);
/* Allocate a seq_t with malloc_or_die and give each of its four string
 * fields (id1, seq, id2, qual) an initial buffer through fastq_alloc_str.
 * The return statement is not visible in this excerpt; presumably the new
 * record is returned to the caller. */
33 seq_t* fastq_alloc_seq()
35 seq_t* seq = malloc_or_die(sizeof(seq_t));
36 fastq_alloc_str(&seq->id1);
37 fastq_alloc_str(&seq->seq);
38 fastq_alloc_str(&seq->id2);
39 fastq_alloc_str(&seq->qual);
45 void fastq_free_seq(seq_t* seq)
/* Create a parser over an already-open stdio stream. The descriptor of f is
 * handed to gzdopen in "rb" mode, the parser starts in STATE_ID1, and a read
 * buffer of fastq_buf_size is allocated. Remaining initialisation and the
 * return statement are not visible in this excerpt. */
67 fastq_t* fastq_open(FILE* f)
69 fastq_t* fqf = malloc_or_die(sizeof(fastq_t));
/* NOTE(review): the handle returned by gzdopen is cast to int before being
 * passed to or_die. Where pointers are wider than int the cast can truncate,
 * so a valid handle could in principle be seen as 0. TODO confirm how or_die
 * interprets its first argument and consider testing against NULL instead. */
70 or_die((int)(fqf->file = gzdopen(fileno(f), "rb")),
71 "Can not open gzip file.");
/* The first thing expected from the input is an id line. */
73 fqf->state = STATE_ID1;
74 fqf->buf = malloc_or_die(fastq_buf_size);
82 void fastq_close(fastq_t* fqf)
/* Read the next chunk of input into f->buf with gzread. On the error path the
 * message obtained from gzerror is written to stderr. Handling of the byte
 * count n and of end of input is not visible in this excerpt. */
90 void fastq_refill(fastq_t* f)
/* One byte fewer than the buffer size is requested; presumably the spare byte
 * holds a terminating NUL, since fastq_next treats a NUL at f->c as the cue
 * to read more. TODO confirm. */
95 int n = gzread(f->file, f->buf, fastq_buf_size - 1);
103 errmsg = gzerror(f->file, &errnum);
104 fprintf(stderr, "I/O error: %s\n", errmsg);
/* Consume input from f, storing characters into s when s is non-NULL.
 * fastq_next calls this with s == NULL to consume a line without keeping it.
 * Every exit visible here jumps to the fastq_get_line_done label, including
 * the two taken when the parser has reached STATE_EOF. Loop structure and the
 * code at the label are not visible in this excerpt. */
114 void fastq_get_line(fastq_t* f, str_t* s)
118 if (f->state == STATE_EOF) goto fastq_get_line_done;
124 if (f->state == STATE_EOF) goto fastq_get_line_done;
132 goto fastq_get_line_done;
/* Grow the destination until it can hold index i plus one further byte.
 * NOTE(review): s->size is read here without a visible NULL check, while the
 * store just below is guarded by "if (s)" and a caller does pass NULL.
 * TODO confirm that an enclosing guard exists in the lines not shown. */
135 while (s->size < i + 2) {
138 if (s) s->s[i++] = *f->c;
/* Advance the parser, filling seq from the input one line at a time. The
 * parser state selects which field is read next: STATE_ID1 -> STATE_SEQ ->
 * STATE_ID2 -> STATE_QUAL -> STATE_ID1.
 *
 * Return values visible in this excerpt: 0 whenever STATE_EOF is reached
 * before or while reading id1, seq or id2 (or while skipping input), and 1
 * when STATE_EOF is reached while reading the quality string. The return on
 * the normal completion paths is not visible here. */
153 int fastq_next(fastq_t* f, seq_t* seq)
155 if (f->state == STATE_EOF) return 0;
159 /* read more, if needed */
160 if (*f->c == '\0' ) {
162 if (f->state == STATE_EOF) return 0;
166 /* skip over leading whitespace */
/* NOTE(review): if f->c points to plain char, a negative value passed to
 * isspace is undefined behaviour; casting to unsigned char would avoid it.
 * TODO confirm the declared type of f->c. */
167 else if (isspace(*f->c)) {
/* A line starting with a semicolon is consumed with no destination string. */
172 else if (*f->c == ';') {
173 fastq_get_line(f, NULL);
174 if (f->state == STATE_EOF) return 0;
/* First id line: must start with an at-sign or a greater-than sign. */
178 else if (f->state == STATE_ID1) {
179 if (*f->c == '@' || *f->c == '>') {
181 fastq_get_line(f, &seq->id1);
182 if (f->state == STATE_EOF) return 0;
184 f->state = STATE_SEQ;
/* Diagnostic for any other leading character; the call that emits it is not
 * visible in this excerpt. */
188 "Malformed FASTQ file: expecting an '@' or '>', saw a '%c'\n",
/* Sequence line. */
195 else if (f->state == STATE_SEQ) {
196 fastq_get_line(f, &seq->seq);
197 if (f->state == STATE_EOF) return 0;
199 f->state = STATE_ID2;
/* Second id line. The test that chooses between reading it and the
 * fasta-style path below is not visible in this excerpt. */
203 else if (f->state == STATE_ID2) {
206 fastq_get_line(f, &seq->id2);
207 if (f->state == STATE_EOF) return 0;
209 f->state = STATE_QUAL;
212 /* fasta style entry */
/* No second id or quality: both strings are emptied and the parser goes
 * straight back to expecting an id line. */
213 seq->id2.s[0] = '\0';
214 seq->qual.s[0] = '\0';
216 f->state = STATE_ID1;
221 /* read quality string */
222 else if (f->state == STATE_QUAL) {
223 fastq_get_line(f, &seq->qual);
/* Unlike the earlier fields, hitting EOF here returns 1 rather than 0;
 * presumably because the record is already complete when the input ends
 * right after the quality line. TODO confirm intent. */
224 if (f->state == STATE_EOF) return 1;
226 f->state = STATE_ID1;
/* Reported when no branch above matched the parser state. */
231 fputs("Inexplicable error in fastq parser.\n", stderr);
/* Write one record to fout. When the quality string is non-empty
 * (seq->qual.n > 0) a four-line record is printed: an at-sign id line, the
 * sequence, a plus-sign id line and the quality string. The second format
 * string prints a two-line record with a greater-than id line; presumably it
 * is the else branch for records without qualities (the branch keyword and
 * the fprintf arguments are not visible in this excerpt). */
242 void fastq_print(FILE* fout, seq_t* seq)
245 if (seq->qual.n > 0) {
246 fprintf(fout, "@%s\n%s\n+%s\n%s\n",
255 fprintf(fout, ">%s\n%s\n",