2 * This file is part of fastq-tools.
4 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
/* Capacity, in bytes, that fastq_alloc_str gives every new string buffer. */
14 static const size_t init_str_size = 128;
/* Size, in bytes, of the read buffer allocated by fastq_open. fastq_refill
 * asks gzread for at most one byte less than this. */
15 static const size_t fastq_buf_size = 4096;
/* Give a string its initial storage.
 *
 * s: string to set up. Its buffer pointer is replaced with a fresh
 *    allocation of init_str_size bytes obtained from malloc_or_die, and
 *    s->size is set to that same capacity. */
17 static void fastq_alloc_str(str_t* s)
19 s->s = malloc_or_die(init_str_size);
/* Record the capacity so later code can tell when the buffer is full. */
22 s->size = init_str_size;
/* Hand the buffer of a string, together with s->size, to realloc_or_die
 * (presumably to resize the buffer to s->size bytes -- TODO confirm against
 * the helper). Called from fastq_get_line while the buffer is too small. */
26 static void fastq_expand_str(str_t* s)
/* NOTE(review): the value produced by realloc_or_die is not stored on this
 * line. If the helper returns the resized block the way realloc does, s->s
 * would keep pointing at the old allocation -- confirm against its
 * declaration. */
29 realloc_or_die(s->s, s->size);
/* Create a sequence record.
 *
 * Allocates a seq_t with malloc_or_die and gives each of its four string
 * fields (id1, seq, id2, qual) an initial buffer through fastq_alloc_str. */
33 seq_t* fastq_alloc_seq()
35 seq_t* seq = malloc_or_die(sizeof(seq_t));
/* One buffer per field that fastq_next fills in. */
36 fastq_alloc_str(&seq->id1);
37 fastq_alloc_str(&seq->seq);
38 fastq_alloc_str(&seq->id2);
39 fastq_alloc_str(&seq->qual);
45 void fastq_free_seq(seq_t* seq)
/* Set up a parser over an already-open stdio stream.
 *
 * f: stream to read from. Its file descriptor (fileno) is handed to gzdopen
 *    in rb mode and the resulting handle is stored in fqf->file.
 *
 * Allocates the fastq_t and a read buffer of fastq_buf_size bytes with
 * malloc_or_die, and starts the parser in STATE_ID1. */
67 fastq_t* fastq_open(FILE* f)
69 fastq_t* fqf = malloc_or_die(sizeof(fastq_t));
/* The comparison against NULL is passed to or_die together with the message
 * on the next line, so a failed gzdopen is reported through that helper. */
70 or_die((int)((fqf->file = gzdopen(fileno(f), "rb")) != NULL),
71 "Can not open gzip file.");
/* STATE_ID1 is the state in which fastq_next expects an ID line. */
73 fqf->state = STATE_ID1;
74 fqf->buf = malloc_or_die(fastq_buf_size);
82 void fastq_close(fastq_t* fqf)
/* Read the next chunk of input into f->buf.
 *
 * f: parser whose gz handle (f->file) is read and whose buffer is filled. */
90 void fastq_refill(fastq_t* f)
/* At most fastq_buf_size - 1 bytes are requested, one less than the buffer
 * allocated in fastq_open -- presumably to leave room for a terminating NUL
 * byte, which fastq_next tests for; TODO confirm. */
95 int n = gzread(f->file, f->buf, fastq_buf_size - 1);
/* Error reporting: fetch the message for this handle from gzerror and write
 * it to stderr. */
103 errmsg = gzerror(f->file, &errnum);
104 fprintf(stderr, "I/O error: %s\n", errmsg);
/* Copy characters from the current parser position (*f->c) into a string.
 *
 * f: parser to read from. When its state is STATE_EOF the function jumps to
 *    its fastq_get_line_done label.
 * s: destination string. Characters are stored only when s is non-NULL, and
 *    fastq_next passes NULL for lines that begin with a semicolon. */
114 void fastq_get_line(fastq_t* f, str_t* s)
118 if (f->state == STATE_EOF) goto fastq_get_line_done;
124 if (f->state == STATE_EOF) goto fastq_get_line_done;
132 goto fastq_get_line_done;
/* Keep at least two bytes beyond index i available before storing.
 * NOTE(review): this condition reads s->size without a NULL check, while the
 * store below is guarded by if (s) and fastq_next calls this function with
 * s == NULL. Confirm that a guard on a line not shown here encloses the
 * loop. */
135 while (s->size < i + 2) {
138 if (s) s->s[i++] = *f->c;
/* Advance the parser and fill in the fields of seq.
 *
 * f:   parser. f->state selects which field is read next and f->c points at
 *      the current input character.
 * seq: record whose id1, seq, id2 and qual strings are filled through
 *      fastq_get_line.
 *
 * Visible return values: 0 whenever STATE_EOF is reached before the quality
 * line has been read, 1 when STATE_EOF is reached while reading the quality
 * line.
 *
 * State progression visible here: STATE_ID1 -> STATE_SEQ -> STATE_ID2 ->
 * STATE_QUAL -> STATE_ID1. From STATE_ID2 there is also a FASTA-style branch
 * that empties id2 and qual and goes straight back to STATE_ID1. */
153 int fastq_next(fastq_t* f, seq_t* seq)
155 if (f->state == STATE_EOF) return 0;
159 /* read more, if needed */
/* A NUL byte at the current position triggers this branch. */
160 if (*f->c == '\0' ) {
162 if (f->state == STATE_EOF) return 0;
166 /* skip over leading whitespace */
167 else if (isspace(*f->c)) {
/* A line starting with a semicolon is consumed without being stored: the
 * destination passed to fastq_get_line is NULL. */
173 else if (*f->c == ';') {
174 fastq_get_line(f, NULL);
175 if (f->state == STATE_EOF) return 0;
/* First ID line: it must begin with an at-sign or a greater-than sign. */
180 else if (f->state == STATE_ID1) {
181 if (*f->c == '@' || *f->c == '>') {
183 fastq_get_line(f, &seq->id1);
184 if (f->state == STATE_EOF) return 0;
186 f->state = STATE_SEQ;
/* Message used when the line starts with any other character. */
190 "Malformed FASTQ file: expecting an '@' or '>', saw a '%c'\n",
/* Sequence line. */
197 else if (f->state == STATE_SEQ) {
198 fastq_get_line(f, &seq->seq);
199 if (f->state == STATE_EOF) return 0;
201 f->state = STATE_ID2;
/* Second ID line, or the FASTA-style shortcut below. The condition that
 * chooses between the two is not among the lines shown here. */
205 else if (f->state == STATE_ID2) {
208 fastq_get_line(f, &seq->id2);
209 if (f->state == STATE_EOF) return 0;
211 f->state = STATE_QUAL;
214 /* fasta style entry */
/* Empty id2 and qual by writing a NUL byte at their first position, then go
 * back to expecting an ID line. */
215 seq->id2.s[0] = '\0';
216 seq->qual.s[0] = '\0';
218 f->state = STATE_ID1;
223 /* read quality string */
224 else if (f->state == STATE_QUAL) {
225 fastq_get_line(f, &seq->qual);
/* Unlike the earlier checks, reaching EOF here returns 1: the quality line
 * is the last field read for a record. */
226 if (f->state == STATE_EOF) return 1;
228 f->state = STATE_ID1;
/* Reported when none of the states above matched. */
233 fputs("Inexplicable error in fastq parser.\n", stderr);
/* Reset the parser so reading can start over.
 *
 * fqf: parser to reset. The step visible here puts it back in STATE_ID1, the
 *      state in which fastq_next expects an ID line. */
244 void fastq_rewind(fastq_t* fqf)
247 fqf->state = STATE_ID1;
/* Write one record to a stream.
 *
 * fout: destination stream.
 * seq:  record to print. seq->qual.n decides the layout. */
253 void fastq_print(FILE* fout, seq_t* seq)
/* With a non-empty quality string the record is written as four lines: an
 * at-sign line, a line, a plus-sign line and a final line. */
256 if (seq->qual.n > 0) {
257 fprintf(fout, "@%s\n%s\n+%s\n%s\n",
/* Two-line layout starting with a greater-than sign; presumably the branch
 * taken when the quality string is empty -- TODO confirm. */
266 fprintf(fout, ">%s\n%s\n",