bin_PROGRAMS = fastq-grep fastq-kmers fastq-match
-fastq_grep_SOURCES = kseq.h fastq-grep.c
+fastq_grep_SOURCES = kseq.h fastq-grep.c fastq-parse.c fastq-common.c
fastq_grep_LDADD = $(PCRE_LIBS) -lz
-fastq_kmers_SOURCES = kseq.h fastq-kmers.c
+fastq_kmers_SOURCES = kseq.h fastq-kmers.c fastq-parse.c fastq-common.c
fastq_kmers_LDADD = -lz
-fastq_match_SOURCES = kseq.h fastq-match.c
+fastq_match_SOURCES = kseq.h fastq-match.c fastq-parse.c fastq-common.c
fastq_match_LDADD = swsse2/libswsse2.la -lz
fastq_match_CFLAGS = -msse2
--- /dev/null
+
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
+
+
+#include "fastq-common.h"
+#include <stdlib.h>
+
+
+void or_die(int b, const char* msg)
+{
+ if (b == 0) {
+ fputs(msg, stderr);
+ exit(1);
+ }
+}
+
+
+void* malloc_or_die(size_t n)
+{
+ void* p = malloc(n);
+ if (p == NULL) {
+ fprintf(stderr, "Can not allocate %zu bytes.\n", n);
+ exit(1);
+ }
+ return p;
+}
+
+
+void* realloc_or_die(void* ptr, size_t n)
+{
+ void* p = realloc(ptr, n);
+ if (p == NULL) {
+ fprintf(stderr, "Can not allocate %zu bytes.\n", n);
+ exit(1);
+ }
+ return p;
+}
+
+
+FILE* fopen_or_die(const char* path, const char* mode)
+{
+ FILE* f = fopen(path, mode);
+ if (f == NULL) {
+ fprintf(stderr, "Can not open file %s with mode %s.\n", path, mode);
+ exit(1);
+ }
+ return f;
+}
+
+
+
--- /dev/null
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * common :
+ * A few common functions, primarily for crashing whilst retaining our dignity.
+ *
+ */
+
+#ifndef FASTQ_TOOLS_COMMON_H
+#define FASTQ_TOOLS_COMMON_H
+
+#include <stdio.h>
+
+void or_die(int b, const char* msg);
+
+void* malloc_or_die(size_t);
+void* realloc_or_die(void*, size_t);
+FILE* fopen_or_die(const char*, const char*);
+
+#endif
+
-
-/*
- * fastq-grep: regular expression searches of the sequences within a fastq file
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
- * Febuary 2011 / Daniel Jones <dcjones@cs.washington.edu>
+ * fastq-grep :
+ * Regular expression searches of the sequences within a FASTQ file.
*
*/
+#include "fastq-common.h"
+#include "fastq-parse.h"
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <zlib.h>
#include <pcre.h>
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
-void print_fastq_entry( FILE* fout, kseq_t* seq )
+void print_fastq_entry(FILE* fout, seq_t* seq)
{
fprintf(fout, "@%s\n%s\n+%s\n%s\n",
- seq->name.s,
+ seq->id1.s,
seq->seq.s,
- seq->comment.s,
+ seq->id2.s,
seq->qual.s );
}
-void fastq_grep( gzFile fin, FILE* fout, pcre* re )
+void fastq_grep(FILE* fin, FILE* fout, pcre* re)
{
int rc;
int ovector[3];
size_t count = 0;
- kseq_t* seq;
- seq = kseq_init(fin);
+ fastq_t* fqf = fastq_open(fin);
+ seq_t* seq = fastq_alloc_seq();
- while (kseq_read(seq) >= 0) {
+ while (fastq_next(fqf, seq)) {
rc = pcre_exec(re, /* pattern */
NULL, /* extre data */
seq->seq.s, /* subject */
- seq->seq.l, /* subject length */
+ seq->seq.n, /* subject length */
0, /* subject offset */
0, /* options */
ovector, /* output vector */
}
}
- kseq_destroy(seq);
+ fastq_free_seq(seq);
+ fastq_close(fqf);
if (count_flag) fprintf(fout, "%zu\n", count);
}
int pat_error_offset;
FILE* fin;
- gzFile gzfin;
invert_flag = 0;
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- fastq_grep(gzfin, stdout, re);
-
- gzclose(gzfin);
+ fastq_grep(stdin, stdout, re);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- fastq_grep(gzfin, stdout, re);
+ fastq_grep(fin, stdout, re);
- gzclose(gzfin);
+ fclose(fin);
}
}
-/*
- * fastq-kmers: kmer frequences within fastq files
+/*
+ * This file is part of fastq-tools.
*
- * Febuary 2011 / Daniel Jones <dcjones@cs.washington.edu>
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-kmers :
+ * Tabulate k-mer frequencies with FASTQ files.
*
*/
-
-
+#include "fastq-common.h"
+#include "fastq-parse.h"
+#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <getopt.h>
#include <zlib.h>
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
}
-void count_fastq_kmers(gzFile* fin, uint32_t* cs)
+void count_fastq_kmers(FILE* fin, uint32_t* cs)
{
- kseq_t* seq = kseq_init(fin);
+ seq_t* seq = fastq_alloc_seq();
+ fastq_t* fqf = fastq_open(fin);
int i;
int n;
uint32_t kmer;
- while (kseq_read(seq) >= 0) {
- n = (int)seq->seq.l - k + 1;
+ while (fastq_next(fqf, seq)) {
+ n = (int)seq->seq.n - k + 1;
for (i = 0; i < n; i++) {
if( packkmer(seq->seq.s + i, &kmer, k) ) {
cs[kmer]++;
}
}
- kseq_destroy(seq);
+ fastq_free_seq(seq);
+ fastq_close(fqf);
}
uint32_t* cs; /* counts */
FILE* fin;
- gzFile gzfin;
int opt;
int opt_idx;
}
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- count_fastq_kmers(gzfin, cs);
-
- gzclose(gzfin);
+ count_fastq_kmers(stdin, cs);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- count_fastq_kmers(gzfin, cs);
-
- gzclose(gzfin);
+ count_fastq_kmers(fin, cs);
}
}
-/* Smith-Waterman alignments against sequences within a fastq file.
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-match :
+ * Smith-Waterman alignments against sequences within a fastq file.
*
- * Daniel Jones <dcjones@cs.washington.edu>
*/
-#include <stdlib.h>
-#include <zlib.h>
-#include <getopt.h>
+#include "fastq-common.h"
+#include "fastq-parse.h"
#include "swsse2/blosum62.h"
#include "swsse2/swsse2.h"
#include "swsse2/matrix.h"
#include "swsse2/swstriped.h"
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <getopt.h>
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
}
-void fastq_match(gzFile fin, FILE* fout,
+void fastq_match(FILE* fin, FILE* fout,
SwStripedData* sw_data,
unsigned char* query, int n,
SEARCH_OPTIONS* options)
int gap_ext = -options->gapExt;
int threshold = options->threshold;
- kseq_t* seq;
- seq = kseq_init(fin);
+ fastq_t* fqf = fastq_open(fin);
+ seq_t* seq = fastq_alloc_seq();
- while (kseq_read(seq) >= 0) {
+ while (fastq_next(fqf, seq)) {
fprintf(fout, "%s\t", seq->seq.s);
- convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
+ convert_sequence((unsigned char*)seq->seq.s, seq->seq.n);
score = swStripedByte(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
+ (unsigned char*)seq->seq.s, seq->seq.n,
gap_init, gap_ext,
sw_data->pvbQueryProf,
sw_data->pvH1,
sw_data->bias);
if (score >= 255) {
score = swStripedWord(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
+ (unsigned char*)seq->seq.s, seq->seq.n,
gap_init, gap_ext,
sw_data->pvbQueryProf,
sw_data->pvH1,
fprintf(fout, "%d\n", score);
}
- kseq_destroy(seq);
+ fastq_free_seq(seq);
+ fastq_close(fqf);
}
options.threshold = -1;
FILE* fin;
- gzFile gzfin;
help_flag = 0;
sw_data = swStripedInit(query, query_len, mat);
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(stdin, stdout, sw_data, query, query_len, &options);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(fin, stdout, sw_data, query, query_len, &options);
}
}
--- /dev/null
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
+
+#include "fastq-parse.h"
+#include "fastq-common.h"
+#include <stdlib.h>
+#include <ctype.h>
+
+
+static const size_t init_str_size = 128;
+static const size_t fastq_buf_size = 4096;
+
+static void fastq_alloc_str(str_t* s)
+{
+ s->s = malloc_or_die(init_str_size);
+ s->s[0] = '\0';
+ s->n = 0;
+ s->size = init_str_size;
+}
+
+
+static void fastq_expand_str(str_t* s)
+{
+ s->size *= 2;
+ realloc_or_die(s->s, s->size);
+}
+
+
+seq_t* fastq_alloc_seq()
+{
+ seq_t* seq = malloc_or_die(sizeof(seq_t));
+ fastq_alloc_str(&seq->id1);
+ fastq_alloc_str(&seq->seq);
+ fastq_alloc_str(&seq->id2);
+ fastq_alloc_str(&seq->qual);
+
+ return seq;
+}
+
+
+void fastq_free_seq(seq_t* seq)
+{
+ free(seq->id1.s);
+ free(seq->seq.s);
+ free(seq->id2.s);
+ free(seq->qual.s);
+ free(seq);
+}
+
+
+typedef enum
+{
+ STATE_EOF,
+ STATE_ERROR,
+ STATE_ID1,
+ STATE_SEQ,
+ STATE_ID2,
+ STATE_QUAL
+
+} fastq_state;
+
+
+fastq_t* fastq_open(FILE* f)
+{
+ fastq_t* fqf = malloc_or_die(sizeof(fastq_t));
+ or_die((int)(fqf->file = gzdopen(fileno(f), "rb")),
+ "Can not open gzip file.");
+
+ fqf->state = STATE_ID1;
+ fqf->buf = malloc_or_die(fastq_buf_size);
+ fqf->buf[0] = '\0';
+ fqf->c = fqf->buf;
+
+ return fqf;
+}
+
+
+void fastq_close(fastq_t* fqf)
+{
+ gzclose(fqf->file);
+ free(fqf->buf);
+ free(fqf);
+}
+
+
+void fastq_refill(fastq_t* f)
+{
+ int errnum;
+ const char* errmsg;
+
+ int n = gzread(f->file, f->buf, fastq_buf_size - 1);
+
+ if (n <= 0) {
+ if (gzeof(f->file)) {
+ f->state = STATE_EOF;
+ n = 0;
+ }
+ else {
+ errmsg = gzerror(f->file, &errnum);
+ fprintf(stderr, "I/O error: %s\n", errmsg);
+ exit(1);
+ }
+ }
+
+ f->buf[n] = '\0';
+ f->c = f->buf;
+}
+
+
+void fastq_get_line(fastq_t* f, str_t* s)
+{
+ int i = 0;
+
+ if (f->state == STATE_EOF) goto fastq_get_line_done;
+
+ while (1) {
+ switch (*f->c) {
+ case '\0':
+ fastq_refill(f);
+ if (f->state == STATE_EOF) goto fastq_get_line_done;
+ break;
+
+ case '\r':
+ f->c++;
+ break;
+
+ case '\n':
+ goto fastq_get_line_done;
+
+ default:
+ while (s->size < i + 2) {
+ fastq_expand_str(s);
+ }
+ if (s) s->s[i++] = *f->c;
+ f->c++;
+ }
+
+ }
+
+fastq_get_line_done:
+ if (s) {
+ s->s[i] = '\0';
+ s->n = i;
+ }
+}
+
+
+
+int fastq_next(fastq_t* f, seq_t* seq)
+{
+ if (f->state == STATE_EOF) return 0;
+
+ while (1) {
+
+ /* read more, if needed */
+ if (*f->c == '\0' ) {
+ fastq_refill(f);
+ if (f->state == STATE_EOF) return 0;
+ continue;
+ }
+
+ /* skip over leading whitespace */
+ else if (isspace(*f->c)) {
+ /* do nothing */
+ }
+
+ /* skip comments */
+ else if (*f->c == ';') {
+ fastq_get_line(f, NULL);
+ if (f->state == STATE_EOF) return 0;
+ }
+
+ /* read id1 */
+ else if (f->state == STATE_ID1) {
+ if (*f->c == '@') {
+ f->c++;
+ fastq_get_line(f, &seq->id1);
+ if (f->state == STATE_EOF) return 0;
+
+ f->state = STATE_SEQ;
+ }
+ else {
+ fprintf(stderr, "Malformed FASTQ file: expecting an '@', saw a '%c'\n", *f->c);
+ exit(1);
+ }
+ }
+
+ /* read sequence */
+ else if (f->state == STATE_SEQ) {
+ fastq_get_line(f, &seq->seq);
+ if (f->state == STATE_EOF) return 0;
+
+ f->state = STATE_ID2;
+ }
+
+ /* read id2 */
+ else if (f->state == STATE_ID2) {
+ if (*f->c == '+') {
+ f->c++;
+ fastq_get_line(f, &seq->id2);
+ if (f->state == STATE_EOF) return 0;
+
+ f->state = STATE_QUAL;
+ }
+ else {
+ fprintf(stderr, "Malformed FASTQ file: expecting an '+', saw a '%c'\n", *f->c);
+ exit(1);
+ }
+ }
+
+ /* read quality string */
+ else if (f->state == STATE_QUAL) {
+ fastq_get_line(f, &seq->qual);
+ if (f->state == STATE_EOF) return 1;
+
+ f->state = STATE_ID1;
+ break;
+ }
+
+ else {
+ fputs("Inexplicable error in fastq parser.\n", stderr);
+ exit(1);
+ }
+
+ f->c++;
+ }
+
+ return 1;
+}
+
+
--- /dev/null
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-parse :
+ * A parser for FASTQ files.
+ *
+ * This parser is mostly derivative of Heng Li's.
+ * See: http://lh3lh3.users.sourceforge.net/kseq.shtml
+ *
+ */
+
+#ifndef FASTQ_TOOLS_PARSE_H
+#define FASTQ_TOOLS_PARSE_H
+
+#include <stdio.h>
+#include <zlib.h>
+
+
+typedef struct
+{
+ char* s; /* null-terminated string */
+ size_t n; /* length of s */
+ size_t size; /* bytes allocated for s */
+} str_t;
+
+
+
+typedef struct
+{
+ str_t id1;
+ str_t seq;
+ str_t id2;
+ str_t qual;
+} seq_t;
+
+
+seq_t* fastq_alloc_seq();
+void fastq_free_seq(seq_t*);
+
+
+typedef struct
+{
+ gzFile file;
+ int state;
+ char* buf;
+ char* c;
+} fastq_t;
+
+
+fastq_t* fastq_open(FILE*);
+void fastq_close(fastq_t*);
+int fastq_next(fastq_t*, seq_t*);
+
+#endif
+
+++ /dev/null
-/* The MIT License
-
- Copyright (c) 2008 Genome Research Ltd (GRL).
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Contact: Heng Li <lh3@sanger.ac.uk> */
-
-/* Last Modified: 12APR2009 */
-
-#ifndef AC_KSEQ_H
-#define AC_KSEQ_H
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
-#define KS_SEP_TAB 1 // isspace() && !' '
-#define KS_SEP_MAX 1
-
-#define __KS_TYPE(type_t) \
- typedef struct __kstream_t { \
- char *buf; \
- int begin, end, is_eof; \
- type_t f; \
- } kstream_t;
-
-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
-
-#define __KS_BASIC(type_t, __bufsize) \
- static inline kstream_t *ks_init(type_t f) \
- { \
- kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
- ks->f = f; \
- ks->buf = (char*)malloc(__bufsize); \
- return ks; \
- } \
- static inline void ks_destroy(kstream_t *ks) \
- { \
- if (ks) { \
- free(ks->buf); \
- free(ks); \
- } \
- }
-
-#define __KS_GETC(__read, __bufsize) \
- static inline int ks_getc(kstream_t *ks) \
- { \
- if (ks->is_eof && ks->begin >= ks->end) return -1; \
- if (ks->begin >= ks->end) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end < __bufsize) ks->is_eof = 1; \
- if (ks->end == 0) return -1; \
- } \
- return (int)ks->buf[ks->begin++]; \
- }
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
- size_t l, m;
- char *s;
-} kstring_t;
-#endif
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#define __KS_GETUNTIL(__read, __bufsize) \
- static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
- { \
- if (dret) *dret = 0; \
- str->l = 0; \
- if (ks->begin >= ks->end && ks->is_eof) return -1; \
- for (;;) { \
- int i; \
- if (ks->begin >= ks->end) { \
- if (!ks->is_eof) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end < __bufsize) ks->is_eof = 1; \
- if (ks->end == 0) break; \
- } else break; \
- } \
- if (delimiter > KS_SEP_MAX) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (ks->buf[i] == delimiter) break; \
- } else if (delimiter == KS_SEP_SPACE) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (isspace(ks->buf[i])) break; \
- } else if (delimiter == KS_SEP_TAB) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
- } else i = 0; /* never come to here! */ \
- if (str->m - str->l < i - ks->begin + 1) { \
- str->m = str->l + (i - ks->begin) + 1; \
- kroundup32(str->m); \
- str->s = (char*)realloc(str->s, str->m); \
- } \
- memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
- str->l = str->l + (i - ks->begin); \
- ks->begin = i + 1; \
- if (i < ks->end) { \
- if (dret) *dret = ks->buf[i]; \
- break; \
- } \
- } \
- if (str->l == 0) { \
- str->m = 1; \
- str->s = (char*)calloc(1, 1); \
- } \
- str->s[str->l] = '\0'; \
- return str->l; \
- }
-
-#define KSTREAM_INIT(type_t, __read, __bufsize) \
- __KS_TYPE(type_t) \
- __KS_BASIC(type_t, __bufsize) \
- __KS_GETC(__read, __bufsize) \
- __KS_GETUNTIL(__read, __bufsize)
-
-#define __KSEQ_BASIC(type_t) \
- static inline kseq_t *kseq_init(type_t fd) \
- { \
- kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
- s->f = ks_init(fd); \
- return s; \
- } \
- static inline void kseq_rewind(kseq_t *ks) \
- { \
- ks->last_char = 0; \
- ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
- } \
- static inline void kseq_destroy(kseq_t *ks) \
- { \
- if (!ks) return; \
- free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
- ks_destroy(ks->f); \
- free(ks); \
- }
-
-/* Return value:
- >=0 length of the sequence (normal)
- -1 end-of-file
- -2 truncated quality string
- */
-#define __KSEQ_READ \
- static int kseq_read(kseq_t *seq) \
- { \
- int c; \
- kstream_t *ks = seq->f; \
- if (seq->last_char == 0) { /* then jump to the next header line */ \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
- if (c == -1) return -1; /* end of file */ \
- seq->last_char = c; \
- } /* the first header char has been read */ \
- seq->comment.l = seq->seq.l = seq->qual.l = 0; \
- if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
- if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
- if (isgraph(c)) { /* printable non-space character */ \
- if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
- seq->seq.m = seq->seq.l + 2; \
- kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
- seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
- } \
- seq->seq.s[seq->seq.l++] = (char)c; \
- } \
- } \
- if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
- seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
- if (c != '+') return seq->seq.l; /* FASTA */ \
- if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
- seq->qual.m = seq->seq.m; \
- seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
- } \
- ks_getuntil(ks, '\n', &seq->comment, &c); \
- if (c == -1) return -2; /* we should not stop here */ \
- while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
- if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
- seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
- seq->last_char = 0; /* we have not come to the next header line */ \
- if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
- return seq->seq.l; \
- }
-
-#define __KSEQ_TYPE(type_t) \
- typedef struct { \
- kstring_t name, comment, seq, qual; \
- int last_char; \
- kstream_t *f; \
- } kseq_t;
-
-#define KSEQ_INIT(type_t, __read) \
- KSTREAM_INIT(type_t, __read, 4096) \
- __KSEQ_TYPE(type_t) \
- __KSEQ_BASIC(type_t) \
- __KSEQ_READ
-
-#endif