3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
8 * Regular expression searches of the sequences within a FASTQ file.
13 #include "fastq-common.h"
14 #include "fastq-parse.h"
/*
 * SET_BINARY_MODE(file): when any of MSDOS, OS2, WIN32 or __CYGWIN__ is
 * defined, the macro switches the stream's descriptor to O_BINARY through
 * setmode(fileno(file), ...). The second definition expands to nothing;
 * presumably it is the fallback branch for every other platform (the
 * #else/#endif lines are not visible in this excerpt).
 */
22 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
27 # define SET_BINARY_MODE(file)
/*
 * Usage/help text: synopsis, a one-line description, and the -h, -v and -c
 * options. NOTE(review): the call that prints these literals is not visible
 * in this excerpt -- presumably the help routine; confirm.
 */
34 "fastq-grep [OPTION]... PATTERN [FILE]...\n"
35 "Search for PATTERN in the read sequences in each FILE or standard input.\n"
36 "PATTERN, by default, is a perl compatible regular expression.\n\n"
38 " -h, --help print this message\n"
39 " -v, --invert-match select nonmatching entries\n"
40 " -c, --count output only the number of matching sequences\n"
/* Set to 1 by the --invert-match long option; read by fastq_grep() when
 * deciding whether an entry is selected. */
44 static int invert_flag;
/* Set to 1 by the --count long option; when non-zero fastq_grep() tallies
 * selected entries and prints the tally instead of the entries. */
46 static int count_flag;
/*
 * Write a single entry to `fout` with the format "@%s\n%s\n+%s\n%s\n", i.e.
 * four lines where the first is prefixed with '@' and the third with '+'.
 * NOTE(review): the fprintf arguments are not visible in this excerpt;
 * presumably they are four string fields of `seq` -- confirm.
 */
50 void print_fastq_entry(FILE* fout, seq_t* seq)
52 fprintf(fout, "@%s\n%s\n+%s\n%s\n",
/*
 * Read entries from `fin` and test each entry's sequence against the compiled
 * pattern `re` with pcre_exec().
 *
 * An entry is selected when
 *   - invert_flag is clear and the pattern matched (rc >= 0), or
 *   - invert_flag is set and the pattern did not match (PCRE_ERROR_NOMATCH).
 * Any other negative pcre_exec() result never selects an entry.
 *
 * Selected entries are written to `fout` with print_fastq_entry(); when
 * count_flag is set they are only tallied and the tally is written to `fout`
 * as a single "%zu\n" line.
 */
60 void fastq_grep(FILE* fin, FILE* fout, pcre* re)
66 fastq_t* fqf = fastq_open(fin);
67 seq_t* seq = fastq_alloc_seq();
/* fastq_next() drives the loop; presumably it returns zero once the input is
 * exhausted -- confirm against the parser. */
69 while (fastq_next(fqf, seq)) {
70 rc = pcre_exec(re, /* pattern */
71 NULL, /* extra data */
72 seq->seq.s, /* subject */
73 seq->seq.n, /* subject length */
74 0, /* subject offset */
76 ovector, /* output vector */
77 3 ); /* output vector length */
/* A successful match may only select the entry when NOT inverting; without
 * the !invert_flag guard, --invert-match would select every entry (both the
 * matching and the non-matching ones). */
79 if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || (!invert_flag && rc >= 0)) {
80 if (count_flag) count++;
81 else print_fastq_entry(fout, seq);
/* In count mode only the tally is reported. */
88 if (count_flag) fprintf(fout, "%zu\n", count);
/*
 * Program entry point: parses the command-line options, compiles PATTERN with
 * pcre_compile(), then runs fastq_grep() over standard input or over each
 * named file in turn, always writing to stdout.
 */
93 int main(int argc, char* argv[])
/* Put the standard streams into binary mode (a no-op where the macro is
 * defined empty). */
95 SET_BINARY_MODE(stdin);
96 SET_BINARY_MODE(stdout);
/* Filled in by pcre_compile() when the pattern fails to compile. */
100 const char* pat_error;
101 int pat_error_offset;
/* Each long option stores 1 directly into its flag variable. */
114 static struct option long_options[] =
116 {"help", no_argument, &help_flag, 1},
117 {"invert-match", no_argument, &invert_flag, 1},
118 {"count", no_argument, &count_flag, 1},
123 opt = getopt_long(argc, argv, "hvc", long_options, &opt_idx);
/* getopt_long() returns -1 once all options have been consumed. */
125 if( opt == -1 ) break;
/* A long option with a non-null flag pointer has already stored its value, so
 * there is nothing further to handle for it. */
129 if (long_options[opt_idx].flag != 0) break;
/* The first non-option argument is mandatory: it is the pattern. */
159 if (optind >= argc) {
160 fprintf(stderr, "A pattern must be specified.\n");
164 pat = argv[optind++];
/* The pattern is compiled with PCRE_CASELESS, so matching ignores case. */
165 re = pcre_compile( pat, PCRE_CASELESS, &pat_error, &pat_error_offset, NULL );
169 fprintf(stderr, "Syntax error in PCRE pattern at offset: %d: %s\n",
170 pat_error_offset, pat_error );
/* No file arguments, or exactly one argument equal to "-", means read from
 * standard input. */
175 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
176 fastq_grep(stdin, stdout, re);
/* Otherwise every remaining argument is opened and searched in order. */
179 for (; optind < argc; optind++) {
180 fin = fopen(argv[optind], "rb");
/* NOTE(review): presumably reached when fopen() fails; the message assumes a
 * missing file although fopen() can also fail for other reasons (e.g.
 * permissions) -- consider reporting strerror(errno). */
182 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
/* NOTE(review): no fclose(fin) is visible in this excerpt -- confirm the
 * stream is closed after each file. */
186 fastq_grep(fin, stdout, re);