3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
8 * Regular expression searches of the sequences within a FASTQ file.
/*
 * SET_BINARY_MODE(file): put a stdio stream into binary mode on platforms
 * that distinguish text from binary streams; expands to nothing elsewhere.
 *
 * The two definitions must live in separate branches of the conditional,
 * otherwise the empty definition would redefine the real one.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>   /* O_BINARY */
#  include <io.h>      /* setmode */
#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif
/* Name of this program; main() passes it to print_version(). */
static const char *prog_name = "fastq-grep";
37 "fastq-grep [OPTION]... PATTERN [FILE]...\n"
38 "Search for PATTERN in the read sequences in each FILE or standard input.\n"
39 "PATTERN, by default, is a perl compatible regular expression.\n\n"
41 " -v, --invert-match select nonmatching entries\n"
42 " -c, --count output only the number of matching sequences\n"
43 " -h, --help print this message\n"
44 " -V, --version output version information and exit\n"
/*
 * Command-line switches. getopt_long() stores 1 into these through the
 * long_options table in main(); fastq_grep() reads them.
 */
static int invert_flag = 0;  /* nonzero: select nonmatching entries (--invert-match) */
static int count_flag  = 0;  /* nonzero: output only the number of selected sequences (--count) */
53 void fastq_grep(FILE* fin, FILE* fout, pcre* re)
59 fastq_t* fqf = fastq_open(fin);
60 seq_t* seq = fastq_alloc_seq();
62 while (fastq_next(fqf, seq)) {
63 rc = pcre_exec(re, /* pattern */
64 NULL, /* extre data */
65 seq->seq.s, /* subject */
66 seq->seq.n, /* subject length */
67 0, /* subject offset */
69 ovector, /* output vector */
70 3 ); /* output vector length */
72 if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || rc >= 0) {
73 if (count_flag) count++;
74 else fastq_print(fout, seq);
81 if (count_flag) fprintf(fout, "%zu\n", count);
86 int main(int argc, char* argv[])
88 SET_BINARY_MODE(stdin);
89 SET_BINARY_MODE(stdout);
93 const char* pat_error;
106 static struct option long_options[] =
108 {"invert-match", no_argument, &invert_flag, 1},
109 {"count", no_argument, &count_flag, 1},
110 {"help", no_argument, NULL, 'h'},
111 {"version", no_argument, NULL, 'V'},
116 opt = getopt_long(argc, argv, "vchV", long_options, &opt_idx);
118 if( opt == -1 ) break;
122 if (long_options[opt_idx].flag != 0) break;
140 print_version(stdout, prog_name);
151 if (optind >= argc) {
152 fprintf(stderr, "A pattern must be specified.\n");
156 pat = argv[optind++];
157 re = pcre_compile( pat, PCRE_CASELESS, &pat_error, &pat_error_offset, NULL );
161 fprintf(stderr, "Syntax error in PCRE pattern at offset: %d: %s\n",
162 pat_error_offset, pat_error );
167 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
168 fastq_grep(stdin, stdout, re);
171 for (; optind < argc; optind++) {
172 fin = fopen(argv[optind], "rb");
174 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
178 fastq_grep(fin, stdout, re);