3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
8 * Regular expression searches of the sequences within a FASTQ file.
/*
 * SET_BINARY_MODE(file): on DOS, OS/2, Windows and Cygwin builds, switch the
 * given stdio stream to binary mode via setmode(). The second definition
 * expands to nothing (its guarding #else is not visible in this excerpt).
 */
22 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
27 # define SET_BINARY_MODE(file)
34 "fastq-grep [OPTION]... PATTERN [FILE]...\n"
35 "Search for PATTERN in the read sequences in each FILE or standard input.\n"
36 "PATTERN, by default, is a perl compatible regular expression.\n\n"
38 " -h, --help print this message\n"
39 " -v, --invert-match select nonmatching entries\n"
40 " -c, --count output only the number of matching sequences\n"
/* Set to 1 by --invert-match: select the entries that do not match. */
44 static int invert_flag;
/* Set to 1 by --count: output only the number of selected entries. */
46 static int count_flag;
50 void fastq_grep(FILE* fin, FILE* fout, pcre* re)
56 fastq_t* fqf = fastq_open(fin);
57 seq_t* seq = fastq_alloc_seq();
59 while (fastq_next(fqf, seq)) {
60 rc = pcre_exec(re, /* pattern */
61 NULL, /* extre data */
62 seq->seq.s, /* subject */
63 seq->seq.n, /* subject length */
64 0, /* subject offset */
66 ovector, /* output vector */
67 3 ); /* output vector length */
69 if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || rc >= 0) {
70 if (count_flag) count++;
71 else fastq_print(fout, seq);
78 if (count_flag) fprintf(fout, "%zu\n", count);
/*
 * Program entry point: parses the command-line options, compiles PATTERN
 * with PCRE and runs fastq_grep() over standard input or over each FILE
 * argument, writing the result to standard output.
 */
83 int main(int argc, char* argv[])
/* Put both standard streams into binary mode where the platform needs it. */
85 SET_BINARY_MODE(stdin);
86 SET_BINARY_MODE(stdout);
/* Receives the error message from pcre_compile() for an invalid pattern. */
90 const char* pat_error;
/* Long options; getopt_long() stores 1 into the flag variable of each. */
104 static struct option long_options[] =
106 {"help", no_argument, &help_flag, 1},
107 {"invert-match", no_argument, &invert_flag, 1},
108 {"count", no_argument, &count_flag, 1},
113 opt = getopt_long(argc, argv, "hvc", long_options, &opt_idx);
/* getopt_long() returns -1 once every option has been consumed. */
115 if( opt == -1 ) break;
/* A long option that stores into a flag variable needs no more handling. */
119 if (long_options[opt_idx].flag != 0) break;
/* The first non-option argument is the pattern, and it is mandatory. */
149 if (optind >= argc) {
150 fprintf(stderr, "A pattern must be specified.\n");
154 pat = argv[optind++];
/* PCRE_CASELESS: the pattern is matched case-insensitively. */
155 re = pcre_compile( pat, PCRE_CASELESS, &pat_error, &pat_error_offset, NULL );
/* Report where in the pattern compilation failed, and why. */
159 fprintf(stderr, "Syntax error in PCRE pattern at offset: %d: %s\n",
160 pat_error_offset, pat_error );
/* With no FILE argument, or a single "-", read from standard input. */
165 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
166 fastq_grep(stdin, stdout, re);
/* Otherwise process every remaining argument as a file name, in order. */
169 for (; optind < argc; optind++) {
170 fin = fopen(argv[optind], "rb");
/* NOTE(review): this message appears to be printed for any fopen() failure,
 * not only a missing file - confirm the guard, which is not visible here. */
172 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
176 fastq_grep(fin, stdout, re);