3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
8 * Regular expression searches of the sequences within a FASTQ file.
/*
 * SET_BINARY_MODE(file): on MSDOS, OS/2, WIN32 and Cygwin builds this switches
 * the descriptor behind `file` to O_BINARY via setmode().
 */
22 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
/*
 * Empty definition: the macro expands to nothing.
 * NOTE(review): presumably this is the #else branch for all other platforms;
 * the #else / #endif directives are not visible in this excerpt.
 */
27 # define SET_BINARY_MODE(file)
/* Program name; passed to print_version() in main(). */
31 static const char* prog_name = "fastq-grep";
/*
 * Usage text: synopsis followed by one line per option (-i, -v, -c, -h, -V).
 * NOTE(review): the statement that emits these adjacent string literals is
 * not visible in this excerpt.
 */
37 "fastq-grep [OPTION]... PATTERN [FILE]...\n"
38 "Search for PATTERN in the read sequences in each FILE or standard input.\n"
39 "PATTERN, by default, is a perl compatible regular expression.\n\n"
41 " -i, --id match the read id (by default, sequence is matched)\n"
42 " -v, --invert-match select nonmatching entries\n"
43 " -c, --count output only the number of matching sequences\n"
44 " -h, --help print this message\n"
45 " -V, --version output version information and exit\n"
/*
 * Option flags. Both are wired into main()'s long_options table so that
 * getopt_long() stores 1 in them for --invert-match and --count; both are
 * read by fastq_grep().
 * NOTE(review): fastq_grep() and long_options also use an `id_flag` whose
 * declaration is not visible in this excerpt.
 */
/* Non-zero: select records that do NOT match the pattern. */
49 static int invert_flag;
/* Non-zero: print only the number of selected records, not the records. */
50 static int count_flag;
/*
 * Match every FASTQ record read from `fin` against the compiled pattern `re`.
 *
 * Selected records are printed to `fout` with fastq_print(); when count_flag
 * is set they are only counted and the total is written to `fout` at the end.
 *
 * NOTE(review): only part of this function is visible in this excerpt. The
 * declarations of rc, ovector and count, one pcre_exec() argument, and any
 * cleanup of `fqf` / `seq` are not shown.
 */
55 void fastq_grep(FILE* fin, FILE* fout, pcre* re)
61 fastq_t* fqf = fastq_open(fin);
62 seq_t* seq = fastq_alloc_seq();
/* Loop while fastq_next() returns non-zero (presumably one record per call
 * - TODO confirm against the parser). */
64 while (fastq_next(fqf, seq)) {
/* The subject string and its length are the read id when id_flag is set,
 * otherwise the read sequence. */
66 rc = pcre_exec(re, /* pattern */
67 NULL, /* extra data */
68 id_flag ? seq->id1.s : seq->seq.s,
69 id_flag ? seq->id1.n : seq->seq.n,
70 0, /* subject offset */
72 ovector, /* output vector */
73 3 ); /* output vector length */
/*
 * Select the record when the match result agrees with invert_flag:
 * PCRE_ERROR_NOMATCH when inverting, rc >= 0 otherwise. Any other error
 * code selects nothing in either mode.
 * The test is rc >= 0 rather than rc > 0 because, per the PCRE documentation,
 * pcre_exec() returns 0 for a successful match whose captured substrings do
 * not all fit in the output vector.
 */
75 if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || (!invert_flag && rc >= 0)) {
76 if (count_flag) count++;
77 else fastq_print(fout, seq);
/* In count mode the total is written as a single decimal line. */
84 if (count_flag) fprintf(fout, "%zu\n", count);
/*
 * Entry point: parse the command-line options, compile PATTERN with PCRE and
 * run fastq_grep() on standard input or on each FILE argument, writing to
 * stdout.
 *
 * NOTE(review): only part of this function is visible in this excerpt. Most
 * local declarations, the option-handling switch, the error checks and the
 * return paths are not shown.
 */
89 int main(int argc, char* argv[])
/* Put stdin/stdout in binary mode where SET_BINARY_MODE is non-empty. */
91 SET_BINARY_MODE(stdin);
92 SET_BINARY_MODE(stdout);
/* Receives the error message from pcre_compile(); printed on a bad pattern. */
96 const char* pat_error;
/*
 * Long options. The first three carry a flag pointer, so getopt_long() stores
 * 1 in that variable. --help and --version have a NULL flag and are reported
 * as 'h' / 'V'.
 */
110 static struct option long_options[] =
112 {"id", no_argument, &id_flag, 1},
113 {"invert-match", no_argument, &invert_flag, 1},
114 {"count", no_argument, &count_flag, 1},
115 {"help", no_argument, NULL, 'h'},
116 {"version", no_argument, NULL, 'V'},
/* None of the short options (-i -v -c -h -V) takes an argument. */
121 opt = getopt_long(argc, argv, "ivchV", long_options, &opt_idx);
/* -1 means there are no more options to parse. */
123 if( opt == -1 ) break;
/* A long option that stored into a flag variable needs no further handling. */
127 if (long_options[opt_idx].flag != 0) break;
/* NOTE(review): presumably the handler for -V/--version; the case label is
 * not visible in this excerpt. */
149 print_version(stdout, prog_name);
/* The first non-option argument is the pattern, and it is required. */
160 if (optind >= argc) {
161 fprintf(stderr, "A pattern must be specified.\n");
/* Consume the pattern; the remaining arguments are input files. */
165 pat = argv[optind++];
/* The pattern is always compiled with PCRE_CASELESS. */
166 re = pcre_compile( pat, PCRE_CASELESS, &pat_error, &pat_error_offset, NULL );
/* Reports the offset and message that pcre_compile() filled in.
 * NOTE(review): presumably guarded by a failed-compile check that is not
 * visible in this excerpt. */
170 fprintf(stderr, "Syntax error in PCRE pattern at offset: %d: %s\n",
171 pat_error_offset, pat_error );
/* Read standard input when no FILE is given, or when the only FILE is "-". */
176 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
177 fastq_grep(stdin, stdout, re);
/*
 * Otherwise process each named file in order.
 * NOTE(review): as far as this excerpt shows, a "-" among several FILE
 * arguments is passed to fopen() as a literal file name.
 */
180 for (; optind < argc; optind++) {
181 fin = fopen(argv[optind], "rb");
/* NOTE(review): presumably printed when fopen() fails (the check is not
 * visible); fopen() can fail for reasons other than a missing file, so the
 * message may mislead. */
183 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
187 fastq_grep(fin, stdout, re);