X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ffastq-grep.c;h=4d1603f7ced45f3d34e28d00e60204689031d4a8;hb=d6b81119e55cf77c1e4c17b9e9026abc01e22b96;hp=f7b3913beb4b2869a5fd3d4298c24aa1fd0387f7;hpb=48a5939e574874106f1450fd278f602b731d2a83;p=fastq-tools.git diff --git a/src/fastq-grep.c b/src/fastq-grep.c index f7b3913..4d1603f 100644 --- a/src/fastq-grep.c +++ b/src/fastq-grep.c @@ -10,8 +10,8 @@ */ -#include "fastq-common.h" -#include "fastq-parse.h" +#include "common.h" +#include "parse.h" #include #include #include @@ -28,62 +28,62 @@ #endif +static const char* prog_name = "fastq-grep"; + + void print_help() { - fprintf( stderr, + fprintf(stdout, "fastq-grep [OPTION]... PATTERN [FILE]...\n" "Search for PATTERN in the read sequences in each FILE or standard input.\n" "PATTERN, by default, is a perl compatible regular expression.\n\n" "Options:\n" -" -h, --help print this message\n" +" -i, --id match the read id (by default, sequence is matched)\n" " -v, --invert-match select nonmatching entries\n" +" -m, --mismatches=FILE output mismatching entries to the given file\n" " -c, --count output only the number of matching sequences\n" +" -h, --help print this message\n" +" -V, --version output version information and exit\n" ); } static int invert_flag; -static int help_flag; static int count_flag; +static int id_flag; -void print_fastq_entry(FILE* fout, seq_t* seq) -{ - fprintf(fout, "@%s\n%s\n+%s\n%s\n", - seq->id1.s, - seq->seq.s, - seq->id2.s, - seq->qual.s ); -} - - -void fastq_grep(FILE* fin, FILE* fout, pcre* re) +void fastq_grep(FILE* fin, FILE* fout, FILE* mismatch_file, pcre* re) { int rc; int ovector[3]; size_t count = 0; - fastq_t* fqf = fastq_open(fin); - seq_t* seq = fastq_alloc_seq(); + fastq_t* fqf = fastq_create(fin); + seq_t* seq = seq_create(); + + while (fastq_read(fqf, seq)) { - while (fastq_next(fqf, seq)) { rc = pcre_exec(re, /* pattern */ - NULL, /* extre data */ - seq->seq.s, /* subject */ - seq->seq.n, /* subject length */ + NULL, /* extra data */ + id_flag ? seq->id1.s : seq->seq.s, + id_flag ? seq->id1.n : seq->seq.n, 0, /* subject offset */ 0, /* options */ ovector, /* output vector */ 3 ); /* output vector length */ - if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || rc >= 0) { + if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || (!invert_flag && rc >= 0)) { if (count_flag) count++; - else print_fastq_entry(fout, seq); + else fastq_print(fout, seq); + } + else if (mismatch_file) { + fastq_print(mismatch_file, seq); } } - fastq_free_seq(seq); - fastq_close(fqf); + seq_free(seq); + fastq_free(fqf); if (count_flag) fprintf(fout, "%zu\n", count); } @@ -104,25 +104,29 @@ int main(int argc, char* argv[]) invert_flag = 0; - help_flag = 0; count_flag = 0; + id_flag = 0; int opt; int opt_idx; + FILE* mismatch_file = NULL; static struct option long_options[] = { - {"help", no_argument, &help_flag, 1}, + {"id", no_argument, &id_flag, 1}, {"invert-match", no_argument, &invert_flag, 1}, - {"count", no_argument, &count_flag, 1}, + {"mismatches", required_argument, NULL, 'm'}, + {"count", no_argument, &count_flag, 1}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, {0, 0, 0, 0} }; while (1) { - opt = getopt_long(argc, argv, "hvc", long_options, &opt_idx); + opt = getopt_long(argc, argv, "ivmchV", long_options, &opt_idx); - if( opt == -1 ) break; + if (opt == -1) break; switch (opt) { case 0: @@ -131,18 +135,34 @@ int main(int argc, char* argv[]) } break; - case 'h': - help_flag = 1; + case 'i': + id_flag = 1; break; case 'v': invert_flag = 1; break; + case 'm': + mismatch_file = fopen(optarg, "w"); + if (mismatch_file == NULL) { + fprintf(stderr, "No such file '%s'.\n", optarg); + return 1; + } + break; + case 'c': count_flag = 1; break; + case 'h': + print_help(); + return 0; + + case 'V': + print_version(stdout, prog_name); + return 0; + case '?': return 1; @@ -151,11 +171,6 @@ int main(int argc, char* argv[]) } } - if (help_flag) { - print_help(); - return 0; - } - if (optind >= argc) { fprintf(stderr, "A pattern must be specified.\n"); return 1; @@ -173,7 +188,7 @@ int main(int argc, char* argv[]) if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - fastq_grep(stdin, stdout, re); + fastq_grep(stdin, stdout, mismatch_file, re); } else { for (; optind < argc; optind++) { @@ -183,13 +198,14 @@ int main(int argc, char* argv[]) continue; } - fastq_grep(fin, stdout, re); + fastq_grep(fin, stdout, mismatch_file, re); fclose(fin); } } pcre_free(re); + if (mismatch_file) fclose(mismatch_file); return 0; }