From: Daniel Jones Date: Mon, 12 Dec 2011 20:10:55 +0000 (-0800) Subject: An option for fastq-grep to output non-matching entries to a file (e.g. for splitting... X-Git-Url: https://git.donarmstrong.com/?p=fastq-tools.git;a=commitdiff_plain;h=07ffc4895d705c5207492a002ffce4576dac5fc5 An option for fastq-grep to output non-matching entries to a file (e.g. for splitting a fastq file) --- diff --git a/doc/fastq-grep.1 b/doc/fastq-grep.1 index fb792b4..89bae04 100644 --- a/doc/fastq-grep.1 +++ b/doc/fastq-grep.1 @@ -21,6 +21,9 @@ Match the read ID (by default, the sequence is matched). \fB\-v\fR, \fB\-\-invert\-match\fR Invert the sense of matching, to select non-matching entries. .TP +\fB\-m\fR, \fB\-\-mismatches=FILE\fR +Output non-matching entries to the given file. +.TP \fB\-c\fR, \fB\-\-count\fR Suppress normal output; instead output the number of matching (or, non-matching, with '-v') entries. diff --git a/src/fastq-grep.c b/src/fastq-grep.c index 994ab58..8eb6f7f 100644 --- a/src/fastq-grep.c +++ b/src/fastq-grep.c @@ -40,6 +40,7 @@ void print_help() "Options:\n" " -i, --id match the read id (by default, sequence is matched)\n" " -v, --invert-match select nonmatching entries\n" +" -m, --mismatches=FILE output mismatching entries to the given file\n" " -c, --count output only the number of matching sequences\n" " -h, --help print this message\n" " -V, --version output version information and exit\n" @@ -52,7 +53,7 @@ static int id_flag; -void fastq_grep(FILE* fin, FILE* fout, pcre* re) +void fastq_grep(FILE* fin, FILE* fout, FILE* mismatch_file, pcre* re) { int rc; int ovector[3]; @@ -76,6 +77,9 @@ void fastq_grep(FILE* fin, FILE* fout, pcre* re) if (count_flag) count++; else fastq_print(fout, seq); } + else if (mismatch_file) { + fastq_print(mismatch_file, seq); + } } fastq_free_seq(seq); @@ -106,11 +110,13 @@ int main(int argc, char* argv[]) int opt; int opt_idx; + FILE* mismatch_file = NULL; static struct option long_options[] = { {"id", no_argument, &id_flag, 1}, {"invert-match", no_argument, &invert_flag, 1}, + {"mismatches", required_argument, NULL, 'm'}, {"count", no_argument, &count_flag, 1}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, @@ -118,7 +124,7 @@ int main(int argc, char* argv[]) }; while (1) { - opt = getopt_long(argc, argv, "ivchV", long_options, &opt_idx); + opt = getopt_long(argc, argv, "ivmchV", long_options, &opt_idx); if (opt == -1) break; @@ -137,6 +143,14 @@ int main(int argc, char* argv[]) invert_flag = 1; break; + case 'm': + mismatch_file = fopen(optarg, "w"); + if (mismatch_file == NULL) { + fprintf(stderr, "No such file '%s'.\n", optarg); + return 1; + } + break; + case 'c': count_flag = 1; break; @@ -174,7 +188,7 @@ int main(int argc, char* argv[]) if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - fastq_grep(stdin, stdout, re); + fastq_grep(stdin, stdout, mismatch_file, re); } else { for (; optind < argc; optind++) { @@ -184,13 +198,14 @@ int main(int argc, char* argv[]) continue; } - fastq_grep(fin, stdout, re); + fastq_grep(fin, stdout, mismatch_file, re); fclose(fin); } } pcre_free(re); + if (mismatch_file) fclose(mismatch_file); return 0; }