From d54d9ee8883cd106868e1c798542ce4de593f627 Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Fri, 8 Jul 2011 14:59:52 -0700 Subject: [PATCH] a few fixes, and a program to tabulate quality scores --- configure.ac | 4 +- src/Makefile.am | 4 +- src/fastq-match.c | 8 +-- src/fastq-qual.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++ src/fastq-uniq.c | 3 +- src/parse.c | 6 +- 6 files changed, 163 insertions(+), 11 deletions(-) create mode 100644 src/fastq-qual.c diff --git a/configure.ac b/configure.ac index 07fef16..522949e 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,8 @@ AC_CONFIG_MACRO_DIR([m4]) AC_PROG_CC AM_PROG_CC_C_O -opt_CFLAGS="--std=c99 -Wall -g -O3" -dbg_CFLAGS="--std=c99 -Wall -g -O0" +opt_CFLAGS="--std=c99 -Wall -Wextra -pedantic -g -O3" +dbg_CFLAGS="--std=c99 -Wall -Wextra -pedantic -g -O0" AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], diff --git a/src/Makefile.am b/src/Makefile.am index 989ae69..080e884 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,5 +1,5 @@ -bin_PROGRAMS = fastq-grep fastq-kmers fastq-match fastq-uniq +bin_PROGRAMS = fastq-grep fastq-kmers fastq-match fastq-uniq fastq-qual fastq_common_src=common.h common.c fastq_parse_src=parse.h parse.c @@ -18,4 +18,6 @@ fastq_match_LDADD = -lz fastq_uniq_SOURCES = fastq-uniq.c $(fastq_common_src) $(fastq_parse_src) $(fastq_hash_src) fastq_uniq_LDADD = -lz +fastq_qual_SOURCES = fastq-qual.c $(fastq_common_src) $(fastq_parse_src) $(fastq_qual_src) +fastq_qual_LDADD = -lz diff --git a/src/fastq-match.c b/src/fastq-match.c index 53af134..bd040a6 100644 --- a/src/fastq-match.c +++ b/src/fastq-match.c @@ -43,9 +43,7 @@ void print_help() } -void fastq_match(FILE* fin, FILE* fout, - sw_t* sw, - unsigned char* query, int n) +void fastq_match(FILE* fin, FILE* fout, sw_t* sw) { int score; @@ -131,7 +129,7 @@ int main(int argc, char* argv[]) sw = fastq_alloc_sw(query, query_len); if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - fastq_match(stdin, stdout, sw, query, query_len); + fastq_match(stdin, stdout, sw); } else { for (; optind < argc; optind++) { @@ -141,7 +139,7 @@ int main(int argc, char* argv[]) continue; } - fastq_match(fin, stdout, sw, query, query_len); + fastq_match(fin, stdout, sw); } } diff --git a/src/fastq-qual.c b/src/fastq-qual.c new file mode 100644 index 0000000..5201376 --- /dev/null +++ b/src/fastq-qual.c @@ -0,0 +1,149 @@ + +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. Jones + * + * fastq-qual : + * Collect quality score statistics. + * + */ + +#include "common.h" +#include "parse.h" +#include +#include +#include +#include + + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +static const char* prog_name = "fastq-grep"; + +void print_help() +{ + fprintf(stdout, +"fastq-qual [OPTION]... [FILE]...\n" +"Output a tab-delimnated table such that row i, column j, given \n" +"the number of times that quality score i occured in read position j\n\n." +"Options:\n" +" -h, --help print this message\n" +" -V, --version output version information and exit\n"); +} + + + +void tally_quals(FILE* fin, unsigned int** xs, size_t* n) +{ + seq_t* seq = fastq_alloc_seq(); + fastq_t* fqf = fastq_open(fin); + + size_t i; + + while (fastq_next(fqf, seq)) { + if (seq->qual.n > *n) { + *xs = realloc_or_die(*xs, 255 * seq->qual.n * sizeof(unsigned int)); + memset(*xs + *n, 0, 255 * (seq->qual.n - *n) * sizeof(unsigned int)); + *n = seq->qual.n; + } + + + for (i = 0; i < seq->qual.n; ++i) { + (*xs)[i * 255 + (int) seq->qual.s[i]] += 1; + } + } + + fastq_free_seq(seq); + fastq_close(fqf); +} + + + + +void print_table(FILE* fout, unsigned int* xs, size_t n) +{ + size_t i, j; + + for (j = 0; j < 255; ++j) { + fprintf(fout, "%u", xs[j]); + for (i = 1; i < n; ++i) { + fprintf(fout, "\t%u", xs[i * 255 + j]); + } + fputc('\n', fout); + } +} + + + +int main(int argc, char* argv[]) +{ + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + FILE* fin; + + int opt; + int opt_idx; + static struct option long_options[] = + { + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {0, 0, 0, 0} + }; + + while (1) { + opt = getopt_long(argc, argv, "hV", long_options, &opt_idx); + + if( opt == -1 ) break; + + switch (opt) { + case 'h': + print_help(); + return 0; + + case 'V': + print_version(stdout, prog_name); + return 0; + + case '?': + return 1; + + default: + abort(); + } + } + + + size_t n = 0; + unsigned int* xs = NULL; + + if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { + tally_quals(stdin, &xs, &n); + } + else { + for (; optind < argc; optind++) { + fin = fopen(argv[optind], "rb"); + if (fin == NULL) { + fprintf(stderr, "No such file '%s'.\n", argv[optind]); + continue; + } + + tally_quals(fin, &xs, &n); + } + } + + print_table(stdout, xs, n); + + free(xs); + + return 0; +} + + diff --git a/src/fastq-uniq.c b/src/fastq-uniq.c index 32d344e..2e50762 100644 --- a/src/fastq-uniq.c +++ b/src/fastq-uniq.c @@ -13,6 +13,7 @@ #include "hash.h" #include "parse.h" #include +#include #include #include @@ -84,7 +85,7 @@ void print_hash_table(FILE* fout, hash_table* T) size_t i; for (i = 0; i < T->m; i++) { - fprintf(fout, ">unique-read-%07zu (%zu copies)\n", i, S[i]->count); + fprintf(fout, ">unique-read-%07zu (%"PRIu32" copies)\n", i, S[i]->count); fwrite(S[i]->value, S[i]->len, sizeof(char), fout); fprintf(fout, "\n"); } diff --git a/src/parse.c b/src/parse.c index 9a7774d..1ccc7de 100644 --- a/src/parse.c +++ b/src/parse.c @@ -67,7 +67,7 @@ typedef enum fastq_t* fastq_open(FILE* f) { fastq_t* fqf = malloc_or_die(sizeof(fastq_t)); - or_die((int)(fqf->file = gzdopen(fileno(f), "rb")), + or_die((int)((fqf->file = gzdopen(fileno(f), "rb")) != NULL), "Can not open gzip file."); fqf->state = STATE_ID1; @@ -113,7 +113,7 @@ void fastq_refill(fastq_t* f) void fastq_get_line(fastq_t* f, str_t* s) { - int i = 0; + size_t i = 0; if (f->state == STATE_EOF) goto fastq_get_line_done; @@ -169,10 +169,12 @@ int fastq_next(fastq_t* f, seq_t* seq) } /* skip comments */ + /* else if (*f->c == ';') { fastq_get_line(f, NULL); if (f->state == STATE_EOF) return 0; } + */ /* read id1 */ else if (f->state == STATE_ID1) { -- 2.39.2