X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ffastq-kmers.c;h=92f4314141133c14bd0e4dc8c7a6507fb76daf53;hb=d6b81119e55cf77c1e4c17b9e9026abc01e22b96;hp=43fa3026b8bf02919cced34663fc14fc4999d066;hpb=54e8755d8c3bf9df0e27aae9ac6ee8976d5943c4;p=fastq-tools.git diff --git a/src/fastq-kmers.c b/src/fastq-kmers.c index 43fa302..92f4314 100644 --- a/src/fastq-kmers.c +++ b/src/fastq-kmers.c @@ -1,22 +1,23 @@ -/* - * fastq-kmers: kmer frequences within fastq files +/* + * This file is part of fastq-tools. * - * Febuary 2011 / Daniel Jones + * Copyright (c) 2011 by Daniel C. Jones + * + * fastq-kmers : + * Tabulate k-mer frequencies with FASTQ files. * */ - - +#include "common.h" +#include "parse.h" +#include #include #include #include #include #include -#include "kseq.h" -KSEQ_INIT(gzFile, gzread) - #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include @@ -26,6 +27,7 @@ KSEQ_INIT(gzFile, gzread) # define SET_BINARY_MODE(file) #endif +static const char* prog_name = "fastq-kmers"; void print_help() { @@ -34,12 +36,12 @@ void print_help() "Print kmer counts for the given kmer size.\n" "Output is in two tab-seperated columns for kmer and frequency.\n\n" "Options:\n" +" -k NUM, --size=NUM kmer size (default: 1)\n" " -h, --help print this message\n" -" -k, --size kmer size (default: 1)\n" +" -V, --version output version information and exit\n" ); } -static int help_flag; static int k; int packkmer( const char* s, uint32_t* kmer, int k ) @@ -105,15 +107,16 @@ void unpackkmer( uint32_t kmer, char* s, int k ) } -void count_fastq_kmers(gzFile* fin, uint32_t* cs) +void count_fastq_kmers(FILE* fin, uint32_t* cs) { - kseq_t* seq = kseq_init(fin); + seq_t* seq = seq_create(); + fastq_t* fqf = fastq_create(fin); int i; int n; uint32_t kmer; - while (kseq_read(seq) >= 0) { - n = (int)seq->seq.l - k + 1; + while (fastq_read(fqf, seq)) { + n = (int)seq->seq.n - k + 1; for (i = 0; i < n; i++) { if( packkmer(seq->seq.s + i, &kmer, k) ) { cs[kmer]++; @@ -121,7 +124,8 @@ void count_fastq_kmers(gzFile* fin, uint32_t* cs) } } - kseq_destroy(seq); + seq_free(seq); + fastq_free(fqf); } @@ -147,26 +151,25 @@ int main(int argc, char* argv[]) SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); - help_flag = 0; k = 1; uint32_t n; /* number of kmers: 4^k */ uint32_t* cs; /* counts */ FILE* fin; - gzFile gzfin; int opt; int opt_idx; static struct option long_options[] = { - {"help", no_argument, &help_flag, 1}, - {"size", no_argument, 0, 0}, + {"size", no_argument, 0, 0}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, {0, 0, 0, 0} }; while (1) { - opt = getopt_long(argc, argv, "hk:", long_options, &opt_idx); + opt = getopt_long(argc, argv, "k:hV", long_options, &opt_idx); if( opt == -1 ) break; @@ -180,14 +183,18 @@ int main(int argc, char* argv[]) } break; - case 'h': - help_flag = 1; - break; - case 'k': k = atoi(optarg); break; + case 'h': + print_help(); + return 0; + + case 'V': + print_version(stdout, prog_name); + return 0; + case '?': return 1; @@ -196,11 +203,6 @@ int main(int argc, char* argv[]) } } - if (help_flag) { - print_help(); - return 0; - } - if (k < 1) { fprintf(stderr, "Kmer size must be at least 1."); return 1; @@ -222,15 +224,7 @@ int main(int argc, char* argv[]) } if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - gzfin = gzdopen( fileno(stdin), "rb" ); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file 'stdin'.\n"); - return 1; - } - - count_fastq_kmers(gzfin, cs); - - gzclose(gzfin); + count_fastq_kmers(stdin, cs); } else { for (; optind < argc; optind++) { @@ -240,15 +234,7 @@ int main(int argc, char* argv[]) continue; } - gzfin = gzdopen(fileno(fin), "rb"); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file '%s'.\n", argv[optind]); - continue; - } - - count_fastq_kmers(gzfin, cs); - - gzclose(gzfin); + count_fastq_kmers(fin, cs); } }