-/*
- * fastq-kmers: kmer frequences within fastq files
+/*
+ * This file is part of fastq-tools.
*
- * Febuary 2011 / Daniel Jones <dcjones@cs.washington.edu>
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-kmers :
+ * Tabulate k-mer frequencies within FASTQ files.
*
*/
-
-
+#include "common.h"
+#include "parse.h"
+#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <getopt.h>
#include <zlib.h>
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# define SET_BINARY_MODE(file)
#endif
+static const char* prog_name = "fastq-kmers";
void print_help()
{
"Print kmer counts for the given kmer size.\n"
"Output is in two tab-seperated columns for kmer and frequency.\n\n"
"Options:\n"
+" -k NUM, --size=NUM kmer size (default: 1)\n"
" -h, --help print this message\n"
-" -k, --size kmer size (default: 1)\n"
+" -V, --version output version information and exit\n"
);
}
-static int help_flag;
static int k;
int packkmer( const char* s, uint32_t* kmer, int k )
}
-void count_fastq_kmers(gzFile* fin, uint32_t* cs)
+void count_fastq_kmers(FILE* fin, uint32_t* cs)
{
- kseq_t* seq = kseq_init(fin);
+ seq_t* seq = seq_create();
+ fastq_t* fqf = fastq_create(fin);
int i;
int n;
uint32_t kmer;
- while (kseq_read(seq) >= 0) {
- n = (int)seq->seq.l - k + 1;
+ while (fastq_read(fqf, seq)) {
+ n = (int)seq->seq.n - k + 1;
for (i = 0; i < n; i++) {
if( packkmer(seq->seq.s + i, &kmer, k) ) {
cs[kmer]++;
}
}
- kseq_destroy(seq);
+ seq_free(seq);
+ fastq_free(fqf);
}
SET_BINARY_MODE(stdin);
SET_BINARY_MODE(stdout);
- help_flag = 0;
k = 1;
uint32_t n; /* number of kmers: 4^k */
uint32_t* cs; /* counts */
FILE* fin;
- gzFile gzfin;
int opt;
int opt_idx;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
- {"size", no_argument, 0, 0},
+ {"size", required_argument, 0, 'k'}, /* takes NUM (--size=NUM); returns 'k' so it shares the -k handler */
+ {"help", no_argument, 0, 'h'},
+ {"version", no_argument, 0, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "hk:", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "k:hV", long_options, &opt_idx);
if( opt == -1 ) break;
}
break;
- case 'h':
- help_flag = 1;
- break;
-
case 'k':
k = atoi(optarg);
break;
+ case 'h':
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
+
case '?':
return 1;
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
-
if (k < 1) {
fprintf(stderr, "Kmer size must be at least 1.");
return 1;
}
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- count_fastq_kmers(gzfin, cs);
-
- gzclose(gzfin);
+ count_fastq_kmers(stdin, cs);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- count_fastq_kmers(gzfin, cs);
-
- gzclose(gzfin);
+ count_fastq_kmers(fin, cs);
}
}