ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = src
+SUBDIRS = src doc
-AC_INIT( [fastq-tools], [0.1], [dcjones@cs.washington.ed] )
+AC_INIT( [fastq-tools], [0.2], [dcjones@cs.washington.edu] )
AM_INIT_AUTOMAKE( [foreign -Wall -Werror] )
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
AC_PROG_CC
AM_PROG_CC_C_O
-opt_CFLAGS="-Wall -g -O3"
-dbg_CFLAGS="-Wall -g -O0"
+opt_CFLAGS="--std=c99 -Wall -g -O3"
+dbg_CFLAGS="--std=c99 -Wall -g -O0"
AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug],
[CFLAGS="$opt_CFLAGS"])
AC_DEFINE(_FILE_OFFSET_BITS, 64)
+AC_DEFINE(_POSIX_SOURCE) # needed for fileno
AC_PROG_LIBTOOL
+AC_CHECK_FUNC(fileno, ,
+ AC_MSG_ERROR([The 'fileno' function is missing.]))
+
# check zlib
AX_CHECK_ZLIB
CXXFLAGS=$CFLAGS
-AC_CONFIG_FILES( [Makefile
- src/Makefile] )
+AC_CONFIG_FILES([Makefile
+ src/Makefile
+ doc/Makefile
+ src/version.h])
AC_OUTPUT
--- /dev/null
+
+man_MANS = fastq-grep.1 fastq-kmers.1 fastq-match.1 fastq-uniq.1
+
+
--- /dev/null
+.TH FASTQ-GREP 1
+
+.SH NAME
+fastq-grep - print sequences matching a pattern
+
+.SH SYNOPSIS
+.B fastq-grep [OPTION]... PATTERN [FILE]...
+
+.SH DESCRIPTION
+Given a PATTERN, specified as a perl-compatible regular expression, print every
+FASTQ entry with a matching nucleotide sequence.
+
+One or more FILEs may be specified; otherwise input is read from standard input.
+Input files may be gzipped.
+
+.SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-invert\-match\fR
+Invert the sense of matching, to select non-matching entries.
+.TP
+\fB\-c\fR, \fB\-\-count\fR
+Suppress normal output; instead output the number of matching (or, non-matching,
+with '-v') entries.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Output a help message and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Output version information and exit.
+
+.SH AUTHOR
+Written by Daniel C. Jones <dcjones@cs.washington.edu>
+
--- /dev/null
+.TH FASTQ-KMERS 1
+
+.SH NAME
+fastq-kmers - print the frequency of all k-mers for a given k
+
+.SH SYNOPSIS
+.B fastq-kmers [OPTION]... [FILE]...
+
+.SH DESCRIPTION
+For a given k, for example k = 4, a table in the following format is output,
+
+.fc # ^
+.ta T 2i
+#kmer^frequency#
+.br
+#AAAA^1358#
+.br
+#AAAC^2393#
+.br
+#AAAG^1039#
+.br
+#AAAT^964#
+.br
+#AACA^4076#
+.br
+#...^...#
+.br
+#TTTT^876#
+
+One or more FILEs may be specified; otherwise input is read from standard input.
+Input files may be gzipped.
+
+.SH OPTIONS
+.TP
+\fB\-k NUM\fR, \fB\-\-size=NUM\fR
+The size of the k-mers to count, where 1 <= k <= 16. (default: 1)
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Output a help message and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Output version information and exit.
+
+.SH AUTHOR
+Written by Daniel C. Jones <dcjones@cs.washington.edu>
+
--- /dev/null
+.TH FASTQ-MATCH 1
+
+.SH NAME
+fastq-match - locally align a query sequence against each read
+
+.SH SYNOPSIS
+.B fastq-match [OPTION]... QUERY [FILE]...
+
+.SH DESCRIPTION
+Given a nucleotide sequence QUERY, perform local alignment against every FASTQ
+sequence using the Smith-Waterman algorithm. For each read, an alignment score
+is printed, so that the output consists of a simple list of scores.
+
+One or more FILEs may be specified; otherwise input is read from standard input.
+Input files may be gzipped.
+
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Output a help message and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Output version information and exit.
+
+.SH AUTHOR
+Written by Daniel C. Jones <dcjones@cs.washington.edu>
+
--- /dev/null
+.TH FASTQ-UNIQ 1
+
+.SH NAME
+fastq-uniq - print a non-redundant list of sequences
+
+.SH SYNOPSIS
+.B fastq-uniq [OPTION]... [FILE]...
+
+.SH DESCRIPTION
+Print a non-redundant list of occurring sequences, removing all duplicate
+sequences. Output is in FASTA format (as quality strings are ignored), with
+sequence identifiers formatted as,
+
+>uniq-read-N (M copies)
+
+where N is a unique number assigned to the sequence and M is the number of
+copies occurring.
+
+One or more FILEs may be specified; otherwise input is read from standard input.
+Input files may be gzipped.
+
+.SH OPTIONS
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Print status updates along the way.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Output a help message and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Output version information and exit.
+
+.SH AUTHOR
+Written by Daniel C. Jones <dcjones@cs.washington.edu>
+
#include "common.h"
+#include "version.h"
#include <stdlib.h>
+void print_version(FILE *f, const char* prog_name)
+{ /* Writes "<prog_name> (fastq-tools) <FASTQ_TOOLS_VERSION>" and a newline to stream f. */
+ fprintf(f, "%s (fastq-tools) %s\n",
+ prog_name, FASTQ_TOOLS_VERSION); /* version macro presumably supplied by version.h, included above */
+}
+
+
void or_die(int b, const char* msg)
{
if (b == 0) {
#include <stdio.h>
+void print_version(FILE* f, const char* prog_name);
+
void or_die(int b, const char* msg);
void* malloc_or_die(size_t);
#endif
+static const char* prog_name = "fastq-grep";
+
+
void print_help()
{
- fprintf( stderr,
+ fprintf(stdout,
"fastq-grep [OPTION]... PATTERN [FILE]...\n"
"Search for PATTERN in the read sequences in each FILE or standard input.\n"
"PATTERN, by default, is a perl compatible regular expression.\n\n"
"Options:\n"
-" -h, --help print this message\n"
" -v, --invert-match select nonmatching entries\n"
" -c, --count output only the number of matching sequences\n"
+" -h, --help print this message\n"
+" -V, --version output version information and exit\n"
);
}
static int invert_flag;
-static int help_flag;
static int count_flag;
invert_flag = 0;
- help_flag = 0;
count_flag = 0;
int opt;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
{"invert-match", no_argument, &invert_flag, 1},
- {"count", no_argument, &count_flag, 1},
+ {"count", no_argument, &count_flag, 1},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "hvc", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "vchV", long_options, &opt_idx);
if( opt == -1 ) break;
}
break;
- case 'h':
- help_flag = 1;
- break;
-
case 'v':
invert_flag = 1;
break;
count_flag = 1;
break;
+ case 'h':
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
+
case '?':
return 1;
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
-
if (optind >= argc) {
fprintf(stderr, "A pattern must be specified.\n");
return 1;
# define SET_BINARY_MODE(file)
#endif
+static const char* prog_name = "fastq-kmers";
void print_help()
{
"Print kmer counts for the given kmer size.\n"
"Output is in two tab-seperated columns for kmer and frequency.\n\n"
"Options:\n"
+" -k NUM, --size=NUM kmer size (default: 1)\n"
" -h, --help print this message\n"
-" -k, --size kmer size (default: 1)\n"
+" -V, --version output version information and exit\n"
);
}
-static int help_flag;
static int k;
int packkmer( const char* s, uint32_t* kmer, int k )
SET_BINARY_MODE(stdin);
SET_BINARY_MODE(stdout);
- help_flag = 0;
k = 1;
uint32_t n; /* number of kmers: 4^k */
int opt_idx;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
- {"size", no_argument, 0, 0},
+ {"size", no_argument, 0, 0},
+ {"help", no_argument, 0, 'h'},
+ {"version", no_argument, 0, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "hk:", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "k:hV", long_options, &opt_idx);
if( opt == -1 ) break;
}
break;
- case 'h':
- help_flag = 1;
- break;
-
case 'k':
k = atoi(optarg);
break;
+ case 'h':
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
+
case '?':
return 1;
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
-
if (k < 1) {
fprintf(stderr, "Kmer size must be at least 1.");
return 1;
#endif
-static int help_flag;
+static const char* prog_name = "fastq-match";
void print_help()
{
- fprintf(stderr,
+ fprintf(stdout,
"fastq-match [OPTION]... QUERY [FILE]...\n"
"Perform Smith-Waterman local alignment of a query sequence\n"
"against each sequence in a fastq file.\n\n"
"Options:\n"
" -h, --help print this message\n"
+" -V, --version output version information and exit\n"
);
}
-
-
void fastq_match(FILE* fin, FILE* fout,
sw_t* sw,
unsigned char* query, int n)
FILE* fin;
- help_flag = 0;
-
int opt;
int opt_idx;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
- {"gap-init", required_argument, NULL, 0},
- {"gap-extend", required_argument, NULL, 0},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "hV", long_options, &opt_idx);
if (opt == -1) break;
break;
case 'h':
- help_flag = 1;
- break;
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
case '?':
return 1;
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
if (optind >= argc) {
fprintf(stderr, "A query sequence must be specified.\n");
# define SET_BINARY_MODE(file)
#endif
-
-static int help_flag;
-static int verbose_flag;
-size_t total_reads;
+static const char* prog_name = "fastq-uniq";
void print_help()
{
"Output a non-redundant FASTQ file, in which there are no duplicate reads.\n"
"(Warning: this program can be somewhat memory intensive.)\n\n"
"Options:\n"
-" -h, --help print this message\n"
" -v, --verbose print status along the way\n"
+" -h, --help print this message\n"
+" -V, --version output version information and exit\n"
);
}
+static int verbose_flag;
+static size_t total_reads;
+
+
+
void fastq_hash(FILE* fin, hash_table* T)
{
fastq_t* fqf = fastq_open(fin);
FILE* fin ;
- help_flag = 0;
-
int opt;
int opt_idx;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
{"verbose", no_argument, &verbose_flag, 1},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "hv", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "vhV", long_options, &opt_idx);
if (opt == -1) break;
}
break;
- case 'h':
- help_flag = 1;
- break;
-
case 'v':
verbose_flag = 1;
break;
case '?':
return 1;
+ case 'h':
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
+
default:
abort();
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
fastq_hash(stdin, T);
--- /dev/null
+
+#ifndef FASTQ_TOOLS_VERSION_H
+#define FASTQ_TOOLS_VERSION_H
+
+#define FASTQ_TOOLS_VERSION "@VERSION@" /* @VERSION@ is presumably substituted by configure (src/version.h is listed in AC_CONFIG_FILES) */
+
+#endif /* FASTQ_TOOLS_VERSION_H */
+