*
* Copyright (c) 2012 by Daniel C. Jones <dcjones@cs.washington.edu>
*
- * fastq-sample :
- * Sample reads with or without replacement from a FASTQ file.
+ * fastq-sort:
+ * Sort fastq files efficiently.
*
*/
#include <assert.h>
+#include <ctype.h>
#include <getopt.h>
#include <string.h>
#include <unistd.h>
memcpy(&a->data[a->data_used], seq->id1.s, seq->id1.n + 1);
a->seqs[a->n].id1.s = &a->data[a->data_used];
- a->seqs[a->n].id1.n = seq->id1.n + 1;
+ a->seqs[a->n].id1.n = seq->id1.n;
a->data_used += seq->id1.n + 1;
memcpy(&a->data[a->data_used], seq->seq.s, seq->seq.n + 1);
a->seqs[a->n].seq.s = &a->data[a->data_used];
- a->seqs[a->n].seq.n = seq->seq.n + 1;
+ a->seqs[a->n].seq.n = seq->seq.n;
a->data_used += seq->seq.n + 1;
memcpy(&a->data[a->data_used], seq->id2.s, seq->id2.n + 1);
a->seqs[a->n].id2.s = &a->data[a->data_used];
- a->seqs[a->n].id2.n = seq->id2.n + 1;
+ a->seqs[a->n].id2.n = seq->id2.n;
a->data_used += seq->id2.n + 1;
memcpy(&a->data[a->data_used], seq->qual.s, seq->qual.n + 1);
a->seqs[a->n].qual.s = &a->data[a->data_used];
- a->seqs[a->n].qual.n = seq->qual.n + 1;
+ a->seqs[a->n].qual.n = seq->qual.n;
a->data_used += seq->qual.n + 1;
++a->n;
}
+/* Parse a size specification, which is just a number with a K, M, G suffix.
+ *
+ * The leading digits are read as a base-10 unsigned integer with strtoul().
+ * The first character after the number is then compared case-insensitively:
+ * 'K' multiplies the value by 1000, 'M' by 1000000 and 'G' by 1000000000
+ * (decimal multiples, not powers of 1024). Any other character, including
+ * the terminating NUL, leaves the value unscaled; nothing after that one
+ * character is examined.
+ *
+ * str: NUL-terminated size string, e.g. "100", "64k" or "2G".
+ *
+ * Returns the scaled value converted to size_t.
+ *
+ * NOTE(review): no input validation is done here. A string with no leading
+ * digits yields 0 from strtoul(), trailing garbage is silently ignored, and
+ * errno/ERANGE is never checked.
+ * NOTE(review): the multiplications are unchecked unsigned arithmetic, so a
+ * large value wraps around; where unsigned long is 32 bits this already
+ * happens for anything above "4G".
+ * NOTE(review): *endptr is a plain char; <ctype.h> functions expect a value
+ * representable as unsigned char (or EOF), so a cast to unsigned char before
+ * toupper() would be safer - confirm and fix in a follow-up.
+ */
+size_t parse_size(const char* str)
+{
+ char* endptr; /* set by strtoul() to point at the first unconverted character */
+ unsigned long size = strtoul(str, &endptr, 10);
+ /* Apply the optional decimal multiplier named by the suffix character. */
+ if (toupper(*endptr) == 'K') size *= 1000;
+ else if (toupper(*endptr) == 'M') size *= 1000000;
+ else if (toupper(*endptr) == 'G') size *= 1000000000;
+ /* Unrecognised or missing suffix: the value is returned exactly as parsed. */
+ return size;
+}
+
+
int main(int argc, char* argv[])
{
int opt, opt_idx;
static struct option long_options[] =
{
- {"reverse", no_argument, NULL, 'r'},
- {"id", no_argument, NULL, 'I'},
- {"seq", no_argument, NULL, 'S'},
- {"random", no_argument, NULL, 'R'},
- {"help", no_argument, NULL, 'h'},
- {"version", no_argument, NULL, 'V'},
+ {"buffer-size", required_argument, NULL, 'S'},
+ {"reverse", no_argument, NULL, 'r'},
+ {"id", no_argument, NULL, 'i'},
+ {"seq", no_argument, NULL, 's'},
+ {"random", no_argument, NULL, 'R'},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
{0, 0, 0, 0}
};
while (true) {
- opt = getopt_long(argc, argv, "rISRhV", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "S:risRhV", long_options, &opt_idx);
if (opt == -1) break;
switch (opt) {
+ case 'S':
+ buffer_size = parse_size(optarg);
+ break;
+
case 'r':
reverse_sort = true;
break;
- case 'I':
+ case 'i':
user_cmp = seq_cmp_id;
break;
- case 'S':
+ case 's':
user_cmp = seq_cmp_seq;
break;