X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ffastq-sort.c;h=9799b075348ad087ef6df654f075618db0af4e50;hb=52779892ccc2ac676654a34baff8d1e7645121de;hp=e96d9bf5164c9711aa965e05a4a47868929e7ffd;hpb=1860da1d6d887e658671c60a1ec47cb3cb80449e;p=fastq-tools.git diff --git a/src/fastq-sort.c b/src/fastq-sort.c index e96d9bf..9799b07 100644 --- a/src/fastq-sort.c +++ b/src/fastq-sort.c @@ -3,12 +3,13 @@ * * Copyright (c) 2012 by Daniel C. Jones * - * fastq-sample : - * Sample reads with or without replacement from a FASTQ file. + * fastq-sort: + * Sort fastq files efficiently. * */ #include +#include #include #include #include @@ -245,22 +246,22 @@ bool seq_array_push(seq_array_t* a, const seq_t* seq) memcpy(&a->data[a->data_used], seq->id1.s, seq->id1.n + 1); a->seqs[a->n].id1.s = &a->data[a->data_used]; - a->seqs[a->n].id1.n = seq->id1.n + 1; + a->seqs[a->n].id1.n = seq->id1.n; a->data_used += seq->id1.n + 1; memcpy(&a->data[a->data_used], seq->seq.s, seq->seq.n + 1); a->seqs[a->n].seq.s = &a->data[a->data_used]; - a->seqs[a->n].seq.n = seq->seq.n + 1; + a->seqs[a->n].seq.n = seq->seq.n; a->data_used += seq->seq.n + 1; memcpy(&a->data[a->data_used], seq->id2.s, seq->id2.n + 1); a->seqs[a->n].id2.s = &a->data[a->data_used]; - a->seqs[a->n].id2.n = seq->id2.n + 1; + a->seqs[a->n].id2.n = seq->id2.n; a->data_used += seq->id2.n + 1; memcpy(&a->data[a->data_used], seq->qual.s, seq->qual.n + 1); a->seqs[a->n].qual.s = &a->data[a->data_used]; - a->seqs[a->n].qual.n = seq->qual.n + 1; + a->seqs[a->n].qual.n = seq->qual.n; a->data_used += seq->qual.n + 1; ++a->n; @@ -357,6 +358,20 @@ void print_help() } +/* Parse a size specification, which is just a number with a K, M, G suffix. */ +size_t parse_size(const char* str) +{ + char* endptr; + unsigned long size = strtoul(str, &endptr, 10); + + if (toupper(*endptr) == 'K') size *= 1000; + else if (toupper(*endptr) == 'M') size *= 1000000; + else if (toupper(*endptr) == 'G') size *= 1000000000; + + return size; +} + + int main(int argc, char* argv[]) { int opt, opt_idx; @@ -366,29 +381,34 @@ int main(int argc, char* argv[]) static struct option long_options[] = { - {"reverse", no_argument, NULL, 'r'}, - {"id", no_argument, NULL, 'I'}, - {"seq", no_argument, NULL, 'S'}, - {"random", no_argument, NULL, 'R'}, - {"help", no_argument, NULL, 'h'}, - {"version", no_argument, NULL, 'V'}, + {"buffer-size", required_argument, NULL, 'S'}, + {"reverse", no_argument, NULL, 'r'}, + {"id", no_argument, NULL, 'i'}, + {"seq", no_argument, NULL, 's'}, + {"random", no_argument, NULL, 'R'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, {0, 0, 0, 0} }; while (true) { - opt = getopt_long(argc, argv, "rISRhV", long_options, &opt_idx); + opt = getopt_long(argc, argv, "S:risRhV", long_options, &opt_idx); if (opt == -1) break; switch (opt) { + case 'S': + buffer_size = parse_size(optarg); + break; + case 'r': reverse_sort = true; break; - case 'I': + case 'i': user_cmp = seq_cmp_id; break; - case 'S': + case 's': user_cmp = seq_cmp_seq; break;