+/* A collection of filenames of sorted chunks of fastq. */
+typedef struct seq_dumps_t_
+{
+ char** fns;
+ size_t n;
+ size_t size;
+} seq_dumps_t;
+
+
+seq_dumps_t* seq_dumps_create()
+{
+ seq_dumps_t* d = malloc_or_die(sizeof(seq_dumps_t));
+ d->n = 0;
+ d->size = 64;
+ d->fns = malloc_or_die(d->size * sizeof(char*));
+ return d;
+}
+
+
+void seq_dumps_free(seq_dumps_t* d)
+{
+ size_t i;
+ for (i = 0; i < d->n; ++i) {
+ if (unlink(d->fns[i]) == -1) {
+ fprintf(stderr, "Warning: unable to remove temporary file %s.\n",
+ d->fns[i]);
+ }
+ free(d->fns[i]);
+ }
+ free(d->fns);
+ free(d);
+}
+
+
+static inline size_t heap_parent(size_t i) { return (i - 1) / 2; }
+static inline size_t heap_left (size_t i) { return 2 * i + 1; }
+static inline size_t heap_right (size_t i) { return 2 * i + 2; }
+
+
+/* Enqueue an item in the heap.
+ *
+ * Args:
+ * heap: A binary heap in which stores indexs into seqs.
+ * n: Fixed maximum size of the heap.
+ * m: Current size of the heap.
+ * seqs: Sequences.
+ * idx: Index to enqueue.
+ * cmp: Comparison function.
+ *
+ */
+void heap_push(size_t* heap, size_t n, size_t* m, seq_t** seqs,
+ int (*cmp)(const void*, const void*), size_t idx)
+{
+ if (*m >= n) return;
+ size_t tmp, j, i = (*m)++;
+ heap[i] = idx;
+
+ /* percolate up */
+ while (i > 0) {
+ j = heap_parent(i);
+ if (cmp(seqs[heap[i]], seqs[heap[j]]) < 0) {
+ tmp = heap[i];
+ heap[i] = heap[j];
+ heap[j] = tmp;
+ i = j;
+ }
+ else break;
+ }
+}
+
+
+/* Dequeue an item from a heap. */
+size_t heap_pop(size_t* heap, size_t* m, seq_t** seqs,
+ int (*cmp)(const void*, const void*))
+{
+ assert(*m > 0);
+ size_t ans = heap[0];
+ heap[0] = heap[--(*m)];
+ size_t tmp, l, r, j, i = 0;
+ while (true) {
+ l = heap_left(i);
+ r = heap_right(i);
+
+ if (l >= (*m)) break;
+
+ if (r >= (*m)) {
+ j = l;
+ }
+ else {
+ j = cmp(seqs[heap[l]], seqs[heap[r]]) < 0 ? l : r;
+ }
+
+ if (cmp(seqs[heap[i]], seqs[heap[j]]) > 0) {
+ tmp = heap[i];
+ heap[i] = heap[j];
+ heap[j] = tmp;
+ i = j;
+ }
+ else break;
+ }
+
+ return ans;
+}
+
+
+/* n-way merge sort to stdout */
+void merge_sort(const seq_dumps_t* d, int (*cmp)(const void*, const void*))
+{
+ FILE** files = malloc_or_die(d->n * sizeof(FILE*));
+ size_t i;
+ for (i = 0; i < d->n; ++i) {
+ files[i] = fopen(d->fns[i], "rb");
+ if (files[i] == NULL) {
+ fprintf(stderr, "Cannot open temporary file %s for reading.\n",
+ d->fns[i]);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ fastq_t** fs = malloc_or_die(d->n * sizeof(fastq_t*));
+ seq_t** seqs = malloc_or_die(d->n * sizeof(seq_t*));
+ for (i = 0; i < d->n; ++i) {
+ fs[i] = fastq_create(files[i]);
+ seqs[i] = seq_create();
+ }
+
+ /* A binary heap of indexes to fs. We use this to repeatedly pop the
+ * smallest fastq entry. */
+ size_t* heap = malloc_or_die(d->n * sizeof(size_t));
+
+ /* heap size */
+ size_t m = 0;
+
+ for (i = 0; i < d->n; ++i) {
+ if (fastq_read(fs[i], seqs[i])) {
+ heap_push(heap, d->n, &m, seqs, cmp, i);
+ }
+ }
+
+ while (m > 0) {
+ i = heap_pop(heap, &m, seqs, cmp);
+ fastq_print(stdout, seqs[i]);
+ if (fastq_read(fs[i], seqs[i])) {
+ heap_push(heap, d->n, &m, seqs, cmp, i);
+ }
+ }
+
+ for (i = 0; i < d->n; ++i) {
+ seq_free(seqs[i]);
+ fastq_free(fs[i]);
+ fclose(files[i]);
+ }
+
+ free(files);
+ free(fs);
+}
+
+