"Options:\n"
" -n N the number of reads to sample (default: 10000)\n"
" -p N the proportion of the total reads to sample\n"
"Options:\n"
" -n N the number of reads to sample (default: 10000)\n"
" -p N the proportion of the total reads to sample\n"
" -c, --complement-output=PREFIX\n"
" output reads not included in the random sample to\n"
" a file (or files) with the given prefix (by default,\n"
" -c, --complement-output=PREFIX\n"
" output reads not included in the random sample to\n"
" a file (or files) with the given prefix (by default,\n"
" -s, --seed=SEED a manual seed to the random number generator\n"
" -h, --help print this message\n"
" -V, --version output version information and exit\n"
" -s, --seed=SEED a manual seed to the random number generator\n"
" -h, --help print this message\n"
" -V, --version output version information and exit\n"
- const char* prefix, const char* cprefix,
- FILE* file1, FILE* file2, unsigned long k, double p)
+ const char* prefix, const char* cprefix,
+ FILE* file1, FILE* file2, unsigned long k, double p)
- /*
- * The basic idea is this:
- *
- * 1. Count the number of lines in the file, n.
- *
- * 2a. If sampling with replacement, generate k random integers in [0, n-1].
- *
- * 2b. If sampling without replacement, generate a list of integers 0..(n-1),
- * shuffle with fisher-yates, then consider the first k.
- *
- * 3. Sort the integer list.
- *
- * 3. Read through the file again, when the number at the front of the integer
- * list matches the index of the fastq etry, print the entry, and pop the
- * number.
- */
+ /*
+ * The basic idea is this:
+ *
+ * 1. Count the number of lines in the file, n.
+ *
+ * 2a. If sampling with replacement, generate k random integers in [0, n-1].
+ *
+ * 2b. If sampling without replacement, generate a list of integers 0..(n-1),
+ * shuffle with fisher-yates, then consider the first k.
+ *
+ * 3. Sort the integer list.
+ *
+ * 4. Read through the file again; when the number at the front of the integer
+ * list matches the index of the fastq entry, print the entry, and pop the
+ * number.
+ */
qsort(xs, k, sizeof(unsigned long), cmpul);
qsort(xs, k, sizeof(unsigned long), cmpul);
output_name = malloc_or_die((output_len + 1) * sizeof(char));
snprintf(output_name, output_len, "%s.fastq", prefix);
output_name = malloc_or_die((output_len + 1) * sizeof(char));
snprintf(output_name, output_len, "%s.fastq", prefix);
output_name = malloc_or_die((output_len + 1) * sizeof(char));
snprintf(output_name, output_len, "%s.1.fastq", prefix);
output_name = malloc_or_die((output_len + 1) * sizeof(char));
snprintf(output_name, output_len, "%s.1.fastq", prefix);
if (fout1 == NULL) {
fprintf(stderr, "Cannot open file %s for writing.\n", output_name);
exit(1);
}
snprintf(output_name, output_len, "%s.2.fastq", prefix);
if (fout1 == NULL) {
fprintf(stderr, "Cannot open file %s for writing.\n", output_name);
exit(1);
}
snprintf(output_name, output_len, "%s.2.fastq", prefix);
unsigned long rng_seed = 4357;
unsigned long k = 10000; // number of reads to sample
double p = -1; // proportion of reads to sample
unsigned long rng_seed = 4357;
unsigned long k = 10000; // number of reads to sample
double p = -1; // proportion of reads to sample
{"with-replacement", no_argument, NULL, 'r'},
{"complement-output", required_argument, NULL, 'c'},
{"seed", required_argument, NULL, 's'},
{"with-replacement", no_argument, NULL, 'r'},
{"complement-output", required_argument, NULL, 'c'},
{"seed", required_argument, NULL, 's'},
- if (prefix == NULL) {
- /* guess at a reasonable output refix by trimming the
- * trailing file extension, if any. */
- char* tmp;
-
- /* base name */
- tmp = strrchr(argv[optind], '/');
- if (tmp != NULL) argv[optind] = tmp + 1;
-
- /* exclude file suffixes */
- tmp = strchr(argv[optind], '.');
- if (tmp == NULL) prefix = argv[optind];
- else {
- prefix_alloc = malloc_or_die((tmp - argv[optind] + 1) * sizeof(char));
- memcpy(prefix_alloc, argv[optind], (tmp - argv[optind]) * sizeof(char));
- prefix_alloc[tmp - argv[optind]] = '\0';
- prefix = prefix_alloc;
- }
- }
- ++optind;
+ file1 = fopen(argv[optind], "rb");
+ if (file1 == NULL) {
+ fprintf(stderr, "Cannot open '%s' for reading.\n", argv[optind]);
+ return 1;
+ }
- if (optind < argc) {
- file2 = fopen(argv[optind], "rb");
- if (file2 == NULL) {
- fprintf(stderr, "Cannot open '%s' for reading.\n", argv[optind]);
- return 1;
- }
+ if (++optind < argc) {
+ file2 = fopen(argv[optind], "rb");
+ if (file2 == NULL) {
+ fprintf(stderr, "Cannot open '%s' for reading.\n", argv[optind]);
+ return 1;
}
}
fastq_sample(rng_seed, prefix, cprefix, file1, file2, k, p);
}
}
fastq_sample(rng_seed, prefix, cprefix, file1, file2, k, p);