5 Generate random data in FASTQ format.
12 static void print_help()
15 "Usage: random_fastq [option]...\n"
16 "Generate an endless stream of random FASTQ data to standard out.\n\n"
19 "Beware: the only purpose of this program is test quip.\n"
20 "No particular guarantees are made.\n\n");
24 /* Draw n samples from a categorical distribution of size k with cumulative
25 * distribution given by cs and elements given by us. The sample is stored in xs which
26 * is assumed to be of appropriate length. */
27 void randcat(const char* us, const double* cs, size_t k, char* xs, size_t n)
31 for (i = 0; i < n; ++i) {
33 int a = 0, b = (int) k, c;
36 if (r <= cs[c]) b = c;
45 int main(int argc, char* argv[])
47 static struct option long_options[] =
49 {"min-length", required_argument, NULL, 'm'},
50 {"max-length", required_argument, NULL, 'M'},
51 {"length", required_argument, NULL, 'l'},
52 {"id-length", required_argument, NULL, 'i'},
53 {"help", no_argument, NULL, 'h'},
57 size_t min_len = 100, max_len = 100;
62 opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
68 min_len = (size_t) strtoul(optarg, NULL, 10);
72 max_len = (size_t) strtoul(optarg, NULL, 10);
76 min_len = max_len = (size_t) strtoul(optarg, NULL, 10);
80 id_len = (size_t) strtoul(optarg, NULL, 10);
95 char nucleotides[5] = {'A', 'C', 'G', 'T', 'N'};
96 double nuc_cs[5] = {0.28, 0.49, 0.70, 0.90, 1.00};
102 for (i = 0; i < 64; ++i) {
103 qualities[i] = '!' + i;
104 last_c = qual_cs[i] = last_c + 1.0 / 64.0;
110 for (i = 0; i < 94; ++i) {
111 id_chars[i] = '!' + i;
112 last_c = id_cs[i] = last_c + 1.0 / 94.0;
115 char* id = malloc(id_len + 1);
116 char* seq = malloc(max_len + 1);
117 char* qual = malloc(max_len + 1);
118 size_t len = min_len;
121 if (max_len > min_len) {
122 len = min_len + (size_t) (drand48() * (double) (1 + max_len - min_len));
125 randcat(id_chars, id_cs, 94, id, id_len);
128 randcat(nucleotides, nuc_cs, 5, seq, len);
131 randcat(qualities, qual_cs, 64, qual, len);
139 /* Yeah, right. As if we'll ever get here. */