2 * This file is part of fastq-tools.
4 * Copyright (c) 2012 by Daniel C. Jones <dcjones@cs.washington.edu>
7 * Determine the quality score encoding used by a fastq file.
16 /* The scales here are taken from the wikipedia article for FASTQ, the relavent
17 * part is reproduced here:
19 * SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS.....................................................
20 * ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................
21 * ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII......................
22 * .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ......................
23 * LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL....................................................
24 * !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
28 * S - Sanger Phred+33, raw reads typically (0, 40)
29 * X - Solexa Solexa+64, raw reads typically (-5, 40)
30 * I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)
31 * J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)
32 * with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold)
33 * (Note: See discussion above).
34 * L - Illumina 1.8+ Phred+33, raw reads typically (0, 41)
38 typedef struct qual_scale_t_
40 const char* description;
41 char min_qual, max_qual;
45 /* When the scale is ambiguous, we choose the first compatible one. Hence these
46 * are ordered roughly by increasing exclusivity. */
48 static qual_scale_t scales[NUM_SCALES] =
50 {"Sanger/Phred+33", '!', 'I'},
51 {"Illumina 1.8/Phred+33", '!', 'J'},
52 {"Illumina 1.5/Phred+64", 'B', 'h'},
53 {"Illumina 1.3/Phred+64", '@', 'h'},
54 {"Solexa/Solexa+64", ';', 'h'}
58 /* Return true if x has excatly one 1 bit. */
59 bool single_bit(uint32_t x)
61 return x && !(x & (x - 1));
65 /* Make a bitset of compatible scales. */
66 uint32_t make_bitset(char min_qual, char max_qual)
70 for (i = 0; i < NUM_SCALES; ++i) {
71 if (scales[i].min_qual <= min_qual &&
72 scales[i].max_qual >= max_qual) {
81 void fastq_qualscale(const char* fn, FILE* fin)
83 char min_qual = '~', max_qual = '!';
85 fastq_t* fqf = fastq_create(fin);
86 seq_t* seq = seq_create();
88 /* Scales compatible with the data so far. */
89 uint32_t compat_scales;
93 while (n-- && fastq_read(fqf, seq) && !single_bit(compat_scales)) {
94 char* q = seq->qual.s;
96 if (*q < min_qual) min_qual = *q;
97 if (*q > max_qual) max_qual = *q;
101 compat_scales = make_bitset(min_qual, max_qual);
102 if (compat_scales == 0 || single_bit(compat_scales)) break;
108 if (compat_scales == 0) {
109 printf("%s: Unknown scale ['%c', '%c']\n", fn, min_qual, max_qual);
114 for (i = 0; !(compat_scales & (1 << i)); ++i) {}
115 printf("%s: %s\n", fn, scales[i].description);
120 static const char* prog_name = "fastq-qscale";
126 "fastq-qscale [OPTION] [FILE]...\n"
127 "Detect and output the quality score scale for each file given as an argument.\n\n"
129 " -h, --help print this message\n"
130 " -V, --version output version information and exit\n"
135 int main(int argc, char* argv[])
137 static struct option long_options[] =
139 {"help", no_argument, NULL, 'h'},
140 {"version", no_argument, NULL, 'V'},
146 opt = getopt_long(argc, argv, "hV", long_options, &opt_idx);
147 if (opt == -1) break;
155 print_version(stdout, prog_name);
166 if (optind >= argc) {
167 fastq_qualscale("stdin", stdin);
170 for (; optind < argc; optind++) {
171 FILE* fin = fopen(argv[optind], "rb");
173 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
177 fastq_qualscale(argv[optind], fin);