12 #include "my_assert.h"
23 int main(int argc, char* argv[]) {
25 printf("Usage: rsem-sam-validator <input.sam/input.bam>\n");
29 string input_file(argv[1]);
30 size_t pos = input_file.find_last_of('.');
31 general_assert(pos != string::npos, "Input file does not have a suffix!");
34 string suffix = input_file.substr(pos);
35 size_t len = suffix.length();
36 for (size_t i = 0; i < len; i++) suffix[i] = tolower(suffix[i]);
38 general_assert(suffix == "sam" || suffix == "bam", "Cannot recognize input file's file type! The file suffix is neither sam or bam.");
40 in = (suffix == "sam" ? samopen(argv[1], "r", NULL) : samopen(argv[1], "rb", NULL));
41 general_assert(in != 0, "Cannot open input file!");
44 b = bam_init1(); b2 = bam_init1();
49 string cqname(""), qname;
50 uint64_t creadlen = 0, readlen;
51 bool cispaired = false, ispaired;
55 if (samread(in, b) < 0) break;
56 assert(b->core.l_qseq > 0);
58 qname.assign(bam1_qname(b));
60 // if this is a paired-end read
61 ispaired = b->core.flag & 0x0001;
63 isValid = (samread(in, b2) >= 0) && (qname == bam1_qname(b2)) && (b2->core.flag & 0x0001);
64 if (!isValid) { printf("\nOnly find one mate for paired-end read %s!\n", qname.c_str()); continue; }
65 assert(b2->core.l_qseq > 0);
66 isValid = !(((b->core.flag & 0x0040) && (b2->core.flag & 0x0040)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0080)));
67 if (!isValid) { printf("\nThe two mates of paired-end read %s are not adjacent!\n", qname.c_str()); continue; }
68 // both mates are mapped
69 if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) {
70 isValid = (b->core.tid == b2->core.tid) && (b->core.pos == b2->core.mpos) && (b2->core.pos == b->core.mpos);
71 if (!isValid) { printf("\nOne of paired-end read %s's alignment does not have two mates adjacent to each other!\n", qname.c_str()); continue; }
74 readlen = ((b->core.flag & 0x0040) ? (uint64_t(b->core.l_qseq) << 32) + b2->core.l_qseq : (uint64_t(b2->core.l_qseq) << 32) + b->core.l_qseq);
76 else readlen = b->core.l_qseq;
78 if (cqname != qname) {
79 isValid = used.find(qname) == used.end();
80 if (!isValid) { printf("\nThe alignments of read %s are not grouped together!", qname.c_str()); continue; }
88 isValid = (creadlen == readlen);
89 if (!isValid) { printf("\nRead %s have different read/mate lengths!\n", qname.c_str()); continue; }
90 isValid = (cispaired == ispaired);
91 if (!isValid) { printf("\nRead %s is detected as both single-end read and paired-end read!\n", qname.c_str()); continue; }
95 if (cnt % 1000000 == 0) printf(".");
99 bam_destroy1(b); bam_destroy1(b2);
102 if (isValid) printf("\nThe input file is valid!\n");
103 else printf("The input file is not valid!\n");