X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=samValidator.cpp;h=6e602107090f1d795341f9c08ee3b7d27689ae22;hp=06e91d454707c0298b47212e8fc32210bf0b74e8;hb=7777bfc71742d4e239d1297d2e3de058895f4ce1;hpb=fb2aa1ca9d00710943155ef3abbbdd87df116e4a diff --git a/samValidator.cpp b/samValidator.cpp index 06e91d4..6e60210 100644 --- a/samValidator.cpp +++ b/samValidator.cpp @@ -9,6 +9,7 @@ #include "sam/bam.h" #include "sam/sam.h" +#include "utils.h" #include "my_assert.h" using namespace std; @@ -45,12 +46,12 @@ int main(int argc, char* argv[]) { isValid = true; - long cnt = 0; + HIT_INT_TYPE cnt = 0; string cqname(""), qname; uint64_t creadlen = 0, readlen; bool cispaired = false, ispaired; - printf("."); + printf("."); fflush(stdout); do { if (samread(in, b) < 0) break; assert(b->core.l_qseq > 0); @@ -60,15 +61,20 @@ int main(int argc, char* argv[]) { // if this is a paired-end read ispaired = b->core.flag & 0x0001; if (ispaired) { + isValid = (samread(in, b2) >= 0) && (qname == bam1_qname(b2)) && (b2->core.flag & 0x0001); if (!isValid) { printf("\nOnly find one mate for paired-end read %s!\n", qname.c_str()); continue; } assert(b2->core.l_qseq > 0); + isValid = !((b->core.flag & 0x0040) && (b->core.flag & 0x0080)) && !((b2->core.flag & 0x0040) & (b2->core.flag & 0x0080)); + if (!isValid) { printf("\nRead %s has more than 2 segments (e.g. tripled or more ended reads)!\n", qname.c_str()); continue; } isValid = !(((b->core.flag & 0x0040) && (b2->core.flag & 0x0040)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0080))); if (!isValid) { printf("\nThe two mates of paired-end read %s are not adjacent!\n", qname.c_str()); continue; } + + // both mates are mapped if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) { isValid = (b->core.tid == b2->core.tid) && (b->core.pos == b2->core.mpos) && (b2->core.pos == b->core.mpos); - if (!isValid) { printf("\nOne of paired-end read %s's alignment does not have two mates adjacent to each other!\n", qname.c_str()); continue; } + if (!isValid) { printf("\nOne of paired-end read %s's alignment does not have two mates adjacent to each other! If you're running covert-sam-for-rsem now, this might mean the read contains duplicate alignments.\n", qname.c_str()); continue; } } readlen = ((b->core.flag & 0x0040) ? (uint64_t(b->core.l_qseq) << 32) + b2->core.l_qseq : (uint64_t(b2->core.l_qseq) << 32) + b->core.l_qseq); @@ -92,7 +98,7 @@ int main(int argc, char* argv[]) { } ++cnt; - if (cnt % 1000000 == 0) printf("."); + if (cnt % 1000000 == 0) { printf("."); fflush(stdout); } } while(isValid);