X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=SamParser.h;h=4747ef4281af593f714450d68ccfc65090662891;hp=9d62eff0402ba2eca240b686b598ea3b36324ea4;hb=refs%2Fheads%2Fmaster;hpb=5a2752145fda053ac654baaf1102ac40961c65b6 diff --git a/SamParser.h b/SamParser.h index 9d62eff..4747ef4 100644 --- a/SamParser.h +++ b/SamParser.h @@ -25,7 +25,7 @@ class SamParser { public: - SamParser(char, const char*, Transcripts&, const char* = 0); + SamParser(char, const char*, const char*, Transcripts&, const char*); ~SamParser(); /** @@ -62,7 +62,7 @@ private: } std::string getName(const bam1_t* b) { - return std::string((char*)bam1_qname(b)); + return std::string(bam1_qname(b)); } std::string getReadSeq(const bam1_t*); @@ -80,7 +80,7 @@ private: char SamParser::rtTag[STRLEN] = ""; // default : no tag, thus no Type 2 reads // aux, if not 0, points to the file name of fn_list -SamParser::SamParser(char inpType, const char* inpF, Transcripts& transcripts, const char* aux) +SamParser::SamParser(char inpType, const char* inpF, const char* aux, Transcripts& transcripts, const char* imdName) : transcripts(transcripts) { switch(inpType) { @@ -93,7 +93,7 @@ SamParser::SamParser(char inpType, const char* inpF, Transcripts& transcripts, c header = sam_in->header; general_assert(header != 0, "Fail to parse sam header!"); - transcripts.buildMappings(header->n_targets, header->target_name); + transcripts.buildMappings(header->n_targets, header->target_name, imdName); b = bam_init1(); b2 = bam_init1(); @@ -178,13 +178,19 @@ int SamParser::parseNext(PairedEndRead& read, PairedEndHit& hit) { bam1_t *mp1 = NULL, *mp2 = NULL; - if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) { + // If lose mate info, discard. is it necessary? + if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4; + // If only one mate is mapped, discard + if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4; + + if (b->core.flag & 0x0040) { mp1 = b; mp2 = b2; } - else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) { + else { mp1 = b2; mp2 = b; } - else return 4; // If lose mate info, discard. is it necessary? + + general_assert(!strcmp(bam1_qname(mp1), bam1_qname(mp2)), "Detected a read pair whose two mates have different names: " + getName(mp1) + " , " + getName(mp2) + " !"); int readType = getReadType(mp1, mp2); std::string name = getName(mp1); @@ -226,13 +232,19 @@ int SamParser::parseNext(PairedEndReadQ& read, PairedEndHit& hit) { bam1_t *mp1 = NULL, *mp2 = NULL; - if ((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) { + // If lose mate info, discard. is it necessary? + if (!((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) && !((b->core.flag & 0x0080) && (b2->core.flag & 0x0040))) return 4; + // If only one mate is mapped, discard + if (((b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) || (!(b->core.flag & 0x0004) && (b2->core.flag & 0x0004))) return 4; + + if (b->core.flag & 0x0040) { mp1 = b; mp2 = b2; } - else if ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)) { + else { mp1 = b2; mp2 = b; } - else return 4; + + general_assert(!strcmp(bam1_qname(mp1), bam1_qname(mp2)), "Detected a read pair whose two mates have different names: " + getName(mp1) + " , " + getName(mp2) + " !"); int readType = getReadType(mp1, mp2); std::string name = getName(mp1); @@ -333,10 +345,17 @@ inline int SamParser::getReadType(const bam1_t* b) { return (bam_aux2i(p) > 0 ? 2 : 0); } - //For paired-end reads, do not print out type 2 reads inline int SamParser::getReadType(const bam1_t* b, const bam1_t* b2) { - if ((b->core.flag & 0x0002) && (b2->core.flag & 0x0002)) return 1; + if (!(b->core.flag & 0x0004) && !(b2->core.flag & 0x0004)) return 1; + + if (!strcmp(rtTag, "")) return 0; + + uint8_t *p = bam_aux_get(b, rtTag); + if (p != NULL && bam_aux2i(p) > 0) return 2; + + p = bam_aux_get(b2, rtTag); + if (p != NULL && bam_aux2i(p) > 0) return 2; return 0; }