X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=BamConverter.h;h=eaf2f66669d5858a4cd29778d7533a01f5a5690f;hp=e7253ba9415b566442f3efbfe266ffe052bb2753;hb=683863b75f8d8bef2461039a6911b0e9619cc113;hpb=635ca2939cfb1f519f19e9dec072ddd05e9fb450 diff --git a/BamConverter.h b/BamConverter.h index e7253ba..eaf2f66 100644 --- a/BamConverter.h +++ b/BamConverter.h @@ -50,6 +50,8 @@ BamConverter::BamConverter(const char* inpF, const char* outF, const char* chr_l in = samopen(inpF, "rb", NULL); assert(in != 0); + transcripts.buildMappings(in->header->n_targets, in->header->target_name); + bam_header_t *out_header = sam_header_read2(chr_list); refmap.clear(); for (int i = 0; i < out_header->n_targets; i++) { @@ -74,7 +76,7 @@ void BamConverter::process() { std::string cqname; bool isPaired = false; - int cnt = 0; + HIT_INT_TYPE cnt = 0; cqname = ""; b = bam_init1(); b2 = bam_init1(); @@ -83,32 +85,51 @@ void BamConverter::process() { ++cnt; isPaired = (b->core.flag & 0x0001) > 0; if (isPaired) { - assert(samread(in, b2) >= 0 && (b2->core.flag & 0x0001) && b->core.tid == b2->core.tid); - assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)); // for collapsing - ++cnt; + assert(samread(in, b2) >= 0 && (b2->core.flag & 0x0001)); + assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001)); + assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080) || (b->core.flag & 0x0080) && (b2->core.flag & 0x0040)); + ++cnt; } if (cnt % 1000000 == 0) { printf("."); fflush(stdout); } // at least one segment is not properly mapped - if ((b->core.flag & 0x0004) || (isPaired && (b2->core.flag & 0x0004))) continue; + bool notgood = (b->core.flag & 0x0004) || (isPaired && (b2->core.flag & 0x0004)); + - const Transcript& transcript = transcripts.getTranscriptAt(b->core.tid + 1); + if (isPaired && notgood) assert((b->core.flag & 0x0004) && (b2->core.flag & 0x0004)); - convert(b, transcript); - if (isPaired) { - convert(b2, transcript); - b->core.mpos = b2->core.pos; - b2->core.mpos = b->core.pos; + + if (!notgood) { + if (isPaired) assert((b->core.tid == b2->core.tid) && (b->core.flag & 0x0040) && (b2->core.flag & 0x0080)); // for collapsing + + const Transcript& transcript = transcripts.getTranscriptViaEid(b->core.tid + 1); + + convert(b, transcript); + if (isPaired) { + convert(b2, transcript); + b->core.mpos = b2->core.pos; + b2->core.mpos = b->core.pos; + } + + if (cqname != bam1_qname(b)) { + writeCollapsedLines(); + cqname = bam1_qname(b); + collapseMap.init(isPaired); + } + collapseMap.insert(b, b2, bam_aux2f(bam_aux_get(b, "ZW"))); } + else { + assert(cqname != bam1_qname(b)); - if (cqname != bam1_qname(b)) { - writeCollapsedLines(); - cqname = bam1_qname(b); - collapseMap.init(isPaired); + writeCollapsedLines(); + cqname = bam1_qname(b); + collapseMap.init(isPaired); + + samwrite(out, b); + if (isPaired) samwrite(out, b2); } - collapseMap.insert(b, b2, bam_aux2f(bam_aux_get(b, "ZW"))); } writeCollapsedLines(); @@ -171,17 +192,14 @@ inline void BamConverter::writeCollapsedLines() { while (collapseMap.next(tmp_b, tmp_b2, prb)) { memcpy(bam_aux_get(tmp_b, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); tmp_b->core.qual = getMAPQ(prb); - if (tmp_b->core.qual > 0) { - samwrite(out, tmp_b); - if (isPaired) { - memcpy(bam_aux_get(tmp_b2, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); - tmp_b2->core.qual = tmp_b->core.qual; - samwrite(out, tmp_b2); - } + samwrite(out, tmp_b); + if (isPaired) { + memcpy(bam_aux_get(tmp_b2, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); + tmp_b2->core.qual = tmp_b->core.qual; + samwrite(out, tmp_b2); } bam_destroy1(tmp_b); if (isPaired) bam_destroy1(tmp_b2); - } } }