X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=BamConverter.h;h=eaf2f66669d5858a4cd29778d7533a01f5a5690f;hp=9b543087443aac68a72999a13e874b199d613cc4;hb=683863b75f8d8bef2461039a6911b0e9619cc113;hpb=fc69cf6af24c0550e55447fc82f01cb6f90c1c42 diff --git a/BamConverter.h b/BamConverter.h index 9b54308..eaf2f66 100644 --- a/BamConverter.h +++ b/BamConverter.h @@ -14,6 +14,7 @@ #include "sam_rsem_cvt.h" #include "utils.h" +#include "my_assert.h" #include "bc_aux.h" #include "Transcript.h" #include "Transcripts.h" @@ -44,12 +45,13 @@ private: BamConverter::BamConverter(const char* inpF, const char* outF, const char* chr_list, Transcripts& transcripts) : transcripts(transcripts) { - if (transcripts.getType() != 0) - exitWithError("Genome information is not provided! RSEM cannot convert the transcript bam file!"); + general_assert(transcripts.getType() == 0, "Genome information is not provided! RSEM cannot convert the transcript bam file!"); in = samopen(inpF, "rb", NULL); assert(in != 0); + transcripts.buildMappings(in->header->n_targets, in->header->target_name); + bam_header_t *out_header = sam_header_read2(chr_list); refmap.clear(); for (int i = 0; i < out_header->n_targets; i++) { @@ -74,7 +76,7 @@ void BamConverter::process() { std::string cqname; bool isPaired = false; - int cnt = 0; + HIT_INT_TYPE cnt = 0; cqname = ""; b = bam_init1(); b2 = bam_init1(); @@ -83,32 +85,51 @@ void BamConverter::process() { ++cnt; isPaired = (b->core.flag & 0x0001) > 0; if (isPaired) { - assert(samread(in, b2) >= 0 && (b2->core.flag & 0x0001) && b->core.tid == b2->core.tid); - assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)); // for collapsing - ++cnt; + assert(samread(in, b2) >= 0 && (b2->core.flag & 0x0001)); + assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001)); + assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080) || (b->core.flag & 0x0080) && (b2->core.flag & 0x0040)); + ++cnt; } if (cnt % 1000000 == 0) { printf("."); fflush(stdout); } // at least one segment is not properly mapped - if ((b->core.flag & 0x0004) || isPaired && (b2->core.flag & 0x0004)) continue; + bool notgood = (b->core.flag & 0x0004) || (isPaired && (b2->core.flag & 0x0004)); + - const Transcript& transcript = transcripts.getTranscriptAt(b->core.tid + 1); + if (isPaired && notgood) assert((b->core.flag & 0x0004) && (b2->core.flag & 0x0004)); - convert(b, transcript); - if (isPaired) { - convert(b2, transcript); - b->core.mpos = b2->core.pos; - b2->core.mpos = b->core.pos; + + if (!notgood) { + if (isPaired) assert((b->core.tid == b2->core.tid) && (b->core.flag & 0x0040) && (b2->core.flag & 0x0080)); // for collapsing + + const Transcript& transcript = transcripts.getTranscriptViaEid(b->core.tid + 1); + + convert(b, transcript); + if (isPaired) { + convert(b2, transcript); + b->core.mpos = b2->core.pos; + b2->core.mpos = b->core.pos; + } + + if (cqname != bam1_qname(b)) { + writeCollapsedLines(); + cqname = bam1_qname(b); + collapseMap.init(isPaired); + } + collapseMap.insert(b, b2, bam_aux2f(bam_aux_get(b, "ZW"))); } + else { + assert(cqname != bam1_qname(b)); - if (cqname != bam1_qname(b)) { - writeCollapsedLines(); - cqname = bam1_qname(b); - collapseMap.init(isPaired); + writeCollapsedLines(); + cqname = bam1_qname(b); + collapseMap.init(isPaired); + + samwrite(out, b); + if (isPaired) samwrite(out, b2); } - collapseMap.insert(b, b2, bam_aux2f(bam_aux_get(b, "ZW"))); } writeCollapsedLines(); @@ -123,7 +144,7 @@ void BamConverter::convert(bam1_t* b, const Transcript& transcript) { int pos = b->core.pos; int readlen = b->core.l_qseq; - if (readlen == 0) exitWithError("One alignment line has SEQ field as *. RSEM does not support this currently!"); + general_assert(readlen > 0, "One alignment line has SEQ field as *. RSEM does not support this currently!"); iter = refmap.find(transcript.getSeqName()); assert(iter != refmap.end()); @@ -171,17 +192,14 @@ inline void BamConverter::writeCollapsedLines() { while (collapseMap.next(tmp_b, tmp_b2, prb)) { memcpy(bam_aux_get(tmp_b, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); tmp_b->core.qual = getMAPQ(prb); - if (tmp_b->core.qual > 0) { - samwrite(out, tmp_b); - if (isPaired) { - memcpy(bam_aux_get(tmp_b2, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); - tmp_b2->core.qual = tmp_b->core.qual; - samwrite(out, tmp_b2); - } + samwrite(out, tmp_b); + if (isPaired) { + memcpy(bam_aux_get(tmp_b2, "ZW") + 1, (uint8_t*)&(prb), bam_aux_type2size('f')); + tmp_b2->core.qual = tmp_b->core.qual; + samwrite(out, tmp_b2); } bam_destroy1(tmp_b); if (isPaired) bam_destroy1(tmp_b2); - } } }