15 #include "sam_rsem_aux.h"
16 #include "sam_rsem_cvt.h"
20 #include "SingleHit.h"
21 #include "PairedEndHit.h"
23 #include "HitWrapper.h"
24 #include "Transcript.h"
25 #include "Transcripts.h"
29 BamWriter(char, const char*, const char*, const char*, Transcripts&);
32 void work(HitWrapper<SingleHit>);
33 void work(HitWrapper<PairedEndHit>);
36 Transcripts& transcripts;
39 void convert(bam1_t*, double);
43 BamWriter::BamWriter(char inpType, const char* inpF, const char* fn_list, const char* outF, Transcripts& transcripts)
44 : transcripts(transcripts)
47 case 's': in = samopen(inpF, "r", fn_list); break;
48 case 'b': in = samopen(inpF, "rb", fn_list); break;
49 default: assert(false);
53 //build mappings from external sid to internal sid
54 transcripts.buildMappings(in->header->n_targets, in->header->target_name);
56 //generate output's header
57 bam_header_t *out_header = bam_header_dwt(in->header);
58 for (int i = 0; i < out_header->n_targets; i++) {
59 out_header->target_len[i] = transcripts.getTranscriptViaEid(i + 1).getLength(); // transcript length without poly(A) tail
62 std::ostringstream strout;
63 strout<<"@HD\tVN:1.4\tSO:unknown\n@PG\tID:RSEM\n";
64 std::string content = strout.str();
65 append_header_text(out_header, content.c_str(), content.length());
67 out = samopen(outF, "wb", out_header);
70 bam_header_destroy(out_header);
73 BamWriter::~BamWriter() {
78 void BamWriter::work(HitWrapper<SingleHit> wrapper) {
86 while (samread(in, b) >= 0) {
88 if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " alignment lines are loaded!"<< std::endl; }
90 if (!(b->core.flag & 0x0004)) {
91 hit = wrapper.getNextHit();
94 assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
95 convert(b, hit->getConPrb());
98 //if (b->core.qual > 0) samwrite(out, b); // output only when MAPQ > 0
102 assert(wrapper.getNextHit() == NULL);
105 if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
108 void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
117 while (samread(in, b) >= 0 && samread(in, b2) >= 0) {
119 if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
121 assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001));
122 assert((b->core.flag & 0x0040) && (b2->core.flag & 0x0080) || (b->core.flag & 0x0080) && (b2->core.flag & 0x0040));
124 //unalignable reads, skip
125 bool notgood = (b->core.flag & 0x0004) || (b2->core.flag & 0x0004);
128 //swap if b is mate 2
129 if (b->core.flag & 0x0080) {
130 assert(b2->core.flag & 0x0040);
135 hit = wrapper.getNextHit();
138 assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
139 assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
141 convert(b, hit->getConPrb());
142 convert(b2, hit->getConPrb());
144 b->core.mpos = b2->core.pos;
145 b2->core.mpos = b->core.pos;
150 // if only one mate can be aligned, mask it as unaligned and put an additional tag Z0:A:!
151 char exclamation = '!';
152 if (!(b->core.flag & 0x0004)) {
153 b->core.flag |= 0x0004;
154 bam_aux_append(b, "Z0", 'A', bam_aux_type2size('A'), (uint8_t*)&exclamation);
156 if (!(b2->core.flag & 0x0004)) {
157 b2->core.flag |= 0x0004;
158 bam_aux_append(b2, "Z0", 'A', bam_aux_type2size('A'), (uint8_t*)&exclamation);
167 assert(wrapper.getNextHit() == NULL);
172 if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
175 void BamWriter::convert(bam1_t *b, double prb) {
176 b->core.qual = getMAPQ(prb);
177 float val = (float)prb;
178 bam_aux_append(b, "ZW", 'f', bam_aux_type2size('f'), (uint8_t*)&val);
181 #endif /* BAMWRITER_H_ */