]> git.donarmstrong.com Git - rsem.git/blob - BamWriter.h
Deleted a ';' at the end of RSEM v1.2.15 updates
[rsem.git] / BamWriter.h
1 #ifndef BAMWRITER_H_
2 #define BAMWRITER_H_
3
4 #include<cmath>
5 #include<cstdio>
6 #include<cstring>
7 #include<cassert>
8 #include<string>
9 #include<sstream>
10 #include<iostream>
11
12 #include <stdint.h>
13 #include "sam/bam.h"
14 #include "sam/sam.h"
15 #include "sam_rsem_aux.h"
16 #include "sam_rsem_cvt.h"
17
18 #include "utils.h"
19
20 #include "SingleHit.h"
21 #include "PairedEndHit.h"
22
23 #include "HitWrapper.h"
24 #include "Transcript.h"
25 #include "Transcripts.h"
26
27 class BamWriter {
28 public:
29         BamWriter(char, const char*, const char*, const char*, Transcripts&);
30         ~BamWriter();
31
32         void work(HitWrapper<SingleHit>);
33         void work(HitWrapper<PairedEndHit>);
34 private:
35         samfile_t *in, *out;
36         Transcripts& transcripts;
37
38         //convert bam1_t
39         void convert(bam1_t*, double);
40 };
41
42 //fn_list can be NULL
43 BamWriter::BamWriter(char inpType, const char* inpF, const char* fn_list, const char* outF, Transcripts& transcripts)
44         : transcripts(transcripts)
45 {
46         switch(inpType) {
47         case 's': in = samopen(inpF, "r", fn_list); break;
48         case 'b': in = samopen(inpF, "rb", fn_list); break;
49         default: assert(false);
50         }
51         assert(in != 0);
52
53         //build mappings from external sid to internal sid
54         transcripts.buildMappings(in->header->n_targets, in->header->target_name);
55
56         //generate output's header
57         bam_header_t *out_header = bam_header_dwt(in->header);
58
59         std::ostringstream strout;
60         strout<<"@HD\tVN:1.4\tSO:unknown\n@PG\tID:RSEM\n";
61         std::string content = strout.str();
62         append_header_text(out_header, content.c_str(), content.length());
63
64         out = samopen(outF, "wb", out_header);
65         assert(out != 0);
66
67         bam_header_destroy(out_header);
68 }
69
70 BamWriter::~BamWriter() {
71         samclose(in);
72         samclose(out);
73 }
74
75 void BamWriter::work(HitWrapper<SingleHit> wrapper) {
76         bam1_t *b;
77         SingleHit *hit;
78
79         HIT_INT_TYPE cnt = 0;
80
81         b = bam_init1();
82
83         while (samread(in, b) >= 0) {
84                 ++cnt;
85                 if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " alignment lines are loaded!"<< std::endl; }
86
87                 if (!(b->core.flag & 0x0004)) {
88                   hit = wrapper.getNextHit();
89                   assert(hit != NULL);
90
91                   assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
92                   convert(b, hit->getConPrb());
93                 }
94
95                 //if (b->core.qual > 0) samwrite(out, b); // output only when MAPQ > 0
96                 samwrite(out, b);
97         }
98
99         assert(wrapper.getNextHit() == NULL);
100
101         bam_destroy1(b);
102         if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
103 }
104
105 void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
106         bam1_t *b, *b2;
107         PairedEndHit *hit;
108
109         HIT_INT_TYPE cnt = 0;
110
111         b = bam_init1();
112         b2 = bam_init1();
113
114         while (samread(in, b) >= 0 && samread(in, b2) >= 0) {
115                 cnt += 2;
116                 if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " alignment lines are loaded!"<< std::endl; }
117
118                 assert((b->core.flag & 0x0001) && (b2->core.flag & 0x0001));
119                 assert(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)));
120
121                 //unalignable reads, skip               
122                 bool notgood = (b->core.flag & 0x0004) || (b2->core.flag & 0x0004);
123
124                 if (!notgood) {
125                         //swap if b is mate 2
126                         if (b->core.flag & 0x0080) {
127                                 assert(b2->core.flag & 0x0040);
128                                 bam1_t *tmp = b;
129                                 b = b2; b2 = tmp;
130                         }
131
132                         hit = wrapper.getNextHit();
133                         assert(hit != NULL);
134
135                         assert(transcripts.getInternalSid(b->core.tid + 1) == hit->getSid());
136                         assert(transcripts.getInternalSid(b2->core.tid + 1) == hit->getSid());
137
138                         convert(b, hit->getConPrb());
139                         convert(b2, hit->getConPrb());
140
141                         // omit "b->core.mpos = b2->core.pos; b2->core.mpos = b->core.pos" for the reason that it is possible that only one mate is alignable 
142                 }
143
144                 samwrite(out, b);
145                 samwrite(out, b2);
146         }
147
148         assert(wrapper.getNextHit() == NULL);
149
150         bam_destroy1(b);
151         bam_destroy1(b2);
152
153         if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
154 }
155
156 void BamWriter::convert(bam1_t *b, double prb) {
157         b->core.qual = getMAPQ(prb);
158         float val = (float)prb;
159         bam_aux_append(b, "ZW", 'f', bam_aux_type2size('f'), (uint8_t*)&val);
160 }
161
162 #endif /* BAMWRITER_H_ */