]> git.donarmstrong.com Git - bamtools.git/blob - src/utils/bamtools_pileup_engine.cpp
Cleaned up intra-API includes & moved version numbers to 2.0.0
[bamtools.git] / src / utils / bamtools_pileup_engine.cpp
1 // ***************************************************************************
2 // bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011
6 // ---------------------------------------------------------------------------
7 // Provides pileup at position functionality for various tools.
8 // ***************************************************************************
9
10 #include "utils/bamtools_pileup_engine.h"
11 using namespace BamTools;
12
13 #include <iostream>
14 using namespace std;
15
16 // ---------------------------------------------
17 // PileupEnginePrivate implementation
18
19 struct PileupEngine::PileupEnginePrivate {
20   
21     // data members
22     int CurrentId;
23     int CurrentPosition;
24     vector<BamAlignment> CurrentAlignments;
25     PileupPosition CurrentPileupData;
26     
27     bool IsFirstAlignment;
28     vector<PileupVisitor*> Visitors;
29   
30     // ctor & dtor
31     PileupEnginePrivate(void)
32         : CurrentId(-1)
33         , CurrentPosition(-1)
34         , IsFirstAlignment(true)
35     { }
36     ~PileupEnginePrivate(void) { }
37     
38     // 'public' methods
39     bool AddAlignment(const BamAlignment& al);
40     void Flush(void);
41     
42     // internal methods
43     private:
44         void ApplyVisitors(void);
45         void ClearOldData(void);
46         void CreatePileupData(void);
47         void ParseAlignmentCigar(const BamAlignment& al);
48 };
49
50 bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
51   
52     // if first time
53     if ( IsFirstAlignment ) {
54       
55         // set initial markers 
56         CurrentId       = al.RefID;
57         CurrentPosition = al.Position;
58         
59         // store first entry
60         CurrentAlignments.clear();
61         CurrentAlignments.push_back(al);
62         
63         // set flag & return
64         IsFirstAlignment = false;
65         return true;
66     }
67   
68     // if same reference
69     if ( al.RefID == CurrentId ) {
70       
71         // if same position, store and move on
72         if ( al.Position == CurrentPosition )
73             CurrentAlignments.push_back(al);
74         
75         // if less than CurrentPosition - sorting error => ABORT
76         else if ( al.Position < CurrentPosition ) {
77             cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
78             return false;
79         }
80         
81         // else print pileup data until 'catching up' to CurrentPosition
82         else {
83             while ( al.Position > CurrentPosition ) {
84                 ApplyVisitors();
85                 ++CurrentPosition;
86             }
87             CurrentAlignments.push_back(al);
88         }
89     } 
90
91     // if reference ID less than CurrentId - sorting error => ABORT
92     else if ( al.RefID < CurrentId ) {
93         cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
94         return false;
95     }
96
97     // else moved forward onto next reference
98     else {
99         
100         // print any remaining pileup data from previous reference
101         while ( !CurrentAlignments.empty() ) {
102             ApplyVisitors();
103             ++CurrentPosition;
104         }
105         
106         // store first entry on this new reference, update markers
107         CurrentAlignments.clear();
108         CurrentAlignments.push_back(al);
109         CurrentId = al.RefID;
110         CurrentPosition = al.Position;
111     }
112   
113     return true;
114 }
115
116 void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
117   
118     // parse CIGAR data in BamAlignments to build up current pileup data
119     CreatePileupData();
120   
121     // apply all visitors to current alignment set
122     vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
123     vector<PileupVisitor*>::const_iterator visitorEnd  = Visitors.end();
124     for ( ; visitorIter != visitorEnd; ++visitorIter ) 
125         (*visitorIter)->Visit(CurrentPileupData);
126 }
127
128 void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
129  
130     // remove any data that ends before CurrentPosition
131     size_t i = 0;
132     while ( i < CurrentAlignments.size() ) {
133       
134         // remove alignment if it ends before CurrentPosition
135         const int endPosition = CurrentAlignments[i].GetEndPosition();
136         if ( endPosition < CurrentPosition )
137             CurrentAlignments.erase(CurrentAlignments.begin() + i);
138         else
139             ++i;
140     }
141 }
142
143 void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
144   
145     // remove any non-overlapping alignments
146     ClearOldData();
147   
148     // set pileup refId, position to current markers
149     CurrentPileupData.RefId = CurrentId;
150     CurrentPileupData.Position = CurrentPosition;
151     CurrentPileupData.PileupAlignments.clear();
152     
153     // parse CIGAR data in remaining alignments 
154     vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
155     vector<BamAlignment>::const_iterator alEnd  = CurrentAlignments.end(); 
156     for ( ; alIter != alEnd; ++alIter )
157         ParseAlignmentCigar( (*alIter) );
158 }
159
160 void PileupEngine::PileupEnginePrivate::Flush(void) {
161     while ( !CurrentAlignments.empty() ) {
162         ApplyVisitors();
163         ++CurrentPosition;
164     }
165 }
166
167 void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
168   
169     // skip if unmapped
170     if ( !al.IsMapped() ) return;
171     
172     // intialize local variables
173     int  genomePosition      = al.Position;
174     int  positionInAlignment = 0;
175     bool isNewReadSegment    = true;
176     bool saveAlignment       = true;    
177     PileupAlignment pileupAlignment(al);
178     
179     // iterate over CIGAR operations
180     const int numCigarOps = (const int)al.CigarData.size();
181     for (int i = 0; i < numCigarOps; ++i ) { 
182         const CigarOp& op = al.CigarData.at(i);
183       
184         // if op is MATCH
185         if ( op.Type == 'M' ) {
186           
187             // if match op overlaps current position
188             if ( genomePosition + (int)op.Length > CurrentPosition ) {
189               
190                 // set pileup data
191                 pileupAlignment.IsCurrentDeletion   = false;
192                 pileupAlignment.IsNextDeletion      = false;
193                 pileupAlignment.IsNextInsertion     = false;
194                 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
195                 
196                 // check for beginning of read segment
197                 if ( genomePosition == CurrentPosition && isNewReadSegment ) 
198                     pileupAlignment.IsSegmentBegin = true;
199                 
200                 // if we're at the end of a match operation
201                 if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
202                     
203                     // if not last operation
204                     if ( i < numCigarOps - 1 ) {
205                         
206                         // check next CIGAR op
207                         const CigarOp& nextOp = al.CigarData.at(i+1);
208                         
209                         // if next CIGAR op is DELETION
210                         if ( nextOp.Type == 'D') {
211                             pileupAlignment.IsNextDeletion = true;
212                             pileupAlignment.DeletionLength = nextOp.Length;
213                         }
214                         
215                         // if next CIGAR op is INSERTION
216                         else if ( nextOp.Type == 'I' ) {
217                             pileupAlignment.IsNextInsertion = true;
218                             pileupAlignment.InsertionLength = nextOp.Length;
219                         }
220                             
221                         // if next CIGAR op is either DELETION or INSERTION
222                         if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
223
224                             // if there is a CIGAR op after the DEL/INS
225                             if ( i < numCigarOps - 2 ) {
226                                 const CigarOp& nextNextOp = al.CigarData.at(i+2);
227                                 
228                                 // if next CIGAR op is clipping or ref_skip
229                                 if ( nextNextOp.Type == 'S' || 
230                                      nextNextOp.Type == 'N' ||
231                                      nextNextOp.Type == 'H' )
232                                     pileupAlignment.IsSegmentEnd = true;
233                             } 
234                             else {
235                                 pileupAlignment.IsSegmentEnd = true;
236                                 
237                                 // if next CIGAR op is clipping or ref_skip
238                                 if ( nextOp.Type == 'S' || 
239                                      nextOp.Type == 'N' ||
240                                      nextOp.Type == 'H' )
241                                     pileupAlignment.IsSegmentEnd = true;
242                             }
243                         }
244                         
245                         // otherwise
246                         else { 
247                         
248                             // if next CIGAR op is clipping or ref_skip
249                             if ( nextOp.Type == 'S' || 
250                                  nextOp.Type == 'N' ||
251                                  nextOp.Type == 'H' )
252                                 pileupAlignment.IsSegmentEnd = true;
253                         }
254                     }
255                     
256                     // else this is last operation
257                     else pileupAlignment.IsSegmentEnd = true;
258                 }
259             }
260           
261             // increment markers
262             genomePosition      += op.Length;
263             positionInAlignment += op.Length;
264         } 
265         
266         // if op is DELETION
267         else if ( op.Type == 'D' ) {
268           
269             // if deletion op overlaps current position
270             if ( genomePosition + (int)op.Length > CurrentPosition ) {
271               
272                 // set pileup data
273                 pileupAlignment.IsCurrentDeletion   = true;
274                 pileupAlignment.IsNextDeletion      = false;
275                 pileupAlignment.IsNextInsertion     = true;
276                 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
277             }
278             
279             // increment marker
280             genomePosition += op.Length;
281         }
282
283         // if op is REF_SKIP
284         else if ( op.Type == 'N' ) {
285             genomePosition += op.Length;
286         }
287         
288         // if op is INSERTION or SOFT_CLIP
289         else if ( op.Type == 'I' || op.Type == 'S' ) {
290             positionInAlignment += op.Length;
291         }
292         
293         // checl for beginning of new read segment
294         if ( op.Type == 'N' ||
295              op.Type == 'S' ||
296              op.Type == 'H' )
297             isNewReadSegment = true;
298         else 
299             isNewReadSegment = false;
300       
301         // if we've moved beyond current position
302         if ( genomePosition > CurrentPosition ) {
303             if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
304             break;
305         }
306     }
307
308     // save pileup position if flag is true
309     if ( saveAlignment )
310         CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
311 }
312
313 // ---------------------------------------------
314 // PileupEngine implementation
315
316 PileupEngine::PileupEngine(void)
317     : d( new PileupEnginePrivate )
318 { }
319
320 PileupEngine::~PileupEngine(void) {
321     delete d;
322     d = 0;
323 }
324
325 bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
326 void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
327 void PileupEngine::Flush(void) { d->Flush(); }