]> git.donarmstrong.com Git - bamtools.git/blob - src/utils/bamtools_pileup_engine.cpp
Added UTILS_EXPORT macro to classes in BamTools utility library
[bamtools.git] / src / utils / bamtools_pileup_engine.cpp
1 // ***************************************************************************
2 // bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010
7 // ---------------------------------------------------------------------------
8 // Provides pileup at position functionality for various tools.
9 // ***************************************************************************
10
11 #include <utils/bamtools_pileup_engine.h>
12 using namespace BamTools;
13
14 #include <iostream>
15 using namespace std;
16
17 // ---------------------------------------------
18 // PileupEnginePrivate implementation
19
20 struct PileupEngine::PileupEnginePrivate {
21   
22     // data members
23     int CurrentId;
24     int CurrentPosition;
25     vector<BamAlignment> CurrentAlignments;
26     PileupPosition CurrentPileupData;
27     
28     bool IsFirstAlignment;
29     vector<PileupVisitor*> Visitors;
30   
31     // ctor & dtor
32     PileupEnginePrivate(void)
33         : CurrentId(-1)
34         , CurrentPosition(-1)
35         , IsFirstAlignment(true)
36     { }
37     ~PileupEnginePrivate(void) { }
38     
39     // 'public' methods
40     bool AddAlignment(const BamAlignment& al);
41     void Flush(void);
42     
43     // internal methods
44     private:
45         void ApplyVisitors(void);
46         void ClearOldData(void);
47         void CreatePileupData(void);
48         void ParseAlignmentCigar(const BamAlignment& al);
49 };
50
51 bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
52   
53     // if first time
54     if ( IsFirstAlignment ) {
55       
56         // set initial markers 
57         CurrentId       = al.RefID;
58         CurrentPosition = al.Position;
59         
60         // store first entry
61         CurrentAlignments.clear();
62         CurrentAlignments.push_back(al);
63         
64         // set flag & return
65         IsFirstAlignment = false;
66         return true;
67     }
68   
69     // if same reference
70     if ( al.RefID == CurrentId ) {
71       
72         // if same position, store and move on
73         if ( al.Position == CurrentPosition )
74             CurrentAlignments.push_back(al);
75         
76         // if less than CurrentPosition - sorting error => ABORT
77         else if ( al.Position < CurrentPosition ) {
78             cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
79             return false;
80         }
81         
82         // else print pileup data until 'catching up' to CurrentPosition
83         else {
84             while ( al.Position > CurrentPosition ) {
85                 ApplyVisitors();
86                 ++CurrentPosition;
87             }
88             CurrentAlignments.push_back(al);
89         }
90     } 
91
92     // if reference ID less than CurrentId - sorting error => ABORT
93     else if ( al.RefID < CurrentId ) {
94         cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
95         return false;
96     }
97
98     // else moved forward onto next reference
99     else {
100         
101         // print any remaining pileup data from previous reference
102         while ( !CurrentAlignments.empty() ) {
103             ApplyVisitors();
104             ++CurrentPosition;
105         }
106         
107         // store first entry on this new reference, update markers
108         CurrentAlignments.clear();
109         CurrentAlignments.push_back(al);
110         CurrentId = al.RefID;
111         CurrentPosition = al.Position;
112     }
113   
114     return true;
115 }
116
117 void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
118   
119     // parse CIGAR data in BamAlignments to build up current pileup data
120     CreatePileupData();
121   
122     // apply all visitors to current alignment set
123     vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
124     vector<PileupVisitor*>::const_iterator visitorEnd  = Visitors.end();
125     for ( ; visitorIter != visitorEnd; ++visitorIter ) 
126         (*visitorIter)->Visit(CurrentPileupData);
127 }
128
129 void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
130  
131     // remove any data that ends before CurrentPosition
132     size_t i = 0;
133     while ( i < CurrentAlignments.size() ) {
134       
135         // remove alignment if it ends before CurrentPosition
136         const int endPosition = CurrentAlignments[i].GetEndPosition();
137         if ( endPosition < CurrentPosition )
138             CurrentAlignments.erase(CurrentAlignments.begin() + i);
139         else
140             ++i;
141     }
142 }
143
144 void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
145   
146     // remove any non-overlapping alignments
147     ClearOldData();
148   
149     // set pileup refId, position to current markers
150     CurrentPileupData.RefId = CurrentId;
151     CurrentPileupData.Position = CurrentPosition;
152     CurrentPileupData.PileupAlignments.clear();
153     
154     // parse CIGAR data in remaining alignments 
155     vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
156     vector<BamAlignment>::const_iterator alEnd  = CurrentAlignments.end(); 
157     for ( ; alIter != alEnd; ++alIter )
158         ParseAlignmentCigar( (*alIter) );
159 }
160
161 void PileupEngine::PileupEnginePrivate::Flush(void) {
162     while ( !CurrentAlignments.empty() ) {
163         ApplyVisitors();
164         ++CurrentPosition;
165     }
166 }
167
168 void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
169   
170     // skip if unmapped
171     if ( !al.IsMapped() ) return;
172     
173     // intialize local variables
174     int  genomePosition      = al.Position;
175     int  positionInAlignment = 0;
176     bool isNewReadSegment    = true;
177     bool saveAlignment       = true;    
178     PileupAlignment pileupAlignment(al);
179     
180     // iterate over CIGAR operations
181     const int numCigarOps = (const int)al.CigarData.size();
182     for (int i = 0; i < numCigarOps; ++i ) { 
183         const CigarOp& op = al.CigarData.at(i);
184       
185         // if op is MATCH
186         if ( op.Type == 'M' ) {
187           
188             // if match op overlaps current position
189             if ( genomePosition + (int)op.Length > CurrentPosition ) {
190               
191                 // set pileup data
192                 pileupAlignment.IsCurrentDeletion   = false;
193                 pileupAlignment.IsNextDeletion      = false;
194                 pileupAlignment.IsNextInsertion     = false;
195                 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
196                 
197                 // check for beginning of read segment
198                 if ( genomePosition == CurrentPosition && isNewReadSegment ) 
199                     pileupAlignment.IsSegmentBegin = true;
200                 
201                 // if we're at the end of a match operation
202                 if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
203                     
204                     // if not last operation
205                     if ( i < numCigarOps - 1 ) {
206                         
207                         // check next CIGAR op
208                         const CigarOp& nextOp = al.CigarData.at(i+1);
209                         
210                         // if next CIGAR op is DELETION
211                         if ( nextOp.Type == 'D') {
212                             pileupAlignment.IsNextDeletion = true;
213                             pileupAlignment.DeletionLength = nextOp.Length;
214                         }
215                         
216                         // if next CIGAR op is INSERTION
217                         else if ( nextOp.Type == 'I' ) {
218                             pileupAlignment.IsNextInsertion = true;
219                             pileupAlignment.InsertionLength = nextOp.Length;
220                         }
221                             
222                         // if next CIGAR op is either DELETION or INSERTION
223                         if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
224
225                             // if there is a CIGAR op after the DEL/INS
226                             if ( i < numCigarOps - 2 ) {
227                                 const CigarOp& nextNextOp = al.CigarData.at(i+2);
228                                 
229                                 // if next CIGAR op is clipping or ref_skip
230                                 if ( nextNextOp.Type == 'S' || 
231                                      nextNextOp.Type == 'N' ||
232                                      nextNextOp.Type == 'H' )
233                                     pileupAlignment.IsSegmentEnd = true;
234                             } 
235                             else {
236                                 pileupAlignment.IsSegmentEnd = true;
237                                 
238                                 // if next CIGAR op is clipping or ref_skip
239                                 if ( nextOp.Type == 'S' || 
240                                      nextOp.Type == 'N' ||
241                                      nextOp.Type == 'H' )
242                                     pileupAlignment.IsSegmentEnd = true;
243                             }
244                         }
245                         
246                         // otherwise
247                         else { 
248                         
249                             // if next CIGAR op is clipping or ref_skip
250                             if ( nextOp.Type == 'S' || 
251                                  nextOp.Type == 'N' ||
252                                  nextOp.Type == 'H' )
253                                 pileupAlignment.IsSegmentEnd = true;
254                         }
255                     }
256                     
257                     // else this is last operation
258                     else pileupAlignment.IsSegmentEnd = true;
259                 }
260             }
261           
262             // increment markers
263             genomePosition      += op.Length;
264             positionInAlignment += op.Length;
265         } 
266         
267         // if op is DELETION
268         else if ( op.Type == 'D' ) {
269           
270             // if deletion op overlaps current position
271             if ( genomePosition + (int)op.Length > CurrentPosition ) {
272               
273                 // set pileup data
274                 pileupAlignment.IsCurrentDeletion   = true;
275                 pileupAlignment.IsNextDeletion      = false;
276                 pileupAlignment.IsNextInsertion     = true;
277                 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
278             }
279             
280             // increment marker
281             genomePosition += op.Length;
282         }
283
284         // if op is REF_SKIP
285         else if ( op.Type == 'N' ) {
286             genomePosition += op.Length;
287         }
288         
289         // if op is INSERTION or SOFT_CLIP
290         else if ( op.Type == 'I' || op.Type == 'S' ) {
291             positionInAlignment += op.Length;
292         }
293         
294         // checl for beginning of new read segment
295         if ( op.Type == 'N' ||
296              op.Type == 'S' ||
297              op.Type == 'H' )
298             isNewReadSegment = true;
299         else 
300             isNewReadSegment = false;
301       
302         // if we've moved beyond current position
303         if ( genomePosition > CurrentPosition ) {
304             if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
305             break;
306         }
307     }
308
309     // save pileup position if flag is true
310     if ( saveAlignment )
311         CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
312 }
313
314 // ---------------------------------------------
315 // PileupEngine implementation
316
317 PileupEngine::PileupEngine(void)
318     : d( new PileupEnginePrivate )
319 { }
320
321 PileupEngine::~PileupEngine(void) {
322     delete d;
323     d = 0;
324 }
325
326 bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
327 void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
328 void PileupEngine::Flush(void) { d->Flush(); }