1 // ***************************************************************************
2 // bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 9 March 2012 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides pileup at position functionality for various tools.
8 // ***************************************************************************
10 #include "utils/bamtools_pileup_engine.h"
11 using namespace BamTools;
16 // ---------------------------------------------
17 // PileupEnginePrivate implementation
19 struct PileupEngine::PileupEnginePrivate {
24 vector<BamAlignment> CurrentAlignments;
25 PileupPosition CurrentPileupData;
27 bool IsFirstAlignment;
28 vector<PileupVisitor*> Visitors;
31 PileupEnginePrivate(void)
34 , IsFirstAlignment(true)
36 ~PileupEnginePrivate(void) { }
39 bool AddAlignment(const BamAlignment& al);
44 void ApplyVisitors(void);
45 void ClearOldData(void);
46 void CreatePileupData(void);
47 void ParseAlignmentCigar(const BamAlignment& al);
50 bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
53 if ( IsFirstAlignment ) {
55 // set initial markers
57 CurrentPosition = al.Position;
60 CurrentAlignments.clear();
61 CurrentAlignments.push_back(al);
64 IsFirstAlignment = false;
69 if ( al.RefID == CurrentId ) {
71 // if same position, store and move on
72 if ( al.Position == CurrentPosition )
73 CurrentAlignments.push_back(al);
75 // if less than CurrentPosition - sorting error => ABORT
76 else if ( al.Position < CurrentPosition ) {
77 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
81 // else print pileup data until 'catching up' to CurrentPosition
83 while ( al.Position > CurrentPosition ) {
87 CurrentAlignments.push_back(al);
91 // if reference ID less than CurrentId - sorting error => ABORT
92 else if ( al.RefID < CurrentId ) {
93 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
97 // else moved forward onto next reference
100 // print any remaining pileup data from previous reference
101 while ( !CurrentAlignments.empty() ) {
106 // store first entry on this new reference, update markers
107 CurrentAlignments.clear();
108 CurrentAlignments.push_back(al);
109 CurrentId = al.RefID;
110 CurrentPosition = al.Position;
116 void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
118 // parse CIGAR data in BamAlignments to build up current pileup data
121 // apply all visitors to current alignment set
122 vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
123 vector<PileupVisitor*>::const_iterator visitorEnd = Visitors.end();
124 for ( ; visitorIter != visitorEnd; ++visitorIter )
125 (*visitorIter)->Visit(CurrentPileupData);
128 void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
130 // remove any alignments that end before our CurrentPosition
131 // N.B. - BAM positions are 0-based, half-open. GetEndPosition() returns a 1-based position,
132 // while our CurrentPosition is 0-based. For example, an alignment with 'endPosition' of
133 // 100 does not overlap a 'CurrentPosition' of 100, and should be discarded.
137 const size_t numAlignments = CurrentAlignments.size();
138 while ( i < numAlignments ) {
140 // skip over alignment if its (1-based) endPosition is <= to (0-based) CurrentPosition
141 // i.e. this entry will not be saved upon vector resize
142 const int endPosition = CurrentAlignments[i].GetEndPosition();
143 if ( endPosition <= CurrentPosition ) {
148 // otherwise alignment ends after CurrentPosition
149 // move it towards vector beginning, at index j
151 CurrentAlignments[j] = CurrentAlignments[i];
153 // increment our indices
158 // 'squeeze' vector to size j, discarding all remaining alignments in the container
159 CurrentAlignments.resize(j);
162 void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
164 // remove any non-overlapping alignments
167 // set pileup refId, position to current markers
168 CurrentPileupData.RefId = CurrentId;
169 CurrentPileupData.Position = CurrentPosition;
170 CurrentPileupData.PileupAlignments.clear();
172 // parse CIGAR data in remaining alignments
173 vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
174 vector<BamAlignment>::const_iterator alEnd = CurrentAlignments.end();
175 for ( ; alIter != alEnd; ++alIter )
176 ParseAlignmentCigar( (*alIter) );
179 void PileupEngine::PileupEnginePrivate::Flush(void) {
180 while ( !CurrentAlignments.empty() ) {
186 void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
189 if ( !al.IsMapped() ) return;
191 // intialize local variables
192 int genomePosition = al.Position;
193 int positionInAlignment = 0;
194 bool isNewReadSegment = true;
195 bool saveAlignment = true;
196 PileupAlignment pileupAlignment(al);
198 // iterate over CIGAR operations
199 const int numCigarOps = (const int)al.CigarData.size();
200 for (int i = 0; i < numCigarOps; ++i ) {
201 const CigarOp& op = al.CigarData.at(i);
204 if ( op.Type == 'M' ) {
206 // if match op overlaps current position
207 if ( genomePosition + (int)op.Length > CurrentPosition ) {
210 pileupAlignment.IsCurrentDeletion = false;
211 pileupAlignment.IsNextDeletion = false;
212 pileupAlignment.IsNextInsertion = false;
213 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
215 // check for beginning of read segment
216 if ( genomePosition == CurrentPosition && isNewReadSegment )
217 pileupAlignment.IsSegmentBegin = true;
219 // if we're at the end of a match operation
220 if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
222 // if not last operation
223 if ( i < numCigarOps - 1 ) {
225 // check next CIGAR op
226 const CigarOp& nextOp = al.CigarData.at(i+1);
228 // if next CIGAR op is DELETION
229 if ( nextOp.Type == 'D') {
230 pileupAlignment.IsNextDeletion = true;
231 pileupAlignment.DeletionLength = nextOp.Length;
234 // if next CIGAR op is INSERTION
235 else if ( nextOp.Type == 'I' ) {
236 pileupAlignment.IsNextInsertion = true;
237 pileupAlignment.InsertionLength = nextOp.Length;
240 // if next CIGAR op is either DELETION or INSERTION
241 if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
243 // if there is a CIGAR op after the DEL/INS
244 if ( i < numCigarOps - 2 ) {
245 const CigarOp& nextNextOp = al.CigarData.at(i+2);
247 // if next CIGAR op is clipping or ref_skip
248 if ( nextNextOp.Type == 'S' ||
249 nextNextOp.Type == 'N' ||
250 nextNextOp.Type == 'H' )
251 pileupAlignment.IsSegmentEnd = true;
254 pileupAlignment.IsSegmentEnd = true;
256 // if next CIGAR op is clipping or ref_skip
257 if ( nextOp.Type == 'S' ||
258 nextOp.Type == 'N' ||
260 pileupAlignment.IsSegmentEnd = true;
267 // if next CIGAR op is clipping or ref_skip
268 if ( nextOp.Type == 'S' ||
269 nextOp.Type == 'N' ||
271 pileupAlignment.IsSegmentEnd = true;
275 // else this is last operation
276 else pileupAlignment.IsSegmentEnd = true;
281 genomePosition += op.Length;
282 positionInAlignment += op.Length;
286 else if ( op.Type == 'D' ) {
288 // if deletion op overlaps current position
289 if ( genomePosition + (int)op.Length > CurrentPosition ) {
292 pileupAlignment.IsCurrentDeletion = true;
293 pileupAlignment.IsNextDeletion = false;
294 pileupAlignment.IsNextInsertion = true;
295 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
299 genomePosition += op.Length;
303 else if ( op.Type == 'N' ) {
304 genomePosition += op.Length;
307 // if op is INSERTION or SOFT_CLIP
308 else if ( op.Type == 'I' || op.Type == 'S' ) {
309 positionInAlignment += op.Length;
312 // checl for beginning of new read segment
313 if ( op.Type == 'N' ||
316 isNewReadSegment = true;
318 isNewReadSegment = false;
320 // if we've moved beyond current position
321 if ( genomePosition > CurrentPosition ) {
322 if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
327 // save pileup position if flag is true
329 CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
332 // ---------------------------------------------
333 // PileupEngine implementation
335 PileupEngine::PileupEngine(void)
336 : d( new PileupEnginePrivate )
339 PileupEngine::~PileupEngine(void) {
344 bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
345 void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
346 void PileupEngine::Flush(void) { d->Flush(); }