1 // ***************************************************************************
2 // bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 16 September 2010
7 // ---------------------------------------------------------------------------
8 // Provides pileup at position functionality for various tools.
9 // ***************************************************************************
12 #include "bamtools_pileup_engine.h"
14 using namespace BamTools;
16 // ---------------------------------------------
17 // PileupEnginePrivate implementation
19 struct PileupEngine::PileupEnginePrivate {
24 vector<BamAlignment> CurrentAlignments;
25 PileupPosition CurrentPileupData;
27 bool IsFirstAlignment;
28 vector<PileupVisitor*> Visitors;
31 PileupEnginePrivate(void)
34 , IsFirstAlignment(true)
36 ~PileupEnginePrivate(void) { }
39 bool AddAlignment(const BamAlignment& al);
44 void ApplyVisitors(void);
45 void ClearOldData(void);
46 void CreatePileupData(void);
47 void ParseAlignmentCigar(const BamAlignment& al);
50 bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
53 if ( IsFirstAlignment ) {
55 // set initial markers
57 CurrentPosition = al.Position;
60 CurrentAlignments.clear();
61 CurrentAlignments.push_back(al);
64 IsFirstAlignment = false;
69 if ( al.RefID == CurrentId ) {
71 // if same position, store and move on
72 if ( al.Position == CurrentPosition )
73 CurrentAlignments.push_back(al);
75 // if less than CurrentPosition - sorting error => ABORT
76 else if ( al.Position < CurrentPosition ) {
77 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
81 // else print pileup data until 'catching up' to CurrentPosition
83 while ( al.Position > CurrentPosition ) {
87 CurrentAlignments.push_back(al);
91 // if reference ID less than CurrentId - sorting error => ABORT
92 else if ( al.RefID < CurrentId ) {
93 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
97 // else moved forward onto next reference
100 // print any remaining pileup data from previous reference
101 while ( !CurrentAlignments.empty() ) {
106 // store first entry on this new reference, update markers
107 CurrentAlignments.clear();
108 CurrentAlignments.push_back(al);
109 CurrentId = al.RefID;
110 CurrentPosition = al.Position;
116 void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
118 // parse CIGAR data in BamAlignments to build up current pileup data
121 // apply all visitors to current alignment set
122 vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
123 vector<PileupVisitor*>::const_iterator visitorEnd = Visitors.end();
124 for ( ; visitorIter != visitorEnd; ++visitorIter )
125 (*visitorIter)->Visit(CurrentPileupData);
128 void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
130 // remove any data that ends before CurrentPosition
132 while ( i < CurrentAlignments.size() ) {
134 // remove alignment if it ends before CurrentPosition
135 const int endPosition = CurrentAlignments[i].GetEndPosition();
136 if ( endPosition < CurrentPosition )
137 CurrentAlignments.erase(CurrentAlignments.begin() + i);
143 void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
145 // remove any non-overlapping alignments
148 // set pileup refId, position to current markers
149 CurrentPileupData.RefId = CurrentId;
150 CurrentPileupData.Position = CurrentPosition;
151 CurrentPileupData.PileupAlignments.clear();
153 // parse CIGAR data in remaining alignments
154 vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
155 vector<BamAlignment>::const_iterator alEnd = CurrentAlignments.end();
156 for ( ; alIter != alEnd; ++alIter )
157 ParseAlignmentCigar( (*alIter) );
160 void PileupEngine::PileupEnginePrivate::Flush(void) {
161 while ( !CurrentAlignments.empty() ) {
167 void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
170 if ( !al.IsMapped() ) return;
172 // intialize local variables
173 int genomePosition = al.Position;
174 int positionInAlignment = 0;
175 bool isNewReadSegment = true;
176 bool saveAlignment = true;
177 PileupAlignment pileupAlignment(al);
179 // iterate over CIGAR operations
180 const int numCigarOps = (const int)al.CigarData.size();
181 for (int i = 0; i < numCigarOps; ++i ) {
182 const CigarOp& op = al.CigarData.at(i);
185 if ( op.Type == 'M' ) {
187 // if match op overlaps current position
188 if ( genomePosition + (int)op.Length > CurrentPosition ) {
191 pileupAlignment.IsCurrentDeletion = false;
192 pileupAlignment.IsNextDeletion = false;
193 pileupAlignment.IsNextInsertion = false;
194 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
196 // check for beginning of read segment
197 if ( genomePosition == CurrentPosition && isNewReadSegment )
198 pileupAlignment.IsSegmentBegin = true;
200 // if we're at the end of a match operation
201 if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
203 // if not last operation
204 if ( i < numCigarOps - 1 ) {
206 // check next CIGAR op
207 const CigarOp& nextOp = al.CigarData.at(i+1);
209 // if next CIGAR op is DELETION
210 if ( nextOp.Type == 'D') {
211 pileupAlignment.IsNextDeletion = true;
212 pileupAlignment.DeletionLength = nextOp.Length;
215 // if next CIGAR op is INSERTION
216 else if ( nextOp.Type == 'I' ) {
217 pileupAlignment.IsNextInsertion = true;
218 pileupAlignment.InsertionLength = nextOp.Length;
221 // if next CIGAR op is either DELETION or INSERTION
222 if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
224 // if there is a CIGAR op after the DEL/INS
225 if ( i < numCigarOps - 2 ) {
226 const CigarOp& nextNextOp = al.CigarData.at(i+2);
228 // if next CIGAR op is clipping or ref_skip
229 if ( nextNextOp.Type == 'S' ||
230 nextNextOp.Type == 'N' ||
231 nextNextOp.Type == 'H' )
232 pileupAlignment.IsSegmentEnd = true;
235 pileupAlignment.IsSegmentEnd = true;
237 // if next CIGAR op is clipping or ref_skip
238 if ( nextOp.Type == 'S' ||
239 nextOp.Type == 'N' ||
241 pileupAlignment.IsSegmentEnd = true;
248 // if next CIGAR op is clipping or ref_skip
249 if ( nextOp.Type == 'S' ||
250 nextOp.Type == 'N' ||
252 pileupAlignment.IsSegmentEnd = true;
256 // else this is last operation
257 else pileupAlignment.IsSegmentEnd = true;
262 genomePosition += op.Length;
263 positionInAlignment += op.Length;
267 else if ( op.Type == 'D' ) {
269 // if deletion op overlaps current position
270 if ( genomePosition + (int)op.Length > CurrentPosition ) {
273 pileupAlignment.IsCurrentDeletion = true;
274 pileupAlignment.IsNextDeletion = false;
275 pileupAlignment.IsNextInsertion = true;
276 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
280 genomePosition += op.Length;
284 else if ( op.Type == 'N' ) {
285 genomePosition += op.Length;
288 // if op is INSERTION or SOFT_CLIP
289 else if ( op.Type == 'I' || op.Type == 'S' ) {
290 positionInAlignment += op.Length;
293 // checl for beginning of new read segment
294 if ( op.Type == 'N' ||
297 isNewReadSegment = true;
299 isNewReadSegment = false;
301 // if we've moved beyond current position
302 if ( genomePosition > CurrentPosition ) {
303 if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
308 // save pileup position if flag is true
310 CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
313 // ---------------------------------------------
314 // PileupEngine implementation
316 PileupEngine::PileupEngine(void)
317 : d( new PileupEnginePrivate )
320 PileupEngine::~PileupEngine(void) {
325 bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
326 void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
327 void PileupEngine::Flush(void) { d->Flush(); }