1 // ***************************************************************************
2 // bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010
7 // ---------------------------------------------------------------------------
8 // Provides pileup at position functionality for various tools.
9 // ***************************************************************************
11 #include <utils/bamtools_pileup_engine.h>
12 using namespace BamTools;
17 // ---------------------------------------------
18 // PileupEnginePrivate implementation
20 struct PileupEngine::PileupEnginePrivate {
25 vector<BamAlignment> CurrentAlignments;
26 PileupPosition CurrentPileupData;
28 bool IsFirstAlignment;
29 vector<PileupVisitor*> Visitors;
32 PileupEnginePrivate(void)
35 , IsFirstAlignment(true)
37 ~PileupEnginePrivate(void) { }
40 bool AddAlignment(const BamAlignment& al);
45 void ApplyVisitors(void);
46 void ClearOldData(void);
47 void CreatePileupData(void);
48 void ParseAlignmentCigar(const BamAlignment& al);
51 bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
54 if ( IsFirstAlignment ) {
56 // set initial markers
58 CurrentPosition = al.Position;
61 CurrentAlignments.clear();
62 CurrentAlignments.push_back(al);
65 IsFirstAlignment = false;
70 if ( al.RefID == CurrentId ) {
72 // if same position, store and move on
73 if ( al.Position == CurrentPosition )
74 CurrentAlignments.push_back(al);
76 // if less than CurrentPosition - sorting error => ABORT
77 else if ( al.Position < CurrentPosition ) {
78 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
82 // else print pileup data until 'catching up' to CurrentPosition
84 while ( al.Position > CurrentPosition ) {
88 CurrentAlignments.push_back(al);
92 // if reference ID less than CurrentId - sorting error => ABORT
93 else if ( al.RefID < CurrentId ) {
94 cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
98 // else moved forward onto next reference
101 // print any remaining pileup data from previous reference
102 while ( !CurrentAlignments.empty() ) {
107 // store first entry on this new reference, update markers
108 CurrentAlignments.clear();
109 CurrentAlignments.push_back(al);
110 CurrentId = al.RefID;
111 CurrentPosition = al.Position;
117 void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
119 // parse CIGAR data in BamAlignments to build up current pileup data
122 // apply all visitors to current alignment set
123 vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
124 vector<PileupVisitor*>::const_iterator visitorEnd = Visitors.end();
125 for ( ; visitorIter != visitorEnd; ++visitorIter )
126 (*visitorIter)->Visit(CurrentPileupData);
129 void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
131 // remove any data that ends before CurrentPosition
133 while ( i < CurrentAlignments.size() ) {
135 // remove alignment if it ends before CurrentPosition
136 const int endPosition = CurrentAlignments[i].GetEndPosition();
137 if ( endPosition < CurrentPosition )
138 CurrentAlignments.erase(CurrentAlignments.begin() + i);
144 void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
146 // remove any non-overlapping alignments
149 // set pileup refId, position to current markers
150 CurrentPileupData.RefId = CurrentId;
151 CurrentPileupData.Position = CurrentPosition;
152 CurrentPileupData.PileupAlignments.clear();
154 // parse CIGAR data in remaining alignments
155 vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
156 vector<BamAlignment>::const_iterator alEnd = CurrentAlignments.end();
157 for ( ; alIter != alEnd; ++alIter )
158 ParseAlignmentCigar( (*alIter) );
161 void PileupEngine::PileupEnginePrivate::Flush(void) {
162 while ( !CurrentAlignments.empty() ) {
168 void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
171 if ( !al.IsMapped() ) return;
173 // intialize local variables
174 int genomePosition = al.Position;
175 int positionInAlignment = 0;
176 bool isNewReadSegment = true;
177 bool saveAlignment = true;
178 PileupAlignment pileupAlignment(al);
180 // iterate over CIGAR operations
181 const int numCigarOps = (const int)al.CigarData.size();
182 for (int i = 0; i < numCigarOps; ++i ) {
183 const CigarOp& op = al.CigarData.at(i);
186 if ( op.Type == 'M' ) {
188 // if match op overlaps current position
189 if ( genomePosition + (int)op.Length > CurrentPosition ) {
192 pileupAlignment.IsCurrentDeletion = false;
193 pileupAlignment.IsNextDeletion = false;
194 pileupAlignment.IsNextInsertion = false;
195 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
197 // check for beginning of read segment
198 if ( genomePosition == CurrentPosition && isNewReadSegment )
199 pileupAlignment.IsSegmentBegin = true;
201 // if we're at the end of a match operation
202 if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
204 // if not last operation
205 if ( i < numCigarOps - 1 ) {
207 // check next CIGAR op
208 const CigarOp& nextOp = al.CigarData.at(i+1);
210 // if next CIGAR op is DELETION
211 if ( nextOp.Type == 'D') {
212 pileupAlignment.IsNextDeletion = true;
213 pileupAlignment.DeletionLength = nextOp.Length;
216 // if next CIGAR op is INSERTION
217 else if ( nextOp.Type == 'I' ) {
218 pileupAlignment.IsNextInsertion = true;
219 pileupAlignment.InsertionLength = nextOp.Length;
222 // if next CIGAR op is either DELETION or INSERTION
223 if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
225 // if there is a CIGAR op after the DEL/INS
226 if ( i < numCigarOps - 2 ) {
227 const CigarOp& nextNextOp = al.CigarData.at(i+2);
229 // if next CIGAR op is clipping or ref_skip
230 if ( nextNextOp.Type == 'S' ||
231 nextNextOp.Type == 'N' ||
232 nextNextOp.Type == 'H' )
233 pileupAlignment.IsSegmentEnd = true;
236 pileupAlignment.IsSegmentEnd = true;
238 // if next CIGAR op is clipping or ref_skip
239 if ( nextOp.Type == 'S' ||
240 nextOp.Type == 'N' ||
242 pileupAlignment.IsSegmentEnd = true;
249 // if next CIGAR op is clipping or ref_skip
250 if ( nextOp.Type == 'S' ||
251 nextOp.Type == 'N' ||
253 pileupAlignment.IsSegmentEnd = true;
257 // else this is last operation
258 else pileupAlignment.IsSegmentEnd = true;
263 genomePosition += op.Length;
264 positionInAlignment += op.Length;
268 else if ( op.Type == 'D' ) {
270 // if deletion op overlaps current position
271 if ( genomePosition + (int)op.Length > CurrentPosition ) {
274 pileupAlignment.IsCurrentDeletion = true;
275 pileupAlignment.IsNextDeletion = false;
276 pileupAlignment.IsNextInsertion = true;
277 pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
281 genomePosition += op.Length;
285 else if ( op.Type == 'N' ) {
286 genomePosition += op.Length;
289 // if op is INSERTION or SOFT_CLIP
290 else if ( op.Type == 'I' || op.Type == 'S' ) {
291 positionInAlignment += op.Length;
294 // checl for beginning of new read segment
295 if ( op.Type == 'N' ||
298 isNewReadSegment = true;
300 isNewReadSegment = false;
302 // if we've moved beyond current position
303 if ( genomePosition > CurrentPosition ) {
304 if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
309 // save pileup position if flag is true
311 CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
314 // ---------------------------------------------
315 // PileupEngine implementation
317 PileupEngine::PileupEngine(void)
318 : d( new PileupEnginePrivate )
321 PileupEngine::~PileupEngine(void) {
326 bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
327 void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
328 void PileupEngine::Flush(void) { d->Flush(); }