+// ***************************************************************************
+// bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 16 September 2010
+// ---------------------------------------------------------------------------
+// Provides pileup at position functionality for various tools.
+// ***************************************************************************
+
+#include <iostream>
+#include "bamtools_pileup_engine.h"
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// PileupEnginePrivate implementation
+
+struct PileupEngine::PileupEnginePrivate {
+
+ // data members
+ int CurrentId;
+ int CurrentPosition;
+ vector<BamAlignment> CurrentAlignments;
+ PileupPosition CurrentPileupData;
+
+ bool IsFirstAlignment;
+ vector<PileupVisitor*> Visitors;
+
+ // ctor & dtor
+ PileupEnginePrivate(void)
+ : CurrentId(-1)
+ , CurrentPosition(-1)
+ , IsFirstAlignment(true)
+ { }
+ ~PileupEnginePrivate(void) { }
+
+ // 'public' methods
+ bool AddAlignment(const BamAlignment& al);
+ void Flush(void);
+
+ // internal methods
+ private:
+ void ApplyVisitors(void);
+ void ClearOldData(void);
+ void CreatePileupData(void);
+ void ParseAlignmentCigar(const BamAlignment& al);
+};
+
+bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
+
+ // if first time
+ if ( IsFirstAlignment ) {
+
+ // set initial markers
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+
+ // store first entry
+ CurrentAlignments.clear();
+ CurrentAlignments.push_back(al);
+
+ // set flag & return
+ IsFirstAlignment = false;
+ return true;
+ }
+
+ // if same reference
+ if ( al.RefID == CurrentId ) {
+
+ // if same position, store and move on
+ if ( al.Position == CurrentPosition )
+ CurrentAlignments.push_back(al);
+
+ // if less than CurrentPosition - sorting error => ABORT
+ else if ( al.Position < CurrentPosition ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else print pileup data until 'catching up' to CurrentPosition
+ else {
+ while ( al.Position > CurrentPosition ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+ CurrentAlignments.push_back(al);
+ }
+ }
+
+ // if reference ID less than CurrentId - sorting error => ABORT
+ else if ( al.RefID < CurrentId ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else moved forward onto next reference
+ else {
+
+ // print any remaining pileup data from previous reference
+ while ( !CurrentAlignments.empty() ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+
+ // store first entry on this new reference, update markers
+ CurrentAlignments.clear();
+ CurrentAlignments.push_back(al);
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+ }
+
+ return true;
+}
+
+void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
+
+ // parse CIGAR data in BamAlignments to build up current pileup data
+ CreatePileupData();
+
+ // apply all visitors to current alignment set
+ vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
+ vector<PileupVisitor*>::const_iterator visitorEnd = Visitors.end();
+ for ( ; visitorIter != visitorEnd; ++visitorIter )
+ (*visitorIter)->Visit(CurrentPileupData);
+}
+
+void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
+
+ // remove any data that ends before CurrentPosition
+ size_t i = 0;
+ while ( i < CurrentAlignments.size() ) {
+
+ // remove alignment if it ends before CurrentPosition
+ const int endPosition = CurrentAlignments[i].GetEndPosition();
+ if ( endPosition < CurrentPosition )
+ CurrentAlignments.erase(CurrentAlignments.begin() + i);
+ else
+ ++i;
+ }
+}
+
+void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
+
+ // remove any non-overlapping alignments
+ ClearOldData();
+
+ // set pileup refId, position to current markers
+ CurrentPileupData.RefId = CurrentId;
+ CurrentPileupData.Position = CurrentPosition;
+ CurrentPileupData.PileupAlignments.clear();
+
+ // parse CIGAR data in remaining alignments
+ vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
+ vector<BamAlignment>::const_iterator alEnd = CurrentAlignments.end();
+ for ( ; alIter != alEnd; ++alIter )
+ ParseAlignmentCigar( (*alIter) );
+}
+
+void PileupEngine::PileupEnginePrivate::Flush(void) {
+ while ( !CurrentAlignments.empty() ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+}
+
+void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
+
+ // skip if unmapped
+ if ( !al.IsMapped() ) return;
+
+ // intialize local variables
+ int genomePosition = al.Position;
+ int positionInAlignment = 0;
+ bool isNewReadSegment = true;
+ bool saveAlignment = true;
+ PileupAlignment pileupAlignment(al);
+
+ // iterate over CIGAR operations
+ const int numCigarOps = (const int)al.CigarData.size();
+ for (int i = 0; i < numCigarOps; ++i ) {
+ const CigarOp& op = al.CigarData.at(i);
+
+ // if op is MATCH
+ if ( op.Type == 'M' ) {
+
+ // if match op overlaps current position
+ if ( genomePosition + (int)op.Length > CurrentPosition ) {
+
+ // set pileup data
+ pileupAlignment.IsCurrentDeletion = false;
+ pileupAlignment.IsNextDeletion = false;
+ pileupAlignment.IsNextInsertion = false;
+ pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+
+ // check for beginning of read segment
+ if ( genomePosition == CurrentPosition && isNewReadSegment )
+ pileupAlignment.IsSegmentBegin = true;
+
+ // if we're at the end of a match operation
+ if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
+
+ // if not last operation
+ if ( i < numCigarOps - 1 ) {
+
+ // check next CIGAR op
+ const CigarOp& nextOp = al.CigarData.at(i+1);
+
+ // if next CIGAR op is DELETION
+ if ( nextOp.Type == 'D') {
+ pileupAlignment.IsNextDeletion = true;
+ pileupAlignment.DeletionLength = nextOp.Length;
+ }
+
+ // if next CIGAR op is INSERTION
+ else if ( nextOp.Type == 'I' ) {
+ pileupAlignment.IsNextInsertion = true;
+ pileupAlignment.InsertionLength = nextOp.Length;
+ }
+
+ // if next CIGAR op is either DELETION or INSERTION
+ if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
+
+ // if there is a CIGAR op after the DEL/INS
+ if ( i < numCigarOps - 2 ) {
+ const CigarOp& nextNextOp = al.CigarData.at(i+2);
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextNextOp.Type == 'S' ||
+ nextNextOp.Type == 'N' ||
+ nextNextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ else {
+ pileupAlignment.IsSegmentEnd = true;
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextOp.Type == 'S' ||
+ nextOp.Type == 'N' ||
+ nextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // otherwise
+ else {
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextOp.Type == 'S' ||
+ nextOp.Type == 'N' ||
+ nextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // else this is last operation
+ else pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // increment markers
+ genomePosition += op.Length;
+ positionInAlignment += op.Length;
+ }
+
+ // if op is DELETION
+ else if ( op.Type == 'D' ) {
+
+ // if deletion op overlaps current position
+ if ( genomePosition + (int)op.Length > CurrentPosition ) {
+
+ // set pileup data
+ pileupAlignment.IsCurrentDeletion = true;
+ pileupAlignment.IsNextDeletion = false;
+ pileupAlignment.IsNextInsertion = true;
+ pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+ }
+
+ // increment marker
+ genomePosition += op.Length;
+ }
+
+ // if op is REF_SKIP
+ else if ( op.Type == 'N' ) {
+ genomePosition += op.Length;
+ }
+
+ // if op is INSERTION or SOFT_CLIP
+ else if ( op.Type == 'I' || op.Type == 'S' ) {
+ positionInAlignment += op.Length;
+ }
+
+ // checl for beginning of new read segment
+ if ( op.Type == 'N' ||
+ op.Type == 'S' ||
+ op.Type == 'H' )
+ isNewReadSegment = true;
+ else
+ isNewReadSegment = false;
+
+ // if we've moved beyond current position
+ if ( genomePosition > CurrentPosition ) {
+ if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
+ break;
+ }
+ }
+
+ // save pileup position if flag is true
+ if ( saveAlignment )
+ CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
+}
+
+// ---------------------------------------------
+// PileupEngine implementation
+
+PileupEngine::PileupEngine(void)
+ : d( new PileupEnginePrivate )
+{ }
+
+PileupEngine::~PileupEngine(void) {
+ delete d;
+ d = 0;
+}
+
+bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
+void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
+void PileupEngine::Flush(void) { d->Flush(); }