]> git.donarmstrong.com Git - bamtools.git/commitdiff
Added new PileupEngine to the toolkit. This is used by CoverageTool as well as Conver...
authorDerek <derekwbarnett@gmail.com>
Thu, 16 Sep 2010 05:37:28 +0000 (01:37 -0400)
committerDerek <derekwbarnett@gmail.com>
Thu, 16 Sep 2010 05:37:28 +0000 (01:37 -0400)
src/toolkit/bamtools_convert.cpp
src/toolkit/bamtools_coverage.cpp
src/toolkit/bamtools_coverage.h
src/utils/Makefile
src/utils/bamtools_pileup.cpp [deleted file]
src/utils/bamtools_pileup.h [deleted file]
src/utils/bamtools_pileup_engine.cpp [new file with mode: 0644]
src/utils/bamtools_pileup_engine.h [new file with mode: 0644]

index edfc83c6761823430531bc672b355ab1a28e2a71..1929da6442d5741791201df79041ba63ee6e77b4 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 22 July 2010
+// Last modified: 16 September 2010
 // ---------------------------------------------------------------------------
 // Converts between BAM and a number of other formats
 // ***************************************************************************
 #include <sstream>
 #include <string>
 #include <vector>
-
 #include "bamtools_convert.h"
+#include "bamtools_fasta.h"
 #include "bamtools_options.h"
-#include "bamtools_pileup.h"
+#include "bamtools_pileup_engine.h"
 #include "bamtools_utilities.h"
 #include "BGZF.h"
-#include "BamReader.h"
 #include "BamMultiReader.h"
-
 using namespace std;
 using namespace BamTools;
   
 namespace BamTools { 
-    // format names
+  
+    // ---------------------------------------------
+    // ConvertTool constants
+  
+    // supported conversion format command-line names
     static const string FORMAT_BED      = "bed";
     static const string FORMAT_BEDGRAPH = "bedgraph";
     static const string FORMAT_FASTA    = "fasta";
@@ -40,36 +41,36 @@ namespace BamTools {
     // other constants
     static const unsigned int FASTA_LINE_MAX = 50;
     
-} // namespace BamTools
-  
-struct ConvertTool::ConvertToolPrivate {
-  
-    // ctor & dtor
-    public:
-        ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
-        ~ConvertToolPrivate(void);
+    // ---------------------------------------------
+    // ConvertPileupFormatVisitor declaration
     
-    // interface
-    public:
-        bool Run(void);
+    class ConvertPileupFormatVisitor : public PileupVisitor {
+      
+        // ctor & dtor
+        public:
+            ConvertPileupFormatVisitor(const RefVector& references, 
+                                       const string& fastaFilename,
+                                       const bool isPrintingMapQualities,
+                                       ostream* out);
+            ~ConvertPileupFormatVisitor(void);
+      
+        // PileupVisitor interface implementation
+        public:
+//             void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments);
+            
+            void Visit(const PileupPosition& pileupData);
+            
+        // data members
+        private:
+            Fasta     m_fasta;
+            bool      m_hasFasta;
+            bool      m_isPrintingMapQualities;
+            ostream*  m_out;
+            RefVector m_references;
+    };
     
-    // internal methods
-    private:
-        void PrintBed(const BamAlignment& a);
-        void PrintBedGraph(const BamAlignment& a);
-        void PrintFasta(const BamAlignment& a);
-        void PrintFastq(const BamAlignment& a);
-        void PrintJson(const BamAlignment& a);
-        void PrintSam(const BamAlignment& a);
-        void PrintWiggle(const BamAlignment& a);
-        
-    // data members
-    private: 
-        ConvertTool::ConvertSettings* m_settings;
-        RefVector m_references;
-        ostream m_out;
-};
-
+} // namespace BamTools
+  
 // ---------------------------------------------
 // ConvertSettings implementation
 
@@ -105,66 +106,43 @@ struct ConvertTool::ConvertSettings {
         , IsOmittingSamHeader(false)
         , IsPrintingPileupMapQualities(false)
         , OutputFilename(Options::StandardOut())
+        , FastaFilename("")
     { } 
-};  
+};    
 
 // ---------------------------------------------
-// ConvertTool implementation
-
-ConvertTool::ConvertTool(void)
-    : AbstractTool()
-    , m_settings(new ConvertSettings)
-    , m_impl(0)
-{
-    // set program details
-    Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [other options]");
-    
-    // set up options 
-    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
-    Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,   m_settings->InputFiles,     IO_Opts, Options::StandardIn());
-    Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutput,  m_settings->OutputFilename, IO_Opts, Options::StandardOut());
-    Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
-   
-    OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
-    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more  details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
-    
-    OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
-    Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
-    Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
-    
-    OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
-    Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
-}
-
-ConvertTool::~ConvertTool(void) {
-    delete m_settings;
-    m_settings = 0;
-    
-    delete m_impl;
-    m_impl = 0;
-}
-
-int ConvertTool::Help(void) {
-    Options::DisplayHelp();
-    return 0;
-}
-
-int ConvertTool::Run(int argc, char* argv[]) {
+// ConvertToolPrivate implementation  
   
-    // parse command line arguments
-    Options::Parse(argc, argv, 1);
-    
-    // run internal ConvertTool implementation, return success/fail
-    m_impl = new ConvertToolPrivate(m_settings);
+struct ConvertTool::ConvertToolPrivate {
+  
+    // ctor & dtor
+    public:
+        ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
+        ~ConvertToolPrivate(void);
     
-    if ( m_impl->Run() ) 
-        return 0;
-    else 
-        return 1;
-}
-
-// ---------------------------------------------
-// ConvertToolPrivate implementation
+    // interface
+    public:
+        bool Run(void);
+        
+    // internal methods
+    private:
+        void PrintBed(const BamAlignment& a);
+        void PrintBedGraph(const BamAlignment& a);
+        void PrintFasta(const BamAlignment& a);
+        void PrintFastq(const BamAlignment& a);
+        void PrintJson(const BamAlignment& a);
+        void PrintSam(const BamAlignment& a);
+        void PrintWiggle(const BamAlignment& a);
+        
+        // special case - uses the PileupEngine
+        bool RunPileupConversion(BamMultiReader* reader);
+        
+    // data members
+    private: 
+        ConvertTool::ConvertSettings* m_settings;
+        RefVector m_references;
+        ostream m_out;
+};
 
 ConvertTool::ConvertToolPrivate::ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
     : m_settings(settings)
@@ -175,8 +153,6 @@ ConvertTool::ConvertToolPrivate::~ConvertToolPrivate(void) { }
 
 bool ConvertTool::ConvertToolPrivate::Run(void) {
  
-    bool convertedOk = true;
-  
     // ------------------------------------
     // initialize conversion input/output
         
@@ -193,8 +169,13 @@ bool ConvertTool::ConvertToolPrivate::Run(void) {
     BamRegion region;
     if ( m_settings->HasRegion ) {
         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
-            if ( !reader.SetRegion(region) )
-              cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+            if ( !reader.SetRegion(region) ) {
+                cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+                return false;
+            }
+        } else {
+            cerr << "Could not parse REGION: " << m_settings->Region << endl;
+            return false;
         }
     }
         
@@ -213,33 +194,22 @@ bool ConvertTool::ConvertToolPrivate::Run(void) {
         m_out.rdbuf(outFile.rdbuf()); 
     }
     
-    // ------------------------
+    // -------------------------------------
+    // do conversion based on format
+    
+     bool convertedOk = true;
+    
     // pileup is special case
-    if ( m_settings->Format == FORMAT_PILEUP ) {
-        
-        // initialize pileup input/output
-        Pileup pileup(&reader, &m_out);
-        
-        // ---------------------------
-        // configure pileup settings
-        
-        if ( m_settings->HasRegion ) 
-            pileup.SetRegion(region);
-        
-        if ( m_settings->HasFastaFilename ) 
-            pileup.SetFastaFilename(m_settings->FastaFilename);
-        
-        pileup.SetIsPrintingMapQualities( m_settings->IsPrintingPileupMapQualities );
-        
-        // run pileup
-        convertedOk = pileup.Run();
-    }
+    // conversion not done per alignment, like the other formats
+    if ( m_settings->Format == FORMAT_PILEUP )
+        convertedOk = RunPileupConversion(&reader);
     
-    // -------------------------------------
-    // else determine 'simpler' format type
+    // all other formats
     else {
     
         bool formatError = false;
+        
+        // set function pointer to proper conversion method
         void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
         if      ( m_settings->Format == FORMAT_BED )      pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
         else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph;
@@ -255,24 +225,26 @@ bool ConvertTool::ConvertToolPrivate::Run(void) {
             convertedOk = false;
         }
         
-        // if SAM format & not omitting header, print SAM header
-        if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) {
-            string headerText = reader.GetHeaderText();
-            m_out << headerText;
-        }
-        
-        // ------------------------
-        // do conversion
+        // if format selected ok
         if ( !formatError ) {
+        
+            // if SAM format & not omitting header, print SAM header first
+            if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) 
+                m_out << reader.GetHeaderText();
+            
+            // iterate through file, doing conversion
             BamAlignment a;
-            while ( reader.GetNextAlignment(a) ) {
-                (this->*pFunction)(a);
-            }
+            while ( reader.GetNextAlignment(a) ) 
+              (this->*pFunction)(a);
+            
+            // set flag for successful conversion
+            convertedOk = true;
         }
     }
     
     // ------------------------
     // clean up & exit
+    
     reader.Close();
     if ( m_settings->HasOutput ) outFile.close();
     return convertedOk;   
@@ -282,7 +254,7 @@ bool ConvertTool::ConvertToolPrivate::Run(void) {
 // Conversion/output methods
 // ----------------------------------------------------------
 
-void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a)      
+void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) { 
   
     // tab-delimited, 0-based half-open 
     // (e.g. a 50-base read aligned to pos 10 could have BED coordinates (10, 60) instead of BAM coordinates (10, 59) )
@@ -485,7 +457,6 @@ void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) {
     }
 
     m_out << "}" << endl;
-    
 }
 
 // print BamAlignment in SAM format
@@ -617,3 +588,236 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) {
 void ConvertTool::ConvertToolPrivate::PrintWiggle(const BamAlignment& a) { 
     ; 
 }
+
+bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
+  
+    // check for valid BamMultiReader
+    if ( reader == 0 ) return false;
+  
+    // set up our pileup format 'visitor'
+    ConvertPileupFormatVisitor* v = new ConvertPileupFormatVisitor(m_references, 
+                                                                   m_settings->FastaFilename,
+                                                                   m_settings->IsPrintingPileupMapQualities, 
+                                                                   &m_out);
+
+    // set up PileupEngine
+    PileupEngine pileup;
+    pileup.AddVisitor(v);
+    
+    // iterate through data
+    BamAlignment al;
+    while ( reader->GetNextAlignment(al) )
+        pileup.AddAlignment(al);
+    pileup.Flush();
+    
+    // clean up
+    delete v;
+    v = 0;
+    
+    // return success
+    return true;
+}       
+
+// ---------------------------------------------
+// ConvertTool implementation
+
+ConvertTool::ConvertTool(void)
+    : AbstractTool()
+    , m_settings(new ConvertSettings)
+    , m_impl(0)
+{
+    // set program details
+    Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [other options]");
+    
+    // set up options 
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,   m_settings->InputFiles,     IO_Opts, Options::StandardIn());
+    Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutput,  m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+    Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
+   
+    OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
+    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more  details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+    
+    OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
+    Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
+    Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
+    
+    OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
+    Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
+}
+
+ConvertTool::~ConvertTool(void) {
+    delete m_settings;
+    m_settings = 0;
+    
+    delete m_impl;
+    m_impl = 0;
+}
+
+int ConvertTool::Help(void) {
+    Options::DisplayHelp();
+    return 0;
+}
+
+int ConvertTool::Run(int argc, char* argv[]) {
+  
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+    
+    // run internal ConvertTool implementation, return success/fail
+    m_impl = new ConvertToolPrivate(m_settings);
+    
+    if ( m_impl->Run() ) 
+        return 0;
+    else 
+        return 1;
+}
+
+// ---------------------------------------------
+// ConvertPileupFormatVisitor implementation
+
+ConvertPileupFormatVisitor::ConvertPileupFormatVisitor(const RefVector& references, 
+                                                       const string& fastaFilename,
+                                                       const bool isPrintingMapQualities,
+                                                       ostream* out)
+    : PileupVisitor()
+    , m_hasFasta(false)
+    , m_isPrintingMapQualities(isPrintingMapQualities)
+    , m_out(out)
+    , m_references(references)
+{ 
+    // set up Fasta reader if file is provided
+    if ( !fastaFilename.empty() ) {
+      
+        // check for FASTA index
+        string indexFilename = "";
+        if ( Utilities::FileExists(fastaFilename + ".fai") ) 
+            indexFilename = fastaFilename + ".fai";
+      
+        // open FASTA file
+        if ( m_fasta.Open(fastaFilename, indexFilename) ) 
+            m_hasFasta = true;
+    }
+}
+
+ConvertPileupFormatVisitor::~ConvertPileupFormatVisitor(void) { 
+    // be sure to close Fasta reader
+    if ( m_hasFasta ) {
+        m_fasta.Close();
+        m_hasFasta = false;
+    }
+}
+
+void ConvertPileupFormatVisitor::Visit(const PileupPosition& pileupData ) {
+  
+    // skip if no alignments at this position
+    if ( pileupData.PileupAlignments.empty() ) return;
+  
+    // retrieve reference name
+    const string& referenceName = m_references[pileupData.RefId].RefName;
+    const int& position   = pileupData.Position;
+    
+    // retrieve reference base from FASTA file, if one provided; otherwise default to 'N'
+    char referenceBase('N');
+    if ( m_hasFasta && (pileupData.Position < m_references[pileupData.RefId].RefLength) ) {
+        if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position, referenceBase ) ) {
+            cerr << "Pileup error : Could not read reference base from FASTA file" << endl;
+            return;
+        }
+    }
+    
+    // get count of alleles at this position
+    const int numberAlleles = pileupData.PileupAlignments.size();
+    
+    // -----------------------------------------------------------
+    // build strings based on alleles at this positionInAlignment
+    
+    stringstream bases("");
+    stringstream baseQualities("");
+    stringstream mapQualities("");
+    
+    // iterate over alignments at this pileup position
+    vector<PileupAlignment>::const_iterator pileupIter = pileupData.PileupAlignments.begin();
+    vector<PileupAlignment>::const_iterator pileupEnd  = pileupData.PileupAlignments.end();
+    for ( ; pileupIter != pileupEnd; ++pileupIter ) {
+        const PileupAlignment pa = (*pileupIter);
+        const BamAlignment& ba = pa.Alignment;
+        
+        // if beginning of read segment
+        if ( pa.IsSegmentBegin )
+            bases << '^' << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+        
+        // if current base is not a DELETION
+        if ( !pa.IsCurrentDeletion ) {
+          
+            // get base at current position
+            char base = ba.QueryBases.at(pa.PositionInAlignment);
+            
+            // if base matches reference
+            if ( base == '=' || 
+                 toupper(base) == toupper(referenceBase) ||
+                 tolower(base) == tolower(referenceBase) ) 
+            {
+                base = (ba.IsReverseStrand() ? ',' : '.' );
+            }
+            
+            // mismatches reference
+            else base = (ba.IsReverseStrand() ? tolower(base) : toupper(base) );
+            
+            // store base
+            bases << base;
+          
+            // if next position contains insertion
+            if ( pa.IsNextInsertion ) {
+                bases << '+' << pa.InsertionLength;
+                for (int i = 1; i <= pa.InsertionLength; ++i) {
+                    char insertedBase = (char)ba.QueryBases.at(pa.PositionInAlignment + i);
+                    bases << (ba.IsReverseStrand() ? (char)tolower(insertedBase) : (char)toupper(insertedBase) );
+                }
+            }
+            
+            // if next position contains DELETION
+            else if ( pa.IsNextDeletion ) {
+                bases << '-' << pa.DeletionLength;
+                for (int i = 1; i <= pa.DeletionLength; ++i) {
+                    char deletedBase('N');
+                    if ( m_hasFasta && (pileupData.Position+i < m_references[pileupData.RefId].RefLength) ) {
+                        if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position+i, deletedBase ) ) {
+                            cerr << "Pileup error : Could not read reference base from FASTA file" << endl;
+                            return;
+                        }
+                    }
+                    bases << (ba.IsReverseStrand() ? (char)tolower(deletedBase) : (char)toupper(deletedBase) );
+                }
+            }
+        }
+        
+        // otherwise, DELETION
+        else bases << '*';
+        
+        // if end of read segment
+        if ( pa.IsSegmentEnd ) bases << '$';
+        
+        // store current base quality
+        baseQualities << ba.Qualities.at(pa.PositionInAlignment);
+        
+        // save alignment map quality if desired
+        if ( m_isPrintingMapQualities )
+            mapQualities << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+    }
+    
+    // ----------------------
+    // print results 
+    
+    // tab-delimited
+    // <refName> <1-based pos> <refBase> <numberAlleles> <bases> <qualities> [mapQuals]
+    
+    const string TAB = "\t";
+    *m_out << referenceName       << TAB 
+           << position + 1        << TAB 
+           << referenceBase       << TAB 
+           << numberAlleles       << TAB 
+           << bases.str()         << TAB 
+           << baseQualities.str() << TAB
+           << mapQualities.str()  << endl;
+}
index b587445747cb7d2be054b841b432ca2a93a84e1d..5924edb78a85b93af928f56610dd0b9d8be2a007 100644 (file)
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 16 September 2010
 // ---------------------------------------------------------------------------
-// Prints coverage statistics for a single BAM file 
-//
-// ** Expand to multiple?? 
-//
+// Prints coverage data for a single BAM file 
 // ***************************************************************************
 
 #include <iostream>
+#include <fstream>
 #include <string>
 #include <vector>
-
 #include "bamtools_coverage.h"
 #include "bamtools_options.h"
+#include "bamtools_pileup_engine.h"
 #include "BamReader.h"
-
 using namespace std;
 using namespace BamTools; 
   
+namespace BamTools {
+// ---------------------------------------------  
+// CoverageVisitor implementation 
+  
+class CoverageVisitor : public PileupVisitor {
+  
+    public:
+        CoverageVisitor(const RefVector& references, ostream* out)
+            : PileupVisitor()
+            , m_references(references)
+            , m_out(out)
+        { }
+        ~CoverageVisitor(void) { }
+  
+    // PileupVisitor interface implementation
+    public:
+//         void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments) {
+        void Visit(const PileupPosition& pileupData) {
+            // -----------------------------------------
+            // print coverage results ( tab-delimited )
+            // <refName> <0-based pos> <number of alleles>
+//             *m_out << m_references[refId].RefName << "\t" << position << "\t" << alignments.size() << endl;
+            *m_out << m_references[pileupData.RefId].RefName << "\t" 
+                   << pileupData.Position << "\t" 
+                   << pileupData.PileupAlignments.size() << endl;
+        }
+        
+    private:
+        RefVector m_references;
+        ostream*  m_out;
+};
+
+} // namespace BamTools
+
 // ---------------------------------------------  
 // CoverageSettings implementation
 
 struct CoverageTool::CoverageSettings {
 
     // flags
-    bool HasInputBamFilename;
+    bool HasInputFile;
+    bool HasOutputFile;
 
     // filenames
-    std::string InputBamFilename;
+    string InputBamFilename;
+    string OutputFilename;
     
     // constructor
     CoverageSettings(void)
-        : HasInputBamFilename(false)
+        : HasInputFile(false)
+        , HasOutputFile(false)
         , InputBamFilename(Options::StandardIn())
+        , OutputFilename(Options::StandardOut())
     { } 
 };  
 
+// ---------------------------------------------
+// CoverageToolPrivate implementation
+
+struct CoverageTool::CoverageToolPrivate {
+  
+    // ctor & dtor
+    public:
+        CoverageToolPrivate(CoverageTool::CoverageSettings* settings);
+        ~CoverageToolPrivate(void);
+    
+    // interface
+    public:
+        bool Run(void);
+        
+    // data members
+    private: 
+        CoverageTool::CoverageSettings* m_settings;
+        ostream m_out;
+        RefVector m_references;
+};  
+
+CoverageTool::CoverageToolPrivate::CoverageToolPrivate(CoverageTool::CoverageSettings* settings)
+    : m_settings(settings)
+    , m_out(cout.rdbuf()) // default output to cout
+{ }
+
+CoverageTool::CoverageToolPrivate::~CoverageToolPrivate(void) { }  
+  
+bool CoverageTool::CoverageToolPrivate::Run(void) {  
+  
+    // if output filename given
+    ofstream outFile;
+    if ( m_settings->HasOutputFile ) {
+      
+        // open output file stream
+        outFile.open(m_settings->OutputFilename.c_str());
+        if ( !outFile ) {
+            cerr << "Could not open " << m_settings->OutputFilename << " for output." << endl; 
+            return false; 
+        }
+        
+        // set m_out to file's streambuf
+        m_out.rdbuf(outFile.rdbuf()); 
+    } 
+    
+    //open our BAM reader
+    BamReader reader;
+    reader.Open(m_settings->InputBamFilename);
+    m_references = reader.GetReferenceData();
+    
+    // set up our output 'visitor'
+    CoverageVisitor* cv = new CoverageVisitor(m_references, &m_out);
+    
+    // set up pileup engine with 'visitor'
+    PileupEngine pileup;
+    pileup.AddVisitor(cv);
+    
+    // process input data
+    BamAlignment al;    
+    while ( reader.GetNextAlignment(al) ) 
+        pileup.AddAlignment(al);
+    
+    // clean up 
+    reader.Close();
+    if ( m_settings->HasOutputFile ) outFile.close();
+    delete cv;
+    cv = 0;
+    
+    // return success
+    return true;
+}
+
 // ---------------------------------------------
 // CoverageTool implementation
 
 CoverageTool::CoverageTool(void) 
     : AbstractTool()
     , m_settings(new CoverageSettings)
+    , m_impl(0)
 { 
     // set program details
-    Options::SetProgramInfo("bamtools coverage", "prints coverage stats for a BAM file", "-in <filename> ");
+    Options::SetProgramInfo("bamtools coverage", "prints coverage data for a single BAM file", "[-in <filename>] [-out <filename>]");
     
     // set up options 
     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
-    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+    Options::AddValueOption("-in",  "BAM filename", "the input BAM file", "", m_settings->HasInputFile,  m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+    Options::AddValueOption("-out", "filename",     "the output file",    "", m_settings->HasOutputFile, m_settings->OutputFilename,   IO_Opts, Options::StandardOut());
 }
 
 CoverageTool::~CoverageTool(void) { 
     delete m_settings;
     m_settings = 0;
+    
+    delete m_impl;
+    m_impl = 0;
 }
 
 int CoverageTool::Help(void) { 
@@ -69,16 +182,12 @@ int CoverageTool::Run(int argc, char* argv[]) {
 
     // parse command line arguments
     Options::Parse(argc, argv, 1);
-
-    //open our BAM reader
-    BamReader reader;
-    reader.Open(m_settings->InputBamFilename);
-
-    // generate coverage stats
-    cerr << "Generating coverage stats for " << m_settings->InputBamFilename << endl;
-    cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
-
-    // clean & exit
-    reader.Close();
-    return 0;
-}
\ No newline at end of file
+    
+    // run internal ConvertTool implementation, return success/fail
+    m_impl = new CoverageToolPrivate(m_settings);
+    
+    if ( m_impl->Run() ) 
+        return 0;
+    else 
+        return 1;
+}  
index c3714c0d02af818d0c7eeb64e99161ff0bea02b6..e9e8a712d3277ed28b8c067bcdc94948cd4db540 100644 (file)
@@ -3,12 +3,9 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 1 August 2010
 // ---------------------------------------------------------------------------
-// Prints coverage statistics for a single BAM file 
-//
-// ** Expand to multiple?? 
-//
+// Prints coverage data for a single BAM file 
 // ***************************************************************************
 
 #ifndef BAMTOOLS_COVERAGE_H
@@ -31,6 +28,9 @@ class CoverageTool : public AbstractTool {
     private:  
         struct CoverageSettings;
         CoverageSettings* m_settings;
+        
+        struct CoverageToolPrivate;
+        CoverageToolPrivate* m_impl;
 };
   
 } // namespace BamTools
index 7e13cddbdcb79535f5366a3874edb24859c9a796..d56bfe01e5d783effe72f332d6cc20c89eef2cdc 100644 (file)
@@ -17,7 +17,7 @@ INCLUDES = -I$(API_DIR)/
 SOURCES = bamtools_fasta.cpp \
           bamtools_filter_engine.cpp \
           bamtools_options.cpp \
-          bamtools_pileup.cpp \
+         bamtools_pileup_engine.cpp \
           bamtools_utilities.cpp 
 OBJECTS= $(SOURCES:.cpp=.o)
 BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
diff --git a/src/utils/bamtools_pileup.cpp b/src/utils/bamtools_pileup.cpp
deleted file mode 100644 (file)
index 1862289..0000000
+++ /dev/null
@@ -1,231 +0,0 @@
-// ***************************************************************************
-// bamtools_pileup.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 13 July 2010
-// ---------------------------------------------------------------------------
-// Provides pileup conversion functionality.  
-// 
-// The 'assembly' aspect of pileup makes this more complicated than the 
-// simpler one-to-one conversion methods for other formats.
-// ***************************************************************************
-
-#include <vector>
-#include "BamMultiReader.h"
-#include "bamtools_pileup.h"
-using namespace std;
-using namespace BamTools;
-
-struct Pileup::PileupPrivate {
-  
-    // ---------------------
-    // data members
-  
-    // IO & settings
-    BamMultiReader* Reader;
-    ostream* OutStream;
-    string FastaFilename;
-    bool IsPrintingMapQualities;
-    BamRegion Region;
-    
-    // parsing data
-    int CurrentId;
-    int CurrentPosition;
-    vector<BamAlignment> CurrentData;
-    RefVector References;
-    
-    // ----------------------
-    // ctor
-    
-    PileupPrivate(BamMultiReader* reader, ostream* outStream)
-        : Reader(reader)
-        , OutStream(outStream)
-        , FastaFilename("")
-        , IsPrintingMapQualities(false)
-    { }
-    
-    // ----------------------
-    // internal methods
-    
-    void PrintCurrentData(void);
-    bool Run(void);
-};
-
-void Pileup::PileupPrivate::PrintCurrentData(void) {
-  
-    // remove any data that ends before CurrentPosition
-    size_t i = 0;
-    while ( i < CurrentData.size() ) {
-        if ( CurrentData[i].GetEndPosition() < CurrentPosition )
-            CurrentData.erase(CurrentData.begin() + i);
-        else
-            ++i;
-    }
-  
-    // if not data remains, return
-    if ( CurrentData.empty() ) return;
-    
-    // initialize empty strings
-    string bases     = "";
-    string baseQuals = "";
-    string mapQuals  = "";
-    
-    // iterate over alignments
-    vector<BamAlignment>::const_iterator dataIter = CurrentData.begin();
-    vector<BamAlignment>::const_iterator dataEnd  = CurrentData.end();
-    for ( ; dataIter != dataEnd; ++dataIter ) {
-        
-        // retrieve alignment
-        const BamAlignment& al = (*dataIter);
-        
-        // determine current base character & store
-        const char base = al.AlignedBases[CurrentPosition -al.Position];
-        if ( al.IsReverseStrand() ) 
-            bases.push_back( tolower(base) );
-        else 
-            bases.push_back( toupper(base) );
-        
-        // determine current base quality & store
-        baseQuals.push_back( al.Qualities[CurrentPosition - al.Position] );
-        
-        // if using mapQuals, determine current mapQual & store
-        if ( IsPrintingMapQualities ) {
-            int mapQuality = (int)(al.MapQuality + 33);
-            if ( mapQuality > 126 ) mapQuality = 126; 
-            mapQuals.push_back((char)mapQuality);
-        }
-    }
-    
-    // print results to OutStream
-    const string& refName = References[CurrentId].RefName;
-    const char refBase = 'N';
-    
-    *OutStream << refName << "\t" << CurrentPosition << "\t" << refBase << "\t" << CurrentData.size() << "\t" << bases << "\t" << baseQuals;
-    if ( IsPrintingMapQualities ) *OutStream << "\t" << mapQuals;
-    *OutStream << endl;
-}
-
-bool Pileup::PileupPrivate::Run(void) {
-  
-    // -----------------------------
-    // validate input & output 
-    
-    if ( !Reader ) {
-        cerr << "Pileup::Run() : Invalid multireader" << endl;
-        return false;
-    }
-    
-    if ( !OutStream) { 
-        cerr << "Pileup::Run() : Invalid output stream" << endl;
-        return false;
-    }
-    
-    References = Reader->GetReferenceData();
-    
-    // -----------------------------
-    // process input data
-    
-    // get first entry    
-    BamAlignment al;
-    if ( !Reader->GetNextAlignment(al) ) {
-        cerr << "Pileup::Run() : Could not read from multireader" << endl;
-        return false;
-    }
-    
-    // set initial markers & store first entry
-    CurrentId = al.RefID;
-    CurrentPosition = al.Position;
-    CurrentData.clear();
-    CurrentData.push_back(al);
-    
-    // iterate over remaining data
-    while ( Reader->GetNextAlignment(al) ) {
-        
-        // if same reference
-        if ( al.RefID == CurrentId ) {
-          
-            // if same position, store and move on
-            if ( al.Position == CurrentPosition )
-                CurrentData.push_back(al);
-            
-            // if less than CurrentPosition - sorting error => ABORT
-            else if ( al.Position < CurrentPosition ) {
-                cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
-                return false;
-            }
-            
-            // else print pileup data until 'catching up' to CurrentPosition
-            else {
-                while ( al.Position > CurrentPosition ) {
-                    PrintCurrentData();
-                    ++CurrentPosition;
-                }
-                CurrentData.push_back(al);
-            }
-        } 
-        
-        // if reference ID less than CurrentID - sorting error => ABORT
-        else if ( al.RefID < CurrentId ) {
-            cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
-            return false;
-        }
-        
-        // else moved forward onto next reference
-        else {
-            
-            // print any remaining pileup data from previous reference
-            while ( !CurrentData.empty() ) {
-                PrintCurrentData();
-                ++CurrentPosition;
-            }
-            
-            // store first entry on this new reference, update markers
-            CurrentData.clear();
-            CurrentData.push_back(al);
-            CurrentId = al.RefID;
-            CurrentPosition = al.Position;
-        }
-    }
-    
-    // ------------------------------------
-    // handle  any remaining data entries
-    
-    while ( !CurrentData.empty() ) {
-        PrintCurrentData();
-        ++CurrentPosition;
-    }
-  
-    // -------------------------
-    // return success
-    
-    return true;
-}
-
-// ----------------------------------------------------------
-// Pileup implementation
-
-Pileup::Pileup(BamMultiReader* reader, ostream* outStream) {
-    d = new PileupPrivate(reader, outStream);
-}
-
-Pileup::~Pileup(void) {
-    delete d;
-    d = 0;
-}
-
-bool Pileup::Run(void) {
-    return d->Run();
-}
-
-void Pileup::SetFastaFilename(const string& filename) {
-    d->FastaFilename = filename;
-}
-
-void Pileup::SetIsPrintingMapQualities(bool ok) {
-    d->IsPrintingMapQualities = ok;
-}
-
-void Pileup::SetRegion(const BamRegion& region) {
-    d->Region = region;
-}
diff --git a/src/utils/bamtools_pileup.h b/src/utils/bamtools_pileup.h
deleted file mode 100644 (file)
index 3ffb030..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-// ***************************************************************************
-// bamtools_pileup.h (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 13 July 2010
-// ---------------------------------------------------------------------------
-// Provides pileup conversion functionality.  
-// 
-// The 'assembly' aspect of pileup makes this more complicated than the 
-// simpler one-to-one conversion methods for other formats.
-// ***************************************************************************
-
-#ifndef BAMTOOLS_PILEUP_H
-#define BAMTOOLS_PILEUP_H
-
-#include <iostream>
-#include <string>
-
-namespace BamTools {
-
-class BamMultiReader;
-class BamRegion;
-
-class Pileup {
-  
-    public:
-        Pileup(BamMultiReader* reader, std::ostream* outStream);
-        ~Pileup(void);
-        
-    public:
-        bool Run(void);
-        void SetFastaFilename(const std::string& filename);
-        void SetIsPrintingMapQualities(bool ok);  
-        void SetRegion(const BamRegion& region);
-  
-    private:
-        struct PileupPrivate;
-        PileupPrivate* d;
-};
-
-} // namespace BamTools
-
-#endif // BAMTOOLS_PILEUP_H
\ No newline at end of file
diff --git a/src/utils/bamtools_pileup_engine.cpp b/src/utils/bamtools_pileup_engine.cpp
new file mode 100644 (file)
index 0000000..70bace8
--- /dev/null
@@ -0,0 +1,327 @@
+// ***************************************************************************
+// bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 16 September 2010
+// ---------------------------------------------------------------------------
+// Provides pileup at position functionality for various tools.
+// ***************************************************************************
+
+#include <iostream>
+#include "bamtools_pileup_engine.h"
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// PileupEnginePrivate implementation
+
+struct PileupEngine::PileupEnginePrivate {
+  
+    // data members
+    int CurrentId;
+    int CurrentPosition;
+    vector<BamAlignment> CurrentAlignments;
+    PileupPosition CurrentPileupData;
+    
+    bool IsFirstAlignment;
+    vector<PileupVisitor*> Visitors;
+  
+    // ctor & dtor
+    PileupEnginePrivate(void)
+        : CurrentId(-1)
+        , CurrentPosition(-1)
+        , IsFirstAlignment(true)
+    { }
+    ~PileupEnginePrivate(void) { }
+    
+    // 'public' methods
+    bool AddAlignment(const BamAlignment& al);
+    void Flush(void);
+    
+    // internal methods
+    private:
+        void ApplyVisitors(void);
+        void ClearOldData(void);
+        void CreatePileupData(void);
+        void ParseAlignmentCigar(const BamAlignment& al);
+};
+
+bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
+  
+    // if first time
+    if ( IsFirstAlignment ) {
+      
+        // set initial markers 
+        CurrentId       = al.RefID;
+        CurrentPosition = al.Position;
+        
+        // store first entry
+        CurrentAlignments.clear();
+        CurrentAlignments.push_back(al);
+        
+        // set flag & return
+        IsFirstAlignment = false;
+        return true;
+    }
+  
+    // if same reference
+    if ( al.RefID == CurrentId ) {
+      
+        // if same position, store and move on
+        if ( al.Position == CurrentPosition )
+            CurrentAlignments.push_back(al);
+        
+        // if less than CurrentPosition - sorting error => ABORT
+        else if ( al.Position < CurrentPosition ) {
+            cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+            return false;
+        }
+        
+        // else print pileup data until 'catching up' to CurrentPosition
+        else {
+            while ( al.Position > CurrentPosition ) {
+                ApplyVisitors();
+                ++CurrentPosition;
+            }
+            CurrentAlignments.push_back(al);
+        }
+    } 
+
+    // if reference ID less than CurrentId - sorting error => ABORT
+    else if ( al.RefID < CurrentId ) {
+        cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+        return false;
+    }
+
+    // else moved forward onto next reference
+    else {
+        
+        // print any remaining pileup data from previous reference
+        while ( !CurrentAlignments.empty() ) {
+            ApplyVisitors();
+            ++CurrentPosition;
+        }
+        
+        // store first entry on this new reference, update markers
+        CurrentAlignments.clear();
+        CurrentAlignments.push_back(al);
+        CurrentId = al.RefID;
+        CurrentPosition = al.Position;
+    }
+  
+    return true;
+}
+
+void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
+  
+    // parse CIGAR data in BamAlignments to build up current pileup data
+    CreatePileupData();
+  
+    // apply all visitors to current alignment set
+    vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
+    vector<PileupVisitor*>::const_iterator visitorEnd  = Visitors.end();
+    for ( ; visitorIter != visitorEnd; ++visitorIter ) 
+        (*visitorIter)->Visit(CurrentPileupData);
+}
+
+void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
+    // remove any data that ends before CurrentPosition
+    size_t i = 0;
+    while ( i < CurrentAlignments.size() ) {
+      
+        // remove alignment if it ends before CurrentPosition
+        const int endPosition = CurrentAlignments[i].GetEndPosition();
+        if ( endPosition < CurrentPosition )
+            CurrentAlignments.erase(CurrentAlignments.begin() + i);
+        else
+            ++i;
+    }
+}
+
+void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
+  
+    // remove any non-overlapping alignments
+    ClearOldData();
+  
+    // set pileup refId, position to current markers
+    CurrentPileupData.RefId = CurrentId;
+    CurrentPileupData.Position = CurrentPosition;
+    CurrentPileupData.PileupAlignments.clear();
+    
+    // parse CIGAR data in remaining alignments 
+    vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
+    vector<BamAlignment>::const_iterator alEnd  = CurrentAlignments.end(); 
+    for ( ; alIter != alEnd; ++alIter )
+        ParseAlignmentCigar( (*alIter) );
+}
+
+void PileupEngine::PileupEnginePrivate::Flush(void) {
+    while ( !CurrentAlignments.empty() ) {
+        ApplyVisitors();
+        ++CurrentPosition;
+    }
+}
+
+void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
+  
+    // skip if unmapped
+    if ( !al.IsMapped() ) return;
+    
+    // intialize local variables
+    int  genomePosition      = al.Position;
+    int  positionInAlignment = 0;
+    bool isNewReadSegment    = true;
+    bool saveAlignment       = true;    
+    PileupAlignment pileupAlignment(al);
+    
+    // iterate over CIGAR operations
+    const int numCigarOps = (const int)al.CigarData.size();
+    for (int i = 0; i < numCigarOps; ++i ) { 
+        const CigarOp& op = al.CigarData.at(i);
+      
+        // if op is MATCH
+        if ( op.Type == 'M' ) {
+          
+            // if match op overlaps current position
+            if ( genomePosition + (int)op.Length > CurrentPosition ) {
+              
+                // set pileup data
+                pileupAlignment.IsCurrentDeletion   = false;
+                pileupAlignment.IsNextDeletion      = false;
+                pileupAlignment.IsNextInsertion     = false;
+                pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+                
+                // check for beginning of read segment
+                if ( genomePosition == CurrentPosition && isNewReadSegment ) 
+                    pileupAlignment.IsSegmentBegin = true;
+                
+                // if we're at the end of a match operation
+                if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
+                    
+                    // if not last operation
+                    if ( i < numCigarOps - 1 ) {
+                        
+                        // check next CIGAR op
+                        const CigarOp& nextOp = al.CigarData.at(i+1);
+                        
+                        // if next CIGAR op is DELETION
+                        if ( nextOp.Type == 'D') {
+                            pileupAlignment.IsNextDeletion = true;
+                            pileupAlignment.DeletionLength = nextOp.Length;
+                        }
+                        
+                        // if next CIGAR op is INSERTION
+                        else if ( nextOp.Type == 'I' ) {
+                            pileupAlignment.IsNextInsertion = true;
+                            pileupAlignment.InsertionLength = nextOp.Length;
+                        }
+                            
+                        // if next CIGAR op is either DELETION or INSERTION
+                        if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
+
+                            // if there is a CIGAR op after the DEL/INS
+                            if ( i < numCigarOps - 2 ) {
+                                const CigarOp& nextNextOp = al.CigarData.at(i+2);
+                                
+                                // if next CIGAR op is clipping or ref_skip
+                                if ( nextNextOp.Type == 'S' || 
+                                     nextNextOp.Type == 'N' ||
+                                     nextNextOp.Type == 'H' )
+                                    pileupAlignment.IsSegmentEnd = true;
+                            } 
+                            else {
+                                pileupAlignment.IsSegmentEnd = true;
+                                
+                                // if next CIGAR op is clipping or ref_skip
+                                if ( nextOp.Type == 'S' || 
+                                     nextOp.Type == 'N' ||
+                                     nextOp.Type == 'H' )
+                                    pileupAlignment.IsSegmentEnd = true;
+                            }
+                        }
+                        
+                        // otherwise
+                        else { 
+                        
+                            // if next CIGAR op is clipping or ref_skip
+                            if ( nextOp.Type == 'S' || 
+                                 nextOp.Type == 'N' ||
+                                 nextOp.Type == 'H' )
+                                pileupAlignment.IsSegmentEnd = true;
+                        }
+                    }
+                    
+                    // else this is last operation
+                    else pileupAlignment.IsSegmentEnd = true;
+                }
+            }
+          
+            // increment markers
+            genomePosition      += op.Length;
+            positionInAlignment += op.Length;
+        } 
+        
+        // if op is DELETION
+        else if ( op.Type == 'D' ) {
+          
+            // if deletion op overlaps current position
+            if ( genomePosition + (int)op.Length > CurrentPosition ) {
+              
+                // set pileup data
+                pileupAlignment.IsCurrentDeletion   = true;
+                pileupAlignment.IsNextDeletion      = false;
+                pileupAlignment.IsNextInsertion     = true;
+                pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+            }
+            
+            // increment marker
+            genomePosition += op.Length;
+        }
+
+        // if op is REF_SKIP
+        else if ( op.Type == 'N' ) {
+            genomePosition += op.Length;
+        }
+        
+        // if op is INSERTION or SOFT_CLIP
+        else if ( op.Type == 'I' || op.Type == 'S' ) {
+            positionInAlignment += op.Length;
+        }
+        
+        // checl for beginning of new read segment
+        if ( op.Type == 'N' ||
+             op.Type == 'S' ||
+             op.Type == 'H' )
+            isNewReadSegment = true;
+        else 
+            isNewReadSegment = false;
+      
+        // if we've moved beyond current position
+        if ( genomePosition > CurrentPosition ) {
+            if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
+            break;
+        }
+    }
+
+    // save pileup position if flag is true
+    if ( saveAlignment )
+        CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
+}
+
+// ---------------------------------------------
+// PileupEngine implementation
+
+PileupEngine::PileupEngine(void)
+    : d( new PileupEnginePrivate )
+{ }
+
+PileupEngine::~PileupEngine(void) {
+    delete d;
+    d = 0;
+}
+
+bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
+void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
+void PileupEngine::Flush(void) { d->Flush(); }
diff --git a/src/utils/bamtools_pileup_engine.h b/src/utils/bamtools_pileup_engine.h
new file mode 100644 (file)
index 0000000..675b8e6
--- /dev/null
@@ -0,0 +1,94 @@
+// ***************************************************************************
+// bamtools_pileup_engine.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 16 September 2010
+// ---------------------------------------------------------------------------
+// Provides pileup at position functionality for various tools.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_PILEUP_ENGINE_H
+#define BAMTOOLS_PILEUP_ENGINE_H
+
+#include <vector>
+#include "BamAux.h"
+
+namespace BamTools {
+
+// contains auxiliary data about a single BamAlignment
+// at current position considered
+struct PileupAlignment {
+  
+    // data members
+    BamAlignment Alignment;
+    int32_t PositionInAlignment;
+    bool IsCurrentDeletion;
+    bool IsNextDeletion;
+    bool IsNextInsertion;
+    int DeletionLength;
+    int InsertionLength;
+    bool IsSegmentBegin;
+    bool IsSegmentEnd;
+    
+    // ctor
+    PileupAlignment(const BamAlignment& al)
+        : Alignment(al)
+        , PositionInAlignment(-1)
+        , IsCurrentDeletion(false)
+        , IsNextDeletion(false)
+        , IsNextInsertion(false)
+        , DeletionLength(0)
+        , InsertionLength(0)
+        , IsSegmentBegin(false)
+        , IsSegmentEnd(false)
+    { }
+};
+  
+// contains all data at a position
+struct PileupPosition {
+  
+    // data members
+    int RefId;
+    int Position;
+    std::vector<PileupAlignment> PileupAlignments;
+
+    // ctor
+    PileupPosition(const int& refId = 0,
+                   const int& position = 0, 
+                   const std::vector<PileupAlignment>& alignments = std::vector<PileupAlignment>())
+        : RefId(refId)
+        , Position(position)
+        , PileupAlignments(alignments)
+    { }
+};
+  
+class PileupVisitor {
+  
+    public:
+        PileupVisitor(void) { }
+        virtual ~PileupVisitor(void) { }
+  
+    public:
+        virtual void Visit(const PileupPosition& pileupData) =0;
+};
+
+class PileupEngine {
+  
+    public:
+        PileupEngine(void);
+        ~PileupEngine(void);
+        
+    public:
+        bool AddAlignment(const BamAlignment& al);
+        void AddVisitor(PileupVisitor* visitor);
+        void Flush(void);
+        
+    private:
+        struct PileupEnginePrivate;
+        PileupEnginePrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_PILEUP_ENGINE_H
\ No newline at end of file