// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 22 July 2010
+// Last modified: 16 September 2010
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
#include <sstream>
#include <string>
#include <vector>
-
#include "bamtools_convert.h"
+#include "bamtools_fasta.h"
#include "bamtools_options.h"
-#include "bamtools_pileup.h"
+#include "bamtools_pileup_engine.h"
#include "bamtools_utilities.h"
#include "BGZF.h"
-#include "BamReader.h"
#include "BamMultiReader.h"
-
using namespace std;
using namespace BamTools;
namespace BamTools {
-
- // format names
+
+ // ---------------------------------------------
+ // ConvertTool constants
+
+ // supported conversion format command-line names
static const string FORMAT_BED = "bed";
static const string FORMAT_BEDGRAPH = "bedgraph";
static const string FORMAT_FASTA = "fasta";
// other constants
static const unsigned int FASTA_LINE_MAX = 50;
-} // namespace BamTools
-
-struct ConvertTool::ConvertToolPrivate {
-
- // ctor & dtor
- public:
- ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
- ~ConvertToolPrivate(void);
+ // ---------------------------------------------
+ // ConvertPileupFormatVisitor declaration
- // interface
- public:
- bool Run(void);
+ class ConvertPileupFormatVisitor : public PileupVisitor {
+
+ // ctor & dtor
+ public:
+ ConvertPileupFormatVisitor(const RefVector& references,
+ const string& fastaFilename,
+ const bool isPrintingMapQualities,
+ ostream* out);
+ ~ConvertPileupFormatVisitor(void);
+
+ // PileupVisitor interface implementation
+ public:
+// void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments);
+
+ void Visit(const PileupPosition& pileupData);
+
+ // data members
+ private:
+ Fasta m_fasta;
+ bool m_hasFasta;
+ bool m_isPrintingMapQualities;
+ ostream* m_out;
+ RefVector m_references;
+ };
- // internal methods
- private:
- void PrintBed(const BamAlignment& a);
- void PrintBedGraph(const BamAlignment& a);
- void PrintFasta(const BamAlignment& a);
- void PrintFastq(const BamAlignment& a);
- void PrintJson(const BamAlignment& a);
- void PrintSam(const BamAlignment& a);
- void PrintWiggle(const BamAlignment& a);
-
- // data members
- private:
- ConvertTool::ConvertSettings* m_settings;
- RefVector m_references;
- ostream m_out;
-};
-
+} // namespace BamTools
+
// ---------------------------------------------
// ConvertSettings implementation
, IsOmittingSamHeader(false)
, IsPrintingPileupMapQualities(false)
, OutputFilename(Options::StandardOut())
+ , FastaFilename("")
{ }
-};
+};
// ---------------------------------------------
-// ConvertTool implementation
-
-ConvertTool::ConvertTool(void)
- : AbstractTool()
- , m_settings(new ConvertSettings)
- , m_impl(0)
-{
- // set program details
- Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [other options]");
-
- // set up options
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
- Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
-
- OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
- Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
-
- OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
- Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
- Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
-
- OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
- Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
-}
-
-ConvertTool::~ConvertTool(void) {
- delete m_settings;
- m_settings = 0;
-
- delete m_impl;
- m_impl = 0;
-}
-
-int ConvertTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
-
-int ConvertTool::Run(int argc, char* argv[]) {
+// ConvertToolPrivate implementation
- // parse command line arguments
- Options::Parse(argc, argv, 1);
-
- // run internal ConvertTool implementation, return success/fail
- m_impl = new ConvertToolPrivate(m_settings);
+struct ConvertTool::ConvertToolPrivate {
+
+ // ctor & dtor
+ public:
+ ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
+ ~ConvertToolPrivate(void);
- if ( m_impl->Run() )
- return 0;
- else
- return 1;
-}
-
-// ---------------------------------------------
-// ConvertToolPrivate implementation
+ // interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+ void PrintBed(const BamAlignment& a);
+ void PrintBedGraph(const BamAlignment& a);
+ void PrintFasta(const BamAlignment& a);
+ void PrintFastq(const BamAlignment& a);
+ void PrintJson(const BamAlignment& a);
+ void PrintSam(const BamAlignment& a);
+ void PrintWiggle(const BamAlignment& a);
+
+ // special case - uses the PileupEngine
+ bool RunPileupConversion(BamMultiReader* reader);
+
+ // data members
+ private:
+ ConvertTool::ConvertSettings* m_settings;
+ RefVector m_references;
+ ostream m_out;
+};
ConvertTool::ConvertToolPrivate::ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
: m_settings(settings)
bool ConvertTool::ConvertToolPrivate::Run(void) {
- bool convertedOk = true;
-
// ------------------------------------
// initialize conversion input/output
BamRegion region;
if ( m_settings->HasRegion ) {
if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
- if ( !reader.SetRegion(region) )
- cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+ if ( !reader.SetRegion(region) ) {
+ cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+ return false;
+ }
+ } else {
+ cerr << "Could not parse REGION: " << m_settings->Region << endl;
+ return false;
}
}
m_out.rdbuf(outFile.rdbuf());
}
- // ------------------------
+ // -------------------------------------
+ // do conversion based on format
+
+ bool convertedOk = true;
+
// pileup is special case
- if ( m_settings->Format == FORMAT_PILEUP ) {
-
- // initialize pileup input/output
- Pileup pileup(&reader, &m_out);
-
- // ---------------------------
- // configure pileup settings
-
- if ( m_settings->HasRegion )
- pileup.SetRegion(region);
-
- if ( m_settings->HasFastaFilename )
- pileup.SetFastaFilename(m_settings->FastaFilename);
-
- pileup.SetIsPrintingMapQualities( m_settings->IsPrintingPileupMapQualities );
-
- // run pileup
- convertedOk = pileup.Run();
- }
+ // conversion not done per alignment, like the other formats
+ if ( m_settings->Format == FORMAT_PILEUP )
+ convertedOk = RunPileupConversion(&reader);
- // -------------------------------------
- // else determine 'simpler' format type
+ // all other formats
else {
bool formatError = false;
+
+ // set function pointer to proper conversion method
void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph;
convertedOk = false;
}
- // if SAM format & not omitting header, print SAM header
- if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) {
- string headerText = reader.GetHeaderText();
- m_out << headerText;
- }
-
- // ------------------------
- // do conversion
+ // if format selected ok
if ( !formatError ) {
+
+ // if SAM format & not omitting header, print SAM header first
+ if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader )
+ m_out << reader.GetHeaderText();
+
+ // iterate through file, doing conversion
BamAlignment a;
- while ( reader.GetNextAlignment(a) ) {
- (this->*pFunction)(a);
- }
+ while ( reader.GetNextAlignment(a) )
+ (this->*pFunction)(a);
+
+ // set flag for successful conversion
+ convertedOk = true;
}
}
// ------------------------
// clean up & exit
+
reader.Close();
if ( m_settings->HasOutput ) outFile.close();
return convertedOk;
// Conversion/output methods
// ----------------------------------------------------------
-void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) {
+void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) {
// tab-delimited, 0-based half-open
// (e.g. a 50-base read aligned to pos 10 could have BED coordinates (10, 60) instead of BAM coordinates (10, 59) )
}
m_out << "}" << endl;
-
}
// print BamAlignment in SAM format
void ConvertTool::ConvertToolPrivate::PrintWiggle(const BamAlignment& a) {
;
}
+
+bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
+
+ // check for valid BamMultiReader
+ if ( reader == 0 ) return false;
+
+ // set up our pileup format 'visitor'
+ ConvertPileupFormatVisitor* v = new ConvertPileupFormatVisitor(m_references,
+ m_settings->FastaFilename,
+ m_settings->IsPrintingPileupMapQualities,
+ &m_out);
+
+ // set up PileupEngine
+ PileupEngine pileup;
+ pileup.AddVisitor(v);
+
+ // iterate through data
+ BamAlignment al;
+ while ( reader->GetNextAlignment(al) )
+ pileup.AddAlignment(al);
+ pileup.Flush();
+
+ // clean up
+ delete v;
+ v = 0;
+
+ // return success
+ return true;
+}
+
+// ---------------------------------------------
+// ConvertTool implementation
+
+ConvertTool::ConvertTool(void)
+ : AbstractTool()
+ , m_settings(new ConvertSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [other options]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
+
+ OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
+ Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+
+ OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
+ Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
+ Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
+
+ OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
+ Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
+}
+
+ConvertTool::~ConvertTool(void) {
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int ConvertTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int ConvertTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // run internal ConvertTool implementation, return success/fail
+ m_impl = new ConvertToolPrivate(m_settings);
+
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
+}
+
+// ---------------------------------------------
+// ConvertPileupFormatVisitor implementation
+
+ConvertPileupFormatVisitor::ConvertPileupFormatVisitor(const RefVector& references,
+ const string& fastaFilename,
+ const bool isPrintingMapQualities,
+ ostream* out)
+ : PileupVisitor()
+ , m_hasFasta(false)
+ , m_isPrintingMapQualities(isPrintingMapQualities)
+ , m_out(out)
+ , m_references(references)
+{
+ // set up Fasta reader if file is provided
+ if ( !fastaFilename.empty() ) {
+
+ // check for FASTA index
+ string indexFilename = "";
+ if ( Utilities::FileExists(fastaFilename + ".fai") )
+ indexFilename = fastaFilename + ".fai";
+
+ // open FASTA file
+ if ( m_fasta.Open(fastaFilename, indexFilename) )
+ m_hasFasta = true;
+ }
+}
+
+ConvertPileupFormatVisitor::~ConvertPileupFormatVisitor(void) {
+ // be sure to close Fasta reader
+ if ( m_hasFasta ) {
+ m_fasta.Close();
+ m_hasFasta = false;
+ }
+}
+
+void ConvertPileupFormatVisitor::Visit(const PileupPosition& pileupData ) {
+
+ // skip if no alignments at this position
+ if ( pileupData.PileupAlignments.empty() ) return;
+
+ // retrieve reference name
+ const string& referenceName = m_references[pileupData.RefId].RefName;
+ const int& position = pileupData.Position;
+
+ // retrieve reference base from FASTA file, if one provided; otherwise default to 'N'
+ char referenceBase('N');
+ if ( m_hasFasta && (pileupData.Position < m_references[pileupData.RefId].RefLength) ) {
+ if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position, referenceBase ) ) {
+ cerr << "Pileup error : Could not read reference base from FASTA file" << endl;
+ return;
+ }
+ }
+
+ // get count of alleles at this position
+ const int numberAlleles = pileupData.PileupAlignments.size();
+
+ // -----------------------------------------------------------
+ // build strings based on alleles at this positionInAlignment
+
+ stringstream bases("");
+ stringstream baseQualities("");
+ stringstream mapQualities("");
+
+ // iterate over alignments at this pileup position
+ vector<PileupAlignment>::const_iterator pileupIter = pileupData.PileupAlignments.begin();
+ vector<PileupAlignment>::const_iterator pileupEnd = pileupData.PileupAlignments.end();
+ for ( ; pileupIter != pileupEnd; ++pileupIter ) {
+ const PileupAlignment pa = (*pileupIter);
+ const BamAlignment& ba = pa.Alignment;
+
+ // if beginning of read segment
+ if ( pa.IsSegmentBegin )
+ bases << '^' << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+
+ // if current base is not a DELETION
+ if ( !pa.IsCurrentDeletion ) {
+
+ // get base at current position
+ char base = ba.QueryBases.at(pa.PositionInAlignment);
+
+ // if base matches reference
+ if ( base == '=' ||
+ toupper(base) == toupper(referenceBase) ||
+ tolower(base) == tolower(referenceBase) )
+ {
+ base = (ba.IsReverseStrand() ? ',' : '.' );
+ }
+
+ // mismatches reference
+ else base = (ba.IsReverseStrand() ? tolower(base) : toupper(base) );
+
+ // store base
+ bases << base;
+
+ // if next position contains insertion
+ if ( pa.IsNextInsertion ) {
+ bases << '+' << pa.InsertionLength;
+ for (int i = 1; i <= pa.InsertionLength; ++i) {
+ char insertedBase = (char)ba.QueryBases.at(pa.PositionInAlignment + i);
+ bases << (ba.IsReverseStrand() ? (char)tolower(insertedBase) : (char)toupper(insertedBase) );
+ }
+ }
+
+ // if next position contains DELETION
+ else if ( pa.IsNextDeletion ) {
+ bases << '-' << pa.DeletionLength;
+ for (int i = 1; i <= pa.DeletionLength; ++i) {
+ char deletedBase('N');
+ if ( m_hasFasta && (pileupData.Position+i < m_references[pileupData.RefId].RefLength) ) {
+ if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position+i, deletedBase ) ) {
+ cerr << "Pileup error : Could not read reference base from FASTA file" << endl;
+ return;
+ }
+ }
+ bases << (ba.IsReverseStrand() ? (char)tolower(deletedBase) : (char)toupper(deletedBase) );
+ }
+ }
+ }
+
+ // otherwise, DELETION
+ else bases << '*';
+
+ // if end of read segment
+ if ( pa.IsSegmentEnd ) bases << '$';
+
+ // store current base quality
+ baseQualities << ba.Qualities.at(pa.PositionInAlignment);
+
+ // save alignment map quality if desired
+ if ( m_isPrintingMapQualities )
+ mapQualities << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+ }
+
+ // ----------------------
+ // print results
+
+ // tab-delimited
+ // <refName> <1-based pos> <refBase> <numberAlleles> <bases> <qualities> [mapQuals]
+
+ const string TAB = "\t";
+ *m_out << referenceName << TAB
+ << position + 1 << TAB
+ << referenceBase << TAB
+ << numberAlleles << TAB
+ << bases.str() << TAB
+ << baseQualities.str() << TAB
+ << mapQualities.str() << endl;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 16 September 2010
// ---------------------------------------------------------------------------
-// Prints coverage statistics for a single BAM file
-//
-// ** Expand to multiple??
-//
+// Prints coverage data for a single BAM file
// ***************************************************************************
#include <iostream>
+#include <fstream>
#include <string>
#include <vector>
-
#include "bamtools_coverage.h"
#include "bamtools_options.h"
+#include "bamtools_pileup_engine.h"
#include "BamReader.h"
-
using namespace std;
using namespace BamTools;
+namespace BamTools {
+
+// ---------------------------------------------
+// CoverageVisitor implementation
+
+class CoverageVisitor : public PileupVisitor {
+
+ public:
+ CoverageVisitor(const RefVector& references, ostream* out)
+ : PileupVisitor()
+ , m_references(references)
+ , m_out(out)
+ { }
+ ~CoverageVisitor(void) { }
+
+ // PileupVisitor interface implementation
+ public:
+// void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments) {
+ void Visit(const PileupPosition& pileupData) {
+ // -----------------------------------------
+ // print coverage results ( tab-delimited )
+ // <refName> <0-based pos> <number of alleles>
+// *m_out << m_references[refId].RefName << "\t" << position << "\t" << alignments.size() << endl;
+ *m_out << m_references[pileupData.RefId].RefName << "\t"
+ << pileupData.Position << "\t"
+ << pileupData.PileupAlignments.size() << endl;
+ }
+
+ private:
+ RefVector m_references;
+ ostream* m_out;
+};
+
+} // namespace BamTools
+
// ---------------------------------------------
// CoverageSettings implementation
struct CoverageTool::CoverageSettings {
// flags
- bool HasInputBamFilename;
+ bool HasInputFile;
+ bool HasOutputFile;
// filenames
- std::string InputBamFilename;
+ string InputBamFilename;
+ string OutputFilename;
// constructor
CoverageSettings(void)
- : HasInputBamFilename(false)
+ : HasInputFile(false)
+ , HasOutputFile(false)
, InputBamFilename(Options::StandardIn())
+ , OutputFilename(Options::StandardOut())
{ }
};
+// ---------------------------------------------
+// CoverageToolPrivate implementation
+
+struct CoverageTool::CoverageToolPrivate {
+
+ // ctor & dtor
+ public:
+ CoverageToolPrivate(CoverageTool::CoverageSettings* settings);
+ ~CoverageToolPrivate(void);
+
+ // interface
+ public:
+ bool Run(void);
+
+ // data members
+ private:
+ CoverageTool::CoverageSettings* m_settings;
+ ostream m_out;
+ RefVector m_references;
+};
+
+CoverageTool::CoverageToolPrivate::CoverageToolPrivate(CoverageTool::CoverageSettings* settings)
+ : m_settings(settings)
+ , m_out(cout.rdbuf()) // default output to cout
+{ }
+
+CoverageTool::CoverageToolPrivate::~CoverageToolPrivate(void) { }
+
+bool CoverageTool::CoverageToolPrivate::Run(void) {
+
+ // if output filename given
+ ofstream outFile;
+ if ( m_settings->HasOutputFile ) {
+
+ // open output file stream
+ outFile.open(m_settings->OutputFilename.c_str());
+ if ( !outFile ) {
+ cerr << "Could not open " << m_settings->OutputFilename << " for output." << endl;
+ return false;
+ }
+
+ // set m_out to file's streambuf
+ m_out.rdbuf(outFile.rdbuf());
+ }
+
+ //open our BAM reader
+ BamReader reader;
+ reader.Open(m_settings->InputBamFilename);
+ m_references = reader.GetReferenceData();
+
+ // set up our output 'visitor'
+ CoverageVisitor* cv = new CoverageVisitor(m_references, &m_out);
+
+ // set up pileup engine with 'visitor'
+ PileupEngine pileup;
+ pileup.AddVisitor(cv);
+
+ // process input data
+ BamAlignment al;
+ while ( reader.GetNextAlignment(al) )
+ pileup.AddAlignment(al);
+
+ // clean up
+ reader.Close();
+ if ( m_settings->HasOutputFile ) outFile.close();
+ delete cv;
+ cv = 0;
+
+ // return success
+ return true;
+}
+
// ---------------------------------------------
// CoverageTool implementation
CoverageTool::CoverageTool(void)
: AbstractTool()
, m_settings(new CoverageSettings)
+ , m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools coverage", "prints coverage stats for a BAM file", "-in <filename> ");
+ Options::SetProgramInfo("bamtools coverage", "prints coverage data for a single BAM file", "[-in <filename>] [-out <filename>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFile, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "filename", "the output file", "", m_settings->HasOutputFile, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
}
CoverageTool::~CoverageTool(void) {
delete m_settings;
m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
}
int CoverageTool::Help(void) {
// parse command line arguments
Options::Parse(argc, argv, 1);
-
- //open our BAM reader
- BamReader reader;
- reader.Open(m_settings->InputBamFilename);
-
- // generate coverage stats
- cerr << "Generating coverage stats for " << m_settings->InputBamFilename << endl;
- cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
-
- // clean & exit
- reader.Close();
- return 0;
-}
\ No newline at end of file
+
+ // run internal ConvertTool implementation, return success/fail
+ m_impl = new CoverageToolPrivate(m_settings);
+
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 1 August 2010
// ---------------------------------------------------------------------------
-// Prints coverage statistics for a single BAM file
-//
-// ** Expand to multiple??
-//
+// Prints coverage data for a single BAM file
// ***************************************************************************
#ifndef BAMTOOLS_COVERAGE_H
private:
struct CoverageSettings;
CoverageSettings* m_settings;
+
+ struct CoverageToolPrivate;
+ CoverageToolPrivate* m_impl;
};
} // namespace BamTools
SOURCES = bamtools_fasta.cpp \
bamtools_filter_engine.cpp \
bamtools_options.cpp \
- bamtools_pileup.cpp \
+ bamtools_pileup_engine.cpp \
bamtools_utilities.cpp
OBJECTS= $(SOURCES:.cpp=.o)
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+++ /dev/null
-// ***************************************************************************
-// bamtools_pileup.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 13 July 2010
-// ---------------------------------------------------------------------------
-// Provides pileup conversion functionality.
-//
-// The 'assembly' aspect of pileup makes this more complicated than the
-// simpler one-to-one conversion methods for other formats.
-// ***************************************************************************
-
-#include <vector>
-#include "BamMultiReader.h"
-#include "bamtools_pileup.h"
-using namespace std;
-using namespace BamTools;
-
-struct Pileup::PileupPrivate {
-
- // ---------------------
- // data members
-
- // IO & settings
- BamMultiReader* Reader;
- ostream* OutStream;
- string FastaFilename;
- bool IsPrintingMapQualities;
- BamRegion Region;
-
- // parsing data
- int CurrentId;
- int CurrentPosition;
- vector<BamAlignment> CurrentData;
- RefVector References;
-
- // ----------------------
- // ctor
-
- PileupPrivate(BamMultiReader* reader, ostream* outStream)
- : Reader(reader)
- , OutStream(outStream)
- , FastaFilename("")
- , IsPrintingMapQualities(false)
- { }
-
- // ----------------------
- // internal methods
-
- void PrintCurrentData(void);
- bool Run(void);
-};
-
-void Pileup::PileupPrivate::PrintCurrentData(void) {
-
- // remove any data that ends before CurrentPosition
- size_t i = 0;
- while ( i < CurrentData.size() ) {
- if ( CurrentData[i].GetEndPosition() < CurrentPosition )
- CurrentData.erase(CurrentData.begin() + i);
- else
- ++i;
- }
-
- // if not data remains, return
- if ( CurrentData.empty() ) return;
-
- // initialize empty strings
- string bases = "";
- string baseQuals = "";
- string mapQuals = "";
-
- // iterate over alignments
- vector<BamAlignment>::const_iterator dataIter = CurrentData.begin();
- vector<BamAlignment>::const_iterator dataEnd = CurrentData.end();
- for ( ; dataIter != dataEnd; ++dataIter ) {
-
- // retrieve alignment
- const BamAlignment& al = (*dataIter);
-
- // determine current base character & store
- const char base = al.AlignedBases[CurrentPosition -al.Position];
- if ( al.IsReverseStrand() )
- bases.push_back( tolower(base) );
- else
- bases.push_back( toupper(base) );
-
- // determine current base quality & store
- baseQuals.push_back( al.Qualities[CurrentPosition - al.Position] );
-
- // if using mapQuals, determine current mapQual & store
- if ( IsPrintingMapQualities ) {
- int mapQuality = (int)(al.MapQuality + 33);
- if ( mapQuality > 126 ) mapQuality = 126;
- mapQuals.push_back((char)mapQuality);
- }
- }
-
- // print results to OutStream
- const string& refName = References[CurrentId].RefName;
- const char refBase = 'N';
-
- *OutStream << refName << "\t" << CurrentPosition << "\t" << refBase << "\t" << CurrentData.size() << "\t" << bases << "\t" << baseQuals;
- if ( IsPrintingMapQualities ) *OutStream << "\t" << mapQuals;
- *OutStream << endl;
-}
-
-bool Pileup::PileupPrivate::Run(void) {
-
- // -----------------------------
- // validate input & output
-
- if ( !Reader ) {
- cerr << "Pileup::Run() : Invalid multireader" << endl;
- return false;
- }
-
- if ( !OutStream) {
- cerr << "Pileup::Run() : Invalid output stream" << endl;
- return false;
- }
-
- References = Reader->GetReferenceData();
-
- // -----------------------------
- // process input data
-
- // get first entry
- BamAlignment al;
- if ( !Reader->GetNextAlignment(al) ) {
- cerr << "Pileup::Run() : Could not read from multireader" << endl;
- return false;
- }
-
- // set initial markers & store first entry
- CurrentId = al.RefID;
- CurrentPosition = al.Position;
- CurrentData.clear();
- CurrentData.push_back(al);
-
- // iterate over remaining data
- while ( Reader->GetNextAlignment(al) ) {
-
- // if same reference
- if ( al.RefID == CurrentId ) {
-
- // if same position, store and move on
- if ( al.Position == CurrentPosition )
- CurrentData.push_back(al);
-
- // if less than CurrentPosition - sorting error => ABORT
- else if ( al.Position < CurrentPosition ) {
- cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
- return false;
- }
-
- // else print pileup data until 'catching up' to CurrentPosition
- else {
- while ( al.Position > CurrentPosition ) {
- PrintCurrentData();
- ++CurrentPosition;
- }
- CurrentData.push_back(al);
- }
- }
-
- // if reference ID less than CurrentID - sorting error => ABORT
- else if ( al.RefID < CurrentId ) {
- cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
- return false;
- }
-
- // else moved forward onto next reference
- else {
-
- // print any remaining pileup data from previous reference
- while ( !CurrentData.empty() ) {
- PrintCurrentData();
- ++CurrentPosition;
- }
-
- // store first entry on this new reference, update markers
- CurrentData.clear();
- CurrentData.push_back(al);
- CurrentId = al.RefID;
- CurrentPosition = al.Position;
- }
- }
-
- // ------------------------------------
- // handle any remaining data entries
-
- while ( !CurrentData.empty() ) {
- PrintCurrentData();
- ++CurrentPosition;
- }
-
- // -------------------------
- // return success
-
- return true;
-}
-
-// ----------------------------------------------------------
-// Pileup implementation
-
-Pileup::Pileup(BamMultiReader* reader, ostream* outStream) {
- d = new PileupPrivate(reader, outStream);
-}
-
-Pileup::~Pileup(void) {
- delete d;
- d = 0;
-}
-
-bool Pileup::Run(void) {
- return d->Run();
-}
-
-void Pileup::SetFastaFilename(const string& filename) {
- d->FastaFilename = filename;
-}
-
-void Pileup::SetIsPrintingMapQualities(bool ok) {
- d->IsPrintingMapQualities = ok;
-}
-
-void Pileup::SetRegion(const BamRegion& region) {
- d->Region = region;
-}
+++ /dev/null
-// ***************************************************************************
-// bamtools_pileup.h (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 13 July 2010
-// ---------------------------------------------------------------------------
-// Provides pileup conversion functionality.
-//
-// The 'assembly' aspect of pileup makes this more complicated than the
-// simpler one-to-one conversion methods for other formats.
-// ***************************************************************************
-
-#ifndef BAMTOOLS_PILEUP_H
-#define BAMTOOLS_PILEUP_H
-
-#include <iostream>
-#include <string>
-
-namespace BamTools {
-
-class BamMultiReader;
-class BamRegion;
-
-class Pileup {
-
- public:
- Pileup(BamMultiReader* reader, std::ostream* outStream);
- ~Pileup(void);
-
- public:
- bool Run(void);
- void SetFastaFilename(const std::string& filename);
- void SetIsPrintingMapQualities(bool ok);
- void SetRegion(const BamRegion& region);
-
- private:
- struct PileupPrivate;
- PileupPrivate* d;
-};
-
-} // namespace BamTools
-
-#endif // BAMTOOLS_PILEUP_H
\ No newline at end of file
--- /dev/null
+// ***************************************************************************
+// bamtools_pileup_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 16 September 2010
+// ---------------------------------------------------------------------------
+// Provides pileup at position functionality for various tools.
+// ***************************************************************************
+
+#include <iostream>
+#include "bamtools_pileup_engine.h"
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// PileupEnginePrivate implementation
+
+struct PileupEngine::PileupEnginePrivate {
+
+ // data members
+ int CurrentId;
+ int CurrentPosition;
+ vector<BamAlignment> CurrentAlignments;
+ PileupPosition CurrentPileupData;
+
+ bool IsFirstAlignment;
+ vector<PileupVisitor*> Visitors;
+
+ // ctor & dtor
+ PileupEnginePrivate(void)
+ : CurrentId(-1)
+ , CurrentPosition(-1)
+ , IsFirstAlignment(true)
+ { }
+ ~PileupEnginePrivate(void) { }
+
+ // 'public' methods
+ bool AddAlignment(const BamAlignment& al);
+ void Flush(void);
+
+ // internal methods
+ private:
+ void ApplyVisitors(void);
+ void ClearOldData(void);
+ void CreatePileupData(void);
+ void ParseAlignmentCigar(const BamAlignment& al);
+};
+
+bool PileupEngine::PileupEnginePrivate::AddAlignment(const BamAlignment& al) {
+
+ // if first time
+ if ( IsFirstAlignment ) {
+
+ // set initial markers
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+
+ // store first entry
+ CurrentAlignments.clear();
+ CurrentAlignments.push_back(al);
+
+ // set flag & return
+ IsFirstAlignment = false;
+ return true;
+ }
+
+ // if same reference
+ if ( al.RefID == CurrentId ) {
+
+ // if same position, store and move on
+ if ( al.Position == CurrentPosition )
+ CurrentAlignments.push_back(al);
+
+ // if less than CurrentPosition - sorting error => ABORT
+ else if ( al.Position < CurrentPosition ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else print pileup data until 'catching up' to CurrentPosition
+ else {
+ while ( al.Position > CurrentPosition ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+ CurrentAlignments.push_back(al);
+ }
+ }
+
+ // if reference ID less than CurrentId - sorting error => ABORT
+ else if ( al.RefID < CurrentId ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else moved forward onto next reference
+ else {
+
+ // print any remaining pileup data from previous reference
+ while ( !CurrentAlignments.empty() ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+
+ // store first entry on this new reference, update markers
+ CurrentAlignments.clear();
+ CurrentAlignments.push_back(al);
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+ }
+
+ return true;
+}
+
+void PileupEngine::PileupEnginePrivate::ApplyVisitors(void) {
+
+ // parse CIGAR data in BamAlignments to build up current pileup data
+ CreatePileupData();
+
+ // apply all visitors to current alignment set
+ vector<PileupVisitor*>::const_iterator visitorIter = Visitors.begin();
+ vector<PileupVisitor*>::const_iterator visitorEnd = Visitors.end();
+ for ( ; visitorIter != visitorEnd; ++visitorIter )
+ (*visitorIter)->Visit(CurrentPileupData);
+}
+
+void PileupEngine::PileupEnginePrivate::ClearOldData(void) {
+
+ // remove any data that ends before CurrentPosition
+ size_t i = 0;
+ while ( i < CurrentAlignments.size() ) {
+
+ // remove alignment if it ends before CurrentPosition
+ const int endPosition = CurrentAlignments[i].GetEndPosition();
+ if ( endPosition < CurrentPosition )
+ CurrentAlignments.erase(CurrentAlignments.begin() + i);
+ else
+ ++i;
+ }
+}
+
+void PileupEngine::PileupEnginePrivate::CreatePileupData(void) {
+
+ // remove any non-overlapping alignments
+ ClearOldData();
+
+ // set pileup refId, position to current markers
+ CurrentPileupData.RefId = CurrentId;
+ CurrentPileupData.Position = CurrentPosition;
+ CurrentPileupData.PileupAlignments.clear();
+
+ // parse CIGAR data in remaining alignments
+ vector<BamAlignment>::const_iterator alIter = CurrentAlignments.begin();
+ vector<BamAlignment>::const_iterator alEnd = CurrentAlignments.end();
+ for ( ; alIter != alEnd; ++alIter )
+ ParseAlignmentCigar( (*alIter) );
+}
+
+void PileupEngine::PileupEnginePrivate::Flush(void) {
+ while ( !CurrentAlignments.empty() ) {
+ ApplyVisitors();
+ ++CurrentPosition;
+ }
+}
+
+void PileupEngine::PileupEnginePrivate::ParseAlignmentCigar(const BamAlignment& al) {
+
+ // skip if unmapped
+ if ( !al.IsMapped() ) return;
+
+ // intialize local variables
+ int genomePosition = al.Position;
+ int positionInAlignment = 0;
+ bool isNewReadSegment = true;
+ bool saveAlignment = true;
+ PileupAlignment pileupAlignment(al);
+
+ // iterate over CIGAR operations
+ const int numCigarOps = (const int)al.CigarData.size();
+ for (int i = 0; i < numCigarOps; ++i ) {
+ const CigarOp& op = al.CigarData.at(i);
+
+ // if op is MATCH
+ if ( op.Type == 'M' ) {
+
+ // if match op overlaps current position
+ if ( genomePosition + (int)op.Length > CurrentPosition ) {
+
+ // set pileup data
+ pileupAlignment.IsCurrentDeletion = false;
+ pileupAlignment.IsNextDeletion = false;
+ pileupAlignment.IsNextInsertion = false;
+ pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+
+ // check for beginning of read segment
+ if ( genomePosition == CurrentPosition && isNewReadSegment )
+ pileupAlignment.IsSegmentBegin = true;
+
+ // if we're at the end of a match operation
+ if ( genomePosition + (int)op.Length - 1 == CurrentPosition ) {
+
+ // if not last operation
+ if ( i < numCigarOps - 1 ) {
+
+ // check next CIGAR op
+ const CigarOp& nextOp = al.CigarData.at(i+1);
+
+ // if next CIGAR op is DELETION
+ if ( nextOp.Type == 'D') {
+ pileupAlignment.IsNextDeletion = true;
+ pileupAlignment.DeletionLength = nextOp.Length;
+ }
+
+ // if next CIGAR op is INSERTION
+ else if ( nextOp.Type == 'I' ) {
+ pileupAlignment.IsNextInsertion = true;
+ pileupAlignment.InsertionLength = nextOp.Length;
+ }
+
+ // if next CIGAR op is either DELETION or INSERTION
+ if ( nextOp.Type == 'D' || nextOp.Type == 'I' ) {
+
+ // if there is a CIGAR op after the DEL/INS
+ if ( i < numCigarOps - 2 ) {
+ const CigarOp& nextNextOp = al.CigarData.at(i+2);
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextNextOp.Type == 'S' ||
+ nextNextOp.Type == 'N' ||
+ nextNextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ else {
+ pileupAlignment.IsSegmentEnd = true;
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextOp.Type == 'S' ||
+ nextOp.Type == 'N' ||
+ nextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // otherwise
+ else {
+
+ // if next CIGAR op is clipping or ref_skip
+ if ( nextOp.Type == 'S' ||
+ nextOp.Type == 'N' ||
+ nextOp.Type == 'H' )
+ pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // else this is last operation
+ else pileupAlignment.IsSegmentEnd = true;
+ }
+ }
+
+ // increment markers
+ genomePosition += op.Length;
+ positionInAlignment += op.Length;
+ }
+
+ // if op is DELETION
+ else if ( op.Type == 'D' ) {
+
+ // if deletion op overlaps current position
+ if ( genomePosition + (int)op.Length > CurrentPosition ) {
+
+ // set pileup data
+ pileupAlignment.IsCurrentDeletion = true;
+ pileupAlignment.IsNextDeletion = false;
+ pileupAlignment.IsNextInsertion = true;
+ pileupAlignment.PositionInAlignment = positionInAlignment + (CurrentPosition - genomePosition);
+ }
+
+ // increment marker
+ genomePosition += op.Length;
+ }
+
+ // if op is REF_SKIP
+ else if ( op.Type == 'N' ) {
+ genomePosition += op.Length;
+ }
+
+ // if op is INSERTION or SOFT_CLIP
+ else if ( op.Type == 'I' || op.Type == 'S' ) {
+ positionInAlignment += op.Length;
+ }
+
+ // checl for beginning of new read segment
+ if ( op.Type == 'N' ||
+ op.Type == 'S' ||
+ op.Type == 'H' )
+ isNewReadSegment = true;
+ else
+ isNewReadSegment = false;
+
+ // if we've moved beyond current position
+ if ( genomePosition > CurrentPosition ) {
+ if ( op.Type == 'N' ) saveAlignment = false; // ignore alignment if REF_SKIP
+ break;
+ }
+ }
+
+ // save pileup position if flag is true
+ if ( saveAlignment )
+ CurrentPileupData.PileupAlignments.push_back( pileupAlignment );
+}
+
+// ---------------------------------------------
+// PileupEngine implementation
+
+PileupEngine::PileupEngine(void)
+ : d( new PileupEnginePrivate )
+{ }
+
+PileupEngine::~PileupEngine(void) {
+ delete d;
+ d = 0;
+}
+
+bool PileupEngine::AddAlignment(const BamAlignment& al) { return d->AddAlignment(al); }
+void PileupEngine::AddVisitor(PileupVisitor* visitor) { d->Visitors.push_back(visitor); }
+void PileupEngine::Flush(void) { d->Flush(); }
--- /dev/null
+// ***************************************************************************
+// bamtools_pileup_engine.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 16 September 2010
+// ---------------------------------------------------------------------------
+// Provides pileup at position functionality for various tools.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_PILEUP_ENGINE_H
+#define BAMTOOLS_PILEUP_ENGINE_H
+
+#include <vector>
+#include "BamAux.h"
+
+namespace BamTools {
+
+// contains auxiliary data about a single BamAlignment
+// at current position considered
+struct PileupAlignment {
+
+ // data members
+ BamAlignment Alignment;
+ int32_t PositionInAlignment;
+ bool IsCurrentDeletion;
+ bool IsNextDeletion;
+ bool IsNextInsertion;
+ int DeletionLength;
+ int InsertionLength;
+ bool IsSegmentBegin;
+ bool IsSegmentEnd;
+
+ // ctor
+ PileupAlignment(const BamAlignment& al)
+ : Alignment(al)
+ , PositionInAlignment(-1)
+ , IsCurrentDeletion(false)
+ , IsNextDeletion(false)
+ , IsNextInsertion(false)
+ , DeletionLength(0)
+ , InsertionLength(0)
+ , IsSegmentBegin(false)
+ , IsSegmentEnd(false)
+ { }
+};
+
+// contains all data at a position
+struct PileupPosition {
+
+ // data members
+ int RefId;
+ int Position;
+ std::vector<PileupAlignment> PileupAlignments;
+
+ // ctor
+ PileupPosition(const int& refId = 0,
+ const int& position = 0,
+ const std::vector<PileupAlignment>& alignments = std::vector<PileupAlignment>())
+ : RefId(refId)
+ , Position(position)
+ , PileupAlignments(alignments)
+ { }
+};
+
+class PileupVisitor {
+
+ public:
+ PileupVisitor(void) { }
+ virtual ~PileupVisitor(void) { }
+
+ public:
+ virtual void Visit(const PileupPosition& pileupData) =0;
+};
+
+class PileupEngine {
+
+ public:
+ PileupEngine(void);
+ ~PileupEngine(void);
+
+ public:
+ bool AddAlignment(const BamAlignment& al);
+ void AddVisitor(PileupVisitor* visitor);
+ void Flush(void);
+
+ private:
+ struct PileupEnginePrivate;
+ PileupEnginePrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_PILEUP_ENGINE_H
\ No newline at end of file