// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
namespace BamTools {
- // ---------------------------------------------
- // ConvertTool constants
-
- // supported conversion format command-line names
- static const string FORMAT_BED = "bed";
- static const string FORMAT_FASTA = "fasta";
- static const string FORMAT_FASTQ = "fastq";
- static const string FORMAT_JSON = "json";
- static const string FORMAT_SAM = "sam";
- static const string FORMAT_PILEUP = "pileup";
- static const string FORMAT_YAML = "yaml";
-
- // other constants
- static const unsigned int FASTA_LINE_MAX = 50;
-
- // ---------------------------------------------
- // ConvertPileupFormatVisitor declaration
-
- class ConvertPileupFormatVisitor : public PileupVisitor {
-
- // ctor & dtor
- public:
- ConvertPileupFormatVisitor(const RefVector& references,
- const string& fastaFilename,
- const bool isPrintingMapQualities,
- ostream* out);
- ~ConvertPileupFormatVisitor(void);
-
- // PileupVisitor interface implementation
- public:
- void Visit(const PileupPosition& pileupData);
-
- // data members
- private:
- Fasta m_fasta;
- bool m_hasFasta;
- bool m_isPrintingMapQualities;
- ostream* m_out;
- RefVector m_references;
- };
+// ---------------------------------------------
+// ConvertTool constants
+
+// supported conversion format command-line names
+static const string FORMAT_BED = "bed";
+static const string FORMAT_FASTA = "fasta";
+static const string FORMAT_FASTQ = "fastq";
+static const string FORMAT_JSON = "json";
+static const string FORMAT_SAM = "sam";
+static const string FORMAT_PILEUP = "pileup";
+static const string FORMAT_YAML = "yaml";
+
+// other constants
+static const unsigned int FASTA_LINE_MAX = 50;
+
+// ---------------------------------------------
+// ConvertPileupFormatVisitor declaration
+
+class ConvertPileupFormatVisitor : public PileupVisitor {
+
+ // ctor & dtor
+ public:
+ ConvertPileupFormatVisitor(const RefVector& references,
+ const string& fastaFilename,
+ const bool isPrintingMapQualities,
+ ostream* out);
+ ~ConvertPileupFormatVisitor(void);
+
+ // PileupVisitor interface implementation
+ public:
+ void Visit(const PileupPosition& pileupData);
+
+ // data members
+ private:
+ Fasta m_fasta;
+ bool m_hasFasta;
+ bool m_isPrintingMapQualities;
+ ostream* m_out;
+ RefVector m_references;
+};
} // namespace BamTools
struct ConvertTool::ConvertSettings {
- // flags
+ // flag
bool HasInput;
bool HasOutput;
bool HasFormat;
// ctor & dtor
public:
- ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
- ~ConvertToolPrivate(void);
+ ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
+ : m_settings(settings)
+ , m_out(cout.rdbuf())
+ { }
+
+ ~ConvertToolPrivate(void) { }
// interface
public:
ostream m_out;
};
-ConvertTool::ConvertToolPrivate::ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
- : m_settings(settings)
- , m_out(cout.rdbuf()) // default output to cout
-{ }
-
-ConvertTool::ConvertToolPrivate::~ConvertToolPrivate(void) { }
-
bool ConvertTool::ConvertToolPrivate::Run(void) {
// ------------------------------------
}
ConvertTool::~ConvertTool(void) {
+
delete m_settings;
m_settings = 0;
// parse command line arguments
Options::Parse(argc, argv, 1);
- // run internal ConvertTool implementation, return success/fail
+ // initialize ConvertTool with settings
m_impl = new ConvertToolPrivate(m_settings);
+ // run ConvertTool, return success/fail
if ( m_impl->Run() )
return 0;
else
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 23 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Prints alignment count for BAM file(s)
// ***************************************************************************
};
// ---------------------------------------------
-// CountTool implementation
+// CountToolPrivate implementation
-CountTool::CountTool(void)
- : AbstractTool()
- , m_settings(new CountSettings)
-{
- // set program details
- Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
-
- // set up options
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
-}
+struct CountTool::CountToolPrivate {
-CountTool::~CountTool(void) {
- delete m_settings;
- m_settings = 0;
-}
+ // ctor & dtro
+ public:
+ CountToolPrivate(CountTool::CountSettings* settings)
+ : m_settings(settings)
+ { }
-int CountTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
+ ~CountToolPrivate(void) { }
-int CountTool::Run(int argc, char* argv[]) {
+ // interface
+ public:
+ bool Run(void);
- // parse command line arguments
- Options::Parse(argc, argv, 1);
+ // data members
+ private:
+ CountTool::CountSettings* m_settings;
+};
+
+bool CountTool::CountToolPrivate::Run(void) {
// if no '-in' args supplied, default to stdin
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput )
m_settings->InputFiles.push_back(Options::StandardIn());
-
+
// open reader without index
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
- return 1;
+ return false;
}
// alignment counter
BamAlignment al;
int alignmentCount(0);
-
- // if no region specified, count entire file
+
+ // if no region specified, count entire file
if ( !m_settings->HasRegion ) {
while ( reader.GetNextAlignmentCore(al) )
++alignmentCount;
}
-
+
// otherwise attempt to use region as constraint
else {
-
+
// if region string parses OK
BamRegion region;
if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
// if index data available for all BAM files, we can use SetRegion
if ( reader.IsIndexLoaded() ) {
-
+
// attempt to set region on reader
if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
reader.Close();
- return 1;
- }
-
+ return false;
+ }
+
// everything checks out, just iterate through specified region, counting alignments
while ( reader.GetNextAlignmentCore(al) )
++alignmentCount;
- }
-
+ }
+
// no index data available, we have to iterate through until we
// find overlapping alignments
else {
while ( reader.GetNextAlignmentCore(al) ) {
if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
- (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
+ (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
{
++alignmentCount;
}
}
}
- }
-
+ }
+
// error parsing REGION string
else {
cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
<< endl;
reader.Close();
- return 1;
+ return false;
}
}
-
- // print results
+
+ // print results
cout << alignmentCount << endl;
-
+
// clean up & exit
reader.Close();
+ return true;
+}
+
+// ---------------------------------------------
+// CountTool implementation
+
+CountTool::CountTool(void)
+ : AbstractTool()
+ , m_settings(new CountSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+}
+
+CountTool::~CountTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int CountTool::Help(void) {
+ Options::DisplayHelp();
return 0;
+}
+
+int CountTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize CountTool with settings
+ m_impl = new CountToolPrivate(m_settings);
+
+ // run CountTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Prints alignment count for BAM file
-//
-// ** Expand to multiple??
-//
+// Prints alignment count for BAM file(s)
// ***************************************************************************
#ifndef BAMTOOLS_COUNT_H
private:
struct CountSettings;
CountSettings* m_settings;
+
+ struct CountToolPrivate;
+ CountToolPrivate* m_impl;
};
} // namespace BamTools
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Prints coverage data for a single BAM file
// ***************************************************************************
// ctor & dtor
public:
- CoverageToolPrivate(CoverageTool::CoverageSettings* settings);
- ~CoverageToolPrivate(void);
+ CoverageToolPrivate(CoverageTool::CoverageSettings* settings)
+ : m_settings(settings)
+ , m_out(cout.rdbuf())
+ { }
+
+ ~CoverageToolPrivate(void) { }
// interface
public:
RefVector m_references;
};
-CoverageTool::CoverageToolPrivate::CoverageToolPrivate(CoverageTool::CoverageSettings* settings)
- : m_settings(settings)
- , m_out(cout.rdbuf()) // default output to cout
-{ }
-
-CoverageTool::CoverageToolPrivate::~CoverageToolPrivate(void) { }
-
bool CoverageTool::CoverageToolPrivate::Run(void) {
// if output filename given
}
CoverageTool::~CoverageTool(void) {
+
delete m_settings;
m_settings = 0;
// parse command line arguments
Options::Parse(argc, argv, 1);
- // run internal ConvertTool implementation, return success/fail
+ // initialize CoverageTool with settings
m_impl = new CoverageToolPrivate(m_settings);
+ // run CoverageTool, return success/fail
if ( m_impl->Run() )
return 0;
else
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Filters BAM file(s) according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria
// ***************************************************************************
#include "bamtools_filter.h"
} // namespace BamTools
-// ---------------------------------------------
-// FilterToolPrivate declaration
-
-class FilterTool::FilterToolPrivate {
-
- // ctor & dtor
- public:
- FilterToolPrivate(FilterTool::FilterSettings* settings);
- ~FilterToolPrivate(void);
-
- // 'public' interface
- public:
- bool Run(void);
-
- // internal methods
- private:
- bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
- bool CheckAlignment(const BamAlignment& al);
- const string GetScriptContents(void);
- void InitProperties(void);
- bool ParseCommandLine(void);
- bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
- bool ParseScript(void);
- bool SetupFilters(void);
-
- // data members
- private:
- vector<string> m_propertyNames;
- FilterTool::FilterSettings* m_settings;
- FilterEngine<BamAlignmentChecker> m_filterEngine;
-};
-
// ---------------------------------------------
// FilterSettings implementation
// ----------------------------------
// IO opts
-
+
// flags
bool HasInputBamFilename;
bool HasOutputBamFilename;
bool HasRegion;
bool HasScriptFilename;
bool IsForceCompression;
-
+
// filenames
vector<string> InputFiles;
string OutputFilename;
string Region;
string ScriptFilename;
-
+
// -----------------------------------
// General filter opts
-
+
// flags
bool HasAlignmentFlagFilter;
bool HasInsertSizeFilter;
// -----------------------------------
// AlignmentFlag filter opts
-
+
// flags
bool HasIsDuplicateFilter;
bool HasIsFailedQCFilter;
bool HasIsProperPairFilter;
bool HasIsReverseStrandFilter;
bool HasIsSecondMateFilter;
-
+
// filters
string IsDuplicateFilter;
string IsFailedQCFilter;
string IsProperPairFilter;
string IsReverseStrandFilter;
string IsSecondMateFilter;
-
+
// ---------------------------------
// constructor
-
+
FilterSettings(void)
: HasInputBamFilename(false)
, HasOutputBamFilename(false)
, IsReverseStrandFilter(TRUE_STR)
, IsSecondMateFilter(TRUE_STR)
{ }
-};
+};
// ---------------------------------------------
-// FilterTool implementation
-
-FilterTool::FilterTool(void)
- : AbstractTool()
- , m_settings(new FilterSettings)
- , m_impl(0)
-{
- // set program details
- Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
-
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
- Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
- Options::AddValueOption("-script", "filename", "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
- Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
-
- OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
- Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
- Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
- Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
- Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
- Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
- Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts);
-
- OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
- Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
- Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR);
-}
-
-FilterTool::~FilterTool(void) {
- delete m_settings;
- m_settings = 0;
-
- delete m_impl;
- m_impl = 0;
-}
-
-int FilterTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
+// FilterToolPrivate declaration
-int FilterTool::Run(int argc, char* argv[]) {
-
- // parse command line arguments
- Options::Parse(argc, argv, 1);
-
- // run internal FilterTool implementation, return success/fail
- m_impl = new FilterToolPrivate(m_settings);
-
- if ( m_impl->Run() ) return 0;
- else return 1;
-}
+class FilterTool::FilterToolPrivate {
+
+ // ctor & dtor
+ public:
+ FilterToolPrivate(FilterTool::FilterSettings* settings);
+ ~FilterToolPrivate(void);
+
+ // 'public' interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+ bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
+ bool CheckAlignment(const BamAlignment& al);
+ const string GetScriptContents(void);
+ void InitProperties(void);
+ bool ParseCommandLine(void);
+ bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
+ bool ParseScript(void);
+ bool SetupFilters(void);
+
+ // data members
+ private:
+ vector<string> m_propertyNames;
+ FilterTool::FilterSettings* m_settings;
+ FilterEngine<BamAlignmentChecker> m_filterEngine;
+};
// ---------------------------------------------
// FilterToolPrivate implementation
// otherwise check command line for filters
else return ParseCommandLine();
}
+
+// ---------------------------------------------
+// FilterTool implementation
+
+FilterTool::FilterTool(void)
+ : AbstractTool()
+ , m_settings(new FilterSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
+
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+ Options::AddValueOption("-script", "filename", "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
+ Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
+
+ OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
+ Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
+ Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
+ Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
+ Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
+ Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
+ Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts);
+
+ OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
+ Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
+ Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR);
+}
+
+FilterTool::~FilterTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int FilterTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int FilterTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize FilterTool with settings
+ m_impl = new FilterToolPrivate(m_settings);
+
+ // run FilterTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
+}
// ---------------------------------------------------------------------------
// Last modified: 28 August 2010
// ---------------------------------------------------------------------------
-// Filters a single BAM file (or filters multiple BAM files and merges)
-// according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria
// ***************************************************************************
#ifndef BAMTOOLS_FILTER_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Prints the SAM-style header from a single BAM file ( or merged header from
// multiple BAM files) to stdout
{ }
};
+struct HeaderTool::HeaderToolPrivate {
+
+ // ctor & dtor
+ public:
+ HeaderToolPrivate(HeaderTool::HeaderSettings* settings)
+ : m_settings(settings)
+ { }
+
+ ~HeaderToolPrivate(void) { }
+
+ // interface
+ public:
+ bool Run(void);
+
+ // data members
+ private:
+ HeaderTool::HeaderSettings* m_settings;
+};
+
+bool HeaderTool::HeaderToolPrivate::Run(void) {
+
+ // set to default input if none provided
+ if ( !m_settings->HasInputBamFilename )
+ m_settings->InputFiles.push_back(Options::StandardIn());
+
+ // attemp to open BAM files
+ BamMultiReader reader;
+ if ( !reader.Open(m_settings->InputFiles) ) {
+ cerr << "bamtools header ERROR: could not open BAM file(s) for reading... Aborting." << endl;
+ return false;
+ }
+
+ // dump (merged) header contents to stdout
+ cout << reader.GetHeaderText() << endl;
+
+ // clean up & exit
+ reader.Close();
+ return true;
+}
+
// ---------------------------------------------
// HeaderTool implementation
HeaderTool::HeaderTool(void)
: AbstractTool()
, m_settings(new HeaderSettings)
+ , m_impl(0)
{
// set program details
Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in <filename> -in <filename> ...] ");
}
HeaderTool::~HeaderTool(void) {
+
delete m_settings;
m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
}
int HeaderTool::Help(void) {
// parse command line arguments
Options::Parse(argc, argv, 1);
- // set to default input if none provided
- if ( !m_settings->HasInputBamFilename )
- m_settings->InputFiles.push_back(Options::StandardIn());
-
- // attemp to open BAM files
- BamMultiReader reader;
- if ( !reader.Open(m_settings->InputFiles) ) {
- cerr << "bamtools header ERROR: could not open BAM file(s) for reading... Aborting." << endl;
- return 1;
- }
-
- // dump (merged) header contents to stdout
- cout << reader.GetHeaderText() << endl;
+ // initialize HeaderTool with settings
+ m_impl = new HeaderToolPrivate(m_settings);
- // clean up & exit
- reader.Close();
- return 0;
+ // run HeaderTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Prints the SAM-style header from a single BAM file ( or merged header from
// multiple BAM files) to stdout
private:
struct HeaderSettings;
HeaderSettings* m_settings;
+
+ struct HeaderToolPrivate;
+ HeaderToolPrivate* m_impl;
};
} // namespace BamTools
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Creates a BAM index file.
+// Creates a BAM index file
// ***************************************************************************
#include "bamtools_index.h"
{ }
};
+// ---------------------------------------------
+// IndexToolPrivate implementation
+
+struct IndexTool::IndexToolPrivate {
+
+ // ctor & dtor
+ public:
+ IndexToolPrivate(IndexTool::IndexSettings* settings)
+ : m_settings(settings)
+ { }
+
+ ~IndexToolPrivate(void) { }
+
+ // interface
+ public:
+ bool Run(void);
+
+ // data members
+ private:
+ IndexTool::IndexSettings* m_settings;
+};
+
+bool IndexTool::IndexToolPrivate::Run(void) {
+
+ // open our BAM reader
+ BamReader reader;
+ if ( !reader.Open(m_settings->InputBamFilename) ) {
+ cerr << "bamtools index ERROR: could not open BAM file: "
+ << m_settings->InputBamFilename << endl;
+ return false;
+ }
+
+ // create index for BAM file
+ const BamIndex::IndexType type = ( m_settings->IsUsingBamtoolsIndex ? BamIndex::BAMTOOLS
+ : BamIndex::STANDARD );
+ reader.CreateIndex(type);
+
+ // clean & exit
+ reader.Close();
+ return true;
+}
+
// ---------------------------------------------
// IndexTool implementation
IndexTool::IndexTool(void)
: AbstractTool()
, m_settings(new IndexSettings)
+ , m_impl(0)
{
// set program details
Options::SetProgramInfo("bamtools index", "creates index for BAM file", "[-in <filename>] [-bti]");
}
IndexTool::~IndexTool(void) {
+
delete m_settings;
m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
}
int IndexTool::Help(void) {
// parse command line arguments
Options::Parse(argc, argv, 1);
- // open our BAM reader
- BamReader reader;
- if ( !reader.Open(m_settings->InputBamFilename) ) {
- cerr << "bamtools index ERROR: could not open BAM file: " << m_settings->InputBamFilename << endl;
+ // initialize IndexTool with settings
+ m_impl = new IndexToolPrivate(m_settings);
+
+ // run IndexTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
return 1;
- }
-
- // create index for BAM file
- const BamIndex::IndexType type = ( m_settings->IsUsingBamtoolsIndex ? BamIndex::BAMTOOLS
- : BamIndex::STANDARD );
- reader.CreateIndex(type);
-
- // clean & exit
- reader.Close();
- return 0;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 18 January 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Creates a BAM index file.
+// Creates a BAM index file
// ***************************************************************************
#ifndef BAMTOOLS_INDEX_H
private:
struct IndexSettings;
IndexSettings* m_settings;
+
+ struct IndexToolPrivate;
+ IndexToolPrivate* m_impl;
};
} // namespace BamTools
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Merges multiple BAM files into one.
+// Merges multiple BAM files into one
// ***************************************************************************
#include "bamtools_merge.h"
};
// ---------------------------------------------
-// MergeTool implementation
+// MergeToolPrivate implementation
-MergeTool::MergeTool(void)
- : AbstractTool()
- , m_settings(new MergeSettings)
-{
- // set program details
- Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>]");
-
- // set up options
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts);
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts);
- Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
- Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
-}
+struct MergeTool::MergeToolPrivate {
-MergeTool::~MergeTool(void) {
- delete m_settings;
- m_settings = 0;
-}
+ // ctor & dtor
+ public:
+ MergeToolPrivate(MergeTool::MergeSettings* settings)
+ : m_settings(settings)
+ { }
-int MergeTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
+ ~MergeToolPrivate(void) { }
-int MergeTool::Run(int argc, char* argv[]) {
-
- // parse command line arguments
- Options::Parse(argc, argv, 1);
-
- // set to default input if none provided
- if ( !m_settings->HasInputBamFilename )
+ // interface
+ public:
+ bool Run(void);
+
+ // data members
+ private:
+ MergeTool::MergeSettings* m_settings;
+};
+
+bool MergeTool::MergeToolPrivate::Run(void) {
+
+ // set to default input if none provided
+ if ( !m_settings->HasInputBamFilename )
m_settings->InputFiles.push_back(Options::StandardIn());
-
+
// opens the BAM files (by default without checking for indexes)
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl;
- return 1;
+ return false;
}
-
+
// retrieve header & reference dictionary info
std::string mergedHeader = reader.GetHeaderText();
RefVector references = reader.GetReferenceData();
// determine compression mode for BamWriter
bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
- !m_settings->IsForceCompression );
+ !m_settings->IsForceCompression );
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
BamWriter writer;
writer.SetCompressionMode(compressionMode);
if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) {
- cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
+ cerr << "bamtools merge ERROR: could not open "
+ << m_settings->OutputFilename << " for writing." << endl;
reader.Close();
return false;
}
while ( reader.GetNextAlignmentCore(al) )
writer.SaveAlignment(al);
}
-
+
// otherwise attempt to use region as constraint
else {
-
+
// if region string parses OK
BamRegion region;
if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
if ( reader.HasIndexes() ) {
// attempt to use SetRegion(), if failed report error
- if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
- cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" << endl;
+ if ( !reader.SetRegion(region.LeftRefID,
+ region.LeftPosition,
+ region.RightRefID,
+ region.RightPosition) )
+ {
+ cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range"
+ << endl;
reader.Close();
- return 1;
- }
-
+ return false;
+ }
+
// everything checks out, just iterate through specified region, storing alignments
BamAlignment al;
while ( reader.GetNextAlignmentCore(al) )
writer.SaveAlignment(al);
- }
-
+ }
+
// no index data available, we have to iterate through until we
// find overlapping alignments
else {
BamAlignment al;
while ( reader.GetNextAlignmentCore(al) ) {
if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
- (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
+ (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
{
writer.SaveAlignment(al);
}
}
}
- }
-
+ }
+
// error parsing REGION string
else {
cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl;
<< endl;
reader.Close();
writer.Close();
- return 1;
+ return false;
}
}
-
+
// clean & exit
reader.Close();
writer.Close();
- return 0;
+ return true;
+}
+
+// ---------------------------------------------
+// MergeTool implementation
+
+MergeTool::MergeTool(void)
+ : AbstractTool()
+ , m_settings(new MergeSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts);
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts);
+ Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
+ Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+}
+
+MergeTool::~MergeTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int MergeTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int MergeTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize MergeTool with settings
+ m_impl = new MergeToolPrivate(m_settings);
+
+ // run MergeTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Merges multiple BAM files into one
// ***************************************************************************
private:
struct MergeSettings;
MergeSettings* m_settings;
+
+ struct MergeToolPrivate;
+ MergeToolPrivate* m_impl;
};
} // namespace BamTools
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 7 April 2011 (DB)
// ---------------------------------------------------------------------------
-// Grab a random subset of alignments.
+// Grab a random subset of alignments (testing tool)
// ***************************************************************************
#include "bamtools_random.h"
};
// ---------------------------------------------
-// RandomTool implementation
+// RandomToolPrivate implementation
-RandomTool::RandomTool(void)
- : AbstractTool()
- , m_settings(new RandomSettings)
-{
- // set program details
- Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in <filename> -in <filename> ...] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
-
- // set up options
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
- Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
- Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
-
- OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings");
- Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
-}
+struct RandomTool::RandomToolPrivate {
-RandomTool::~RandomTool(void) {
- delete m_settings;
- m_settings = 0;
-}
+ // ctor & dtor
+ public:
+ RandomToolPrivate(RandomTool::RandomSettings* settings)
+ : m_settings(settings)
+ { }
-int RandomTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
+ ~RandomToolPrivate(void) { }
-int RandomTool::Run(int argc, char* argv[]) {
+ // interface
+ public:
+ bool Run(void);
- // parse command line arguments
- Options::Parse(argc, argv, 1);
+ // data members
+ private:
+ RandomTool::RandomSettings* m_settings;
+};
+
+bool RandomTool::RandomToolPrivate::Run(void) {
// set to default stdin if no input files provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput )
m_settings->InputFiles.push_back(Options::StandardIn());
-
+
// open our reader
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl;
- return 1;
+ return false;
}
-
+
// look up index files for all BAM files
reader.LocateIndexes();
if ( !reader.HasIndexes() ) {
cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl;
reader.Close();
- return 1;
+ return false;
}
-
- // get BamReader metadata
+
+ // get BamReader metadata
const string headerText = reader.GetHeaderText();
const RefVector references = reader.GetReferenceData();
if ( references.empty() ) {
cerr << "bamtools random ERROR: no reference data available... Aborting." << endl;
reader.Close();
- return 1;
+ return false;
}
-
+
// determine compression mode for BamWriter
bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
!m_settings->IsForceCompression );
BamWriter writer;
writer.SetCompressionMode(compressionMode);
if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) {
- cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename << " for writing... Aborting." << endl;
+ cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename
+ << " for writing... Aborting." << endl;
reader.Close();
- return 1;
+ return false;
}
- // if user specified a REGION constraint, attempt to parse REGION string
- BamRegion region;
+ // if user specified a REGION constraint, attempt to parse REGION string
+ BamRegion region;
if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl;
cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
<< endl;
reader.Close();
writer.Close();
- return 1;
+ return false;
}
-
+
// seed our random number generator
srand( time(NULL) );
-
- // grab random alignments
+
+ // grab random alignments
BamAlignment al;
unsigned int i = 0;
while ( i < m_settings->AlignmentCount ) {
-
+
int randomRefId = 0;
int randomPosition = 0;
-
+
// use REGION constraints to select random refId & position
if ( m_settings->HasRegion ) {
-
+
// select a random refId
randomRefId = getRandomInt(region.LeftRefID, region.RightRefID);
-
+
// select a random position based on randomRefId
- const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 );
- const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : (references.at(randomRefId).RefLength - 1) );
+ const int lowerBoundPosition = ( (randomRefId == region.LeftRefID)
+ ? region.LeftPosition
+ : 0 );
+ const int upperBoundPosition = ( (randomRefId == region.RightRefID)
+ ? region.RightPosition
+ : (references.at(randomRefId).RefLength - 1) );
randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
- }
-
+ }
+
// otherwise select from all possible random refId & position
else {
-
+
// select random refId
randomRefId = getRandomInt(0, (int)references.size() - 1);
-
+
// select random position based on randomRefId
const int lowerBoundPosition = 0;
const int upperBoundPosition = references.at(randomRefId).RefLength - 1;
- randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
+ randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
}
-
+
// if jump & read successful, save first alignment that overlaps random refId & position
if ( reader.Jump(randomRefId, randomPosition) ) {
while ( reader.GetNextAlignmentCore(al) ) {
}
}
}
-
+
// cleanup & exit
reader.Close();
writer.Close();
+ return true;
+}
+
+// ---------------------------------------------
+// RandomTool implementation
+
+RandomTool::RandomTool(void)
+ : AbstractTool()
+ , m_settings(new RandomSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in <filename> -in <filename> ...] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
+ Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+
+ OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings");
+ Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
+}
+
+RandomTool::~RandomTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int RandomTool::Help(void) {
+ Options::DisplayHelp();
return 0;
+}
+
+int RandomTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize RandomTool with settings
+ m_impl = new RandomToolPrivate(m_settings);
+
+ // run RandomTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 20 July 2010 (DB)
+// Last modified: 7 April 2010 (DB)
// ---------------------------------------------------------------------------
-// Grab a random subset of alignments.
+// Grab a random subset of alignments (testing tool)
// ***************************************************************************
#ifndef BAMTOOLS_RANDOM_H
private:
struct RandomSettings;
RandomSettings* m_settings;
+
+ struct RandomToolPrivate;
+ RandomToolPrivate* m_impl;
};
} // namespace BamTools
// ***************************************************************************
-// bamtools_cpp (c) 2010 Derek Barnett, Alistair Ward
+// bamtools_revert.cpp (c) 2010 Derek Barnett, Alistair Ward
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Prints general alignment statistics for BAM file(s).
+// Removes duplicate marks and restores original base qualities
// ***************************************************************************
#include "bamtools_revert.h"
#include <string>
using namespace std;
+namespace BamTools {
+
+static const string OQ_TAG = "OQ";
+
+} // namespace BamTools;
+
// ---------------------------------------------
// RevertSettings implementation
// ctor & dtor
public:
- RevertToolPrivate(RevertTool::RevertSettings* settings);
- ~RevertToolPrivate(void);
+ RevertToolPrivate(RevertTool::RevertSettings* settings)
+ : m_settings(settings)
+ { }
+ ~RevertToolPrivate(void) { }
// 'public' interface
public:
// data members
private:
RevertTool::RevertSettings* m_settings;
- string m_OQ;
};
-RevertTool::RevertToolPrivate::RevertToolPrivate(RevertTool::RevertSettings* settings)
- : m_settings(settings)
- , m_OQ("OQ")
-{ }
-
-RevertTool::RevertToolPrivate::~RevertToolPrivate(void) { }
-
-// reverts a BAM alignment
-// default behavior (for now) is : replace Qualities with OQ, clear IsDuplicate flag
+// 'reverts' a BAM alignment
+// default behavior (for now) is:
+// 1 - replace Qualities with OQ contents
+// 2 - clear IsDuplicate flag
// can override default behavior using command line options
void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) {
- // replace Qualities with OQ, if requested
+ // replace Qualities with OQ contents, if requested
if ( !m_settings->IsKeepQualities ) {
string originalQualities;
- if ( al.GetTag(m_OQ, originalQualities) ) {
+ if ( al.GetTag(OQ_TAG, originalQualities) ) {
al.Qualities = originalQualities;
- al.RemoveTag(m_OQ);
+ al.RemoveTag(OQ_TAG);
}
}
}
RevertTool::~RevertTool(void) {
+
delete m_settings;
m_settings = 0;
// parse command line arguments
Options::Parse(argc, argv, 1);
- // run internal RevertTool implementation, return success/fail
+ // intialize RevertTool with settings
m_impl = new RevertToolPrivate(m_settings);
- if ( m_impl->Run() ) return 0;
- else return 1;
+ // run RevertTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// ***************************************************************************
-// bamtools_stats.h (c) 2010 Derek Barnett, Alistair Ward
+// bamtools_revert.h (c) 2010 Derek Barnett, Alistair Ward
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 5 December 2010
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-//
+// Removes duplicate marks and restores original base qualities
// ***************************************************************************
#ifndef BAMTOOLS_REVERT_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 7 April 2011 (DB)
// ---------------------------------------------------------------------------
-// Sorts an input BAM file (default by position) and stores in a new BAM file.
+// Sorts an input BAM file
// ***************************************************************************
#include "bamtools_sort.h"
namespace BamTools {
- // defaults
- //
- // ** These defaults should be tweaked & 'optimized' per testing ** //
- //
- // I say 'optimized' because each system will naturally perform
- // differently. We will attempt to determine a sensible
- // compromise that should perform well on average.
- const unsigned int SORT_DEFAULT_MAX_BUFFER_COUNT = 500000; // max numberOfAlignments for buffer
- const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024; // Mb
-
- // -----------------------------------
- // comparison objects (for sorting)
-
- struct SortLessThanPosition {
- bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
- if ( lhs.RefID != rhs.RefID )
- return lhs.RefID < rhs.RefID;
- else
- return lhs.Position < rhs.Position;
- }
- };
-
- struct SortLessThanName {
- bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
- return lhs.Name < rhs.Name;
- }
- };
-
-} // namespace BamTools
+// defaults
+//
+// ** These defaults should be tweaked & 'optimized' per testing ** //
+//
+// I say 'optimized' because each system will naturally perform
+// differently. We will attempt to determine a sensible
+// compromise that should perform well on average.
+const unsigned int SORT_DEFAULT_MAX_BUFFER_COUNT = 500000; // max numberOfAlignments for buffer
+const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024; // Mb
+
+// -----------------------------------
+// comparison objects (for sorting)
+
+struct SortLessThanPosition {
+ bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
+ if ( lhs.RefID != rhs.RefID )
+ return lhs.RefID < rhs.RefID;
+ else
+ return lhs.Position < rhs.Position;
+ }
+};
-// ---------------------------------------------
-// SortToolPrivate declaration
-class SortTool::SortToolPrivate {
-
- // ctor & dtor
- public:
- SortToolPrivate(SortTool::SortSettings* settings);
- ~SortToolPrivate(void);
-
- // 'public' interface
- public:
- bool Run(void);
-
- // internal methods
- private:
- void ClearBuffer(vector<BamAlignment>& buffer);
- bool GenerateSortedRuns(void);
- bool HandleBufferContents(vector<BamAlignment>& buffer);
- bool MergeSortedRuns(void);
- bool WriteTempFile(const vector<BamAlignment>& buffer, const string& tempFilename);
- void SortBuffer(vector<BamAlignment>& buffer);
-
- // data members
- private:
- SortTool::SortSettings* m_settings;
- string m_tempFilenameStub;
- int m_numberOfRuns;
- string m_headerText;
- RefVector m_references;
- vector<string> m_tempFilenames;
+struct SortLessThanName {
+ bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
+ return lhs.Name < rhs.Name;
+ }
};
+
+} // namespace BamTools
// ---------------------------------------------
// SortSettings implementation
// filenames
string InputBamFilename;
string OutputBamFilename;
-
+
// parameters
unsigned int MaxBufferCount;
unsigned int MaxBufferMemory;
-
+
// constructor
SortSettings(void)
: HasInputBamFilename(false)
, OutputBamFilename(Options::StandardOut())
, MaxBufferCount(SORT_DEFAULT_MAX_BUFFER_COUNT)
, MaxBufferMemory(SORT_DEFAULT_MAX_BUFFER_MEMORY)
- { }
-};
+ { }
+};
// ---------------------------------------------
-// SortTool implementation
-
-SortTool::SortTool(void)
- : AbstractTool()
- , m_settings(new SortSettings)
- , m_impl(0)
-{
- // set program details
- Options::SetProgramInfo("bamtools sort", "sorts a BAM file", "[-in <filename>] [-out <filename>] [sortOptions]");
-
- // set up options
- OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut());
-
- OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods");
- Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts);
-
- OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings");
- Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT);
- Options::AddValueOption("-mem", "Mb", "max memory to use", "", m_settings->HasMaxBufferMemory, m_settings->MaxBufferMemory, MemOpts, SORT_DEFAULT_MAX_BUFFER_MEMORY);
-}
-
-SortTool::~SortTool(void) {
-
- delete m_settings;
- m_settings = 0;
-
- delete m_impl;
- m_impl = 0;
-}
-
-int SortTool::Help(void) {
- Options::DisplayHelp();
- return 0;
-}
+// SortToolPrivate implementation
-int SortTool::Run(int argc, char* argv[]) {
-
- // parse command line arguments
- Options::Parse(argc, argv, 1);
-
- // run internal SortTool implementation, return success/fail
- m_impl = new SortToolPrivate(m_settings);
-
- if ( m_impl->Run() ) return 0;
- else return 1;
-}
+class SortTool::SortToolPrivate {
+
+ // ctor & dtor
+ public:
+ SortToolPrivate(SortTool::SortSettings* settings);
+ ~SortToolPrivate(void) { }
+
+ // 'public' interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+ void ClearBuffer(vector<BamAlignment>& buffer);
+ bool GenerateSortedRuns(void);
+ bool HandleBufferContents(vector<BamAlignment>& buffer);
+ bool MergeSortedRuns(void);
+ bool WriteTempFile(const vector<BamAlignment>& buffer, const string& tempFilename);
+ void SortBuffer(vector<BamAlignment>& buffer);
+
+ // data members
+ private:
+ SortTool::SortSettings* m_settings;
+ string m_tempFilenameStub;
+ int m_numberOfRuns;
+ string m_headerText;
+ RefVector m_references;
+ vector<string> m_tempFilenames;
+};
-// ---------------------------------------------
-// SortToolPrivate implementation
// constructor
SortTool::SortToolPrivate::SortToolPrivate(SortTool::SortSettings* settings)
}
}
-// destructor
-SortTool::SortToolPrivate::~SortToolPrivate(void) { }
-
// generates mutiple sorted temp BAM files from single unsorted BAM file
bool SortTool::SortToolPrivate::GenerateSortedRuns(void) {
tempWriter.Close();
return true;
}
+
+// ---------------------------------------------
+// SortTool implementation
+
+SortTool::SortTool(void)
+ : AbstractTool()
+ , m_settings(new SortSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools sort", "sorts a BAM file", "[-in <filename>] [-out <filename>] [sortOptions]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut());
+
+ OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods");
+ Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts);
+
+ OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings");
+ Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT);
+ Options::AddValueOption("-mem", "Mb", "max memory to use", "", m_settings->HasMaxBufferMemory, m_settings->MaxBufferMemory, MemOpts, SORT_DEFAULT_MAX_BUFFER_MEMORY);
+}
+
+SortTool::~SortTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int SortTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int SortTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize SortTool with settings
+ m_impl = new SortToolPrivate(m_settings);
+
+ // run SortTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 June 2010 (DB)
+// Last modified: 7 April 2011 (DB)
// ---------------------------------------------------------------------------
-// Sorts a BAM file.
+// Sorts a BAM file
// ***************************************************************************
#ifndef BAMTOOLS_SORT_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 7 April 2011 (DB)
// ---------------------------------------------------------------------------
// Splits a BAM file on user-specified property, creating a new BAM output
-// file for each value found.
+// file for each value found
// ***************************************************************************
#include "bamtools_split.h"
// ctor & dtor
public:
- SplitToolPrivate(SplitTool::SplitSettings* settings);
- ~SplitToolPrivate(void);
+ SplitToolPrivate(SplitTool::SplitSettings* settings)
+ : m_settings(settings)
+ { }
+
+ ~SplitToolPrivate(void) {
+ m_reader.Close();
+ }
// 'public' interface
public:
RefVector m_references;
};
-// constructor
-SplitTool::SplitToolPrivate::SplitToolPrivate(SplitTool::SplitSettings* settings)
- : m_settings(settings)
-{ }
-
-// destructor
-SplitTool::SplitToolPrivate::~SplitToolPrivate(void) {
- m_reader.Close();
-}
-
void SplitTool::SplitToolPrivate::DetermineOutputFilenameStub(void) {
// if user supplied output filename stub, use that
// --------------------------------------------------------------------------------
// template method implementation
-// N.B. - *technical note* - use of template methods defined in ".cpp" goes against normal practices
-// but works here because these are purely internal (no one can call from outside this file)
+// *Technical Note* - use of template methods declared & defined in ".cpp" file
+// goes against normal practices, but works here because these
+// are purely internal (no one can call from outside this file)
// close BamWriters & delete pointers
template<typename T>
WriterMapIterator writerEnd = writers.end();
for ( ; writerIter != writerEnd; ++writerIter ) {
BamWriter* writer = (*writerIter).second;
- if (writer == 0 ) continue;
+ if ( writer == 0 ) continue;
- // close & delete writer
+ // close BamWriter
writer->Close();
+
+ // destroy BamWriter
delete writer;
writer = 0;
}
+
+ // clear the container (destroying the items doesn't remove them)
writers.clear();
}
// parse command line arguments
Options::Parse(argc, argv, 1);
- // initialize internal implementation
+ // initialize SplitTool with settings
m_impl = new SplitToolPrivate(m_settings);
- // run tool, return success/fail
+ // run SplitTool, return success/fail
if ( m_impl->Run() )
return 0;
else
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 7 April 2011 (DB)
// ---------------------------------------------------------------------------
// Splits a BAM file on user-specified property, creating a new BAM output
-// file for each value found.
+// file for each value found
// ***************************************************************************
#ifndef BAMTOOLS_SPLIT_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
// Prints general alignment statistics for BAM file(s).
// ***************************************************************************
// ctor & dtor
public:
StatsToolPrivate(StatsTool::StatsSettings* _settings);
- ~StatsToolPrivate(void);
+ ~StatsToolPrivate(void) { }
// 'public' interface
public:
// data members
private:
- StatsTool::StatsSettings* settings;
- unsigned int numReads;
- unsigned int numPaired;
- unsigned int numProperPair;
- unsigned int numMapped;
- unsigned int numBothMatesMapped;
- unsigned int numForwardStrand;
- unsigned int numReverseStrand;
- unsigned int numFirstMate;
- unsigned int numSecondMate;
- unsigned int numSingletons;
- unsigned int numFailedQC;
- unsigned int numDuplicates;
- vector<int> insertSizes;
+ StatsTool::StatsSettings* m_settings;
+ unsigned int m_numReads;
+ unsigned int m_numPaired;
+ unsigned int m_numProperPair;
+ unsigned int m_numMapped;
+ unsigned int m_numBothMatesMapped;
+ unsigned int m_numForwardStrand;
+ unsigned int m_numReverseStrand;
+ unsigned int m_numFirstMate;
+ unsigned int m_numSecondMate;
+ unsigned int m_numSingletons;
+ unsigned int m_numFailedQC;
+ unsigned int m_numDuplicates;
+ vector<int> m_insertSizes;
};
-StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* _settings)
- : settings(_settings)
- , numReads(0)
- , numPaired(0)
- , numProperPair(0)
- , numMapped(0)
- , numBothMatesMapped(0)
- , numForwardStrand(0)
- , numReverseStrand(0)
- , numFirstMate(0)
- , numSecondMate(0)
- , numSingletons(0)
- , numFailedQC(0)
- , numDuplicates(0)
+StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* settings)
+ : m_settings(settings)
+ , m_numReads(0)
+ , m_numPaired(0)
+ , m_numProperPair(0)
+ , m_numMapped(0)
+ , m_numBothMatesMapped(0)
+ , m_numForwardStrand(0)
+ , m_numReverseStrand(0)
+ , m_numFirstMate(0)
+ , m_numSecondMate(0)
+ , m_numSingletons(0)
+ , m_numFailedQC(0)
+ , m_numDuplicates(0)
{
- insertSizes.reserve(100000);
+ m_insertSizes.reserve(100000);
}
-StatsTool::StatsToolPrivate::~StatsToolPrivate(void) { }
-
// median is of type double because in the case of even number of data elements,
// we need to return the average of middle 2 elements
bool StatsTool::StatsToolPrivate::CalculateMedian(vector<int>& data, double& median) {
cout << "Stats for BAM file(s): " << endl;
cout << "**********************************************" << endl;
cout << endl;
- cout << "Total reads: " << numReads << endl;
- cout << "Mapped reads: " << numMapped << "\t(" << ((float)numMapped/numReads)*100 << "%)" << endl;
- cout << "Forward strand: " << numForwardStrand << "\t(" << ((float)numForwardStrand/numReads)*100 << "%)" << endl;
- cout << "Reverse strand: " << numReverseStrand << "\t(" << ((float)numReverseStrand/numReads)*100 << "%)" << endl;
- cout << "Failed QC: " << numFailedQC << "\t(" << ((float)numFailedQC/numReads)*100 << "%)" << endl;
- cout << "Duplicates: " << numDuplicates << "\t(" << ((float)numDuplicates/numReads)*100 << "%)" << endl;
- cout << "Paired-end reads: " << numPaired << "\t(" << ((float)numPaired/numReads)*100 << "%)" << endl;
+ cout << "Total reads: " << m_numReads << endl;
+ cout << "Mapped reads: " << m_numMapped << "\t(" << ((float)m_numMapped/m_numReads)*100 << "%)" << endl;
+ cout << "Forward strand: " << m_numForwardStrand << "\t(" << ((float)m_numForwardStrand/m_numReads)*100 << "%)" << endl;
+ cout << "Reverse strand: " << m_numReverseStrand << "\t(" << ((float)m_numReverseStrand/m_numReads)*100 << "%)" << endl;
+ cout << "Failed QC: " << m_numFailedQC << "\t(" << ((float)m_numFailedQC/m_numReads)*100 << "%)" << endl;
+ cout << "Duplicates: " << m_numDuplicates << "\t(" << ((float)m_numDuplicates/m_numReads)*100 << "%)" << endl;
+ cout << "Paired-end reads: " << m_numPaired << "\t(" << ((float)m_numPaired/m_numReads)*100 << "%)" << endl;
- if ( numPaired != 0 ) {
- cout << "'Proper-pairs': " << numProperPair << "\t(" << ((float)numProperPair/numPaired)*100 << "%)" << endl;
- cout << "Both pairs mapped: " << numBothMatesMapped << "\t(" << ((float)numBothMatesMapped/numPaired)*100 << "%)" << endl;
- cout << "Read 1: " << numFirstMate << endl;
- cout << "Read 2: " << numSecondMate << endl;
- cout << "Singletons: " << numSingletons << "\t(" << ((float)numSingletons/numPaired)*100 << "%)" << endl;
+ if ( m_numPaired != 0 ) {
+ cout << "'Proper-pairs': " << m_numProperPair << "\t(" << ((float)m_numProperPair/m_numPaired)*100 << "%)" << endl;
+ cout << "Both pairs mapped: " << m_numBothMatesMapped << "\t(" << ((float)m_numBothMatesMapped/m_numPaired)*100 << "%)" << endl;
+ cout << "Read 1: " << m_numFirstMate << endl;
+ cout << "Read 2: " << m_numSecondMate << endl;
+ cout << "Singletons: " << m_numSingletons << "\t(" << ((float)m_numSingletons/m_numPaired)*100 << "%)" << endl;
}
- if ( settings->IsShowingInsertSizeSummary ) {
+ if ( m_settings->IsShowingInsertSizeSummary ) {
double avgInsertSize = 0.0;
- if ( !insertSizes.empty() ) {
- avgInsertSize = ( accumulate(insertSizes.begin(), insertSizes.end(), 0.0) / (double)insertSizes.size() );
+ if ( !m_insertSizes.empty() ) {
+ avgInsertSize = ( accumulate(m_insertSizes.begin(), m_insertSizes.end(), 0.0) / (double)m_insertSizes.size() );
cout << "Average insert size (absolute value): " << avgInsertSize << endl;
}
double medianInsertSize = 0.0;
- if ( CalculateMedian(insertSizes, medianInsertSize) )
+ if ( CalculateMedian(m_insertSizes, medianInsertSize) )
cout << "Median insert size (absolute value): " << medianInsertSize << endl;
}
cout << endl;
void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) {
// increment total alignment counter
- ++numReads;
+ ++m_numReads;
- // check the paired-independent flags
- if ( al.IsDuplicate() ) ++numDuplicates;
- if ( al.IsFailedQC() ) ++numFailedQC;
- if ( al.IsMapped() ) ++numMapped;
+ // incrememt counters for pairing-independent flags
+ if ( al.IsDuplicate() ) ++m_numDuplicates;
+ if ( al.IsFailedQC() ) ++m_numFailedQC;
+ if ( al.IsMapped() ) ++m_numMapped;
- // check forward/reverse strand
+ // increment strand counters
if ( al.IsReverseStrand() )
- ++numReverseStrand;
+ ++m_numReverseStrand;
else
- ++numForwardStrand;
+ ++m_numForwardStrand;
// if alignment is paired-end
if ( al.IsPaired() ) {
// increment PE counter
- ++numPaired;
+ ++m_numPaired;
// increment first mate/second mate counters
- if ( al.IsFirstMate() ) ++numFirstMate;
- if ( al.IsSecondMate() ) ++numSecondMate;
+ if ( al.IsFirstMate() ) ++m_numFirstMate;
+ if ( al.IsSecondMate() ) ++m_numSecondMate;
// if alignment is mapped, check mate status
if ( al.IsMapped() ) {
// if mate mapped
if ( al.IsMateMapped() )
- ++numBothMatesMapped;
+ ++m_numBothMatesMapped;
// else singleton
else
- ++numSingletons;
+ ++m_numSingletons;
}
// check for explicit proper pair flag
- if ( al.IsProperPair() ) ++numProperPair;
+ if ( al.IsProperPair() ) ++m_numProperPair;
// store insert size for first mate
- if ( settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) {
+ if ( m_settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) {
int insertSize = abs(al.InsertSize);
- insertSizes.push_back( insertSize );
+ m_insertSizes.push_back( insertSize );
}
}
}
bool StatsTool::StatsToolPrivate::Run() {
+ // set to default input if none provided
+ if ( !m_settings->HasInput )
+ m_settings->InputFiles.push_back(Options::StandardIn());
+
// open the BAM files
BamMultiReader reader;
- if ( !reader.Open(settings->InputFiles) ) {
+ if ( !reader.Open(m_settings->InputFiles) ) {
cerr << "bamtools stats ERROR: could not open input BAM file(s)... Aborting." << endl;
reader.Close();
return false;
}
StatsTool::~StatsTool(void) {
+
delete m_settings;
m_settings = 0;
// parse command line arguments
Options::Parse(argc, argv, 1);
-
- // set to default input if none provided
- if ( !m_settings->HasInput )
- m_settings->InputFiles.push_back(Options::StandardIn());
- // run internal StatsTool implementation, return success/fail
+ // initialize StatsTool with settings
m_impl = new StatsToolPrivate(m_settings);
- if ( m_impl->Run() ) return 0;
- else return 1;
+ // run StatsTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 1 June 2010
+// Last modified: 7 April 2011
// ---------------------------------------------------------------------------
-// Prints general statistics for a single BAM file.
-//
-// ** Expand to multiple? **
-//
+// Prints general statistics for a single BAM file
// ***************************************************************************
#ifndef BAMTOOLS_STATS_H