From a357e1a5853b5bd5a43ceb673269863afc54a08e Mon Sep 17 00:00:00 2001 From: derek Date: Tue, 7 Jun 2011 15:45:35 -0400 Subject: [PATCH] Minor cleanup to toolkit code formatting. --- src/toolkit/bamtools_convert.cpp | 103 +++++++------ src/toolkit/bamtools_count.cpp | 129 +++++++++++------ src/toolkit/bamtools_count.h | 10 +- src/toolkit/bamtools_coverage.cpp | 21 ++- src/toolkit/bamtools_filter.cpp | 214 +++++++++++++-------------- src/toolkit/bamtools_filter.h | 3 +- src/toolkit/bamtools_header.cpp | 70 ++++++--- src/toolkit/bamtools_header.h | 5 +- src/toolkit/bamtools_index.cpp | 72 +++++++-- src/toolkit/bamtools_index.h | 7 +- src/toolkit/bamtools_merge.cpp | 149 ++++++++++++------- src/toolkit/bamtools_merge.h | 5 +- src/toolkit/bamtools_random.cpp | 166 +++++++++++++-------- src/toolkit/bamtools_random.h | 7 +- src/toolkit/bamtools_revert.cpp | 48 +++--- src/toolkit/bamtools_revert.h | 6 +- src/toolkit/bamtools_sort.cpp | 233 +++++++++++++++--------------- src/toolkit/bamtools_sort.h | 4 +- src/toolkit/bamtools_split.cpp | 40 ++--- src/toolkit/bamtools_split.h | 4 +- src/toolkit/bamtools_stats.cpp | 150 +++++++++---------- src/toolkit/bamtools_stats.h | 7 +- 22 files changed, 837 insertions(+), 616 deletions(-) diff --git a/src/toolkit/bamtools_convert.cpp b/src/toolkit/bamtools_convert.cpp index cc32e2c..8025802 100644 --- a/src/toolkit/bamtools_convert.cpp +++ b/src/toolkit/bamtools_convert.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Converts between BAM and a number of other formats // *************************************************************************** @@ -27,46 +27,46 @@ using namespace std; namespace BamTools { - // --------------------------------------------- - // ConvertTool constants - - // supported conversion format command-line names - static const string FORMAT_BED = "bed"; - static const string FORMAT_FASTA = "fasta"; - static const string FORMAT_FASTQ = "fastq"; - static const string FORMAT_JSON = "json"; - static const string FORMAT_SAM = "sam"; - static const string FORMAT_PILEUP = "pileup"; - static const string FORMAT_YAML = "yaml"; - - // other constants - static const unsigned int FASTA_LINE_MAX = 50; - - // --------------------------------------------- - // ConvertPileupFormatVisitor declaration - - class ConvertPileupFormatVisitor : public PileupVisitor { - - // ctor & dtor - public: - ConvertPileupFormatVisitor(const RefVector& references, - const string& fastaFilename, - const bool isPrintingMapQualities, - ostream* out); - ~ConvertPileupFormatVisitor(void); - - // PileupVisitor interface implementation - public: - void Visit(const PileupPosition& pileupData); - - // data members - private: - Fasta m_fasta; - bool m_hasFasta; - bool m_isPrintingMapQualities; - ostream* m_out; - RefVector m_references; - }; +// --------------------------------------------- +// ConvertTool constants + +// supported conversion format command-line names +static const string FORMAT_BED = "bed"; +static const string FORMAT_FASTA = "fasta"; +static const string FORMAT_FASTQ = "fastq"; +static const string FORMAT_JSON = "json"; +static const string FORMAT_SAM = "sam"; +static const string FORMAT_PILEUP = "pileup"; +static const string FORMAT_YAML = "yaml"; + +// other constants +static const unsigned int FASTA_LINE_MAX = 50; + +// --------------------------------------------- +// ConvertPileupFormatVisitor declaration + +class ConvertPileupFormatVisitor : public PileupVisitor { + + // ctor & dtor + public: + ConvertPileupFormatVisitor(const RefVector& references, + const string& fastaFilename, + const bool isPrintingMapQualities, + ostream* out); + ~ConvertPileupFormatVisitor(void); + + // PileupVisitor interface implementation + public: + void Visit(const PileupPosition& pileupData); + + // data members + private: + Fasta m_fasta; + bool m_hasFasta; + bool m_isPrintingMapQualities; + ostream* m_out; + RefVector m_references; +}; } // namespace BamTools @@ -75,7 +75,7 @@ namespace BamTools { struct ConvertTool::ConvertSettings { - // flags + // flag bool HasInput; bool HasOutput; bool HasFormat; @@ -116,8 +116,12 @@ struct ConvertTool::ConvertToolPrivate { // ctor & dtor public: - ConvertToolPrivate(ConvertTool::ConvertSettings* settings); - ~ConvertToolPrivate(void); + ConvertToolPrivate(ConvertTool::ConvertSettings* settings) + : m_settings(settings) + , m_out(cout.rdbuf()) + { } + + ~ConvertToolPrivate(void) { } // interface public: @@ -142,13 +146,6 @@ struct ConvertTool::ConvertToolPrivate { ostream m_out; }; -ConvertTool::ConvertToolPrivate::ConvertToolPrivate(ConvertTool::ConvertSettings* settings) - : m_settings(settings) - , m_out(cout.rdbuf()) // default output to cout -{ } - -ConvertTool::ConvertToolPrivate::~ConvertToolPrivate(void) { } - bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ @@ -713,6 +710,7 @@ ConvertTool::ConvertTool(void) } ConvertTool::~ConvertTool(void) { + delete m_settings; m_settings = 0; @@ -730,9 +728,10 @@ int ConvertTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // run internal ConvertTool implementation, return success/fail + // initialize ConvertTool with settings m_impl = new ConvertToolPrivate(m_settings); + // run ConvertTool, return success/fail if ( m_impl->Run() ) return 0; else diff --git a/src/toolkit/bamtools_count.cpp b/src/toolkit/bamtools_count.cpp index 40e7c5d..c17b257 100644 --- a/src/toolkit/bamtools_count.cpp +++ b/src/toolkit/bamtools_count.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 23 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Prints alignment count for BAM file(s) // *************************************************************************** @@ -41,60 +41,53 @@ struct CountTool::CountSettings { }; // --------------------------------------------- -// CountTool implementation +// CountToolPrivate implementation -CountTool::CountTool(void) - : AbstractTool() - , m_settings(new CountSettings) -{ - // set program details - Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in -in ...] [-region ]"); - - // set up options - OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); -} +struct CountTool::CountToolPrivate { -CountTool::~CountTool(void) { - delete m_settings; - m_settings = 0; -} + // ctor & dtro + public: + CountToolPrivate(CountTool::CountSettings* settings) + : m_settings(settings) + { } -int CountTool::Help(void) { - Options::DisplayHelp(); - return 0; -} + ~CountToolPrivate(void) { } -int CountTool::Run(int argc, char* argv[]) { + // interface + public: + bool Run(void); - // parse command line arguments - Options::Parse(argc, argv, 1); + // data members + private: + CountTool::CountSettings* m_settings; +}; + +bool CountTool::CountToolPrivate::Run(void) { // if no '-in' args supplied, default to stdin - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); - + // open reader without index BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl; - return 1; + return false; } // alignment counter BamAlignment al; int alignmentCount(0); - - // if no region specified, count entire file + + // if no region specified, count entire file if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } - + // otherwise attempt to use region as constraint else { - + // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { @@ -104,46 +97,92 @@ int CountTool::Run(int argc, char* argv[]) { // if index data available for all BAM files, we can use SetRegion if ( reader.IsIndexLoaded() ) { - + // attempt to set region on reader if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); - return 1; - } - + return false; + } + // everything checks out, just iterate through specified region, counting alignments while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; - } - + } + // no index data available, we have to iterate through until we // find overlapping alignments else { while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && - (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } - } - + } + // error parsing REGION string else { cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); - return 1; + return false; } } - - // print results + + // print results cout << alignmentCount << endl; - + // clean up & exit reader.Close(); + return true; +} + +// --------------------------------------------- +// CountTool implementation + +CountTool::CountTool(void) + : AbstractTool() + , m_settings(new CountSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in -in ...] [-region ]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); +} + +CountTool::~CountTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int CountTool::Help(void) { + Options::DisplayHelp(); return 0; +} + +int CountTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize CountTool with settings + m_impl = new CountToolPrivate(m_settings); + + // run CountTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_count.h b/src/toolkit/bamtools_count.h index e3d0c81..d9c3c3d 100644 --- a/src/toolkit/bamtools_count.h +++ b/src/toolkit/bamtools_count.h @@ -3,12 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Prints alignment count for BAM file -// -// ** Expand to multiple?? -// +// Prints alignment count for BAM file(s) // *************************************************************************** #ifndef BAMTOOLS_COUNT_H @@ -31,6 +28,9 @@ class CountTool : public AbstractTool { private: struct CountSettings; CountSettings* m_settings; + + struct CountToolPrivate; + CountToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_coverage.cpp b/src/toolkit/bamtools_coverage.cpp index 748f513..c3eaa20 100644 --- a/src/toolkit/bamtools_coverage.cpp +++ b/src/toolkit/bamtools_coverage.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Prints coverage data for a single BAM file // *************************************************************************** @@ -81,8 +81,12 @@ struct CoverageTool::CoverageToolPrivate { // ctor & dtor public: - CoverageToolPrivate(CoverageTool::CoverageSettings* settings); - ~CoverageToolPrivate(void); + CoverageToolPrivate(CoverageTool::CoverageSettings* settings) + : m_settings(settings) + , m_out(cout.rdbuf()) + { } + + ~CoverageToolPrivate(void) { } // interface public: @@ -95,13 +99,6 @@ struct CoverageTool::CoverageToolPrivate { RefVector m_references; }; -CoverageTool::CoverageToolPrivate::CoverageToolPrivate(CoverageTool::CoverageSettings* settings) - : m_settings(settings) - , m_out(cout.rdbuf()) // default output to cout -{ } - -CoverageTool::CoverageToolPrivate::~CoverageToolPrivate(void) { } - bool CoverageTool::CoverageToolPrivate::Run(void) { // if output filename given @@ -171,6 +168,7 @@ CoverageTool::CoverageTool(void) } CoverageTool::~CoverageTool(void) { + delete m_settings; m_settings = 0; @@ -188,9 +186,10 @@ int CoverageTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // run internal ConvertTool implementation, return success/fail + // initialize CoverageTool with settings m_impl = new CoverageToolPrivate(m_settings); + // run CoverageTool, return success/fail if ( m_impl->Run() ) return 0; else diff --git a/src/toolkit/bamtools_filter.cpp b/src/toolkit/bamtools_filter.cpp index 023fbc9..9316711 100644 --- a/src/toolkit/bamtools_filter.cpp +++ b/src/toolkit/bamtools_filter.cpp @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Filters BAM file(s) according to some user-specified criteria. +// Filters BAM file(s) according to some user-specified criteria // *************************************************************************** #include "bamtools_filter.h" @@ -223,38 +223,6 @@ struct BamAlignmentChecker { } // namespace BamTools -// --------------------------------------------- -// FilterToolPrivate declaration - -class FilterTool::FilterToolPrivate { - - // ctor & dtor - public: - FilterToolPrivate(FilterTool::FilterSettings* settings); - ~FilterToolPrivate(void); - - // 'public' interface - public: - bool Run(void); - - // internal methods - private: - bool AddPropertyTokensToFilter(const string& filterName, const map& propertyTokens); - bool CheckAlignment(const BamAlignment& al); - const string GetScriptContents(void); - void InitProperties(void); - bool ParseCommandLine(void); - bool ParseFilterObject(const string& filterName, const Json::Value& filterObject); - bool ParseScript(void); - bool SetupFilters(void); - - // data members - private: - vector m_propertyNames; - FilterTool::FilterSettings* m_settings; - FilterEngine m_filterEngine; -}; - // --------------------------------------------- // FilterSettings implementation @@ -262,23 +230,23 @@ struct FilterTool::FilterSettings { // ---------------------------------- // IO opts - + // flags bool HasInputBamFilename; bool HasOutputBamFilename; bool HasRegion; bool HasScriptFilename; bool IsForceCompression; - + // filenames vector InputFiles; string OutputFilename; string Region; string ScriptFilename; - + // ----------------------------------- // General filter opts - + // flags bool HasAlignmentFlagFilter; bool HasInsertSizeFilter; @@ -297,7 +265,7 @@ struct FilterTool::FilterSettings { // ----------------------------------- // AlignmentFlag filter opts - + // flags bool HasIsDuplicateFilter; bool HasIsFailedQCFilter; @@ -310,7 +278,7 @@ struct FilterTool::FilterSettings { bool HasIsProperPairFilter; bool HasIsReverseStrandFilter; bool HasIsSecondMateFilter; - + // filters string IsDuplicateFilter; string IsFailedQCFilter; @@ -323,10 +291,10 @@ struct FilterTool::FilterSettings { string IsProperPairFilter; string IsReverseStrandFilter; string IsSecondMateFilter; - + // --------------------------------- // constructor - + FilterSettings(void) : HasInputBamFilename(false) , HasOutputBamFilename(false) @@ -363,72 +331,39 @@ struct FilterTool::FilterSettings { , IsReverseStrandFilter(TRUE_STR) , IsSecondMateFilter(TRUE_STR) { } -}; +}; // --------------------------------------------- -// FilterTool implementation - -FilterTool::FilterTool(void) - : AbstractTool() - , m_settings(new FilterSettings) - , m_impl(0) -{ - // set program details - Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in -in ...] [-out | [-forceCompression]] [-region ] [ [-script HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); - Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts); - Options::AddValueOption("-script", "filename", "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts); - Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); - - OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters"); - Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts); - Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts); - Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts); - Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts); - Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts); - Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts); - - OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters"); - Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); - Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR); -} - -FilterTool::~FilterTool(void) { - delete m_settings; - m_settings = 0; - - delete m_impl; - m_impl = 0; -} - -int FilterTool::Help(void) { - Options::DisplayHelp(); - return 0; -} +// FilterToolPrivate declaration -int FilterTool::Run(int argc, char* argv[]) { - - // parse command line arguments - Options::Parse(argc, argv, 1); - - // run internal FilterTool implementation, return success/fail - m_impl = new FilterToolPrivate(m_settings); - - if ( m_impl->Run() ) return 0; - else return 1; -} +class FilterTool::FilterToolPrivate { + + // ctor & dtor + public: + FilterToolPrivate(FilterTool::FilterSettings* settings); + ~FilterToolPrivate(void); + + // 'public' interface + public: + bool Run(void); + + // internal methods + private: + bool AddPropertyTokensToFilter(const string& filterName, const map& propertyTokens); + bool CheckAlignment(const BamAlignment& al); + const string GetScriptContents(void); + void InitProperties(void); + bool ParseCommandLine(void); + bool ParseFilterObject(const string& filterName, const Json::Value& filterObject); + bool ParseScript(void); + bool SetupFilters(void); + + // data members + private: + vector m_propertyNames; + FilterTool::FilterSettings* m_settings; + FilterEngine m_filterEngine; +}; // --------------------------------------------- // FilterToolPrivate implementation @@ -847,3 +782,72 @@ bool FilterTool::FilterToolPrivate::SetupFilters(void) { // otherwise check command line for filters else return ParseCommandLine(); } + +// --------------------------------------------- +// FilterTool implementation + +FilterTool::FilterTool(void) + : AbstractTool() + , m_settings(new FilterSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in -in ...] [-out | [-forceCompression]] [-region ] [ [-script HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts); + Options::AddValueOption("-script", "filename", "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + + OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters"); + Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts); + Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts); + Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts); + Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts); + Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts); + Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts); + + OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters"); + Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR); +} + +FilterTool::~FilterTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int FilterTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int FilterTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize FilterTool with settings + m_impl = new FilterToolPrivate(m_settings); + + // run FilterTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; +} diff --git a/src/toolkit/bamtools_filter.h b/src/toolkit/bamtools_filter.h index 2abb0e7..4d125c9 100644 --- a/src/toolkit/bamtools_filter.h +++ b/src/toolkit/bamtools_filter.h @@ -5,8 +5,7 @@ // --------------------------------------------------------------------------- // Last modified: 28 August 2010 // --------------------------------------------------------------------------- -// Filters a single BAM file (or filters multiple BAM files and merges) -// according to some user-specified criteria. +// Filters BAM file(s) according to some user-specified criteria // *************************************************************************** #ifndef BAMTOOLS_FILTER_H diff --git a/src/toolkit/bamtools_header.cpp b/src/toolkit/bamtools_header.cpp index aad413f..984ddd4 100644 --- a/src/toolkit/bamtools_header.cpp +++ b/src/toolkit/bamtools_header.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Prints the SAM-style header from a single BAM file ( or merged header from // multiple BAM files) to stdout @@ -37,12 +37,53 @@ struct HeaderTool::HeaderSettings { { } }; +struct HeaderTool::HeaderToolPrivate { + + // ctor & dtor + public: + HeaderToolPrivate(HeaderTool::HeaderSettings* settings) + : m_settings(settings) + { } + + ~HeaderToolPrivate(void) { } + + // interface + public: + bool Run(void); + + // data members + private: + HeaderTool::HeaderSettings* m_settings; +}; + +bool HeaderTool::HeaderToolPrivate::Run(void) { + + // set to default input if none provided + if ( !m_settings->HasInputBamFilename ) + m_settings->InputFiles.push_back(Options::StandardIn()); + + // attemp to open BAM files + BamMultiReader reader; + if ( !reader.Open(m_settings->InputFiles) ) { + cerr << "bamtools header ERROR: could not open BAM file(s) for reading... Aborting." << endl; + return false; + } + + // dump (merged) header contents to stdout + cout << reader.GetHeaderText() << endl; + + // clean up & exit + reader.Close(); + return true; +} + // --------------------------------------------- // HeaderTool implementation HeaderTool::HeaderTool(void) : AbstractTool() , m_settings(new HeaderSettings) + , m_impl(0) { // set program details Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in -in ...] "); @@ -53,8 +94,12 @@ HeaderTool::HeaderTool(void) } HeaderTool::~HeaderTool(void) { + delete m_settings; m_settings = 0; + + delete m_impl; + m_impl = 0; } int HeaderTool::Help(void) { @@ -67,21 +112,12 @@ int HeaderTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) - m_settings->InputFiles.push_back(Options::StandardIn()); - - // attemp to open BAM files - BamMultiReader reader; - if ( !reader.Open(m_settings->InputFiles) ) { - cerr << "bamtools header ERROR: could not open BAM file(s) for reading... Aborting." << endl; - return 1; - } - - // dump (merged) header contents to stdout - cout << reader.GetHeaderText() << endl; + // initialize HeaderTool with settings + m_impl = new HeaderToolPrivate(m_settings); - // clean up & exit - reader.Close(); - return 0; + // run HeaderTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_header.h b/src/toolkit/bamtools_header.h index c52e090..7b5f100 100644 --- a/src/toolkit/bamtools_header.h +++ b/src/toolkit/bamtools_header.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Prints the SAM-style header from a single BAM file ( or merged header from // multiple BAM files) to stdout @@ -29,6 +29,9 @@ class HeaderTool : public AbstractTool { private: struct HeaderSettings; HeaderSettings* m_settings; + + struct HeaderToolPrivate; + HeaderToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_index.cpp b/src/toolkit/bamtools_index.cpp index 6e5a86d..c1bd2e5 100644 --- a/src/toolkit/bamtools_index.cpp +++ b/src/toolkit/bamtools_index.cpp @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Creates a BAM index file. +// Creates a BAM index file // *************************************************************************** #include "bamtools_index.h" @@ -38,12 +38,55 @@ struct IndexTool::IndexSettings { { } }; +// --------------------------------------------- +// IndexToolPrivate implementation + +struct IndexTool::IndexToolPrivate { + + // ctor & dtor + public: + IndexToolPrivate(IndexTool::IndexSettings* settings) + : m_settings(settings) + { } + + ~IndexToolPrivate(void) { } + + // interface + public: + bool Run(void); + + // data members + private: + IndexTool::IndexSettings* m_settings; +}; + +bool IndexTool::IndexToolPrivate::Run(void) { + + // open our BAM reader + BamReader reader; + if ( !reader.Open(m_settings->InputBamFilename) ) { + cerr << "bamtools index ERROR: could not open BAM file: " + << m_settings->InputBamFilename << endl; + return false; + } + + // create index for BAM file + const BamIndex::IndexType type = ( m_settings->IsUsingBamtoolsIndex ? BamIndex::BAMTOOLS + : BamIndex::STANDARD ); + reader.CreateIndex(type); + + // clean & exit + reader.Close(); + return true; +} + // --------------------------------------------- // IndexTool implementation IndexTool::IndexTool(void) : AbstractTool() , m_settings(new IndexSettings) + , m_impl(0) { // set program details Options::SetProgramInfo("bamtools index", "creates index for BAM file", "[-in ] [-bti]"); @@ -55,8 +98,12 @@ IndexTool::IndexTool(void) } IndexTool::~IndexTool(void) { + delete m_settings; m_settings = 0; + + delete m_impl; + m_impl = 0; } int IndexTool::Help(void) { @@ -69,19 +116,12 @@ int IndexTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // open our BAM reader - BamReader reader; - if ( !reader.Open(m_settings->InputBamFilename) ) { - cerr << "bamtools index ERROR: could not open BAM file: " << m_settings->InputBamFilename << endl; + // initialize IndexTool with settings + m_impl = new IndexToolPrivate(m_settings); + + // run IndexTool, return success/fail + if ( m_impl->Run() ) + return 0; + else return 1; - } - - // create index for BAM file - const BamIndex::IndexType type = ( m_settings->IsUsingBamtoolsIndex ? BamIndex::BAMTOOLS - : BamIndex::STANDARD ); - reader.CreateIndex(type); - - // clean & exit - reader.Close(); - return 0; } diff --git a/src/toolkit/bamtools_index.h b/src/toolkit/bamtools_index.h index 0c6c58f..11cc057 100644 --- a/src/toolkit/bamtools_index.h +++ b/src/toolkit/bamtools_index.h @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 18 January 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Creates a BAM index file. +// Creates a BAM index file // *************************************************************************** #ifndef BAMTOOLS_INDEX_H @@ -28,6 +28,9 @@ class IndexTool : public AbstractTool { private: struct IndexSettings; IndexSettings* m_settings; + + struct IndexToolPrivate; + IndexToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_merge.cpp b/src/toolkit/bamtools_merge.cpp index fc3675e..7cfc099 100644 --- a/src/toolkit/bamtools_merge.cpp +++ b/src/toolkit/bamtools_merge.cpp @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Merges multiple BAM files into one. +// Merges multiple BAM files into one // *************************************************************************** #include "bamtools_merge.h" @@ -50,56 +50,47 @@ struct MergeTool::MergeSettings { }; // --------------------------------------------- -// MergeTool implementation +// MergeToolPrivate implementation -MergeTool::MergeTool(void) - : AbstractTool() - , m_settings(new MergeSettings) -{ - // set program details - Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in -in ...] [-out | [-forceCompression]] [-region ]"); - - // set up options - OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); - Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); - Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts); -} +struct MergeTool::MergeToolPrivate { -MergeTool::~MergeTool(void) { - delete m_settings; - m_settings = 0; -} + // ctor & dtor + public: + MergeToolPrivate(MergeTool::MergeSettings* settings) + : m_settings(settings) + { } -int MergeTool::Help(void) { - Options::DisplayHelp(); - return 0; -} + ~MergeToolPrivate(void) { } -int MergeTool::Run(int argc, char* argv[]) { - - // parse command line arguments - Options::Parse(argc, argv, 1); - - // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) + // interface + public: + bool Run(void); + + // data members + private: + MergeTool::MergeSettings* m_settings; +}; + +bool MergeTool::MergeToolPrivate::Run(void) { + + // set to default input if none provided + if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); - + // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl; - return 1; + return false; } - + // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && - !m_settings->IsForceCompression ); + !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; @@ -107,7 +98,8 @@ int MergeTool::Run(int argc, char* argv[]) { BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) { - cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl; + cerr << "bamtools merge ERROR: could not open " + << m_settings->OutputFilename << " for writing." << endl; reader.Close(); return false; } @@ -118,10 +110,10 @@ int MergeTool::Run(int argc, char* argv[]) { while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } - + // otherwise attempt to use region as constraint else { - + // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { @@ -133,32 +125,37 @@ int MergeTool::Run(int argc, char* argv[]) { if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error - if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { - cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" << endl; + if ( !reader.SetRegion(region.LeftRefID, + region.LeftPosition, + region.RightRefID, + region.RightPosition) ) + { + cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" + << endl; reader.Close(); - return 1; - } - + return false; + } + // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); - } - + } + // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && - (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } - } - + } + // error parsing REGION string else { cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl; @@ -166,12 +163,60 @@ int MergeTool::Run(int argc, char* argv[]) { << endl; reader.Close(); writer.Close(); - return 1; + return false; } } - + // clean & exit reader.Close(); writer.Close(); - return 0; + return true; +} + +// --------------------------------------------- +// MergeTool implementation + +MergeTool::MergeTool(void) + : AbstractTool() + , m_settings(new MergeSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in -in ...] [-out | [-forceCompression]] [-region ]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts); +} + +MergeTool::~MergeTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int MergeTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int MergeTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize MergeTool with settings + m_impl = new MergeToolPrivate(m_settings); + + // run MergeTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_merge.h b/src/toolkit/bamtools_merge.h index 2962ce8..b1c2e1f 100644 --- a/src/toolkit/bamtools_merge.h +++ b/src/toolkit/bamtools_merge.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Merges multiple BAM files into one // *************************************************************************** @@ -28,6 +28,9 @@ class MergeTool : public AbstractTool { private: struct MergeSettings; MergeSettings* m_settings; + + struct MergeToolPrivate; + MergeToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_random.cpp b/src/toolkit/bamtools_random.cpp index bca9861..927349e 100644 --- a/src/toolkit/bamtools_random.cpp +++ b/src/toolkit/bamtools_random.cpp @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) +// Last modified: 7 April 2011 (DB) // --------------------------------------------------------------------------- -// Grab a random subset of alignments. +// Grab a random subset of alignments (testing tool) // *************************************************************************** #include "bamtools_random.h" @@ -67,52 +67,40 @@ struct RandomTool::RandomSettings { }; // --------------------------------------------- -// RandomTool implementation +// RandomToolPrivate implementation -RandomTool::RandomTool(void) - : AbstractTool() - , m_settings(new RandomSettings) -{ - // set program details - Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in -in ...] [-out ] [-forceCompression] [-n] [-region ]"); - - // set up options - OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); - Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); - Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); - - OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings"); - Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT); -} +struct RandomTool::RandomToolPrivate { -RandomTool::~RandomTool(void) { - delete m_settings; - m_settings = 0; -} + // ctor & dtor + public: + RandomToolPrivate(RandomTool::RandomSettings* settings) + : m_settings(settings) + { } -int RandomTool::Help(void) { - Options::DisplayHelp(); - return 0; -} + ~RandomToolPrivate(void) { } -int RandomTool::Run(int argc, char* argv[]) { + // interface + public: + bool Run(void); - // parse command line arguments - Options::Parse(argc, argv, 1); + // data members + private: + RandomTool::RandomSettings* m_settings; +}; + +bool RandomTool::RandomToolPrivate::Run(void) { // set to default stdin if no input files provided - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); - + // open our reader BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl; - return 1; + return false; } - + // look up index files for all BAM files reader.LocateIndexes(); @@ -120,18 +108,18 @@ int RandomTool::Run(int argc, char* argv[]) { if ( !reader.HasIndexes() ) { cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl; reader.Close(); - return 1; + return false; } - - // get BamReader metadata + + // get BamReader metadata const string headerText = reader.GetHeaderText(); const RefVector references = reader.GetReferenceData(); if ( references.empty() ) { cerr << "bamtools random ERROR: no reference data available... Aborting." << endl; reader.Close(); - return 1; + return false; } - + // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); @@ -142,57 +130,62 @@ int RandomTool::Run(int argc, char* argv[]) { BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) { - cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename << " for writing... Aborting." << endl; + cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename + << " for writing... Aborting." << endl; reader.Close(); - return 1; + return false; } - // if user specified a REGION constraint, attempt to parse REGION string - BamRegion region; + // if user specified a REGION constraint, attempt to parse REGION string + BamRegion region; if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) { cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); - return 1; + return false; } - + // seed our random number generator srand( time(NULL) ); - - // grab random alignments + + // grab random alignments BamAlignment al; unsigned int i = 0; while ( i < m_settings->AlignmentCount ) { - + int randomRefId = 0; int randomPosition = 0; - + // use REGION constraints to select random refId & position if ( m_settings->HasRegion ) { - + // select a random refId randomRefId = getRandomInt(region.LeftRefID, region.RightRefID); - + // select a random position based on randomRefId - const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); - const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : (references.at(randomRefId).RefLength - 1) ); + const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) + ? region.LeftPosition + : 0 ); + const int upperBoundPosition = ( (randomRefId == region.RightRefID) + ? region.RightPosition + : (references.at(randomRefId).RefLength - 1) ); randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); - } - + } + // otherwise select from all possible random refId & position else { - + // select random refId randomRefId = getRandomInt(0, (int)references.size() - 1); - + // select random position based on randomRefId const int lowerBoundPosition = 0; const int upperBoundPosition = references.at(randomRefId).RefLength - 1; - randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); + randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } - + // if jump & read successful, save first alignment that overlaps random refId & position if ( reader.Jump(randomRefId, randomPosition) ) { while ( reader.GetNextAlignmentCore(al) ) { @@ -204,9 +197,60 @@ int RandomTool::Run(int argc, char* argv[]) { } } } - + // cleanup & exit reader.Close(); writer.Close(); + return true; +} + +// --------------------------------------------- +// RandomTool implementation + +RandomTool::RandomTool(void) + : AbstractTool() + , m_settings(new RandomSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in -in ...] [-out ] [-forceCompression] [-n] [-region ]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); + + OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings"); + Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT); +} + +RandomTool::~RandomTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int RandomTool::Help(void) { + Options::DisplayHelp(); return 0; +} + +int RandomTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize RandomTool with settings + m_impl = new RandomToolPrivate(m_settings); + + // run RandomTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_random.h b/src/toolkit/bamtools_random.h index ac13149..688ab5c 100644 --- a/src/toolkit/bamtools_random.h +++ b/src/toolkit/bamtools_random.h @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 20 July 2010 (DB) +// Last modified: 7 April 2010 (DB) // --------------------------------------------------------------------------- -// Grab a random subset of alignments. +// Grab a random subset of alignments (testing tool) // *************************************************************************** #ifndef BAMTOOLS_RANDOM_H @@ -28,6 +28,9 @@ class RandomTool : public AbstractTool { private: struct RandomSettings; RandomSettings* m_settings; + + struct RandomToolPrivate; + RandomToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_revert.cpp b/src/toolkit/bamtools_revert.cpp index a9da67e..c751ce4 100644 --- a/src/toolkit/bamtools_revert.cpp +++ b/src/toolkit/bamtools_revert.cpp @@ -1,11 +1,11 @@ // *************************************************************************** -// bamtools_cpp (c) 2010 Derek Barnett, Alistair Ward +// bamtools_revert.cpp (c) 2010 Derek Barnett, Alistair Ward // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Prints general alignment statistics for BAM file(s). +// Removes duplicate marks and restores original base qualities // *************************************************************************** #include "bamtools_revert.h" @@ -20,6 +20,12 @@ using namespace BamTools; #include using namespace std; +namespace BamTools { + +static const string OQ_TAG = "OQ"; + +} // namespace BamTools; + // --------------------------------------------- // RevertSettings implementation @@ -55,8 +61,10 @@ struct RevertTool::RevertToolPrivate { // ctor & dtor public: - RevertToolPrivate(RevertTool::RevertSettings* settings); - ~RevertToolPrivate(void); + RevertToolPrivate(RevertTool::RevertSettings* settings) + : m_settings(settings) + { } + ~RevertToolPrivate(void) { } // 'public' interface public: @@ -69,27 +77,21 @@ struct RevertTool::RevertToolPrivate { // data members private: RevertTool::RevertSettings* m_settings; - string m_OQ; }; -RevertTool::RevertToolPrivate::RevertToolPrivate(RevertTool::RevertSettings* settings) - : m_settings(settings) - , m_OQ("OQ") -{ } - -RevertTool::RevertToolPrivate::~RevertToolPrivate(void) { } - -// reverts a BAM alignment -// default behavior (for now) is : replace Qualities with OQ, clear IsDuplicate flag +// 'reverts' a BAM alignment +// default behavior (for now) is: +// 1 - replace Qualities with OQ contents +// 2 - clear IsDuplicate flag // can override default behavior using command line options void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) { - // replace Qualities with OQ, if requested + // replace Qualities with OQ contents, if requested if ( !m_settings->IsKeepQualities ) { string originalQualities; - if ( al.GetTag(m_OQ, originalQualities) ) { + if ( al.GetTag(OQ_TAG, originalQualities) ) { al.Qualities = originalQualities; - al.RemoveTag(m_OQ); + al.RemoveTag(OQ_TAG); } } @@ -164,6 +166,7 @@ RevertTool::RevertTool(void) } RevertTool::~RevertTool(void) { + delete m_settings; m_settings = 0; @@ -181,9 +184,12 @@ int RevertTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // run internal RevertTool implementation, return success/fail + // intialize RevertTool with settings m_impl = new RevertToolPrivate(m_settings); - if ( m_impl->Run() ) return 0; - else return 1; + // run RevertTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_revert.h b/src/toolkit/bamtools_revert.h index b97d47b..4b2a24c 100644 --- a/src/toolkit/bamtools_revert.h +++ b/src/toolkit/bamtools_revert.h @@ -1,11 +1,11 @@ // *************************************************************************** -// bamtools_stats.h (c) 2010 Derek Barnett, Alistair Ward +// bamtools_revert.h (c) 2010 Derek Barnett, Alistair Ward // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 5 December 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// +// Removes duplicate marks and restores original base qualities // *************************************************************************** #ifndef BAMTOOLS_REVERT_H diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index 8d18f67..5cbeaba 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) +// Last modified: 7 April 2011 (DB) // --------------------------------------------------------------------------- -// Sorts an input BAM file (default by position) and stores in a new BAM file. +// Sorts an input BAM file // *************************************************************************** #include "bamtools_sort.h" @@ -26,67 +26,35 @@ using namespace std; namespace BamTools { - // defaults - // - // ** These defaults should be tweaked & 'optimized' per testing ** // - // - // I say 'optimized' because each system will naturally perform - // differently. We will attempt to determine a sensible - // compromise that should perform well on average. - const unsigned int SORT_DEFAULT_MAX_BUFFER_COUNT = 500000; // max numberOfAlignments for buffer - const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024; // Mb - - // ----------------------------------- - // comparison objects (for sorting) - - struct SortLessThanPosition { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID < rhs.RefID; - else - return lhs.Position < rhs.Position; - } - }; - - struct SortLessThanName { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name < rhs.Name; - } - }; - -} // namespace BamTools +// defaults +// +// ** These defaults should be tweaked & 'optimized' per testing ** // +// +// I say 'optimized' because each system will naturally perform +// differently. We will attempt to determine a sensible +// compromise that should perform well on average. +const unsigned int SORT_DEFAULT_MAX_BUFFER_COUNT = 500000; // max numberOfAlignments for buffer +const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024; // Mb + +// ----------------------------------- +// comparison objects (for sorting) + +struct SortLessThanPosition { + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + if ( lhs.RefID != rhs.RefID ) + return lhs.RefID < rhs.RefID; + else + return lhs.Position < rhs.Position; + } +}; -// --------------------------------------------- -// SortToolPrivate declaration -class SortTool::SortToolPrivate { - - // ctor & dtor - public: - SortToolPrivate(SortTool::SortSettings* settings); - ~SortToolPrivate(void); - - // 'public' interface - public: - bool Run(void); - - // internal methods - private: - void ClearBuffer(vector& buffer); - bool GenerateSortedRuns(void); - bool HandleBufferContents(vector& buffer); - bool MergeSortedRuns(void); - bool WriteTempFile(const vector& buffer, const string& tempFilename); - void SortBuffer(vector& buffer); - - // data members - private: - SortTool::SortSettings* m_settings; - string m_tempFilenameStub; - int m_numberOfRuns; - string m_headerText; - RefVector m_references; - vector m_tempFilenames; +struct SortLessThanName { + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return lhs.Name < rhs.Name; + } }; + +} // namespace BamTools // --------------------------------------------- // SortSettings implementation @@ -103,11 +71,11 @@ struct SortTool::SortSettings { // filenames string InputBamFilename; string OutputBamFilename; - + // parameters unsigned int MaxBufferCount; unsigned int MaxBufferMemory; - + // constructor SortSettings(void) : HasInputBamFilename(false) @@ -119,61 +87,42 @@ struct SortTool::SortSettings { , OutputBamFilename(Options::StandardOut()) , MaxBufferCount(SORT_DEFAULT_MAX_BUFFER_COUNT) , MaxBufferMemory(SORT_DEFAULT_MAX_BUFFER_MEMORY) - { } -}; + { } +}; // --------------------------------------------- -// SortTool implementation - -SortTool::SortTool(void) - : AbstractTool() - , m_settings(new SortSettings) - , m_impl(0) -{ - // set program details - Options::SetProgramInfo("bamtools sort", "sorts a BAM file", "[-in ] [-out ] [sortOptions]"); - - // set up options - OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut()); - - OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods"); - Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts); - - OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings"); - Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT); - Options::AddValueOption("-mem", "Mb", "max memory to use", "", m_settings->HasMaxBufferMemory, m_settings->MaxBufferMemory, MemOpts, SORT_DEFAULT_MAX_BUFFER_MEMORY); -} - -SortTool::~SortTool(void) { - - delete m_settings; - m_settings = 0; - - delete m_impl; - m_impl = 0; -} - -int SortTool::Help(void) { - Options::DisplayHelp(); - return 0; -} +// SortToolPrivate implementation -int SortTool::Run(int argc, char* argv[]) { - - // parse command line arguments - Options::Parse(argc, argv, 1); - - // run internal SortTool implementation, return success/fail - m_impl = new SortToolPrivate(m_settings); - - if ( m_impl->Run() ) return 0; - else return 1; -} +class SortTool::SortToolPrivate { + + // ctor & dtor + public: + SortToolPrivate(SortTool::SortSettings* settings); + ~SortToolPrivate(void) { } + + // 'public' interface + public: + bool Run(void); + + // internal methods + private: + void ClearBuffer(vector& buffer); + bool GenerateSortedRuns(void); + bool HandleBufferContents(vector& buffer); + bool MergeSortedRuns(void); + bool WriteTempFile(const vector& buffer, const string& tempFilename); + void SortBuffer(vector& buffer); + + // data members + private: + SortTool::SortSettings* m_settings; + string m_tempFilenameStub; + int m_numberOfRuns; + string m_headerText; + RefVector m_references; + vector m_tempFilenames; +}; -// --------------------------------------------- -// SortToolPrivate implementation // constructor SortTool::SortToolPrivate::SortToolPrivate(SortTool::SortSettings* settings) @@ -190,9 +139,6 @@ SortTool::SortToolPrivate::SortToolPrivate(SortTool::SortSettings* settings) } } -// destructor -SortTool::SortToolPrivate::~SortToolPrivate(void) { } - // generates mutiple sorted temp BAM files from single unsorted BAM file bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { @@ -371,3 +317,56 @@ bool SortTool::SortToolPrivate::WriteTempFile(const vector& buffer tempWriter.Close(); return true; } + +// --------------------------------------------- +// SortTool implementation + +SortTool::SortTool(void) + : AbstractTool() + , m_settings(new SortSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools sort", "sorts a BAM file", "[-in ] [-out ] [sortOptions]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut()); + + OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods"); + Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts); + + OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings"); + Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT); + Options::AddValueOption("-mem", "Mb", "max memory to use", "", m_settings->HasMaxBufferMemory, m_settings->MaxBufferMemory, MemOpts, SORT_DEFAULT_MAX_BUFFER_MEMORY); +} + +SortTool::~SortTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int SortTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int SortTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize SortTool with settings + m_impl = new SortToolPrivate(m_settings); + + // run SortTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; +} diff --git a/src/toolkit/bamtools_sort.h b/src/toolkit/bamtools_sort.h index 0241b02..fe0ed5d 100644 --- a/src/toolkit/bamtools_sort.h +++ b/src/toolkit/bamtools_sort.h @@ -3,9 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 June 2010 (DB) +// Last modified: 7 April 2011 (DB) // --------------------------------------------------------------------------- -// Sorts a BAM file. +// Sorts a BAM file // *************************************************************************** #ifndef BAMTOOLS_SORT_H diff --git a/src/toolkit/bamtools_split.cpp b/src/toolkit/bamtools_split.cpp index f4d3db8..bfbf303 100644 --- a/src/toolkit/bamtools_split.cpp +++ b/src/toolkit/bamtools_split.cpp @@ -3,10 +3,10 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) +// Last modified: 7 April 2011 (DB) // --------------------------------------------------------------------------- // Splits a BAM file on user-specified property, creating a new BAM output -// file for each value found. +// file for each value found // *************************************************************************** #include "bamtools_split.h" @@ -101,8 +101,13 @@ class SplitTool::SplitToolPrivate { // ctor & dtor public: - SplitToolPrivate(SplitTool::SplitSettings* settings); - ~SplitToolPrivate(void); + SplitToolPrivate(SplitTool::SplitSettings* settings) + : m_settings(settings) + { } + + ~SplitToolPrivate(void) { + m_reader.Close(); + } // 'public' interface public: @@ -140,16 +145,6 @@ class SplitTool::SplitToolPrivate { RefVector m_references; }; -// constructor -SplitTool::SplitToolPrivate::SplitToolPrivate(SplitTool::SplitSettings* settings) - : m_settings(settings) -{ } - -// destructor -SplitTool::SplitToolPrivate::~SplitToolPrivate(void) { - m_reader.Close(); -} - void SplitTool::SplitToolPrivate::DetermineOutputFilenameStub(void) { // if user supplied output filename stub, use that @@ -392,8 +387,9 @@ bool SplitTool::SplitToolPrivate::SplitTag(void) { // -------------------------------------------------------------------------------- // template method implementation -// N.B. - *technical note* - use of template methods defined in ".cpp" goes against normal practices -// but works here because these are purely internal (no one can call from outside this file) +// *Technical Note* - use of template methods declared & defined in ".cpp" file +// goes against normal practices, but works here because these +// are purely internal (no one can call from outside this file) // close BamWriters & delete pointers template @@ -407,13 +403,17 @@ void SplitTool::SplitToolPrivate::CloseWriters(map& writers) { WriterMapIterator writerEnd = writers.end(); for ( ; writerIter != writerEnd; ++writerIter ) { BamWriter* writer = (*writerIter).second; - if (writer == 0 ) continue; + if ( writer == 0 ) continue; - // close & delete writer + // close BamWriter writer->Close(); + + // destroy BamWriter delete writer; writer = 0; } + + // clear the container (destroying the items doesn't remove them) writers.clear(); } @@ -541,10 +541,10 @@ int SplitTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // initialize internal implementation + // initialize SplitTool with settings m_impl = new SplitToolPrivate(m_settings); - // run tool, return success/fail + // run SplitTool, return success/fail if ( m_impl->Run() ) return 0; else diff --git a/src/toolkit/bamtools_split.h b/src/toolkit/bamtools_split.h index 3cb85dd..776037c 100644 --- a/src/toolkit/bamtools_split.h +++ b/src/toolkit/bamtools_split.h @@ -3,10 +3,10 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) +// Last modified: 7 April 2011 (DB) // --------------------------------------------------------------------------- // Splits a BAM file on user-specified property, creating a new BAM output -// file for each value found. +// file for each value found // *************************************************************************** #ifndef BAMTOOLS_SPLIT_H diff --git a/src/toolkit/bamtools_stats.cpp b/src/toolkit/bamtools_stats.cpp index 42e7cbc..f66c462 100644 --- a/src/toolkit/bamtools_stats.cpp +++ b/src/toolkit/bamtools_stats.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- // Prints general alignment statistics for BAM file(s). // *************************************************************************** @@ -50,7 +50,7 @@ struct StatsTool::StatsToolPrivate { // ctor & dtor public: StatsToolPrivate(StatsTool::StatsSettings* _settings); - ~StatsToolPrivate(void); + ~StatsToolPrivate(void) { } // 'public' interface public: @@ -64,42 +64,40 @@ struct StatsTool::StatsToolPrivate { // data members private: - StatsTool::StatsSettings* settings; - unsigned int numReads; - unsigned int numPaired; - unsigned int numProperPair; - unsigned int numMapped; - unsigned int numBothMatesMapped; - unsigned int numForwardStrand; - unsigned int numReverseStrand; - unsigned int numFirstMate; - unsigned int numSecondMate; - unsigned int numSingletons; - unsigned int numFailedQC; - unsigned int numDuplicates; - vector insertSizes; + StatsTool::StatsSettings* m_settings; + unsigned int m_numReads; + unsigned int m_numPaired; + unsigned int m_numProperPair; + unsigned int m_numMapped; + unsigned int m_numBothMatesMapped; + unsigned int m_numForwardStrand; + unsigned int m_numReverseStrand; + unsigned int m_numFirstMate; + unsigned int m_numSecondMate; + unsigned int m_numSingletons; + unsigned int m_numFailedQC; + unsigned int m_numDuplicates; + vector m_insertSizes; }; -StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* _settings) - : settings(_settings) - , numReads(0) - , numPaired(0) - , numProperPair(0) - , numMapped(0) - , numBothMatesMapped(0) - , numForwardStrand(0) - , numReverseStrand(0) - , numFirstMate(0) - , numSecondMate(0) - , numSingletons(0) - , numFailedQC(0) - , numDuplicates(0) +StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* settings) + : m_settings(settings) + , m_numReads(0) + , m_numPaired(0) + , m_numProperPair(0) + , m_numMapped(0) + , m_numBothMatesMapped(0) + , m_numForwardStrand(0) + , m_numReverseStrand(0) + , m_numFirstMate(0) + , m_numSecondMate(0) + , m_numSingletons(0) + , m_numFailedQC(0) + , m_numDuplicates(0) { - insertSizes.reserve(100000); + m_insertSizes.reserve(100000); } -StatsTool::StatsToolPrivate::~StatsToolPrivate(void) { } - // median is of type double because in the case of even number of data elements, // we need to return the average of middle 2 elements bool StatsTool::StatsToolPrivate::CalculateMedian(vector& data, double& median) { @@ -136,32 +134,32 @@ void StatsTool::StatsToolPrivate::PrintStats(void) { cout << "Stats for BAM file(s): " << endl; cout << "**********************************************" << endl; cout << endl; - cout << "Total reads: " << numReads << endl; - cout << "Mapped reads: " << numMapped << "\t(" << ((float)numMapped/numReads)*100 << "%)" << endl; - cout << "Forward strand: " << numForwardStrand << "\t(" << ((float)numForwardStrand/numReads)*100 << "%)" << endl; - cout << "Reverse strand: " << numReverseStrand << "\t(" << ((float)numReverseStrand/numReads)*100 << "%)" << endl; - cout << "Failed QC: " << numFailedQC << "\t(" << ((float)numFailedQC/numReads)*100 << "%)" << endl; - cout << "Duplicates: " << numDuplicates << "\t(" << ((float)numDuplicates/numReads)*100 << "%)" << endl; - cout << "Paired-end reads: " << numPaired << "\t(" << ((float)numPaired/numReads)*100 << "%)" << endl; + cout << "Total reads: " << m_numReads << endl; + cout << "Mapped reads: " << m_numMapped << "\t(" << ((float)m_numMapped/m_numReads)*100 << "%)" << endl; + cout << "Forward strand: " << m_numForwardStrand << "\t(" << ((float)m_numForwardStrand/m_numReads)*100 << "%)" << endl; + cout << "Reverse strand: " << m_numReverseStrand << "\t(" << ((float)m_numReverseStrand/m_numReads)*100 << "%)" << endl; + cout << "Failed QC: " << m_numFailedQC << "\t(" << ((float)m_numFailedQC/m_numReads)*100 << "%)" << endl; + cout << "Duplicates: " << m_numDuplicates << "\t(" << ((float)m_numDuplicates/m_numReads)*100 << "%)" << endl; + cout << "Paired-end reads: " << m_numPaired << "\t(" << ((float)m_numPaired/m_numReads)*100 << "%)" << endl; - if ( numPaired != 0 ) { - cout << "'Proper-pairs': " << numProperPair << "\t(" << ((float)numProperPair/numPaired)*100 << "%)" << endl; - cout << "Both pairs mapped: " << numBothMatesMapped << "\t(" << ((float)numBothMatesMapped/numPaired)*100 << "%)" << endl; - cout << "Read 1: " << numFirstMate << endl; - cout << "Read 2: " << numSecondMate << endl; - cout << "Singletons: " << numSingletons << "\t(" << ((float)numSingletons/numPaired)*100 << "%)" << endl; + if ( m_numPaired != 0 ) { + cout << "'Proper-pairs': " << m_numProperPair << "\t(" << ((float)m_numProperPair/m_numPaired)*100 << "%)" << endl; + cout << "Both pairs mapped: " << m_numBothMatesMapped << "\t(" << ((float)m_numBothMatesMapped/m_numPaired)*100 << "%)" << endl; + cout << "Read 1: " << m_numFirstMate << endl; + cout << "Read 2: " << m_numSecondMate << endl; + cout << "Singletons: " << m_numSingletons << "\t(" << ((float)m_numSingletons/m_numPaired)*100 << "%)" << endl; } - if ( settings->IsShowingInsertSizeSummary ) { + if ( m_settings->IsShowingInsertSizeSummary ) { double avgInsertSize = 0.0; - if ( !insertSizes.empty() ) { - avgInsertSize = ( accumulate(insertSizes.begin(), insertSizes.end(), 0.0) / (double)insertSizes.size() ); + if ( !m_insertSizes.empty() ) { + avgInsertSize = ( accumulate(m_insertSizes.begin(), m_insertSizes.end(), 0.0) / (double)m_insertSizes.size() ); cout << "Average insert size (absolute value): " << avgInsertSize << endl; } double medianInsertSize = 0.0; - if ( CalculateMedian(insertSizes, medianInsertSize) ) + if ( CalculateMedian(m_insertSizes, medianInsertSize) ) cout << "Median insert size (absolute value): " << medianInsertSize << endl; } cout << endl; @@ -171,55 +169,59 @@ void StatsTool::StatsToolPrivate::PrintStats(void) { void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) { // increment total alignment counter - ++numReads; + ++m_numReads; - // check the paired-independent flags - if ( al.IsDuplicate() ) ++numDuplicates; - if ( al.IsFailedQC() ) ++numFailedQC; - if ( al.IsMapped() ) ++numMapped; + // incrememt counters for pairing-independent flags + if ( al.IsDuplicate() ) ++m_numDuplicates; + if ( al.IsFailedQC() ) ++m_numFailedQC; + if ( al.IsMapped() ) ++m_numMapped; - // check forward/reverse strand + // increment strand counters if ( al.IsReverseStrand() ) - ++numReverseStrand; + ++m_numReverseStrand; else - ++numForwardStrand; + ++m_numForwardStrand; // if alignment is paired-end if ( al.IsPaired() ) { // increment PE counter - ++numPaired; + ++m_numPaired; // increment first mate/second mate counters - if ( al.IsFirstMate() ) ++numFirstMate; - if ( al.IsSecondMate() ) ++numSecondMate; + if ( al.IsFirstMate() ) ++m_numFirstMate; + if ( al.IsSecondMate() ) ++m_numSecondMate; // if alignment is mapped, check mate status if ( al.IsMapped() ) { // if mate mapped if ( al.IsMateMapped() ) - ++numBothMatesMapped; + ++m_numBothMatesMapped; // else singleton else - ++numSingletons; + ++m_numSingletons; } // check for explicit proper pair flag - if ( al.IsProperPair() ) ++numProperPair; + if ( al.IsProperPair() ) ++m_numProperPair; // store insert size for first mate - if ( settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) { + if ( m_settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) { int insertSize = abs(al.InsertSize); - insertSizes.push_back( insertSize ); + m_insertSizes.push_back( insertSize ); } } } bool StatsTool::StatsToolPrivate::Run() { + // set to default input if none provided + if ( !m_settings->HasInput ) + m_settings->InputFiles.push_back(Options::StandardIn()); + // open the BAM files BamMultiReader reader; - if ( !reader.Open(settings->InputFiles) ) { + if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools stats ERROR: could not open input BAM file(s)... Aborting." << endl; reader.Close(); return false; @@ -256,6 +258,7 @@ StatsTool::StatsTool(void) } StatsTool::~StatsTool(void) { + delete m_settings; m_settings = 0; @@ -272,14 +275,13 @@ int StatsTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - - // set to default input if none provided - if ( !m_settings->HasInput ) - m_settings->InputFiles.push_back(Options::StandardIn()); - // run internal StatsTool implementation, return success/fail + // initialize StatsTool with settings m_impl = new StatsToolPrivate(m_settings); - if ( m_impl->Run() ) return 0; - else return 1; + // run StatsTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; } diff --git a/src/toolkit/bamtools_stats.h b/src/toolkit/bamtools_stats.h index 16dc252..799befe 100644 --- a/src/toolkit/bamtools_stats.h +++ b/src/toolkit/bamtools_stats.h @@ -3,12 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Prints general statistics for a single BAM file. -// -// ** Expand to multiple? ** -// +// Prints general statistics for a single BAM file // *************************************************************************** #ifndef BAMTOOLS_STATS_H -- 2.39.2