From f34f0673ce130d54298c7cac3fe106c31171a4f7 Mon Sep 17 00:00:00 2001 From: Derek Barnett Date: Mon, 10 Dec 2012 13:23:10 -0500 Subject: [PATCH] Added the "-list" option to all toolkit utilities that accept multiple BAM files as input (issue #67) --- src/toolkit/bamtools_convert.cpp | 25 ++++++++++-- src/toolkit/bamtools_count.cpp | 32 +++++++++++++--- src/toolkit/bamtools_filter.cpp | 65 +++++++++++++++++++++----------- src/toolkit/bamtools_header.cpp | 31 ++++++++++++--- src/toolkit/bamtools_merge.cpp | 47 ++++++++++++----------- src/toolkit/bamtools_random.cpp | 26 +++++++++++-- src/toolkit/bamtools_stats.cpp | 31 ++++++++++++--- 7 files changed, 192 insertions(+), 65 deletions(-) diff --git a/src/toolkit/bamtools_convert.cpp b/src/toolkit/bamtools_convert.cpp index 0e1743f..b0aae07 100644 --- a/src/toolkit/bamtools_convert.cpp +++ b/src/toolkit/bamtools_convert.cpp @@ -2,7 +2,7 @@ // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 11 November 2012 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Converts between BAM and a number of other formats // *************************************************************************** @@ -76,6 +76,7 @@ struct ConvertTool::ConvertSettings { // flag bool HasInput; + bool HasInputFilelist; bool HasOutput; bool HasFormat; bool HasRegion; @@ -87,6 +88,7 @@ struct ConvertTool::ConvertSettings { // options vector InputFiles; + string InputFilelist; string OutputFilename; string Format; string Region; @@ -97,6 +99,7 @@ struct ConvertTool::ConvertSettings { // constructor ConvertSettings(void) : HasInput(false) + , HasInputFilelist(false) , HasOutput(false) , HasFormat(false) , HasRegion(false) @@ -151,9 +154,23 @@ bool ConvertTool::ConvertToolPrivate::Run(void) { // initialize conversion input/output // set to default input if none provided - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // open input files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -703,11 +720,13 @@ ConvertTool::ConvertTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format [-in -in ...] [-out ] [-region ] [format-specific options]"); + Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", + "-format [-in -in ... | -list ] [-out ] [-region ] [format-specific options]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts); Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); diff --git a/src/toolkit/bamtools_count.cpp b/src/toolkit/bamtools_count.cpp index 3593f4d..5a7c0a7 100644 --- a/src/toolkit/bamtools_count.cpp +++ b/src/toolkit/bamtools_count.cpp @@ -2,7 +2,7 @@ // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Prints alignment count for BAM file(s) // *************************************************************************** @@ -15,6 +15,7 @@ #include using namespace BamTools; +#include #include #include #include @@ -27,15 +28,18 @@ struct CountTool::CountSettings { // flags bool HasInput; + bool HasInputFilelist; bool HasRegion; // filenames vector InputFiles; + string InputFilelist; string Region; // constructor CountSettings(void) : HasInput(false) + , HasInputFilelist(false) , HasRegion(false) { } }; @@ -64,10 +68,24 @@ struct CountTool::CountToolPrivate { bool CountTool::CountToolPrivate::Run(void) { - // if no '-in' args supplied, default to stdin - if ( !m_settings->HasInput ) + // set to default input if none provided + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools count ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // open reader without index BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -150,12 +168,16 @@ CountTool::CountTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in -in ...] [-region ]"); + Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", + "[-in -in ... | -list ] [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); + Options::AddValueOption("-region", "REGION", + "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", + "", m_settings->HasRegion, m_settings->Region, IO_Opts); } CountTool::~CountTool(void) { diff --git a/src/toolkit/bamtools_filter.cpp b/src/toolkit/bamtools_filter.cpp index 8af9cb9..1189981 100644 --- a/src/toolkit/bamtools_filter.cpp +++ b/src/toolkit/bamtools_filter.cpp @@ -2,7 +2,7 @@ // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 14 October 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Filters BAM file(s) according to some user-specified criteria // *************************************************************************** @@ -20,6 +20,7 @@ using namespace BamTools; using namespace Json; #include +#include #include #include #include @@ -236,14 +237,16 @@ struct FilterTool::FilterSettings { // IO opts // flags - bool HasInputBamFilename; - bool HasOutputBamFilename; + bool HasInput; + bool HasInputFilelist; + bool HasOutput; bool HasRegion; - bool HasScriptFilename; + bool HasScript; bool IsForceCompression; // filenames vector InputFiles; + string InputFilelist; string OutputFilename; string Region; string ScriptFilename; @@ -302,10 +305,11 @@ struct FilterTool::FilterSettings { // constructor FilterSettings(void) - : HasInputBamFilename(false) - , HasOutputBamFilename(false) + : HasInput(false) + , HasInputFilelist(false) + , HasOutput(false) , HasRegion(false) - , HasScriptFilename(false) + , HasScript(false) , IsForceCompression(false) , OutputFilename(Options::StandardOut()) , HasAlignmentFlagFilter(false) @@ -463,11 +467,11 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt m_filterEngine.setProperty(filterName, propertyName, stringValue, type); } - else if ( propertyName == TAG_PROPERTY ) { - // this will be stored directly as the TAG:VALUE token - // (VALUE may contain compare ops, will be parsed out later) - m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT); - } + else if ( propertyName == TAG_PROPERTY ) { + // this will be stored directly as the TAG:VALUE token + // (VALUE may contain compare ops, will be parsed out later) + m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT); + } // else unknown property else { @@ -500,7 +504,8 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) { // peek ahead, make sure there is data available char ch = fgetc(inFile); ungetc(ch, inFile); - if( feof(inFile) ) break; + if( feof(inFile) ) + break; // read next block of data if ( fgets(buffer, 1024, inFile) == 0 ) { @@ -682,12 +687,27 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) { bool FilterTool::FilterToolPrivate::Run(void) { // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // initialize defined properties & user-specified filters // quit if failed - if ( !SetupFilters() ) return false; + if ( !SetupFilters() ) + return false; // open reader without index BamMultiReader reader; @@ -786,7 +806,7 @@ bool FilterTool::FilterToolPrivate::SetupFilters(void) { InitProperties(); // parse script for filter rules, if given - if ( m_settings->HasScriptFilename ) + if ( m_settings->HasScript ) return ParseScript(); // otherwise check command line for filters @@ -804,9 +824,10 @@ FilterTool::FilterTool(void) // ---------------------------------- // set program details - const string usage = "[-in -in ...] " + const string usage = "[-in -in ... | -list ] " "[-out | [-forceCompression]] [-region ] " "[ [-script HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-out", "BAM filename", outDesc, "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); - Options::AddValueOption("-region", "REGION", regionDesc, "", m_settings->HasRegion, m_settings->Region, IO_Opts); - Options::AddValueOption("-script", "filename", scriptDesc, "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts); + Options::AddValueOption("-in", "BAM filename", inDesc, "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", listDesc, "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); + Options::AddValueOption("-out", "BAM filename", outDesc, "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-region", "REGION", regionDesc, "", m_settings->HasRegion, m_settings->Region, IO_Opts); + Options::AddValueOption("-script", "filename", scriptDesc, "", m_settings->HasScript, m_settings->ScriptFilename, IO_Opts); Options::AddOption("-forceCompression",forceDesc, m_settings->IsForceCompression, IO_Opts); // ---------------------------------- diff --git a/src/toolkit/bamtools_header.cpp b/src/toolkit/bamtools_header.cpp index a08c632..534bb14 100644 --- a/src/toolkit/bamtools_header.cpp +++ b/src/toolkit/bamtools_header.cpp @@ -2,7 +2,7 @@ // bamtools_header.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Prints the SAM-style header from a single BAM file ( or merged header from // multiple BAM files) to stdout @@ -14,6 +14,7 @@ #include using namespace BamTools; +#include #include #include #include @@ -25,14 +26,17 @@ using namespace std; struct HeaderTool::HeaderSettings { // flags - bool HasInputBamFilename; + bool HasInput; + bool HasInputFilelist; // filenames vector InputFiles; + string InputFilelist; // constructor HeaderSettings(void) - : HasInputBamFilename(false) + : HasInput(false) + , HasInputFilelist(false) { } }; @@ -58,9 +62,23 @@ struct HeaderTool::HeaderToolPrivate { bool HeaderTool::HeaderToolPrivate::Run(void) { // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools header ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // attemp to open BAM files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -85,11 +103,12 @@ HeaderTool::HeaderTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in -in ...] "); + Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in -in ... | -list ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); } HeaderTool::~HeaderTool(void) { diff --git a/src/toolkit/bamtools_merge.cpp b/src/toolkit/bamtools_merge.cpp index 2bb47c4..6a33d12 100644 --- a/src/toolkit/bamtools_merge.cpp +++ b/src/toolkit/bamtools_merge.cpp @@ -2,7 +2,7 @@ // bamtools_merge.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Merges multiple BAM files into one // *************************************************************************** @@ -15,6 +15,7 @@ #include using namespace BamTools; +#include #include #include #include @@ -26,9 +27,9 @@ using namespace std; struct MergeTool::MergeSettings { // flags - bool HasInputBamFilename; - bool HasInputBamFilelist; - bool HasOutputBamFilename; + bool HasInput; + bool HasInputFilelist; + bool HasOutput; bool IsForceCompression; bool HasRegion; @@ -42,9 +43,9 @@ struct MergeTool::MergeSettings { // constructor MergeSettings(void) - : HasInputBamFilename(false) - , HasInputBamFilelist(false) - , HasOutputBamFilename(false) + : HasInput(false) + , HasInputFilelist(false) + , HasOutput(false) , IsForceCompression(false) , HasRegion(false) , OutputFilename(Options::StandardOut()) @@ -76,20 +77,21 @@ struct MergeTool::MergeToolPrivate { bool MergeTool::MergeToolPrivate::Run(void) { // set to default input if none provided - if ( !m_settings->HasInputBamFilename && !m_settings->HasInputBamFilelist ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list - if ( m_settings->HasInputBamFilelist ) { - ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); - if ( !filelist.is_open() ) { - cerr << "bamtools merge ERROR: could not open input BAM file list... Aborting." << endl; - return false; - } - string line; - while ( getline(filelist, line) ) { - m_settings->InputFiles.push_back(line); - } + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools merge ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); } // opens the BAM files (by default without checking for indexes) @@ -197,13 +199,14 @@ MergeTool::MergeTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[ [-in -in ...] | [-list ] ] [-out | [-forceCompression]] [-region ]"); + Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", + "[-in -in ... | -list ] [-out | [-forceCompression]] [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); - Options::AddValueOption("-list", "BAM filename", "the input BAM file list, one line per file", "", m_settings->HasInputBamFilelist, m_settings->InputFilelist, IO_Opts); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts); Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts); } diff --git a/src/toolkit/bamtools_random.cpp b/src/toolkit/bamtools_random.cpp index e28ea70..5282f15 100644 --- a/src/toolkit/bamtools_random.cpp +++ b/src/toolkit/bamtools_random.cpp @@ -2,7 +2,7 @@ // bamtools_random.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 (DB) +// Last modified: 10 December 2012 (DB) // --------------------------------------------------------------------------- // Grab a random subset of alignments (testing tool) // *************************************************************************** @@ -17,6 +17,7 @@ using namespace BamTools; #include #include +#include #include #include #include @@ -43,6 +44,7 @@ struct RandomTool::RandomSettings { // flags bool HasAlignmentCount; bool HasInput; + bool HasInputFilelist; bool HasOutput; bool HasRegion; bool IsForceCompression; @@ -50,6 +52,7 @@ struct RandomTool::RandomSettings { // parameters unsigned int AlignmentCount; vector InputFiles; + string InputFilelist; string OutputFilename; string Region; @@ -57,6 +60,7 @@ struct RandomTool::RandomSettings { RandomSettings(void) : HasAlignmentCount(false) , HasInput(false) + , HasInputFilelist(false) , HasOutput(false) , HasRegion(false) , IsForceCompression(false) @@ -90,9 +94,23 @@ struct RandomTool::RandomToolPrivate { bool RandomTool::RandomToolPrivate::Run(void) { // set to default stdin if no input files provided - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // open our reader BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -212,11 +230,13 @@ RandomTool::RandomTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in -in ...] [-out ] [-forceCompression] [-n] [-region ]"); + Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", + "[-in -in ... | -list ] [-out ] [-forceCompression] [-n] [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); diff --git a/src/toolkit/bamtools_stats.cpp b/src/toolkit/bamtools_stats.cpp index eb57a95..0035913 100644 --- a/src/toolkit/bamtools_stats.cpp +++ b/src/toolkit/bamtools_stats.cpp @@ -2,7 +2,7 @@ // bamtools_cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Prints general alignment statistics for BAM file(s). // *************************************************************************** @@ -15,6 +15,7 @@ using namespace BamTools; #include #include +#include #include #include #include @@ -29,14 +30,17 @@ struct StatsTool::StatsSettings { // flags bool HasInput; + bool HasInputFilelist; bool IsShowingInsertSizeSummary; // filenames vector InputFiles; + string InputFilelist; // constructor StatsSettings(void) : HasInput(false) + , HasInputFilelist(false) , IsShowingInsertSizeSummary(false) { } }; @@ -102,7 +106,8 @@ StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* settings bool StatsTool::StatsToolPrivate::CalculateMedian(vector& data, double& median) { // skip if data empty - if ( data.empty() ) return false; + if ( data.empty() ) + return false; // find middle element size_t middleIndex = data.size() / 2; @@ -202,7 +207,8 @@ void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) { } // check for explicit proper pair flag - if ( al.IsProperPair() ) ++m_numProperPair; + if ( al.IsProperPair() ) + ++m_numProperPair; // store insert size for first mate if ( m_settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) { @@ -215,9 +221,23 @@ void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) { bool StatsTool::StatsToolPrivate::Run() { // set to default input if none provided - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools stats ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // open the BAM files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -246,11 +266,12 @@ StatsTool::StatsTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools stats", "prints general alignment statistics", "[-in -in ...] [statsOptions]"); + Options::SetProgramInfo("bamtools stats", "prints general alignment statistics", "[-in -in ... | -list ] [statsOptions]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); OptionGroup* AdditionalOpts = Options::CreateOptionGroup("Additional Stats"); Options::AddOption("-insert", "summarize insert size data", m_settings->IsShowingInsertSizeSummary, AdditionalOpts); -- 2.39.2