From ca4a8df663c33d8b62536558022931b31408c411 Mon Sep 17 00:00:00 2001 From: Derek Date: Tue, 1 Jun 2010 22:54:30 -0400 Subject: [PATCH] Implemented Mosaik-style command line parser, instead of former GetOpt parser. Setup an AbstractTool base class for all subtools. Split tools into .h/.cpp pairs --- Makefile | 11 +- bamtools.cpp | 94 +++---- bamtools_count.cpp | 108 ++++++++ bamtools_count.h | 38 +++ bamtools_coverage.cpp | 84 +++++++ bamtools_coverage.h | 56 ++--- bamtools_dump.h | 74 ------ bamtools_getopt.h | 558 ------------------------------------------ bamtools_header.cpp | 84 +++++++ bamtools_header.h | 60 ++--- bamtools_index.cpp | 79 ++++++ bamtools_index.h | 61 ++--- bamtools_merge.cpp | 121 +++++++++ bamtools_merge.h | 96 ++------ bamtools_options.cpp | 258 +++++++++++++++++++ bamtools_options.h | 210 ++++++++++++++++ bamtools_sam.cpp | 160 ++++++++++++ bamtools_sam.h | 140 ++--------- bamtools_sort.cpp | 77 ++++++ bamtools_sort.h | 76 ++---- bamtools_stats.cpp | 75 ++++++ bamtools_stats.h | 62 ++--- bamtools_tool.h | 38 +++ bamtools_utilities.h | 100 ++++++++ bamtools_variant.h | 127 ++++++++++ 25 files changed, 1745 insertions(+), 1102 deletions(-) create mode 100644 bamtools_count.cpp create mode 100644 bamtools_count.h create mode 100644 bamtools_coverage.cpp delete mode 100644 bamtools_dump.h delete mode 100644 bamtools_getopt.h create mode 100644 bamtools_header.cpp create mode 100644 bamtools_index.cpp create mode 100644 bamtools_merge.cpp create mode 100644 bamtools_options.cpp create mode 100644 bamtools_options.h create mode 100644 bamtools_sam.cpp create mode 100644 bamtools_sort.cpp create mode 100644 bamtools_stats.cpp create mode 100644 bamtools_tool.h create mode 100644 bamtools_utilities.h create mode 100644 bamtools_variant.h diff --git a/Makefile b/Makefile index 435f776..9a72fcc 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,16 @@ CXX= g++ CXXFLAGS= -Wall -O3 PROG= bamtools -LIBS= -lz -OBJS= BGZF.o BamReader.o BamWriter.o BamMultiReader.o bamtools.o +API= BGZF.o BamReader.o BamWriter.o BamMultiReader.o +UTILS= bamtools_options.o +TOOLKIT= bamtools_count.o bamtools_coverage.o bamtools_header.o bamtools_index.o bamtools_merge.o bamtools_sam.o bamtools_sort.o bamtools_stats.o +MAIN= bamtools.o +LIBS= -lz all: $(PROG) -bamtools: $(OBJS) - $(CXX) $(CXXFLAGS) -o $@ $(OBJS) $(LIBS) +bamtools: $(API) $(UTILS) $(TOOLKIT) $(MAIN) + $(CXX) $(CXXFLAGS) -o $@ $(API) $(UTILS) $(TOOLKIT) $(MAIN) $(LIBS) clean: rm -fr gmon.out *.o *.a a.out *~ diff --git a/bamtools.cpp b/bamtools.cpp index 6a43438..12731f2 100644 --- a/bamtools.cpp +++ b/bamtools.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 26 May 2010 +// Last modified: 1 June 2010 // --------------------------------------------------------------------------- // Integrates a number of BamTools functionalities into a single executable. // *************************************************************************** @@ -12,8 +12,8 @@ #include // BamTools includes +#include "bamtools_count.h" #include "bamtools_coverage.h" -#include "bamtools_dump.h" #include "bamtools_header.h" #include "bamtools_index.h" #include "bamtools_merge.h" @@ -26,9 +26,8 @@ using namespace BamTools; // ------------------------------------------ // bamtools subtool names +static const string COUNT = "count"; static const string COVERAGE = "coverage"; -static const string DUMP = "dump"; // <-- do we even want to keep this? I think 'bamtools sam' will be more useful anyway - // nobody's going to want what was essentially an early, bloated, debugging output static const string HEADER = "header"; static const string INDEX = "index"; static const string MERGE = "merge"; @@ -46,40 +45,48 @@ static const string VERSION = "version"; static const string LONG_VERSION = "--version"; static const string SHORT_VERSION = "-v"; +// ------------------------------------------ +// Print help info int Help(int argc, char* argv[]) { // 'bamtools help COMMAND' + AbstractTool* tool(0); if (argc > 2) { - if ( argv[2] == COVERAGE) return BamCoverageHelp(); - if ( argv[2] == DUMP ) return BamDumpHelp(); // keep? - if ( argv[2] == HEADER ) return BamHeaderHelp(); - if ( argv[2] == INDEX ) return BamIndexHelp(); - if ( argv[2] == MERGE ) return BamMergeHelp(); - if ( argv[2] == SAM ) return BamSamHelp(); - if ( argv[2] == SORT ) return BamSortHelp(); - if ( argv[2] == STATS ) return BamStatsHelp(); + if ( argv[2] == COUNT ) tool = new CountTool; + if ( argv[2] == COVERAGE ) tool = new CoverageTool; + if ( argv[2] == HEADER ) tool = new HeaderTool; + if ( argv[2] == INDEX ) tool = new IndexTool; + if ( argv[2] == MERGE ) tool = new MergeTool; + if ( argv[2] == SAM ) tool = new SamTool; + if ( argv[2] == SORT ) tool = new SortTool; + if ( argv[2] == STATS ) tool = new StatsTool; } - - // either 'bamtools help' or unrecognized argument after 'help' - cerr << endl; - cerr << "usage: bamtools [--help] COMMAND [ARGS]" << endl; - cerr << endl; - cerr << "Available bamtools commands:" << endl; - cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl; - cerr << "\tdump Dump BAM file contents to text output" << endl; // keep? - cerr << "\theader Prints BAM header information" << endl; - cerr << "\tindex Generates index for BAM file" << endl; - cerr << "\tmerge Merge multiple BAM files into single file" << endl; - cerr << "\tsam Prints the BAM file in SAM (text) format" << endl; - cerr << "\tsort Sorts the BAM file according to some criteria" << endl; - cerr << "\tstats Prints some basic statistics from the input BAM file" << endl; - cerr << endl; - cerr << "See 'bamtools help COMMAND' for more information on a specific command." << endl; - cerr << endl; - return 0; + if ( tool ) return tool->Help(); + else { + + // either 'bamtools help' or unrecognized argument after 'help' + cerr << endl; + cerr << "usage: bamtools [--help] COMMAND [ARGS]" << endl; + cerr << endl; + cerr << "Available bamtools commands:" << endl; + cerr << "\tcount Prints number of alignments in BAM file" << endl; + cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl; + cerr << "\theader Prints BAM header information" << endl; + cerr << "\tindex Generates index for BAM file" << endl; + cerr << "\tmerge Merge multiple BAM files into single file" << endl; + cerr << "\tsam Prints the BAM file in SAM (text) format" << endl; + cerr << "\tsort Sorts the BAM file according to some criteria" << endl; + cerr << "\tstats Prints some basic statistics from the input BAM file" << endl; + cerr << endl; + cerr << "See 'bamtools help COMMAND' for more information on a specific command." << endl; + cerr << endl; + return 0; + } } +// ------------------------------------------ +// Print version info int Version(void) { cout << endl; cout << "bamtools v0.x.xx" << endl; @@ -90,6 +97,8 @@ int Version(void) { return 0; } +// ------------------------------------------ +// toolkit entry point int main(int argc, char* argv[]) { // just 'bamtools' @@ -101,16 +110,19 @@ int main(int argc, char* argv[]) { // 'bamtools version', 'bamtools --version', or 'bamtools -v' if ( (argv[1] == VERSION) || (argv[1] == LONG_VERSION) || (argv[1] == SHORT_VERSION) ) return Version(); - // run desired sub-tool - if ( argv[1] == COVERAGE ) return RunBamCoverage(argc, argv); - if ( argv[1] == DUMP ) return RunBamDump(argc, argv); // keep? - if ( argv[1] == HEADER ) return RunBamHeader(argc, argv); - if ( argv[1] == INDEX ) return RunBamIndex(argc, argv); - if ( argv[1] == MERGE ) return RunBamMerge(argc, argv); - if ( argv[1] == SAM ) return RunBamSam(argc, argv); - if ( argv[1] == SORT ) return RunBamSort(argc, argv); - if ( argv[1] == STATS ) return RunBamStats(argc, argv); + // determine desired sub-tool + AbstractTool* tool(0); + if ( argv[1] == COUNT ) tool = new CountTool; + if ( argv[1] == COVERAGE ) tool = new CoverageTool; + if ( argv[1] == HEADER ) tool = new HeaderTool; + if ( argv[1] == INDEX ) tool = new IndexTool; + if ( argv[1] == MERGE ) tool = new MergeTool; + if ( argv[1] == SAM ) tool = new SamTool; + if ( argv[1] == SORT ) tool = new SortTool; + if ( argv[1] == STATS ) tool = new StatsTool; - // unrecognized 2nd argument, print help - return Help(argc, argv); + // if found, run tool + if ( tool ) return tool->Run(argc, argv); + // no match found, show help + else return Help(argc, argv); } diff --git a/bamtools_count.cpp b/bamtools_count.cpp new file mode 100644 index 0000000..bfdfd28 --- /dev/null +++ b/bamtools_count.cpp @@ -0,0 +1,108 @@ +// *************************************************************************** +// bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 1 June 2010 +// --------------------------------------------------------------------------- +// Prints alignment count for BAM file +// +// ** Expand to multiple?? +// +// *************************************************************************** + +#include +#include +#include + +#include "bamtools_count.h" +#include "bamtools_options.h" +#include "bamtools_utilities.h" +#include "BamReader.h" + +using namespace std; +using namespace BamTools; + +// --------------------------------------------- +// CountSettings implementation + +struct CountTool::CountSettings { + + // flags + bool HasInputBamFilename; + bool HasRegion; + + // filenames + std::string InputBamFilename; + std::string Region; + + // constructor + CountSettings(void) + : HasInputBamFilename(false) + , HasRegion(false) + , InputBamFilename(Options::StandardIn()) + { } +}; + +// --------------------------------------------- +// CountTool implementation + +CountTool::CountTool(void) + : AbstractTool() + , m_settings(new CountSettings) +{ + // set program details + Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in "); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); + + OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); + Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); +} + +CountTool::~CountTool(void) { + delete m_settings; + m_settings = 0; +} + +int CountTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int CountTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + //open our BAM reader +// BamReader reader; +// reader.Open(m_settings.InputBamFilename); + + // count alignments + string startChrom; + string stopChrom; + int startPos; + int stopPos; + + if ( !m_settings->HasRegion ) { + cerr << "Counting all alignments " << endl; + } else { + if ( ParseRegionString(m_settings->Region, startChrom, startPos, stopChrom, stopPos) ) { + cerr << "Counting only alignments in region " << m_settings->Region << endl; + cerr << "StartChrom: " << startChrom << endl; + cerr << "StartPos: " << startPos << endl; + cerr << "StopChrom: " << stopChrom << endl; + cerr << "StopPos: " << stopPos << endl; + } + } + + cerr << " from " << m_settings->InputBamFilename << endl; + cerr << "FEATURE NOT YET IMPLEMENTED!" << endl; + + // clean & exit +// reader.Close(); + return 0; +} \ No newline at end of file diff --git a/bamtools_count.h b/bamtools_count.h new file mode 100644 index 0000000..e3d0c81 --- /dev/null +++ b/bamtools_count.h @@ -0,0 +1,38 @@ +// *************************************************************************** +// bamtools_count.h (c) 2010 Derek Barnett, Erik Garrison +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 1 June 2010 +// --------------------------------------------------------------------------- +// Prints alignment count for BAM file +// +// ** Expand to multiple?? +// +// *************************************************************************** + +#ifndef BAMTOOLS_COUNT_H +#define BAMTOOLS_COUNT_H + +#include "bamtools_tool.h" + +namespace BamTools { + +class CountTool : public AbstractTool { + + public: + CountTool(void); + ~CountTool(void); + + public: + int Help(void); + int Run(int argc, char* argv[]); + + private: + struct CountSettings; + CountSettings* m_settings; +}; + +} // namespace BamTools + +#endif // BAMTOOLS_COUNT_H diff --git a/bamtools_coverage.cpp b/bamtools_coverage.cpp new file mode 100644 index 0000000..b587445 --- /dev/null +++ b/bamtools_coverage.cpp @@ -0,0 +1,84 @@ +// *************************************************************************** +// bamtools_coverage.cpp (c) 2010 Derek Barnett, Erik Garrison +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 1 June 2010 +// --------------------------------------------------------------------------- +// Prints coverage statistics for a single BAM file +// +// ** Expand to multiple?? +// +// *************************************************************************** + +#include +#include +#include + +#include "bamtools_coverage.h" +#include "bamtools_options.h" +#include "BamReader.h" + +using namespace std; +using namespace BamTools; + +// --------------------------------------------- +// CoverageSettings implementation + +struct CoverageTool::CoverageSettings { + + // flags + bool HasInputBamFilename; + + // filenames + std::string InputBamFilename; + + // constructor + CoverageSettings(void) + : HasInputBamFilename(false) + , InputBamFilename(Options::StandardIn()) + { } +}; + +// --------------------------------------------- +// CoverageTool implementation + +CoverageTool::CoverageTool(void) + : AbstractTool() + , m_settings(new CoverageSettings) +{ + // set program details + Options::SetProgramInfo("bamtools coverage", "prints coverage stats for a BAM file", "-in "); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); +} + +CoverageTool::~CoverageTool(void) { + delete m_settings; + m_settings = 0; +} + +int CoverageTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int CoverageTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + //open our BAM reader + BamReader reader; + reader.Open(m_settings->InputBamFilename); + + // generate coverage stats + cerr << "Generating coverage stats for " << m_settings->InputBamFilename << endl; + cerr << "FEATURE NOT YET IMPLEMENTED!" << endl; + + // clean & exit + reader.Close(); + return 0; +} \ No newline at end of file diff --git a/bamtools_coverage.h b/bamtools_coverage.h index dcb8e23..c3714c0 100644 --- a/bamtools_coverage.h +++ b/bamtools_coverage.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 26 May 2010 +// Last modified: 1 June 2010 // --------------------------------------------------------------------------- // Prints coverage statistics for a single BAM file // @@ -14,47 +14,25 @@ #ifndef BAMTOOLS_COVERAGE_H #define BAMTOOLS_COVERAGE_H -#include -#include - -#include "BamReader.h" -#include "bamtools_getopt.h" +#include "bamtools_tool.h" namespace BamTools { - -int BamCoverageHelp(void) { - std::cerr << std::endl; - std::cerr << "usage:\tbamtools coverage [--in FILE]" << std::endl; - std::cerr << std::endl; - std::cerr << "\t--in FILE Input BAM file to generate coverage stats [stdin]" << std::endl; - std::cerr << std::endl; - return 0; -} - -int RunBamCoverage(int argc, char* argv[]) { - // else parse command line for args - GetOpt options(argc, argv, 1); - - std::string inputFilename; - options.addOption("in", &inputFilename); - - if ( !options.parse() ) return BamCoverageHelp(); - if ( inputFilename.empty() ) { inputFilename = "stdin"; } - -// // open our BAM reader -// BamReader reader; -// reader.Open(inputFilename); - - // generate coverage stats - std::cerr << "Generating coverage stats for " << inputFilename << std::endl; - std::cerr << "FEATURE NOT YET IMPLEMENTED!" << std::endl; - - // clean & exit -// reader.Close(); - return 0; -} - +class CoverageTool : public AbstractTool { + + public: + CoverageTool(void); + ~CoverageTool(void); + + public: + int Help(void); + int Run(int argc, char* argv[]); + + private: + struct CoverageSettings; + CoverageSettings* m_settings; +}; + } // namespace BamTools #endif // BAMTOOLS_COVERAGE_H diff --git a/bamtools_dump.h b/bamtools_dump.h deleted file mode 100644 index aac5ba1..0000000 --- a/bamtools_dump.h +++ /dev/null @@ -1,74 +0,0 @@ -// *************************************************************************** -// bamtools_dump.h (c) 2010 Derek Barnett, Erik Garrison -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 26 May 2010 -// --------------------------------------------------------------------------- -// Dumps alignment summaries out to stdout. -// -// ** This should probably go the way of the dodo soon? bamtools sam makes this -// obsolete and probably worthless. -// -// *************************************************************************** - -#ifndef BAMTOOLS_DUMP_H -#define BAMTOOLS_DUMP_H - -#include -#include -#include - -#include "BamMultiReader.h" -// #include "GetOpt.h" -#include "bamtools_getopt.h" - -namespace BamTools { - -int BamDumpHelp(void) { - std::cerr << std::endl; - std::cerr << "usage:\tbamtools dump [--in FILE [FILE] [FILE] ...]" << std::endl; - std::cerr << std::endl; - std::cerr << "\t--in FILE Input file(s) to dump alignment summaries from [stdin]" << std::endl; - std::cerr << std::endl; - return 0; -} - -// Spit out basic BamAlignment data -void PrintAlignment(const BamTools::BamAlignment& alignment) { - std::cout << "---------------------------------" << std::endl; - std::cout << "Name: " << alignment.Name << std::endl; - std::cout << "Aligned to: " << alignment.RefID; - std::cout << ":" << alignment.Position << std::endl; - std::cout << std::endl; -} - -int RunBamDump(int argc, char* argv[]) { - - // else parse command line for args - GetOpt options(argc, argv, 1); - - std::vector inputFilenames; - options.addVariableLengthOption("in", &inputFilenames); - - if ( !options.parse() ) return BamDumpHelp(); - if ( inputFilenames.empty() ) { inputFilenames.push_back("stdin"); } - - // open files - BamMultiReader reader; - reader.Open(inputFilenames, false); - - // dump alignment summaries to stdout - BamAlignment bAlignment; - while (reader.GetNextAlignment(bAlignment)) { - PrintAlignment(bAlignment); - } - - // clean up & exit - reader.Close(); - return 0; -} - -} // namespace BamTools - -#endif // BAMTOOLS_DUMP_H diff --git a/bamtools_getopt.h b/bamtools_getopt.h deleted file mode 100644 index c3ff2d8..0000000 --- a/bamtools_getopt.h +++ /dev/null @@ -1,558 +0,0 @@ -// *************************************************************************** -// bamtools_getopt.h (c) 2010 Derek Barnett, Erik Garrison -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 26 May 2010 -// --------------------------------------------------------------------------- -// Provides a configurable commandline parser used by the BamTools subtools -// *************************************************************************** - -#ifndef BAMTOOLS_GETOPT_H -#define BAMTOOLS_GETOPT_H - -// C includes -#include -#include - -// C++ includes -#include -#include -#include -#include - -namespace BamTools { - -class GetOpt { - - // ctors & dtor - public: - - // ctor: takes the 'standard' command line args (optional offset) - GetOpt(int argc, char* argv[], int offset = 0); - - // d-tor - ~GetOpt(void); - - // set rules for bare word arguments - public: - // add an optional 'bare word' argument (eg 'help') - // 'name' is not used on the command line, but for reporting - void addOptionalArgument(const std::string& name, std::string* value); - - // add a required 'bare word' argument (eg input data file) - // 'name' is not used on the command line, but for reporting - void addRequiredArgument(const std::string& name, std::string* value); - - // set rules for key=>value options - public: - // add standard option with arguments ( -Wall, -O2, --type=foo ) - void addOption(const char shortName, const std::string& longName, std::string* value); - void addOption(const std::string& longName, std::string* value); - - // add an option whose argument is optional (eg --log may default to dumping to stderr, unless a file is specified ) - // must provide a default string - void addOptionalOption(const char shortName, const std::string& longName, std::string* value, const std::string& defaultValue); - void addOptionalOption(const std::string& longName, std::string* value, const std::string& defaultValue); - - // add a repeatable option (like compiler includes -I/path/ -I/path2/ etc) - // only supporting one type of name (short/long) for this option for now - void addRepeatableOption(const char shortName, std::vector* values); // single char version - void addRepeatableOption(const std::string& longName, std::vector* values); // long name version - - // add an option that takes a variable number of arguments ( --files f1 f2 f3 f4... ) - void addVariableLengthOption(const std::string& longName, std::vector* values); - - // set rules for on/off switch - public: - // on/off switch ( --verbose --searchOnly ) only long names supported for now - void addSwitch(const std::string& longName, bool* ok); - - // parse and query methods - public: - - // get application name - const std::string& applicationName(void) const; - - // query if particular 'bare-word' argument is set - bool isSet(const std::string& name) const; - - // runs parser (does validation and assign values to arguments) - // returns success/fail - bool parse(void); - - void print(void); - - // define Option-related types & enums - private: - enum OptionType { OptUnknown = 0 - , OptEnd - , OptSwitch - , OptArg1 - , OptOptional - , OptRepeat - , OptVariable - }; - - // define Option - struct Option { - - // ctor - Option(OptionType t = OptUnknown, const char shortName = 0, const std::string& longName = "") - : Type(t) - , ShortName(shortName) - , LongName(longName) - , BoolValue(0) - { } - - // data members - OptionType Type; - char ShortName; - std::string LongName; - union { - bool* BoolValue; - std::string* StringValue; - std::vector* ListValue; - }; - std::string Default; - }; - - // internal methods - private: - void init(int argc, char* argv[], int offset); - void saveOption(const Option& opt); // const & ?? he doesnt use it - why? - void setSwitch(const Option& opt); - - // data members - private: - std::vector