CXX= g++\r
CXXFLAGS= -Wall -O3\r
PROG= bamtools\r
-LIBS= -lz\r
-OBJS= BGZF.o BamReader.o BamWriter.o BamMultiReader.o bamtools.o
+API= BGZF.o BamReader.o BamWriter.o BamMultiReader.o
+UTILS= bamtools_options.o
+TOOLKIT= bamtools_count.o bamtools_coverage.o bamtools_header.o bamtools_index.o bamtools_merge.o bamtools_sam.o bamtools_sort.o bamtools_stats.o
+MAIN= bamtools.o
+LIBS= -lz
\r
all: $(PROG)\r
\r
-bamtools: $(OBJS)\r
- $(CXX) $(CXXFLAGS) -o $@ $(OBJS) $(LIBS)\r
+bamtools: $(API) $(UTILS) $(TOOLKIT) $(MAIN)\r
+ $(CXX) $(CXXFLAGS) -o $@ $(API) $(UTILS) $(TOOLKIT) $(MAIN) $(LIBS)\r
\r
clean:\r
rm -fr gmon.out *.o *.a a.out *~\r
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
// Integrates a number of BamTools functionalities into a single executable.
// ***************************************************************************
#include <iostream>
// BamTools includes
+#include "bamtools_count.h"
#include "bamtools_coverage.h"
-#include "bamtools_dump.h"
#include "bamtools_header.h"
#include "bamtools_index.h"
#include "bamtools_merge.h"
// ------------------------------------------
// bamtools subtool names
+static const string COUNT = "count";
static const string COVERAGE = "coverage";
-static const string DUMP = "dump"; // <-- do we even want to keep this? I think 'bamtools sam' will be more useful anyway
- // nobody's going to want what was essentially an early, bloated, debugging output
static const string HEADER = "header";
static const string INDEX = "index";
static const string MERGE = "merge";
static const string LONG_VERSION = "--version";
static const string SHORT_VERSION = "-v";
+// ------------------------------------------
+// Print help info: 'bamtools help COMMAND' shows the help of a recognized sub-tool, anything else shows the general usage text
int Help(int argc, char* argv[]) {
// 'bamtools help COMMAND'
+ AbstractTool* tool(0);
if (argc > 2) {
- if ( argv[2] == COVERAGE) return BamCoverageHelp();
- if ( argv[2] == DUMP ) return BamDumpHelp(); // keep?
- if ( argv[2] == HEADER ) return BamHeaderHelp();
- if ( argv[2] == INDEX ) return BamIndexHelp();
- if ( argv[2] == MERGE ) return BamMergeHelp();
- if ( argv[2] == SAM ) return BamSamHelp();
- if ( argv[2] == SORT ) return BamSortHelp();
- if ( argv[2] == STATS ) return BamStatsHelp();
+ if ( argv[2] == COUNT ) tool = new CountTool;
+ if ( argv[2] == COVERAGE ) tool = new CoverageTool;
+ if ( argv[2] == HEADER ) tool = new HeaderTool;
+ if ( argv[2] == INDEX ) tool = new IndexTool;
+ if ( argv[2] == MERGE ) tool = new MergeTool;
+ if ( argv[2] == SAM ) tool = new SamTool;
+ if ( argv[2] == SORT ) tool = new SortTool;
+ if ( argv[2] == STATS ) tool = new StatsTool;
}
-
- // either 'bamtools help' or unrecognized argument after 'help'
- cerr << endl;
- cerr << "usage: bamtools [--help] COMMAND [ARGS]" << endl;
- cerr << endl;
- cerr << "Available bamtools commands:" << endl;
- cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl;
- cerr << "\tdump Dump BAM file contents to text output" << endl; // keep?
- cerr << "\theader Prints BAM header information" << endl;
- cerr << "\tindex Generates index for BAM file" << endl;
- cerr << "\tmerge Merge multiple BAM files into single file" << endl;
- cerr << "\tsam Prints the BAM file in SAM (text) format" << endl;
- cerr << "\tsort Sorts the BAM file according to some criteria" << endl;
- cerr << "\tstats Prints some basic statistics from the input BAM file" << endl;
- cerr << endl;
- cerr << "See 'bamtools help COMMAND' for more information on a specific command." << endl;
- cerr << endl;
- return 0;
+ if ( tool ) return tool->Help(); // NOTE(review): 'tool' is not deleted before returning - confirm this is intentional (freeing via AbstractTool* would need a virtual destructor)
+ else {
+
+ // either 'bamtools help' or unrecognized argument after 'help': print the general usage text to stderr and return 0
+ cerr << endl;
+ cerr << "usage: bamtools [--help] COMMAND [ARGS]" << endl;
+ cerr << endl;
+ cerr << "Available bamtools commands:" << endl;
+ cerr << "\tcount Prints number of alignments in BAM file" << endl;
+ cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl;
+ cerr << "\theader Prints BAM header information" << endl;
+ cerr << "\tindex Generates index for BAM file" << endl;
+ cerr << "\tmerge Merge multiple BAM files into single file" << endl;
+ cerr << "\tsam Prints the BAM file in SAM (text) format" << endl;
+ cerr << "\tsort Sorts the BAM file according to some criteria" << endl;
+ cerr << "\tstats Prints some basic statistics from the input BAM file" << endl;
+ cerr << endl;
+ cerr << "See 'bamtools help COMMAND' for more information on a specific command." << endl;
+ cerr << endl;
+ return 0;
+ }
}
+// ------------------------------------------
+// Print version info: writes a blank line and the version banner to stdout, then returns 0
int Version(void) {
cout << endl;
cout << "bamtools v0.x.xx" << endl;
return 0;
}
+// ------------------------------------------
+// toolkit entry point: matches argv[1] against the sub-tool names and returns the chosen tool's Run() result, or the Help() result when nothing matches
int main(int argc, char* argv[]) {
// just 'bamtools'
// 'bamtools version', 'bamtools --version', or 'bamtools -v'
if ( (argv[1] == VERSION) || (argv[1] == LONG_VERSION) || (argv[1] == SHORT_VERSION) ) return Version();
- // run desired sub-tool
- if ( argv[1] == COVERAGE ) return RunBamCoverage(argc, argv);
- if ( argv[1] == DUMP ) return RunBamDump(argc, argv); // keep?
- if ( argv[1] == HEADER ) return RunBamHeader(argc, argv);
- if ( argv[1] == INDEX ) return RunBamIndex(argc, argv);
- if ( argv[1] == MERGE ) return RunBamMerge(argc, argv);
- if ( argv[1] == SAM ) return RunBamSam(argc, argv);
- if ( argv[1] == SORT ) return RunBamSort(argc, argv);
- if ( argv[1] == STATS ) return RunBamStats(argc, argv);
+ // determine desired sub-tool ('tool' stays null when argv[1] matches none of the names below)
+ AbstractTool* tool(0);
+ if ( argv[1] == COUNT ) tool = new CountTool;
+ if ( argv[1] == COVERAGE ) tool = new CoverageTool;
+ if ( argv[1] == HEADER ) tool = new HeaderTool;
+ if ( argv[1] == INDEX ) tool = new IndexTool;
+ if ( argv[1] == MERGE ) tool = new MergeTool;
+ if ( argv[1] == SAM ) tool = new SamTool;
+ if ( argv[1] == SORT ) tool = new SortTool;
+ if ( argv[1] == STATS ) tool = new StatsTool;
- // unrecognized 2nd argument, print help
- return Help(argc, argv);
+ // if found, run tool and return its result (NOTE(review): 'tool' is never deleted - confirm intentional; freeing via AbstractTool* would need a virtual destructor)
+ if ( tool ) return tool->Run(argc, argv);
+ // no match found, show help
+ else return Help(argc, argv);
}
--- /dev/null
+// ***************************************************************************
+// bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints alignment count for BAM file
+//
+// ** Expand to multiple??
+//
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "bamtools_count.h"
+#include "bamtools_options.h"
+#include "bamtools_utilities.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// CountSettings implementation
+
+struct CountTool::CountSettings { // settings for 'bamtools count'; its members are handed to Options::AddValueOption by the CountTool constructor
+
+    // flags: one per option; CountTool::Run() reads HasRegion to decide whether a region was requested
+    bool HasInputBamFilename;
+    bool HasRegion;
+
+    // option values: input BAM filename and the region string
+    std::string InputBamFilename;
+    std::string Region;
+
+    // constructor: both flags start false, the input filename starts as Options::StandardIn(), Region starts empty
+    CountSettings(void)
+        : HasInputBamFilename(false)
+        , HasRegion(false)
+        , InputBamFilename(Options::StandardIn())
+    { }
+};
+
+// ---------------------------------------------
+// CountTool implementation
+
+CountTool::CountTool(void) // allocates the settings object and registers this tool's program info and options with Options
+    : AbstractTool()
+    , m_settings(new CountSettings) // released in ~CountTool()
+{
+    // set program details (presumably tool name, description and usage arguments - confirm against bamtools_options.h)
+    Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in <filename> ");
+
+    // set up options: each call is given the matching flag and value members of *m_settings
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+
+    OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
+    Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+}
+
+CountTool::~CountTool(void) { // frees the settings object allocated by the constructor
+    delete m_settings;
+    m_settings = 0; // null the pointer after freeing it
+}
+
+int CountTool::Help(void) { // shows help by calling Options::DisplayHelp(); always returns 0
+    Options::DisplayHelp();
+    return 0;
+}
+
+// Runs 'bamtools count'. Parses the command line, then reports to stderr
+// which alignments would be counted (all of them, or only those inside the
+// requested region) and from which input. Counting itself is not implemented
+// yet, so no BAM file is opened.
+//
+// Returns 0 on success, or 1 when HasRegion is set but ParseRegionString()
+// rejects the region string.
+int CountTool::Run(int argc, char* argv[]) {
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // BAM reader stays disabled until counting is implemented
+//    BamReader reader;
+//    reader.Open(m_settings->InputBamFilename);
+
+    if ( !m_settings->HasRegion ) {
+        cerr << "Counting all alignments " << endl;
+    } else {
+
+        // region bounds filled in by ParseRegionString(); the positions are
+        // zero-initialized so they never hold indeterminate values
+        string startChrom;
+        string stopChrom;
+        int startPos = 0;
+        int stopPos = 0;
+
+        // an unparseable region is reported instead of being silently ignored
+        if ( !ParseRegionString(m_settings->Region, startChrom, startPos, stopChrom, stopPos) ) {
+            cerr << "bamtools count ERROR: could not parse region string: " << m_settings->Region << endl;
+            return 1;
+        }
+
+        cerr << "Counting only alignments in region " << m_settings->Region << endl;
+        cerr << "StartChrom: " << startChrom << endl;
+        cerr << "StartPos: " << startPos << endl;
+        cerr << "StopChrom: " << stopChrom << endl;
+        cerr << "StopPos: " << stopPos << endl;
+    }
+
+    cerr << " from " << m_settings->InputBamFilename << endl;
+    cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
+
+    // clean & exit
+//    reader.Close();
+    return 0;
+}
\ No newline at end of file
--- /dev/null
+// ***************************************************************************
+// bamtools_count.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints alignment count for BAM file
+//
+// ** Expand to multiple??
+//
+// ***************************************************************************
+
+#ifndef BAMTOOLS_COUNT_H
+#define BAMTOOLS_COUNT_H
+
+#include "bamtools_tool.h"
+
+namespace BamTools {
+
+class CountTool : public AbstractTool { // the 'bamtools count' sub-tool
+
+    public:
+        CountTool(void);
+        ~CountTool(void);
+
+    public:
+        int Help(void); // show this tool's help
+        int Run(int argc, char* argv[]); // run the tool on the given command line
+
+    private:
+        struct CountSettings; // defined in bamtools_count.cpp
+        CountSettings* m_settings; // allocated in the constructor, deleted in the destructor; NOTE(review): unless AbstractTool forbids copying, a copied CountTool would share (and double-delete) this pointer
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_COUNT_H
--- /dev/null
+// ***************************************************************************
+// bamtools_coverage.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints coverage statistics for a single BAM file
+//
+// ** Expand to multiple??
+//
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "bamtools_coverage.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// CoverageSettings implementation
+
+struct CoverageTool::CoverageSettings { // settings for 'bamtools coverage'; its members are handed to Options::AddValueOption by the CoverageTool constructor
+
+    // flags: one per option
+    bool HasInputBamFilename;
+
+    // option values: input BAM filename
+    std::string InputBamFilename;
+
+    // constructor: the flag starts false and the input filename starts as Options::StandardIn()
+    CoverageSettings(void)
+        : HasInputBamFilename(false)
+        , InputBamFilename(Options::StandardIn())
+    { }
+};
+
+// ---------------------------------------------
+// CoverageTool implementation
+
+CoverageTool::CoverageTool(void) // allocates the settings object and registers this tool's program info and options with Options
+    : AbstractTool()
+    , m_settings(new CoverageSettings) // released in ~CoverageTool()
+{
+    // set program details (presumably tool name, description and usage arguments - confirm against bamtools_options.h)
+    Options::SetProgramInfo("bamtools coverage", "prints coverage stats for a BAM file", "-in <filename> ");
+
+    // set up options: the call is given the matching flag and value members of *m_settings
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+}
+
+CoverageTool::~CoverageTool(void) { // frees the settings object allocated by the constructor
+    delete m_settings;
+    m_settings = 0; // null the pointer after freeing it
+}
+
+int CoverageTool::Help(void) { // shows help by calling Options::DisplayHelp(); always returns 0
+    Options::DisplayHelp();
+    return 0;
+}
+
+int CoverageTool::Run(int argc, char* argv[]) { // runs 'bamtools coverage': parses options, opens the input, prints a not-yet-implemented notice to stderr; always returns 0
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // open our BAM reader (NOTE(review): the outcome of Open() is not checked here - confirm whether it reports failure)
+    BamReader reader;
+    reader.Open(m_settings->InputBamFilename);
+
+    // generate coverage stats (placeholder: only messages are printed, nothing is read from the reader)
+    cerr << "Generating coverage stats for " << m_settings->InputBamFilename << endl;
+    cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
+
+    // clean & exit
+    reader.Close();
+    return 0;
+}
\ No newline at end of file
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
// Prints coverage statistics for a single BAM file
//
#ifndef BAMTOOLS_COVERAGE_H
#define BAMTOOLS_COVERAGE_H
-#include <iostream>
-#include <string>
-
-#include "BamReader.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamCoverageHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools coverage [--in FILE]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input BAM file to generate coverage stats [stdin]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamCoverage(int argc, char* argv[]) {
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string inputFilename;
- options.addOption("in", &inputFilename);
-
- if ( !options.parse() ) return BamCoverageHelp();
- if ( inputFilename.empty() ) { inputFilename = "stdin"; }
-
-// // open our BAM reader
-// BamReader reader;
-// reader.Open(inputFilename);
-
- // generate coverage stats
- std::cerr << "Generating coverage stats for " << inputFilename << std::endl;
- std::cerr << "FEATURE NOT YET IMPLEMENTED!" << std::endl;
-
- // clean & exit
-// reader.Close();
- return 0;
-}
-
+class CoverageTool : public AbstractTool { // the 'bamtools coverage' sub-tool
+
+    public:
+        CoverageTool(void);
+        ~CoverageTool(void);
+
+    public:
+        int Help(void); // show this tool's help
+        int Run(int argc, char* argv[]); // run the tool on the given command line
+
+    private:
+        struct CoverageSettings; // defined in bamtools_coverage.cpp
+        CoverageSettings* m_settings; // allocated in the constructor, deleted in the destructor; NOTE(review): unless AbstractTool forbids copying, a copied CoverageTool would share (and double-delete) this pointer
+};
+
} // namespace BamTools
#endif // BAMTOOLS_COVERAGE_H
+++ /dev/null
-// ***************************************************************************
-// bamtools_dump.h (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
-// ---------------------------------------------------------------------------
-// Dumps alignment summaries out to stdout.
-//
-// ** This should probably go the way of the dodo soon? bamtools sam makes this
-// obsolete and probably worthless.
-//
-// ***************************************************************************
-
-#ifndef BAMTOOLS_DUMP_H
-#define BAMTOOLS_DUMP_H
-
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "BamMultiReader.h"
-// #include "GetOpt.h"
-#include "bamtools_getopt.h"
-
-namespace BamTools {
-
-int BamDumpHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools dump [--in FILE [FILE] [FILE] ...]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input file(s) to dump alignment summaries from [stdin]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-// Spit out basic BamAlignment data
-void PrintAlignment(const BamTools::BamAlignment& alignment) {
- std::cout << "---------------------------------" << std::endl;
- std::cout << "Name: " << alignment.Name << std::endl;
- std::cout << "Aligned to: " << alignment.RefID;
- std::cout << ":" << alignment.Position << std::endl;
- std::cout << std::endl;
-}
-
-int RunBamDump(int argc, char* argv[]) {
-
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::vector<std::string> inputFilenames;
- options.addVariableLengthOption("in", &inputFilenames);
-
- if ( !options.parse() ) return BamDumpHelp();
- if ( inputFilenames.empty() ) { inputFilenames.push_back("stdin"); }
-
- // open files
- BamMultiReader reader;
- reader.Open(inputFilenames, false);
-
- // dump alignment summaries to stdout
- BamAlignment bAlignment;
- while (reader.GetNextAlignment(bAlignment)) {
- PrintAlignment(bAlignment);
- }
-
- // clean up & exit
- reader.Close();
- return 0;
-}
-
-} // namespace BamTools
-
-#endif // BAMTOOLS_DUMP_H
+++ /dev/null
-// ***************************************************************************
-// bamtools_getopt.h (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
-// ---------------------------------------------------------------------------
-// Provides a configurable commandline parser used by the BamTools subtools
-// ***************************************************************************
-
-#ifndef BAMTOOLS_GETOPT_H
-#define BAMTOOLS_GETOPT_H
-
-// C includes
-#include <cassert>
-#include <cstdlib>
-
-// C++ includes
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace BamTools {
-
-class GetOpt {
-
- // ctors & dtor
- public:
-
- // ctor: takes the 'standard' command line args (optional offset)
- GetOpt(int argc, char* argv[], int offset = 0);
-
- // d-tor
- ~GetOpt(void);
-
- // set rules for bare word arguments
- public:
- // add an optional 'bare word' argument (eg 'help')
- // 'name' is not used on the command line, but for reporting
- void addOptionalArgument(const std::string& name, std::string* value);
-
- // add a required 'bare word' argument (eg input data file)
- // 'name' is not used on the command line, but for reporting
- void addRequiredArgument(const std::string& name, std::string* value);
-
- // set rules for key=>value options
- public:
- // add standard option with arguments ( -Wall, -O2, --type=foo )
- void addOption(const char shortName, const std::string& longName, std::string* value);
- void addOption(const std::string& longName, std::string* value);
-
- // add an option whose argument is optional (eg --log may default to dumping to stderr, unless a file is specified )
- // must provide a default string
- void addOptionalOption(const char shortName, const std::string& longName, std::string* value, const std::string& defaultValue);
- void addOptionalOption(const std::string& longName, std::string* value, const std::string& defaultValue);
-
- // add a repeatable option (like compiler includes -I/path/ -I/path2/ etc)
- // only supporting one type of name (short/long) for this option for now
- void addRepeatableOption(const char shortName, std::vector<std::string>* values); // single char version
- void addRepeatableOption(const std::string& longName, std::vector<std::string>* values); // long name version
-
- // add an option that takes a variable number of arguments ( --files f1 f2 f3 f4... )
- void addVariableLengthOption(const std::string& longName, std::vector<std::string>* values);
-
- // set rules for on/off switch
- public:
- // on/off switch ( --verbose --searchOnly ) only long names supported for now
- void addSwitch(const std::string& longName, bool* ok);
-
- // parse and query methods
- public:
-
- // get application name
- const std::string& applicationName(void) const;
-
- // query if particular 'bare-word' argument is set
- bool isSet(const std::string& name) const;
-
- // runs parser (does validation and assign values to arguments)
- // returns success/fail
- bool parse(void);
-
- void print(void);
-
- // define Option-related types & enums
- private:
- enum OptionType { OptUnknown = 0
- , OptEnd
- , OptSwitch
- , OptArg1
- , OptOptional
- , OptRepeat
- , OptVariable
- };
-
- // define Option
- struct Option {
-
- // ctor
- Option(OptionType t = OptUnknown, const char shortName = 0, const std::string& longName = "")
- : Type(t)
- , ShortName(shortName)
- , LongName(longName)
- , BoolValue(0)
- { }
-
- // data members
- OptionType Type;
- char ShortName;
- std::string LongName;
- union {
- bool* BoolValue;
- std::string* StringValue;
- std::vector<std::string>* ListValue;
- };
- std::string Default;
- };
-
- // internal methods
- private:
- void init(int argc, char* argv[], int offset);
- void saveOption(const Option& opt); // const & ?? he doesnt use it - why?
- void setSwitch(const Option& opt);
-
- // data members
- private:
- std::vector<Option> m_options;
- std::map<std::string, int> m_setOptions;
- std::vector<std::string> m_args;
- std::string m_appname;
-
- int m_numberRequiredArguments;
- int m_numberOptionalArguments;
- Option m_requiredArgument;
- Option m_optionalArgument;
-
- int m_currentArgument;
-};
-
-inline
-GetOpt::GetOpt(int argc, char* argv[], int offset)
-{
- init(argc, argv, offset);
-}
-
-inline
-GetOpt::~GetOpt(void) { }
-
-// add an optional 'bare word' argument (eg 'help')
-// 'name' is not used on the command line, but for reporting
-inline
-void GetOpt::addOptionalArgument(const std::string& name, std::string* value) {
-
- Option opt( OptUnknown, 0, name );
- opt.StringValue = value;
- m_optionalArgument = opt;
- ++m_numberOptionalArguments;
- *value = std::string();
-}
-
-// add a required 'bare word' argument (eg input data file)
-// 'name' is not used on the command line, but for reporting
-inline
-void GetOpt::addRequiredArgument(const std::string& name, std::string* value) {
-
- Option opt( OptUnknown, 0, name );
- opt.StringValue = value;
- m_requiredArgument = opt;
- ++m_numberRequiredArguments;
- *value = std::string();
-}
-
-// add standard option with arguments ( -Wall, -O2, --type=foo )
-inline
-void GetOpt::addOption(const char shortName, const std::string& longName, std::string* value) {
-
- Option opt( OptArg1, shortName, longName );
- opt.StringValue = value;
- saveOption(opt);
- *value = std::string();
-}
-
-// add standard option with arguments ( -Wall, -O2, --type=foo )
-inline
-void GetOpt::addOption(const std::string& longName, std::string* value) {
- addOption(0, longName, value);
-}
-
-// add an option whose argument is optional (eg --log may default to dumping to stderr, unless a file is specified )
-// must provide a default string
-// short & long name version
-inline
-void GetOpt::addOptionalOption(const char shortName, const std::string& longName, std::string* value, const std::string& defaultValue) {
-
- Option opt( OptOptional, shortName, longName );
- opt.StringValue = value;
- opt.Default = defaultValue;
- saveOption(opt);
- *value = std::string();
-}
-
-// long name only version
-inline
-void GetOpt::addOptionalOption(const std::string& longName, std::string* value, const std::string& defaultValue) {
- addOptionalOption(0, longName, value, defaultValue);
-}
-
-// add a repeatable option (like compiler includes -I/path/ -I/path2/ etc)
-// only supporting one type of name (short/long) for this option for now
-// short name only version
-inline
-void GetOpt::addRepeatableOption(const char shortName, std::vector<std::string>* values) {
-
- Option opt( OptRepeat, shortName, std::string() );
- opt.ListValue = values;
- saveOption(opt);
- *values = std::vector<std::string>();
-}
-
-// long name only version
-inline
-void GetOpt::addRepeatableOption(const std::string& longName, std::vector<std::string>* values) {
-
- Option opt( OptRepeat, 0, longName );
- opt.ListValue = values;
- saveOption(opt);
- *values = std::vector<std::string>();
-}
-
-// add an option that takes a variable number of arguments ( --files f1 f2 f3 f4... )
-inline
-void GetOpt::addVariableLengthOption(const std::string& longName, std::vector<std::string>* values) {
-
- Option opt( OptVariable, 0, longName );
- opt.ListValue = values;
- saveOption(opt);
- *values = std::vector<std::string>();
-}
-
-// on/off switch ( --verbose --searchOnly ) only long names supported for now
-inline
-void GetOpt::addSwitch(const std::string& longName, bool* ok) {
-
- Option opt( OptSwitch, 0, longName );
- opt.BoolValue = ok;
- saveOption(opt);
- *ok = false;
-}
-
-inline
-const std::string& GetOpt::applicationName(void) const {
- return m_appname;
-}
-
-inline
-void GetOpt::init(int argc, char* argv[], int offset) {
-
- m_numberRequiredArguments = 0;
- m_numberOptionalArguments = 0;
- m_currentArgument = 1;
-
- if ( argc > 0 ) {
-
- // store app name
- std::string fullPath = argv[0];
- size_t lastSlash = fullPath.find_last_of("/\\"); // should work on Unix- and Windows-style paths
- m_appname = fullPath.substr(lastSlash + 1);
-
- // store remaining arguments from offset to end
- for (int i = offset + 1; i < argc; ++i) {
- m_args.push_back( argv[i] );
- }
-
- } else {
- std::cerr << "GetOpt ERROR: No arguments given." << std::endl;
- exit(1);
- }
-}
-
-// query if particular 'bare-word' argument is set
-inline
-bool GetOpt::isSet(const std::string& name) const {
- return ( m_setOptions.find(name) != m_setOptions.end() );
-}
-
-// runs parser (does validation and assign values to arguments)
-// returns success/fail
-inline
-bool GetOpt::parse(void) {
-
- // initialize argument stack (reversed input args)
- std::vector<std::string> argStack( m_args.rbegin(), m_args.rend() );
-
- // initialize state
- enum State { StartingState, ExpectingState, OptionalState };
- State state = StartingState;
-
- // initialize token types
- enum TokenType { LongOpt, ShortOpt, Arg, End };
- TokenType token = End;
- TokenType currentType = End;
-
- // store option list bounds
- std::vector<Option>::const_iterator optBegin = m_options.begin();
- std::vector<Option>::const_iterator optEnd = m_options.end();
-
- // declare currentOption
- Option currentOption;
-
- // we're going to fake an 'End' argument
- bool isExtraLoopNeeded = true;
-
- // iterate through stack contents & do one extra loop for the fake 'End'
- while ( !argStack.empty() || isExtraLoopNeeded ) {
-
- std::string arg;
- std::string originalArg; // store the original arg because we're going to mangle 'arg'
-
- // if contents on the arg stack
- if ( !argStack.empty() ) {
-
- arg = argStack.back();
- argStack.pop_back();
- ++m_currentArgument;
- originalArg = arg;
-
- // long option version
- if ( arg.substr(0,2) == "--" ) {
-
- // set token type
- token = LongOpt;
-
- // strip the '--'
- arg = arg.substr(2);
-
- // make sure there's still somthing there
- if ( arg.empty() ) {
- std::cerr << "'--' feature is not supported, yet." << std::endl;
- exit(1);
- }
-
- // split any key=value style args
- size_t foundEqual = arg.find('=');
- if ( foundEqual != std::string::npos ) {
-
- // push value back onto stack
- argStack.push_back( arg.substr(foundEqual+1) );
- --m_currentArgument;
-
- // save key as current arg
- arg = arg.substr(0, foundEqual);
- }
-
- }
-
- // short option version
- else if ( arg.at(0) == '-' ) {
-
- // set token type
- token = ShortOpt;
-
- // if option is directly followed by argument (eg -Wall), push that arg back onto stack
- if ( arg.length() > 2 ) {
- argStack.push_back( arg.substr(2) );
- --m_currentArgument;
- }
-
- // strip the '-'
- arg = arg[1];
- }
-
- // bare-word argument
- else { token = Arg; }
- }
-
- // in fake End iteration
- else { token = End; }
-
- // look up arg in list of known options, modify token type if necessary
- Option opt;
- if ( token != End ) {
-
- // look up arg in option list
- std::vector<Option>::const_iterator optIter = optBegin;
- for ( ; optIter != optEnd; ++optIter ) {
- const Option& o = (*optIter);
- if ( (token == LongOpt && arg == o.LongName) ||
- (token == ShortOpt && arg.at(0) == o.ShortName) ) {
- opt = o;
- break;
- }
- }
-
- // modify token type if needed
- if ( token == LongOpt && opt.Type == OptUnknown ) {
- if ( currentOption.Type != OptVariable ) {
- std::cerr << "GetOpt ERROR: Unknown option --" << arg << std::endl;
- return false;
- } else {
- token = Arg;
- }
- } else if ( token == ShortOpt && opt.Type == OptUnknown ) {
- if ( currentOption.Type != OptVariable ) {
- std::cerr << "GetOpt ERROR: Unknown option -" << arg.at(0) << std::endl;
- return false;
- } else {
- token = Arg;
- }
- }
- } else { opt = Option(OptEnd); }
-
-
- // interpret result
- switch ( state ) {
-
- case ( StartingState ) :
-
- if ( opt.Type == OptSwitch ) {
- setSwitch(opt);
- m_setOptions.insert( std::pair<std::string, int>(opt.LongName, 1) );
- m_setOptions.insert( std::pair<std::string, int>((const char*)&opt.ShortName, 1) );
- } else if ( opt.Type == OptArg1 || opt.Type == OptRepeat ) {
- state = ExpectingState;
- currentOption = opt;
- currentType = token;
- m_setOptions.insert( std::pair<std::string, int>(opt.LongName, 1) );
- m_setOptions.insert( std::pair<std::string, int>((const char*)&opt.ShortName, 1) );
- } else if ( opt.Type == OptOptional || opt.Type == OptVariable ) {
- state = OptionalState;
- currentOption = opt;
- currentType = token;
- m_setOptions.insert( std::pair<std::string, int>(opt.LongName, 1) );
- m_setOptions.insert( std::pair<std::string, int>((const char*)&opt.ShortName, 1) );
- } else if ( opt.Type == OptEnd ) {
- // do nothing (we're almost done here)
- } else if ( opt.Type == OptUnknown && token == Arg ) {
- if ( m_numberRequiredArguments > 0 ) {
- if ( (*m_requiredArgument.StringValue).empty() ) {
- *m_requiredArgument.StringValue = arg;
- } else {
- std::cerr << "Too many bare arguments" << std::endl;
- return false;
- }
- }
-
- else if ( m_numberOptionalArguments > 0 ) {
- if ( (*m_optionalArgument.StringValue).empty() ) {
- *m_optionalArgument.StringValue = arg;
- } else {
- std::cerr << "Too many bare arguments" << std::endl;
- return false;
- }
- }
- } else {
- std::cerr << "GetOpt ERROR: Unhandled StartingState case: " << opt.Type << std::endl;
- exit(1);
- }
-
- break;
-
- case ( ExpectingState ) :
-
- if ( token == Arg ) {
- if ( currentOption.Type == OptArg1 ) {
- *currentOption.StringValue = arg;
- state = StartingState;
- } else if ( currentOption.Type == OptRepeat ) {
- currentOption.ListValue->push_back(arg);
- state = StartingState;
- } else {
- std::cerr << "GetOpt ERROR: Unhandled ExpectingState case: " << currentOption.Type << std::endl;
- exit(1);
- }
- } else {
- std::string name = (currentType == LongOpt) ? currentOption.LongName : (const char*)¤tOption.ShortName;
- std::cerr << "GetOpt ERROR: Expected an argument after option: " << name << std::endl;
- exit(1);
- }
-
- break;
-
- case ( OptionalState ) :
-
- if ( token == Arg ) {
- if ( currentOption.Type == OptOptional ) {
- *currentOption.StringValue = arg;
- state = StartingState;
- } else if ( currentOption.Type == OptVariable ) {
- currentOption.ListValue->push_back(originalArg);
- // stay in this state
- } else {
- std::cerr << "GetOpt ERROR: Unhandled OptionalState case: " << currentOption.Type << std::endl;
- exit(1);
- }
- } else {
-
- // optional argument not specified
- if ( currentOption.Type == OptOptional ) {
- *currentOption.StringValue = currentOption.Default;
- }
-
- if ( token != End ) {
- // re-evaluate current argument
- argStack.push_back( originalArg );
- --m_currentArgument;
- }
-
- state = StartingState;
- }
-
- break;
- }
-
- if ( token == End ) {
- isExtraLoopNeeded = false;
- }
- }
-
- // check that required argument has been satisfied
- if ( m_numberRequiredArguments > 0 && (*m_requiredArgument.StringValue).empty() ) {
- std::cerr << "Lacking required argument" << std::endl;
- return false;
- }
-
- return true;
-}
-
-inline
-void GetOpt::print(void) {
-
- std::cout << "---------------------------------" << std::endl;
- std::cout << "Options for app: " << m_appname << std::endl;
- std::cout << std::endl;
- std::cout << "Args: ";
- std::vector<std::string>::const_iterator argIter = m_args.begin();
- std::vector<std::string>::const_iterator argEnd = m_args.end();
- for ( ; argIter != argEnd; ++argIter ) {
- std::cout << (*argIter) << " ";
- }
- std::cout << std::endl;
-}
-
-inline
-void GetOpt::saveOption(const Option& opt) {
- // check for conflicts (duplicating options) ??
- m_options.push_back(opt);
-}
-
-inline
-void GetOpt::setSwitch(const Option& opt) {
- assert( opt.Type == OptSwitch );
- *opt.BoolValue = true;
-}
-
-} // namespace BamTools
-
-#endif // BAMTOOLS_GETOPT_H
--- /dev/null
+// ***************************************************************************
+// bamtools_header.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints the SAM-style header from a single BAM file (or merged header from
+// multiple BAM files) to stdout.
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "bamtools_header.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+#include "BamMultiReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// HeaderSettings implementation
+
+struct HeaderTool::HeaderSettings { // settings for 'bamtools header'; its members are handed to Options::AddValueOption by the HeaderTool constructor
+
+    // flags: HeaderTool::Run() reads this to decide whether to fall back to the default input
+    bool HasInputBamFilename;
+
+    // option values: list of input BAM filenames
+    vector<string> InputFiles;
+
+    // constructor: the flag starts false and the file list starts empty
+    HeaderSettings(void)
+        : HasInputBamFilename(false)
+    { }
+};
+
+// ---------------------------------------------
+// HeaderTool implementation
+
+HeaderTool::HeaderTool(void) // allocates the settings object and registers this tool's program info and options with Options
+    : AbstractTool()
+    , m_settings(new HeaderSettings) // released in ~HeaderTool()
+{
+    // set program details (presumably tool name, description and usage arguments - confirm against bamtools_options.h)
+    Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "-in <filename> [-in <filename> ... ] ");
+
+    // set up options: the call is given the flag and the filename list of *m_settings
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+}
+
+HeaderTool::~HeaderTool(void) { // frees the settings object allocated by the constructor
+    delete m_settings;
+    m_settings = 0; // null the pointer after freeing it
+}
+
+int HeaderTool::Help(void) { // shows help by calling Options::DisplayHelp(); always returns 0
+    Options::DisplayHelp();
+    return 0;
+}
+
+int HeaderTool::Run(int argc, char* argv[]) { // runs 'bamtools header': parses options, opens the input file(s) and writes the header text to stdout; always returns 0
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // set to default input (Options::StandardIn()) if none provided
+    if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn());
+
+    // open files (NOTE(review): the meaning of the 'false' argument and the outcome of Open() are not visible here - confirm against BamMultiReader.h)
+    BamMultiReader reader;
+    reader.Open(m_settings->InputFiles, false);
+
+    // dump header contents to stdout
+    cout << reader.GetHeaderText() << endl;
+
+    // clean up & exit
+    reader.Close();
+    return 0;
+}
\ No newline at end of file
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
-// Prints the SAM-style header from a single BAM file (or merged header from
-// multiple BAM files) to stdout.
+// Prints the SAM-style header from a single BAM file (or merged header from
+// multiple BAM files) to stdout.
// ***************************************************************************
#ifndef BAMTOOLS_HEADER_H
#define BAMTOOLS_HEADER_H
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "BamReader.h"
-#include "BamMultiReader.h"
-// #include "GetOpt.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamHeaderHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools header [--in FILE [FILE] [FILE] ...]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input file(s) to dump header contents from [stdin]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamHeader(int argc, char* argv[]) {
-
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::vector<std::string> inputFilenames;
- options.addVariableLengthOption("in", &inputFilenames);
-
- if ( !options.parse() ) return BamHeaderHelp();
- if ( inputFilenames.empty() ) { inputFilenames.push_back("stdin"); }
- // open files
- BamMultiReader reader;
- reader.Open(inputFilenames, false);
+// Tool behind "bamtools header": prints the header text of one or more BAM
+// files. Its option state lives in HeaderSettings, which is only
+// forward-declared here and defined in the implementation file.
+class HeaderTool : public AbstractTool {
+
+    public:
+        HeaderTool(void);
+        ~HeaderTool(void);
+
+    public:
+        int Help(void);
+        int Run(int argc, char* argv[]);
-    // dump header contents to stdout
-    std::cout << reader.GetHeaderText() << std::endl;
-
-    // clean up & exit
-    reader.Close();
-    return 0;
-}
-
+    private:
+        struct HeaderSettings;
+        HeaderSettings* m_settings;
+};
+
} // namespace BamTools
#endif // BAMTOOLS_HEADER_H
--- /dev/null
+// ***************************************************************************
+// bamtools_index.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 26 May 2010
+// ---------------------------------------------------------------------------
+// Creates a BAM index (".bai") file for the provided BAM file.
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+
+#include "bamtools_index.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// IndexSettings implementation
+
+struct IndexTool::IndexSettings {
+
+    // set to true by the option parser once "-in" is seen on the command line
+    bool HasInputBamFilename;
+
+    // BAM file to index; starts out as the stdin placeholder
+    string InputBamFilename;
+
+    // defaults: no "-in" seen, input is standard input
+    IndexSettings()
+        : HasInputBamFilename(false)
+        , InputBamFilename(Options::StandardIn())
+    { }
+};
+
+// ---------------------------------------------
+// IndexTool implementation
+
+IndexTool::IndexTool()
+    : AbstractTool()
+    , m_settings(new IndexSettings)
+{
+    // name, one-line description and example arguments used by the help menu
+    Options::SetProgramInfo("bamtools index",
+                            "creates index for BAM file",
+                            "-in <filename>");
+
+    // register "-in"; the help menu lists stdin as its default
+    OptionGroup* ioGroup = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "",
+                            m_settings->HasInputBamFilename,
+                            m_settings->InputBamFilename,
+                            ioGroup,
+                            Options::StandardIn());
+}
+
+IndexTool::~IndexTool() {
+    // the settings object was allocated in the constructor; free it and
+    // clear the pointer so it cannot be reused by accident
+    delete m_settings;
+    m_settings = 0;
+}
+
+int IndexTool::Help() {
+    // print the help menu built from the options registered in the constructor
+    Options::DisplayHelp();
+    return 0;
+}
+
+int IndexTool::Run(int argc, char* argv[]) {
+
+    // let the shared option parser fill in m_settings
+    Options::Parse(argc, argv, 1);
+
+    // open the input named by "-in" (or the stdin placeholder)
+    BamReader bamReader;
+    bamReader.Open(m_settings->InputBamFilename);
+
+    // ask the reader to build the index for that file
+    bamReader.CreateIndex();
+
+    // close the input and report success
+    bamReader.Close();
+    return 0;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
-// Creates a BAM index (".bai") file for the provided BAM file.
+// Creates a BAM index (".bai") file for the provided BAM file
// ***************************************************************************
#ifndef BAMTOOLS_INDEX_H
#define BAMTOOLS_INDEX_H
-#include <iostream>
-#include <string>
-
-#include "BamReader.h"
-// #include "GetOpt.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamIndexHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools index [--nclist] FILE" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--nclist Use NCList indexing scheme (faster?) [off] ** JUST HERE AS POSSIBLE SWITCH EXAMPLE FOR NOW **" << std::endl;
- std::cerr << "\tFILE Input BAM file to generate index from [REQUIRED]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamIndex(int argc, char* argv[]) {
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string inputFilename;
- options.addRequiredArgument("input", &inputFilename);
-
- bool useNCList;
- options.addSwitch("nclist", &useNCList);
-
- if ( !options.parse() ) return BamIndexHelp();
-
- // open our BAM reader
- BamReader reader;
- reader.Open(inputFilename);
-
- // create index for BAM file
- reader.CreateIndex();
-
- // clean & exit
- reader.Close();
- return 0;
-}
-
+// Tool behind "bamtools index": creates an index for a single BAM file.
+class IndexTool : public AbstractTool {
+
+    // construction / destruction
+    public:
+        IndexTool();
+        ~IndexTool();
+
+    // tool entry points
+    public:
+        int Help();
+        int Run(int argc, char* argv[]);
+
+    // option state; the struct is defined in the implementation file
+    private:
+        struct IndexSettings;
+        IndexSettings* m_settings;
+};
+
} // namespace BamTools
#endif // BAMTOOLS_INDEX_H
--- /dev/null
+// ***************************************************************************
+// bamtools_merge.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 26 May 2010
+// ---------------------------------------------------------------------------
+// Merges multiple BAM files into one.
+//
+// ** Provide selectable region? eg chr2:10000..20000
+//
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "bamtools_merge.h"
+#include "bamtools_options.h"
+#include "bamtools_utilities.h"
+#include "BamMultiReader.h"
+#include "BamWriter.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// MergeSettings implementation
+
+struct MergeTool::MergeSettings {
+
+    // each flag is set to true by the option parser when the matching
+    // option ("-in", "-out", "-region") is seen on the command line
+    bool HasInputBamFilename;
+    bool HasOutputBamFilename;
+    bool HasRegion;
+
+    // every filename collected from "-in"
+    vector<string> InputFiles;
+
+    // destination given by "-out" and region string given by "-region"
+    string OutputFilename;
+    string Region;
+
+    // defaults: nothing seen yet, output goes to the stdout placeholder
+    MergeSettings()
+        : HasInputBamFilename(false)
+        , HasOutputBamFilename(false)
+        , HasRegion(false)
+        , OutputFilename(Options::StandardOut())
+    { }
+};
+
+// ---------------------------------------------
+// MergeTool implementation
+
+MergeTool::MergeTool()
+    : AbstractTool()
+    , m_settings(new MergeSettings)
+{
+    // name, one-line description and example arguments used by the help menu
+    Options::SetProgramInfo("bamtools merge",
+                            "merges multiple BAM files into one",
+                            "[-in <filename> ...] [-region REGION] [-out <filename>]");
+
+    // input/output options - fully registered before the next group is created
+    OptionGroup* ioGroup = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "",
+                            m_settings->HasInputBamFilename, m_settings->InputFiles, ioGroup);
+    Options::AddValueOption("-out", "BAM filename", "the output BAM file", "",
+                            m_settings->HasOutputBamFilename, m_settings->OutputFilename, ioGroup);
+
+    // filter options
+    OptionGroup* filterGroup = Options::CreateOptionGroup("Filters");
+    Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "",
+                            m_settings->HasRegion, m_settings->Region, filterGroup);
+}
+
+MergeTool::~MergeTool() {
+    // the settings object was allocated in the constructor; free it and
+    // clear the pointer so it cannot be reused by accident
+    delete m_settings;
+    m_settings = 0;
+}
+
+int MergeTool::Help() {
+    // print the help menu built from the options registered in the constructor
+    Options::DisplayHelp();
+    return 0;
+}
+
+// Entry point for "bamtools merge".
+//
+// NOTE(review): the actual merge logic below is commented out, so this
+// function currently only parses the command line, applies the stdin default
+// and returns 0 - no file is read and no output is written. The "-region"
+// branch of the disabled code is also still a stub.
+int MergeTool::Run(int argc, char* argv[]) {
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // set to default input if none provided
+    if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn());
+
+//     // opens the BAM files without checking for indexes
+//     BamMultiReader reader;
+//     reader.Open(m_settings->InputFiles, false);
+//
+//     // retrieve header & reference dictionary info
+//     std::string mergedHeader = reader.GetHeaderText();
+//     RefVector references = reader.GetReferenceData();
+//
+//     // open BamWriter
+//     BamWriter writer;
+//     writer.Open(m_settings->OutputFilename, mergedHeader, references);
+//
+//     // if desired region provided
+//     if ( m_settings->HasRegion ) {
+//         // parse region string
+//         // only get alignments from this region
+//     }
+//
+//     // else get all alignments
+//     else {
+//         // store alignments to output file
+//         BamAlignment bAlignment;
+//         while (reader.GetNextAlignment(bAlignment)) {
+//             writer.SaveAlignment(bAlignment);
+//         }
+//     }
+//
+//     // clean & exit
+//     reader.Close();
+//     writer.Close();
+    return 0;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
-// Merges multiple BAM files into one.
-//
-// ** Provide selectable region? eg chr2:10000..20000
-//
+// Merges multiple BAM files into one
// ***************************************************************************
#ifndef BAMTOOLS_MERGE_H
#define BAMTOOLS_MERGE_H
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "BamMultiReader.h"
-#include "BamWriter.h"
-// #include "GetOpt.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamMergeHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools merge [--out FILE] [--region REGION] [--in FILE [FILE] [FILE] ...] " << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input BAM file(s) [stdin]" << std::endl;
- std::cerr << "\t--region REGION Only keep alignments from this region. [all alignments]" << std::endl;
- std::cerr << "\t REGION format - (eg chr2:1000..2000)" << std::endl;
- std::cerr << "\t--out FILE Destination for merge results [stdout]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamMerge(int argc, char* argv[]) {
- // only 'bamtool merge', show help
- if ( argc == 2 ) return BamMergeHelp();
+// Tool behind "bamtools merge": merges multiple BAM files into one.
+class MergeTool : public AbstractTool {
+
+    // construction / destruction
+    public:
+        MergeTool();
+        ~MergeTool();
+
+    // tool entry points
+    public:
+        int Help();
+        int Run(int argc, char* argv[]);
+
+    // option state; the struct is defined in the implementation file
+    private:
+        struct MergeSettings;
+        MergeSettings* m_settings;
+};
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string outputFilename = "";
- options.addOption("out", &outputFilename);
-
- std::vector<std::string> inputFilenames;
- options.addVariableLengthOption("in", &inputFilenames);
-
- std::string regionString;
- options.addOption("region", ®ionString);
-
- if ( !options.parse() || inputFilenames.empty() ) return BamMergeHelp();
- if ( inputFilenames.empty() ) { inputFilenames.push_back("stdin"); }
- if ( outputFilename.empty() ) { outputFilename = "stdout"; }
-
- // opens the BAM files without checking for indexes
- BamMultiReader reader;
- reader.Open(inputFilenames, false);
-
- // retrieve header & reference dictionary info
- std::string mergedHeader = reader.GetHeaderText();
- RefVector references = reader.GetReferenceData();
-
- // open BamWriter
- BamWriter writer;
- writer.Open(outputFilename, mergedHeader, references);
-
- // if desired region provided
- if ( !regionString.empty() ) {
- // parse region string
- // only get alignments from this region
- }
-
- // else get all alignments
- else {
- // store alignments to output file
- BamAlignment bAlignment;
- while (reader.GetNextAlignment(bAlignment)) {
- writer.SaveAlignment(bAlignment);
- }
- }
- // clean & exit
- reader.Close();
- writer.Close();
- return 0;
-}
-
} // namespace BamTools
-#endif // BAMTOOLS_MERGE_H
\ No newline at end of file
+#endif // BAMTOOLS_MERGE_H
--- /dev/null
+#include "bamtools_options.h"
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+using namespace std;
+using namespace BamTools;
+
+string Options::m_programName; // the program name (set by SetProgramInfo, printed on the usage line)
+string Options::m_description; // the main description (set by SetProgramInfo, printed first in the help menu)
+string Options::m_exampleArguments; // the example arguments (set by SetProgramInfo, printed after the program name)
+vector<OptionGroup> Options::m_optionGroups; // stores the option groups, in creation order, for the help menu
+map<string, OptionValue> Options::m_optionsMap; // stores the options in a map keyed by argument text (e.g. "-in"), used by Parse
+string Options::m_stdin = "stdin"; // string representation of stdin (returned by StandardIn)
+string Options::m_stdout = "stdout"; // string representation of stdout (returned by StandardOut)
+
+// adds a simple (flag-only) option to the parser
+//
+// argument          : option text as typed on the command line, e.g. "-noheader"
+// optionDescription : text shown for the option in the help menu
+// foundArgument     : set to true by Parse when the option is present
+// group             : help-menu group the option is listed under
+void Options::AddOption(const string& argument, const string& optionDescription, bool& foundArgument, OptionGroup* group) {
+
+    // help-menu entry: a flag takes no value
+    Option helpEntry;
+    helpEntry.Argument    = argument;
+    helpEntry.Description = optionDescription;
+    helpEntry.StoreValue  = false;
+    group->Options.push_back(helpEntry);
+
+    // parser entry: only the "was it present" flag is recorded
+    OptionValue parseEntry;
+    parseEntry.pFoundArgument = &foundArgument;
+    parseEntry.StoreValue     = false;
+    m_optionsMap[argument] = parseEntry;
+}
+
+// creates an option group
+//
+// Appends an empty group called groupName and returns a pointer to it.
+// NOTE: the pointer refers to an element of a std::vector, so it is only
+// guaranteed to stay valid until the next call to CreateOptionGroup - add all
+// options to a group before creating the next one.
+OptionGroup* Options::CreateOptionGroup(const string& groupName) {
+    OptionGroup newGroup;
+    newGroup.Name = groupName;
+    m_optionGroups.push_back(newGroup);
+    return &m_optionGroups.back();
+}
+
+// displays the help menu
+//
+// Prints the description, the usage line and every registered option (by
+// group, with default values appended in brackets and long descriptions
+// word-wrapped) to stdout, then terminates the process with exit code 1.
+// This function does not return to its caller.
+void Options::DisplayHelp(void) {
+
+    // initialize
+    char argumentBuffer[ARGUMENT_LENGTH + 1];
+    ostringstream sb;
+
+    // blanks that push continuation rows over to the description column
+    const string indent(MAX_LINE_LENGTH - DESC_LENGTH, ' ');
+
+    // display the menu
+    printf("Description: %s.\n\n", m_description.c_str());
+
+    printf("Usage: ");
+    printf("%s", m_programName.c_str());
+    printf(" %s\n\n", m_exampleArguments.c_str());
+
+    vector<Option>::const_iterator optionIter;
+    vector<OptionGroup>::const_iterator groupIter;
+
+    for (groupIter = m_optionGroups.begin(); groupIter != m_optionGroups.end(); ++groupIter) {
+
+        printf("%s:\n", groupIter->Name.c_str());
+
+        for (optionIter = groupIter->Options.begin(); optionIter != groupIter->Options.end(); ++optionIter) {
+
+            // left-hand column: the argument, plus "<value>" for value options
+            if (optionIter->StoreValue)
+                snprintf(argumentBuffer, ARGUMENT_LENGTH + 1, "  %s <%s>", optionIter->Argument.c_str(), optionIter->ValueDescription.c_str());
+            else
+                snprintf(argumentBuffer, ARGUMENT_LENGTH + 1, "  %s", optionIter->Argument.c_str());
+            printf("%-35s ", argumentBuffer);
+
+            string description = optionIter->Description;
+
+            // handle default values
+            if (optionIter->HasDefaultValue) {
+
+                sb.str("");
+                sb << description << " [";
+
+                if (optionIter->DefaultValue.is_type<unsigned int>()) {
+                    sb << (unsigned int)optionIter->DefaultValue;
+                } else if (optionIter->DefaultValue.is_type<unsigned char>()) {
+                    sb << (unsigned short)(unsigned char)optionIter->DefaultValue;
+                } else if (optionIter->DefaultValue.is_type<float>()) {
+                    sb << std::fixed << std::setprecision(2) << (float)optionIter->DefaultValue;
+                } else if (optionIter->DefaultValue.is_type<double>()) {
+                    sb << std::fixed << std::setprecision(4) << (double)optionIter->DefaultValue;
+                } else if (optionIter->DefaultValue.is_type<std::string>()) {
+                    const std::string stringValue = optionIter->DefaultValue;
+                    sb << stringValue;
+                } else {
+                    printf("ERROR: Found an unsupported data type for argument %s when casting the default value.\n", optionIter->Argument.c_str());
+                    exit(1);
+                }
+
+                sb << "]";
+                description = sb.str();
+            }
+
+            if (description.size() <= DESC_LENGTH_FIRST_ROW) {
+                printf("%s\n", description.c_str());
+            } else {
+
+                // the first row may be DESC_LENGTH_FIRST_ROW wide and follows the
+                // argument column directly; later rows are DESC_LENGTH wide and
+                // are prefixed with the indent
+                string::size_type limit = DESC_LENGTH_FIRST_ROW;
+                bool isFirstRow = true;
+
+                while (description.size() > limit) {
+
+                    // break at the last space at or before the limit and drop
+                    // that space; if the window holds no space at all, cut the
+                    // word at the limit instead of searching past the start of
+                    // the string
+                    string::size_type cutIndex = description.rfind(' ', limit);
+                    string::size_type resumeIndex = cutIndex + 1;
+                    if (cutIndex == string::npos) {
+                        cutIndex    = limit;
+                        resumeIndex = limit;
+                    }
+
+                    printf("%s%s\n", (isFirstRow ? "" : indent.c_str()), description.substr(0, cutIndex).c_str());
+                    description = description.substr(resumeIndex);
+
+                    isFirstRow = false;
+                    limit = DESC_LENGTH;
+                }
+
+                // handle last row
+                printf("%s%s\n", indent.c_str(), description.c_str());
+            }
+        }
+
+        printf("\n");
+    }
+
+    printf("Help:\n");
+    printf("  --help, -h                        shows this help text\n");
+    exit(1);
+}
+
+// parses the command line
+//
+// argc, argv : the arguments as received by main()
+// offset     : option parsing starts at argv[offset + 1]; tools invoked as
+//              "bamtools <tool> ..." pass 1 to skip the tool name
+//
+// If no arguments are given, or any argument is "-h", "--help" or "help", the
+// help menu is shown (DisplayHelp exits the process). Otherwise every
+// recognised option sets its "found" flag and, for value options, stores the
+// converted value through the pointer registered with AddValueOption.
+// Unrecognised arguments, missing values and missing required options are
+// collected and reported together, after which the process exits with code 1.
+//
+// NOTE(review): end_ptr is never inspected, so numeric values with trailing
+// non-numeric characters are accepted silently.
+void Options::Parse(int argc, char* argv[], int offset) {
+
+    // initialize
+    map<string, OptionValue>::const_iterator ovMapIter;
+    map<string, OptionValue>::const_iterator checkMapIter;
+    const int LAST_INDEX = argc - 1;
+    ostringstream errorBuilder;
+    bool foundError = false;
+    char* end_ptr = NULL;
+    const string ERROR_SPACER(7, ' ');
+
+    // check if we should show the help menu
+    bool showHelpMenu = false;
+    if (argc > 1) {
+        for (int i = 1; i < argc; i++) {
+            const std::string argument = argv[i];
+            if ( (argument == "-h") || (argument == "--help") || (argument == "help") )
+                showHelpMenu = true;
+        }
+    } else showHelpMenu = true;
+
+    if (showHelpMenu)
+        DisplayHelp();
+
+    // check each argument
+    for (int i = offset+1; i < argc; i++) {
+
+        const string argument = argv[i];
+        ovMapIter = m_optionsMap.find(argument);
+
+        if (ovMapIter == m_optionsMap.end()) {
+            errorBuilder << ERROR_SPACER << "An unrecognized argument was found: " << argument << std::endl;
+            foundError = true;
+
+        } else {
+
+            *ovMapIter->second.pFoundArgument = true;
+
+            // grab the value
+            if (ovMapIter->second.StoreValue) {
+
+                if (i < LAST_INDEX) {
+
+                    // check if the next argument is really a command line option
+                    const string val = argv[i + 1];
+                    checkMapIter = m_optionsMap.find(val);
+
+                    if (checkMapIter == m_optionsMap.end()) {
+
+                        ++i;
+
+                        // convert according to the type of the variable that was
+                        // registered for this option
+                        if (ovMapIter->second.VariantValue.is_type<int>()) {
+                            // plain int targets (e.g. a "-num N" style option)
+                            const int int32 = (int)strtol(val.c_str(), &end_ptr, 10);
+                            int* varValue = (int*)ovMapIter->second.pValue;
+                            *varValue = int32;
+                        } else if (ovMapIter->second.VariantValue.is_type<unsigned int>()) {
+                            const unsigned int uint32 = (unsigned int)strtoul(val.c_str(), &end_ptr, 10);
+                            unsigned int* varValue = (unsigned int*)ovMapIter->second.pValue;
+                            *varValue = uint32;
+                        } else if (ovMapIter->second.VariantValue.is_type<unsigned char>()) {
+                            const unsigned char uint8 = (unsigned char)strtoul(val.c_str(), &end_ptr, 10);
+                            unsigned char* varValue = (unsigned char*)ovMapIter->second.pValue;
+                            *varValue = uint8;
+                        } else if (ovMapIter->second.VariantValue.is_type<uint64_t>()) {
+                            const uint64_t uint64 = strtoui64(val.c_str(), &end_ptr, 10);
+                            uint64_t* varValue = (uint64_t*)ovMapIter->second.pValue;
+                            *varValue = uint64;
+                        } else if (ovMapIter->second.VariantValue.is_type<double>()) {
+                            const double d = strtod(val.c_str(), &end_ptr);
+                            double* varValue = (double*)ovMapIter->second.pValue;
+                            *varValue = d;
+                        } else if (ovMapIter->second.VariantValue.is_type<float>()) {
+                            const float f = (float)strtod(val.c_str(), &end_ptr);
+                            float* varValue = (float*)ovMapIter->second.pValue;
+                            *varValue = f;
+                        } else if (ovMapIter->second.VariantValue.is_type<string>()) {
+                            string* pStringValue = (string*)ovMapIter->second.pValue;
+                            *pStringValue = val;
+                        } else if (ovMapIter->second.VariantValue.is_type<vector<string> >()) {
+                            // repeatable option: each occurrence appends one value
+                            vector<string>* pVectorValue = (vector<string>*)ovMapIter->second.pValue;
+                            pVectorValue->push_back(val);
+                        } else {
+                            printf("ERROR: Found an unsupported data type for argument %s when parsing the arguments.\n", argument.c_str());
+                            exit(1);
+                        }
+                    } else {
+                        // the next token is itself a registered option
+                        errorBuilder << ERROR_SPACER << "The argument (" << argument << ") expects a value, but none was found." << endl;
+                        foundError = true;
+                    }
+                } else {
+                    // the option was the last token on the command line
+                    errorBuilder << ERROR_SPACER << "The argument (" << argument << ") expects a value, but none was found." << endl;
+                    foundError = true;
+                }
+            }
+        }
+    }
+
+    // check if we missed any required parameters
+    for (ovMapIter = m_optionsMap.begin(); ovMapIter != m_optionsMap.end(); ++ovMapIter) {
+        if (ovMapIter->second.IsRequired && !*ovMapIter->second.pFoundArgument) {
+            errorBuilder << ERROR_SPACER << ovMapIter->second.ValueTypeDescription << " was not specified. Please use the " << ovMapIter->first << " parameter." << endl;
+            foundError = true;
+        }
+    }
+
+    // print the errors if any were found
+    // (argv[1] is safe to read here: argc <= 1 already ended in DisplayHelp)
+    if (foundError) {
+        printf("ERROR: Some problems were encountered when parsing the command line options:\n");
+        printf("%s\n", errorBuilder.str().c_str());
+        printf("For a complete list of command line options, type \"%s help %s\"\n", argv[0], argv[1]);
+        exit(1);
+    }
+}
+
+// sets the program info
+//
+// Stores the three strings that DisplayHelp prints: the program name and the
+// example arguments (usage line) and the one-line description.
+void Options::SetProgramInfo(const string& programName, const string& description, const string& arguments) {
+    m_programName      = programName;
+    m_description      = description;
+    m_exampleArguments = arguments;
+}
+
+// returns the string used to represent standard input ("stdin")
+const string& Options::StandardIn(void) {
+    return m_stdin;
+}
+
+// returns the string used to represent standard output ("stdout")
+const string& Options::StandardOut(void) {
+    return m_stdout;
+}
--- /dev/null
+// ***************************************************************************
+// bamtools_options.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Parses command line arguments and creates a help menu
+// ---------------------------------------------------------------------------
+// Modified from:
+// The Mosaik suite's command line parser class: COptions
+// (c) 2006 - 2009 Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// Dual licenced under the GNU General Public License 2.0+ license or as
+// a commercial license with the Marth Lab.
+//
+// * Modified to fit BamTools code-style, otherwise code is same. (DB)
+// ***************************************************************************
+
+#ifndef BAMTOOLS_OPTIONS_H
+#define BAMTOOLS_OPTIONS_H
+
+#include <map>
+#include <string>
+#include <vector>
+#include "bamtools_variant.h"
+
+#ifndef WIN32
+ #include <stdint.h>
+#endif
+
+namespace BamTools {
+
+// layout of the help menu printed by Options::DisplayHelp
+// ARGUMENT_LENGTH       : size of the buffer the argument column is formatted into
+// DESC_LENGTH_FIRST_ROW : longest description printed on an option's first row
+// DESC_LENGTH           : longest description chunk on each continuation row
+// MAX_LINE_LENGTH       : with DESC_LENGTH, gives the continuation-row indent
+//                         (MAX_LINE_LENGTH - DESC_LENGTH blanks)
+#define ARGUMENT_LENGTH 35
+#define DESC_LENGTH_FIRST_ROW 42
+#define DESC_LENGTH 39
+#define MAX_LINE_LENGTH 78
+
+// portability shims: give both platforms the names snprintf, int64_t,
+// uint64_t and strtoui64 that the option parser uses
+#ifdef WIN32
+ #define snprintf _snprintf
+ typedef __int64 int64_t;
+ typedef unsigned __int64 uint64_t;
+ #define strtoui64 _strtoui64
+#else
+ #define strtoui64 strtoull
+#endif
+
+// One entry of the help menu: how an option is displayed by DisplayHelp.
+struct Option {
+
+    // option text as typed on the command line, e.g. "-in"
+    std::string Argument;
+    // placeholder shown in angle brackets after the argument
+    std::string ValueDescription;
+    // explanatory text for the option
+    std::string Description;
+    // true when the option takes a value (false for simple flags)
+    bool StoreValue;
+    // true when DefaultValue should be appended to the description
+    bool HasDefaultValue;
+    Variant DefaultValue;
+
+    // by default an option takes a value and has no default to display
+    Option()
+        : StoreValue(true)
+        , HasDefaultValue(false)
+    { }
+};
+
+// One entry of the parser map: where Parse records that an option was seen
+// and where it stores the option's value.
+struct OptionValue {
+
+    // caller-owned flag set to true when the option is present
+    bool* pFoundArgument;
+    // caller-owned variable that receives the parsed value
+    void* pValue;
+    // text used in the "was not specified" error for required options
+    std::string ValueTypeDescription;
+    bool UseVector;
+    // true when the option takes a value (false for simple flags)
+    bool StoreValue;
+    // true when Parse must report an error if the option is absent
+    bool IsRequired;
+    // copy of the target variable; Parse inspects its type to pick a conversion
+    Variant VariantValue;
+
+    // by default: no targets bound, takes a value, not required
+    OptionValue()
+        : pFoundArgument(NULL)
+        , pValue(NULL)
+        , UseVector(false)
+        , StoreValue(true)
+        , IsRequired(false)
+    { }
+};
+
+// A named section of the help menu together with the options listed under it.
+struct OptionGroup {
+    // heading printed above the group's options
+    std::string Name;
+    // help-menu entries, in the order they were added
+    std::vector<Option> Options;
+};
+
+// Static command line parser and help-menu generator shared by all tools.
+class Options {
+
+    // registration of options
+    public:
+        // adds a simple (flag-only) option to the parser
+        static void AddOption(const std::string& argument, const std::string& optionDescription, bool& foundArgument, OptionGroup* group);
+
+        // adds a value option to the parser; the parsed value is written to 'val'
+        template<typename T>
+        static void AddValueOption(const std::string& argument, const std::string& valueDescription,
+                                   const std::string& optionDescription, const std::string& valueTypeDescription,
+                                   bool& foundArgument, T& val, OptionGroup* group);
+
+        // same as above, additionally recording a default value for the help menu
+        template<typename T, typename D>
+        static void AddValueOption(const std::string& argument, const std::string& valueDescription,
+                                   const std::string& optionDescription, const std::string& valueTypeDescription,
+                                   bool& foundArgument, T& val, OptionGroup* group, D& defaultValue);
+
+    // help menu, parsing and program information
+    public:
+        // creates an option group
+        static OptionGroup* CreateOptionGroup(const std::string& groupName);
+        // displays the help menu
+        static void DisplayHelp(void);
+        // parses the command line
+        static void Parse(int argc, char* argv[], int offset = 0);
+        // sets the program info
+        static void SetProgramInfo(const std::string& programName, const std::string& description, const std::string& arguments);
+        // returns string representation of stdin
+        static const std::string& StandardIn(void);
+        // returns string representation of stdout
+        static const std::string& StandardOut(void);
+
+    // shared state
+    private:
+        static std::string m_programName;                        // the program name
+        static std::string m_description;                        // the main description
+        static std::string m_exampleArguments;                   // the example arguments
+        static std::vector<OptionGroup> m_optionGroups;          // the option groups
+        static std::map<std::string, OptionValue> m_optionsMap;  // the options, keyed by argument text
+        static std::string m_stdin;                              // string representation of stdin
+        static std::string m_stdout;                             // string representation of stdout
+};
+
+// adds a value option to the parser
+//
+// argument             : option text as typed on the command line
+// valueDescription     : placeholder shown after the argument in the help menu
+// optionDescription    : explanatory text shown in the help menu
+// valueTypeDescription : a non-empty string marks the option as required and
+//                        is used in the "was not specified" error
+// foundArgument        : set to true by Parse when the option is present
+// val                  : variable that receives the parsed value
+// group                : help-menu group the option is listed under
+template<typename T>
+void Options::AddValueOption(const std::string& argument,
+                             const std::string& valueDescription,
+                             const std::string& optionDescription,
+                             const std::string& valueTypeDescription,
+                             bool& foundArgument,
+                             T& val,
+                             OptionGroup* group)
+{
+    // help-menu entry
+    Option helpEntry;
+    helpEntry.Argument         = argument;
+    helpEntry.ValueDescription = valueDescription;
+    helpEntry.Description      = optionDescription;
+    group->Options.push_back(helpEntry);
+
+    // parser entry: remember where to store the flag and the value, and keep
+    // a typed copy of the target so Parse can choose the right conversion
+    OptionValue parseEntry;
+    parseEntry.pFoundArgument       = &foundArgument;
+    parseEntry.pValue               = (void*)&val;
+    parseEntry.VariantValue         = val;
+    parseEntry.ValueTypeDescription = valueTypeDescription;
+    parseEntry.IsRequired           = !valueTypeDescription.empty();
+    m_optionsMap[argument] = parseEntry;
+}
+
+// adds a value option to the parser (with a default value)
+//
+// Takes the same arguments as the overload without a default, plus:
+// defaultValue : value shown in brackets after the description in the help
+//                menu; it is not written to 'val' here
+template<typename T, typename D>
+void Options::AddValueOption(const std::string& argument,
+                             const std::string& valueDescription,
+                             const std::string& optionDescription,
+                             const std::string& valueTypeDescription,
+                             bool& foundArgument,
+                             T& val,
+                             OptionGroup* group,
+                             D& defaultValue)
+{
+    // help-menu entry, including the default to display
+    Option helpEntry;
+    helpEntry.Argument         = argument;
+    helpEntry.ValueDescription = valueDescription;
+    helpEntry.Description      = optionDescription;
+    helpEntry.HasDefaultValue  = true;
+    helpEntry.DefaultValue     = defaultValue;
+    group->Options.push_back(helpEntry);
+
+    // parser entry: remember where to store the flag and the value, and keep
+    // a typed copy of the target so Parse can choose the right conversion
+    OptionValue parseEntry;
+    parseEntry.pFoundArgument       = &foundArgument;
+    parseEntry.pValue               = (void*)&val;
+    parseEntry.VariantValue         = val;
+    parseEntry.ValueTypeDescription = valueTypeDescription;
+    parseEntry.IsRequired           = !valueTypeDescription.empty();
+    m_optionsMap[argument] = parseEntry;
+}
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_OPTIONS_H
\ No newline at end of file
--- /dev/null
+// ***************************************************************************
+// bamtools_sam.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints a BAM file in the text-based SAM format.
+// ***************************************************************************
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include "bamtools_sam.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// reference data of the BAM file being printed: assigned in SamTool::Run,
+// indexed by RefID / MateRefID in PrintSAM
+RefVector references;
+
+// ---------------------------------------------
+// print BamAlignment in SAM format
+
+// Writes one alignment to stdout as a single tab-delimited line.
+//
+// The CIGAR and TAG columns are still placeholders. Reference names are
+// looked up in the file-level 'references' table; an ID that does not fall
+// inside that table is printed as "*" instead of being used as an index.
+void PrintSAM(const BamAlignment& a) {
+
+    // tab-delimited
+    // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
+
+    // ******************************* //
+    // ** NOT FULLY IMPLEMENTED YET ** //
+    //******************************** //
+    //
+    // Todo : build CIGAR string
+    //        build TAG string
+    //        there are some quirks, per the spec, regarding when to use '=' or not
+    //
+    // ******************************* //
+
+    // validity check on RefID / MateRefID: only IDs inside the reference
+    // table may be used as an index
+    // NOTE(review): presumably unmapped reads carry a negative ID - confirm
+    // against the BAM reader
+    const int referenceCount = (int)references.size();
+    const bool hasReference     = (a.RefID >= 0) && (a.RefID < referenceCount);
+    const bool hasMateReference = a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < referenceCount);
+
+    const string referenceName     = ( hasReference     ? references[a.RefID].RefName     : "*" );
+    const string mateReferenceName = ( hasMateReference ? references[a.MateRefID].RefName : "*" );
+
+    // build CIGAR string
+    string cigarString("CIGAR:NOT YET");
+
+    // build TAG string
+    string tagString("TAG:NOT YET");
+
+    // print BamAlignment to stdout in SAM format
+    // NOTE(review): Position is printed exactly as stored - confirm whether
+    // the reader reports it 0-based, since SAM positions are 1-based
+    cout << a.Name << '\t'
+         << a.AlignmentFlag << '\t'
+         << referenceName << '\t'
+         << a.Position << '\t'
+         << a.MapQuality << '\t'
+         << cigarString << '\t'
+         << mateReferenceName << '\t'
+         << ( a.IsPaired() ? a.MatePosition : 0 ) << '\t'
+         << ( a.IsPaired() ? a.InsertSize : 0 ) << '\t'
+         << a.QueryBases << '\t'
+         << a.Qualities << '\t'
+         << tagString << endl;
+}
+
+// ---------------------------------------------
+// SamSettings implementation
+
+struct SamTool::SamSettings {
+
+    // flags - each is set to true by the option parser when the matching
+    // option ("-in", "-num", "-noheader") is seen on the command line
+    bool HasInputBamFilename;
+    bool HasMaximumOutput;
+    bool IsOmittingHeader;
+
+    // filenames
+    string InputBamFilename;
+
+    // other parameters
+    // upper bound on printed alignments; only consulted when HasMaximumOutput is set
+    int MaximumOutput;
+
+    // constructor
+    // MaximumOutput is given a defined value because the option parser takes
+    // a copy of it when "-num" is registered, before any value is parsed
+    SamSettings(void)
+        : HasInputBamFilename(false)
+        , HasMaximumOutput(false)
+        , IsOmittingHeader(false)
+        , InputBamFilename(Options::StandardIn())
+        , MaximumOutput(0)
+    { }
+};
+
+// ---------------------------------------------
+// SamTool implementation
+
+SamTool::SamTool()
+    : AbstractTool()
+    , m_settings(new SamSettings)
+{
+    // name, one-line description and example arguments used by the help menu
+    Options::SetProgramInfo("bamtools sam",
+                            "prints BAM file in SAM text format",
+                            "-in <filename>");
+
+    // input/output options - fully registered before the next group is created
+    OptionGroup* ioGroup = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "",
+                            m_settings->HasInputBamFilename,
+                            m_settings->InputBamFilename,
+                            ioGroup,
+                            Options::StandardIn());
+
+    // filter options: a flag to drop the header and a cap on printed alignments
+    OptionGroup* filterGroup = Options::CreateOptionGroup("Filters");
+    Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingHeader, filterGroup);
+    Options::AddValueOption("-num", "N", "maximum number of alignments to output", "",
+                            m_settings->HasMaximumOutput, m_settings->MaximumOutput, filterGroup);
+}
+
+SamTool::~SamTool() {
+    // the settings object was allocated in the constructor; free it and
+    // clear the pointer so it cannot be reused by accident
+    delete m_settings;
+    m_settings = 0;
+}
+
+int SamTool::Help() {
+    // print the help menu built from the options registered in the constructor
+    Options::DisplayHelp();
+    return 0;
+}
+
+int SamTool::Run(int argc, char* argv[]) {
+
+    // let the shared option parser fill in m_settings
+    Options::Parse(argc, argv, 1);
+
+    // open the input named by "-in" (or the stdin placeholder)
+    BamReader bamReader;
+    bamReader.Open(m_settings->InputBamFilename);
+
+    // write the header text first, unless "-noheader" was given
+    if ( !m_settings->IsOmittingHeader )
+        cout << bamReader.GetHeaderText() << endl;
+
+    // PrintSAM looks reference names up in the file-level 'references' table
+    references = bamReader.GetReferenceData();
+
+    BamAlignment alignment;
+    if ( m_settings->HasMaximumOutput ) {
+        // "-num N": stop once N alignments have been printed
+        const int limit = m_settings->MaximumOutput;
+        for ( int printed = 0; bamReader.GetNextAlignment(alignment) && (printed < limit); ++printed )
+            PrintSAM(alignment);
+    } else {
+        // no limit: print every alignment the reader returns
+        while ( bamReader.GetNextAlignment(alignment) )
+            PrintSAM(alignment);
+    }
+
+    // close the input and report success
+    bamReader.Close();
+    return 0;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
// Prints a BAM file in the text-based SAM format.
// ***************************************************************************
#ifndef BAMTOOLS_SAM_H
#define BAMTOOLS_SAM_H
-#include <cstdlib>
-#include <iostream>
-#include <string>
-
-#include "BamReader.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamSamHelp(void) {
-
- // '--head' makes more sense than '--num' from a Unix perspective, but could be confusing with header info ??
- // but this is also only the default case (from the beginning of the file)
- // do we want to add a region specifier, eg 'chr2:1000..1500'? In this case, '--num' still makes sense (give me up to N alignments from this region)
-
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools sam [--in FILE] [--num N] [--no_header]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input BAM file to generate SAM-format [stdin]" << std::endl;
- std::cerr << "\t--num N Only print up to N alignments from beginning of file [50*]" << endl;
- std::cerr << "\t--no_header Omits SAM header information from output (alignments only) [off]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t* - By default bamtools sam will print all alignments in SAM format." << std::endl;
- std::cerr << "\t However if '--num' is included with no N, the default of 50 is used." << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-static RefVector references;
-
-void PrintSAM(const BamAlignment& a) {
- // tab-delimited
- // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
+class SamTool : public AbstractTool { // sub-tool behind 'bamtools sam'; concrete AbstractTool
- // ******************************* //
- // ** NOT FULLY IMPLEMENTED YET ** //
- //******************************** //
- //
- // Todo : build CIGAR string
- // build TAG string
- // there are some quirks, per the spec, regarding when to use '=' or not
- //
- // ******************************* //
-
- //
- // do validity check on RefID / MateRefID ??
- //
+ public:
+ SamTool(void); // registers program info and options with Options
+ ~SamTool(void); // deletes m_settings
- // build CIGAR string
- std::string cigarString("CIGAR:NOT YET");
+ public:
+ int Help(void); // calls Options::DisplayHelp(); returns 0
+ int Run(int argc, char* argv[]); // parses the arguments, then prints header and alignments
+
+ private:
+ struct SamSettings; // only forward-declared in this header
+ SamSettings* m_settings; // owned by the tool: allocated in the constructor, freed in the destructor
+};
- // build TAG string
- std::string tagString("TAG:NOT YET");
-
- // print BamAlignment to stdout in SAM format
- std::cout << a.Name << '\t'
- << a.AlignmentFlag << '\t'
- << references[a.RefID].RefName << '\t'
- << a.Position << '\t'
- << a.MapQuality << '\t'
- << cigarString << '\t'
- << ( a.IsPaired() ? references[a.MateRefID].RefName : "*" ) << '\t'
- << ( a.IsPaired() ? a.MatePosition : 0 ) << '\t'
- << ( a.IsPaired() ? a.InsertSize : 0 ) << '\t'
- << a.QueryBases << '\t'
- << a.Qualities << '\t'
- << tagString << std::endl;
-}
-
-int RunBamSam(int argc, char* argv[]) {
-
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string inputFilename;
- options.addOption("in", &inputFilename);
-
- std::string numberString;
- options.addOptionalOption("num", &numberString, "50");
-
- bool isOmittingHeader;
- options.addSwitch("no_header", &isOmittingHeader);
-
- if ( !options.parse() ) return BamCoverageHelp();
- if ( inputFilename.empty() ) { inputFilename = "stdin"; }
-
- // maxNumberOfAlignments = all (if nothing specified)
- // = 50 (if '--num' but no N)
- // = N (if '--num N')
- int maxNumberOfAlignments = -1;
- if ( !numberString.empty() ) { maxNumberOfAlignments = atoi(numberString.c_str()); }
-
- // open our BAM reader
- BamReader reader;
- reader.Open(inputFilename);
-
- // if header desired, retrieve and print to stdout
- if ( !isOmittingHeader ) {
- std::string header = reader.GetHeaderText();
- std::cout << header << std::endl;
- }
-
- // store reference data
- references = reader.GetReferenceData();
-
- // print all alignments to stdout in SAM format
- if ( maxNumberOfAlignments < 0 ) {
- BamAlignment ba;
- while( reader.GetNextAlignment(ba) ) {
- PrintSAM(ba);
- }
- }
-
- // print first N alignments to stdout in SAM format
- else {
- BamAlignment ba;
- int alignmentsPrinted = 0;
- while ( reader.GetNextAlignment(ba) && (alignmentsPrinted < maxNumberOfAlignments) ) {
- PrintSAM(ba);
- ++alignmentsPrinted;
- }
- }
-
- // clean & exit
- reader.Close();
- return 0;
-}
-
} // namespace BamTools
-#endif // BAMTOOLS_SAM_H
\ No newline at end of file
+#endif // BAMTOOLS_SAM_H
--- /dev/null
+// ***************************************************************************
+// bamtools_sort.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 26 May 2010
+// ---------------------------------------------------------------------------
+// Sorts an input BAM file (default by position) and stores in a new BAM file.
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+
+#include "bamtools_sort.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// SortSettings implementation
+
+// Option values for 'bamtools sort'; the SortTool constructor binds these
+// fields to the -in / -out options.
+struct SortTool::SortSettings {
+
+    // "option was supplied" markers, both false until parsing says otherwise
+    bool HasInputBamFilename;
+    bool HasOutputBamFilename;
+
+    // input / output file names
+    string InputBamFilename;
+    string OutputBamFilename;
+
+    // nothing supplied yet; names start as Options::StandardIn()/StandardOut()
+    SortSettings(void)
+        : HasInputBamFilename(false),
+          HasOutputBamFilename(false),
+          InputBamFilename(Options::StandardIn()),
+          OutputBamFilename(Options::StandardOut())
+    {
+    }
+};
+
+// ---------------------------------------------
+// SortTool implementation
+
+// Allocates the settings object, then registers the program description and
+// the -in / -out options with Options, binding each option to a settings field.
+SortTool::SortTool(void)
+    : AbstractTool(),
+      m_settings(new SortSettings)
+{
+    // program name, one-line description and usage string
+    Options::SetProgramInfo("bamtools sort",
+                            "sorts a BAM file",
+                            "[-in <filename>] [-out <filename>]");
+
+    // both options belong to one "Input & Output" group
+    OptionGroup* ioGroup = Options::CreateOptionGroup("Input & Output");
+
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "",
+                            m_settings->HasInputBamFilename,
+                            m_settings->InputBamFilename,
+                            ioGroup, Options::StandardIn());
+
+    Options::AddValueOption("-out", "BAM filename", "the output BAM file", "",
+                            m_settings->HasOutputBamFilename,
+                            m_settings->OutputBamFilename,
+                            ioGroup, Options::StandardOut());
+}
+
+// Frees the settings object allocated in the constructor.
+SortTool::~SortTool(void)
+{
+    // reset the pointer after deleting so no stale address is left behind
+    delete m_settings;
+    m_settings = 0;
+}
+
+// Delegates to Options::DisplayHelp() and always returns 0.
+int SortTool::Help(void)
+{
+    Options::DisplayHelp();
+    return 0;
+}
+
+// Entry point for 'bamtools sort'.
+//
+// Hands argc/argv to Options::Parse(), which fills in m_settings. Sorting
+// itself is not implemented yet, so this reports that on stderr instead of
+// finishing silently. Always returns 0.
+int SortTool::Run(int argc, char* argv[]) {
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // do sorting
+    // TODO: not implemented. Say so explicitly -- with no notice the tool
+    // exits 0 without output and looks as if it had done its job.
+    cerr << "Sorting " << m_settings->InputBamFilename << endl;
+    cerr << "Saving sorted BAM in " << m_settings->OutputBamFilename << endl;
+    cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
+
+    return 0;
+}
// ***************************************************************************
-// bamtools_sortt.h (c) 2010 Derek Barnett, Erik Garrison
+// bamtools_sort.h (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
-// Sorts an input BAM file (default by position) and stores in a new BAM file.
+// Sorts a BAM file.
// ***************************************************************************
#ifndef BAMTOOLS_SORT_H
#define BAMTOOLS_SORT_H
-#include <iostream>
-#include <string>
-
-#include "BamReader.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamSortHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools sort [--in FILE] [--out FILE]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input BAM file to sort [stdin]" << std::endl;
- std::cerr << "\t--out FILE Destination of sorted BAM file [stdout]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamSort(int argc, char* argv[]) {
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string inputFilename;
- options.addOption("in", &inputFilename);
-
- std::string outputFilename;
- options.addOption("out", &outputFilename);
-
- if ( !options.parse() ) return BamCoverageHelp();
- if ( inputFilename.empty() ) { inputFilename = "stdin"; }
- if ( outputFilename.empty() ) { outputFilename = "stdout"; }
-
- // open our BAM reader
-// BamReader reader;
-// reader.Open(inputFilename);
-//
-// // retrieve header & reference dictionary info
-// std::string header = reader.GetHeaderText();
-// RefVector references = reader.GetReferenceData();
-//
-// BamWriter writer;
-// writer.Open(outputFilename, header, references);
-//
- // sort BAM file
- std::cerr << "Sorting " << inputFilename << std::endl;
- std::cerr << "Saving sorted BAM in " << outputFilename << endl;
- std::cerr << "FEATURE NOT YET IMPLEMENTED!" << std::endl;
-
- // clean & exit
-// reader.Close();
-// writer.Close();
- return 0;
-}
-
+// Sub-tool behind 'bamtools sort'; concrete AbstractTool.
+class SortTool : public AbstractTool {
+
+    public:
+        // lifetime
+        SortTool(void);
+        ~SortTool(void);
+
+        // AbstractTool interface
+        int Help(void);
+        int Run(int argc, char* argv[]);
+
+    private:
+        // option storage; the struct is only forward-declared in this header
+        struct SortSettings;
+        SortSettings* m_settings;
+};
+
+
} // namespace BamTools
-#endif // BAMTOOLS_SORT_H
\ No newline at end of file
+#endif // BAMTOOLS_SORT_H
--- /dev/null
+// ***************************************************************************
+// bamtools_stats.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Prints general statistics for a single BAM file.
+//
+// ** Expand to multiple? **
+//
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+
+#include "bamtools_stats.h"
+#include "bamtools_options.h"
+#include "BamReader.h"
+
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// StatsSettings implementation
+
+// Option values for 'bamtools stats'; the StatsTool constructor binds these
+// fields to the -in option.
+struct StatsTool::StatsSettings {
+
+    // "option was supplied" marker, false until parsing says otherwise
+    bool HasInputBamFilename;
+
+    // input file name
+    string InputBamFilename;
+
+    // nothing supplied yet; the name starts as Options::StandardIn()
+    StatsSettings(void)
+        : HasInputBamFilename(false),
+          InputBamFilename(Options::StandardIn())
+    {
+    }
+};
+
+// ---------------------------------------------
+// StatsTool implementation
+
+// Allocates the settings object, then registers the program description and
+// the -in option with Options, binding the option to the settings fields.
+StatsTool::StatsTool(void)
+    : AbstractTool(),
+      m_settings(new StatsSettings)
+{
+    // program name, one-line description and usage string
+    Options::SetProgramInfo("bamtools stats",
+                            "prints general stats for a BAM file",
+                            "[-in <filename>]");
+
+    // the single option lives in an "Input & Output" group
+    OptionGroup* ioGroup = Options::CreateOptionGroup("Input & Output");
+
+    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "",
+                            m_settings->HasInputBamFilename,
+                            m_settings->InputBamFilename,
+                            ioGroup, Options::StandardIn());
+}
+
+// Frees the settings object allocated in the constructor.
+StatsTool::~StatsTool(void)
+{
+    // reset the pointer after deleting so no stale address is left behind
+    delete m_settings;
+    m_settings = 0;
+}
+
+// Delegates to Options::DisplayHelp() and always returns 0.
+int StatsTool::Help(void)
+{
+    Options::DisplayHelp();
+    return 0;
+}
+
+// Entry point for 'bamtools stats'.
+//
+// Hands argc/argv to Options::Parse(), which fills in m_settings. The
+// statistics themselves are not implemented yet, so this reports that on
+// stderr instead of finishing silently. Always returns 0.
+int StatsTool::Run(int argc, char* argv[]) {
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // calculate stats
+    // TODO: not implemented. Say so explicitly -- with no notice the tool
+    // exits 0 without output and looks as if it had done its job.
+    cerr << "Calculating general stats for " << m_settings->InputBamFilename << endl;
+    cerr << "FEATURE NOT YET IMPLEMENTED!" << endl;
+
+    return 0;
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 1 June 2010
// ---------------------------------------------------------------------------
-// Prints general statistics for a single BAM file
+// Prints general statistics for a single BAM file.
//
-// ** Expand to multiple??
+// ** Expand to multiple? **
//
// ***************************************************************************
#ifndef BAMTOOLS_STATS_H
#define BAMTOOLS_STATS_H
-#include <iostream>
-#include <string>
-
-#include "BamReader.h"
-#include "bamtools_getopt.h"
+#include "bamtools_tool.h"
namespace BamTools {
-
-int BamStatsHelp(void) {
- std::cerr << std::endl;
- std::cerr << "usage:\tbamtools stats [--in FILE]" << std::endl;
- std::cerr << std::endl;
- std::cerr << "\t--in FILE Input BAM file to calculate general stats [stdin]" << std::endl;
- std::cerr << std::endl;
- return 0;
-}
-
-int RunBamStats(int argc, char* argv[]) {
- // else parse command line for args
- GetOpt options(argc, argv, 1);
-
- std::string inputFilename;
- options.addOption("in", &inputFilename);
-
- if ( !options.parse() ) return BamStatsHelp();
- if ( inputFilename.empty() ) { inputFilename = "stdin"; }
-
- // open our BAM reader
-// BamReader reader;
-// reader.Open(inputFilename);
-
- // calculate general stats
- std::cerr << "Calculating general stats for " << inputFilename << std::endl;
- std::cerr << "FEATURE NOT YET IMPLEMENTED!" << std::endl;
-
- // clean & exit
-// reader.Close();
- return 0;
-}
-
+// Sub-tool behind 'bamtools stats'; concrete AbstractTool.
+class StatsTool : public AbstractTool {
+
+    public:
+        // lifetime
+        StatsTool(void);
+        ~StatsTool(void);
+
+        // AbstractTool interface
+        int Help(void);
+        int Run(int argc, char* argv[]);
+
+    private:
+        // option storage; the struct is only forward-declared in this header
+        struct StatsSettings;
+        StatsSettings* m_settings;
+};
+
+
} // namespace BamTools
-#endif // BAMTOOLS_STATS_H
\ No newline at end of file
+#endif // BAMTOOLS_STATS_H
--- /dev/null
+// ***************************************************************************
+// bamtools_tool.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Base class for all other BamTools sub-tools
+//
+// Each sub-tool derives from AbstractTool and implements Help() and Run().
+//
+// ***************************************************************************
+
+#ifndef BAMTOOLS_ABSTRACTTOOL_H
+#define BAMTOOLS_ABSTRACTTOOL_H
+
+#include <string>
+
+namespace BamTools {
+
+// Interface implemented by every bamtools sub-tool.
+class AbstractTool {
+
+    public:
+        // fixes the two stream-name constants at construction
+        AbstractTool(void)
+            : STDIN("stdin")
+            , STDOUT("stdout")
+        { }
+
+        // virtual, so deleting a tool through an AbstractTool* runs the derived destructor
+        virtual ~AbstractTool(void) { }
+
+        // supplied by each concrete tool
+        virtual int Help(void) = 0;
+        virtual int Run(int argc, char* argv[]) = 0;
+
+    public:
+        // names the tools use for the standard streams
+        const std::string STDIN;
+        const std::string STDOUT;
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_ABSTRACTTOOL_H
\ No newline at end of file
--- /dev/null
+// ***************************************************************************
+// bamtools_utilities.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 27 May 2010
+// ---------------------------------------------------------------------------
+// Provides general utilities used by BamTools sub-tools.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_UTILITIES_H
+#define BAMTOOLS_UTILITIES_H
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+namespace BamTools {
+
+// Parses a REGION string into (startChrom, startPos, stopChrom, stopPos).
+//
+// Accepted forms and the values stored:
+//   chr1                -> startChrom="chr1", startPos=-1,  stopChrom="",     stopPos=-1
+//   chr1:100            -> startChrom="chr1", startPos=100, stopChrom="",     stopPos=-1
+//   chr1:100..200       -> startChrom="chr1", startPos=100, stopChrom="",     stopPos=200
+//   chr1:100..chr3:200  -> startChrom="chr1", startPos=100, stopChrom="chr3", stopPos=200
+//
+// Positions are converted with atoi(), so non-numeric text yields 0 rather than an error.
+// Whether a chromosome name is valid for a given BAM file is NOT checked here.
+//
+// Returns false (after printing a message to stderr, outputs untouched) only
+// for an empty string; every other input returns true.
+static inline
+bool ParseRegionString(const std::string& regionString, std::string& startChrom, int& startPos, std::string& stopChrom, int& stopPos) {
+
+    // shouldn't call this function with empty string but worth checking
+    // checked first for clarity purposes later on, since we can assume at least some content in the string
+    if ( regionString.empty() ) {
+        std::cerr << "Empty REGION. Usual format (e.g. chr2:1000..2000). See README for more detailed uses." << std::endl;
+        return false;
+    }
+
+    // non-empty string, look for a colon
+    const size_t foundFirstColon = regionString.find(':');
+
+    // no colon found
+    // going to use entire contents of requested chromosome
+    // just store entire region string as startChrom name
+    if ( foundFirstColon == std::string::npos ) {
+        startChrom = regionString;
+        startPos   = -1; // ** not sure about these defaults (should stopChrom == startChrom if same?)
+        stopChrom  = "";
+        stopPos    = -1;
+        return true;
+    }
+
+    // colon found, so we at least have some sort of startPos requested
+    // store start chrom from beginning to first colon
+    startChrom = regionString.substr(0, foundFirstColon);
+
+    // look for ".." after the colon
+    const size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
+
+    // no dots found
+    // so we have a startPos but no range
+    if ( foundRangeDots == std::string::npos ) {
+        startPos  = atoi( regionString.substr(foundFirstColon+1).c_str() );
+        stopChrom = "";
+        stopPos   = -1;
+        return true;
+    }
+
+    // ".." found, so we have some sort of range selected
+    // store startPos between first colon and range dots ".."
+    startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
+
+    // everything after the ".." describes the stop coordinate
+    const size_t stopBegin = foundRangeDots + 2;
+
+    // look for second colon
+    const size_t foundSecondColon = regionString.find(':', stopBegin);
+
+    // no second colon found
+    // so we have a "standard" chrom:start..stop input format (on single chrom)
+    if ( foundSecondColon == std::string::npos ) {
+        stopChrom = "";
+        stopPos   = atoi( regionString.substr(stopBegin).c_str() );
+        return true;
+    }
+
+    // second colon found
+    // so we have a range requested across 2 chrom's
+    // the stop chrom name is exactly the text between ".." and the second colon;
+    // its length must be measured from stopBegin, not from the end of the string
+    stopChrom = regionString.substr(stopBegin, foundSecondColon - stopBegin);
+    stopPos   = atoi( regionString.substr(foundSecondColon+1).c_str() );
+    return true;
+}
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_UTILITIES_H
\ No newline at end of file
--- /dev/null
+// ***************************************************************************
+// bamtools_variant.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 1 June 2010
+// ---------------------------------------------------------------------------
+// Provides a template-based variant type
+// ---------------------------------------------------------------------------
+// Modified from:
+// variant_t - An Improved Variant Type Based on Member Templates
+// (c) 2000 Fernando Cacciola
+// Dr. Dobb's (http://www.ddj.com/cpp/184401293)
+//
+// * Modified to fit BamTools code-style, otherwise code is same. (DB)
+// ***************************************************************************
+
+#ifndef BAMTOOLS_VARIANT_H
+#define BAMTOOLS_VARIANT_H
+
+#include <stdexcept>
+#include <typeinfo>
+#include <string>
+
+namespace BamTools {
+
+// Holds a single value of any copyable type behind a reference-counted,
+// type-erased implementation object. Copies of a Variant share the same
+// stored value; the value is destroyed when the last Variant referring to it
+// goes away. A default-constructed Variant is empty (holds nothing).
+class Variant {
+
+    public:
+        // creates an empty Variant
+        Variant(void) : data (NULL) { }
+
+        // shares other's value (if any) and bumps its reference count
+        Variant(const Variant& other) {
+            if(other.data != NULL)
+                other.data->AddRef();
+            data = other.data;
+        }
+
+        // drops this Variant's reference; frees the value when it was the last one
+        ~Variant(void) {
+            if(data != NULL) data->Release();
+        }
+
+        // NOTE: This code takes care of self-assignment.
+        // DO NOT CHANGE THE ORDER of the statements.
+        // (rhs is referenced before the old value is released, so releasing
+        //  can never destroy the value that is about to be adopted)
+        Variant& operator=(const Variant& rhs) {
+            if(rhs.data != NULL)
+                rhs.data->AddRef();
+            if(data != NULL)
+                data->Release();
+            data = rhs.data;
+            return * this;
+        }
+
+        // This member template constructor allows you to
+        // instance a Variant object with a value of any type.
+        template<typename T>
+        Variant(T v)
+            : data(new Impl<T>(v))
+        {
+            data->AddRef();
+        }
+
+        // This generic conversion operator let you retrieve
+        // the value held. To avoid template specialization conflicts,
+        // it returns an instance of type T, which will be a COPY
+        // of the value contained.
+        // Throws std::invalid_argument when the Variant is empty or holds
+        // a value of a type other than T.
+        template<typename T>
+        operator T() const {
+            return CastFromBase<T>(data)->data;
+        }
+
+        // This forms returns a REFERENCE and not a COPY, which
+        // will be significant in some cases.
+        // Throws std::invalid_argument under the same conditions as operator T().
+        template<typename T>
+        const T& get(void) const {
+            return CastFromBase<T>(data)->data;
+        }
+
+        // true when the stored value was constructed as a T.
+        // An empty Variant holds no type at all, so the answer is false
+        // (evaluating typeid(*data) on a null pointer would throw std::bad_typeid).
+        template<typename T>
+        bool is_type(void) const {
+            return (data != NULL) && (typeid(*data) == typeid(Impl<T>));
+        }
+
+        // Same question, with T deduced from an example value; v's value is ignored.
+        // The stored object is an Impl<T>, never a bare T, so the comparison
+        // has to be made against Impl<T> -- comparing with typeid(v) can never match.
+        template<typename T>
+        bool is_type(T v) const {
+            (void)v;
+            return is_type<T>();
+        }
+
+    private:
+        // type-erased, reference-counted base of every stored value
+        struct ImplBase {
+
+            ImplBase() : refs(0) {}
+            virtual ~ImplBase() {}
+
+            void AddRef(void) { refs ++; }
+
+            // deletes the object once the last reference is gone
+            void Release(void) {
+                --refs;
+                if(refs == 0) delete this;
+            }
+
+            size_t refs;
+        };
+
+        // concrete holder for a value of type T
+        template<typename T>
+        struct Impl : ImplBase {
+            Impl(T v) : data (v) { }
+            ~Impl(void) { }
+            T data;
+        };
+
+        // The following method is static because it doesn't
+        // operate on Variant instances.
+        template<typename T>
+        static Impl<T>* CastFromBase(ImplBase* v) {
+            // This downcast will fail if T is other than the T used
+            // with the constructor of Variant (or if v is NULL).
+            Impl<T>* p = dynamic_cast<Impl<T>*> (v);
+            if (p == NULL)
+                throw std::invalid_argument(typeid(T).name()+std::string(" is not a valid type"));
+            return p;
+        }
+
+        ImplBase* data;
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_VARIANT_H