X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ftoolkit%2Fbamtools_merge.cpp;h=2bb47c4d1846fe9ee455df317c1b0a04fdfcd51b;hb=eff70361664007ae1712efe87e3bc4647c2afc10;hp=3d2d9026de085553863827645d5a87c77dae838b;hpb=ce6b127bd2921f1dc137eb296190d0f4fb686d17;p=bamtools.git diff --git a/src/toolkit/bamtools_merge.cpp b/src/toolkit/bamtools_merge.cpp index 3d2d902..2bb47c4 100644 --- a/src/toolkit/bamtools_merge.cpp +++ b/src/toolkit/bamtools_merge.cpp @@ -1,25 +1,24 @@ // *************************************************************************** // bamtools_merge.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 7 September 2010 +// Last modified: 7 April 2011 // --------------------------------------------------------------------------- -// Merges multiple BAM files into one. +// Merges multiple BAM files into one // *************************************************************************** +#include "bamtools_merge.h" + +#include +#include +#include +#include +using namespace BamTools; + #include #include #include - -#include "bamtools_merge.h" -#include "bamtools_options.h" -#include "bamtools_utilities.h" -#include "BamMultiReader.h" -#include "BamWriter.h" - using namespace std; -using namespace BamTools; // --------------------------------------------- // MergeSettings implementation @@ -28,12 +27,14 @@ struct MergeTool::MergeSettings { // flags bool HasInputBamFilename; + bool HasInputBamFilelist; bool HasOutputBamFilename; bool IsForceCompression; bool HasRegion; // filenames vector InputFiles; + string InputFilelist; // other parameters string OutputFilename; @@ -42,6 +43,7 @@ struct MergeTool::MergeSettings { // constructor MergeSettings(void) : HasInputBamFilename(false) + , HasInputBamFilelist(false) , HasOutputBamFilename(false) , IsForceCompression(false) , HasRegion(false) @@ -50,130 +52,187 @@ struct MergeTool::MergeSettings { }; // --------------------------------------------- -// MergeTool implementation +// MergeToolPrivate implementation -MergeTool::MergeTool(void) - : AbstractTool() - , m_settings(new MergeSettings) -{ - // set program details - Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in -in ...] [-out ]"); - - // set up options - OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); - Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); - - OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); -} +struct MergeTool::MergeToolPrivate { -MergeTool::~MergeTool(void) { - delete m_settings; - m_settings = 0; -} + // ctor & dtor + public: + MergeToolPrivate(MergeTool::MergeSettings* settings) + : m_settings(settings) + { } -int MergeTool::Help(void) { - Options::DisplayHelp(); - return 0; -} + ~MergeToolPrivate(void) { } -int MergeTool::Run(int argc, char* argv[]) { - - // parse command line arguments - Options::Parse(argc, argv, 1); - - // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) + // interface + public: + bool Run(void); + + // data members + private: + MergeTool::MergeSettings* m_settings; +}; + +bool MergeTool::MergeToolPrivate::Run(void) { + + // set to default input if none provided + if ( !m_settings->HasInputBamFilename && !m_settings->HasInputBamFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); - + + // add files in the filelist to the input file list + if ( m_settings->HasInputBamFilelist ) { + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools merge ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + string line; + while ( getline(filelist, line) ) { + m_settings->InputFiles.push_back(line); + } + } + // opens the BAM files (by default without checking for indexes) BamMultiReader reader; - if ( !reader.Open(m_settings->InputFiles, false, true) ) { - cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; - return 1; + if ( !reader.Open(m_settings->InputFiles) ) { + cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl; + return false; } - + // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); - // open writer + // determine compression mode for BamWriter + bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && + !m_settings->IsForceCompression ); + BamWriter::CompressionMode compressionMode = BamWriter::Compressed; + if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; + + // open BamWriter BamWriter writer; - bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); - if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { - cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl; + writer.SetCompressionMode(compressionMode); + if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) { + cerr << "bamtools merge ERROR: could not open " + << m_settings->OutputFilename << " for writing." << endl; reader.Close(); - return 1; + return false; } - + // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } - + // otherwise attempt to use region as constraint else { - + // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // attempt to re-open reader with index files - reader.Close(); - bool openedOK = reader.Open(m_settings->InputFiles, true, true ); - - // if error - if ( !openedOK ) { - cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; - return 1; - } - - // if index data available, we can use SetRegion - if ( reader.IsIndexLoaded() ) { - + // attempt to find index files + reader.LocateIndexes(); + + // if index data available for all BAM files, we can use SetRegion + if ( reader.HasIndexes() ) { + // attempt to use SetRegion(), if failed report error - if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { - cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; + if ( !reader.SetRegion(region.LeftRefID, + region.LeftPosition, + region.RightRefID, + region.RightPosition) ) + { + cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" + << endl; reader.Close(); - return 1; - } - + return false; + } + // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); - } - + } + // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && - (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } - } - + } + // error parsing REGION string else { - cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; - cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl; + cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" + << endl; reader.Close(); writer.Close(); - return 1; + return false; } } - + // clean & exit reader.Close(); writer.Close(); - return 0; + return true; +} + +// --------------------------------------------- +// MergeTool implementation + +MergeTool::MergeTool(void) + : AbstractTool() + , m_settings(new MergeSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[ [-in -in ...] | [-list ] ] [-out | [-forceCompression]] [-region ]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); + Options::AddValueOption("-list", "BAM filename", "the input BAM file list, one line per file", "", m_settings->HasInputBamFilelist, m_settings->InputFilelist, IO_Opts); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts); +} + +MergeTool::~MergeTool(void) { + + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int MergeTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int MergeTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // initialize MergeTool with settings + m_impl = new MergeToolPrivate(m_settings); + + // run MergeTool, return success/fail + if ( m_impl->Run() ) + return 0; + else + return 1; }