From ce6b127bd2921f1dc137eb296190d0f4fb686d17 Mon Sep 17 00:00:00 2001 From: Derek Date: Tue, 7 Sep 2010 13:24:56 -0400 Subject: [PATCH] Added -region option to bamtools_merge --- src/toolkit/bamtools_merge.cpp | 99 +++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/src/toolkit/bamtools_merge.cpp b/src/toolkit/bamtools_merge.cpp index 8170f1a..3d2d902 100644 --- a/src/toolkit/bamtools_merge.cpp +++ b/src/toolkit/bamtools_merge.cpp @@ -3,12 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 31 August 2010 +// Last modified: 7 September 2010 // --------------------------------------------------------------------------- // Merges multiple BAM files into one. -// -// ** Provide selectable region? eg chr2:10000..20000 -// // *************************************************************************** #include @@ -33,21 +30,21 @@ struct MergeTool::MergeSettings { bool HasInputBamFilename; bool HasOutputBamFilename; bool IsForceCompression; -// bool HasRegion; + bool HasRegion; // filenames vector InputFiles; // other parameters string OutputFilename; -// string Region; + string Region; // constructor MergeSettings(void) : HasInputBamFilename(false) , HasOutputBamFilename(false) , IsForceCompression(false) -// , HasRegion(false) + , HasRegion(false) , OutputFilename(Options::StandardOut()) { } }; @@ -68,8 +65,8 @@ MergeTool::MergeTool(void) Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); -// OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); -// Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); + Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } MergeTool::~MergeTool(void) { @@ -88,12 +85,16 @@ int MergeTool::Run(int argc, char* argv[]) { Options::Parse(argc, argv, 1); // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); + if ( !m_settings->HasInputBamFilename ) + m_settings->InputFiles.push_back(Options::StandardIn()); - // opens the BAM files without checking for indexes + // opens the BAM files (by default without checking for indexes) BamMultiReader reader; - reader.Open(m_settings->InputFiles, false, true); - + if ( !reader.Open(m_settings->InputFiles, false, true) ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; + return 1; + } + // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); @@ -101,12 +102,74 @@ int MergeTool::Run(int argc, char* argv[]) { // open writer BamWriter writer; bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); - writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed); + if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { + cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl; + reader.Close(); + return 1; + } + + // if no region specified, store entire contents of file(s) + if ( !m_settings->HasRegion ) { + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) + writer.SaveAlignment(al); + } + + // otherwise attempt to use region as constraint + else { + + // if region string parses OK + BamRegion region; + if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // store alignments to output file - BamAlignment bAlignment; - while (reader.GetNextAlignmentCore(bAlignment)) { - writer.SaveAlignment(bAlignment); + // attempt to re-open reader with index files + reader.Close(); + bool openedOK = reader.Open(m_settings->InputFiles, true, true ); + + // if error + if ( !openedOK ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; + return 1; + } + + // if index data available, we can use SetRegion + if ( reader.IsIndexLoaded() ) { + + // attempt to use SetRegion(), if failed report error + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; + reader.Close(); + return 1; + } + + // everything checks out, just iterate through specified region, storing alignments + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) + writer.SaveAlignment(al); + } + + // no index data available, we have to iterate through until we + // find overlapping alignments + else { + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) { + if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + { + writer.SaveAlignment(al); + } + } + } + } + + // error parsing REGION string + else { + cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; + cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + reader.Close(); + writer.Close(); + return 1; + } } // clean & exit -- 2.39.5