From 12c10ab962401b7da243d59cba821a8bb06d65ec Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Fri, 18 Jun 2010 15:13:46 -0400 Subject: [PATCH] integration of SetRegion into BamMultiReader Also includes update to bamtools_count which uses the BamMultiReader by default and no longer requires the specification of an index file on the command line, as this would be very cumbersome to parse for multiple input files. Added method to check for file existence using stat to bamtools_utilities.cpp --- BamMultiReader.cpp | 54 +++++++++++++++++++++++------- BamMultiReader.h | 8 +++++ bamtools_count.cpp | 76 ++++++++++++++++++------------------------ bamtools_utilities.cpp | 8 +++++ bamtools_utilities.h | 3 ++ 5 files changed, 94 insertions(+), 55 deletions(-) diff --git a/BamMultiReader.cpp b/BamMultiReader.cpp index f0a8ced..733087e 100644 --- a/BamMultiReader.cpp +++ b/BamMultiReader.cpp @@ -163,21 +163,51 @@ bool BamMultiReader::Jump(int refID, int position) { exit(1); } } - if (result) { - // Update Alignments - alignments.clear(); - for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { - BamReader* br = it->first; - BamAlignment* ba = it->second; - if (br->GetNextAlignment(*ba)) { - alignments.insert(make_pair(make_pair(ba->RefID, ba->Position), - make_pair(br, ba))); - } else { - // assume BamReader EOF - } + if (result) UpdateAlignments(); + return result; +} + +bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) { + + BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition); + + return SetRegion(region); + +} + +bool BamMultiReader::SetRegion(const BamRegion& region) { + + Region = region; + + bool result = true; + for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { + BamReader* reader = it->first; + result &= reader->SetRegion(region); + if (!result) { + cerr << "ERROR: could not set region " << reader->GetFilename() << " to " + << region.LeftRefID << ":" << region.LeftPosition << ".." << region.RightRefID << ":" << region.RightPosition + << endl; + exit(1); } } + if (result) UpdateAlignments(); return result; + +} + +void BamMultiReader::UpdateAlignments(void) { + // Update Alignments + alignments.clear(); + for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { + BamReader* br = it->first; + BamAlignment* ba = it->second; + if (br->GetNextAlignment(*ba)) { + alignments.insert(make_pair(make_pair(ba->RefID, ba->Position), + make_pair(br, ba))); + } else { + // assume BamReader end of region / EOF + } + } } // opens BAM files diff --git a/BamMultiReader.h b/BamMultiReader.h index 3d9024c..e5960df 100644 --- a/BamMultiReader.h +++ b/BamMultiReader.h @@ -43,6 +43,9 @@ class BamMultiReader { int CurrentRefID; int CurrentLeft; + // region under analysis, specified using SetRegion + BamRegion Region; + // ---------------------- // BAM file operations // ---------------------- @@ -61,6 +64,10 @@ class BamMultiReader { // performs random-access jump to reference, position bool Jump(int refID, int position = 0); + // sets the target region + bool SetRegion(const BamRegion& region); + bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above + // returns file pointers to beginning of alignments bool Rewind(void); @@ -106,6 +113,7 @@ class BamMultiReader { // utility void PrintFilenames(void); void DumpAlignmentIndex(void); + void UpdateAlignments(void); // updates our alignment cache // private implementation private: diff --git a/bamtools_count.cpp b/bamtools_count.cpp index 54ddd45..29b5507 100644 --- a/bamtools_count.cpp +++ b/bamtools_count.cpp @@ -20,6 +20,7 @@ #include "bamtools_options.h" #include "bamtools_utilities.h" #include "BamReader.h" +#include "BamMultiReader.h" using namespace std; using namespace BamTools; @@ -30,21 +31,17 @@ using namespace BamTools; struct CountTool::CountSettings { // flags - bool HasBamIndexFilename; - bool HasInputBamFilename; + bool HasInput; bool HasRegion; // filenames - string BamIndexFilename; - string InputBamFilename; + vector InputFiles; string Region; // constructor CountSettings(void) - : HasBamIndexFilename(false) - , HasInputBamFilename(false) + : HasInput(false) , HasRegion(false) - , InputBamFilename(Options::StandardIn()) { } }; @@ -56,15 +53,16 @@ CountTool::CountTool(void) , m_settings(new CountSettings) { // set program details - Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in [-region [-index ]]"); + Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts); + //Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts); + //Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts); OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } CountTool::~CountTool(void) { @@ -82,9 +80,8 @@ int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - //open our BAM reader - BamReader reader; - reader.Open(m_settings->InputBamFilename); + BamMultiReader reader; + reader.Open(m_settings->InputFiles, false, true); // alignment counter int alignmentCount(0); @@ -96,9 +93,9 @@ int CountTool::Run(int argc, char* argv[]) { // if no region specified, count entire file if ( !m_settings->HasRegion ) { BamAlignment al; - while ( reader.GetNextAlignment(al) ) + while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; - } + } // more complicated - region specified else { @@ -106,33 +103,39 @@ int CountTool::Run(int argc, char* argv[]) { BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { + // check if there are index files *.bai corresponding to the input files + bool hasIndexes = true; + for (vector::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) { + if (!Utilities::FileExists(*f + ".bai")) { + errorStream << "could not find index file " << *f + ".bai" + << ", parsing whole file(s) to get alignment counts for target region" << endl; + hasIndexes = false; + break; + } + } // has index, we can jump directly to - if ( m_settings->HasBamIndexFilename ) { + if ( hasIndexes ) { // this is kind of a hack...? - // re-open BamReader, this time with the index file - reader.Close(); - reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename); + BamMultiReader reader; + reader.Open(m_settings->InputFiles, true, true); - // attempt Jump(), catch error - // if ( !reader.Jump(region.StartChromID, region.StartPosition) ) { - // foundError = true; - // errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl; - // } - // if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { foundError = true; errorStream << "Could not set BamReader region to REGION: " << m_settings->Region << endl; + } else { + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) + ++alignmentCount; } - } - - else { + } else { // read through sequentially, until first overlapping read is found BamAlignment al; bool alignmentFound(false); - while( reader.GetNextAlignment(al) ) { + //reader.Open(m_settings->InputFiles, false, true); + while( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID == region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) ) { alignmentFound = true; break; @@ -146,19 +149,6 @@ int CountTool::Run(int argc, char* argv[]) { // ----------------------------- // count alignments until stop hit - if ( !foundError ) { - // while valid alignment AND - // ( either current ref is before stopRefID OR - // current ref stopRefID but we're before stopPos ) - BamAlignment al; - //while ( reader.GetNextAlignment(al) && ((al.RefID < region.StopChromID ) || (al.RefID == region.StopChromID && al.Position <= region.StopPosition)) ) - // ++alignmentCount; - // - while ( reader.GetNextAlignment(al) ) - ++alignmentCount; - - } - } else { foundError = true; errorStream << "Could not parse REGION: " << m_settings->Region << endl; diff --git a/bamtools_utilities.cpp b/bamtools_utilities.cpp index cdbddf5..7772587 100644 --- a/bamtools_utilities.cpp +++ b/bamtools_utilities.cpp @@ -9,6 +9,7 @@ // *************************************************************************** #include +#include #include "bamtools_utilities.h" #include "BamReader.h" #include "BamMultiReader.h" @@ -231,3 +232,10 @@ bool Utilities::ParseRegionString(const std::string& regionString, const BamMult return true; } + +bool Utilities::FileExists(const std::string& filename) { + + struct stat fileInfo; + return stat(filename.c_str(), &fileInfo) == 0; + +} diff --git a/bamtools_utilities.h b/bamtools_utilities.h index 5101e26..4f6928b 100644 --- a/bamtools_utilities.h +++ b/bamtools_utilities.h @@ -27,6 +27,9 @@ class Utilities { static bool ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region); // Same as above, but accepts a BamMultiReader static bool ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region); + + // check if a file exists + static bool FileExists(const std::string& fname); }; } // namespace BamTools -- 2.39.2