X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ftoolkit%2Fbamtools_count.cpp;h=40e7c5d6d3373afb65591ba6cec751a9f2762619;hb=8c80d760637f8df39262683cd2570f0589423d36;hp=28677ceb57898f2448a356dffd406b1f2ad4003f;hpb=c33546e0647048a290814d64937a709f33264468;p=bamtools.git diff --git a/src/toolkit/bamtools_count.cpp b/src/toolkit/bamtools_count.cpp index 28677ce..40e7c5d 100644 --- a/src/toolkit/bamtools_count.cpp +++ b/src/toolkit/bamtools_count.cpp @@ -3,28 +3,23 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 23 March 2011 // --------------------------------------------------------------------------- -// Prints alignment count for BAM file -// -// ** Expand to multiple?? -// +// Prints alignment count for BAM file(s) // *************************************************************************** +#include "bamtools_count.h" + +#include +#include +#include +using namespace BamTools; + #include -#include #include #include - -#include "bamtools_count.h" -#include "bamtools_options.h" -#include "bamtools_utilities.h" -#include "BamReader.h" -#include "BamMultiReader.h" - using namespace std; -using namespace BamTools; - + // --------------------------------------------- // CountSettings implementation @@ -53,15 +48,12 @@ CountTool::CountTool(void) , m_settings(new CountSettings) { // set program details - Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in [-region ]"); + Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in -in ...] [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts); - //Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts); - - OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai or .bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); } CountTool::~CountTool(void) { @@ -79,115 +71,79 @@ int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); + // if no '-in' args supplied, default to stdin if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open reader without index BamMultiReader reader; - reader.Open(m_settings->InputFiles, false, true); + if ( !reader.Open(m_settings->InputFiles) ) { + cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl; + return 1; + } // alignment counter + BamAlignment al; int alignmentCount(0); - // set up error handling - ostringstream errorStream(""); - bool foundError(false); - // if no region specified, count entire file if ( !m_settings->HasRegion ) { - BamAlignment al; - while ( reader.GetNextAlignmentCore(al) ) + while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } - // more complicated - region specified + // otherwise attempt to use region as constraint else { + // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // check if there are index files *.bai/*.bti corresponding to the input files - bool hasDefaultIndex = false; - bool hasBamtoolsIndex = false; - bool hasNoIndex = false; - int defaultIndexCount = 0; - int bamtoolsIndexCount = 0; - for (vector::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) { + // attempt to find index files + reader.LocateIndexes(); + + // if index data available for all BAM files, we can use SetRegion + if ( reader.IsIndexLoaded() ) { - if ( Utilities::FileExists(*f + ".bai") ) { - hasDefaultIndex = true; - ++defaultIndexCount; - } - - if ( Utilities::FileExists(*f + ".bti") ) { - hasBamtoolsIndex = true; - ++bamtoolsIndexCount; - } - - if ( !hasDefaultIndex && !hasBamtoolsIndex ) { - hasNoIndex = true; - cerr << "*WARNING - could not find index file for " << *f - << ", parsing whole file(s) to get alignment counts for target region" - << " (could be slow)" << endl; - break; - } - } - - // determine if index file types are heterogeneous - bool hasDifferentIndexTypes = false; - if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) { - hasDifferentIndexTypes = true; - cerr << "*WARNING - different index file formats found" - << ", parsing whole file(s) to get alignment counts for target region" - << " (could be slow)" << endl; - } + // attempt to set region on reader + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl; + reader.Close(); + return 1; + } + + // everything checks out, just iterate through specified region, counting alignments + while ( reader.GetNextAlignmentCore(al) ) + ++alignmentCount; + } - // if any input file has no index, or if input files use different index formats - // can't use BamMultiReader to jump directly (**for now**) - if ( hasNoIndex || hasDifferentIndexTypes ) { - - // read through sequentially, counting all overlapping reads - BamAlignment al; - while( reader.GetNextAlignmentCore(al) ) { + // no index data available, we have to iterate through until we + // find overlapping alignments + else { + while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && - (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } - - // has index file for each input file (and same format) - else { - - // this is kind of a hack...? - BamMultiReader reader; - reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex ); - - if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { - foundError = true; - errorStream << "Could not set BamReader region to REGION: " << m_settings->Region << endl; - } else { - BamAlignment al; - while ( reader.GetNextAlignmentCore(al) ) - ++alignmentCount; - } - } - - } else { - foundError = true; - errorStream << "Could not parse REGION: " << m_settings->Region << endl; - errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + } + + // error parsing REGION string + else { + cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl; + cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" + << endl; + reader.Close(); + return 1; } } - - // print errors OR results - if ( foundError ) - cerr << errorStream.str() << endl; - else - cout << alignmentCount << endl; - // clean & exit + // print results + cout << alignmentCount << endl; + + // clean up & exit reader.Close(); - return (int)foundError; + return 0; }