From d776d518237008a804656ff27e9f06707d032ae2 Mon Sep 17 00:00:00 2001 From: derek Date: Sat, 8 Oct 2011 23:47:07 -0400 Subject: [PATCH] Converted intervals from 0-based, CLOSED to 0-based, HALF-OPEN --- src/api/BamAlignment.cpp | 61 ++++++--- src/api/BamAlignment.h | 2 +- src/api/BamAux.h | 11 +- src/api/BamMultiReader.cpp | 5 +- .../internal/BamRandomAccessController_p.cpp | 16 +-- src/api/internal/BamStandardIndex_p.cpp | 11 +- src/api/internal/BamToolsIndex_p.cpp | 33 +++-- src/api/internal/BamToolsIndex_p.h | 5 +- src/api/internal/BamWriter_p.cpp | 11 +- src/toolkit/bamtools_convert.cpp | 4 +- src/toolkit/bamtools_count.cpp | 117 ++---------------- src/utils/bamtools_utilities.cpp | 54 ++++---- 12 files changed, 128 insertions(+), 202 deletions(-) diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp index 0eff5c7..5cc138b 100644 --- a/src/api/BamAlignment.cpp +++ b/src/api/BamAlignment.cpp @@ -2,7 +2,7 @@ // BamAlignment.cpp (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides the BamAlignment data structure // *************************************************************************** @@ -446,19 +446,25 @@ bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { return GetTag("NM", (uint32_t&)editDistance); } -/*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const +/*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool closedInterval = USE_CLOSED_DEFAULT) const \brief Calculates alignment end position, based on starting position and CIGAR data. - \param usePadded Inserted bases affect reported position. Default is false, so that reported - position stays 'sync-ed' with reference coordinates. - \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful - when using BAM data with half-open formats (e.g. BED). + \warning The position returned now represents a zero-based, HALF-OPEN interval. + In previous versions of BamTools (0.x & 1.x) all intervals were treated + as zero-based, CLOSED. I whole-heartedly apologize for any inconsistencies this + may have caused if you assumed that BT was always half-open; full aplogies also + to those who recognized that BamTools originally used a closed interval, but may + need to update their code to reflect this new change. + + \param usePadded Allow inserted bases to affect the reported position. Default is false, so that + reported position stays synced with reference coordinates. + + \param closedInterval Setting this to true will return a 0-based end coordinate. Default is false, + so that his value represents a standard, half-open interval. \return alignment end position */ -int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const { - - // TODO: Come back to this for coordinate issues !!! +int BamAlignment::GetEndPosition(bool usePadded, bool closedInterval) const { // initialize alignment end to starting position int alignEnd = Position; @@ -467,19 +473,34 @@ int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const { vector::const_iterator cigarIter = CigarData.begin(); vector::const_iterator cigarEnd = CigarData.end(); for ( ; cigarIter != cigarEnd; ++cigarIter) { - const char cigarType = (*cigarIter).Type; - const uint32_t& cigarLength = (*cigarIter).Length; - - if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR || - cigarType == Constants::BAM_CIGAR_DEL_CHAR || - cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR ) - alignEnd += cigarLength; - else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR ) - alignEnd += cigarLength; + const CigarOp& op = (*cigarIter); + + switch ( op.Type ) { + + // increase end position on CIGAR chars [DMXN=] + case Constants::BAM_CIGAR_DEL_CHAR : + case Constants::BAM_CIGAR_MATCH_CHAR : + case Constants::BAM_CIGAR_MISMATCH_CHAR : + case Constants::BAM_CIGAR_REFSKIP_CHAR : + case Constants::BAM_CIGAR_SEQMATCH_CHAR : + alignEnd += op.Length; + break; + + // increase end position when CIGAR char is 'I' only if @usePadded is true + case Constants::BAM_CIGAR_INS_CHAR : + if ( usePadded ) + alignEnd += op.Length; + break; + + // all other CIGAR chars do not affect end position + default : + break; + } } - // adjust for zero-based coordinates, if requested - if ( zeroBased ) alignEnd -= 1; + // adjust for closedInterval, if requested + if ( closedInterval ) + alignEnd -= 1; // return result return alignEnd; diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h index b096499..3b906ec 100644 --- a/src/api/BamAlignment.h +++ b/src/api/BamAlignment.h @@ -103,7 +103,7 @@ struct API_EXPORT BamAlignment { bool BuildCharData(void); // calculates alignment end position - int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; + int GetEndPosition(bool usePadded = false, bool closedInterval = false) const; // returns a description of the last error that occurred std::string GetErrorString(void) const; diff --git a/src/api/BamAux.h b/src/api/BamAux.h index 14e6547..610b3fc 100644 --- a/src/api/BamAux.h +++ b/src/api/BamAux.h @@ -2,7 +2,7 @@ // BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides data structures & utility methods that are used throughout the API. // *************************************************************************** @@ -78,6 +78,13 @@ typedef std::vector RefVector; \brief Represents a sequential genomic region Allowed to span multiple (sequential) references. + + \warning BamRegion now represents a zero-based, HALF-OPEN interval. + In previous versions of BamTools (0.x & 1.x) all intervals were treated + as zero-based, CLOSED. I whole-heartedly apologize for any inconsistencies this + may have caused if you assumed that BT was always half-open; full aplogies also + to those who recognized that BamTools originally used a closed interval, but may + need to update their code to reflect this new change. */ struct API_EXPORT BamRegion { @@ -123,7 +130,7 @@ struct API_EXPORT BamRegion { //! Returns true if region has a right boundary bool isRightBoundSpecified(void) const { - return ( RightRefID >= 0 && RightPosition >= 0 ); + return ( RightRefID >= 0 && RightPosition >= 1 ); } }; diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp index 562b899..d031568 100644 --- a/src/api/BamMultiReader.cpp +++ b/src/api/BamMultiReader.cpp @@ -2,7 +2,7 @@ // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Convenience class for reading multiple BAM files. // @@ -358,6 +358,5 @@ bool BamMultiReader::SetRegion(const int& leftRefID, const int& rightRefID, const int& rightPosition) { - BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition); - return d->SetRegion(region); + return d->SetRegion( BamRegion(leftRefID, leftPosition, rightRefID, rightPosition) ); } diff --git a/src/api/internal/BamRandomAccessController_p.cpp b/src/api/internal/BamRandomAccessController_p.cpp index 437b609..a8b64b6 100644 --- a/src/api/internal/BamRandomAccessController_p.cpp +++ b/src/api/internal/BamRandomAccessController_p.cpp @@ -2,7 +2,7 @@ // BamRandomAccessController_p.cpp (c) 2011 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 October 2011(DB) +// Last modified: 8 October 2011(DB) // --------------------------------------------------------------------------- // Manages random access operations in a BAM file // ************************************************************************** @@ -79,9 +79,9 @@ BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const { // if alignment starts at or after left bound position if ( alignment.Position >= m_region.LeftPosition) { - if ( m_region.isRightBoundSpecified() && // right bound is specified AND - m_region.LeftRefID == m_region.RightRefID && // left & right bounds on same reference AND - alignment.Position > m_region.RightPosition ) // alignment starts after right bound position + if ( m_region.isRightBoundSpecified() && // right bound is specified AND + m_region.LeftRefID == m_region.RightRefID && // left & right bounds on same reference AND + alignment.Position >= m_region.RightPosition ) // alignment starts on or after right bound position return AfterRegion; // otherwise, alignment overlaps region @@ -92,7 +92,7 @@ BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const { else { // if alignment overlaps left bound position - if ( alignment.GetEndPosition() >= m_region.LeftPosition ) + if ( alignment.GetEndPosition() > m_region.LeftPosition ) return OverlapsRegion; else return BeforeRegion; @@ -116,15 +116,15 @@ BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const { // alignment is on right bound reference else { - // if alignment starts on or before right bound position - if ( alignment.Position <= m_region.RightPosition ) + // if alignment starts before right bound position + if ( alignment.Position < m_region.RightPosition ) return OverlapsRegion; else return AfterRegion; } } - // otherwise, alignment starts after left bound and there is no right bound + // otherwise, alignment starts after left bound and there is no right bound given else return OverlapsRegion; } } diff --git a/src/api/internal/BamStandardIndex_p.cpp b/src/api/internal/BamStandardIndex_p.cpp index ad21fec..6f280c2 100644 --- a/src/api/internal/BamStandardIndex_p.cpp +++ b/src/api/internal/BamStandardIndex_p.cpp @@ -2,7 +2,7 @@ // BamStandardIndex.cpp (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 6 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the standardized BAM index format (".bai") // *************************************************************************** @@ -78,8 +78,8 @@ void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, ui // retrieve references from reader const RefVector& references = m_reader->GetReferenceData(); - // make sure left-bound position is valid - if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) + // LeftPosition cannot be greater than or equal to reference length + if ( region.LeftPosition >= references.at(region.LeftRefID).RefLength ) throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested"); // set region 'begin' @@ -91,9 +91,10 @@ void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, ui end = (unsigned int)region.RightPosition; // otherwise, set region 'end' to last reference base - else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1; + else end = (unsigned int)references.at(region.LeftRefID).RefLength; } +// [begin, end) void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin, const uint32_t& end, set& candidateBins) @@ -478,7 +479,7 @@ void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al); // check alignment against region - if ( al.GetEndPosition() < region.LeftPosition ) { + if ( al.GetEndPosition() <= region.LeftPosition ) { offsetFirst = ++offsetIter; count -= step+1; } else count = step; diff --git a/src/api/internal/BamToolsIndex_p.cpp b/src/api/internal/BamToolsIndex_p.cpp index 09f74d1..1142fbd 100644 --- a/src/api/internal/BamToolsIndex_p.cpp +++ b/src/api/internal/BamToolsIndex_p.cpp @@ -2,7 +2,7 @@ // BamToolsIndex.cpp (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 6 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the BamTools index format (".bti") // *************************************************************************** @@ -56,7 +56,7 @@ BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader) , m_cacheMode(BamIndex::LimitedIndexCaching) , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH) , m_inputVersion(0) - , m_outputVersion(BTI_1_2) // latest version - used for writing new index files + , m_outputVersion(BTI_2_0) // latest version - used for writing new index files { m_isBigEndian = BamTools::SystemIsBigEndian(); } @@ -103,17 +103,12 @@ void BamToolsIndex::CheckVersion(void) { // check for deprecated, unsupported versions // (the format had to be modified to accomodate a particular bug fix) - else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) { + // Version 2.0: introduced support for half-open intervals, instead of the old closed intervals + // respondBy: throwing exception - we're not going to try to handle the old BTI files. + else if ( (Version)m_inputVersion < BamToolsIndex::BTI_2_0 ) { const string message = "unsupported format: this version of the index may not properly handle " - "reference ends. Please run 'bamtools index -bti -in yourData.bam' to " - "generate an up-to-date, fixed BTI file."; - throw BamException("BamToolsIndex::CheckVersion", message); - } - - else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) { - const string message = "unsupported format: this version of the index may not properly handle " - "empty references. Please run 'bamtools index -bti -in yourData.bam to " - "generate an up-to-date, fixed BTI file."; + "coordinate intervals. Please run 'bamtools index -bti -in yourData.bam' " + "to generate an up-to-date, fixed BTI file."; throw BamException("BamToolsIndex::CheckVersion", message); } } @@ -150,7 +145,7 @@ bool BamToolsIndex::Create(void) { try { // open new index file (read & write) - string indexFilename = m_reader->Filename() + Extension(); + const string indexFilename = m_reader->Filename() + Extension(); OpenFile(indexFilename, "w+b"); // initialize BtiFileSummary with number of references @@ -188,7 +183,7 @@ bool BamToolsIndex::Create(void) { else { // store previous BTI block data in reference entry - BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); + const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); refEntry.Blocks.push_back(block); // write reference entry, then clear @@ -220,7 +215,7 @@ bool BamToolsIndex::Create(void) { ++currentBlockCount; // check end position - int32_t alignmentEndPosition = al.GetEndPosition(); + const int32_t alignmentEndPosition = al.GetEndPosition(); if ( alignmentEndPosition > blockMaxEndPosition ) blockMaxEndPosition = alignmentEndPosition; @@ -228,7 +223,7 @@ bool BamToolsIndex::Create(void) { if ( currentBlockCount == m_blockSize ) { // store previous block data in reference entry - BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); + const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); refEntry.Blocks.push_back(block); // update markers @@ -246,7 +241,7 @@ bool BamToolsIndex::Create(void) { if ( blockRefId >= 0 ) { // store last BTI block data in reference entry - BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); + const BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition); refEntry.Blocks.push_back(block); // write last reference entry, then clear @@ -305,7 +300,7 @@ void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha const BtiBlock& block = (*blockIter); if ( block.StartPosition <= region.RightPosition ) { - if ( block.MaxEndPosition >= region.LeftPosition ) { + if ( block.MaxEndPosition > region.LeftPosition ) { offset = block.StartOffset; break; } @@ -324,7 +319,7 @@ void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha --blockIter; const BtiBlock& previousBlock = (*blockIter); - if ( previousBlock.MaxEndPosition < region.LeftPosition ) { + if ( previousBlock.MaxEndPosition <= region.LeftPosition ) { offset = currentBlock.StartOffset; found = true; break; diff --git a/src/api/internal/BamToolsIndex_p.h b/src/api/internal/BamToolsIndex_p.h index ec846c5..96e35dc 100644 --- a/src/api/internal/BamToolsIndex_p.h +++ b/src/api/internal/BamToolsIndex_p.h @@ -2,7 +2,7 @@ // BamToolsIndex.h (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 6 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the BamTools index format (".bti") // *************************************************************************** @@ -87,7 +87,7 @@ class BamToolsIndex : public BamIndex { // (might be useful later to handle any 'legacy' versions if the format changes) // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on // - // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by: + // so a change introduced in BTI_1_2 may be handled from then on by: // // if ( indexVersion >= BTI_1_2 ) // do something new @@ -96,6 +96,7 @@ class BamToolsIndex : public BamIndex { enum Version { BTI_1_0 = 1 , BTI_1_1 , BTI_1_2 + , BTI_2_0 }; // ctor & dtor diff --git a/src/api/internal/BamWriter_p.cpp b/src/api/internal/BamWriter_p.cpp index 100de2d..28fbce7 100644 --- a/src/api/internal/BamWriter_p.cpp +++ b/src/api/internal/BamWriter_p.cpp @@ -2,7 +2,7 @@ // BamWriter_p.cpp (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 October 2011 (DB) +// Last modified: 8 October 2011 (DB) // --------------------------------------------------------------------------- // Provides the basic functionality for producing BAM files // *************************************************************************** @@ -29,7 +29,7 @@ BamWriterPrivate::~BamWriterPrivate(void) { Close(); } -// calculates minimum bin for a BAM alignment interval +// calculates minimum bin for a BAM alignment interval [begin, end) uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const { --end; if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14); @@ -213,10 +213,9 @@ void BamWriterPrivate::WriteAlignment(const BamAlignment& al) { const unsigned int queryLength = al.QueryBases.size(); const unsigned int tagDataLength = al.TagData.size(); - // no way to tell if BamAlignment.Bin is already defined (no default, invalid value) - // force calculation of Bin before storing - const int endPosition = al.GetEndPosition(); - const uint32_t alignmentBin = CalculateMinimumBin(al.Position, endPosition); + // no way to tell if alignment's bin is already defined (there is no default, invalid value) + // so we'll go ahead calculate its bin ID before storing + const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition()); // create our packed cigar string string packedCigar; diff --git a/src/toolkit/bamtools_convert.cpp b/src/toolkit/bamtools_convert.cpp index 991cebc..907c4ba 100644 --- a/src/toolkit/bamtools_convert.cpp +++ b/src/toolkit/bamtools_convert.cpp @@ -2,7 +2,7 @@ // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 11 June 2011 +// Last modified: 8 October 2011 // --------------------------------------------------------------------------- // Converts between BAM and a number of other formats // *************************************************************************** @@ -277,7 +277,7 @@ void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) { m_out << m_references.at(a.RefID).RefName << "\t" << a.Position << "\t" - << a.GetEndPosition() + 1 << "\t" + << a.GetEndPosition() << "\t" << a.Name << "\t" << a.MapQuality << "\t" << (a.IsReverseStrand() ? "-" : "+") << endl; diff --git a/src/toolkit/bamtools_count.cpp b/src/toolkit/bamtools_count.cpp index 7244f24..3593f4d 100644 --- a/src/toolkit/bamtools_count.cpp +++ b/src/toolkit/bamtools_count.cpp @@ -62,26 +62,6 @@ struct CountTool::CountToolPrivate { CountTool::CountSettings* m_settings; }; -static -void printAlignments(const vector& alignments) { - - vector::const_iterator alIter = alignments.begin(); - vector::const_iterator alEnd = alignments.end(); - for ( ; alIter != alEnd; ++alIter ) { - const BamAlignment& a = (*alIter); - - cerr << a.Name - << "\t" << a.RefID << ":" << a.Position; - - int aqValue; - bool hasTag = a.GetTag("Aq", aqValue); - cerr << "\tAq="; - if ( hasTag ) cerr << aqValue; - else cerr << "?"; - cerr << endl; - } -} - bool CountTool::CountToolPrivate::Run(void) { // if no '-in' args supplied, default to stdin @@ -101,74 +81,8 @@ bool CountTool::CountToolPrivate::Run(void) { // if no region specified, count entire file if ( !m_settings->HasRegion ) { - - - vector alignments; - while ( reader.GetNextAlignment(al) ) { + while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; - - if ( alignments.size() < 100 ) - alignments.push_back(al); - } - - using namespace BamTools::Algorithms; - - cerr << endl - << "------------------------------" << endl - << "Unsorted Alignments" << endl - << "------------------------------" << endl - << endl; - std::stable_sort(alignments.begin(), alignments.end(), Sort::Unsorted()); - printAlignments(alignments); - cerr << "------------------------------" << endl - << endl; - - cerr << endl - << "------------------------------" << endl - << "Sorted Alignments (by name)" << endl - << "------------------------------" << endl - << endl; -// std::sort(alignments.begin(), alignments.end(), Sort::ByName()); - Algorithms::SortAlignments(alignments, Sort::ByName()); - printAlignments(alignments); - cerr << endl - << "------------------------------" << endl - << endl; - - cerr << endl - << "------------------------------" << endl - << "Sorted Alignments (by tag Aq)" << endl - << "------------------------------" << endl - << endl; -// std::sort(alignments.begin(), alignments.end(), Sort::ByTag("Aq")); - Algorithms::SortAlignments(alignments, Sort::ByTag("Aq")); - printAlignments(alignments); - cerr << endl - << "------------------------------" << endl - << endl; - - cerr << endl - << "------------------------------" << endl - << "Sorted Alignments (by tag Aq) desc" << endl - << "------------------------------" << endl - << endl; - std::sort(alignments.begin(), alignments.end(), Sort::ByTag("Aq", Sort::DescendingOrder)); - printAlignments(alignments); - cerr << endl - << "------------------------------" << endl - << endl; - - - - -// // ######################################## -// // original -// // ######################################## -// -// while ( reader.GetNextAlignmentCore(al) ) -// ++alignmentCount; -// -// //######################################### } // otherwise attempt to use region as constraint @@ -184,27 +98,16 @@ bool CountTool::CountToolPrivate::Run(void) { // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { - vector alignments; - using namespace BamTools::Algorithms; - - alignments = GetSortedRegion(reader, region, Sort::ByName() ); - printAlignments(alignments); - - cerr << "################################" << endl; - - alignments = GetSortedRegion(reader, region, Sort::ByTag("Aq")); - printAlignments(alignments); - -// // attempt to set region on reader -// if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { -// cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl; -// reader.Close(); -// return false; -// } + // attempt to set region on reader + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl; + reader.Close(); + return false; + } -// // everything checks out, just iterate through specified region, counting alignments -// while ( reader.GetNextAlignmentCore(al) ) -// ++alignmentCount; + // everything checks out, just iterate through specified region, counting alignments + while ( reader.GetNextAlignmentCore(al) ) + ++alignmentCount; } // no index data available, we have to iterate through until we diff --git a/src/utils/bamtools_utilities.cpp b/src/utils/bamtools_utilities.cpp index f79faac..c7c45ec 100644 --- a/src/utils/bamtools_utilities.cpp +++ b/src/utils/bamtools_utilities.cpp @@ -2,7 +2,7 @@ // bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 9 June 2011 +// Last modified: 8 October 2011 // --------------------------------------------------------------------------- // Provides general utilities used by BamTools sub-tools. // *************************************************************************** @@ -88,7 +88,7 @@ bool Utilities::ParseRegionString(const string& regionString, startChrom = regionString; startPos = 0; stopChrom = regionString; - stopPos = -1; + stopPos = 0; } // colon found, so we at least have some sort of startPos requested @@ -141,17 +141,17 @@ bool Utilities::ParseRegionString(const string& regionString, // if startRefID not found, return false int startRefID = reader.GetReferenceID(startChrom); - if ( startRefID == (int)references.size() ) return false; + if ( startRefID == -1 ) return false; - // if startPos is larger than reference, return false + // startPos cannot be greater than or equal to reference length const RefData& startReference = references.at(startRefID); - if ( startPos > startReference.RefLength ) return false; + if ( startPos >= startReference.RefLength ) return false; // if stopRefID not found, return false int stopRefID = reader.GetReferenceID(stopChrom); - if ( stopRefID == (int)references.size() ) return false; + if ( stopRefID == -1 ) return false; - // if stopPosition larger than reference, return false + // stopPosition cannot be larger than reference length const RefData& stopReference = references.at(stopRefID); if ( stopPos > stopReference.RefLength ) return false; @@ -161,9 +161,9 @@ bool Utilities::ParseRegionString(const string& regionString, // ------------------------------- // set up Region struct & return - region.LeftRefID = startRefID; - region.LeftPosition = startPos; - region.RightRefID = stopRefID;; + region.LeftRefID = startRefID; + region.LeftPosition = startPos; + region.RightRefID = stopRefID;; region.RightPosition = stopPos; return true; } @@ -245,34 +245,34 @@ bool Utilities::ParseRegionString(const string& regionString, // ------------------------------- // validate reference IDs & genomic positions - + const RefVector references = reader.GetReferenceData(); - + // if startRefID not found, return false int startRefID = reader.GetReferenceID(startChrom); - if ( startRefID == (int)references.size() ) return false; - - // if startPos is larger than reference, return false + if ( startRefID == -1 ) return false; + + // startPos cannot be greater than or equal to reference length const RefData& startReference = references.at(startRefID); - if ( startPos > startReference.RefLength ) return false; - + if ( startPos >= startReference.RefLength ) return false; + // if stopRefID not found, return false int stopRefID = reader.GetReferenceID(stopChrom); - if ( stopRefID == (int)references.size() ) return false; - - // if stopPosition larger than reference, return false + if ( stopRefID == -1 ) return false; + + // stopPosition cannot be larger than reference length const RefData& stopReference = references.at(stopRefID); if ( stopPos > stopReference.RefLength ) return false; - + // if no stopPosition specified, set to reference end - if ( stopPos == -1 ) stopPos = stopReference.RefLength; - + if ( stopPos == -1 ) stopPos = stopReference.RefLength; + // ------------------------------- // set up Region struct & return - - region.LeftRefID = startRefID; - region.LeftPosition = startPos; - region.RightRefID = stopRefID;; + + region.LeftRefID = startRefID; + region.LeftPosition = startPos; + region.RightRefID = stopRefID;; region.RightPosition = stopPos; return true; } -- 2.39.2