From 80215a39a701ab6241b7761a3ea92477a503af7e Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 3 Jun 2010 13:42:31 -0400 Subject: [PATCH] Added a second Utilities::ParseRegionString() that accepts BamMultiReader --- bamtools_utilities.cpp | 110 ++++++++++++++++++++++++++++++++++++++++- bamtools_utilities.h | 6 ++- 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/bamtools_utilities.cpp b/bamtools_utilities.cpp index e3ab868..fcbabdf 100644 --- a/bamtools_utilities.cpp +++ b/bamtools_utilities.cpp @@ -11,11 +11,13 @@ #include #include "bamtools_utilities.h" #include "BamReader.h" +#include "BamMultiReader.h" using namespace std; using namespace BamTools; // Parses a region string, does validation (valid ID's, positions), stores in Region struct +// Returns success (true/false) bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region) { // ------------------------------- @@ -110,7 +112,7 @@ bool Utilities::ParseRegionString(const std::string& regionString, const BamRead if ( stopPos > stopReference.RefLength ) return false; // if no stopPosition specified, set to reference end - if ( stopPos == -1 )stopPos = stopReference.RefLength; + if ( stopPos == -1 ) stopPos = stopReference.RefLength; // ------------------------------- // set up Region struct & return @@ -122,3 +124,109 @@ bool Utilities::ParseRegionString(const std::string& regionString, const BamRead return true; } +// Same as ParseRegionString() above, but accepts a BamMultiReader +bool Utilities::ParseRegionString(const std::string& regionString, const BamMultiReader& reader, Region& region) { + + // ------------------------------- + // parse region string + + // check first for empty string + if ( regionString.empty() ) + return false; + + // non-empty string, look for a colom + size_t foundFirstColon = regionString.find(':'); + + // store chrom strings, and numeric positions + string startChrom; + string stopChrom; + int startPos; + int stopPos; + + // no colon found + // going to use entire contents of requested chromosome + // just store entire region string as startChrom name + // use BamReader methods to check if its valid for current BAM file + if ( foundFirstColon == string::npos ) { + startChrom = regionString; + startPos = 0; + stopChrom = regionString; + stopPos = -1; + } + + // colon found, so we at least have some sort of startPos requested + else { + + // store start chrom from beginning to first colon + startChrom = regionString.substr(0,foundFirstColon); + + // look for ".." after the colon + size_t foundRangeDots = regionString.find("..", foundFirstColon+1); + + // no dots found + // so we have a startPos but no range + // store contents before colon as startChrom, after as startPos + if ( foundRangeDots == string::npos ) { + startPos = atoi( regionString.substr(foundFirstColon+1).c_str() ); + stopChrom = startChrom; + stopPos = -1; + } + + // ".." found, so we have some sort of range selected + else { + + // store startPos between first colon and range dots ".." + startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() ); + + // look for second colon + size_t foundSecondColon = regionString.find(':', foundRangeDots+1); + + // no second colon found + // so we have a "standard" chrom:start..stop input format (on single chrom) + if ( foundSecondColon == string::npos ) { + stopChrom = startChrom; + stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() ); + } + + // second colon found + // so we have a range requested across 2 chrom's + else { + stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2)); + stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() ); + } + } + } + + // ------------------------------- + // validate reference IDs & genomic positions + + const RefVector references = reader.GetReferenceData(); + + // if startRefID not found, return false + int startRefID = reader.GetReferenceID(startChrom); + if ( startRefID == (int)references.size() ) return false; + + // if startPos is larger than reference, return false + const RefData& startReference = references.at(startRefID); + if ( startPos > startReference.RefLength ) return false; + + // if stopRefID not found, return false + int stopRefID = reader.GetReferenceID(stopChrom); + if ( stopRefID == (int)references.size() ) return false; + + // if stopPosition larger than reference, return false + const RefData& stopReference = references.at(stopRefID); + if ( stopPos > stopReference.RefLength ) return false; + + // if no stopPosition specified, set to reference end + if ( stopPos == -1 ) stopPos = stopReference.RefLength; + + // ------------------------------- + // set up Region struct & return + + region.StartChromID = startRefID; + region.StopChromID = stopRefID; + region.StartPosition = startPos; + region.StopPosition = stopPos; + return true; +} diff --git a/bamtools_utilities.h b/bamtools_utilities.h index 1e280f1..7d655e6 100644 --- a/bamtools_utilities.h +++ b/bamtools_utilities.h @@ -15,7 +15,8 @@ namespace BamTools { -class BamReader; +class BamReader; +class BamMultiReader; struct Region { int StartChromID; @@ -28,7 +29,10 @@ class Utilities { public: // Parses a region string, uses reader to do validation (valid ID's, positions), stores in Region struct + // Returns success (true/false) static bool ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region); + // Same as above, but accepts a BamMultiReader + static bool ParseRegionString(const std::string& regionString, const BamMultiReader& reader, Region& region); }; } // namespace BamTools -- 2.39.5