// more complicated - region specified
else {
- // parse region string for desired region
- string startChrom;
- string stopChrom;
- int startPos;
- int stopPos;
- if ( Utilities::ParseRegionString(m_settings->Region, startChrom, startPos, stopChrom, stopPos) ) {
+ Region region;
+ if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
- const RefVector references = reader.GetReferenceData();
-
- // -------------------------------
- // validate start ref & position
-
- int startRefID = reader.GetReferenceID(startChrom);
-
- // startRefID not found
- if ( startRefID == (int)references.size() ) {
- foundError = true;
- errorStream << "Start chrom: " << startChrom << " not found in BAM file." << endl;
- }
-
- // valid startRefID, check startPos
- else {
- const RefData& reference = references.at(startRefID);
-
- // startPos too large
- if ( startPos > reference.RefLength ) {
+ // has index, we can jump directly to
+ if ( m_settings->HasBamIndexFilename ) {
+
+ // this is kind of a hack...?
+ // re-open BamReader, this time with the index file
+ reader.Close();
+ reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename);
+
+ // attempt Jump(), catch error
+ if ( !reader.Jump(region.StartChromID, region.StartPosition) ) {
foundError = true;
- errorStream << "Start pos: " << startPos << " is larger than expected." << endl;
+ errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl;
}
}
- // -------------------------------
- // validate stop ref & position
-
- int stopRefID = reader.GetReferenceID(stopChrom);
-
- // skip if error already found
- if ( !foundError ) {
+ else {
- // stopRefID not found
- if ( stopRefID == (int)references.size() ) {
- foundError = true;
- errorStream << "Stop chrom: " << stopChrom << " not found in BAM file." << endl;
- }
-
- // valid stopRefID, check stopPos
- else {
- const RefData& reference = references.at(stopRefID);
-
- // stopPos too large
- if ( stopPos > reference.RefLength ) {
- foundError = true;
- errorStream << "Stop pos: " << stopPos << " is larger than expected." << endl;
- }
-
- // no stopPos specified, set to reference end
- else if ( stopPos == -1 ) {
- stopPos = reference.RefLength;
- }
+ // read through sequentially, until first overlapping read is found
+ BamAlignment al;
+ bool alignmentFound(false);
+ while( reader.GetNextAlignment(al) ) {
+ if ( (al.RefID == region.StartChromID ) && ( (al.Position + al.Length) >= region.StartPosition) ) {
+ alignmentFound = true;
+ break;
+ }
}
+
+ // if overlapping alignment found (not EOF), increment counter
+ if ( alignmentFound) ++ alignmentCount;
}
-
- // -------------------------------
- // if refs & positions valid, retrieve data
+
+ // -----------------------------
+ // count alignments until stop hit
if ( !foundError ) {
-
- // has index, we can jump directly to
- if ( m_settings->HasBamIndexFilename ) {
-
- // this is kind of a hack...?
- // re-open BamReader, this time with the index file
- reader.Close();
- reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename);
-
- // attempt Jump(), catch error
- if ( !reader.Jump(startRefID, startPos) ) {
- foundError = true;
- errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl;
- }
- }
-
- else {
-
- // read through sequentially, until first overlapping read is found
- BamAlignment al;
- bool alignmentFound(false);
- while( reader.GetNextAlignment(al) ) {
- if ( (al.RefID == startRefID ) && ( (al.Position + al.Length) >= startPos) ) {
- alignmentFound = true;
- break;
- }
- }
-
- // if overlapping alignment found (not EOF), increment counter
- if ( alignmentFound) ++ alignmentCount;
- }
-
- // -----------------------------
- // count alignments until stop hit
-
- if ( !foundError ) {
- // while valid alignment AND
- // ( either current ref is before stopRefID OR
- // current ref stopRefID but we're before stopPos )
- BamAlignment al;
- while ( reader.GetNextAlignment(al) && ((al.RefID < stopRefID ) || (al.RefID == stopRefID && al.Position <= stopPos)) )
- ++alignmentCount;
- }
+ // while valid alignment AND
+ // ( either current ref is before stopRefID OR
+ // current ref stopRefID but we're before stopPos )
+ BamAlignment al;
+ while ( reader.GetNextAlignment(al) && ((al.RefID < region.StopChromID ) || (al.RefID == region.StopChromID && al.Position <= region.StopPosition)) )
+ ++alignmentCount;
}
- }
-
- // could not parse REGION string, set error
- else {
+
+ } else {
foundError = true;
- errorStream << "Could not parse desired REGION: " << m_settings->Region << endl;
- errorStream << "Please see README for details on accepted REGION formats" << endl;
+ errorStream << "Could not parse REGION: " << m_settings->Region << endl;
+ errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
}
}
// ***************************************************************************
#include <cstdlib>
-#include <iostream>
#include "bamtools_utilities.h"
+#include "BamReader.h"
using namespace std;
using namespace BamTools;
-// Parses a REGION string, stores in (startChrom, startPos, stopChrom, stopPos) variables
-// Returns successful parse (true/false)
-bool Utilities::ParseRegionString(const string& regionString, string& startChrom, int& startPos, string& stopChrom, int& stopPos) {
-
- // shouldn't call this function with empty string but worth checking
- // checked first for clarity purposes later on, since we can assume at least some content in the string
- if ( regionString.empty() ) {
- cerr << "Empty REGION. Usual format (e.g. chr2:1000..2000). See README for more detailed uses." << endl;
- return false;
- }
+// Parses a region string, does validation (valid ID's, positions), stores in Region struct
+bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region) {
+ // -------------------------------
+ // parse region string
+
+ // check first for empty string
+ if ( regionString.empty() )
+ return false;
+
// non-empty string, look for a colom
size_t foundFirstColon = regionString.find(':');
+ // store chrom strings, and numeric positions
+ string startChrom;
+ string stopChrom;
+ int startPos;
+ int stopPos;
+
// no colon found
// going to use entire contents of requested chromosome
// just store entire region string as startChrom name
startPos = 0;
stopChrom = regionString;
stopPos = -1;
- return true;
}
// colon found, so we at least have some sort of startPos requested
startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
stopChrom = startChrom;
stopPos = -1;
- return true;
}
// ".." found, so we have some sort of range selected
if ( foundSecondColon == string::npos ) {
stopChrom = startChrom;
stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
- return true;
}
// second colon found
else {
stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
- return true;
}
}
}
-
- // shouldn't get here - any code path that does?
- // if not, what does true/false really signify?
- return false;
-}
\ No newline at end of file
+
+ // -------------------------------
+ // validate reference IDs & genomic positions
+
+ const RefVector references = reader.GetReferenceData();
+
+ // if startRefID not found, return false
+ int startRefID = reader.GetReferenceID(startChrom);
+ if ( startRefID == (int)references.size() ) return false;
+
+ // if startPos is larger than reference, return false
+ const RefData& startReference = references.at(startRefID);
+ if ( startPos > startReference.RefLength ) return false;
+
+ // if stopRefID not found, return false
+ int stopRefID = reader.GetReferenceID(stopChrom);
+ if ( stopRefID == (int)references.size() ) return false;
+
+ // if stopPosition larger than reference, return false
+ const RefData& stopReference = references.at(stopRefID);
+ if ( stopPos > stopReference.RefLength ) return false;
+
+ // if no stopPosition specified, set to reference end
+ if ( stopPos == -1 )stopPos = stopReference.RefLength;
+
+ // -------------------------------
+ // set up Region struct & return
+
+ region.StartChromID = startRefID;
+ region.StopChromID = stopRefID;
+ region.StartPosition = startPos;
+ region.StopPosition = stopPos;
+ return true;
+}
+