// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 8 June 2010 (DB)\r
+// Last modified: 16 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
bool IsBigEndian;\r
\r
// user-specified region values\r
+ BamRegion Region;\r
+ bool IsLeftBoundSpecified;\r
+ bool IsRightBoundSpecified;\r
+ \r
bool IsRegionSpecified;\r
int CurrentRefID;\r
int CurrentLeft;\r
bool Jump(int refID, int position = 0);\r
void Open(const string& filename, const string& indexFilename = "");\r
bool Rewind(void);\r
+ bool SetRegion(const BamRegion& region);\r
\r
// access alignment data\r
bool GetNextAlignment(BamAlignment& bAlignment);\r
\r
// file operations\r
void BamReader::Close(void) { d->Close(); }\r
-bool BamReader::Jump(int refID, int position) { return d->Jump(refID, position); }\r
+bool BamReader::Jump(int refID, int position) { \r
+ d->Region.LeftRefID = refID;\r
+ d->Region.LeftPosition = position;\r
+ d->IsLeftBoundSpecified = true;\r
+ d->IsRightBoundSpecified = false;\r
+ return d->Jump(refID, position); \r
+}\r
void BamReader::Open(const string& filename, const string& indexFilename) { d->Open(filename, indexFilename); }\r
bool BamReader::Rewind(void) { return d->Rewind(); }\r
+bool BamReader::SetRegion(const BamRegion& region) { return d->SetRegion(region); }\r
+bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) {\r
+ return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );\r
+}\r
\r
// access alignment data\r
bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
BamReader::BamReaderPrivate::BamReaderPrivate(void)\r
: IsIndexLoaded(false)\r
, AlignmentsBeginOffset(0)\r
+ , IsLeftBoundSpecified(false)\r
+ , IsRightBoundSpecified(false)\r
, IsRegionSpecified(false)\r
, CurrentRefID(0)\r
, CurrentLeft(0)\r
mBGZF.Close();\r
ClearIndex();\r
HeaderText.clear();\r
+ IsLeftBoundSpecified = false;\r
+ IsRightBoundSpecified = false;\r
IsRegionSpecified = false;\r
}\r
\r
// get next alignment (from specified region, if given)\r
bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {\r
\r
- // if valid alignment available\r
- if ( LoadNextAlignment(bAlignment) ) {\r
-\r
- // if region not specified, return success\r
- if ( !IsRegionSpecified ) { \r
- bool ok = BuildCharData(bAlignment);\r
- return ok; \r
- }\r
-\r
- // load next alignment until region overlap is found\r
- while ( !IsOverlap(bAlignment) ) {\r
- // if no valid alignment available (likely EOF) return failure\r
- if ( !LoadNextAlignment(bAlignment) ) return false;\r
- }\r
-\r
- // return success (alignment found that overlaps region)\r
- bool ok = BuildCharData(bAlignment);\r
- return ok;\r
- }\r
-\r
- // no valid alignment\r
- else \r
+ // if valid alignment found, attempt to parse char data, and return success/failure\r
+ if ( GetNextAlignmentCore(bAlignment) )\r
+ return BuildCharData(bAlignment);\r
+ \r
+ // no valid alignment found\r
+ else\r
return false;\r
}\r
\r
if ( LoadNextAlignment(bAlignment) ) {\r
\r
// if region not specified, return success\r
- if ( !IsRegionSpecified ) return true;\r
+ if ( !IsLeftBoundSpecified ) return true;\r
\r
// load next alignment until region overlap is found\r
while ( !IsOverlap(bAlignment) ) {\r
}\r
}\r
\r
-// returns whether alignment overlaps currently specified region (refID, leftBound)\r
+// returns whether alignment overlaps currently specified region\r
+// this *internal* method should ONLY called when (at least) IsLeftBoundSpecified == true\r
bool BamReader::BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {\r
+ \r
+ // --------------------------------------------------\r
+ // check alignment start against right bound cutoff\r
+ \r
+ // if full region of interest was given\r
+ if ( IsRightBoundSpecified ) {\r
+ \r
+ // read starts on right bound reference, but AFTER right bound position\r
+ if ( bAlignment.RefID == Region.RightRefID && bAlignment.Position > Region.RightPosition )\r
+ return false;\r
+ \r
+ // if read starts on reference AFTER right bound, return false\r
+ if ( bAlignment.RefID > Region.RightRefID ) \r
+ return false;\r
+ }\r
+ \r
+ // --------------------------------------------------------\r
+ // no right bound given OR read starts before right bound\r
+ // so, check if it overlaps left bound \r
+ \r
+ // if read starts on left bound reference AND after left boundary, return success\r
+ if ( bAlignment.RefID == Region.LeftRefID && bAlignment.Position >= Region.LeftPosition)\r
+ return true;\r
+ \r
+ // if read is on any reference sequence before left bound, return false\r
+ if ( bAlignment.RefID < Region.LeftRefID )\r
+ return false;\r
\r
- // if on different reference sequence, quit\r
- if ( bAlignment.RefID != CurrentRefID ) { return false; }\r
-\r
- // read starts after left boundary\r
- if ( bAlignment.Position >= CurrentLeft) { return true; }\r
-\r
- // return whether alignment end overlaps left boundary\r
- return ( bAlignment.GetEndPosition() >= CurrentLeft );\r
+ // read is on left bound reference, but starts before left bound position\r
+ // calculate the alignment end to see if it overlaps\r
+ return ( bAlignment.GetEndPosition() >= Region.LeftPosition );\r
}\r
\r
// jumps to specified region(refID, leftBound) in BAM file, returns success/fail\r
// if data exists for this reference and position is valid \r
if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {\r
\r
- // set current region\r
- CurrentRefID = refID;\r
- CurrentLeft = position;\r
- IsRegionSpecified = true;\r
-\r
// calculate offset\r
- int64_t offset = GetOffset(CurrentRefID, CurrentLeft);\r
+ int64_t offset = GetOffset(Region.LeftRefID, Region.LeftPosition);\r
\r
// if in valid offset, return failure\r
- if ( offset == -1 ) { return false; }\r
-\r
// otherwise return success of seek operation\r
- else { return mBGZF.Seek(offset); }\r
+ if ( offset == -1 ) \r
+ return false;\r
+ else \r
+ return mBGZF.Seek(offset);\r
}\r
\r
// invalid jump request parameters, return failure\r
if ( References.at(refID).RefHasAlignments ) { break; }\r
}\r
\r
- // store default bounds for first alignment\r
- CurrentRefID = refID;\r
- CurrentLeft = 0;\r
- IsRegionSpecified = false;\r
+ // reset default region info\r
+ Region.LeftRefID = refID;\r
+ Region.LeftPosition = 0;\r
+ Region.RightRefID = -1;\r
+ Region.RightPosition = -1;\r
+ IsLeftBoundSpecified = false;\r
+ IsRightBoundSpecified = false;\r
\r
// return success/failure of seek\r
return mBGZF.Seek(AlignmentsBeginOffset);\r
}\r
\r
+// sets a region of interest (with left & right bound reference/position)\r
+// attempts a Jump() to left bound as well\r
+// returns success/failure of Jump()\r
+bool BamReader::BamReaderPrivate::SetRegion(const BamRegion& region) {\r
+ \r
+ // save region of interest\r
+ Region = region;\r
+ \r
+ // set flags\r
+ if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) \r
+ IsLeftBoundSpecified = true;\r
+ if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) \r
+ IsRightBoundSpecified = true;\r
+ \r
+ // attempt jump to beginning of region, return success/fail of Jump()\r
+ return Jump( Region.LeftRefID, Region.LeftPosition );\r
+}\r
+\r
// saves index data to BAM index file (".bai"), returns success/fail\r
bool BamReader::BamReaderPrivate::WriteIndex(void) {\r
\r