#include <iterator>\r
#include <string>\r
#include <vector>\r
+#include <iostream>\r
\r
// BamTools includes\r
#include "BGZF.h"\r
\r
struct BamReader::BamReaderPrivate {\r
\r
+ // -------------------------------\r
+ // structs, enums, typedefs\r
+ // -------------------------------\r
+ enum RegionState { BEFORE_REGION = 0\r
+ , WITHIN_REGION\r
+ , AFTER_REGION\r
+ };\r
+\r
// -------------------------------\r
// data members\r
// -------------------------------\r
// fills out character data for BamAlignment data\r
bool BuildCharData(BamAlignment& bAlignment);\r
// calculate file offset for first alignment chunk overlapping specified region\r
- int64_t GetOffset(void);\r
+ int64_t GetOffset(std::vector<int64_t>& chunkStarts);\r
// checks to see if alignment overlaps current region\r
- bool IsOverlap(BamAlignment& bAlignment);\r
+ RegionState IsOverlap(BamAlignment& bAlignment);\r
// retrieves header text from BAM file\r
void LoadHeaderData(void);\r
// retrieves BAM alignment under file pointer\r
// if region not specified, return success\r
if ( !IsLeftBoundSpecified ) return true;\r
\r
- // load next alignment until region overlap is found\r
- while ( !IsOverlap(bAlignment) ) {\r
+ // determine region state (before, within, after)\r
+ BamReader::BamReaderPrivate::RegionState state = IsOverlap(bAlignment);\r
+ \r
+ // if alignment lies after region, return false\r
+ if ( state == AFTER_REGION ) \r
+ return false;\r
+\r
+ while ( state != WITHIN_REGION ) {\r
// if no valid alignment available (likely EOF) return failure\r
if ( !LoadNextAlignment(bAlignment) ) return false;\r
+ // if alignment lies after region, return false (no available read within region)\r
+ state = IsOverlap(bAlignment);\r
+ if ( state == AFTER_REGION) return false;\r
+ \r
}\r
\r
// return success (alignment found that overlaps region)\r
}\r
\r
// calculate file offset for first alignment chunk overlapping specified region\r
-int64_t BamReader::BamReaderPrivate::GetOffset(void) {\r
+int64_t BamReader::BamReaderPrivate::GetOffset(std::vector<int64_t>& chunkStarts) {\r
\r
// calculate which bins overlap this region\r
uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);\r
uint64_t minOffset = ( (unsigned int)(Region.LeftPosition>>BAM_LIDX_SHIFT) >= offsets.size() ) ? 0 : offsets.at(Region.LeftPosition>>BAM_LIDX_SHIFT);\r
\r
// store offsets to beginning of alignment 'chunks'\r
- std::vector<int64_t> chunkStarts;\r
+ //std::vector<int64_t> chunkStarts;\r
\r
// store all alignment 'chunk' starts for bins in this region\r
for (int i = 0; i < numBins; ++i ) {\r
\r
const uint16_t binKey = bins[i];\r
-\r
map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);\r
if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {\r
\r
}\r
}\r
\r
-// returns whether alignment overlaps currently specified region\r
+// returns region state - whether alignment ends before, overlaps, or starts after currently specified region\r
// this *internal* method should ONLY called when (at least) IsLeftBoundSpecified == true\r
-bool BamReader::BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {\r
+BamReader::BamReaderPrivate::RegionState BamReader::BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {\r
\r
// --------------------------------------------------\r
// check alignment start against right bound cutoff\r
\r
// read starts on right bound reference, but AFTER right bound position\r
if ( bAlignment.RefID == Region.RightRefID && bAlignment.Position > Region.RightPosition )\r
- return false;\r
+ return AFTER_REGION;\r
\r
// if read starts on reference AFTER right bound, return false\r
if ( bAlignment.RefID > Region.RightRefID ) \r
- return false;\r
+ return AFTER_REGION;\r
}\r
\r
// --------------------------------------------------------\r
\r
// if read starts on left bound reference AND after left boundary, return success\r
if ( bAlignment.RefID == Region.LeftRefID && bAlignment.Position >= Region.LeftPosition)\r
- return true;\r
+ return WITHIN_REGION;\r
\r
// if read is on any reference sequence before left bound, return false\r
if ( bAlignment.RefID < Region.LeftRefID )\r
- return false;\r
+ return BEFORE_REGION;\r
\r
+ // --------------------------------------------------------\r
// read is on left bound reference, but starts before left bound position\r
- // calculate the alignment end to see if it overlaps\r
- return ( bAlignment.GetEndPosition() >= Region.LeftPosition );\r
+\r
+ // if it overlaps, return WITHIN_REGION\r
+ if ( bAlignment.GetEndPosition() >= Region.LeftPosition )\r
+ return WITHIN_REGION;\r
+ // else begins before left bound position\r
+ else\r
+ return BEFORE_REGION;\r
}\r
\r
// jumps to specified region(refID, leftBound) in BAM file, returns success/fail\r
if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {\r
\r
// calculate offset\r
- int64_t offset = GetOffset();\r
+ std::vector<int64_t> chunkStarts;\r
+ int64_t offset = GetOffset(chunkStarts);\r
+ sort(chunkStarts.begin(), chunkStarts.end());\r
\r
// if in valid offset, return failure\r
// otherwise return success of seek operation\r
- if ( offset == -1 ) \r
+ if ( offset == -1 ) {\r
return false;\r
- else \r
- return mBGZF.Seek(offset);\r
+ } else {\r
+ //return mBGZF.Seek(offset);\r
+ BamAlignment bAlignment;\r
+ bool result = true;\r
+ for (std::vector<int64_t>::const_iterator o = chunkStarts.begin(); o != chunkStarts.end(); ++o) {\r
+ // std::cerr << *o << endl;\r
+ // std::cerr << "Seeking to offset: " << *o << endl;\r
+ result &= mBGZF.Seek(*o);\r
+ LoadNextAlignment(bAlignment);\r
+ // std::cerr << "alignment: " << bAlignment.RefID \r
+ // << ":" << bAlignment.Position << ".." << bAlignment.Position + bAlignment.Length << endl;\r
+ if ((bAlignment.RefID == refID && bAlignment.Position + bAlignment.Length > position) || bAlignment.RefID > refID) {\r
+ // std::cerr << "here i am" << endl;\r
+ // std::cerr << "seeking to offset: " << (*(o-1)) << endl;\r
+ // std::cerr << "*** Finished jumping ***" << endl;\r
+ return mBGZF.Seek(*o);\r
+ }\r
+ }\r
+\r
+ //std::cerr << "*** Finished jumping ***" << endl;\r
+ return result;\r
+ }\r
}\r
\r
// invalid jump request parameters, return failure\r