// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 16 June 2010 (DB)\r
+// Last modified: 17 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
\r
// *** reading alignments and auxiliary data *** //\r
\r
- // calculate bins that overlap region ( left to reference end for now )\r
- int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);\r
+ // calculate bins that overlap region\r
+ int BinsFromRegion(uint16_t bins[MAX_BIN]);\r
// fills out character data for BamAlignment data\r
bool BuildCharData(BamAlignment& bAlignment);\r
- // calculate file offset for first alignment chunk overlapping 'left'\r
- int64_t GetOffset(int refID, int left);\r
+ // calculate file offset for first alignment chunk overlapping specified region\r
+ int64_t GetOffset(void);\r
// checks to see if alignment overlaps current region\r
bool IsOverlap(BamAlignment& bAlignment);\r
// retrieves header text from BAM file\r
Close();\r
}\r
\r
-// calculate bins that overlap region ( left to reference end for now )\r
-int BamReader::BamReaderPrivate::BinsFromRegion(int refID, int left, uint16_t list[MAX_BIN]) {\r
+// calculate bins that overlap region\r
+int BamReader::BamReaderPrivate::BinsFromRegion(uint16_t list[MAX_BIN]) {\r
\r
// get region boundaries\r
- uint32_t begin = (unsigned int)left;\r
- uint32_t end = (unsigned int)References.at(refID).RefLength - 1;\r
-\r
+ uint32_t begin = (unsigned int)Region.LeftPosition;\r
+ uint32_t end;\r
+ \r
+ // if right bound specified AND left&right bounds are on same reference\r
+ // OK to use right bound position\r
+ if ( IsRightBoundSpecified && ( Region.LeftRefID == Region.RightRefID ) )\r
+ end = (unsigned int)Region.RightPosition; // -1 ??\r
+ \r
+ // otherwise, use end of left bound reference as cutoff\r
+ else\r
+ end = (unsigned int)References.at(Region.LeftRefID).RefLength - 1;\r
+ \r
// initialize list, bin '0' always a valid bin\r
int i = 0;\r
list[i++] = 0;\r
return ok;\r
}\r
\r
-// returns RefID for given RefName (returns References.size() if not found)\r
-int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {\r
-\r
- // retrieve names from reference data\r
- vector<string> refNames;\r
- RefVector::const_iterator refIter = References.begin();\r
- RefVector::const_iterator refEnd = References.end();\r
- for ( ; refIter != refEnd; ++refIter) {\r
- refNames.push_back( (*refIter).RefName );\r
- }\r
-\r
- // return 'index-of' refName ( if not found, returns refNames.size() )\r
- return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));\r
-}\r
-\r
// get next alignment (from specified region, if given)\r
bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {\r
\r
return false;\r
}\r
\r
-// calculate closest indexed file offset for region specified\r
-int64_t BamReader::BamReaderPrivate::GetOffset(int refID, int left) {\r
+// calculate file offset for first alignment chunk overlapping specified region\r
+int64_t BamReader::BamReaderPrivate::GetOffset(void) {\r
\r
// calculate which bins overlap this region\r
uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);\r
- int numBins = BinsFromRegion(refID, left, bins);\r
+ int numBins = BinsFromRegion(bins);\r
\r
// get bins for this reference\r
- const ReferenceIndex& refIndex = Index.at(refID);\r
+ const ReferenceIndex& refIndex = Index.at(Region.LeftRefID);\r
const BamBinMap& binMap = refIndex.Bins;\r
\r
// get minimum offset to consider\r
const LinearOffsetVector& offsets = refIndex.Offsets;\r
- uint64_t minOffset = ( (unsigned int)(left>>BAM_LIDX_SHIFT) >= offsets.size() ) ? 0 : offsets.at(left>>BAM_LIDX_SHIFT);\r
+ uint64_t minOffset = ( (unsigned int)(Region.LeftPosition>>BAM_LIDX_SHIFT) >= offsets.size() ) ? 0 : offsets.at(Region.LeftPosition>>BAM_LIDX_SHIFT);\r
\r
// store offsets to beginning of alignment 'chunks'\r
std::vector<int64_t> chunkStarts;\r
\r
// store all alignment 'chunk' starts for bins in this region\r
for (int i = 0; i < numBins; ++i ) {\r
- uint16_t binKey = bins[i];\r
+ \r
+ const uint16_t binKey = bins[i];\r
\r
map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);\r
if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {\r
else { return *min_element(chunkStarts.begin(), chunkStarts.end()); }\r
}\r
\r
+// returns RefID for given RefName (returns References.size() if not found)\r
+int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {\r
+\r
+ // retrieve names from reference data\r
+ vector<string> refNames;\r
+ RefVector::const_iterator refIter = References.begin();\r
+ RefVector::const_iterator refEnd = References.end();\r
+ for ( ; refIter != refEnd; ++refIter) {\r
+ refNames.push_back( (*refIter).RefName );\r
+ }\r
+\r
+ // return 'index-of' refName ( if not found, returns refNames.size() )\r
+ return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));\r
+}\r
+\r
// saves BAM bin entry for index\r
void BamReader::BamReaderPrivate::InsertBinEntry(BamBinMap& binMap,\r
const uint32_t& saveBin,\r
if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {\r
\r
// calculate offset\r
- int64_t offset = GetOffset(Region.LeftRefID, Region.LeftPosition);\r
+ int64_t offset = GetOffset();\r
\r
// if in valid offset, return failure\r
// otherwise return success of seek operation\r