// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 6 April 2010 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines that were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
using namespace BamTools;\r
using namespace std;\r
\r
-namespace BamTools {\r
- struct BamAlignmentSupportData {\r
- string AllCharData;\r
- uint32_t BlockLength;\r
- uint32_t NumCigarOperations;\r
- uint32_t QueryNameLength;\r
- uint32_t QuerySequenceLength;\r
- };\r
-} // namespace BamTools\r
-\r
struct BamReader::BamReaderPrivate {\r
\r
// -------------------------------\r
// data members\r
// -------------------------------\r
\r
- // general data\r
+ // general file data\r
BgzfData mBGZF;\r
string HeaderText;\r
BamIndex Index;\r
string Filename;\r
string IndexFilename;\r
\r
+ // system data\r
bool IsBigEndian;\r
\r
// user-specified region values\r
// "public" interface\r
// -------------------------------\r
\r
- // flie operations\r
+ // file operations\r
void Close(void);\r
bool Jump(int refID, int position = 0);\r
void Open(const string& filename, const string& indexFilename = "");\r
\r
// access alignment data\r
bool GetNextAlignment(BamAlignment& bAlignment);\r
+ bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
\r
// access auxiliary data\r
int GetReferenceID(const string& refName) const;\r
int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);\r
// fills out character data for BamAlignment data\r
bool BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData);\r
- // calculates alignment end position based on starting position and provided CIGAR operations\r
- int CalculateAlignmentEnd(const int& position, const std::vector<CigarOp>& cigarData);\r
// calculate file offset for first alignment chunk overlapping 'left'\r
int64_t GetOffset(int refID, int left);\r
// checks to see if alignment overlaps current region\r
\r
// access alignment data: both calls pass their arguments unchanged through 'd' to the same-named method and return its bool result\r
bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); }\r
\r
// access auxiliary data: const accessors returning by value from members of 'd' (GetReferenceCount returns References.size() as int; GetReferenceID forwards to d->GetReferenceID)\r
const string BamReader::GetHeaderText(void) const { return d->HeaderText; }\r
int BamReader::GetReferenceCount(void) const { return d->References.size(); }\r
const RefVector BamReader::GetReferenceData(void) const { return d->References; }\r
int BamReader::GetReferenceID(const string& refName) const { return d->GetReferenceID(refName); }\r
+const std::string BamReader::GetFilename(void) const { return d->Filename; }\r
\r
// index operations: CreateIndex forwards to d->CreateIndex() and returns its bool result\r
bool BamReader::CreateIndex(void) { return d->CreateIndex(); }\r
return Rewind();\r
}\r
\r
-// calculates alignment end position based on starting position and provided CIGAR operations\r
-int BamReader::BamReaderPrivate::CalculateAlignmentEnd(const int& position, const vector<CigarOp>& cigarData) {\r
-\r
- // initialize alignment end to starting position\r
- int alignEnd = position;\r
-\r
- // iterate over cigar operations\r
- vector<CigarOp>::const_iterator cigarIter = cigarData.begin();\r
- vector<CigarOp>::const_iterator cigarEnd = cigarData.end();\r
- for ( ; cigarIter != cigarEnd; ++cigarIter) {\r
- char cigarType = (*cigarIter).Type;\r
- if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
- alignEnd += (*cigarIter).Length;\r
- }\r
- }\r
- return alignEnd;\r
-}\r
-\r
\r
// clear index data structure\r
void BamReader::BamReaderPrivate::ClearIndex(void) {\r
// load next alignment until region overlap is found\r
while ( !IsOverlap(bAlignment) ) {\r
// if no valid alignment available (likely EOF) return failure\r
- if ( !LoadNextAlignment(bAlignment, supportData) ) { return false; }\r
+ if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
}\r
\r
// return success (alignment found that overlaps region)\r
}\r
\r
// no valid alignment\r
- else { return false; }\r
+ else \r
+ return false;\r
+}\r
+\r
+// Retrieves the next alignment's core data; returns true on success, false once LoadNextAlignment() fails (likely EOF).\r
+// ** Does NOT call BuildCharData(), so character data (bases, qualities, tag data) is not parsed into bAlignment here.\r
+// The unparsed data is presumably left in supportData by LoadNextAlignment() - confirm there before relying on it.\r
+// When IsRegionSpecified is set, alignments are skipped until IsOverlap() accepts one; useful when ONLY positional or other alignment-related information is needed.\r
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+\r
+ // first read attempt: drop to the failure branch below if nothing could be loaded\r
+ if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+\r
+ // no region filter is active, so the alignment just loaded is the result\r
+ if ( !IsRegionSpecified ) return true;\r
+\r
+ // region filter active: keep loading until an alignment passes IsOverlap()\r
+ while ( !IsOverlap(bAlignment) ) {\r
+ // ran out of alignments (likely EOF) before any overlap was found: return failure\r
+ if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+ }\r
+\r
+ // success: bAlignment now holds an alignment that overlaps the region\r
+ return true;\r
+ }\r
+\r
+ // the initial LoadNextAlignment() call failed: no valid alignment\r
+ else\r
+ return false;\r
}\r
\r
// calculate closest indexed file offset for region specified\r
{\r
// get converted offsets\r
int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;\r
- int endOffset = ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) - 1) >> BAM_LIDX_SHIFT;\r
+ int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;\r
\r
// resize vector if necessary\r
int oldSize = offsets.size();\r
if ( bAlignment.Position >= CurrentLeft) { return true; }\r
\r
// return whether alignment end overlaps left boundary\r
- return ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) >= CurrentLeft );\r
+ return ( bAlignment.GetEndPosition() >= CurrentLeft );\r
}\r
\r
// jumps to specified region(refID, leftBound) in BAM file, returns success/fail\r
else {\r
\r
// store alignment name and length\r
-// bAlignment.Name.clear();\r
bAlignment.Name.assign((const char*)(allCharData));\r
bAlignment.Length = supportData.QuerySequenceLength;\r
\r
// store remaining 'allCharData' in supportData structure\r
-// supportData.AllCharData.clear();\r
supportData.AllCharData.assign((const char*)allCharData, dataLength);\r
\r
// save CigarOps for BamAlignment\r