, RightRefID(rightID)\r
, RightPosition(rightPos)\r
{ }\r
+ \r
+ // member functions\r
+ void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }\r
+ bool isLeftBoundSpecified(void) const { return ( LeftRefID != -1 && LeftPosition != -1 ); }\r
+ bool isNull(void) const { return !( isLeftBoundSpecified() && isRightBoundSpecified() ); }\r
+ bool isRightBoundSpecified(void) const { return ( RightRefID != -1 && RightPosition != -1 ); }\r
};\r
\r
// ----------------------------------------------------------------\r
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 3 September 2010 (DB)
+// Last modified: 9 September 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index functionality - both for the default (standardized) BAM
// index format (.bai) as well as a BamTools-specific (nonstandard) index
#include <cstdio>
#include <cstdlib>
#include <algorithm>
+#include <iostream>
#include <map>
#include "BamIndex.h"
#include "BamReader.h"
}
}
+bool BamStandardIndex::Jump(const BamRegion& region) {
+
+ cerr << "Jumping in BAI" << endl;
+
+ if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) {
+ fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
+ return false;
+ }
+
+ // see if left-bound reference of region has alignments
+ if ( !HasAlignments(region.LeftRefID) ) return false;
+
+ // make sure left-bound position is valid
+ if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength ) return false;
+
+ vector<int64_t> offsets;
+ if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets) ) {
+ fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ return false;
+ }
+
+ // iterate through offsets
+ BamAlignment bAlignment;
+ bool result = true;
+ for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
+
+ // attempt seek & load first available alignment
+ result &= m_BGZF->Seek(*o);
+ m_reader->GetNextAlignmentCore(bAlignment);
+
+ // if this alignment corresponds to desired position
+ // return success of seeking back to 'current offset'
+ if ( (bAlignment.RefID == region.LeftRefID && bAlignment.Position + bAlignment.Length > region.LeftPosition) || (bAlignment.RefID > region.LeftRefID) ) {
+ if ( o != offsets.begin() ) --o;
+ return m_BGZF->Seek(*o);
+ }
+ }
+
+ if ( !result ) fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ return result;
+}
+
bool BamStandardIndex::Load(const string& filename) {
// open index file, abort on error
// data members
int64_t Offset;
- int RefID;
- int Position;
+ int32_t RefID;
+ int32_t StartPosition;
// ctor
- BamToolsIndexEntry(const uint64_t& offset = 0,
- const int& id = -1,
- const int& position = -1)
+ BamToolsIndexEntry(const int64_t& offset = 0,
+ const int32_t& id = -1,
+ const int32_t& position = -1)
: Offset(offset)
, RefID(id)
- , Position(position)
+ , StartPosition(position)
{ }
};
// plow through alignments, store block offsets
int32_t currentBlockCount = 0;
int64_t blockStartOffset = m_BGZF->Tell();
- int blockStartId = -1;
- int blockStartPosition = -1;
+ int32_t blockStartId = -1;
+ int32_t blockStartPosition = -1;
BamAlignment al;
while ( m_reader->GetNextAlignmentCore(al) ) {
// clear any prior data
offsets.clear();
-
+
+
+/* bool found = false;
+ int64_t previousOffset = -1;
+ BamToolsIndexData::const_iterator indexIter = d->m_indexData.end() - 1;
+ BamToolsIndexData::const_iterator indexBegin = d->m_indexData.begin();
+ for ( ; indexIter >= indexBegin; --indexIter ) {
+
+ const BamToolsIndexEntry& entry = (*indexIter);
+
+ cerr << "Considering entry at " << entry.RefID << ":" << entry.Position << " => " << entry.Offset << endl;
+
+ if ( entry.RefID < region.LeftRefID) {
+ found = true;
+ break;
+ }
+
+ if ( (entry.RefID == region.LeftRefID) && (entry.Position < region.LeftPosition) ) {
+ found = true;
+ break;
+ }
+
+ previousOffset = entry.Offset;
+ }
+
+ if ( !found || previousOffset == -1 ) return false;
+
+ // store offset & return success
+ cerr << endl;
+ cerr << "Using offset: " << previousOffset << endl;
+ cerr << endl;
+
+ offsets.push_back(previousOffset);
+ return true;*/
+
+
+// cerr << "--------------------------------------------------" << endl;
+// cerr << "Looking for " << region.LeftRefID << ":" << region.LeftPosition << endl;
+// cerr << endl;
+
// calculate nearest index to jump to
int64_t previousOffset = -1;
BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
const BamToolsIndexEntry& entry = (*indexIter);
+// cerr << "Considering entry at " << entry.RefID << ":" << entry.StartPosition << " => " << entry.Offset << endl;
+
// check if we are 'past' beginning of desired region
// if so, we will break out & use previously stored offset
if ( entry.RefID > region.LeftRefID ) break;
- if ( (entry.RefID == region.LeftRefID) && (entry.Position > region.LeftPosition) ) break;
+ if ( (entry.RefID == region.LeftRefID) && (entry.StartPosition >= region.LeftPosition) ) break;
// not past desired region, so store current entry offset in previousOffset
previousOffset = entry.Offset;
}
- // no index was found
- if ( previousOffset == -1 ) return false;
+ // no index found
+ if ( indexIter == indexEnd ) return false;
+
+ // first offset matches (so previousOffset was never set)
+ if ( previousOffset == -1 ) previousOffset = (*indexIter).Offset;
// store offset & return success
+// cerr << endl;
+// cerr << "Using offset: " << previousOffset << endl;
+// cerr << endl;
offsets.push_back(previousOffset);
return true;
}
+bool BamToolsIndex::Jump(const BamRegion& region) {
+
+ if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) {
+ fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
+ return false;
+ }
+
+ // see if left-bound reference of region has alignments
+ if ( !HasAlignments(region.LeftRefID) ) return false;
+
+ // make sure left-bound position is valid
+ if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength ) return false;
+
+ vector<int64_t> offsets;
+ if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets) ) {
+ fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ return false;
+ }
+
+ // iterate through offsets
+ BamAlignment bAlignment;
+ bool result = true;
+ for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
+
+ // attempt seek & load first available alignment
+ result &= m_BGZF->Seek(*o);
+ m_reader->GetNextAlignmentCore(bAlignment);
+
+ // if this alignment corresponds to desired position
+ // return success of seeking back to 'current offset'
+ if ( (bAlignment.RefID == region.LeftRefID && bAlignment.Position + bAlignment.Length > region.LeftPosition) || (bAlignment.RefID > region.LeftRefID) ) {
+ if ( o != offsets.begin() ) --o;
+ return m_BGZF->Seek(*o);
+ }
+ }
+
+ if ( !result ) fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ return result;
+}
+
bool BamToolsIndex::Load(const string& filename) {
// open index file, abort on error
// iterate over index entries
for ( unsigned int i = 0; i < numOffsets; ++i ) {
- uint64_t offset;
- int id;
- int position;
+ int64_t offset;
+ int32_t id;
+ int32_t position;
// read in data
- elementsRead = fread(&offset, sizeof(offset), 1, indexStream);
- elementsRead = fread(&id, sizeof(id), 1, indexStream);
+ elementsRead = fread(&offset, sizeof(offset), 1, indexStream);
+ elementsRead = fread(&id, sizeof(id), 1, indexStream);
elementsRead = fread(&position, sizeof(position), 1, indexStream);
// swap endian-ness if necessary
// N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
bool BamToolsIndex::Write(const std::string& bamFilename) {
+ // open index file for writing
string indexFilename = bamFilename + ".bti";
FILE* indexStream = fopen(indexFilename.c_str(), "wb");
if ( indexStream == 0 ) {
const BamToolsIndexEntry& entry = (*indexIter);
// copy entry data
- uint64_t offset = entry.Offset;
- int id = entry.RefID;
- int position = entry.Position;
+ int64_t offset = entry.Offset;
+ int32_t id = entry.RefID;
+ int32_t position = entry.StartPosition;
// swap endian-ness if necessary
if ( m_isBigEndian ) {
}
// write the reference index entry
- fwrite(&offset, sizeof(offset), 1, indexStream);
- fwrite(&id, sizeof(id), 1, indexStream);
+ fwrite(&offset, sizeof(offset), 1, indexStream);
+ fwrite(&id, sizeof(id), 1, indexStream);
fwrite(&position, sizeof(position), 1, indexStream);
}
- // flush file buffer, close file, and return success
+ // flush any remaining output, close file, and return success
fflush(indexStream);
fclose(indexStream);
return true;
// returns supported file extension
virtual const std::string Extension(void) const =0;
// calculates offset(s) for a given region
- virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
+ //virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
+ virtual bool Jump(const BamTools::BamRegion& region) =0;
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID);
// loads existing data from file into memory
const std::string Extension(void) const { return std::string(".bai"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
+ bool Jump(const BamTools::BamRegion& region);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
const std::string Extension(void) const { return std::string(".bti"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
+ bool Jump(const BamTools::BamRegion& region);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 7 September 2010 (DB)\r
+// Last modified: 10 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
// general file data\r
BgzfData mBGZF;\r
string HeaderText;\r
- //BamIndex Index;\r
- BamIndex* NewIndex;\r
+ BamIndex* Index;\r
RefVector References;\r
bool IsIndexLoaded;\r
int64_t AlignmentsBeginOffset;\r
bool IsBigEndian;\r
\r
// user-specified region values\r
- BamRegion Region;\r
- bool IsLeftBoundSpecified;\r
- bool IsRightBoundSpecified;\r
- \r
- bool IsRegionSpecified;\r
+ BamRegion Region; \r
int CurrentRefID;\r
int CurrentLeft;\r
\r
\r
// file operations\r
void Close(void);\r
- bool Jump(int refID, int position);\r
bool Open(const std::string& filename, \r
const std::string& indexFilename, \r
const bool lookForIndex, \r
void BamReader::Close(void) { d->Close(); }\r
bool BamReader::IsIndexLoaded(void) const { return d->IsIndexLoaded; }\r
bool BamReader::IsOpen(void) const { return d->mBGZF.IsOpen; }\r
-bool BamReader::Jump(int refID, int position) \r
-{ \r
- d->Region.LeftRefID = refID;\r
- d->Region.LeftPosition = position;\r
- d->IsLeftBoundSpecified = true;\r
- d->IsRightBoundSpecified = false;\r
- return d->Jump(refID, position); \r
-}\r
+bool BamReader::Jump(int refID, int position) { return d->SetRegion( BamRegion(refID, position) ); }\r
bool BamReader::Open(const std::string& filename, \r
const std::string& indexFilename, \r
const bool lookForIndex, \r
\r
// constructor\r
BamReader::BamReaderPrivate::BamReaderPrivate(BamReader* parent)\r
- : NewIndex(0)\r
+ : Index(0)\r
, IsIndexLoaded(false)\r
, AlignmentsBeginOffset(0)\r
- , IsLeftBoundSpecified(false)\r
- , IsRightBoundSpecified(false)\r
- , IsRegionSpecified(false)\r
, CurrentRefID(0)\r
, CurrentLeft(0)\r
, Parent(parent)\r
\r
// clear index data structure\r
void BamReader::BamReaderPrivate::ClearIndex(void) {\r
- delete NewIndex;\r
- NewIndex = 0;\r
+ delete Index;\r
+ Index = 0;\r
IsIndexLoaded = false;\r
}\r
\r
HeaderText.clear();\r
\r
// clear out region flags\r
- IsLeftBoundSpecified = false;\r
- IsRightBoundSpecified = false;\r
- IsRegionSpecified = false;\r
+ Region.clear();\r
}\r
\r
// creates index for BAM file, saves to file\r
\r
// create index based on type requested\r
if ( useStandardIndex ) \r
- NewIndex = new BamStandardIndex(&mBGZF, Parent, IsBigEndian);\r
+ Index = new BamStandardIndex(&mBGZF, Parent, IsBigEndian);\r
// create BamTools 'custom' index\r
else\r
- NewIndex = new BamToolsIndex(&mBGZF, Parent, IsBigEndian);\r
+ Index = new BamToolsIndex(&mBGZF, Parent, IsBigEndian);\r
\r
// build new index\r
bool ok = true;\r
- ok &= NewIndex->Build();\r
+ ok &= Index->Build();\r
IsIndexLoaded = ok;\r
\r
// attempt to save index data to file\r
- ok &= NewIndex->Write(Filename); \r
+ ok &= Index->Write(Filename); \r
\r
// return success/fail of both building & writing index\r
return ok;\r
bAlignment.SupportData.HasCoreOnly = true;\r
\r
// if region not specified, return success\r
- if ( !IsLeftBoundSpecified ) return true;\r
+ if ( !Region.isLeftBoundSpecified() ) return true;\r
\r
// determine region state (before, within, after)\r
BamReader::BamReaderPrivate::RegionState state = IsOverlap(bAlignment);\r
// check alignment start against right bound cutoff\r
\r
// if full region of interest was given\r
- if ( IsRightBoundSpecified ) {\r
+ if ( Region.isRightBoundSpecified() ) {\r
\r
// read starts on right bound reference, but AFTER right bound position\r
if ( bAlignment.RefID == Region.RightRefID && bAlignment.Position > Region.RightPosition )\r
return BEFORE_REGION;\r
}\r
\r
-// jumps to specified region(refID, leftBound) in BAM file, returns success/fail\r
-bool BamReader::BamReaderPrivate::Jump(int refID, int position) {\r
-\r
- // -----------------------------------------------------------------------\r
- // check for existing index \r
- if ( !IsIndexLoaded || NewIndex == 0 ) return false; \r
- // see if reference has alignments\r
- if ( !NewIndex->HasAlignments(refID) ) return false; \r
- // make sure position is valid\r
- if ( position > References.at(refID).RefLength ) return false;\r
- \r
- // determine possible offsets\r
- vector<int64_t> offsets;\r
- if ( !NewIndex->GetOffsets(Region, IsRightBoundSpecified, offsets) ) {\r
- fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");\r
- return false;\r
- }\r
- \r
- // iterate through offsets\r
- BamAlignment bAlignment;\r
- bool result = true;\r
- for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {\r
- \r
- // attempt seek & load first available alignment\r
- result &= mBGZF.Seek(*o);\r
- LoadNextAlignment(bAlignment);\r
- \r
- // if this alignment corresponds to desired position\r
- // return success of seeking back to 'current offset'\r
- if ( (bAlignment.RefID == refID && bAlignment.Position + bAlignment.Length > position) || (bAlignment.RefID > refID) ) {\r
- if ( o != offsets.begin() ) --o;\r
- return mBGZF.Seek(*o);\r
- }\r
- }\r
- \r
- return result;\r
-}\r
-\r
// load BAM header data\r
void BamReader::BamReaderPrivate::LoadHeaderData(void) {\r
\r
\r
// attempt to load BamIndex based on current Filename provided & preferStandardIndex flag\r
const BamIndex::PreferredIndexType type = (preferStandardIndex ? BamIndex::STANDARD : BamIndex::BAMTOOLS);\r
- NewIndex = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, IsBigEndian, type);\r
+ Index = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, IsBigEndian, type);\r
\r
// if null, return failure\r
- if ( NewIndex == 0 ) return false;\r
+ if ( Index == 0 ) return false;\r
\r
// generate proper IndexFilename based on type of index created\r
- IndexFilename = Filename + NewIndex->Extension();\r
+ IndexFilename = Filename + Index->Extension();\r
}\r
\r
else {\r
// attempt to load BamIndex based on IndexFilename provided by client\r
- NewIndex = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent, IsBigEndian);\r
+ Index = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent, IsBigEndian);\r
\r
// if null, return failure\r
- if ( NewIndex == 0 ) return false; \r
+ if ( Index == 0 ) return false; \r
}\r
\r
// an index file was found\r
// return success of loading the index data from file\r
- IsIndexLoaded = NewIndex->Load(IndexFilename);\r
+ IsIndexLoaded = Index->Load(IndexFilename);\r
return IsIndexLoaded;\r
}\r
\r
// returns BAM file pointer to beginning of alignment data\r
bool BamReader::BamReaderPrivate::Rewind(void) {\r
\r
- // rewind to first alignment\r
+ // rewind to first alignment, return false if unable to seek\r
if ( !mBGZF.Seek(AlignmentsBeginOffset) ) return false;\r
\r
- // retrieve first alignment data\r
+ // retrieve first alignment data, return false if unable to read\r
BamAlignment al;\r
if ( !LoadNextAlignment(al) ) return false;\r
\r
// reset default region info using first alignment in file\r
- Region.LeftRefID = al.RefID;\r
- Region.LeftPosition = al.Position;\r
- Region.RightRefID = -1;\r
- Region.RightPosition = -1;\r
- IsLeftBoundSpecified = false;\r
- IsRightBoundSpecified = false; \r
-\r
- // rewind back to before first alignment\r
+ Region.clear();\r
+ Region.LeftRefID = al.RefID;\r
+ Region.LeftPosition = al.Position;\r
+\r
+ // rewind back to beginning of first alignment\r
// return success/fail of seek\r
return mBGZF.Seek(AlignmentsBeginOffset);\r
}\r
\r
-// sets a region of interest (with left & right bound reference/position)\r
-// attempts a Jump() to left bound as well\r
-// returns success/failure of Jump()\r
+// asks Index to attempt a Jump() to specified region\r
+// returns success/failure\r
bool BamReader::BamReaderPrivate::SetRegion(const BamRegion& region) {\r
+ \r
+ // clear out any prior BamReader region data\r
+ //\r
+ // N.B. - this is cleared so that BamIndex now has free reign to call\r
+ // GetNextAlignmentCore() and do overlap checking without worrying about BamReader \r
+ // performing any overlap checking of its own and moving on to the next read... Calls \r
+ // to GetNextAlignmentCore() with no Region set, simply return the next alignment.\r
+ // This ensures that the Index is able to do just that. (All without exposing \r
+ // LoadNextAlignment() to the public API, and potentially confusing clients with the nomenclature)\r
+ Region.clear();\r
+ \r
+ // check for existing index \r
+ if ( !IsIndexLoaded || Index == 0 ) return false; \r
\r
- // save region of interest\r
+ // attempt jump to user-specified region, return false if failed\r
+ if ( !Index->Jump(region) ) return false;\r
+ \r
+ // if successful, save region data locally, return success\r
Region = region;\r
- \r
- // set flags\r
- if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) IsLeftBoundSpecified = true;\r
- if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) IsRightBoundSpecified = true;\r
- \r
- // attempt jump to beginning of region, return success/fail of Jump()\r
- return Jump( Region.LeftRefID, Region.LeftPosition );\r
+ return true;\r
}\r