// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 16 September 2010 (DB)
+// Last modified: 18 September 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index functionality - both for the default (standardized) BAM
// index format (.bai) as well as a BamTools-specific (nonstandard) index
// creates index data (in-memory) from current reader data
bool Build(void);
// attempts to use index to jump to region; returns success/fail
- bool Jump(const BamTools::BamRegion& region);
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
}
}
-bool BamStandardIndex::BamStandardIndexPrivate::Jump(const BamRegion& region) {
+bool BamStandardIndex::BamStandardIndexPrivate::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
// localize parent data
if ( m_parent == 0 ) return false;
// make sure left-bound position is valid
if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) return false;
+ // calculate offsets for this region
+ // if failed, print message, set flag, and return failure
vector<int64_t> offsets;
if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets) ) {
fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ *hasAlignmentsInRegion = false;
return false;
}
for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
// attempt seek & load first available alignment
+ // set flag to true if data exists
result &= mBGZF->Seek(*o);
- reader->GetNextAlignmentCore(bAlignment);
+ *hasAlignmentsInRegion = reader->GetNextAlignmentCore(bAlignment);
// if this alignment corresponds to desired position
- // return success of seeking back to 'current offset'
+ // return success of seeking back to the offset before the 'current offset' (to cover overlaps)
if ( (bAlignment.RefID == region.LeftRefID && bAlignment.Position + bAlignment.Length > region.LeftPosition) || (bAlignment.RefID > region.LeftRefID) ) {
if ( o != offsets.begin() ) --o;
return mBGZF->Seek(*o);
}
}
- if ( !result ) fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ // if error in jumping, print message & set flag
+ if ( !result ) {
+ fprintf(stderr, "ERROR: Could not jump: unable to calculate offset for specified region.\n");
+ *hasAlignmentsInRegion = false;
+ }
+
+ // return success/failure
return result;
}
bool BamStandardIndex::Build(void) { return d->Build(); }
// attempts to use index to jump to region; returns success/fail
-bool BamStandardIndex::Jump(const BamRegion& region) { return d->Jump(region); }
+bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) { return d->Jump(region, hasAlignmentsInRegion); }
// loads existing data from file into memory
bool BamStandardIndex::Load(const string& filename) { return d->Load(filename); }
// do something new
// else
// do the old thing
- enum Version { BTI_1_0 = 1 };
+ enum Version { BTI_1_0 = 1
+ , BTI_1_1
+ };
// -------------------------
// data members
BamToolsIndexPrivate(BamToolsIndex* parent)
: m_parent(parent)
, m_blockSize(1000)
- , m_version(BTI_1_0) // latest version - used for writing new index files
+ , m_version(BTI_1_1) // latest version - used for writing new index files
{ }
~BamToolsIndexPrivate(void) { }
// returns supported file extension
const std::string Extension(void) const { return std::string(".bti"); }
// attempts to use index to jump to region; returns success/fail
- bool Jump(const BamTools::BamRegion& region);
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
// internal methods
// calculates offset for a given region
- bool GetOffset(const BamRegion& region, int64_t& offset);
+ bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
};
bool BamToolsIndex::BamToolsIndexPrivate::Build(void) {
currentAlignmentOffset = mBGZF->Tell();
}
+ // store final block
+ m_indexData[blockRefId].push_back( BamToolsIndexEntry(blockMaxEndPosition, blockStartOffset, blockStartPosition) );
+
// attempt to rewind back to begnning of alignments
// return success/fail
return reader->Rewind();
}
// N.B. - ignores isRightBoundSpecified
-bool BamToolsIndex::BamToolsIndexPrivate::GetOffset(const BamRegion& region, int64_t& offset) {
+bool BamToolsIndex::BamToolsIndexPrivate::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
// return false if leftBound refID is not found in index data
if ( m_indexData.find(region.LeftRefID) == m_indexData.end() ) return false;
// save first offset
offset = (*referenceOffsets.begin()).StartOffset;
+
+ // iterate over offsets entries on this reference
vector<BamToolsIndexEntry>::const_iterator indexIter = referenceOffsets.begin();
vector<BamToolsIndexEntry>::const_iterator indexEnd = referenceOffsets.end();
for ( ; indexIter != indexEnd; ++indexIter ) {
const BamToolsIndexEntry& entry = (*indexIter);
+ // break if alignment 'entry' overlaps region
if ( entry.MaxEndPosition >= region.LeftPosition ) break;
offset = (*indexIter).StartOffset;
}
- // no index found
- if ( indexIter == indexEnd ) return false;
-
- // return succes
+ // set flag based on whether an index entry was found for this region
+ *hasAlignmentsInRegion = ( indexIter != indexEnd );
return true;
}
-bool BamToolsIndex::BamToolsIndexPrivate::Jump(const BamRegion& region) {
+bool BamToolsIndex::BamToolsIndexPrivate::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+ // clear flag
+ *hasAlignmentsInRegion = false;
// localize parent data
if ( m_parent == 0 ) return false;
BgzfData* mBGZF = m_parent->m_BGZF;
RefVector& references = m_parent->m_references;
+ // check valid BamReader state
if ( reader == 0 || mBGZF == 0 || !reader->IsOpen() ) {
fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
return false;
// calculate nearest offset to jump to
int64_t offset;
- if ( !GetOffset(region, offset) ) {
+ if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
return false;
}
return false;
}
+ if ( (Version)indexVersion < BTI_1_1 ) {
+ fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
+ fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
+ fclose(indexStream);
+ exit(1);
+ }
+
// read in block size
elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, indexStream);
if ( isBigEndian ) SwapEndian_32(m_blockSize);
bool BamToolsIndex::Build(void) { return d->Build(); }
// attempts to use index to jump to region; returns success/fail
-bool BamToolsIndex::Jump(const BamRegion& region) { return d->Jump(region); }
+// a "successful" jump indicates no error, but not whether this region has data
+// * thus, the method sets a flag to indicate whether there are alignments
+// available after the jump position
+bool BamToolsIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) { return d->Jump(region, hasAlignmentsInRegion); }
// loads existing data from file into memory
bool BamToolsIndex::Load(const string& filename) { return d->Load(filename); }
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 10 September 2010 (DB)
+// Last modified: 17 September 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index functionality - both for the standardized BAM index format
// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
// returns supported file extension
virtual const std::string Extension(void) const =0;
// attempts to use index to jump to region; returns success/fail
- virtual bool Jump(const BamTools::BamRegion& region) =0;
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID);
// loads existing data from file into memory
// returns supported file extension
const std::string Extension(void) const { return std::string(".bai"); }
// attempts to use index to jump to region; returns success/fail
- bool Jump(const BamTools::BamRegion& region);
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
// returns supported file extension
const std::string Extension(void) const { return std::string(".bti"); }
// attempts to use index to jump to region; returns success/fail
- bool Jump(const BamTools::BamRegion& region);
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// loads existing data from file into memory
bool Load(const std::string& filename);
// writes in-memory index data out to file
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 10 September 2010 (DB)\r
+// Last modified: 17 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
#include <string>\r
#include <vector>\r
#include <iostream>\r
-\r
-// BamTools includes\r
#include "BGZF.h"\r
#include "BamReader.h"\r
#include "BamIndex.h"\r
bool IsBigEndian;\r
\r
// user-specified region values\r
- BamRegion Region; \r
- int CurrentRefID;\r
- int CurrentLeft;\r
+ BamRegion Region;\r
+ bool HasAlignmentsInRegion;\r
\r
// parent BamReader\r
BamReader* Parent;\r
: Index(0)\r
, IsIndexLoaded(false)\r
, AlignmentsBeginOffset(0)\r
- , CurrentRefID(0)\r
- , CurrentLeft(0)\r
, Parent(parent)\r
, DNA_LOOKUP("=ACMGRSVTWYHKDBN")\r
, CIGAR_LOOKUP("MIDNSHP")\r
// useful for operations requiring ONLY positional or other alignment-related information\r
bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {\r
\r
+ // if region is set but has no alignments\r
+ if ( !Region.isNull() && !HasAlignmentsInRegion ) \r
+ return false;\r
+ \r
// if valid alignment available\r
if ( LoadNextAlignment(bAlignment) ) {\r
\r
Region.clear();\r
Region.LeftRefID = al.RefID;\r
Region.LeftPosition = al.Position;\r
+ HasAlignmentsInRegion = true;\r
\r
// rewind back to beginning of first alignment\r
// return success/fail of seek\r
// check for existing index \r
if ( !IsIndexLoaded || Index == 0 ) return false; \r
\r
- // attempt jump to user-specified region, return false if failed\r
- if ( !Index->Jump(region) ) return false;\r
+ // attempt jump to user-specified region return false if jump could not be performed at all\r
+ // (invalid index, unknown reference, etc)\r
+ //\r
+ // Index::Jump() is allowed to modify the HasAlignmentsInRegion flag\r
+ // * This covers case where a region is requested that lies beyond the last alignment on a reference\r
+ // If this occurs, any subsequent calls to GetNexAlignment[Core] simply return false\r
+ // BamMultiReader is then able to successfully pull alignments from a region from multiple files\r
+ // even if one or more have no data.\r
+ if ( !Index->Jump(region, &HasAlignmentsInRegion) ) return false;\r
\r
- // if successful, save region data locally, return success\r
+ // if jump successful, save region data & return success\r
Region = region;\r
return true;\r
}\r