// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 22 November 2010 (DB)
+// Last modified: 13 January 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the standardized BAM index format (".bai")
// ***************************************************************************
// if right bound specified AND left&right bounds are on same reference
// OK to use right bound position
if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
- end = (unsigned int)region.RightPosition;
+ end = (unsigned int)region.RightPosition;
// otherwise, use end of left bound reference as cutoff
else
- end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1;
+ end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1;
// initialize list, bin '0' always a valid bin
int i = 0;
// be sure reader & BGZF file are valid & open for reading
if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
- return false;
+ return false;
// move file pointer to beginning of alignments
m_reader->Rewind();
BamAlignment bAlignment;
while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
- // change of chromosome, save ID, reset bin
- if ( lastRefID != bAlignment.RefID ) {
- lastRefID = bAlignment.RefID;
- lastBin = defaultValue;
- }
-
- // if lastCoordinate greater than BAM position - file not sorted properly
- else if ( lastCoordinate > bAlignment.Position ) {
- fprintf(stderr, "BAM file not properly sorted:\n");
- fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(),
- lastCoordinate, bAlignment.Position, bAlignment.RefID);
- exit(1);
- }
-
- // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
- if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
-
- // save linear offset entry (matched to BAM entry refID)
- BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID);
- if ( indexIter == m_indexData.end() ) return false; // error
- ReferenceIndex& refIndex = (*indexIter).second;
- LinearOffsetVector& offsets = refIndex.Offsets;
- SaveLinearOffset(offsets, bAlignment, lastOffset);
- }
-
- // if current BamAlignment bin != lastBin, "then possibly write the binning index"
- if ( bAlignment.Bin != lastBin ) {
-
- // if not first time through
- if ( saveBin != defaultValue ) {
-
- // save Bam bin entry
- BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
- if ( indexIter == m_indexData.end() ) return false; // error
- ReferenceIndex& refIndex = (*indexIter).second;
- BamBinMap& binMap = refIndex.Bins;
- SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
- }
-
- // update saveOffset
- saveOffset = lastOffset;
-
- // update bin values
- saveBin = bAlignment.Bin;
- lastBin = bAlignment.Bin;
-
- // update saveRefID
- saveRefID = bAlignment.RefID;
-
- // if invalid RefID, break out
- if ( saveRefID < 0 ) break;
- }
-
- // make sure that current file pointer is beyond lastOffset
- if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
- fprintf(stderr, "Error in BGZF offsets.\n");
- exit(1);
- }
-
- // update lastOffset
- lastOffset = m_BGZF->Tell();
-
- // update lastCoordinate
- lastCoordinate = bAlignment.Position;
+ // change of chromosome, save ID, reset bin
+ if ( lastRefID != bAlignment.RefID ) {
+ lastRefID = bAlignment.RefID;
+ lastBin = defaultValue;
+ }
+
+ // if lastCoordinate greater than BAM position - file not sorted properly
+ else if ( lastCoordinate > bAlignment.Position ) {
+ fprintf(stderr, "BAM file not properly sorted:\n");
+ fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(),
+ lastCoordinate, bAlignment.Position, bAlignment.RefID);
+ exit(1);
+ }
+
+ // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
+ if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
+
+ // save linear offset entry (matched to BAM entry refID)
+ BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID);
+ if ( indexIter == m_indexData.end() ) return false; // error
+ ReferenceIndex& refIndex = (*indexIter).second;
+ LinearOffsetVector& offsets = refIndex.Offsets;
+ SaveLinearOffset(offsets, bAlignment, lastOffset);
+ }
+
+ // if current BamAlignment bin != lastBin, "then possibly write the binning index"
+ if ( bAlignment.Bin != lastBin ) {
+
+ // if not first time through
+ if ( saveBin != defaultValue ) {
+
+ // save Bam bin entry
+ BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
+ if ( indexIter == m_indexData.end() ) return false; // error
+ ReferenceIndex& refIndex = (*indexIter).second;
+ BamBinMap& binMap = refIndex.Bins;
+ SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
+ }
+
+ // update saveOffset
+ saveOffset = lastOffset;
+
+ // update bin values
+ saveBin = bAlignment.Bin;
+ lastBin = bAlignment.Bin;
+
+ // update saveRefID
+ saveRefID = bAlignment.RefID;
+
+ // if invalid RefID, break out
+ if ( saveRefID < 0 ) break;
+ }
+
+ // make sure that current file pointer is beyond lastOffset
+ if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
+ fprintf(stderr, "Error in BGZF offsets.\n");
+ exit(1);
+ }
+
+ // update lastOffset
+ lastOffset = m_BGZF->Tell();
+
+ // update lastCoordinate
+ lastCoordinate = bAlignment.Position;
}
// save any leftover BAM data (as long as refID is valid)
if ( saveRefID >= 0 ) {
- // save Bam bin entry
- BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
- if ( indexIter == m_indexData.end() ) return false; // error
- ReferenceIndex& refIndex = (*indexIter).second;
- BamBinMap& binMap = refIndex.Bins;
- SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
+ // save Bam bin entry
+ BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
+ if ( indexIter == m_indexData.end() ) return false; // error
+ ReferenceIndex& refIndex = (*indexIter).second;
+ BamBinMap& binMap = refIndex.Bins;
+ SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
}
// simplify index by merging chunks
BamStandardIndexData::iterator indexEnd = m_indexData.end();
for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
- // get reference index data
- ReferenceIndex& refIndex = (*indexIter).second;
- LinearOffsetVector& offsets = refIndex.Offsets;
+ // get reference index data
+ ReferenceIndex& refIndex = (*indexIter).second;
+ LinearOffsetVector& offsets = refIndex.Offsets;
- // sort linear offsets
- sort(offsets.begin(), offsets.end());
+ // sort linear offsets
+ sort(offsets.begin(), offsets.end());
}
// rewind file pointer to beginning of alignments, return success/fail
// compare to expected value
if ( strncmp(magic, "BAI\1", 4) != 0 ) {
- fprintf(stderr, "Problem with index file - invalid format.\n");
- fclose(m_indexStream);
- return false;
+ fprintf(stderr, "Problem with index file - invalid format.\n");
+ fclose(m_indexStream);
+ return false;
}
// return success/failure of load
BamStandardIndexData::const_iterator indexIter = m_indexData.begin();
BamStandardIndexData::const_iterator indexEnd = m_indexData.end();
for ( ; indexIter != indexEnd; ++indexIter ) {
- const int& refId = (*indexIter).first;
- ClearReferenceOffsets(refId);
+ const int& refId = (*indexIter).first;
+ ClearReferenceOffsets(refId);
}
}
{
// return false if leftBound refID is not found in index data
if ( m_indexData.find(region.LeftRefID) == m_indexData.end() )
- return false;
+ return false;
// load index data for region if not already cached
if ( !IsDataLoaded(region.LeftRefID) ) {
- bool loadedOk = true;
- loadedOk &= SkipToReference(region.LeftRefID);
- loadedOk &= LoadReference(region.LeftRefID);
- if ( !loadedOk ) return false;
+ bool loadedOk = true;
+ loadedOk &= SkipToReference(region.LeftRefID);
+ loadedOk &= LoadReference(region.LeftRefID);
+ if ( !loadedOk ) return false;
}
// calculate which bins overlap this region
// store all alignment 'chunk' starts (file offsets) for bins in this region
for ( int i = 0; i < numBins; ++i ) {
- const uint16_t binKey = bins[i];
- map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
- if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
-
- // iterate over chunks
- const ChunkVector& chunks = (*binIter).second;
- std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
- std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
- for ( ; chunksIter != chunksEnd; ++chunksIter) {
-
- // if valid chunk found, store its file offset
- const Chunk& chunk = (*chunksIter);
- if ( chunk.Stop > minOffset )
- offsets.push_back( chunk.Start );
- }
- }
+ const uint16_t binKey = bins[i];
+ map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
+ if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
+
+ // iterate over chunks
+ const ChunkVector& chunks = (*binIter).second;
+ std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
+ std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
+ for ( ; chunksIter != chunksEnd; ++chunksIter) {
+
+ // if valid chunk found, store its file offset
+ const Chunk& chunk = (*chunksIter);
+ if ( chunk.Stop > minOffset )
+ offsets.push_back( chunk.Start );
+ }
+ }
}
// clean up memory
// be sure reader & BGZF file are valid & open for reading
if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
- return false;
+ return false;
// make sure left-bound position is valid
if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
- return false;
+ return false;
// calculate offsets for this region
// if failed, print message, set flag, and return failure
vector<int64_t> offsets;
if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets, hasAlignmentsInRegion) ) {
- fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n");
- *hasAlignmentsInRegion = false;
- return false;
+ fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n");
+ *hasAlignmentsInRegion = false;
+ return false;
}
// iterate through offsets
bool result = true;
for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
- // attempt seek & load first available alignment
- // set flag to true if data exists
- result &= m_BGZF->Seek(*o);
- *hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment);
-
- // if this alignment corresponds to desired position
- // return success of seeking back to the offset before the 'current offset' (to cover overlaps)
- if ( ((bAlignment.RefID == region.LeftRefID) &&
- ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) ||
- (bAlignment.RefID > region.LeftRefID) )
- {
- if ( o != offsets.begin() ) --o;
- return m_BGZF->Seek(*o);
- }
+ // attempt seek & load first available alignment
+ // set flag to true if data exists
+ result &= m_BGZF->Seek(*o);
+ *hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment);
+
+ // if this alignment corresponds to desired position
+ // return success of seeking back to the offset before the 'current offset' (to cover overlaps)
+ if ( ((bAlignment.RefID == region.LeftRefID) &&
+ ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) ||
+ (bAlignment.RefID > region.LeftRefID) )
+ {
+ if ( o != offsets.begin() ) --o;
+ return m_BGZF->Seek(*o);
+ }
}
// if error in jumping, print message & set flag
if ( !result ) {
- fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n");
- *hasAlignmentsInRegion = false;
+ fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n");
+ *hasAlignmentsInRegion = false;
}
// return success/failure
BamStandardIndexData::iterator mapIter = m_indexData.begin();
BamStandardIndexData::iterator mapEnd = m_indexData.end();
for ( ; mapIter != mapEnd; ++mapIter ) {
- const int entryRefId = (*mapIter).first;
- if ( entryRefId != refId )
- ClearReferenceOffsets(entryRefId);
+ const int entryRefId = (*mapIter).first;
+ if ( entryRefId != refId )
+ ClearReferenceOffsets(entryRefId);
}
}
// get number of reference sequences
uint32_t numReferences;
if ( !LoadReferenceCount((int&)numReferences) )
- return false;
+ return false;
// iterate over reference entries
bool loadedOk = true;
for ( int i = 0; i < (int)numReferences; ++i )
- loadedOk &= LoadReference(i, saveData);
+ loadedOk &= LoadReference(i, saveData);
// set flag
if ( loadedOk && saveData )
- m_hasFullDataCache = true;
+ m_hasFullDataCache = true;
// return success/failure of loading references
return loadedOk;
// store bin entry
if ( chunksOk && saveData )
- refEntry.Bins.insert(pair<uint32_t, ChunkVector>(binId, chunks));
+ refEntry.Bins.insert(pair<uint32_t, ChunkVector>(binId, chunks));
// return success/failure of load
return ( (elementsRead == 1) && chunksOk );
// iterate over bins
bool binsOk = true;
for ( int i = 0; i < numBins; ++i )
- binsOk &= LoadBin(refEntry, saveData);
+ binsOk &= LoadBin(refEntry, saveData);
// return success/failure of load
return ( (elementsRead == 1) && binsOk );
// swap endian-ness if necessary
if ( m_isBigEndian ) {
- SwapEndian_64(start);
- SwapEndian_64(stop);
+ SwapEndian_64(start);
+ SwapEndian_64(stop);
}
// save data if requested
// iterate over chunks
bool chunksOk = true;
for ( int i = 0; i < (int)numChunks; ++i )
- chunksOk &= LoadChunk(chunks, saveData);
+ chunksOk &= LoadChunk(chunks, saveData);
// sort chunk vector
sort( chunks.begin(), chunks.end(), ChunkLessThan );
// iterate over linear offsets
uint64_t linearOffset;
for ( int i = 0; i < numLinearOffsets; ++i ) {
- elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
- if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- if ( saveData ) linearOffsets.push_back(linearOffset);
+ elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+ if ( saveData ) linearOffsets.push_back(linearOffset);
}
// sort linear offsets
// if reference not previously loaded, create new entry
if ( indexIter == m_indexData.end() ) {
- ReferenceIndex newEntry;
- newEntry.HasAlignments = false;
- m_indexData.insert( pair<int32_t, ReferenceIndex>(refId, newEntry) );
+ ReferenceIndex newEntry;
+ newEntry.HasAlignments = false;
+ m_indexData.insert( pair<int32_t, ReferenceIndex>(refId, newEntry) );
}
// load reference data
BamStandardIndexData::iterator indexEnd = m_indexData.end();
for ( ; indexIter != indexEnd; ++indexIter ) {
- // get BAM bin map for this reference
- ReferenceIndex& refIndex = (*indexIter).second;
- BamBinMap& bamBinMap = refIndex.Bins;
-
- // iterate over BAM bins
- BamBinMap::iterator binIter = bamBinMap.begin();
- BamBinMap::iterator binEnd = bamBinMap.end();
- for ( ; binIter != binEnd; ++binIter ) {
-
- // get chunk vector for this bin
- ChunkVector& binChunks = (*binIter).second;
- if ( binChunks.size() == 0 ) continue;
-
- ChunkVector mergedChunks;
- mergedChunks.push_back( binChunks[0] );
-
- // iterate over chunks
- int i = 0;
- ChunkVector::iterator chunkIter = binChunks.begin();
- ChunkVector::iterator chunkEnd = binChunks.end();
- for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
-
- // get 'currentChunk' based on numeric index
- Chunk& currentChunk = mergedChunks[i];
-
- // get iteratorChunk based on vector iterator
- Chunk& iteratorChunk = (*chunkIter);
-
- // if chunk ends where (iterator) chunk starts, then merge
- if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 )
- currentChunk.Stop = iteratorChunk.Stop;
-
- // otherwise
- else {
- // set currentChunk + 1 to iteratorChunk
- mergedChunks.push_back(iteratorChunk);
- ++i;
- }
- }
-
- // saved merged chunk vector
- (*binIter).second = mergedChunks;
- }
+ // get BAM bin map for this reference
+ ReferenceIndex& refIndex = (*indexIter).second;
+ BamBinMap& bamBinMap = refIndex.Bins;
+
+ // iterate over BAM bins
+ BamBinMap::iterator binIter = bamBinMap.begin();
+ BamBinMap::iterator binEnd = bamBinMap.end();
+ for ( ; binIter != binEnd; ++binIter ) {
+
+ // get chunk vector for this bin
+ ChunkVector& binChunks = (*binIter).second;
+ if ( binChunks.size() == 0 ) continue;
+
+ ChunkVector mergedChunks;
+ mergedChunks.push_back( binChunks[0] );
+
+ // iterate over chunks
+ int i = 0;
+ ChunkVector::iterator chunkIter = binChunks.begin();
+ ChunkVector::iterator chunkEnd = binChunks.end();
+ for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
+
+ // get 'currentChunk' based on numeric index
+ Chunk& currentChunk = mergedChunks[i];
+
+ // get iteratorChunk based on vector iterator
+ Chunk& iteratorChunk = (*chunkIter);
+
+ // if chunk ends where (iterator) chunk starts, then merge
+ if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 )
+ currentChunk.Stop = iteratorChunk.Stop;
+
+ // otherwise
+ else {
+ // set currentChunk + 1 to iteratorChunk
+ mergedChunks.push_back(iteratorChunk);
+ ++i;
+ }
+ }
+
+ // saved merged chunk vector
+ (*binIter).second = mergedChunks;
+ }
}
}
// if entry doesn't exist
if ( binIter == binMap.end() ) {
- ChunkVector newChunks;
- newChunks.push_back(newChunk);
- binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
+ ChunkVector newChunks;
+ newChunks.push_back(newChunk);
+ binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
}
// otherwise
else {
- ChunkVector& binChunks = (*binIter).second;
- binChunks.push_back( newChunk );
+ ChunkVector& binChunks = (*binIter).second;
+ binChunks.push_back( newChunk );
}
}
int oldSize = offsets.size();
int newSize = endOffset + 1;
if ( oldSize < newSize )
- offsets.resize(newSize, 0);
+ offsets.resize(newSize, 0);
// store offset
for( int i = beginOffset + 1; i <= endOffset; ++i ) {
- if ( offsets[i] == 0 )
- offsets[i] = lastOffset;
- }
+ if ( offsets[i] == 0 )
+ offsets[i] = lastOffset;
+ }
}
// initializes index data structure to hold @count references
void BamStandardIndex::SetReferenceCount(const int& count) {
for ( int i = 0; i < count; ++i )
- m_indexData[i].HasAlignments = false;
+ m_indexData[i].HasAlignments = false;
}
bool BamStandardIndex::SkipToFirstReference(void) {
bool skippedOk = true;
int currentRefId = 0;
while (currentRefId != refId) {
- skippedOk &= LoadReference(currentRefId, false);
- ++currentRefId;
+ skippedOk &= LoadReference(currentRefId, false);
+ ++currentRefId;
}
// return success
BamStandardIndexData::const_iterator indexIter = m_indexData.begin();
BamStandardIndexData::const_iterator indexEnd = m_indexData.end();
for ( ; indexIter != indexEnd; ++ indexIter )
- refsOk &= WriteReference( (*indexIter).second );
+ refsOk &= WriteReference( (*indexIter).second );
// return success/failure of write
return ( (elementsWritten == 1) && refsOk );
BamBinMap::const_iterator binIter = bins.begin();
BamBinMap::const_iterator binEnd = bins.end();
for ( ; binIter != binEnd; ++binIter )
- binsOk &= WriteBin( (*binIter).first, (*binIter).second );
+ binsOk &= WriteBin( (*binIter).first, (*binIter).second );
// return success/failure of write
return ( (elementsWritten == 1) && binsOk );
// swap endian-ness if necessary
if ( m_isBigEndian ) {
- SwapEndian_64(start);
- SwapEndian_64(stop);
+ SwapEndian_64(start);
+ SwapEndian_64(stop);
}
// write to index file
ChunkVector::const_iterator chunkIter = chunks.begin();
ChunkVector::const_iterator chunkEnd = chunks.end();
for ( ; chunkIter != chunkEnd; ++chunkIter )
- chunksOk &= WriteChunk( (*chunkIter) );
+ chunksOk &= WriteChunk( (*chunkIter) );
// return success/failure of write
return ( (elementsWritten == 1) && chunksOk );
LinearOffsetVector::const_iterator offsetEnd = offsets.end();
for ( ; offsetIter != offsetEnd; ++offsetIter ) {
- // write linear offset
- uint64_t linearOffset = (*offsetIter);
- if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+ // write linear offset
+ uint64_t linearOffset = (*offsetIter);
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+ elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
}
// return success/failure of write
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 22 November 2010 (DB)
+// Last modified: 13 January 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the BamTools index format (".bti")
// ***************************************************************************
// be sure reader & BGZF file are valid & open for reading
if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
- return false;
+ return false;
// move file pointer to beginning of alignments
if ( !m_reader->Rewind() ) return false;
BamAlignment al;
while ( m_reader->GetNextAlignmentCore(al) ) {
- // if block contains data (not the first time through) AND alignment is on a new reference
- if ( currentBlockCount > 0 && al.RefID != blockRefId ) {
-
- // store previous data
- BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- SaveOffsetEntry(blockRefId, entry);
-
- // intialize new block for current alignment's reference
- currentBlockCount = 0;
- blockMaxEndPosition = al.GetEndPosition();
- blockStartOffset = currentAlignmentOffset;
- }
-
- // if beginning of block, save first alignment's refID & position
- if ( currentBlockCount == 0 ) {
- blockRefId = al.RefID;
- blockStartPosition = al.Position;
- }
-
- // increment block counter
- ++currentBlockCount;
-
- // check end position
- int32_t alignmentEndPosition = al.GetEndPosition();
- if ( alignmentEndPosition > blockMaxEndPosition )
- blockMaxEndPosition = alignmentEndPosition;
-
- // if block is full, get offset for next block, reset currentBlockCount
- if ( currentBlockCount == m_blockSize ) {
- BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- SaveOffsetEntry(blockRefId, entry);
- blockStartOffset = m_BGZF->Tell();
- currentBlockCount = 0;
- }
-
- // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
- // necessary because we won't know if this next alignment is on a new reference until we actually read it
- currentAlignmentOffset = m_BGZF->Tell();
+ // if block contains data (not the first time through) AND alignment is on a new reference
+ if ( currentBlockCount > 0 && al.RefID != blockRefId ) {
+
+ // store previous data
+ BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+ SaveOffsetEntry(blockRefId, entry);
+
+ // intialize new block for current alignment's reference
+ currentBlockCount = 0;
+ blockMaxEndPosition = al.GetEndPosition();
+ blockStartOffset = currentAlignmentOffset;
+ }
+
+ // if beginning of block, save first alignment's refID & position
+ if ( currentBlockCount == 0 ) {
+ blockRefId = al.RefID;
+ blockStartPosition = al.Position;
+ }
+
+ // increment block counter
+ ++currentBlockCount;
+
+ // check end position
+ int32_t alignmentEndPosition = al.GetEndPosition();
+ if ( alignmentEndPosition > blockMaxEndPosition )
+ blockMaxEndPosition = alignmentEndPosition;
+
+ // if block is full, get offset for next block, reset currentBlockCount
+ if ( currentBlockCount == m_blockSize ) {
+ BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+ SaveOffsetEntry(blockRefId, entry);
+ blockStartOffset = m_BGZF->Tell();
+ currentBlockCount = 0;
+ }
+
+ // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
+ // necessary because we won't know if this next alignment is on a new reference until we actually read it
+ currentAlignmentOffset = m_BGZF->Tell();
}
// store final block with data
size_t elementsRead = fread(magic, 1, 4, m_indexStream);
if ( elementsRead != 4 ) return false;
if ( strncmp(magic, "BTI\1", 4) != 0 ) {
- fprintf(stderr, "Problem with index file - invalid format.\n");
- return false;
+ fprintf(stderr, "Problem with index file - invalid format.\n");
+ return false;
}
// otherwise ok
// if version is negative, or zero
if ( m_inputVersion <= 0 ) {
- fprintf(stderr, "Problem with index file - invalid version.\n");
- return false;
+ fprintf(stderr, "Problem with index file - invalid version.\n");
+ return false;
}
// if version is newer than can be supported by this version of bamtools
else if ( m_inputVersion > m_outputVersion ) {
- fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n");
- fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n");
- return false;
+ fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n");
+ fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n");
+ return false;
}
// ------------------------------------------------------------------
// (typically whose format did not accomodate a particular bug fix)
else if ( (Version)m_inputVersion == BTI_1_0 ) {
- fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
- fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
- return false;
+ fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
+ fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
+ return false;
}
else if ( (Version)m_inputVersion == BTI_1_1 ) {
- fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n");
- fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
- return false;
+ fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n");
+ fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
+ return false;
}
// otherwise ok
BamToolsIndexData::const_iterator indexIter = m_indexData.begin();
BamToolsIndexData::const_iterator indexEnd = m_indexData.end();
for ( ; indexIter != indexEnd; ++indexIter ) {
- const int& refId = (*indexIter).first;
- ClearReferenceOffsets(refId);
+ const int& refId = (*indexIter).first;
+ ClearReferenceOffsets(refId);
}
}
// load index data for region if not already cached
if ( !IsDataLoaded(region.LeftRefID) ) {
- bool loadedOk = true;
- loadedOk &= SkipToReference(region.LeftRefID);
- loadedOk &= LoadReference(region.LeftRefID);
- if ( !loadedOk ) return false;
+ bool loadedOk = true;
+ loadedOk &= SkipToReference(region.LeftRefID);
+ loadedOk &= LoadReference(region.LeftRefID);
+ if ( !loadedOk ) return false;
}
// localize index data for this reference (& sanity check that data actually exists)
vector<BamToolsIndexEntry>::const_iterator offsetIter = referenceOffsets.begin();
vector<BamToolsIndexEntry>::const_iterator offsetEnd = referenceOffsets.end();
for ( ; offsetIter != offsetEnd; ++offsetIter ) {
- const BamToolsIndexEntry& entry = (*offsetIter);
- // break if alignment 'entry' overlaps region
- if ( entry.MaxEndPosition >= region.LeftPosition ) break;
- offset = (*offsetIter).StartOffset;
+ const BamToolsIndexEntry& entry = (*offsetIter);
+ // break if alignment 'entry' overlaps region
+ if ( entry.MaxEndPosition >= region.LeftPosition ) break;
+ offset = (*offsetIter).StartOffset;
}
// set flag based on whether an index entry was found for this region
// if cache mode set to none, dump the data we just loaded
if (m_cacheMode == BamIndex::NoIndexCaching )
- ClearReferenceOffsets(region.LeftRefID);
+ ClearReferenceOffsets(region.LeftRefID);
// return success
return true;
// returns whether reference has alignments or no
bool BamToolsIndex::HasAlignments(const int& refId) const {
-
BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId);
if ( indexIter == m_indexData.end()) return false;
const BamToolsReferenceEntry& refEntry = (*indexIter).second;
// check valid BamReader state
if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) {
- fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
- return false;
+ fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
+ return false;
}
// make sure left-bound position is valid
if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
- return false;
+ return false;
// calculate nearest offset to jump to
int64_t offset;
if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
- fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
- return false;
+ fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
+ return false;
}
// return success/failure of seek
BamToolsIndexData::iterator mapIter = m_indexData.begin();
BamToolsIndexData::iterator mapEnd = m_indexData.end();
for ( ; mapIter != mapEnd; ++mapIter ) {
- const int entryRefId = (*mapIter).first;
- if ( entryRefId != refId )
- ClearReferenceOffsets(entryRefId);
+ const int entryRefId = (*mapIter).first;
+ if ( entryRefId != refId )
+ ClearReferenceOffsets(entryRefId);
}
}
// iterate over reference entries
bool loadedOk = true;
for ( int i = 0; i < numReferences; ++i )
- loadedOk &= LoadReference(i, saveData);
+ loadedOk &= LoadReference(i, saveData);
// set flag
if ( loadedOk && saveData )
- m_hasFullDataCache = true;
+ m_hasFullDataCache = true;
// return success/failure of load
return loadedOk;
elementsRead += fread(&entry.StartOffset, sizeof(entry.StartOffset), 1, m_indexStream);
elementsRead += fread(&entry.StartPosition, sizeof(entry.StartPosition), 1, m_indexStream);
if ( elementsRead != 3 ) {
- cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl;
- return false;
+ cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl;
+ return false;
}
// swap endian-ness if necessary
if ( m_isBigEndian ) {
- SwapEndian_32(entry.MaxEndPosition);
- SwapEndian_64(entry.StartOffset);
- SwapEndian_32(entry.StartPosition);
+ SwapEndian_32(entry.MaxEndPosition);
+ SwapEndian_64(entry.StartOffset);
+ SwapEndian_32(entry.StartPosition);
}
// save data
if ( saveData )
- SaveOffsetEntry(refId, entry);
+ SaveOffsetEntry(refId, entry);
// return success/failure of load
return true;
// iterate over offset entries
for ( unsigned int j = 0; j < numOffsets; ++j )
- LoadIndexEntry(refId, saveData);
+ LoadIndexEntry(refId, saveData);
// return success/failure of load
return true;
// initializes index data structure to hold @count references
void BamToolsIndex::SetReferenceCount(const int& count) {
for ( int i = 0; i < count; ++i )
- m_indexData[i].HasAlignments = false;
+ m_indexData[i].HasAlignments = false;
}
// position file pointer to first reference begin, return true if skipped OK
bool skippedOk = true;
int currentRefId = 0;
while (currentRefId != refId) {
- skippedOk &= LoadReference(currentRefId, false);
- ++currentRefId;
+ skippedOk &= LoadReference(currentRefId, false);
+ ++currentRefId;
}
// return success/failure of skip
BamToolsIndexData::const_iterator refIter = m_indexData.begin();
BamToolsIndexData::const_iterator refEnd = m_indexData.end();
for ( ; refIter != refEnd; ++refIter )
- refOk &= WriteReferenceEntry( (*refIter).second );
+ refOk &= WriteReferenceEntry( (*refIter).second );
return ( (elementsWritten == 1) && refOk );
}
vector<BamToolsIndexEntry>::const_iterator offsetIter = refEntry.Offsets.begin();
vector<BamToolsIndexEntry>::const_iterator offsetEnd = refEntry.Offsets.end();
for ( ; offsetIter != offsetEnd; ++offsetIter )
- entriesOk &= WriteIndexEntry( (*offsetIter) );
+ entriesOk &= WriteIndexEntry( (*offsetIter) );
return ( (elementsWritten == 1) && entriesOk );
}
// swap endian-ness if necessary
if ( m_isBigEndian ) {
- SwapEndian_32(maxEndPosition);
- SwapEndian_64(startOffset);
- SwapEndian_32(startPosition);
+ SwapEndian_32(maxEndPosition);
+ SwapEndian_64(startOffset);
+ SwapEndian_32(startPosition);
}
// write the reference index entry