From: derek Date: Thu, 13 Jan 2011 06:40:25 +0000 (-0500) Subject: Minor formatting cleanup X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4423944dcf9ab108ba012bceb5d5db4eb4cb561d;p=bamtools.git Minor formatting cleanup --- diff --git a/src/api/internal/BamStandardIndex_p.cpp b/src/api/internal/BamStandardIndex_p.cpp index c90aef6..f243dbc 100644 --- a/src/api/internal/BamStandardIndex_p.cpp +++ b/src/api/internal/BamStandardIndex_p.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 November 2010 (DB) +// Last modified: 13 January 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the standardized BAM index format (".bai") // *************************************************************************** @@ -46,11 +46,11 @@ int BamStandardIndex::BinsFromRegion(const BamRegion& region, // if right bound specified AND left&right bounds are on same reference // OK to use right bound position if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) ) - end = (unsigned int)region.RightPosition; + end = (unsigned int)region.RightPosition; // otherwise, use end of left bound reference as cutoff else - end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1; + end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1; // initialize list, bin '0' always a valid bin int i = 0; @@ -73,7 +73,7 @@ bool BamStandardIndex::Build(void) { // be sure reader & BGZF file are valid & open for reading if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen ) - return false; + return false; // move file pointer to beginning of alignments m_reader->Rewind(); @@ -105,80 +105,80 @@ bool BamStandardIndex::Build(void) { BamAlignment bAlignment; while ( m_reader->GetNextAlignmentCore(bAlignment) ) { - // change of chromosome, save ID, reset bin - if ( lastRefID != bAlignment.RefID ) { - lastRefID = bAlignment.RefID; - lastBin = defaultValue; - } - - // if lastCoordinate greater than BAM position - file not sorted properly - else if ( lastCoordinate > bAlignment.Position ) { - fprintf(stderr, "BAM file not properly sorted:\n"); - fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), - lastCoordinate, bAlignment.Position, bAlignment.RefID); - exit(1); - } - - // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions) - if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) { - - // save linear offset entry (matched to BAM entry refID) - BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID); - if ( indexIter == m_indexData.end() ) return false; // error - ReferenceIndex& refIndex = (*indexIter).second; - LinearOffsetVector& offsets = refIndex.Offsets; - SaveLinearOffset(offsets, bAlignment, lastOffset); - } - - // if current BamAlignment bin != lastBin, "then possibly write the binning index" - if ( bAlignment.Bin != lastBin ) { - - // if not first time through - if ( saveBin != defaultValue ) { - - // save Bam bin entry - BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID); - if ( indexIter == m_indexData.end() ) return false; // error - ReferenceIndex& refIndex = (*indexIter).second; - BamBinMap& binMap = refIndex.Bins; - SaveBinEntry(binMap, saveBin, saveOffset, lastOffset); - } - - // update saveOffset - saveOffset = lastOffset; - - // update bin values - saveBin = bAlignment.Bin; - lastBin = bAlignment.Bin; - - // update saveRefID - saveRefID = bAlignment.RefID; - - // if invalid RefID, break out - if ( saveRefID < 0 ) break; - } - - // make sure that current file pointer is beyond lastOffset - if ( m_BGZF->Tell() <= (int64_t)lastOffset ) { - fprintf(stderr, "Error in BGZF offsets.\n"); - exit(1); - } - - // update lastOffset - lastOffset = m_BGZF->Tell(); - - // update lastCoordinate - lastCoordinate = bAlignment.Position; + // change of chromosome, save ID, reset bin + if ( lastRefID != bAlignment.RefID ) { + lastRefID = bAlignment.RefID; + lastBin = defaultValue; + } + + // if lastCoordinate greater than BAM position - file not sorted properly + else if ( lastCoordinate > bAlignment.Position ) { + fprintf(stderr, "BAM file not properly sorted:\n"); + fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), + lastCoordinate, bAlignment.Position, bAlignment.RefID); + exit(1); + } + + // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions) + if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) { + + // save linear offset entry (matched to BAM entry refID) + BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID); + if ( indexIter == m_indexData.end() ) return false; // error + ReferenceIndex& refIndex = (*indexIter).second; + LinearOffsetVector& offsets = refIndex.Offsets; + SaveLinearOffset(offsets, bAlignment, lastOffset); + } + + // if current BamAlignment bin != lastBin, "then possibly write the binning index" + if ( bAlignment.Bin != lastBin ) { + + // if not first time through + if ( saveBin != defaultValue ) { + + // save Bam bin entry + BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID); + if ( indexIter == m_indexData.end() ) return false; // error + ReferenceIndex& refIndex = (*indexIter).second; + BamBinMap& binMap = refIndex.Bins; + SaveBinEntry(binMap, saveBin, saveOffset, lastOffset); + } + + // update saveOffset + saveOffset = lastOffset; + + // update bin values + saveBin = bAlignment.Bin; + lastBin = bAlignment.Bin; + + // update saveRefID + saveRefID = bAlignment.RefID; + + // if invalid RefID, break out + if ( saveRefID < 0 ) break; + } + + // make sure that current file pointer is beyond lastOffset + if ( m_BGZF->Tell() <= (int64_t)lastOffset ) { + fprintf(stderr, "Error in BGZF offsets.\n"); + exit(1); + } + + // update lastOffset + lastOffset = m_BGZF->Tell(); + + // update lastCoordinate + lastCoordinate = bAlignment.Position; } // save any leftover BAM data (as long as refID is valid) if ( saveRefID >= 0 ) { - // save Bam bin entry - BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID); - if ( indexIter == m_indexData.end() ) return false; // error - ReferenceIndex& refIndex = (*indexIter).second; - BamBinMap& binMap = refIndex.Bins; - SaveBinEntry(binMap, saveBin, saveOffset, lastOffset); + // save Bam bin entry + BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID); + if ( indexIter == m_indexData.end() ) return false; // error + ReferenceIndex& refIndex = (*indexIter).second; + BamBinMap& binMap = refIndex.Bins; + SaveBinEntry(binMap, saveBin, saveOffset, lastOffset); } // simplify index by merging chunks @@ -190,12 +190,12 @@ bool BamStandardIndex::Build(void) { BamStandardIndexData::iterator indexEnd = m_indexData.end(); for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) { - // get reference index data - ReferenceIndex& refIndex = (*indexIter).second; - LinearOffsetVector& offsets = refIndex.Offsets; + // get reference index data + ReferenceIndex& refIndex = (*indexIter).second; + LinearOffsetVector& offsets = refIndex.Offsets; - // sort linear offsets - sort(offsets.begin(), offsets.end()); + // sort linear offsets + sort(offsets.begin(), offsets.end()); } // rewind file pointer to beginning of alignments, return success/fail @@ -211,9 +211,9 @@ bool BamStandardIndex::CheckMagicNumber(void) { // compare to expected value if ( strncmp(magic, "BAI\1", 4) != 0 ) { - fprintf(stderr, "Problem with index file - invalid format.\n"); - fclose(m_indexStream); - return false; + fprintf(stderr, "Problem with index file - invalid format.\n"); + fclose(m_indexStream); + return false; } // return success/failure of load @@ -225,8 +225,8 @@ void BamStandardIndex::ClearAllData(void) { BamStandardIndexData::const_iterator indexIter = m_indexData.begin(); BamStandardIndexData::const_iterator indexEnd = m_indexData.end(); for ( ; indexIter != indexEnd; ++indexIter ) { - const int& refId = (*indexIter).first; - ClearReferenceOffsets(refId); + const int& refId = (*indexIter).first; + ClearReferenceOffsets(refId); } } @@ -259,14 +259,14 @@ bool BamStandardIndex::GetOffsets(const BamRegion& region, { // return false if leftBound refID is not found in index data if ( m_indexData.find(region.LeftRefID) == m_indexData.end() ) - return false; + return false; // load index data for region if not already cached if ( !IsDataLoaded(region.LeftRefID) ) { - bool loadedOk = true; - loadedOk &= SkipToReference(region.LeftRefID); - loadedOk &= LoadReference(region.LeftRefID); - if ( !loadedOk ) return false; + bool loadedOk = true; + loadedOk &= SkipToReference(region.LeftRefID); + loadedOk &= LoadReference(region.LeftRefID); + if ( !loadedOk ) return false; } // calculate which bins overlap this region @@ -287,22 +287,22 @@ bool BamStandardIndex::GetOffsets(const BamRegion& region, // store all alignment 'chunk' starts (file offsets) for bins in this region for ( int i = 0; i < numBins; ++i ) { - const uint16_t binKey = bins[i]; - map::const_iterator binIter = binMap.find(binKey); - if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) { - - // iterate over chunks - const ChunkVector& chunks = (*binIter).second; - std::vector::const_iterator chunksIter = chunks.begin(); - std::vector::const_iterator chunksEnd = chunks.end(); - for ( ; chunksIter != chunksEnd; ++chunksIter) { - - // if valid chunk found, store its file offset - const Chunk& chunk = (*chunksIter); - if ( chunk.Stop > minOffset ) - offsets.push_back( chunk.Start ); - } - } + const uint16_t binKey = bins[i]; + map::const_iterator binIter = binMap.find(binKey); + if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) { + + // iterate over chunks + const ChunkVector& chunks = (*binIter).second; + std::vector::const_iterator chunksIter = chunks.begin(); + std::vector::const_iterator chunksEnd = chunks.end(); + for ( ; chunksIter != chunksEnd; ++chunksIter) { + + // if valid chunk found, store its file offset + const Chunk& chunk = (*chunksIter); + if ( chunk.Stop > minOffset ) + offsets.push_back( chunk.Start ); + } + } } // clean up memory @@ -356,19 +356,19 @@ bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion // be sure reader & BGZF file are valid & open for reading if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen ) - return false; + return false; // make sure left-bound position is valid if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength ) - return false; + return false; // calculate offsets for this region // if failed, print message, set flag, and return failure vector offsets; if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets, hasAlignmentsInRegion) ) { - fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n"); - *hasAlignmentsInRegion = false; - return false; + fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n"); + *hasAlignmentsInRegion = false; + return false; } // iterate through offsets @@ -376,26 +376,26 @@ bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion bool result = true; for ( vector::const_iterator o = offsets.begin(); o != offsets.end(); ++o) { - // attempt seek & load first available alignment - // set flag to true if data exists - result &= m_BGZF->Seek(*o); - *hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment); - - // if this alignment corresponds to desired position - // return success of seeking back to the offset before the 'current offset' (to cover overlaps) - if ( ((bAlignment.RefID == region.LeftRefID) && - ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) || - (bAlignment.RefID > region.LeftRefID) ) - { - if ( o != offsets.begin() ) --o; - return m_BGZF->Seek(*o); - } + // attempt seek & load first available alignment + // set flag to true if data exists + result &= m_BGZF->Seek(*o); + *hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment); + + // if this alignment corresponds to desired position + // return success of seeking back to the offset before the 'current offset' (to cover overlaps) + if ( ((bAlignment.RefID == region.LeftRefID) && + ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) || + (bAlignment.RefID > region.LeftRefID) ) + { + if ( o != offsets.begin() ) --o; + return m_BGZF->Seek(*o); + } } // if error in jumping, print message & set flag if ( !result ) { - fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n"); - *hasAlignmentsInRegion = false; + fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n"); + *hasAlignmentsInRegion = false; } // return success/failure @@ -413,9 +413,9 @@ void BamStandardIndex::KeepOnlyReferenceOffsets(const int& refId) { BamStandardIndexData::iterator mapIter = m_indexData.begin(); BamStandardIndexData::iterator mapEnd = m_indexData.end(); for ( ; mapIter != mapEnd; ++mapIter ) { - const int entryRefId = (*mapIter).first; - if ( entryRefId != refId ) - ClearReferenceOffsets(entryRefId); + const int entryRefId = (*mapIter).first; + if ( entryRefId != refId ) + ClearReferenceOffsets(entryRefId); } } @@ -427,16 +427,16 @@ bool BamStandardIndex::LoadAllReferences(bool saveData) { // get number of reference sequences uint32_t numReferences; if ( !LoadReferenceCount((int&)numReferences) ) - return false; + return false; // iterate over reference entries bool loadedOk = true; for ( int i = 0; i < (int)numReferences; ++i ) - loadedOk &= LoadReference(i, saveData); + loadedOk &= LoadReference(i, saveData); // set flag if ( loadedOk && saveData ) - m_hasFullDataCache = true; + m_hasFullDataCache = true; // return success/failure of loading references return loadedOk; @@ -471,7 +471,7 @@ bool BamStandardIndex::LoadBin(ReferenceIndex& refEntry, bool saveData) { // store bin entry if ( chunksOk && saveData ) - refEntry.Bins.insert(pair(binId, chunks)); + refEntry.Bins.insert(pair(binId, chunks)); // return success/failure of load return ( (elementsRead == 1) && chunksOk ); @@ -492,7 +492,7 @@ bool BamStandardIndex::LoadBins(ReferenceIndex& refEntry, bool saveData) { // iterate over bins bool binsOk = true; for ( int i = 0; i < numBins; ++i ) - binsOk &= LoadBin(refEntry, saveData); + binsOk &= LoadBin(refEntry, saveData); // return success/failure of load return ( (elementsRead == 1) && binsOk ); @@ -512,8 +512,8 @@ bool BamStandardIndex::LoadChunk(ChunkVector& chunks, bool saveData) { // swap endian-ness if necessary if ( m_isBigEndian ) { - SwapEndian_64(start); - SwapEndian_64(stop); + SwapEndian_64(start); + SwapEndian_64(stop); } // save data if requested @@ -538,7 +538,7 @@ bool BamStandardIndex::LoadChunks(ChunkVector& chunks, bool saveData) { // iterate over chunks bool chunksOk = true; for ( int i = 0; i < (int)numChunks; ++i ) - chunksOk &= LoadChunk(chunks, saveData); + chunksOk &= LoadChunk(chunks, saveData); // sort chunk vector sort( chunks.begin(), chunks.end(), ChunkLessThan ); @@ -565,9 +565,9 @@ bool BamStandardIndex::LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData // iterate over linear offsets uint64_t linearOffset; for ( int i = 0; i < numLinearOffsets; ++i ) { - elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream); - if ( m_isBigEndian ) SwapEndian_64(linearOffset); - if ( saveData ) linearOffsets.push_back(linearOffset); + elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream); + if ( m_isBigEndian ) SwapEndian_64(linearOffset); + if ( saveData ) linearOffsets.push_back(linearOffset); } // sort linear offsets @@ -594,9 +594,9 @@ bool BamStandardIndex::LoadReference(const int& refId, bool saveData) { // if reference not previously loaded, create new entry if ( indexIter == m_indexData.end() ) { - ReferenceIndex newEntry; - newEntry.HasAlignments = false; - m_indexData.insert( pair(refId, newEntry) ); + ReferenceIndex newEntry; + newEntry.HasAlignments = false; + m_indexData.insert( pair(refId, newEntry) ); } // load reference data @@ -629,49 +629,49 @@ void BamStandardIndex::MergeChunks(void) { BamStandardIndexData::iterator indexEnd = m_indexData.end(); for ( ; indexIter != indexEnd; ++indexIter ) { - // get BAM bin map for this reference - ReferenceIndex& refIndex = (*indexIter).second; - BamBinMap& bamBinMap = refIndex.Bins; - - // iterate over BAM bins - BamBinMap::iterator binIter = bamBinMap.begin(); - BamBinMap::iterator binEnd = bamBinMap.end(); - for ( ; binIter != binEnd; ++binIter ) { - - // get chunk vector for this bin - ChunkVector& binChunks = (*binIter).second; - if ( binChunks.size() == 0 ) continue; - - ChunkVector mergedChunks; - mergedChunks.push_back( binChunks[0] ); - - // iterate over chunks - int i = 0; - ChunkVector::iterator chunkIter = binChunks.begin(); - ChunkVector::iterator chunkEnd = binChunks.end(); - for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) { - - // get 'currentChunk' based on numeric index - Chunk& currentChunk = mergedChunks[i]; - - // get iteratorChunk based on vector iterator - Chunk& iteratorChunk = (*chunkIter); - - // if chunk ends where (iterator) chunk starts, then merge - if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) - currentChunk.Stop = iteratorChunk.Stop; - - // otherwise - else { - // set currentChunk + 1 to iteratorChunk - mergedChunks.push_back(iteratorChunk); - ++i; - } - } - - // saved merged chunk vector - (*binIter).second = mergedChunks; - } + // get BAM bin map for this reference + ReferenceIndex& refIndex = (*indexIter).second; + BamBinMap& bamBinMap = refIndex.Bins; + + // iterate over BAM bins + BamBinMap::iterator binIter = bamBinMap.begin(); + BamBinMap::iterator binEnd = bamBinMap.end(); + for ( ; binIter != binEnd; ++binIter ) { + + // get chunk vector for this bin + ChunkVector& binChunks = (*binIter).second; + if ( binChunks.size() == 0 ) continue; + + ChunkVector mergedChunks; + mergedChunks.push_back( binChunks[0] ); + + // iterate over chunks + int i = 0; + ChunkVector::iterator chunkIter = binChunks.begin(); + ChunkVector::iterator chunkEnd = binChunks.end(); + for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) { + + // get 'currentChunk' based on numeric index + Chunk& currentChunk = mergedChunks[i]; + + // get iteratorChunk based on vector iterator + Chunk& iteratorChunk = (*chunkIter); + + // if chunk ends where (iterator) chunk starts, then merge + if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) + currentChunk.Stop = iteratorChunk.Stop; + + // otherwise + else { + // set currentChunk + 1 to iteratorChunk + mergedChunks.push_back(iteratorChunk); + ++i; + } + } + + // saved merged chunk vector + (*binIter).second = mergedChunks; + } } } @@ -689,15 +689,15 @@ void BamStandardIndex::SaveBinEntry(BamBinMap& binMap, // if entry doesn't exist if ( binIter == binMap.end() ) { - ChunkVector newChunks; - newChunks.push_back(newChunk); - binMap.insert( pair(saveBin, newChunks)); + ChunkVector newChunks; + newChunks.push_back(newChunk); + binMap.insert( pair(saveBin, newChunks)); } // otherwise else { - ChunkVector& binChunks = (*binIter).second; - binChunks.push_back( newChunk ); + ChunkVector& binChunks = (*binIter).second; + binChunks.push_back( newChunk ); } } @@ -714,19 +714,19 @@ void BamStandardIndex::SaveLinearOffset(LinearOffsetVector& offsets, int oldSize = offsets.size(); int newSize = endOffset + 1; if ( oldSize < newSize ) - offsets.resize(newSize, 0); + offsets.resize(newSize, 0); // store offset for( int i = beginOffset + 1; i <= endOffset; ++i ) { - if ( offsets[i] == 0 ) - offsets[i] = lastOffset; - } + if ( offsets[i] == 0 ) + offsets[i] = lastOffset; + } } // initializes index data structure to hold @count references void BamStandardIndex::SetReferenceCount(const int& count) { for ( int i = 0; i < count; ++i ) - m_indexData[i].HasAlignments = false; + m_indexData[i].HasAlignments = false; } bool BamStandardIndex::SkipToFirstReference(void) { @@ -750,8 +750,8 @@ bool BamStandardIndex::SkipToReference(const int& refId) { bool skippedOk = true; int currentRefId = 0; while (currentRefId != refId) { - skippedOk &= LoadReference(currentRefId, false); - ++currentRefId; + skippedOk &= LoadReference(currentRefId, false); + ++currentRefId; } // return success @@ -788,7 +788,7 @@ bool BamStandardIndex::WriteAllReferences(void) { BamStandardIndexData::const_iterator indexIter = m_indexData.begin(); BamStandardIndexData::const_iterator indexEnd = m_indexData.end(); for ( ; indexIter != indexEnd; ++ indexIter ) - refsOk &= WriteReference( (*indexIter).second ); + refsOk &= WriteReference( (*indexIter).second ); // return success/failure of write return ( (elementsWritten == 1) && refsOk ); @@ -826,7 +826,7 @@ bool BamStandardIndex::WriteBins(const BamBinMap& bins) { BamBinMap::const_iterator binIter = bins.begin(); BamBinMap::const_iterator binEnd = bins.end(); for ( ; binIter != binEnd; ++binIter ) - binsOk &= WriteBin( (*binIter).first, (*binIter).second ); + binsOk &= WriteBin( (*binIter).first, (*binIter).second ); // return success/failure of write return ( (elementsWritten == 1) && binsOk ); @@ -843,8 +843,8 @@ bool BamStandardIndex::WriteChunk(const Chunk& chunk) { // swap endian-ness if necessary if ( m_isBigEndian ) { - SwapEndian_64(start); - SwapEndian_64(stop); + SwapEndian_64(start); + SwapEndian_64(stop); } // write to index file @@ -870,7 +870,7 @@ bool BamStandardIndex::WriteChunks(const ChunkVector& chunks) { ChunkVector::const_iterator chunkIter = chunks.begin(); ChunkVector::const_iterator chunkEnd = chunks.end(); for ( ; chunkIter != chunkEnd; ++chunkIter ) - chunksOk &= WriteChunk( (*chunkIter) ); + chunksOk &= WriteChunk( (*chunkIter) ); // return success/failure of write return ( (elementsWritten == 1) && chunksOk ); @@ -891,10 +891,10 @@ bool BamStandardIndex::WriteLinearOffsets(const LinearOffsetVector& offsets) { LinearOffsetVector::const_iterator offsetEnd = offsets.end(); for ( ; offsetIter != offsetEnd; ++offsetIter ) { - // write linear offset - uint64_t linearOffset = (*offsetIter); - if ( m_isBigEndian ) SwapEndian_64(linearOffset); - elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream); + // write linear offset + uint64_t linearOffset = (*offsetIter); + if ( m_isBigEndian ) SwapEndian_64(linearOffset); + elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream); } // return success/failure of write diff --git a/src/api/internal/BamToolsIndex_p.cpp b/src/api/internal/BamToolsIndex_p.cpp index 5590e8e..cccb484 100644 --- a/src/api/internal/BamToolsIndex_p.cpp +++ b/src/api/internal/BamToolsIndex_p.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 November 2010 (DB) +// Last modified: 13 January 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the BamTools index format (".bti") // *************************************************************************** @@ -43,7 +43,7 @@ bool BamToolsIndex::Build(void) { // be sure reader & BGZF file are valid & open for reading if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen ) - return false; + return false; // move file pointer to beginning of alignments if ( !m_reader->Rewind() ) return false; @@ -66,44 +66,44 @@ bool BamToolsIndex::Build(void) { BamAlignment al; while ( m_reader->GetNextAlignmentCore(al) ) { - // if block contains data (not the first time through) AND alignment is on a new reference - if ( currentBlockCount > 0 && al.RefID != blockRefId ) { - - // store previous data - BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition); - SaveOffsetEntry(blockRefId, entry); - - // intialize new block for current alignment's reference - currentBlockCount = 0; - blockMaxEndPosition = al.GetEndPosition(); - blockStartOffset = currentAlignmentOffset; - } - - // if beginning of block, save first alignment's refID & position - if ( currentBlockCount == 0 ) { - blockRefId = al.RefID; - blockStartPosition = al.Position; - } - - // increment block counter - ++currentBlockCount; - - // check end position - int32_t alignmentEndPosition = al.GetEndPosition(); - if ( alignmentEndPosition > blockMaxEndPosition ) - blockMaxEndPosition = alignmentEndPosition; - - // if block is full, get offset for next block, reset currentBlockCount - if ( currentBlockCount == m_blockSize ) { - BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition); - SaveOffsetEntry(blockRefId, entry); - blockStartOffset = m_BGZF->Tell(); - currentBlockCount = 0; - } - - // not the best name, but for the next iteration, this value will be the offset of the *current* alignment - // necessary because we won't know if this next alignment is on a new reference until we actually read it - currentAlignmentOffset = m_BGZF->Tell(); + // if block contains data (not the first time through) AND alignment is on a new reference + if ( currentBlockCount > 0 && al.RefID != blockRefId ) { + + // store previous data + BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition); + SaveOffsetEntry(blockRefId, entry); + + // intialize new block for current alignment's reference + currentBlockCount = 0; + blockMaxEndPosition = al.GetEndPosition(); + blockStartOffset = currentAlignmentOffset; + } + + // if beginning of block, save first alignment's refID & position + if ( currentBlockCount == 0 ) { + blockRefId = al.RefID; + blockStartPosition = al.Position; + } + + // increment block counter + ++currentBlockCount; + + // check end position + int32_t alignmentEndPosition = al.GetEndPosition(); + if ( alignmentEndPosition > blockMaxEndPosition ) + blockMaxEndPosition = alignmentEndPosition; + + // if block is full, get offset for next block, reset currentBlockCount + if ( currentBlockCount == m_blockSize ) { + BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition); + SaveOffsetEntry(blockRefId, entry); + blockStartOffset = m_BGZF->Tell(); + currentBlockCount = 0; + } + + // not the best name, but for the next iteration, this value will be the offset of the *current* alignment + // necessary because we won't know if this next alignment is on a new reference until we actually read it + currentAlignmentOffset = m_BGZF->Tell(); } // store final block with data @@ -125,8 +125,8 @@ bool BamToolsIndex::CheckMagicNumber(void) { size_t elementsRead = fread(magic, 1, 4, m_indexStream); if ( elementsRead != 4 ) return false; if ( strncmp(magic, "BTI\1", 4) != 0 ) { - fprintf(stderr, "Problem with index file - invalid format.\n"); - return false; + fprintf(stderr, "Problem with index file - invalid format.\n"); + return false; } // otherwise ok @@ -143,15 +143,15 @@ bool BamToolsIndex::CheckVersion(void) { // if version is negative, or zero if ( m_inputVersion <= 0 ) { - fprintf(stderr, "Problem with index file - invalid version.\n"); - return false; + fprintf(stderr, "Problem with index file - invalid version.\n"); + return false; } // if version is newer than can be supported by this version of bamtools else if ( m_inputVersion > m_outputVersion ) { - fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n"); - fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n"); - return false; + fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n"); + fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n"); + return false; } // ------------------------------------------------------------------ @@ -159,15 +159,15 @@ bool BamToolsIndex::CheckVersion(void) { // (typically whose format did not accomodate a particular bug fix) else if ( (Version)m_inputVersion == BTI_1_0 ) { - fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n"); - fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n"); - return false; + fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n"); + fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n"); + return false; } else if ( (Version)m_inputVersion == BTI_1_1 ) { - fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n"); - fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n"); - return false; + fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n"); + fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n"); + return false; } // otherwise ok @@ -179,8 +179,8 @@ void BamToolsIndex::ClearAllData(void) { BamToolsIndexData::const_iterator indexIter = m_indexData.begin(); BamToolsIndexData::const_iterator indexEnd = m_indexData.end(); for ( ; indexIter != indexEnd; ++indexIter ) { - const int& refId = (*indexIter).first; - ClearReferenceOffsets(refId); + const int& refId = (*indexIter).first; + ClearReferenceOffsets(refId); } } @@ -212,10 +212,10 @@ bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha // load index data for region if not already cached if ( !IsDataLoaded(region.LeftRefID) ) { - bool loadedOk = true; - loadedOk &= SkipToReference(region.LeftRefID); - loadedOk &= LoadReference(region.LeftRefID); - if ( !loadedOk ) return false; + bool loadedOk = true; + loadedOk &= SkipToReference(region.LeftRefID); + loadedOk &= LoadReference(region.LeftRefID); + if ( !loadedOk ) return false; } // localize index data for this reference (& sanity check that data actually exists) @@ -234,10 +234,10 @@ bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha vector::const_iterator offsetIter = referenceOffsets.begin(); vector::const_iterator offsetEnd = referenceOffsets.end(); for ( ; offsetIter != offsetEnd; ++offsetIter ) { - const BamToolsIndexEntry& entry = (*offsetIter); - // break if alignment 'entry' overlaps region - if ( entry.MaxEndPosition >= region.LeftPosition ) break; - offset = (*offsetIter).StartOffset; + const BamToolsIndexEntry& entry = (*offsetIter); + // break if alignment 'entry' overlaps region + if ( entry.MaxEndPosition >= region.LeftPosition ) break; + offset = (*offsetIter).StartOffset; } // set flag based on whether an index entry was found for this region @@ -245,7 +245,7 @@ bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha // if cache mode set to none, dump the data we just loaded if (m_cacheMode == BamIndex::NoIndexCaching ) - ClearReferenceOffsets(region.LeftRefID); + ClearReferenceOffsets(region.LeftRefID); // return success return true; @@ -253,7 +253,6 @@ bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* ha // returns whether reference has alignments or no bool BamToolsIndex::HasAlignments(const int& refId) const { - BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId); if ( indexIter == m_indexData.end()) return false; const BamToolsReferenceEntry& refEntry = (*indexIter).second; @@ -286,19 +285,19 @@ bool BamToolsIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) { // check valid BamReader state if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) { - fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n"); - return false; + fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n"); + return false; } // make sure left-bound position is valid if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength ) - return false; + return false; // calculate nearest offset to jump to int64_t offset; if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) { - fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n"); - return false; + fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n"); + return false; } // return success/failure of seek @@ -316,9 +315,9 @@ void BamToolsIndex::KeepOnlyReferenceOffsets(const int& refId) { BamToolsIndexData::iterator mapIter = m_indexData.begin(); BamToolsIndexData::iterator mapEnd = m_indexData.end(); for ( ; mapIter != mapEnd; ++mapIter ) { - const int entryRefId = (*mapIter).first; - if ( entryRefId != refId ) - ClearReferenceOffsets(entryRefId); + const int entryRefId = (*mapIter).first; + if ( entryRefId != refId ) + ClearReferenceOffsets(entryRefId); } } @@ -336,11 +335,11 @@ bool BamToolsIndex::LoadAllReferences(bool saveData) { // iterate over reference entries bool loadedOk = true; for ( int i = 0; i < numReferences; ++i ) - loadedOk &= LoadReference(i, saveData); + loadedOk &= LoadReference(i, saveData); // set flag if ( loadedOk && saveData ) - m_hasFullDataCache = true; + m_hasFullDataCache = true; // return success/failure of load return loadedOk; @@ -378,20 +377,20 @@ bool BamToolsIndex::LoadIndexEntry(const int& refId, bool saveData) { elementsRead += fread(&entry.StartOffset, sizeof(entry.StartOffset), 1, m_indexStream); elementsRead += fread(&entry.StartPosition, sizeof(entry.StartPosition), 1, m_indexStream); if ( elementsRead != 3 ) { - cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl; - return false; + cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl; + return false; } // swap endian-ness if necessary if ( m_isBigEndian ) { - SwapEndian_32(entry.MaxEndPosition); - SwapEndian_64(entry.StartOffset); - SwapEndian_32(entry.StartPosition); + SwapEndian_32(entry.MaxEndPosition); + SwapEndian_64(entry.StartOffset); + SwapEndian_32(entry.StartPosition); } // save data if ( saveData ) - SaveOffsetEntry(refId, entry); + SaveOffsetEntry(refId, entry); // return success/failure of load return true; @@ -419,7 +418,7 @@ bool BamToolsIndex::LoadReference(const int& refId, bool saveData) { // iterate over offset entries for ( unsigned int j = 0; j < numOffsets; ++j ) - LoadIndexEntry(refId, saveData); + LoadIndexEntry(refId, saveData); // return success/failure of load return true; @@ -455,7 +454,7 @@ void BamToolsIndex::SetOffsetCount(const int& refId, const int& offsetCount) { // initializes index data structure to hold @count references void BamToolsIndex::SetReferenceCount(const int& count) { for ( int i = 0; i < count; ++i ) - m_indexData[i].HasAlignments = false; + m_indexData[i].HasAlignments = false; } // position file pointer to first reference begin, return true if skipped OK @@ -480,8 +479,8 @@ bool BamToolsIndex::SkipToReference(const int& refId) { bool skippedOk = true; int currentRefId = 0; while (currentRefId != refId) { - skippedOk &= LoadReference(currentRefId, false); - ++currentRefId; + skippedOk &= LoadReference(currentRefId, false); + ++currentRefId; } // return success/failure of skip @@ -528,7 +527,7 @@ bool BamToolsIndex::WriteAllReferences(void) { BamToolsIndexData::const_iterator refIter = m_indexData.begin(); BamToolsIndexData::const_iterator refEnd = m_indexData.end(); for ( ; refIter != refEnd; ++refIter ) - refOk &= WriteReferenceEntry( (*refIter).second ); + refOk &= WriteReferenceEntry( (*refIter).second ); return ( (elementsWritten == 1) && refOk ); } @@ -548,7 +547,7 @@ bool BamToolsIndex::WriteReferenceEntry(const BamToolsReferenceEntry& refEntry) vector::const_iterator offsetIter = refEntry.Offsets.begin(); vector::const_iterator offsetEnd = refEntry.Offsets.end(); for ( ; offsetIter != offsetEnd; ++offsetIter ) - entriesOk &= WriteIndexEntry( (*offsetIter) ); + entriesOk &= WriteIndexEntry( (*offsetIter) ); return ( (elementsWritten == 1) && entriesOk ); } @@ -563,9 +562,9 @@ bool BamToolsIndex::WriteIndexEntry(const BamToolsIndexEntry& entry) { // swap endian-ness if necessary if ( m_isBigEndian ) { - SwapEndian_32(maxEndPosition); - SwapEndian_64(startOffset); - SwapEndian_32(startPosition); + SwapEndian_32(maxEndPosition); + SwapEndian_64(startOffset); + SwapEndian_32(startPosition); } // write the reference index entry