1 // ***************************************************************************
2 // BamStandardIndex.cpp (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 27 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the standardized BAM index format (".bai")
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 #include <api/internal/BamReader_p.h>
13 #include <api/internal/BamStandardIndex_p.h>
14 using namespace BamTools;
15 using namespace BamTools::Internal;
24 // static BamStandardIndex constants
25 const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
26 const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
27 const string BamStandardIndex::BAI_EXTENSION = ".bai";
28 const char* const BamStandardIndex::BAI_MAGIC = "BAI\1";
29 const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
30 const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
31 const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);
34 BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
37 , m_cacheMode(BamIndex::LimitedIndexCaching)
41 m_isBigEndian = BamTools::SystemIsBigEndian();
45 BamStandardIndex::~BamStandardIndex(void) {
49 bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
51 // retrieve references from reader
52 const RefVector& references = m_reader->GetReferenceData();
54 // make sure left-bound position is valid
55 if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
59 begin = (unsigned int)region.LeftPosition;
61 // if right bound specified AND left&right bounds are on same reference
62 // OK to use right bound position as region 'end'
63 if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
64 end = (unsigned int)region.RightPosition;
66 // otherwise, set region 'end' to last reference base
67 else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;
73 void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
75 set<uint16_t>& candidateBins)
77 // initialize list, bin '0' is always a valid bin
78 candidateBins.insert(0);
80 // get rest of bins that contain this region
82 for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { candidateBins.insert(k); }
83 for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { candidateBins.insert(k); }
84 for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { candidateBins.insert(k); }
85 for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { candidateBins.insert(k); }
86 for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
89 bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
90 const uint64_t& minOffset,
91 set<uint16_t>& candidateBins,
92 vector<int64_t>& offsets)
94 // attempt seek to first bin
95 if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )
98 // iterate over reference bins
100 int32_t numAlignmentChunks;
101 set<uint16_t>::iterator candidateBinIter;
102 for ( int i = 0; i < refSummary.NumBins; ++i ) {
104 // read bin contents (if successful, alignment chunks are now in m_buffer)
105 if ( !ReadBinIntoBuffer(binId, numAlignmentChunks) )
108 // see if bin is a 'candidate bin'
109 candidateBinIter = candidateBins.find(binId);
111 // if not, move on to next bin
112 if ( candidateBinIter == candidateBins.end() )
115 // otherwise, check bin's contents against for overlap
118 unsigned int offset = 0;
122 // iterate over alignment chunks
123 for (int j = 0; j < numAlignmentChunks; ++j ) {
125 // read chunk start & stop from buffer
126 memcpy((char*)&chunkStart, m_buffer+offset, sizeof(uint64_t));
127 offset += sizeof(uint64_t);
128 memcpy((char*)&chunkStop, m_buffer, sizeof(uint64_t));
129 offset += sizeof(uint64_t);
131 // swap endian-ness if necessary
132 if ( m_isBigEndian ) {
133 SwapEndian_64(chunkStart);
134 SwapEndian_64(chunkStop);
137 // store alignment chunk's start offset
138 // if its stop offset is larger than our 'minOffset'
139 if ( chunkStop > minOffset )
140 offsets.push_back(chunkStart);
143 // 'pop' bin ID from candidate bins set
144 candidateBins.erase(candidateBinIter);
146 // quit if no more candidates
147 if ( candidateBins.empty() )
152 // return success/failure on calculating at least 1 offset
153 return ( !offsets.empty() );
156 uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
157 const uint32_t& begin)
159 // if no linear offsets exist, return 0
160 if ( refSummary.NumLinearOffsets == 0 )
163 // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
164 // else use the offset corresponding to the requested start position
165 const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
166 if ( shiftedBegin >= refSummary.NumLinearOffsets )
167 return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
169 return LookupLinearOffset( refSummary, shiftedBegin );
172 void BamStandardIndex::CheckBufferSize(char*& buffer,
173 unsigned int& bufferLength,
174 const unsigned int& requestedBytes)
177 if ( requestedBytes > bufferLength ) {
178 bufferLength = requestedBytes + 10;
180 buffer = new char[bufferLength];
182 } catch ( std::bad_alloc ) {
183 cerr << "BamStandardIndex ERROR: out of memory when allocating "
184 << requestedBytes << " byes" << endl;
189 void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
190 unsigned int& bufferLength,
191 const unsigned int& requestedBytes)
194 if ( requestedBytes > bufferLength ) {
195 bufferLength = requestedBytes + 10;
197 buffer = new unsigned char[bufferLength];
199 } catch ( std::bad_alloc ) {
200 cerr << "BamStandardIndex ERROR: out of memory when allocating "
201 << requestedBytes << " byes" << endl;
206 bool BamStandardIndex::CheckMagicNumber(void) {
208 // check 'magic number' to see if file is BAI index
210 size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
211 if ( elementsRead != 4 ) {
212 cerr << "BamStandardIndex ERROR: could not read format 'magic number'" << endl;
216 // compare to expected value
217 if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 ) {
218 cerr << "BamStandardIndex ERROR: invalid format" << endl;
226 void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
228 refEntry.Bins.clear();
229 refEntry.LinearOffsets.clear();
232 void BamStandardIndex::CloseFile(void) {
236 fclose(m_indexStream);
238 // clear index file summary data
239 m_indexFileSummary.clear();
241 // clean up I/O buffer
247 // builds index from associated BAM file & writes out to index file
248 bool BamStandardIndex::Create(void) {
250 cerr << "Creating BAI..." << endl;
252 // return false if BamReader is invalid or not open
253 if ( m_reader == 0 || !m_reader->IsOpen() ) {
254 cerr << "BamStandardIndex ERROR: BamReader is not open"
255 << ", aborting index creation" << endl;
259 cerr << "BAM file is open" << endl;
262 if ( !m_reader->Rewind() ) {
263 cerr << "BamStandardIndex ERROR: could not rewind BamReader to create index"
264 << ", aborting index creation" << endl;
268 cerr << "BAM file is rewound" << endl;
270 // open new index file (read & write)
271 string indexFilename = m_reader->Filename() + Extension();
272 if ( !OpenFile(indexFilename, "w+b") ) {
273 cerr << "BamStandardIndex ERROR: could not open ouput index file: " << indexFilename
274 << ", aborting index creation" << endl;
278 // initialize BaiFileSummary with number of references
279 const int& numReferences = m_reader->GetReferenceCount();
280 ReserveForSummary(numReferences);
282 // initialize output file
283 bool createdOk = true;
284 createdOk &= WriteHeader();
286 // set up bin, ID, offset, & coordinate markers
287 const uint32_t defaultValue = 0xffffffffu;
288 uint32_t currentBin = defaultValue;
289 uint32_t lastBin = defaultValue;
290 int32_t currentRefID = defaultValue;
291 int32_t lastRefID = defaultValue;
292 uint64_t currentOffset = (uint64_t)m_reader->Tell();
293 uint64_t lastOffset = currentOffset;
294 int32_t lastPosition = defaultValue;
296 // iterate through alignments in BAM file
298 BaiReferenceEntry refEntry;
299 while ( m_reader->LoadNextAlignment(al) ) {
301 // changed to new reference
302 if ( lastRefID != al.RefID ) {
304 // if not first reference, save previous reference data
305 if ( lastRefID != (int32_t)defaultValue ) {
307 SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
308 createdOk &= WriteReferenceEntry(refEntry);
309 ClearReferenceEntry(refEntry);
311 // write any empty references between (but *NOT* including) lastRefID & al.RefID
312 for ( int i = lastRefID+1; i < al.RefID; ++i ) {
313 BaiReferenceEntry emptyEntry(i);
314 createdOk &= WriteReferenceEntry(emptyEntry);
317 // update bin markers
318 currentOffset = lastOffset;
321 currentRefID = al.RefID;
325 // write any empty references up to (but *NOT* including) al.RefID
327 for ( int i = 0; i < al.RefID; ++i ) {
328 BaiReferenceEntry emptyEntry(i);
329 createdOk &= WriteReferenceEntry(emptyEntry);
333 // update reference markers
334 refEntry.ID = al.RefID;
335 lastRefID = al.RefID;
336 lastBin = defaultValue;
339 // if lastPosition greater than current alignment position - file not sorted properly
340 else if ( lastPosition > al.Position ) {
341 cerr << "BamStandardIndex ERROR: BAM file is not properly sorted by coordinate"
342 << ", aborting index creation"
344 << "At alignment: " << al.Name
345 << " : previous position " << lastPosition
346 << " > this alignment position " << al.Position
347 << " on reference id: " << al.RefID << endl;
351 // if alignment's ref ID is valid & its bin is not a 'leaf'
352 if ( (al.RefID >= 0) && (al.Bin < 4681) )
353 SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
355 // changed to new BAI bin
356 if ( al.Bin != lastBin ) {
358 // if not first bin on reference, save previous bin data
359 if ( currentBin != defaultValue )
360 SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
363 currentOffset = lastOffset;
366 currentRefID = al.RefID;
368 // if invalid RefID, break out
369 if ( currentRefID < 0 )
373 // make sure that current file pointer is beyond lastOffset
374 if ( m_reader->Tell() <= (int64_t)lastOffset ) {
375 cerr << "BamStandardIndex ERROR: calculating offsets failed"
376 << ", aborting index creation" << endl;
380 // update lastOffset & lastPosition
381 lastOffset = m_reader->Tell();
382 lastPosition = al.Position;
385 // after finishing alignments, if any data was read, check:
386 if ( currentRefID >= 0 ) {
388 // store last alignment chunk to its bin, then write last reference entry with data
389 SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
390 createdOk &= WriteReferenceEntry(refEntry);
392 // then write any empty references remaining at end of file
393 for ( int i = currentRefID+1; i < numReferences; ++i ) {
394 BaiReferenceEntry emptyEntry(i);
395 createdOk &= WriteReferenceEntry(emptyEntry);
399 // rewind reader now that we're done building
400 createdOk &= m_reader->Rewind();
406 // returns format's file extension
407 const string BamStandardIndex::Extension(void) {
408 return BamStandardIndex::BAI_EXTENSION;
411 bool BamStandardIndex::GetOffsets(const BamRegion& region, vector<int64_t>& offsets) {
413 // cannot calculate offsets if unknown/invalid reference ID requested
414 if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
417 // retrieve index summary for left bound reference
418 const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
420 // set up region boundaries based on actual BamReader data
423 if ( !AdjustRegion(region, begin, end) )
426 // retrieve all candidate bin IDs for region
427 set<uint16_t> candidateBins;
428 CalculateCandidateBins(begin, end, candidateBins);
430 // use reference's linear offsets to calculate the minimum offset
431 // that must be considered to find overlap
432 const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
434 // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
435 if ( !CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets) )
438 // ensure that offsets are sorted before returning
439 sort( offsets.begin(), offsets.end() );
445 // returns whether reference has alignments or no
446 bool BamStandardIndex::HasAlignments(const int& referenceID) const {
447 if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
449 const BaiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
450 return ( refSummary.NumBins > 0 );
453 bool BamStandardIndex::IsFileOpen(void) const {
454 return ( m_indexStream != 0 );
457 // attempts to use index data to jump to @region, returns success/fail
458 // a "successful" jump indicates no error, but not whether this region has data
459 // * thus, the method sets a flag to indicate whether there are alignments
460 // available after the jump position
461 bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
463 // skip if reader is not valid or is not open
464 if ( m_reader == 0 || !m_reader->IsOpen() )
467 // calculate offsets for this region
468 // if failed, print message, set flag, and return failure
469 vector<int64_t> offsets;
470 if ( !GetOffsets(region, offsets) ) {
471 cerr << "BamStandardIndex ERROR: could not jump"
472 << ", unable to retrieve offsets for region" << endl;
473 *hasAlignmentsInRegion = false;
477 // if no offsets retrieved, set flag
478 if ( offsets.empty() )
479 *hasAlignmentsInRegion = false;
481 // iterate through candidate offsets
484 vector<int64_t>::const_iterator offsetIter = offsets.begin();
485 vector<int64_t>::const_iterator offsetEnd = offsets.end();
486 for ( ; offsetIter != offsetEnd; ++offsetIter) {
488 // attempt seek & load first available alignment
489 // set flag to true if data exists
490 result &= m_reader->Seek(*offsetIter);
491 *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
493 // if this alignment corresponds to desired position
494 // return success of seeking back to the offset before the 'current offset' (to cover overlaps)
495 if ( ((al.RefID == region.LeftRefID) &&
496 ((al.Position + al.Length) > region.LeftPosition)) ||
497 (al.RefID > region.LeftRefID) )
499 if ( offsetIter != offsets.begin() )
501 return m_reader->Seek(*offsetIter);
505 // if error in jumping, print message & set flag
507 cerr << "BamStandardIndex ERROR: could not jump"
508 << ", there was a problem seeking in BAM file" << endl;
509 *hasAlignmentsInRegion = false;
512 // return success/failure
516 // loads existing data from file into memory
517 bool BamStandardIndex::Load(const std::string& filename) {
519 // attempt open index file (read-only)
520 if ( !OpenFile(filename, "rb") ) {
521 cerr << "BamStandardIndex ERROR: could not open input index file: " << filename
522 << ", aborting index load" << endl;
526 // if invalid format 'magic number', close & return failure
527 if ( !CheckMagicNumber() ) {
528 cerr << "BamStandardIndex ERROR: unexpected format for index file: " << filename
529 << ", aborting index load" << endl;
534 // attempt to load index file summary, return success/failure
535 if ( !SummarizeIndexFile() ) {
536 cerr << "BamStandardIndex ERROR: could not generate a summary of index file " << filename
537 << ", aborting index load" << endl;
542 // if we get here, index summary is loaded OK
546 uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
548 // attempt seek to proper index file position
549 const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
550 index*BamStandardIndex::SIZEOF_LINEAROFFSET;
551 if ( !Seek(linearOffsetFilePosition, SEEK_SET) )
554 // read linear offset from BAI file
555 uint64_t linearOffset(0);
556 if ( !ReadLinearOffset(linearOffset) )
561 void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
563 // skip if chunks are empty, nothing to merge
564 if ( chunks.empty() )
567 // set up merged alignment chunk container
568 BaiAlignmentChunkVector mergedChunks;
569 mergedChunks.push_back( chunks[0] );
571 // iterate over chunks
573 BaiAlignmentChunkVector::iterator chunkIter = chunks.begin();
574 BaiAlignmentChunkVector::iterator chunkEnd = chunks.end();
575 for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
577 // get 'currentMergeChunk' based on numeric index
578 BaiAlignmentChunk& currentMergeChunk = mergedChunks[i];
580 // get sourceChunk based on source vector iterator
581 BaiAlignmentChunk& sourceChunk = (*chunkIter);
583 // if currentMergeChunk ends where sourceChunk starts, then merge the two
584 if ( currentMergeChunk.Stop>>16 == sourceChunk.Start>>16 )
585 currentMergeChunk.Stop = sourceChunk.Stop;
589 // append sourceChunk after currentMergeChunk
590 mergedChunks.push_back(sourceChunk);
592 // update i, so the next iteration will consider the
593 // recently-appended sourceChunk as new mergeChunk candidate
598 // saved newly-merged chunks into (parameter) chunks
599 chunks = mergedChunks;
602 bool BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
604 // make sure any previous index file is closed
607 // attempt to open file
608 m_indexStream = fopen(filename.c_str(), mode);
612 bool BamStandardIndex::ReadBinID(uint32_t& binId) {
613 size_t elementsRead = 0;
614 elementsRead += fread(&binId, sizeof(binId), 1, m_indexStream);
615 if ( m_isBigEndian ) SwapEndian_32(binId);
616 return ( elementsRead == 1 );
619 bool BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
624 readOk &= ReadBinID(binId);
625 readOk &= ReadNumAlignmentChunks(numAlignmentChunks);
628 const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
629 readOk &= ReadIntoBuffer(bytesRequested);
631 // return success/failure
635 bool BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
637 // ensure that our buffer is big enough for request
638 BamStandardIndex::CheckBufferSize(m_buffer, m_bufferLength, bytesRequested);
640 // read from BAI file stream
641 size_t bytesRead = fread( m_buffer, sizeof(char), bytesRequested, m_indexStream );
642 return ( bytesRead == (size_t)bytesRequested );
645 bool BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
646 size_t elementsRead = 0;
647 elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
648 if ( m_isBigEndian ) SwapEndian_64(linearOffset);
649 return ( elementsRead == 1 );
652 bool BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
653 size_t elementsRead = 0;
654 elementsRead += fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, m_indexStream);
655 if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
656 return ( elementsRead == 1 );
659 bool BamStandardIndex::ReadNumBins(int& numBins) {
660 size_t elementsRead = 0;
661 elementsRead += fread(&numBins, sizeof(numBins), 1, m_indexStream);
662 if ( m_isBigEndian ) SwapEndian_32(numBins);
663 return ( elementsRead == 1 );
666 bool BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
667 size_t elementsRead = 0;
668 elementsRead += fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream);
669 if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
670 return ( elementsRead == 1 );
673 bool BamStandardIndex::ReadNumReferences(int& numReferences) {
674 size_t elementsRead = 0;
675 elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
676 if ( m_isBigEndian ) SwapEndian_32(numReferences);
677 return ( elementsRead == 1 );
680 void BamStandardIndex::ReserveForSummary(const int& numReferences) {
681 m_indexFileSummary.clear();
682 m_indexFileSummary.assign( numReferences, BaiReferenceSummary() );
685 void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
686 const uint32_t& currentBin,
687 const uint64_t& currentOffset,
688 const uint64_t& lastOffset)
690 // create new alignment chunk
691 BaiAlignmentChunk newChunk(currentOffset, lastOffset);
693 // if no entry exists yet for this bin, create one and store alignment chunk
694 BaiBinMap::iterator binIter = binMap.find(currentBin);
695 if ( binIter == binMap.end() ) {
696 BaiAlignmentChunkVector newChunks;
697 newChunks.push_back(newChunk);
698 binMap.insert( pair<uint32_t, BaiAlignmentChunkVector>(currentBin, newChunks));
701 // otherwise, just append alignment chunk
703 BaiAlignmentChunkVector& binChunks = (*binIter).second;
704 binChunks.push_back( newChunk );
708 void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
709 BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
710 refSummary.NumBins = numBins;
711 refSummary.FirstBinFilePosition = Tell();
714 void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
715 const int& alignmentStartPosition,
716 const int& alignmentStopPosition,
717 const uint64_t& lastOffset)
719 // get converted offsets
720 const int beginOffset = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
721 const int endOffset = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
723 // resize vector if necessary
724 int oldSize = offsets.size();
725 int newSize = endOffset + 1;
726 if ( oldSize < newSize )
727 offsets.resize(newSize, 0);
730 for( int i = beginOffset + 1; i <= endOffset; ++i ) {
731 if ( offsets[i] == 0 )
732 offsets[i] = lastOffset;
736 void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
737 BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
738 refSummary.NumLinearOffsets = numLinearOffsets;
739 refSummary.FirstLinearOffsetFilePosition = Tell();
742 // seek to position in index file stream
743 bool BamStandardIndex::Seek(const int64_t& position, const int& origin) {
744 return ( fseek64(m_indexStream, position, origin) == 0 );
747 // change the index caching behavior
748 void BamStandardIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {
750 // do nothing else here ? cache mode will be ignored from now on, most likely
753 bool BamStandardIndex::SkipBins(const int& numBins) {
755 int32_t numAlignmentChunks;
756 bool skippedOk = true;
757 for (int i = 0; i < numBins; ++i)
758 skippedOk &= ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
762 bool BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
763 const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
764 return ReadIntoBuffer(bytesRequested);
767 void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
768 sort( linearOffsets.begin(), linearOffsets.end() );
771 bool BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
773 // load number of bins
775 if ( !ReadNumBins(numBins) )
778 // store bins summary for this reference
779 refSummary.NumBins = numBins;
780 refSummary.FirstBinFilePosition = Tell();
782 // attempt skip reference bins, return success/failure
783 if ( !SkipBins(numBins) )
786 // if we get here, bin summarized OK
790 bool BamStandardIndex::SummarizeIndexFile(void) {
792 // load number of reference sequences
794 if ( !ReadNumReferences(numReferences) )
797 // initialize file summary data
798 ReserveForSummary(numReferences);
800 // iterate over reference entries
801 bool loadedOk = true;
802 BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
803 BaiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
804 for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
805 loadedOk &= SummarizeReference(*summaryIter);
811 bool BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
813 // load number of linear offsets
814 int numLinearOffsets;
815 if ( !ReadNumLinearOffsets(numLinearOffsets) )
818 // store bin summary data for this reference
819 refSummary.NumLinearOffsets = numLinearOffsets;
820 refSummary.FirstLinearOffsetFilePosition = Tell();
822 // skip linear offsets in index file
823 if ( !SkipLinearOffsets(numLinearOffsets) )
826 // if get here, linear offsets summarized OK
830 bool BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
831 bool loadedOk = true;
832 loadedOk &= SummarizeBins(refSummary);
833 loadedOk &= SummarizeLinearOffsets(refSummary);
837 // return position of file pointer in index file stream
838 int64_t BamStandardIndex::Tell(void) const {
839 return ftell64(m_indexStream);
842 bool BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
844 size_t elementsWritten = 0;
846 // localize alignment chunk offsets
847 uint64_t start = chunk.Start;
848 uint64_t stop = chunk.Stop;
850 // swap endian-ness if necessary
851 if ( m_isBigEndian ) {
852 SwapEndian_64(start);
856 // write to index file
857 elementsWritten += fwrite(&start, sizeof(start), 1, m_indexStream);
858 elementsWritten += fwrite(&stop, sizeof(stop), 1, m_indexStream);
860 // return success/failure of write
861 return ( elementsWritten == 2 );
864 bool BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
866 // make sure chunks are merged (simplified) before writing & saving summary
867 MergeAlignmentChunks(chunks);
869 size_t elementsWritten = 0;
872 int32_t chunkCount = chunks.size();
873 if ( m_isBigEndian ) SwapEndian_32(chunkCount);
874 elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);
876 // iterate over chunks
877 bool chunksOk = true;
878 BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
879 BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();
880 for ( ; chunkIter != chunkEnd; ++chunkIter )
881 chunksOk &= WriteAlignmentChunk( (*chunkIter) );
883 // return success/failure of write
884 return ( (elementsWritten == 1) && chunksOk );
887 bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
889 size_t elementsWritten = 0;
892 uint32_t binKey = binId;
893 if ( m_isBigEndian ) SwapEndian_32(binKey);
894 elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);
896 // write bin's alignment chunks
897 bool chunksOk = WriteAlignmentChunks(chunks);
899 // return success/failure of write
900 return ( (elementsWritten == 1) && chunksOk );
903 bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
905 size_t elementsWritten = 0;
907 // write number of bins
908 int32_t binCount = bins.size();
909 if ( m_isBigEndian ) SwapEndian_32(binCount);
910 elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);
912 // save summary for reference's bins
913 SaveBinsSummary(refId, bins.size());
917 BaiBinMap::iterator binIter = bins.begin();
918 BaiBinMap::iterator binEnd = bins.end();
919 for ( ; binIter != binEnd; ++binIter )
920 binsOk &= WriteBin( (*binIter).first, (*binIter).second );
922 // return success/failure of write
923 return ( (elementsWritten == 1) && binsOk );
926 bool BamStandardIndex::WriteHeader(void) {
928 size_t elementsWritten = 0;
930 // write magic number
931 elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);
933 // write number of reference sequences
934 int32_t numReferences = m_indexFileSummary.size();
935 if ( m_isBigEndian ) SwapEndian_32(numReferences);
936 elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
938 // return success/failure of write
939 return (elementsWritten == 5);
942 bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
944 // make sure linear offsets are sorted before writing & saving summary
945 SortLinearOffsets(linearOffsets);
947 size_t elementsWritten = 0;
949 // write number of linear offsets
950 int32_t offsetCount = linearOffsets.size();
951 if ( m_isBigEndian ) SwapEndian_32(offsetCount);
952 elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);
954 // save summary for reference's linear offsets
955 SaveLinearOffsetsSummary(refId, linearOffsets.size());
957 // iterate over linear offsets
958 BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();
959 BaiLinearOffsetVector::const_iterator offsetEnd = linearOffsets.end();
960 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
962 // write linear offset
963 uint64_t linearOffset = (*offsetIter);
964 if ( m_isBigEndian ) SwapEndian_64(linearOffset);
965 elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
968 // return success/failure of write
969 return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );
972 bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
974 refOk &= WriteBins(refEntry.ID, refEntry.Bins);
975 refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);