1 // ***************************************************************************
2 // BamIndex.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 13 July 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the default (standardized) BAM
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index
11 // ***************************************************************************
19 #include "BamReader.h"
22 using namespace BamTools;
24 // -------------------------------
25 // BamIndex implementation
27 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian)
30 , m_isBigEndian(isBigEndian)
32 if ( m_reader && m_reader->IsOpen() )
33 m_references = m_reader->GetReferenceData();
36 bool BamIndex::HasAlignments(const int& referenceID) {
38 // return false if invalid ID
39 if ( (referenceID < 0) || (referenceID >= (int)m_references.size()) )
42 // else return status of reference (has alignments?)
44 return m_references.at(referenceID).RefHasAlignments;
47 // #########################################################################################
48 // #########################################################################################
50 // -------------------------------
51 // BamDefaultIndex structs & typedefs
62 Chunk(const uint64_t& start = 0,
63 const uint64_t& stop = 0)
69 bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
70 return lhs.Start < rhs.Start;
73 typedef vector<Chunk> ChunkVector;
74 typedef map<uint32_t, ChunkVector> BamBinMap;
75 typedef vector<uint64_t> LinearOffsetVector;
77 struct ReferenceIndex {
81 LinearOffsetVector Offsets;
84 ReferenceIndex(const BamBinMap& binMap = BamBinMap(),
85 const LinearOffsetVector& offsets = LinearOffsetVector())
91 typedef vector<ReferenceIndex> BamDefaultIndexData;
93 } // namespace BamTools
95 // -------------------------------
96 // BamDefaultIndex implementation
98 struct BamDefaultIndex::BamDefaultIndexPrivate {
100 // -------------------------
103 BamDefaultIndexData m_indexData;
104 BamDefaultIndex* m_parent;
106 // -------------------------
109 BamDefaultIndexPrivate(BamDefaultIndex* parent) : m_parent(parent) { }
110 ~BamDefaultIndexPrivate(void) { }
112 // -------------------------
115 // calculate bins that overlap region
116 int BinsFromRegion(const BamTools::BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[BamTools::MAX_BIN]);
117 // saves BAM bin entry for index
118 void InsertBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset);
119 // saves linear offset entry for index
120 void InsertLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset);
121 // simplifies index by merging 'chunks'
122 void MergeChunks(void);
126 BamDefaultIndex::BamDefaultIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
127 : BamIndex(bgzf, reader, isBigEndian)
129 d = new BamDefaultIndexPrivate(this);
132 BamDefaultIndex::~BamDefaultIndex(void) {
133 d->m_indexData.clear();
138 // calculate bins that overlap region
139 int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) {
141 // get region boundaries
142 uint32_t begin = (unsigned int)region.LeftPosition;
145 // if right bound specified AND left&right bounds are on same reference
146 // OK to use right bound position
147 if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
148 end = (unsigned int)region.RightPosition;
150 // otherwise, use end of left bound reference as cutoff
152 end = (unsigned int)m_parent->m_references.at(region.LeftRefID).RefLength - 1;
154 // initialize list, bin '0' always a valid bin
158 // get rest of bins that contain this region
160 for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { bins[i++] = k; }
161 for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { bins[i++] = k; }
162 for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { bins[i++] = k; }
163 for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { bins[i++] = k; }
164 for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { bins[i++] = k; }
166 // return number of bins stored
170 bool BamDefaultIndex::Build(void) {
172 // be sure reader & BGZF file are valid & open for reading
173 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
176 // move file pointer to beginning of alignments
179 // get reference count, reserve index space
180 int numReferences = (int)m_references.size();
181 for ( int i = 0; i < numReferences; ++i ) {
182 d->m_indexData.push_back(ReferenceIndex());
185 // sets default constant for bin, ID, offset, coordinate variables
186 const uint32_t defaultValue = 0xffffffffu;
189 uint32_t saveBin(defaultValue);
190 uint32_t lastBin(defaultValue);
193 int32_t saveRefID(defaultValue);
194 int32_t lastRefID(defaultValue);
197 uint64_t saveOffset = m_BGZF->Tell();
198 uint64_t lastOffset = saveOffset;
201 int32_t lastCoordinate = defaultValue;
203 BamAlignment bAlignment;
204 while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
206 // change of chromosome, save ID, reset bin
207 if ( lastRefID != bAlignment.RefID ) {
208 lastRefID = bAlignment.RefID;
209 lastBin = defaultValue;
212 // if lastCoordinate greater than BAM position - file not sorted properly
213 else if ( lastCoordinate > bAlignment.Position ) {
214 printf("BAM file not properly sorted:\n");
215 printf("Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), lastCoordinate, bAlignment.Position, bAlignment.RefID);
219 // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
220 if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
222 // save linear offset entry (matched to BAM entry refID)
223 ReferenceIndex& refIndex = d->m_indexData.at(bAlignment.RefID);
224 LinearOffsetVector& offsets = refIndex.Offsets;
225 d->InsertLinearOffset(offsets, bAlignment, lastOffset);
228 // if current BamAlignment bin != lastBin, "then possibly write the binning index"
229 if ( bAlignment.Bin != lastBin ) {
231 // if not first time through
232 if ( saveBin != defaultValue ) {
234 // save Bam bin entry
235 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
236 BamBinMap& binMap = refIndex.Bins;
237 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
241 saveOffset = lastOffset;
244 saveBin = bAlignment.Bin;
245 lastBin = bAlignment.Bin;
248 saveRefID = bAlignment.RefID;
250 // if invalid RefID, break out (why?)
251 if ( saveRefID < 0 ) { break; }
254 // make sure that current file pointer is beyond lastOffset
255 if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
256 printf("Error in BGZF offsets.\n");
261 lastOffset = m_BGZF->Tell();
263 // update lastCoordinate
264 lastCoordinate = bAlignment.Position;
267 // save any leftover BAM data (as long as refID is valid)
268 if ( saveRefID >= 0 ) {
269 // save Bam bin entry
270 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
271 BamBinMap& binMap = refIndex.Bins;
272 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
275 // simplify index by merging chunks
278 // iterate through references in index
279 // store whether reference has data &
280 // sort offsets in linear offset vector
281 BamDefaultIndexData::iterator indexIter = d->m_indexData.begin();
282 BamDefaultIndexData::iterator indexEnd = d->m_indexData.end();
283 for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
285 // get reference index data
286 ReferenceIndex& refIndex = (*indexIter);
287 BamBinMap& binMap = refIndex.Bins;
288 LinearOffsetVector& offsets = refIndex.Offsets;
290 // store whether reference has alignments or no
291 m_references[i].RefHasAlignments = ( binMap.size() > 0 );
293 // sort linear offsets
294 sort(offsets.begin(), offsets.end());
297 // rewind file pointer to beginning of alignments, return success/fail
298 return m_reader->Rewind();
301 bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
303 // calculate which bins overlap this region
304 uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
305 int numBins = d->BinsFromRegion(region, isRightBoundSpecified, bins);
307 // get bins for this reference
308 const ReferenceIndex& refIndex = d->m_indexData.at(region.LeftRefID);
309 const BamBinMap& binMap = refIndex.Bins;
311 // get minimum offset to consider
312 const LinearOffsetVector& linearOffsets = refIndex.Offsets;
313 uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() ) ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
315 // store all alignment 'chunk' starts (file offsets) for bins in this region
316 for ( int i = 0; i < numBins; ++i ) {
318 const uint16_t binKey = bins[i];
319 map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
320 if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
322 const ChunkVector& chunks = (*binIter).second;
323 std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
324 std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
325 for ( ; chunksIter != chunksEnd; ++chunksIter) {
327 // if valid chunk found, store its file offset
328 const Chunk& chunk = (*chunksIter);
329 if ( chunk.Stop > minOffset )
330 offsets.push_back( chunk.Start );
338 // sort the offsets before returning
339 sort(offsets.begin(), offsets.end());
341 // return whether any offsets were found
342 return ( offsets.size() != 0 );
345 // saves BAM bin entry for index
346 void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& binMap,
347 const uint32_t& saveBin,
348 const uint64_t& saveOffset,
349 const uint64_t& lastOffset)
352 BamBinMap::iterator binIter = binMap.find(saveBin);
355 Chunk newChunk(saveOffset, lastOffset);
357 // if entry doesn't exist
358 if ( binIter == binMap.end() ) {
359 ChunkVector newChunks;
360 newChunks.push_back(newChunk);
361 binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
366 ChunkVector& binChunks = (*binIter).second;
367 binChunks.push_back( newChunk );
371 // saves linear offset entry for index
372 void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
373 const BamAlignment& bAlignment,
374 const uint64_t& lastOffset)
376 // get converted offsets
377 int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
378 int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
380 // resize vector if necessary
381 int oldSize = offsets.size();
382 int newSize = endOffset + 1;
383 if ( oldSize < newSize )
384 offsets.resize(newSize, 0);
387 for( int i = beginOffset + 1; i <= endOffset; ++i ) {
388 if ( offsets[i] == 0 )
389 offsets[i] = lastOffset;
393 bool BamDefaultIndex::Load(const string& filename) {
395 // open index file, abort on error
396 FILE* indexStream = fopen(filename.c_str(), "rb");
398 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
402 // set placeholder to receive input byte count (suppresses compiler warnings)
403 size_t elementsRead = 0;
405 // see if index is valid BAM index
407 elementsRead = fread(magic, 1, 4, indexStream);
408 if ( strncmp(magic, "BAI\1", 4) ) {
409 printf("Problem with index file - invalid format.\n");
414 // get number of reference sequences
416 elementsRead = fread(&numRefSeqs, 4, 1, indexStream);
417 if ( m_isBigEndian ) { SwapEndian_32(numRefSeqs); }
419 // intialize space for BamDefaultIndexData data structure
420 d->m_indexData.reserve(numRefSeqs);
422 // iterate over reference sequences
423 for ( unsigned int i = 0; i < numRefSeqs; ++i ) {
425 // get number of bins for this reference sequence
427 elementsRead = fread(&numBins, 4, 1, indexStream);
428 if ( m_isBigEndian ) { SwapEndian_32(numBins); }
431 RefData& refEntry = m_references[i];
432 refEntry.RefHasAlignments = true;
435 // intialize BinVector
438 // iterate over bins for that reference sequence
439 for ( int j = 0; j < numBins; ++j ) {
443 elementsRead = fread(&binID, 4, 1, indexStream);
445 // get number of regionChunks in this bin
447 elementsRead = fread(&numChunks, 4, 1, indexStream);
449 if ( m_isBigEndian ) {
450 SwapEndian_32(binID);
451 SwapEndian_32(numChunks);
454 // intialize ChunkVector
455 ChunkVector regionChunks;
456 regionChunks.reserve(numChunks);
458 // iterate over regionChunks in this bin
459 for ( unsigned int k = 0; k < numChunks; ++k ) {
461 // get chunk boundaries (left, right)
464 elementsRead = fread(&left, 8, 1, indexStream);
465 elementsRead = fread(&right, 8, 1, indexStream);
467 if ( m_isBigEndian ) {
469 SwapEndian_64(right);
473 regionChunks.push_back( Chunk(left, right) );
476 // sort chunks for this bin
477 sort( regionChunks.begin(), regionChunks.end(), ChunkLessThan );
479 // save binID, chunkVector for this bin
480 binMap.insert( pair<uint32_t, ChunkVector>(binID, regionChunks) );
483 // load linear index for this reference sequence
485 // get number of linear offsets
486 int32_t numLinearOffsets;
487 elementsRead = fread(&numLinearOffsets, 4, 1, indexStream);
488 if ( m_isBigEndian ) { SwapEndian_32(numLinearOffsets); }
490 // intialize LinearOffsetVector
491 LinearOffsetVector offsets;
492 offsets.reserve(numLinearOffsets);
494 // iterate over linear offsets for this reference sequeence
495 uint64_t linearOffset;
496 for ( int j = 0; j < numLinearOffsets; ++j ) {
497 // read a linear offset & store
498 elementsRead = fread(&linearOffset, 8, 1, indexStream);
499 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
500 offsets.push_back(linearOffset);
503 // sort linear offsets
504 sort( offsets.begin(), offsets.end() );
506 // store index data for that reference sequence
507 d->m_indexData.push_back( ReferenceIndex(binMap, offsets) );
510 // close index file (.bai) and return
515 // merges 'alignment chunks' in BAM bin (used for index building)
516 void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) {
518 // iterate over reference enties
519 BamDefaultIndexData::iterator indexIter = m_indexData.begin();
520 BamDefaultIndexData::iterator indexEnd = m_indexData.end();
521 for ( ; indexIter != indexEnd; ++indexIter ) {
523 // get BAM bin map for this reference
524 ReferenceIndex& refIndex = (*indexIter);
525 BamBinMap& bamBinMap = refIndex.Bins;
527 // iterate over BAM bins
528 BamBinMap::iterator binIter = bamBinMap.begin();
529 BamBinMap::iterator binEnd = bamBinMap.end();
530 for ( ; binIter != binEnd; ++binIter ) {
532 // get chunk vector for this bin
533 ChunkVector& binChunks = (*binIter).second;
534 if ( binChunks.size() == 0 ) { continue; }
536 ChunkVector mergedChunks;
537 mergedChunks.push_back( binChunks[0] );
539 // iterate over chunks
541 ChunkVector::iterator chunkIter = binChunks.begin();
542 ChunkVector::iterator chunkEnd = binChunks.end();
543 for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
545 // get 'currentChunk' based on numeric index
546 Chunk& currentChunk = mergedChunks[i];
548 // get iteratorChunk based on vector iterator
549 Chunk& iteratorChunk = (*chunkIter);
551 // if currentChunk.Stop(shifted) == iterator Chunk.Start(shifted)
552 if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) {
554 // set currentChunk.Stop to iteratorChunk.Stop
555 currentChunk.Stop = iteratorChunk.Stop;
560 // set currentChunk + 1 to iteratorChunk
561 mergedChunks.push_back(iteratorChunk);
566 // saved merged chunk vector
567 (*binIter).second = mergedChunks;
572 // writes in-memory index data out to file
573 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
574 bool BamDefaultIndex::Write(const std::string& bamFilename) {
576 string indexFilename = bamFilename + ".bai";
577 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
578 if ( indexStream == 0 ) {
579 printf("ERROR: Could not open file to save index.\n");
583 // write BAM index header
584 fwrite("BAI\1", 1, 4, indexStream);
586 // write number of reference sequences
587 int32_t numReferenceSeqs = d->m_indexData.size();
588 if ( m_isBigEndian ) { SwapEndian_32(numReferenceSeqs); }
589 fwrite(&numReferenceSeqs, 4, 1, indexStream);
591 // iterate over reference sequences
592 BamDefaultIndexData::const_iterator indexIter = d->m_indexData.begin();
593 BamDefaultIndexData::const_iterator indexEnd = d->m_indexData.end();
594 for ( ; indexIter != indexEnd; ++ indexIter ) {
596 // get reference index data
597 const ReferenceIndex& refIndex = (*indexIter);
598 const BamBinMap& binMap = refIndex.Bins;
599 const LinearOffsetVector& offsets = refIndex.Offsets;
601 // write number of bins
602 int32_t binCount = binMap.size();
603 if ( m_isBigEndian ) { SwapEndian_32(binCount); }
604 fwrite(&binCount, 4, 1, indexStream);
607 BamBinMap::const_iterator binIter = binMap.begin();
608 BamBinMap::const_iterator binEnd = binMap.end();
609 for ( ; binIter != binEnd; ++binIter ) {
611 // get bin data (key and chunk vector)
612 uint32_t binKey = (*binIter).first;
613 const ChunkVector& binChunks = (*binIter).second;
616 if ( m_isBigEndian ) { SwapEndian_32(binKey); }
617 fwrite(&binKey, 4, 1, indexStream);
620 int32_t chunkCount = binChunks.size();
621 if ( m_isBigEndian ) { SwapEndian_32(chunkCount); }
622 fwrite(&chunkCount, 4, 1, indexStream);
624 // iterate over chunks
625 ChunkVector::const_iterator chunkIter = binChunks.begin();
626 ChunkVector::const_iterator chunkEnd = binChunks.end();
627 for ( ; chunkIter != chunkEnd; ++chunkIter ) {
629 // get current chunk data
630 const Chunk& chunk = (*chunkIter);
631 uint64_t start = chunk.Start;
632 uint64_t stop = chunk.Stop;
634 if ( m_isBigEndian ) {
635 SwapEndian_64(start);
639 // save chunk offsets
640 fwrite(&start, 8, 1, indexStream);
641 fwrite(&stop, 8, 1, indexStream);
645 // write linear offsets size
646 int32_t offsetSize = offsets.size();
647 if ( m_isBigEndian ) { SwapEndian_32(offsetSize); }
648 fwrite(&offsetSize, 4, 1, indexStream);
650 // iterate over linear offsets
651 LinearOffsetVector::const_iterator offsetIter = offsets.begin();
652 LinearOffsetVector::const_iterator offsetEnd = offsets.end();
653 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
655 // write linear offset value
656 uint64_t linearOffset = (*offsetIter);
657 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
658 fwrite(&linearOffset, 8, 1, indexStream);
662 // flush buffer, close file, and return success
668 // #########################################################################################
669 // #########################################################################################
671 // -------------------------------------
672 // BamToolsIndex implementation
676 struct BamToolsIndexEntry {
684 BamToolsIndexEntry(const uint64_t& offset = 0,
686 const int& position = -1)
693 typedef vector<BamToolsIndexEntry> BamToolsIndexData;
695 } // namespace BamTools
697 struct BamToolsIndex::BamToolsIndexPrivate {
699 // -------------------------
701 BamToolsIndexData m_indexData;
702 BamToolsIndex* m_parent;
705 // -------------------------
708 BamToolsIndexPrivate(BamToolsIndex* parent)
713 ~BamToolsIndexPrivate(void) { }
715 // -------------------------
719 BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
720 : BamIndex(bgzf, reader, isBigEndian)
722 d = new BamToolsIndexPrivate(this);
725 BamToolsIndex::~BamToolsIndex(void) {
730 bool BamToolsIndex::Build(void) {
732 // be sure reader & BGZF file are valid & open for reading
733 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
736 // move file pointer to beginning of alignments
739 // plow through alignments, store block offsets
740 int32_t currentBlockCount = 0;
741 int64_t blockStartOffset = m_BGZF->Tell();
742 int blockStartId = -1;
743 int blockStartPosition = -1;
745 while ( m_reader->GetNextAlignmentCore(al) ) {
747 // set reference flag
748 m_references[al.RefID].RefHasAlignments = true;
750 // if beginning of block, save first alignment's refID & position
751 if ( currentBlockCount == 0 ) {
752 blockStartId = al.RefID;
753 blockStartPosition = al.Position;
756 // increment block counter
759 // if block is full, get offset for next block, reset currentBlockCount
760 if ( currentBlockCount == d->m_blockSize ) {
762 d->m_indexData.push_back( BamToolsIndexEntry(blockStartOffset, blockStartId, blockStartPosition) );
763 blockStartOffset = m_BGZF->Tell();
764 currentBlockCount = 0;
768 return m_reader->Rewind();
771 // N.B. - ignores isRightBoundSpecified
772 bool BamToolsIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
774 // return false if no index data present
775 if ( d->m_indexData.empty() ) return false;
777 // clear any prior data
780 // calculate nearest index to jump to
781 int64_t previousOffset = -1;
782 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
783 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
784 for ( ; indexIter != indexEnd; ++indexIter ) {
786 const BamToolsIndexEntry& entry = (*indexIter);
788 // check if we are 'past' beginning of desired region
789 // if so, we will break out & use previously stored offset
790 if ( entry.RefID > region.LeftRefID ) break;
791 if ( (entry.RefID == region.LeftRefID) && (entry.Position > region.LeftPosition) ) break;
793 // not past desired region, so store current entry offset in previousOffset
794 previousOffset = entry.Offset;
797 // no index was found
798 if ( previousOffset == -1 )
801 // store offset & return success
802 offsets.push_back(previousOffset);
806 bool BamToolsIndex::Load(const string& filename) {
808 // open index file, abort on error
809 FILE* indexStream = fopen(filename.c_str(), "rb");
811 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
815 // set placeholder to receive input byte count (suppresses compiler warnings)
816 size_t elementsRead = 0;
818 // see if index is valid BAM index
820 elementsRead = fread(magic, 1, 4, indexStream);
821 if ( strncmp(magic, "BTI\1", 4) ) {
822 printf("Problem with index file - invalid format.\n");
827 // read in block size
828 elementsRead = fread(&d->m_blockSize, sizeof(d->m_blockSize), 1, indexStream);
829 if ( m_isBigEndian ) { SwapEndian_32(d->m_blockSize); }
831 // read in number of offsets
833 elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, indexStream);
834 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
836 // reserve space for index data
837 d->m_indexData.reserve(numOffsets);
839 // iterate over index entries
840 for ( unsigned int i = 0; i < numOffsets; ++i ) {
847 elementsRead = fread(&offset, sizeof(offset), 1, indexStream);
848 elementsRead = fread(&id, sizeof(id), 1, indexStream);
849 elementsRead = fread(&position, sizeof(position), 1, indexStream);
851 // swap endian-ness if necessary
852 if ( m_isBigEndian ) {
853 SwapEndian_64(offset);
855 SwapEndian_32(position);
858 // save reference index entry
859 d->m_indexData.push_back( BamToolsIndexEntry(offset, id, position) );
861 // set reference flag
862 m_references[id].RefHasAlignments = true; // what about sparse references? wont be able to set flag?
865 // close index file and return
870 // writes in-memory index data out to file
871 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
872 bool BamToolsIndex::Write(const std::string& bamFilename) {
874 string indexFilename = bamFilename + ".bti";
875 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
876 if ( indexStream == 0 ) {
877 printf("ERROR: Could not open file to save index.\n");
881 // write BAM index header
882 fwrite("BTI\1", 1, 4, indexStream);
885 int32_t blockSize = d->m_blockSize;
886 if ( m_isBigEndian ) { SwapEndian_32(blockSize); }
887 fwrite(&blockSize, sizeof(blockSize), 1, indexStream);
889 // write number of offset entries
890 uint32_t numOffsets = d->m_indexData.size();
891 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
892 fwrite(&numOffsets, sizeof(numOffsets), 1, indexStream);
894 // iterate over offset entries
895 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
896 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
897 for ( ; indexIter != indexEnd; ++ indexIter ) {
899 // get reference index data
900 const BamToolsIndexEntry& entry = (*indexIter);
903 uint64_t offset = entry.Offset;
904 int id = entry.RefID;
905 int position = entry.Position;
907 // swap endian-ness if necessary
908 if ( m_isBigEndian ) {
909 SwapEndian_64(offset);
911 SwapEndian_32(position);
914 // write the reference index entry
915 fwrite(&offset, sizeof(offset), 1, indexStream);
916 fwrite(&id, sizeof(id), 1, indexStream);
917 fwrite(&position, sizeof(position), 1, indexStream);
920 // flush file buffer, close file, and return success