1 // ***************************************************************************
2 // BamIndex.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 17 August 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the default (standardized) BAM
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index
11 // ***************************************************************************
16 // #include <iostream>
19 #include "BamReader.h"
22 using namespace BamTools;
24 // -------------------------------
25 // BamIndex implementation
27 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian)
30 , m_isBigEndian(isBigEndian)
32 if ( m_reader && m_reader->IsOpen() )
33 m_references = m_reader->GetReferenceData();
36 bool BamIndex::HasAlignments(const int& referenceID) {
38 // return false if invalid ID
39 if ( (referenceID < 0) || (referenceID >= (int)m_references.size()) )
42 // else return status of reference (has alignments?)
44 return m_references.at(referenceID).RefHasAlignments;
47 // #########################################################################################
48 // #########################################################################################
50 // -------------------------------
51 // BamDefaultIndex structs & typedefs
55 // --------------------------------------------------
56 // BamDefaultIndex data structures & typedefs
64 Chunk(const uint64_t& start = 0,
65 const uint64_t& stop = 0)
71 bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
72 return lhs.Start < rhs.Start;
75 typedef vector<Chunk> ChunkVector;
76 typedef map<uint32_t, ChunkVector> BamBinMap;
77 typedef vector<uint64_t> LinearOffsetVector;
79 struct ReferenceIndex {
83 LinearOffsetVector Offsets;
86 ReferenceIndex(const BamBinMap& binMap = BamBinMap(),
87 const LinearOffsetVector& offsets = LinearOffsetVector())
93 typedef vector<ReferenceIndex> BamDefaultIndexData;
95 } // namespace BamTools
97 // -------------------------------
98 // BamDefaultIndex implementation
100 struct BamDefaultIndex::BamDefaultIndexPrivate {
102 // -------------------------
105 BamDefaultIndexData m_indexData;
106 BamDefaultIndex* m_parent;
108 // -------------------------
111 BamDefaultIndexPrivate(BamDefaultIndex* parent) : m_parent(parent) { }
112 ~BamDefaultIndexPrivate(void) { }
114 // -------------------------
117 // calculate bins that overlap region
118 int BinsFromRegion(const BamTools::BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[BamTools::MAX_BIN]);
119 // saves BAM bin entry for index
120 void InsertBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset);
121 // saves linear offset entry for index
122 void InsertLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset);
123 // simplifies index by merging 'chunks'
124 void MergeChunks(void);
128 BamDefaultIndex::BamDefaultIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
129 : BamIndex(bgzf, reader, isBigEndian)
131 d = new BamDefaultIndexPrivate(this);
134 BamDefaultIndex::~BamDefaultIndex(void) {
135 d->m_indexData.clear();
140 // calculate bins that overlap region
141 int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) {
143 // get region boundaries
144 uint32_t begin = (unsigned int)region.LeftPosition;
147 // if right bound specified AND left&right bounds are on same reference
148 // OK to use right bound position
149 if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
150 end = (unsigned int)region.RightPosition;
152 // otherwise, use end of left bound reference as cutoff
154 end = (unsigned int)m_parent->m_references.at(region.LeftRefID).RefLength - 1;
156 // initialize list, bin '0' always a valid bin
160 // get rest of bins that contain this region
162 for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { bins[i++] = k; }
163 for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { bins[i++] = k; }
164 for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { bins[i++] = k; }
165 for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { bins[i++] = k; }
166 for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { bins[i++] = k; }
168 // return number of bins stored
172 bool BamDefaultIndex::Build(void) {
174 // be sure reader & BGZF file are valid & open for reading
175 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
178 // move file pointer to beginning of alignments
181 // get reference count, reserve index space
182 int numReferences = (int)m_references.size();
183 for ( int i = 0; i < numReferences; ++i ) {
184 d->m_indexData.push_back(ReferenceIndex());
187 // sets default constant for bin, ID, offset, coordinate variables
188 const uint32_t defaultValue = 0xffffffffu;
191 uint32_t saveBin(defaultValue);
192 uint32_t lastBin(defaultValue);
195 int32_t saveRefID(defaultValue);
196 int32_t lastRefID(defaultValue);
199 uint64_t saveOffset = m_BGZF->Tell();
200 uint64_t lastOffset = saveOffset;
203 int32_t lastCoordinate = defaultValue;
205 BamAlignment bAlignment;
206 while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
208 // change of chromosome, save ID, reset bin
209 if ( lastRefID != bAlignment.RefID ) {
210 lastRefID = bAlignment.RefID;
211 lastBin = defaultValue;
214 // if lastCoordinate greater than BAM position - file not sorted properly
215 else if ( lastCoordinate > bAlignment.Position ) {
216 printf("BAM file not properly sorted:\n");
217 printf("Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), lastCoordinate, bAlignment.Position, bAlignment.RefID);
221 // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
222 if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
224 // save linear offset entry (matched to BAM entry refID)
225 ReferenceIndex& refIndex = d->m_indexData.at(bAlignment.RefID);
226 LinearOffsetVector& offsets = refIndex.Offsets;
227 d->InsertLinearOffset(offsets, bAlignment, lastOffset);
230 // if current BamAlignment bin != lastBin, "then possibly write the binning index"
231 if ( bAlignment.Bin != lastBin ) {
233 // if not first time through
234 if ( saveBin != defaultValue ) {
236 // save Bam bin entry
237 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
238 BamBinMap& binMap = refIndex.Bins;
239 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
243 saveOffset = lastOffset;
246 saveBin = bAlignment.Bin;
247 lastBin = bAlignment.Bin;
250 saveRefID = bAlignment.RefID;
252 // if invalid RefID, break out (why?)
253 if ( saveRefID < 0 ) { break; }
256 // make sure that current file pointer is beyond lastOffset
257 if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
258 printf("Error in BGZF offsets.\n");
263 lastOffset = m_BGZF->Tell();
265 // update lastCoordinate
266 lastCoordinate = bAlignment.Position;
269 // save any leftover BAM data (as long as refID is valid)
270 if ( saveRefID >= 0 ) {
271 // save Bam bin entry
272 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
273 BamBinMap& binMap = refIndex.Bins;
274 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
277 // simplify index by merging chunks
280 // iterate through references in index
281 // store whether reference has data &
282 // sort offsets in linear offset vector
283 BamDefaultIndexData::iterator indexIter = d->m_indexData.begin();
284 BamDefaultIndexData::iterator indexEnd = d->m_indexData.end();
285 for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
287 // get reference index data
288 ReferenceIndex& refIndex = (*indexIter);
289 BamBinMap& binMap = refIndex.Bins;
290 LinearOffsetVector& offsets = refIndex.Offsets;
292 // store whether reference has alignments or no
293 m_references[i].RefHasAlignments = ( binMap.size() > 0 );
295 // sort linear offsets
296 sort(offsets.begin(), offsets.end());
299 // rewind file pointer to beginning of alignments, return success/fail
300 return m_reader->Rewind();
303 bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
305 // calculate which bins overlap this region
306 uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
307 int numBins = d->BinsFromRegion(region, isRightBoundSpecified, bins);
309 // get bins for this reference
310 const ReferenceIndex& refIndex = d->m_indexData.at(region.LeftRefID);
311 const BamBinMap& binMap = refIndex.Bins;
313 // get minimum offset to consider
314 const LinearOffsetVector& linearOffsets = refIndex.Offsets;
315 uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() ) ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
317 // store all alignment 'chunk' starts (file offsets) for bins in this region
318 for ( int i = 0; i < numBins; ++i ) {
320 const uint16_t binKey = bins[i];
321 map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
322 if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
324 const ChunkVector& chunks = (*binIter).second;
325 std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
326 std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
327 for ( ; chunksIter != chunksEnd; ++chunksIter) {
329 // if valid chunk found, store its file offset
330 const Chunk& chunk = (*chunksIter);
331 if ( chunk.Stop > minOffset )
332 offsets.push_back( chunk.Start );
340 // sort the offsets before returning
341 sort(offsets.begin(), offsets.end());
343 // return whether any offsets were found
344 return ( offsets.size() != 0 );
347 // saves BAM bin entry for index
348 void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& binMap,
349 const uint32_t& saveBin,
350 const uint64_t& saveOffset,
351 const uint64_t& lastOffset)
354 BamBinMap::iterator binIter = binMap.find(saveBin);
357 Chunk newChunk(saveOffset, lastOffset);
359 // if entry doesn't exist
360 if ( binIter == binMap.end() ) {
361 ChunkVector newChunks;
362 newChunks.push_back(newChunk);
363 binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
368 ChunkVector& binChunks = (*binIter).second;
369 binChunks.push_back( newChunk );
373 // saves linear offset entry for index
374 void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
375 const BamAlignment& bAlignment,
376 const uint64_t& lastOffset)
378 // get converted offsets
379 int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
380 int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
382 // resize vector if necessary
383 int oldSize = offsets.size();
384 int newSize = endOffset + 1;
385 if ( oldSize < newSize )
386 offsets.resize(newSize, 0);
389 for( int i = beginOffset + 1; i <= endOffset; ++i ) {
390 if ( offsets[i] == 0 )
391 offsets[i] = lastOffset;
395 bool BamDefaultIndex::Load(const string& filename) {
397 // open index file, abort on error
398 FILE* indexStream = fopen(filename.c_str(), "rb");
400 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
404 // set placeholder to receive input byte count (suppresses compiler warnings)
405 size_t elementsRead = 0;
407 // see if index is valid BAM index
409 elementsRead = fread(magic, 1, 4, indexStream);
410 if ( strncmp(magic, "BAI\1", 4) ) {
411 printf("Problem with index file - invalid format.\n");
416 // get number of reference sequences
418 elementsRead = fread(&numRefSeqs, 4, 1, indexStream);
419 if ( m_isBigEndian ) { SwapEndian_32(numRefSeqs); }
421 // intialize space for BamDefaultIndexData data structure
422 d->m_indexData.reserve(numRefSeqs);
424 // iterate over reference sequences
425 for ( unsigned int i = 0; i < numRefSeqs; ++i ) {
427 // get number of bins for this reference sequence
429 elementsRead = fread(&numBins, 4, 1, indexStream);
430 if ( m_isBigEndian ) { SwapEndian_32(numBins); }
433 RefData& refEntry = m_references[i];
434 refEntry.RefHasAlignments = true;
437 // intialize BinVector
440 // iterate over bins for that reference sequence
441 for ( int j = 0; j < numBins; ++j ) {
445 elementsRead = fread(&binID, 4, 1, indexStream);
447 // get number of regionChunks in this bin
449 elementsRead = fread(&numChunks, 4, 1, indexStream);
451 if ( m_isBigEndian ) {
452 SwapEndian_32(binID);
453 SwapEndian_32(numChunks);
456 // intialize ChunkVector
457 ChunkVector regionChunks;
458 regionChunks.reserve(numChunks);
460 // iterate over regionChunks in this bin
461 for ( unsigned int k = 0; k < numChunks; ++k ) {
463 // get chunk boundaries (left, right)
466 elementsRead = fread(&left, 8, 1, indexStream);
467 elementsRead = fread(&right, 8, 1, indexStream);
469 if ( m_isBigEndian ) {
471 SwapEndian_64(right);
475 regionChunks.push_back( Chunk(left, right) );
478 // sort chunks for this bin
479 sort( regionChunks.begin(), regionChunks.end(), ChunkLessThan );
481 // save binID, chunkVector for this bin
482 binMap.insert( pair<uint32_t, ChunkVector>(binID, regionChunks) );
485 // load linear index for this reference sequence
487 // get number of linear offsets
488 int32_t numLinearOffsets;
489 elementsRead = fread(&numLinearOffsets, 4, 1, indexStream);
490 if ( m_isBigEndian ) { SwapEndian_32(numLinearOffsets); }
492 // intialize LinearOffsetVector
493 LinearOffsetVector offsets;
494 offsets.reserve(numLinearOffsets);
496 // iterate over linear offsets for this reference sequeence
497 uint64_t linearOffset;
498 for ( int j = 0; j < numLinearOffsets; ++j ) {
499 // read a linear offset & store
500 elementsRead = fread(&linearOffset, 8, 1, indexStream);
501 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
502 offsets.push_back(linearOffset);
505 // sort linear offsets
506 sort( offsets.begin(), offsets.end() );
508 // store index data for that reference sequence
509 d->m_indexData.push_back( ReferenceIndex(binMap, offsets) );
512 // close index file (.bai) and return
517 // merges 'alignment chunks' in BAM bin (used for index building)
518 void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) {
520 // iterate over reference enties
521 BamDefaultIndexData::iterator indexIter = m_indexData.begin();
522 BamDefaultIndexData::iterator indexEnd = m_indexData.end();
523 for ( ; indexIter != indexEnd; ++indexIter ) {
525 // get BAM bin map for this reference
526 ReferenceIndex& refIndex = (*indexIter);
527 BamBinMap& bamBinMap = refIndex.Bins;
529 // iterate over BAM bins
530 BamBinMap::iterator binIter = bamBinMap.begin();
531 BamBinMap::iterator binEnd = bamBinMap.end();
532 for ( ; binIter != binEnd; ++binIter ) {
534 // get chunk vector for this bin
535 ChunkVector& binChunks = (*binIter).second;
536 if ( binChunks.size() == 0 ) { continue; }
538 ChunkVector mergedChunks;
539 mergedChunks.push_back( binChunks[0] );
541 // iterate over chunks
543 ChunkVector::iterator chunkIter = binChunks.begin();
544 ChunkVector::iterator chunkEnd = binChunks.end();
545 for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
547 // get 'currentChunk' based on numeric index
548 Chunk& currentChunk = mergedChunks[i];
550 // get iteratorChunk based on vector iterator
551 Chunk& iteratorChunk = (*chunkIter);
553 // if currentChunk.Stop(shifted) == iterator Chunk.Start(shifted)
554 if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) {
556 // set currentChunk.Stop to iteratorChunk.Stop
557 currentChunk.Stop = iteratorChunk.Stop;
562 // set currentChunk + 1 to iteratorChunk
563 mergedChunks.push_back(iteratorChunk);
568 // saved merged chunk vector
569 (*binIter).second = mergedChunks;
574 // writes in-memory index data out to file
575 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
576 bool BamDefaultIndex::Write(const std::string& bamFilename) {
578 string indexFilename = bamFilename + ".bai";
579 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
580 if ( indexStream == 0 ) {
581 printf("ERROR: Could not open file to save index.\n");
585 // write BAM index header
586 fwrite("BAI\1", 1, 4, indexStream);
588 // write number of reference sequences
589 int32_t numReferenceSeqs = d->m_indexData.size();
590 if ( m_isBigEndian ) { SwapEndian_32(numReferenceSeqs); }
591 fwrite(&numReferenceSeqs, 4, 1, indexStream);
593 // iterate over reference sequences
594 BamDefaultIndexData::const_iterator indexIter = d->m_indexData.begin();
595 BamDefaultIndexData::const_iterator indexEnd = d->m_indexData.end();
596 for ( ; indexIter != indexEnd; ++ indexIter ) {
598 // get reference index data
599 const ReferenceIndex& refIndex = (*indexIter);
600 const BamBinMap& binMap = refIndex.Bins;
601 const LinearOffsetVector& offsets = refIndex.Offsets;
603 // write number of bins
604 int32_t binCount = binMap.size();
605 if ( m_isBigEndian ) { SwapEndian_32(binCount); }
606 fwrite(&binCount, 4, 1, indexStream);
609 BamBinMap::const_iterator binIter = binMap.begin();
610 BamBinMap::const_iterator binEnd = binMap.end();
611 for ( ; binIter != binEnd; ++binIter ) {
613 // get bin data (key and chunk vector)
614 uint32_t binKey = (*binIter).first;
615 const ChunkVector& binChunks = (*binIter).second;
618 if ( m_isBigEndian ) { SwapEndian_32(binKey); }
619 fwrite(&binKey, 4, 1, indexStream);
622 int32_t chunkCount = binChunks.size();
623 if ( m_isBigEndian ) { SwapEndian_32(chunkCount); }
624 fwrite(&chunkCount, 4, 1, indexStream);
626 // iterate over chunks
627 ChunkVector::const_iterator chunkIter = binChunks.begin();
628 ChunkVector::const_iterator chunkEnd = binChunks.end();
629 for ( ; chunkIter != chunkEnd; ++chunkIter ) {
631 // get current chunk data
632 const Chunk& chunk = (*chunkIter);
633 uint64_t start = chunk.Start;
634 uint64_t stop = chunk.Stop;
636 if ( m_isBigEndian ) {
637 SwapEndian_64(start);
641 // save chunk offsets
642 fwrite(&start, 8, 1, indexStream);
643 fwrite(&stop, 8, 1, indexStream);
647 // write linear offsets size
648 int32_t offsetSize = offsets.size();
649 if ( m_isBigEndian ) { SwapEndian_32(offsetSize); }
650 fwrite(&offsetSize, 4, 1, indexStream);
652 // iterate over linear offsets
653 LinearOffsetVector::const_iterator offsetIter = offsets.begin();
654 LinearOffsetVector::const_iterator offsetEnd = offsets.end();
655 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
657 // write linear offset value
658 uint64_t linearOffset = (*offsetIter);
659 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
660 fwrite(&linearOffset, 8, 1, indexStream);
664 // flush buffer, close file, and return success
670 // #########################################################################################
671 // #########################################################################################
673 // -------------------------------------
674 // BamToolsIndex implementation
678 struct BamToolsIndexEntry {
686 BamToolsIndexEntry(const uint64_t& offset = 0,
688 const int& position = -1)
695 typedef vector<BamToolsIndexEntry> BamToolsIndexData;
697 } // namespace BamTools
699 struct BamToolsIndex::BamToolsIndexPrivate {
701 // -------------------------
703 BamToolsIndexData m_indexData;
704 BamToolsIndex* m_parent;
707 // -------------------------
710 BamToolsIndexPrivate(BamToolsIndex* parent)
715 ~BamToolsIndexPrivate(void) { }
717 // -------------------------
721 BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
722 : BamIndex(bgzf, reader, isBigEndian)
724 d = new BamToolsIndexPrivate(this);
727 BamToolsIndex::~BamToolsIndex(void) {
732 bool BamToolsIndex::Build(void) {
734 // be sure reader & BGZF file are valid & open for reading
735 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
738 // move file pointer to beginning of alignments
741 // plow through alignments, store block offsets
742 int32_t currentBlockCount = 0;
743 int64_t blockStartOffset = m_BGZF->Tell();
744 int blockStartId = -1;
745 int blockStartPosition = -1;
747 while ( m_reader->GetNextAlignmentCore(al) ) {
749 // set reference flag
750 m_references[al.RefID].RefHasAlignments = true;
752 // if beginning of block, save first alignment's refID & position
753 if ( currentBlockCount == 0 ) {
754 blockStartId = al.RefID;
755 blockStartPosition = al.Position;
758 // increment block counter
761 // if block is full, get offset for next block, reset currentBlockCount
762 if ( currentBlockCount == d->m_blockSize ) {
764 d->m_indexData.push_back( BamToolsIndexEntry(blockStartOffset, blockStartId, blockStartPosition) );
765 blockStartOffset = m_BGZF->Tell();
766 currentBlockCount = 0;
770 return m_reader->Rewind();
773 // N.B. - ignores isRightBoundSpecified
774 bool BamToolsIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
776 // return false if no index data present
777 if ( d->m_indexData.empty() ) return false;
779 // clear any prior data
782 // calculate nearest index to jump to
783 int64_t previousOffset = -1;
784 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
785 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
786 for ( ; indexIter != indexEnd; ++indexIter ) {
788 const BamToolsIndexEntry& entry = (*indexIter);
790 // check if we are 'past' beginning of desired region
791 // if so, we will break out & use previously stored offset
792 if ( entry.RefID > region.LeftRefID ) break;
793 if ( (entry.RefID == region.LeftRefID) && (entry.Position > region.LeftPosition) ) break;
795 // not past desired region, so store current entry offset in previousOffset
796 previousOffset = entry.Offset;
799 // no index was found
800 if ( previousOffset == -1 )
803 // store offset & return success
804 offsets.push_back(previousOffset);
808 bool BamToolsIndex::Load(const string& filename) {
810 // open index file, abort on error
811 FILE* indexStream = fopen(filename.c_str(), "rb");
813 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
817 // set placeholder to receive input byte count (suppresses compiler warnings)
818 size_t elementsRead = 0;
820 // see if index is valid BAM index
822 elementsRead = fread(magic, 1, 4, indexStream);
823 if ( strncmp(magic, "BTI\1", 4) ) {
824 printf("Problem with index file - invalid format.\n");
829 // read in block size
830 elementsRead = fread(&d->m_blockSize, sizeof(d->m_blockSize), 1, indexStream);
831 if ( m_isBigEndian ) { SwapEndian_32(d->m_blockSize); }
833 // read in number of offsets
835 elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, indexStream);
836 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
838 // reserve space for index data
839 d->m_indexData.reserve(numOffsets);
841 // iterate over index entries
842 for ( unsigned int i = 0; i < numOffsets; ++i ) {
849 elementsRead = fread(&offset, sizeof(offset), 1, indexStream);
850 elementsRead = fread(&id, sizeof(id), 1, indexStream);
851 elementsRead = fread(&position, sizeof(position), 1, indexStream);
853 // swap endian-ness if necessary
854 if ( m_isBigEndian ) {
855 SwapEndian_64(offset);
857 SwapEndian_32(position);
860 // save reference index entry
861 d->m_indexData.push_back( BamToolsIndexEntry(offset, id, position) );
863 // set reference flag
864 m_references[id].RefHasAlignments = true; // what about sparse references? wont be able to set flag?
867 // close index file and return
872 // writes in-memory index data out to file
873 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
874 bool BamToolsIndex::Write(const std::string& bamFilename) {
876 string indexFilename = bamFilename + ".bti";
877 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
878 if ( indexStream == 0 ) {
879 printf("ERROR: Could not open file to save index.\n");
883 // write BAM index header
884 fwrite("BTI\1", 1, 4, indexStream);
887 int32_t blockSize = d->m_blockSize;
888 if ( m_isBigEndian ) { SwapEndian_32(blockSize); }
889 fwrite(&blockSize, sizeof(blockSize), 1, indexStream);
891 // write number of offset entries
892 uint32_t numOffsets = d->m_indexData.size();
893 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
894 fwrite(&numOffsets, sizeof(numOffsets), 1, indexStream);
896 // iterate over offset entries
897 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
898 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
899 for ( ; indexIter != indexEnd; ++ indexIter ) {
901 // get reference index data
902 const BamToolsIndexEntry& entry = (*indexIter);
905 uint64_t offset = entry.Offset;
906 int id = entry.RefID;
907 int position = entry.Position;
909 // swap endian-ness if necessary
910 if ( m_isBigEndian ) {
911 SwapEndian_64(offset);
913 SwapEndian_32(position);
916 // write the reference index entry
917 fwrite(&offset, sizeof(offset), 1, indexStream);
918 fwrite(&id, sizeof(id), 1, indexStream);
919 fwrite(&position, sizeof(position), 1, indexStream);
922 // flush file buffer, close file, and return success