10 using namespace BamTools;
12 // -------------------------------
13 // BamIndex implementation
15 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian)
18 , m_isBigEndian(isBigEndian)
20 if ( m_reader && m_reader->IsOpen() )
21 m_references = m_reader->GetReferenceData();
24 bool BamIndex::HasAlignments(const int& referenceID) {
26 // return false if invalid ID
27 if ( (referenceID < 0) || (referenceID >= (int)m_references.size()) )
30 // else return status of reference (has alignments?)
32 return m_references.at(referenceID).RefHasAlignments;
35 // #########################################################################################
36 // #########################################################################################
38 // -------------------------------
39 // BamDefaultIndex structs & typedefs
50 Chunk(const uint64_t& start = 0,
51 const uint64_t& stop = 0)
57 bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
58 return lhs.Start < rhs.Start;
61 typedef vector<Chunk> ChunkVector;
62 typedef map<uint32_t, ChunkVector> BamBinMap;
63 typedef vector<uint64_t> LinearOffsetVector;
65 struct ReferenceIndex {
69 LinearOffsetVector Offsets;
72 ReferenceIndex(const BamBinMap& binMap = BamBinMap(),
73 const LinearOffsetVector& offsets = LinearOffsetVector())
79 typedef vector<ReferenceIndex> BamDefaultIndexData;
81 } // namespace BamTools
83 // -------------------------------
84 // BamDefaultIndex implementation
86 struct BamDefaultIndex::BamDefaultIndexPrivate {
88 // -------------------------
91 BamDefaultIndexData m_indexData;
92 BamDefaultIndex* m_parent;
94 // -------------------------
97 BamDefaultIndexPrivate(BamDefaultIndex* parent) : m_parent(parent) { }
98 ~BamDefaultIndexPrivate(void) { }
100 // -------------------------
103 // calculate bins that overlap region
104 int BinsFromRegion(const BamTools::BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[BamTools::MAX_BIN]);
105 // saves BAM bin entry for index
106 void InsertBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset);
107 // saves linear offset entry for index
108 void InsertLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset);
109 // simplifies index by merging 'chunks'
110 void MergeChunks(void);
114 BamDefaultIndex::BamDefaultIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
115 : BamIndex(bgzf, reader, isBigEndian)
117 d = new BamDefaultIndexPrivate(this);
120 BamDefaultIndex::~BamDefaultIndex(void) {
121 d->m_indexData.clear();
126 // calculate bins that overlap region
127 int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) {
129 // get region boundaries
130 uint32_t begin = (unsigned int)region.LeftPosition;
133 // if right bound specified AND left&right bounds are on same reference
134 // OK to use right bound position
135 if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
136 end = (unsigned int)region.RightPosition;
138 // otherwise, use end of left bound reference as cutoff
140 end = (unsigned int)m_parent->m_references.at(region.LeftRefID).RefLength - 1;
142 // initialize list, bin '0' always a valid bin
146 // get rest of bins that contain this region
148 for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { bins[i++] = k; }
149 for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { bins[i++] = k; }
150 for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { bins[i++] = k; }
151 for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { bins[i++] = k; }
152 for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { bins[i++] = k; }
154 // return number of bins stored
158 bool BamDefaultIndex::Build(void) {
160 // be sure reader & BGZF file are valid & open for reading
161 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
164 // move file pointer to beginning of alignments
167 // get reference count, reserve index space
168 int numReferences = (int)m_references.size();
169 for ( int i = 0; i < numReferences; ++i ) {
170 d->m_indexData.push_back(ReferenceIndex());
173 // sets default constant for bin, ID, offset, coordinate variables
174 const uint32_t defaultValue = 0xffffffffu;
177 uint32_t saveBin(defaultValue);
178 uint32_t lastBin(defaultValue);
181 int32_t saveRefID(defaultValue);
182 int32_t lastRefID(defaultValue);
185 uint64_t saveOffset = m_BGZF->Tell();
186 uint64_t lastOffset = saveOffset;
189 int32_t lastCoordinate = defaultValue;
191 BamAlignment bAlignment;
192 while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
194 // change of chromosome, save ID, reset bin
195 if ( lastRefID != bAlignment.RefID ) {
196 lastRefID = bAlignment.RefID;
197 lastBin = defaultValue;
200 // if lastCoordinate greater than BAM position - file not sorted properly
201 else if ( lastCoordinate > bAlignment.Position ) {
202 printf("BAM file not properly sorted:\n");
203 printf("Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), lastCoordinate, bAlignment.Position, bAlignment.RefID);
207 // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
208 if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
210 // save linear offset entry (matched to BAM entry refID)
211 ReferenceIndex& refIndex = d->m_indexData.at(bAlignment.RefID);
212 LinearOffsetVector& offsets = refIndex.Offsets;
213 d->InsertLinearOffset(offsets, bAlignment, lastOffset);
216 // if current BamAlignment bin != lastBin, "then possibly write the binning index"
217 if ( bAlignment.Bin != lastBin ) {
219 // if not first time through
220 if ( saveBin != defaultValue ) {
222 // save Bam bin entry
223 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
224 BamBinMap& binMap = refIndex.Bins;
225 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
229 saveOffset = lastOffset;
232 saveBin = bAlignment.Bin;
233 lastBin = bAlignment.Bin;
236 saveRefID = bAlignment.RefID;
238 // if invalid RefID, break out (why?)
239 if ( saveRefID < 0 ) { break; }
242 // make sure that current file pointer is beyond lastOffset
243 if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
244 printf("Error in BGZF offsets.\n");
249 lastOffset = m_BGZF->Tell();
251 // update lastCoordinate
252 lastCoordinate = bAlignment.Position;
255 // save any leftover BAM data (as long as refID is valid)
256 if ( saveRefID >= 0 ) {
257 // save Bam bin entry
258 ReferenceIndex& refIndex = d->m_indexData.at(saveRefID);
259 BamBinMap& binMap = refIndex.Bins;
260 d->InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
263 // simplify index by merging chunks
266 // iterate through references in index
267 // store whether reference has data &
268 // sort offsets in linear offset vector
269 BamDefaultIndexData::iterator indexIter = d->m_indexData.begin();
270 BamDefaultIndexData::iterator indexEnd = d->m_indexData.end();
271 for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
273 // get reference index data
274 ReferenceIndex& refIndex = (*indexIter);
275 BamBinMap& binMap = refIndex.Bins;
276 LinearOffsetVector& offsets = refIndex.Offsets;
278 // store whether reference has alignments or no
279 m_references[i].RefHasAlignments = ( binMap.size() > 0 );
281 // sort linear offsets
282 sort(offsets.begin(), offsets.end());
285 // rewind file pointer to beginning of alignments, return success/fail
286 return m_reader->Rewind();
289 bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
291 // calculate which bins overlap this region
292 uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
293 int numBins = d->BinsFromRegion(region, isRightBoundSpecified, bins);
295 // get bins for this reference
296 const ReferenceIndex& refIndex = d->m_indexData.at(region.LeftRefID);
297 const BamBinMap& binMap = refIndex.Bins;
299 // get minimum offset to consider
300 const LinearOffsetVector& linearOffsets = refIndex.Offsets;
301 uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() ) ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
303 // store all alignment 'chunk' starts (file offsets) for bins in this region
304 for ( int i = 0; i < numBins; ++i ) {
306 const uint16_t binKey = bins[i];
307 map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
308 if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
310 const ChunkVector& chunks = (*binIter).second;
311 std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
312 std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
313 for ( ; chunksIter != chunksEnd; ++chunksIter) {
315 // if valid chunk found, store its file offset
316 const Chunk& chunk = (*chunksIter);
317 if ( chunk.Stop > minOffset )
318 offsets.push_back( chunk.Start );
326 // sort the offsets before returning
327 sort(offsets.begin(), offsets.end());
329 // return whether any offsets were found
330 return ( offsets.size() != 0 );
333 // saves BAM bin entry for index
334 void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& binMap,
335 const uint32_t& saveBin,
336 const uint64_t& saveOffset,
337 const uint64_t& lastOffset)
340 BamBinMap::iterator binIter = binMap.find(saveBin);
343 Chunk newChunk(saveOffset, lastOffset);
345 // if entry doesn't exist
346 if ( binIter == binMap.end() ) {
347 ChunkVector newChunks;
348 newChunks.push_back(newChunk);
349 binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
354 ChunkVector& binChunks = (*binIter).second;
355 binChunks.push_back( newChunk );
359 // saves linear offset entry for index
360 void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
361 const BamAlignment& bAlignment,
362 const uint64_t& lastOffset)
364 // get converted offsets
365 int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
366 int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
368 // resize vector if necessary
369 int oldSize = offsets.size();
370 int newSize = endOffset + 1;
371 if ( oldSize < newSize )
372 offsets.resize(newSize, 0);
375 for( int i = beginOffset + 1; i <= endOffset; ++i ) {
376 if ( offsets[i] == 0 )
377 offsets[i] = lastOffset;
381 bool BamDefaultIndex::Load(const string& filename) {
383 // open index file, abort on error
384 FILE* indexStream = fopen(filename.c_str(), "rb");
386 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
390 // set placeholder to receive input byte count (suppresses compiler warnings)
391 size_t elementsRead = 0;
393 // see if index is valid BAM index
395 elementsRead = fread(magic, 1, 4, indexStream);
396 if ( strncmp(magic, "BAI\1", 4) ) {
397 printf("Problem with index file - invalid format.\n");
402 // get number of reference sequences
404 elementsRead = fread(&numRefSeqs, 4, 1, indexStream);
405 if ( m_isBigEndian ) { SwapEndian_32(numRefSeqs); }
407 // intialize space for BamDefaultIndexData data structure
408 d->m_indexData.reserve(numRefSeqs);
410 // iterate over reference sequences
411 for ( unsigned int i = 0; i < numRefSeqs; ++i ) {
413 // get number of bins for this reference sequence
415 elementsRead = fread(&numBins, 4, 1, indexStream);
416 if ( m_isBigEndian ) { SwapEndian_32(numBins); }
419 RefData& refEntry = m_references[i];
420 refEntry.RefHasAlignments = true;
423 // intialize BinVector
426 // iterate over bins for that reference sequence
427 for ( int j = 0; j < numBins; ++j ) {
431 elementsRead = fread(&binID, 4, 1, indexStream);
433 // get number of regionChunks in this bin
435 elementsRead = fread(&numChunks, 4, 1, indexStream);
437 if ( m_isBigEndian ) {
438 SwapEndian_32(binID);
439 SwapEndian_32(numChunks);
442 // intialize ChunkVector
443 ChunkVector regionChunks;
444 regionChunks.reserve(numChunks);
446 // iterate over regionChunks in this bin
447 for ( unsigned int k = 0; k < numChunks; ++k ) {
449 // get chunk boundaries (left, right)
452 elementsRead = fread(&left, 8, 1, indexStream);
453 elementsRead = fread(&right, 8, 1, indexStream);
455 if ( m_isBigEndian ) {
457 SwapEndian_64(right);
461 regionChunks.push_back( Chunk(left, right) );
464 // sort chunks for this bin
465 sort( regionChunks.begin(), regionChunks.end(), ChunkLessThan );
467 // save binID, chunkVector for this bin
468 binMap.insert( pair<uint32_t, ChunkVector>(binID, regionChunks) );
471 // load linear index for this reference sequence
473 // get number of linear offsets
474 int32_t numLinearOffsets;
475 elementsRead = fread(&numLinearOffsets, 4, 1, indexStream);
476 if ( m_isBigEndian ) { SwapEndian_32(numLinearOffsets); }
478 // intialize LinearOffsetVector
479 LinearOffsetVector offsets;
480 offsets.reserve(numLinearOffsets);
482 // iterate over linear offsets for this reference sequeence
483 uint64_t linearOffset;
484 for ( int j = 0; j < numLinearOffsets; ++j ) {
485 // read a linear offset & store
486 elementsRead = fread(&linearOffset, 8, 1, indexStream);
487 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
488 offsets.push_back(linearOffset);
491 // sort linear offsets
492 sort( offsets.begin(), offsets.end() );
494 // store index data for that reference sequence
495 d->m_indexData.push_back( ReferenceIndex(binMap, offsets) );
498 // close index file (.bai) and return
503 // merges 'alignment chunks' in BAM bin (used for index building)
504 void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) {
506 // iterate over reference enties
507 BamDefaultIndexData::iterator indexIter = m_indexData.begin();
508 BamDefaultIndexData::iterator indexEnd = m_indexData.end();
509 for ( ; indexIter != indexEnd; ++indexIter ) {
511 // get BAM bin map for this reference
512 ReferenceIndex& refIndex = (*indexIter);
513 BamBinMap& bamBinMap = refIndex.Bins;
515 // iterate over BAM bins
516 BamBinMap::iterator binIter = bamBinMap.begin();
517 BamBinMap::iterator binEnd = bamBinMap.end();
518 for ( ; binIter != binEnd; ++binIter ) {
520 // get chunk vector for this bin
521 ChunkVector& binChunks = (*binIter).second;
522 if ( binChunks.size() == 0 ) { continue; }
524 ChunkVector mergedChunks;
525 mergedChunks.push_back( binChunks[0] );
527 // iterate over chunks
529 ChunkVector::iterator chunkIter = binChunks.begin();
530 ChunkVector::iterator chunkEnd = binChunks.end();
531 for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
533 // get 'currentChunk' based on numeric index
534 Chunk& currentChunk = mergedChunks[i];
536 // get iteratorChunk based on vector iterator
537 Chunk& iteratorChunk = (*chunkIter);
539 // if currentChunk.Stop(shifted) == iterator Chunk.Start(shifted)
540 if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) {
542 // set currentChunk.Stop to iteratorChunk.Stop
543 currentChunk.Stop = iteratorChunk.Stop;
548 // set currentChunk + 1 to iteratorChunk
549 mergedChunks.push_back(iteratorChunk);
554 // saved merged chunk vector
555 (*binIter).second = mergedChunks;
560 // writes in-memory index data out to file
561 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
562 bool BamDefaultIndex::Write(const std::string& bamFilename) {
564 string indexFilename = bamFilename + ".bai";
565 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
566 if ( indexStream == 0 ) {
567 printf("ERROR: Could not open file to save index.\n");
571 // write BAM index header
572 fwrite("BAI\1", 1, 4, indexStream);
574 // write number of reference sequences
575 int32_t numReferenceSeqs = d->m_indexData.size();
576 if ( m_isBigEndian ) { SwapEndian_32(numReferenceSeqs); }
577 fwrite(&numReferenceSeqs, 4, 1, indexStream);
579 // iterate over reference sequences
580 BamDefaultIndexData::const_iterator indexIter = d->m_indexData.begin();
581 BamDefaultIndexData::const_iterator indexEnd = d->m_indexData.end();
582 for ( ; indexIter != indexEnd; ++ indexIter ) {
584 // get reference index data
585 const ReferenceIndex& refIndex = (*indexIter);
586 const BamBinMap& binMap = refIndex.Bins;
587 const LinearOffsetVector& offsets = refIndex.Offsets;
589 // write number of bins
590 int32_t binCount = binMap.size();
591 if ( m_isBigEndian ) { SwapEndian_32(binCount); }
592 fwrite(&binCount, 4, 1, indexStream);
595 BamBinMap::const_iterator binIter = binMap.begin();
596 BamBinMap::const_iterator binEnd = binMap.end();
597 for ( ; binIter != binEnd; ++binIter ) {
599 // get bin data (key and chunk vector)
600 uint32_t binKey = (*binIter).first;
601 const ChunkVector& binChunks = (*binIter).second;
604 if ( m_isBigEndian ) { SwapEndian_32(binKey); }
605 fwrite(&binKey, 4, 1, indexStream);
608 int32_t chunkCount = binChunks.size();
609 if ( m_isBigEndian ) { SwapEndian_32(chunkCount); }
610 fwrite(&chunkCount, 4, 1, indexStream);
612 // iterate over chunks
613 ChunkVector::const_iterator chunkIter = binChunks.begin();
614 ChunkVector::const_iterator chunkEnd = binChunks.end();
615 for ( ; chunkIter != chunkEnd; ++chunkIter ) {
617 // get current chunk data
618 const Chunk& chunk = (*chunkIter);
619 uint64_t start = chunk.Start;
620 uint64_t stop = chunk.Stop;
622 if ( m_isBigEndian ) {
623 SwapEndian_64(start);
627 // save chunk offsets
628 fwrite(&start, 8, 1, indexStream);
629 fwrite(&stop, 8, 1, indexStream);
633 // write linear offsets size
634 int32_t offsetSize = offsets.size();
635 if ( m_isBigEndian ) { SwapEndian_32(offsetSize); }
636 fwrite(&offsetSize, 4, 1, indexStream);
638 // iterate over linear offsets
639 LinearOffsetVector::const_iterator offsetIter = offsets.begin();
640 LinearOffsetVector::const_iterator offsetEnd = offsets.end();
641 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
643 // write linear offset value
644 uint64_t linearOffset = (*offsetIter);
645 if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
646 fwrite(&linearOffset, 8, 1, indexStream);
650 // flush buffer, close file, and return success
656 // #########################################################################################
657 // #########################################################################################
659 // -------------------------------------
660 // BamToolsIndex implementation
664 struct BamToolsIndexEntry {
672 BamToolsIndexEntry(const uint64_t& offset = 0,
674 const int& position = -1)
681 typedef vector<BamToolsIndexEntry> BamToolsIndexData;
683 } // namespace BamTools
685 struct BamToolsIndex::BamToolsIndexPrivate {
687 // -------------------------
689 BamToolsIndexData m_indexData;
690 BamToolsIndex* m_parent;
693 // -------------------------
696 BamToolsIndexPrivate(BamToolsIndex* parent)
701 ~BamToolsIndexPrivate(void) { }
703 // -------------------------
707 BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
708 : BamIndex(bgzf, reader, isBigEndian)
710 d = new BamToolsIndexPrivate(this);
713 BamToolsIndex::~BamToolsIndex(void) {
718 bool BamToolsIndex::Build(void) {
720 // be sure reader & BGZF file are valid & open for reading
721 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
724 // move file pointer to beginning of alignments
727 // plow through alignments, store block offsets
728 int32_t currentBlockCount = 0;
729 int64_t blockStartOffset = m_BGZF->Tell();
730 int blockStartId = -1;
731 int blockStartPosition = -1;
733 while ( m_reader->GetNextAlignmentCore(al) ) {
735 // set reference flag
736 m_references[al.RefID].RefHasAlignments = true;
738 // if beginning of block, save first alignment's refID & position
739 if ( currentBlockCount == 0 ) {
740 blockStartId = al.RefID;
741 blockStartPosition = al.Position;
744 // increment block counter
747 // if block is full, get offset for next block, reset currentBlockCount
748 if ( currentBlockCount == d->m_blockSize ) {
750 // cerr << "-------------------------------" << endl;
751 // cerr << "BlockCount = " << currentBlockCount << endl;
753 // cerr << "Storing entry: " << endl;
754 // cerr << "\trefID : " << blockStartId << endl;
755 // cerr << "\tpos : " << blockStartPosition << endl;
756 // cerr << "\toffset : " << blockStartOffset << endl;
759 d->m_indexData.push_back( BamToolsIndexEntry(blockStartOffset, blockStartId, blockStartPosition) );
760 blockStartOffset = m_BGZF->Tell();
761 currentBlockCount = 0;
765 return m_reader->Rewind();
768 // N.B. - ignores isRightBoundSpecified
769 bool BamToolsIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
771 // return false if no index data present
772 if ( d->m_indexData.empty() ) return false;
774 // clear any prior data
777 // calculate nearest index to jump to
778 int64_t previousOffset = -1;
779 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
780 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
781 for ( ; indexIter != indexEnd; ++indexIter ) {
783 const BamToolsIndexEntry& entry = (*indexIter);
785 // check if we are 'past' beginning of desired region
786 // if so, we will break out & use previously stored offset
787 if ( entry.RefID > region.LeftRefID ) break;
788 if ( (entry.RefID == region.LeftRefID) && (entry.Position > region.LeftPosition) ) break;
790 // not past desired region, so store current entry offset in previousOffset
791 previousOffset = entry.Offset;
794 // no index was found
795 if ( previousOffset == -1 )
798 // store offset & return success
799 /* cerr << "BTI::GetOffsets() : calculated offset = " << previousOffset << endl;*/
800 offsets.push_back(previousOffset);
804 bool BamToolsIndex::Load(const string& filename) {
806 // open index file, abort on error
807 FILE* indexStream = fopen(filename.c_str(), "rb");
809 printf("ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
813 // set placeholder to receive input byte count (suppresses compiler warnings)
814 size_t elementsRead = 0;
816 // see if index is valid BAM index
818 elementsRead = fread(magic, 1, 4, indexStream);
819 if ( strncmp(magic, "BTI\1", 4) ) {
820 printf("Problem with index file - invalid format.\n");
825 // read in block size
826 elementsRead = fread(&d->m_blockSize, sizeof(d->m_blockSize), 1, indexStream);
827 if ( m_isBigEndian ) { SwapEndian_32(d->m_blockSize); }
829 // read in number of offsets
831 elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, indexStream);
832 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
834 // reserve space for index data
835 d->m_indexData.reserve(numOffsets);
837 // iterate over index entries
838 for ( unsigned int i = 0; i < numOffsets; ++i ) {
845 elementsRead = fread(&offset, sizeof(offset), 1, indexStream);
846 elementsRead = fread(&id, sizeof(id), 1, indexStream);
847 elementsRead = fread(&position, sizeof(position), 1, indexStream);
849 // swap endian-ness if necessary
850 if ( m_isBigEndian ) {
851 SwapEndian_64(offset);
853 SwapEndian_32(position);
856 // save reference index entry
857 d->m_indexData.push_back( BamToolsIndexEntry(offset, id, position) );
859 // set reference flag
860 m_references[id].RefHasAlignments = true; // what about sparse references? wont be able to set flag?
863 // close index file and return
868 // writes in-memory index data out to file
869 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
870 bool BamToolsIndex::Write(const std::string& bamFilename) {
872 string indexFilename = bamFilename + ".bti";
873 FILE* indexStream = fopen(indexFilename.c_str(), "wb");
874 if ( indexStream == 0 ) {
875 printf("ERROR: Could not open file to save index.\n");
879 // write BAM index header
880 fwrite("BTI\1", 1, 4, indexStream);
883 int32_t blockSize = d->m_blockSize;
884 if ( m_isBigEndian ) { SwapEndian_32(blockSize); }
885 fwrite(&blockSize, sizeof(blockSize), 1, indexStream);
887 // write number of offset entries
888 uint32_t numOffsets = d->m_indexData.size();
889 if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
890 fwrite(&numOffsets, sizeof(numOffsets), 1, indexStream);
892 // iterate over offset entries
893 BamToolsIndexData::const_iterator indexIter = d->m_indexData.begin();
894 BamToolsIndexData::const_iterator indexEnd = d->m_indexData.end();
895 for ( ; indexIter != indexEnd; ++ indexIter ) {
897 // get reference index data
898 const BamToolsIndexEntry& entry = (*indexIter);
901 uint64_t offset = entry.Offset;
902 int id = entry.RefID;
903 int position = entry.Position;
905 // swap endian-ness if necessary
906 if ( m_isBigEndian ) {
907 SwapEndian_64(offset);
909 SwapEndian_32(position);
912 // write the reference index entry
913 fwrite(&offset, sizeof(offset), 1, indexStream);
914 fwrite(&id, sizeof(id), 1, indexStream);
915 fwrite(&position, sizeof(position), 1, indexStream);
918 // flush file buffer, close file, and return success