-bool BamStandardIndex::BamStandardIndexPrivate::Build(void) {
-
- // localize parent data
- if ( m_parent == 0 ) return false;
- BamReader* reader = m_parent->m_reader;
- BgzfData* mBGZF = m_parent->m_BGZF;
- RefVector& references = m_parent->m_references;
-
- // be sure reader & BGZF file are valid & open for reading
- if ( reader == 0 || mBGZF == 0 || !mBGZF->IsOpen )
- return false;
-
- // move file pointer to beginning of alignments
- reader->Rewind();
-
- // get reference count, reserve index space
- int numReferences = (int)references.size();
- for ( int i = 0; i < numReferences; ++i )
- m_indexData.push_back(ReferenceIndex());
-
- // sets default constant for bin, ID, offset, coordinate variables
- const uint32_t defaultValue = 0xffffffffu;
-
- // bin data
- uint32_t saveBin(defaultValue);
- uint32_t lastBin(defaultValue);
-
- // reference ID data
- int32_t saveRefID(defaultValue);
- int32_t lastRefID(defaultValue);
-
- // offset data
- uint64_t saveOffset = mBGZF->Tell();
- uint64_t lastOffset = saveOffset;
-
- // coordinate data
- int32_t lastCoordinate = defaultValue;
-
- BamAlignment bAlignment;
- while ( reader->GetNextAlignmentCore(bAlignment) ) {
-
- // change of chromosome, save ID, reset bin
- if ( lastRefID != bAlignment.RefID ) {
- lastRefID = bAlignment.RefID;
- lastBin = defaultValue;
- }
-
- // if lastCoordinate greater than BAM position - file not sorted properly
- else if ( lastCoordinate > bAlignment.Position ) {
- fprintf(stderr, "BAM file not properly sorted:\n");
- fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(), lastCoordinate, bAlignment.Position, bAlignment.RefID);
- exit(1);
- }
-
- // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
- if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
-
- // save linear offset entry (matched to BAM entry refID)
- ReferenceIndex& refIndex = m_indexData.at(bAlignment.RefID);
- LinearOffsetVector& offsets = refIndex.Offsets;
- InsertLinearOffset(offsets, bAlignment, lastOffset);
- }
-
- // if current BamAlignment bin != lastBin, "then possibly write the binning index"
- if ( bAlignment.Bin != lastBin ) {
-
- // if not first time through
- if ( saveBin != defaultValue ) {
-
- // save Bam bin entry
- ReferenceIndex& refIndex = m_indexData.at(saveRefID);
- BamBinMap& binMap = refIndex.Bins;
- InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
- }
-
- // update saveOffset
- saveOffset = lastOffset;
-
- // update bin values
- saveBin = bAlignment.Bin;
- lastBin = bAlignment.Bin;
-
- // update saveRefID
- saveRefID = bAlignment.RefID;
-
- // if invalid RefID, break out
- if ( saveRefID < 0 ) break;
- }
-
- // make sure that current file pointer is beyond lastOffset
- if ( mBGZF->Tell() <= (int64_t)lastOffset ) {
- fprintf(stderr, "Error in BGZF offsets.\n");
- exit(1);
- }
-
- // update lastOffset
- lastOffset = mBGZF->Tell();
-
- // update lastCoordinate
- lastCoordinate = bAlignment.Position;
- }
-
- // save any leftover BAM data (as long as refID is valid)
- if ( saveRefID >= 0 ) {
- // save Bam bin entry
- ReferenceIndex& refIndex = m_indexData.at(saveRefID);
- BamBinMap& binMap = refIndex.Bins;
- InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
- }
-
- // simplify index by merging chunks
- MergeChunks();
-
- // iterate through references in index
- // store whether reference has data &
- // sort offsets in linear offset vector
- BamStandardIndexData::iterator indexIter = m_indexData.begin();
- BamStandardIndexData::iterator indexEnd = m_indexData.end();
- for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
-
- // get reference index data
- ReferenceIndex& refIndex = (*indexIter);
- BamBinMap& binMap = refIndex.Bins;
- LinearOffsetVector& offsets = refIndex.Offsets;
-
- // store whether reference has alignments or no
- references[i].RefHasAlignments = ( binMap.size() > 0 );
-
- // sort linear offsets
- sort(offsets.begin(), offsets.end());
- }
-
- // rewind file pointer to beginning of alignments, return success/fail
- return reader->Rewind();
-}
-
-bool BamStandardIndex::BamStandardIndexPrivate::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
-
- // calculate which bins overlap this region
- uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
- int numBins = BinsFromRegion(region, isRightBoundSpecified, bins);
-
- // get bins for this reference
- const ReferenceIndex& refIndex = m_indexData.at(region.LeftRefID);
- const BamBinMap& binMap = refIndex.Bins;
-
- // get minimum offset to consider
- const LinearOffsetVector& linearOffsets = refIndex.Offsets;
- uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() ) ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
-
- // store all alignment 'chunk' starts (file offsets) for bins in this region
- for ( int i = 0; i < numBins; ++i ) {
-
- const uint16_t binKey = bins[i];
- map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
- if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
-
- // iterate over chunks
- const ChunkVector& chunks = (*binIter).second;
- std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
- std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
- for ( ; chunksIter != chunksEnd; ++chunksIter) {
-
- // if valid chunk found, store its file offset
- const Chunk& chunk = (*chunksIter);
- if ( chunk.Stop > minOffset )
- offsets.push_back( chunk.Start );
- }
- }
- }
-
- // clean up memory
- free(bins);
-
- // sort the offsets before returning
- sort(offsets.begin(), offsets.end());
-
- // return whether any offsets were found
- return ( offsets.size() != 0 );
-}
-
-// saves BAM bin entry for index
-void BamStandardIndex::BamStandardIndexPrivate::InsertBinEntry(BamBinMap& binMap,
- const uint32_t& saveBin,
- const uint64_t& saveOffset,
- const uint64_t& lastOffset)
-{
- // look up saveBin
- BamBinMap::iterator binIter = binMap.find(saveBin);
-
- // create new chunk
- Chunk newChunk(saveOffset, lastOffset);
-
- // if entry doesn't exist
- if ( binIter == binMap.end() ) {
- ChunkVector newChunks;
- newChunks.push_back(newChunk);
- binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
- }
-
- // otherwise
- else {
- ChunkVector& binChunks = (*binIter).second;
- binChunks.push_back( newChunk );
- }
-}
-
-// saves linear offset entry for index
-void BamStandardIndex::BamStandardIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
- const BamAlignment& bAlignment,
- const uint64_t& lastOffset)
-{
- // get converted offsets
- int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
- int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
-
- // resize vector if necessary
- int oldSize = offsets.size();
- int newSize = endOffset + 1;
- if ( oldSize < newSize )
- offsets.resize(newSize, 0);
-
- // store offset
- for( int i = beginOffset + 1; i <= endOffset; ++i ) {
- if ( offsets[i] == 0 )
- offsets[i] = lastOffset;
- }
-}