1 // ***************************************************************************
2 // BamStandardIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 6 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides index operations for the standardized BAM index format (".bai")
8 // ***************************************************************************
10 #ifndef BAM_STANDARD_INDEX_FORMAT_H
11 #define BAM_STANDARD_INDEX_FORMAT_H
17 // This file is not part of the BamTools API. It exists purely as an
18 // implementation detail. This header file may change from version to
19 // version without notice, or even be removed.
23 #include <api/BamAux.h>
24 #include <api/BamIndex.h>
33 // -----------------------------------------------------------------------------
34 // BamStandardIndex data structures
36 // defines start and end of a contiguous run of alignments
37 struct BaiAlignmentChunk {
// constructor: both arguments are optional and default to 0, so this also
// serves as the default constructor
// NOTE(review): the data members and the constructor's initializer list are
// not visible in this listing; the comparison operator that follows reads a
// member named Start.
44 BaiAlignmentChunk(const uint64_t& start = 0,
45 const uint64_t& stop = 0)
51 // comparison operator (for sorting)
// Orders two chunks by their Start member only; no other member is consulted,
// so two chunks with the same Start are treated as equivalent.
53 bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
54 return lhs.Start < rhs.Start;
57 // convenience typedef for a list of all alignment 'chunks' in a BAI bin
58 typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
60 // convenience typedef for a map of all BAI bins in a reference (ID => chunks)
// key is the uint32_t bin ID, value is that bin's BaiAlignmentChunkVector;
// being a std::map, entries are kept ordered by bin ID
61 typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
63 // convenience typedef for a list of all 'linear offsets' in a reference
// (each offset is stored as a uint64_t)
64 typedef std::vector<uint64_t> BaiLinearOffsetVector;
66 // contains all fields necessary for building, loading, & writing
67 // full BAI index data for a single reference
68 struct BaiReferenceEntry {
// the reference's 'linear offsets' (a vector of uint64_t)
73 BaiLinearOffsetVector LinearOffsets;
// constructor: id is optional and defaults to -1
// NOTE(review): -1 presumably marks "no reference assigned" - confirm; the
// remaining data members and the initializer list are not visible in this
// listing.
76 BaiReferenceEntry(const int32_t& id = -1)
81 // provides (persistent) summary of BaiReferenceEntry's index data
82 struct BaiReferenceSummary {
// NOTE(review): judging by the names, these are positions within the index
// file of this reference's first bin and first linear offset - confirm
// against the implementation file.
87 uint64_t FirstBinFilePosition;
88 uint64_t FirstLinearOffsetFilePosition;
// default constructor: sets both file positions to 0
// NOTE(review): the initializer list starts before the lines shown here, so
// other members may be initialized as well.
91 BaiReferenceSummary(void)
94 , FirstBinFilePosition(0)
95 , FirstLinearOffsetFilePosition(0)
99 // convenience typedef for describing a full BAI index file summary
// (a vector of BaiReferenceSummary entries)
// NOTE(review): presumably one entry per reference, indexed by reference ID -
// confirm against the implementation file.
100 typedef std::vector<BaiReferenceSummary> BaiFileSummary;
102 // end BamStandardIndex data structures
103 // -----------------------------------------------------------------------------
// BamIndex subclass providing index operations for the standardized BAM index
// format (".bai")
105 class BamStandardIndex : public BamIndex {
// constructed from a pointer to the reader's private implementation object
// NOTE(review): whether the index takes ownership of 'reader' cannot be told
// from this declaration - presumably it does not; confirm.
109 BamStandardIndex(Internal::BamReaderPrivate* reader);
110 ~BamStandardIndex(void);
112 // BamIndex implementation
114 // builds index from associated BAM file & writes out to index file
116 // returns whether reference has alignments or not
117 bool HasAlignments(const int& referenceID) const;
118 // attempts to use index data to jump to @region, returns success/fail
119 // a "successful" jump indicates no error, but not whether this region has data
120 // * thus, the method sets a flag to indicate whether there are alignments
121 // available after the jump position
// (the flag is the bool that @hasAlignmentsInRegion points to)
122 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
123 // loads existing data from file into memory
// NOTE(review): the bool result is presumably success/fail, as for Jump() -
// confirm.
124 bool Load(const std::string& filename);
125 // change the index caching behavior
126 void SetCacheMode(const BamIndex::IndexCacheMode& mode);
128 // returns format's file extension
// (static, so it can be called without a BamStandardIndex instance)
129 static const std::string Extension(void);
// index file access helpers
// NOTE(review): most of these return void, so how failures are reported
// (exception, error flag, ...) is not visible from the declarations - see the
// implementation file.
135 void CheckMagicNumber(void);
136 void CloseFile(void);
137 bool IsFileOpen(void) const;
// NOTE(review): @mode is a C string, presumably an fopen()-style mode such as
// "rb" - confirm.
138 void OpenFile(const std::string& filename, const char* mode);
// NOTE(review): @origin presumably takes SEEK_SET / SEEK_CUR / SEEK_END -
// confirm.
139 void Seek(const int64_t& position, const int& origin);
140 int64_t Tell(void) const;
142 // BAI index building methods
// In each method below, the first parameter is taken by non-const reference
// and so can be modified by the call; all other parameters are const
// references (input only).
143 void ClearReferenceEntry(BaiReferenceEntry& refEntry);
144 void SaveAlignmentChunkToBin(BaiBinMap& binMap,
145 const uint32_t& currentBin,
146 const uint64_t& currentOffset,
147 const uint64_t& lastOffset);
148 void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
149 const int& alignmentStartPosition,
150 const int& alignmentStopPosition,
151 const uint64_t& lastOffset);
153 // random-access methods
// Parameters taken by non-const reference (begin/end, candidateBins, offsets,
// offset) can be written by the callee; parameters taken by const reference
// are input only.
154 void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
155 void CalculateCandidateBins(const uint32_t& begin,
157 std::set<uint16_t>& candidateBins);
158 void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
159 const uint64_t& minOffset,
160 std::set<uint16_t>& candidateBins,
161 std::vector<int64_t>& offsets);
162 uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
// @hasAlignmentsInRegion has the same name and type as the flag parameter of
// Jump()
163 void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
164 uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
166 // BAI summary (create/load) methods
// NOTE(review): these presumably populate m_indexFileSummary (one
// BaiReferenceSummary per reference) - confirm in the implementation file.
// The Summarize* methods that take a BaiReferenceSummary receive it by
// non-const reference, so they can fill it in.
167 void ReserveForSummary(const int& numReferences);
168 void SaveBinsSummary(const int& refId, const int& numBins);
169 void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
170 void SkipBins(const int& numBins);
171 void SkipLinearOffsets(const int& numLinearOffsets);
172 void SummarizeBins(BaiReferenceSummary& refSummary);
173 void SummarizeIndexFile(void);
174 void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
175 void SummarizeReference(BaiReferenceSummary& refSummary);
177 // BAI full index input methods
// All of these return void; for the methods taking a non-const reference,
// that parameter is the only channel through which a value can be handed back
// to the caller.
// NOTE(review): "Buffer" presumably refers to an internal read buffer whose
// size is tracked by m_bufferLength - confirm in the implementation file.
178 void ReadBinID(uint32_t& binId);
179 void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
180 void ReadIntoBuffer(const unsigned int& bytesRequested);
181 void ReadLinearOffset(uint64_t& linearOffset);
182 void ReadNumAlignmentChunks(int& numAlignmentChunks);
183 void ReadNumBins(int& numBins);
184 void ReadNumLinearOffsets(int& numLinearOffsets);
185 void ReadNumReferences(int& numReferences);
187 // BAI full index output methods
// Except for WriteAlignmentChunk (const reference), the chunk / bin / offset
// containers are taken by non-const reference, so these methods are able to
// modify the data they are given.
// NOTE(review): MergeAlignmentChunks and SortLinearOffsets presumably rewrite
// their argument in place - confirm in the implementation file.
188 void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
189 void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
190 void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
191 void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
192 void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
193 void WriteBins(const int& refId, BaiBinMap& bins);
194 void WriteHeader(void);
195 void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
196 void WriteReferenceEntry(BaiReferenceEntry& refEntry);
// caching mode; same type as the argument of SetCacheMode()
201 BamIndex::IndexCacheMode m_cacheMode;
// index file summary (BaiFileSummary is a vector of BaiReferenceSummary)
202 BaiFileSummary m_indexFileSummary;
// NOTE(review): presumably the current length of the internal buffer that is
// handed to CheckBufferSize() - confirm.
205 unsigned int m_bufferLength;
// NOTE(review): the definition of RaiiWrapper is not visible in this listing.
213 RaiiWrapper Resources;
217 // checks if the buffer is large enough to accommodate the requested size
// @buffer and @bufferLength are taken by non-const reference, so the call can
// replace the buffer pointer and update its recorded length
// NOTE(review): presumably the buffer is grown when it is too small - confirm.
218 static void CheckBufferSize(char*& buffer,
219 unsigned int& bufferLength,
220 const unsigned int& requestedBytes);
221 // checks if the buffer is large enough to accommodate the requested size
// (overload for unsigned char buffers; otherwise the same signature as above)
222 static void CheckBufferSize(unsigned char*& buffer,
223 unsigned int& bufferLength,
224 const unsigned int& requestedBytes);
// static constants for the BAI format; they are only declared here (no
// in-class initializers), so their values are not visible in this header
// NOTE(review): the SIZEOF_* names suggest on-disk record sizes in bytes and
// BAI_MAGIC the file's magic string - confirm against the definitions.
227 static const int MAX_BIN;
228 static const int BAM_LIDX_SHIFT;
229 static const std::string BAI_EXTENSION;
230 static const char* const BAI_MAGIC;
231 static const int SIZEOF_ALIGNMENTCHUNK;
232 static const int SIZEOF_BINCORE;
233 static const int SIZEOF_LINEAROFFSET;
236 } // namespace Internal
237 } // namespace BamTools
239 #endif // BAM_STANDARD_INDEX_FORMAT_H