1 // ***************************************************************************
2 // BamStandardIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 5 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the standardized BAM index format (".bai")
9 // ***************************************************************************
11 #ifndef BAM_STANDARD_INDEX_FORMAT_H
12 #define BAM_STANDARD_INDEX_FORMAT_H
18 // This file is not part of the BamTools API. It exists purely as an
19 // implementation detail. This header file may change from version to
20 // version without notice, or even be removed.
24 #include <api/BamAux.h>
25 #include <api/BamIndex.h>
34 // -----------------------------------------------------------------------------
35 // BamStandardIndex data structures
37 // defines start and end (both uint64_t values) of a contiguous run of alignments
38 struct BaiAlignmentChunk {
45 BaiAlignmentChunk(const uint64_t& start = 0, // both arguments default to 0, so this also serves as the default constructor
46 const uint64_t& stop = 0)
52 // comparison operator (for sorting): orders chunks by Start only; Stop is not compared
54 bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
55 return lhs.Start < rhs.Start;
58 // convenience typedef for a list of all alignment 'chunks' in a BAI bin
59 typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
61 // convenience typedef for a map of all BAI bins in a reference (uint32_t bin ID => chunks in that bin)
62 typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
64 // convenience typedef for a list of all 'linear offsets' in a reference (each stored as uint64_t)
65 typedef std::vector<uint64_t> BaiLinearOffsetVector;
67 // contains all fields necessary for building, loading, & writing
68 // full BAI index data for a single reference
69 struct BaiReferenceEntry {
74 BaiLinearOffsetVector LinearOffsets; // list of 'linear offsets' for this reference
77 BaiReferenceEntry(const int32_t& id = -1) // id defaults to -1 when omitted
82 // provides (persistent) summary of BaiReferenceEntry's index data
83 struct BaiReferenceSummary {
88 uint64_t FirstBinFilePosition; // presumably the index-file position of this reference's first bin - TODO confirm
89 uint64_t FirstLinearOffsetFilePosition; // presumably the index-file position of this reference's first linear offset - TODO confirm
92 BaiReferenceSummary(void) // default constructor; both file positions below start at 0
95 , FirstBinFilePosition(0)
96 , FirstLinearOffsetFilePosition(0)
100 // convenience typedef for describing a full BAI index file summary (a list of BaiReferenceSummary entries)
101 typedef std::vector<BaiReferenceSummary> BaiFileSummary;
103 // end BamStandardIndex data structures
104 // -----------------------------------------------------------------------------
106 class BamStandardIndex : public BamIndex { // BamIndex subclass providing index operations for the standardized BAM index format (".bai")
110 BamStandardIndex(Internal::BamReaderPrivate* reader); // NOTE(review): single-argument constructor is not 'explicit' - confirm implicit conversion is intended
111 ~BamStandardIndex(void);
113 // BamIndex implementation
115 // builds index from associated BAM file & writes out to index file
117 // returns whether reference has alignments or not
118 bool HasAlignments(const int& referenceID) const;
119 // attempts to use index data to jump to @region, returns success/fail
120 // a "successful" jump indicates no error, but not whether this region has data
121 // * thus, the method sets a flag to indicate whether there are alignments
122 // available after the jump position
123 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
124 // loads existing data from file into memory
125 bool Load(const std::string& filename);
126 // change the index caching behavior
127 void SetCacheMode(const BamIndex::IndexCacheMode& mode);
129 // returns format's file extension
130 static const std::string Extension(void);
134 bool CheckMagicNumber(void);
135 void CloseFile(void);
136 bool IsFileOpen(void) const;
137 bool OpenFile(const std::string& filename, const char* mode);
138 bool Seek(const int64_t& position, const int& origin);
139 int64_t Tell(void) const;
141 // internal BAI index building methods
143 void ClearReferenceEntry(BaiReferenceEntry& refEntry);
144 void SaveAlignmentChunkToBin(BaiBinMap& binMap,
145 const uint32_t& currentBin,
146 const uint64_t& currentOffset,
147 const uint64_t& lastOffset);
148 void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
149 const int& alignmentStartPosition,
150 const int& alignmentStopPosition,
151 const uint64_t& lastOffset);
153 // internal random-access methods
155 bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
156 void CalculateCandidateBins(const uint32_t& begin,
158 std::set<uint16_t>& candidateBins);
159 bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
160 const uint64_t& minOffset,
161 std::set<uint16_t>& candidateBins,
162 std::vector<int64_t>& offsets);
163 uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
164 bool GetOffsets(const BamRegion& region, std::vector<int64_t>& offsets);
165 uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
167 // internal BAI summary (create/load) methods
169 void ReserveForSummary(const int& numReferences);
170 void SaveBinsSummary(const int& refId, const int& numBins);
171 void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
172 bool SkipBins(const int& numBins);
173 bool SkipLinearOffsets(const int& numLinearOffsets);
174 bool SummarizeBins(BaiReferenceSummary& refSummary);
175 bool SummarizeIndexFile(void);
176 bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
177 bool SummarizeReference(BaiReferenceSummary& refSummary);
179 // internal BAI full index input methods
181 bool ReadBinID(uint32_t& binId);
182 bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
183 bool ReadIntoBuffer(const unsigned int& bytesRequested);
184 bool ReadLinearOffset(uint64_t& linearOffset);
185 bool ReadNumAlignmentChunks(int& numAlignmentChunks);
186 bool ReadNumBins(int& numBins);
187 bool ReadNumLinearOffsets(int& numLinearOffsets);
188 bool ReadNumReferences(int& numReferences);
190 // internal BAI full index output methods
192 void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
193 void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
194 bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
195 bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
196 bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
197 bool WriteBins(const int& refId, BaiBinMap& bins);
198 bool WriteHeader(void);
199 bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
200 bool WriteReferenceEntry(BaiReferenceEntry& refEntry);
206 BamIndex::IndexCacheMode m_cacheMode; // caching mode; see SetCacheMode()
207 BaiFileSummary m_indexFileSummary; // list of BaiReferenceSummary entries for the index file
211 unsigned int m_bufferLength;
215 // checks if the buffer is large enough to accommodate the requested size
216 static void CheckBufferSize(char*& buffer,
217 unsigned int& bufferLength,
218 const unsigned int& requestedBytes);
219 // checks if the buffer is large enough to accommodate the requested size
220 // (overload of the above for unsigned char buffers)
221 static void CheckBufferSize(unsigned char*& buffer,
222 unsigned int& bufferLength,
223 const unsigned int& requestedBytes);
225 static const int MAX_BIN; // static constants are only declared here; their values are defined outside this class body
226 static const int BAM_LIDX_SHIFT;
227 static const std::string BAI_EXTENSION;
228 static const char* const BAI_MAGIC;
229 static const int SIZEOF_ALIGNMENTCHUNK;
230 static const int SIZEOF_BINCORE;
231 static const int SIZEOF_LINEAROFFSET;
234 } // namespace Internal
235 } // namespace BamTools
237 #endif // BAM_STANDARD_INDEX_FORMAT_H