1 // ***************************************************************************
2 // BamStandardIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the standardized BAM index format (".bai")
9 // ***************************************************************************
11 #ifndef BAM_STANDARD_INDEX_FORMAT_H
12 #define BAM_STANDARD_INDEX_FORMAT_H
18 // This file is not part of the BamTools API. It exists purely as an
19 // implementation detail. This header file may change from version to
20 // version without notice, or even be removed.
24 #include <api/BamAux.h>
25 #include <api/BamIndex.h>
36 // BAM index constants
// MAX_BIN also serves as the declared extent of the 'bins' array parameter of BinsFromRegion()
37 const int MAX_BIN = 37450; // =(8^6-1)/7+1
// NOTE(review): presumably the bit shift for the linear index window (1 << 14 = 16384) - confirm against the BAM index spec
38 const int BAM_LIDX_SHIFT = 14;
40 // --------------------------------------------------
41 // BamStandardIndex data structures & typedefs
// constructor parameters: 'start' and 'stop' both default to 0
49 Chunk(const uint64_t& start = 0,
50 const uint64_t& stop = 0)
// comparison helper: returns true if lhs.Start is strictly less than rhs.Start
// (only the Start fields are compared)
57 bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
58 return lhs.Start < rhs.Start;
// sequence of Chunk entries
61 typedef std::vector<Chunk> ChunkVector;
// chunk lists keyed by a uint32_t (a bin ID, judging by WriteBin(binId, chunks) below)
62 typedef std::map<uint32_t, ChunkVector> BamBinMap;
// sequence of unsigned 64-bit linear offsets
63 typedef std::vector<uint64_t> LinearOffsetVector;
// index data for one reference: built from a bin map, a linear-offset vector
// and a flag telling whether the reference has alignments
65 struct ReferenceIndex {
// linear offsets for this reference
69 LinearOffsetVector Offsets;
// constructor: every argument is optional (empty bin map, empty offsets, hasAlignments = false)
73 ReferenceIndex(const BamBinMap& binMap = BamBinMap(),
74 const LinearOffsetVector& offsets = LinearOffsetVector(),
75 const bool hasAlignments = false)
// stores the 'hasAlignments' argument in the HasAlignments member
78 , HasAlignments(hasAlignments)
// ReferenceIndex entries keyed by an int32_t (presumably the reference ID - confirm against usage)
82 typedef std::map<int32_t, ReferenceIndex> BamStandardIndexData;
// BamIndex implementation for the standardized BAM index format (".bai"):
// declares the operations for loading, caching, querying and writing index data
84 class BamStandardIndex : public BamIndex {
// constructed from pointers to a BgzfData and a BamReader
88 BamStandardIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
89 ~BamStandardIndex(void);
91 // interface (implements BamIndex virtual methods)
93 // creates index data (in-memory) from current reader data
95 // returns supported file extension
96 const std::string Extension(void) const { return std::string(".bai"); }
97 // returns whether the given reference has alignments or not
98 bool HasAlignments(const int& referenceID) const;
99 // attempts to use index to jump to region; returns success/fail
100 // a "successful" jump indicates no error, but not whether this region has data
101 // * thus, the method sets a flag to indicate whether there are alignments
102 // available after the jump position
103 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
105 // clear all current index offset data in memory
106 void ClearAllData(void);
107 // return file position after header metadata
108 const off_t DataBeginOffset(void) const;
109 // return true if all index data is cached
110 bool HasFullDataCache(void) const;
111 // clears index data from all references except the first
112 void KeepOnlyFirstReferenceOffsets(void);
113 // load index data for all references, return true if loaded OK
114 // @saveData - save data in memory if true, just read & discard if false
115 bool LoadAllReferences(bool saveData = true);
116 // load first reference from file, return true if loaded OK
117 // @saveData - save data in memory if true, just read & discard if false
118 bool LoadFirstReference(bool saveData = true);
119 // load header data from index file, return true if loaded OK
120 bool LoadHeader(void);
121 // position file pointer to first reference begin, return true if skipped OK
122 bool SkipToFirstReference(void);
123 // write index reference data
124 bool WriteAllReferences(void);
125 // write index header data
126 bool WriteHeader(void);
128 // 'internal' methods
131 // -----------------------
132 // index file operations
134 // check index file magic number, return true if OK
135 bool CheckMagicNumber(void);
136 // check index file version, return true if OK
137 bool CheckVersion(void);
138 // load a single index bin entry from file, return true if loaded OK
139 // @saveData - save data in memory if true, just read & discard if false
140 bool LoadBin(ReferenceIndex& refEntry, bool saveData = true);
// NOTE(review): presumably the plural counterpart of LoadBin (all bin entries for refEntry) - confirm in the implementation
141 bool LoadBins(ReferenceIndex& refEntry, bool saveData = true);
142 // load a single index chunk entry from file, return true if loaded OK
143 // @saveData - save data in memory if true, just read & discard if false
144 bool LoadChunk(ChunkVector& chunks, bool saveData = true);
// NOTE(review): presumably the plural counterpart of LoadChunk (all chunk entries into 'chunks') - confirm in the implementation
145 bool LoadChunks(ChunkVector& chunks, bool saveData = true);
146 // load the index linear offsets for a reference entry from file, return true if loaded OK
147 // @saveData - save data in memory if true, just read & discard if false
148 bool LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData = true);
149 // load a single reference from file, return true if loaded OK
150 // @saveData - save data in memory if true, just read & discard if false
151 bool LoadReference(const int& refId, bool saveData = true);
152 // loads number of references (into @numReferences), return true if loaded OK
153 bool LoadReferenceCount(int& numReferences);
154 // position file pointer to desired reference begin, return true if skipped OK
155 bool SkipToReference(const int& refId);
156 // write index data for bin to new index file
157 bool WriteBin(const uint32_t& binId, const ChunkVector& chunks);
158 // write index data for bins to new index file
159 bool WriteBins(const BamBinMap& bins);
160 // write index data for chunk entry to new index file
161 bool WriteChunk(const Chunk& chunk);
162 // write index data for chunk entries to new index file
163 bool WriteChunks(const ChunkVector& chunks);
164 // write index data for linear offsets entry to new index file
165 bool WriteLinearOffsets(const LinearOffsetVector& offsets);
166 // write index data for a single reference to new index file
167 bool WriteReference(const ReferenceIndex& refEntry);
169 // -----------------------
170 // index data operations
172 // calculate bins that overlap region
// @bins - caller-supplied array declared with MAX_BIN elements
173 int BinsFromRegion(const BamRegion& region,
174 const bool isRightBoundSpecified,
175 uint16_t bins[MAX_BIN]);
176 // clear all index offset data for desired reference
177 void ClearReferenceOffsets(const int& refId);
178 // calculates offset(s) for a given region
// @offsets - output vector of offsets; @hasAlignmentsInRegion - output flag
179 bool GetOffsets(const BamRegion& region,
180 const bool isRightBoundSpecified,
181 std::vector<int64_t>& offsets,
182 bool* hasAlignmentsInRegion);
183 // returns true if index cache has data for desired reference
184 bool IsDataLoaded(const int& refId) const;
185 // clears index data from all references except the one specified
186 void KeepOnlyReferenceOffsets(const int& refId);
187 // simplifies index by merging 'chunks'
188 void MergeChunks(void);
189 // saves BAM bin entry for index
190 void SaveBinEntry(BamBinMap& binMap,
191 const uint32_t& saveBin,
192 const uint64_t& saveOffset,
193 const uint64_t& lastOffset);
194 // saves linear offset entry for index
195 void SaveLinearOffset(LinearOffsetVector& offsets,
196 const BamAlignment& bAlignment,
197 const uint64_t& lastOffset);
198 // initializes index data structure to hold @count references
199 void SetReferenceCount(const int& count);
// in-memory index data: a map of ReferenceIndex entries keyed by int32_t
204 BamStandardIndexData m_indexData;
// NOTE(review): presumably the value reported by DataBeginOffset() - confirm in the implementation
205 off_t m_dataBeginOffset;
// NOTE(review): presumably the value reported by HasFullDataCache() - confirm in the implementation
206 bool m_hasFullDataCache;
210 } // namespace Internal
211 } // namespace BamTools
213 #endif // BAM_STANDARD_INDEX_FORMAT_H