1 // ***************************************************************************
2 // BamStandardIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 January 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the standardized BAM index format (".bai")
9 // ***************************************************************************
11 #ifndef BAM_STANDARD_INDEX_FORMAT_H
12 #define BAM_STANDARD_INDEX_FORMAT_H
18 // This file is not part of the BamTools API. It exists purely as an
19 // implementation detail. This header file may change from version to
20 // version without notice, or even be removed.
24 #include <api/BamAux.h>
25 #include <api/BamIndex.h>
36 // BAM index constants
37 const int MAX_BIN = 37450; // =(8^6-1)/7+1; also sizes the bins[] parameter of BinsFromRegion()
38 const int BAM_LIDX_SHIFT = 14; // NOTE(review): presumably the linear-index window shift (1 << 14 = 16384) - confirm against the BAI spec
39 const std::string BAI_EXTENSION = ".bai"; // value returned by BamStandardIndex::Extension()
41 // --------------------------------------------------
42 // BamStandardIndex data structures & typedefs
50 Chunk(const uint64_t& start = 0, // constructor: @start and @stop both default to 0,
51 const uint64_t& stop = 0) // so a Chunk can be default-constructed
58 bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) { // ordering predicate for Chunk objects
59 return lhs.Start < rhs.Start; // compares Start only: true if @lhs starts before @rhs
62 typedef std::vector<Chunk> ChunkVector; // sequence of Chunk entries
63 typedef std::map<uint32_t, ChunkVector> BamBinMap; // chunk lists keyed by uint32_t (presumably the bin ID, cf. WriteBin() - confirm)
64 typedef std::vector<uint64_t> LinearOffsetVector; // sequence of 64-bit offsets (type of ReferenceIndex::Offsets)
66 struct ReferenceIndex { // index data held for a single reference (value type of BamStandardIndexData)
70 LinearOffsetVector Offsets; // linear offsets stored for this reference
74 ReferenceIndex(const BamBinMap& binMap = BamBinMap(), // every argument is optional; defaults are
75 const LinearOffsetVector& offsets = LinearOffsetVector(), // an empty bin map, an empty offset vector,
76 const bool hasAlignments = false) // and hasAlignments == false
79 , HasAlignments(hasAlignments) // stores @hasAlignments unchanged
83 typedef std::map<int32_t, ReferenceIndex> BamStandardIndexData; // ReferenceIndex entries keyed by int32_t (presumably the reference ID - confirm)
85 class BamStandardIndex : public BamIndex { // BamIndex implementation for the standardized BAM index format (".bai")
89 BamStandardIndex(void);
90 ~BamStandardIndex(void);
92 // interface (implements BamIndex virtual methods)
94 // creates index data (in-memory) from @reader data
95 bool Build(Internal::BamReaderPrivate* reader);
96 // returns supported file extension (BAI_EXTENSION)
97 const std::string Extension(void) { return BAI_EXTENSION; }
98 // returns whether reference has alignments or not
99 bool HasAlignments(const int& referenceID) const;
100 // attempts to use index to jump to @region in @reader; returns success/fail
101 // a "successful" jump indicates no error, but not whether this region has data
102 // * thus, the method sets a flag (@hasAlignmentsInRegion) to indicate whether
103 // there are alignments available after the jump position
104 bool Jump(Internal::BamReaderPrivate* reader,
105 const BamTools::BamRegion& region,
106 bool* hasAlignmentsInRegion);
109 // clear all current index offset data in memory
110 void ClearAllData(void);
111 // return file position after header metadata
112 off_t DataBeginOffset(void) const;
113 // return true if all index data is cached
114 bool HasFullDataCache(void) const;
115 // clears index data from all references except the first
116 void KeepOnlyFirstReferenceOffsets(void);
117 // load index data for all references, return true if loaded OK
118 // @saveData - save data in memory if true, just read & discard if false
119 bool LoadAllReferences(bool saveData = true);
120 // load first reference from file, return true if loaded OK
121 // @saveData - save data in memory if true, just read & discard if false
122 bool LoadFirstReference(bool saveData = true);
123 // load header data from index file, return true if loaded OK
124 bool LoadHeader(void);
125 // position file pointer to first reference begin, return true if skipped OK
126 bool SkipToFirstReference(void);
127 // write index reference data
128 bool WriteAllReferences(void);
129 // write index header data
130 bool WriteHeader(void);
132 // 'internal' methods
135 // -----------------------
136 // index file operations
138 // check index file magic number, return true if OK
139 bool CheckMagicNumber(void);
140 // check index file version, return true if OK
141 bool CheckVersion(void);
142 // load a single index bin entry from file, return true if loaded OK
143 // @saveData - save data in memory if true, just read & discard if false
144 bool LoadBin(ReferenceIndex& refEntry, bool saveData = true);
// NOTE(review): presumably loads every bin entry for @refEntry (same @saveData
// meaning as LoadBin) - confirm against the implementation
145 bool LoadBins(ReferenceIndex& refEntry, bool saveData = true);
146 // load a single index chunk entry from file, return true if loaded OK
147 // @saveData - save data in memory if true, just read & discard if false
148 bool LoadChunk(ChunkVector& chunks, bool saveData = true);
// NOTE(review): presumably loads every chunk entry into @chunks (same @saveData
// meaning as LoadChunk) - confirm against the implementation
149 bool LoadChunks(ChunkVector& chunks, bool saveData = true);
150 // load index linear offsets entry from file, return true if loaded OK
151 // @saveData - save data in memory if true, just read & discard if false
152 bool LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData = true);
153 // load a single reference from file, return true if loaded OK
154 // @saveData - save data in memory if true, just read & discard if false
155 bool LoadReference(const int& refId, bool saveData = true);
156 // loads number of references, return true if loaded OK
157 bool LoadReferenceCount(int& numReferences);
158 // position file pointer to desired reference begin, return true if skipped OK
159 bool SkipToReference(const int& refId);
160 // write index data for bin to new index file
161 bool WriteBin(const uint32_t& binId, const ChunkVector& chunks);
162 // write index data for bins to new index file
163 bool WriteBins(const BamBinMap& bins);
164 // write index data for chunk entry to new index file
165 bool WriteChunk(const Chunk& chunk);
166 // write index data for chunk entries to new index file
167 bool WriteChunks(const ChunkVector& chunks);
168 // write index data for linear offsets entry to new index file
169 bool WriteLinearOffsets(const LinearOffsetVector& offsets);
170 // write index data for a single reference to new index file
171 bool WriteReference(const ReferenceIndex& refEntry);
173 // -----------------------
174 // index data operations
176 // calculate bins that overlap region
// @bins - caller-supplied array with room for MAX_BIN entries
// NOTE(review): the int return value is presumably the number of bins stored
// in @bins - confirm against the implementation
177 int BinsFromRegion(const BamRegion& region,
178 const RefVector& references,
179 const bool isRightBoundSpecified,
180 uint16_t bins[MAX_BIN]);
181 // clear all index offset data for desired reference
182 void ClearReferenceOffsets(const int& refId);
183 // calculates offset(s) for a given region
184 bool GetOffsets(const BamRegion& region,
185 const RefVector& references,
186 const bool isRightBoundSpecified,
187 std::vector<int64_t>& offsets,
188 bool* hasAlignmentsInRegion);
189 // returns true if index cache has data for desired reference
190 bool IsDataLoaded(const int& refId) const;
191 // clears index data from all references except the one specified
192 void KeepOnlyReferenceOffsets(const int& refId);
193 // simplifies index by merging 'chunks'
194 void MergeChunks(void);
195 // saves BAM bin entry for index
196 void SaveBinEntry(BamBinMap& binMap,
197 const uint32_t& saveBin,
198 const uint64_t& saveOffset,
199 const uint64_t& lastOffset);
200 // saves linear offset entry for index
201 void SaveLinearOffset(LinearOffsetVector& offsets,
202 const BamAlignment& bAlignment,
203 const uint64_t& lastOffset);
204 // initializes index data structure to hold @count references
205 void SetReferenceCount(const int& count);
210 BamStandardIndexData m_indexData; // in-memory index data, one ReferenceIndex per map key
211 off_t m_dataBeginOffset; // NOTE(review): presumably the value reported by DataBeginOffset() - confirm
212 bool m_hasFullDataCache; // NOTE(review): presumably the value reported by HasFullDataCache() - confirm
216 } // namespace Internal
217 } // namespace BamTools
219 #endif // BAM_STANDARD_INDEX_FORMAT_H