1 // ***************************************************************************
2 // BamToolsIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the BamTools index format (".bti")
9 // ***************************************************************************
11 #ifndef BAMTOOLS_INDEX_FORMAT_H
12 #define BAMTOOLS_INDEX_FORMAT_H
18 // This file is not part of the BamTools API. It exists purely as an
19 // implementation detail. This header file may change from version to
20 // version without notice, or even be removed.
24 #include <api/BamAux.h>
25 #include <api/BamIndex.h>
// individual index offset entry
//
// Plain value type bundling the three numbers that make up one offset record.
// NOTE(review): the per-field meanings below are inferred from the member
// names only - confirm against the implementation file.
struct BamToolsIndexEntry {

    // data members
    int32_t MaxEndPosition;  // presumably the largest alignment end position covered by this entry
    int64_t StartOffset;     // presumably the file offset associated with this entry
    int32_t StartPosition;   // presumably the first alignment start position covered by this entry

    // ctor
    // Every argument defaults to 0, so this also acts as the default
    // constructor (needed for use as a std::vector element that is resized).
    BamToolsIndexEntry(const int32_t& maxEndPosition = 0,
                       const int64_t& startOffset    = 0,
                       const int32_t& startPosition  = 0)
        : MaxEndPosition(maxEndPosition)
        , StartOffset(startOffset)
        , StartPosition(startPosition)
    { }
};
52 // reference index entry
53 struct BamToolsReferenceEntry {
57 std::vector<BamToolsIndexEntry> Offsets;
60 BamToolsReferenceEntry(void)
61 : HasAlignments(false)
65 // the actual index data structure:
// an ordered map from an int key to that key's BamToolsReferenceEntry
// (presumably the key is the reference ID that BamToolsIndex methods receive
//  as 'refId' - TODO confirm against the implementation file)
66 typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData;
// Index implementation for the BamTools index format (".bti", see Extension()).
// Derives from BamIndex; the declarations grouped under "interface" are
// described below as implementing BamIndex virtual methods.
//
// NOTE(review): this listing appears to have lost lines - the Version enum is
// never closed, and no access specifiers or closing brace of the class are
// visible. Restore them before attempting to compile this header.
68 class BamToolsIndex : public BamIndex {
70 // keep a list of any supported versions here
71 // (might be useful later to handle any 'legacy' versions if the format changes)
72 // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
74 // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
76 // if ( indexVersion >= BTI_1_2 )
// NOTE(review): only the first enumerator is visible; any later enumerators
// and the closing "};" are missing from this listing.
80 enum Version { BTI_1_0 = 1
// ctor: receives the BGZF data handle and the BAM reader as raw pointers
// NOTE(review): ownership/lifetime of these pointers is not visible here
88 BamToolsIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
91 // interface (implements BamIndex virtual methods)
93 // creates index data (in-memory) from current reader data
// NOTE(review): the declaration described by the comment above is missing from this listing
95 // returns supported file extension (always ".bti")
96 const std::string Extension(void) const { return std::string(".bti"); }
97 // returns whether the given reference has alignments or not
98 bool HasAlignments(const int& referenceID) const;
99 // attempts to use index to jump to region; returns success/fail
100 // a "successful" jump indicates no error, but not whether this region has data
101 // * thus, the method sets a flag to indicate whether there are alignments
102 // available after the jump position
// @region                - target region
// @hasAlignmentsInRegion - output flag described above, passed by pointer
103 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
105 // clear all current index offset data in memory
106 void ClearAllData(void);
107 // return file position after header metadata
108 const off_t DataBeginOffset(void) const;
109 // return true if all index data is cached
110 bool HasFullDataCache(void) const;
111 // clears index data from all references except the first
112 void KeepOnlyFirstReferenceOffsets(void);
113 // load index data for all references, return true if loaded OK
114 // @saveData - save data in memory if true, just read & discard if false
115 bool LoadAllReferences(bool saveData = true);
116 // load first reference from file, return true if loaded OK
117 // @saveData - save data in memory if true, just read & discard if false
118 bool LoadFirstReference(bool saveData = true);
119 // load header data from index file, return true if loaded OK
120 bool LoadHeader(void);
121 // position file pointer to first reference begin, return true if skipped OK
122 bool SkipToFirstReference(void);
123 // write index reference data, return value presumably signals success - TODO confirm
124 bool WriteAllReferences(void);
125 // write index header data, return value presumably signals success - TODO confirm
126 bool WriteHeader(void);
128 // 'internal' methods
131 // -----------------------
132 // index file operations
134 // check index file magic number, return true if OK
135 bool CheckMagicNumber(void);
136 // check index file version, return true if OK
137 bool CheckVersion(void);
138 // return true if FILE* is open
139 bool IsOpen(void) const;
140 // load a single index entry from file, return true if loaded OK
// @refId    - reference the loaded entry belongs to
141 // @saveData - save data in memory if true, just read & discard if false
142 bool LoadIndexEntry(const int& refId, bool saveData = true);
143 // load a single reference from file, return true if loaded OK
// @refId    - reference to load
144 // @saveData - save data in memory if true, just read & discard if false
145 bool LoadReference(const int& refId, bool saveData = true);
146 // loads number of references, return true if loaded OK
// @numReferences - non-const reference; presumably receives the loaded count
147 bool LoadReferenceCount(int& numReferences);
148 // position file pointer to desired reference begin, return true if skipped OK
149 bool SkipToReference(const int& refId);
150 // write current reference index data to new index file
151 bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry);
152 // write current index offset entry to new index file
153 bool WriteIndexEntry(const BamToolsIndexEntry& entry);
155 // -----------------------
156 // index data operations
158 // clear all index offset data for desired reference
159 void ClearReferenceOffsets(const int& refId);
160 // calculate BAM file offset for desired region
161 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
162 // check @hasAlignmentsInRegion to determine this status
163 // @region - target region
164 // @offset - resulting seek target
165 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
166 bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
167 // returns true if index cache has data for desired reference
168 bool IsDataLoaded(const int& refId) const;
169 // clears index data from all references except the one specified
170 void KeepOnlyReferenceOffsets(const int& refId);
171 // saves an index offset entry in memory
172 void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry);
173 // pre-allocates size for offset vector
174 void SetOffsetCount(const int& refId, const int& offsetCount);
175 // initializes index data structure to hold @count references
176 void SetReferenceCount(const int& count);
// data members
// in-memory index data (map of per-reference entries)
181 BamToolsIndexData m_indexData;
// presumably the value reported by DataBeginOffset() - TODO confirm
182 off_t m_dataBeginOffset;
// presumably the value reported by HasFullDataCache() - TODO confirm
183 bool m_hasFullDataCache;
185 int32_t m_inputVersion; // Version is serialized as int
// kept as the Version enum type, unlike m_inputVersion above
186 Version m_outputVersion;
189 } // namespace Internal
190 } // namespace BamTools
192 #endif // BAMTOOLS_INDEX_FORMAT_H