1 // ***************************************************************************
2 // BamToolsIndex.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 January 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index operations for the BamTools index format (".bti")
9 // ***************************************************************************
11 #ifndef BAMTOOLS_INDEX_FORMAT_H
12 #define BAMTOOLS_INDEX_FORMAT_H
18 // This file is not part of the BamTools API. It exists purely as an
19 // implementation detail. This header file may change from version to
20 // version without notice, or even be removed.
24 #include <api/BamAux.h>
25 #include <api/BamIndex.h>
// file extension for BamTools-format index files; this is the value that
// BamToolsIndex::Extension() returns
34 const std::string BTI_EXTENSION = ".bti";
36 // individual index offset entry
// Bundles three values: a max end position, a start offset, and a start position.
// The constructor copies each argument into the same-named member; every
// argument defaults to 0.
// NOTE(review): the StartOffset member declaration, the constructor body, and the
// closing brace are not visible in this listing (its embedded line numbers skip) -
// confirm against the complete header.
37 struct BamToolsIndexEntry {
40 int32_t MaxEndPosition; // initialized from @maxEndPosition
42 int32_t StartPosition; // initialized from @startPosition
// @maxEndPosition - value stored in MaxEndPosition (default 0)
// @startOffset    - value stored in StartOffset (default 0); presumably a BAM file offset - TODO confirm
// @startPosition  - value stored in StartPosition (default 0)
45 BamToolsIndexEntry(const int32_t& maxEndPosition = 0,
46 const int64_t& startOffset = 0,
47 const int32_t& startPosition = 0)
48 : MaxEndPosition(maxEndPosition)
49 , StartOffset(startOffset)
50 , StartPosition(startPosition)
54 // reference index entry
// Holds the list of index offset entries for one reference, plus a
// HasAlignments flag that the default constructor sets to false.
// NOTE(review): the HasAlignments member declaration, the constructor body, and
// the closing brace are not visible in this listing - confirm against the
// complete header.
55 struct BamToolsReferenceEntry {
59 std::vector<BamToolsIndexEntry> Offsets; // index offset entries stored for this reference
// default constructor: starts with HasAlignments == false
62 BamToolsReferenceEntry(void)
63 : HasAlignments(false)
67 // the actual index data structure
// ordered map from an int key to that key's BamToolsReferenceEntry
// (presumably the key is the reference ID - TODO confirm against the .cpp)
68 typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData;
// Provides index operations for the BamTools index format (".bti"); derives
// from BamIndex.
// NOTE(review): the embedded line numbers in this listing skip, so access
// specifiers, some declarations, and the closing brace are not visible here -
// confirm member visibility against the complete header.
70 class BamToolsIndex : public BamIndex {
72 // keep a list of any supported versions here
73 // (might be useful later to handle any 'legacy' versions if the format changes)
74 // listed, for example, like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
76 // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
78 // if ( indexVersion >= BTI_1_2 )
82 enum Version { BTI_1_0 = 1 // the only enumerator visible in this listing
93 // interface (implements BamIndex virtual methods)
95 // creates index data (in-memory) from @reader data
96 bool Build(Internal::BamReaderPrivate* reader);
97 // returns supported file extension (BTI_EXTENSION, returned by value)
98 const std::string Extension(void) { return BTI_EXTENSION; }
99 // returns whether the reference @referenceID has alignments or not
100 bool HasAlignments(const int& referenceID) const;
101 // attempts to use index to jump to @region in @reader; returns success/fail
102 // a "successful" jump indicates no error, but not whether this region has data
103 // * thus, the method sets a flag (@hasAlignmentsInRegion) to indicate whether
104 // there are alignments available after the jump position
105 bool Jump(Internal::BamReaderPrivate* reader,
106 const BamTools::BamRegion& region,
107 bool *hasAlignmentsInRegion);
110 // clear all current index offset data in memory
111 void ClearAllData(void);
112 // return file position after header metadata
113 off_t DataBeginOffset(void) const;
114 // return true if all index data is cached
115 bool HasFullDataCache(void) const;
116 // clears index data from all references except the first
117 void KeepOnlyFirstReferenceOffsets(void);
118 // load index data for all references, return true if loaded OK
119 // @saveData - save data in memory if true (the default), just read & discard if false
120 bool LoadAllReferences(bool saveData = true);
121 // load first reference from file, return true if loaded OK
122 // @saveData - save data in memory if true (the default), just read & discard if false
123 bool LoadFirstReference(bool saveData = true);
124 // load header data from index file, return true if loaded OK
125 bool LoadHeader(void);
126 // position file pointer at the beginning of the first reference, return true if skipped OK
127 bool SkipToFirstReference(void);
128 // write index reference data
129 bool WriteAllReferences(void);
130 // write index header data
131 bool WriteHeader(void);
136 // -----------------------
137 // index file operations
139 // check index file magic number, return true if OK
140 bool CheckMagicNumber(void);
141 // check index file version, return true if OK
142 bool CheckVersion(void);
143 // load a single index entry from file, return true if loaded OK
144 // @refId    - reference the entry is loaded for
145 // @saveData - save data in memory if true (the default), just read & discard if false
145 bool LoadIndexEntry(const int& refId, bool saveData = true);
146 // load a single reference from file, return true if loaded OK
147 // @saveData - save data in memory if true (the default), just read & discard if false
148 bool LoadReference(const int& refId, bool saveData = true);
149 // loads number of references into @numReferences (output parameter), return true if loaded OK
150 bool LoadReferenceCount(int& numReferences);
151 // position file pointer at the beginning of the desired reference, return true if skipped OK
152 bool SkipToReference(const int& refId);
153 // write current reference index data to new index file
154 bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry);
155 // write current index offset entry to new index file
156 bool WriteIndexEntry(const BamToolsIndexEntry& entry);
158 // -----------------------
159 // index data operations
161 // clear all index offset data for desired reference
162 void ClearReferenceOffsets(const int& refId);
163 // calculate BAM file offset for desired region
164 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
165 // check @hasAlignmentsInRegion to determine this status
166 // @region - target region
167 // @offset - resulting seek target (output parameter)
168 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
169 bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
170 // returns true if index cache has data for desired reference
171 bool IsDataLoaded(const int& refId) const;
172 // clears index data from all references except the one specified
173 void KeepOnlyReferenceOffsets(const int& refId);
174 // saves an index offset entry in memory
175 void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry);
176 // pre-allocates size for offset vector
177 void SetOffsetCount(const int& refId, const int& offsetCount);
178 // initializes index data structure to hold @count references
179 void SetReferenceCount(const int& count);
184 BamToolsIndexData m_indexData; // index data structure (see BamToolsIndexData)
185 off_t m_dataBeginOffset; // presumably the position reported by DataBeginOffset() - confirm in the .cpp
186 bool m_hasFullDataCache; // presumably the flag reported by HasFullDataCache() - confirm in the .cpp
188 int32_t m_inputVersion; // Version is serialized as int
189 Version m_outputVersion; // NOTE(review): presumably the version used when writing an index - confirm in the .cpp
192 } // namespace Internal
193 } // namespace BamTools
195 #endif // BAMTOOLS_INDEX_FORMAT_H