1 // ***************************************************************************
2 // BamIndex.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 20 October 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the standardized BAM index format
9 // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
10 // ***************************************************************************
25 // --------------------------------------------------
26 // BamIndex base class
// Abstract interface for BAM index implementations. It declares the public
// index operations (Build, Jump, Load, Write, ...), the lower-level hooks that
// derived classes must implement (the '=0' methods), and static factory methods
// that return a BamStandardIndex or BamToolsIndex depending on which index
// files exist on disk.
29 // specify index-caching behavior
31 // @FullIndexCaching - store entire index file contents in memory
32 // @LimitedIndexCaching - store only index data for current reference
34 // @NoIndexCaching - do not store any index data. Load as needed to
35 // calculate jump offset
36 public: enum BamIndexCacheMode { FullIndexCaching = 0
// @bgzf, @reader - BGZF data object and reader this index works with
// NOTE(review): presumably kept in m_BGZF / m_reader without taking ownership - confirm in the .cpp
43 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
// virtual so that derived index objects can be destroyed through a BamIndex*
44 virtual ~BamIndex(void);
48 // creates index data (in-memory) from current reader data; returns success/fail
49 virtual bool Build(void) =0;
50 // returns supported file extension (derived classes in this file return ".bai" / ".bti")
51 virtual const std::string Extension(void) const =0;
52 // returns whether the reference identified by referenceID has alignments or not
53 virtual bool HasAlignments(const int& referenceID) const =0;
54 // attempts to use index to jump to region; returns success/fail
55 // a "successful" jump indicates no error, but not whether this region has data
56 // * thus, the method sets a flag to indicate whether there are alignments
57 // available after the jump position
// @region - region to jump to
// @hasAlignmentsInRegion - output flag set by the method, as described above
58 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
59 // loads existing data from file into memory
// (virtual but not pure: a base-class implementation is declared here, derived classes may override)
60 virtual bool Load(const std::string& filename);
61 // change the index caching behavior
// @mode - one of the BamIndexCacheMode values
62 virtual void SetCacheMode(const BamIndexCacheMode mode);
63 // writes in-memory index data out to file
64 // N.B. - @bamFilename is the original BAM filename; the method will modify it to use the applicable extension
65 virtual bool Write(const std::string& bamFilename);
67 // derived-classes MUST provide implementation (everything below marked '=0')
69 // clear all current index offset data in memory
70 virtual void ClearAllData(void) =0;
71 // return file position after header metadata
72 virtual const off_t DataBeginOffset(void) const =0;
73 // return true if all index data is cached
74 virtual bool HasFullDataCache(void) const =0;
75 // clears index data from all references except the first
76 virtual void KeepOnlyFirstReferenceOffsets(void) =0;
77 // load index data for all references, return true if loaded OK
78 // @saveData - save data in memory if true, just read & discard if false
79 virtual bool LoadAllReferences(bool saveData = true) =0;
80 // load first reference from file, return true if loaded OK
81 // @saveData - save data in memory if true, just read & discard if false
82 virtual bool LoadFirstReference(bool saveData = true) =0;
83 // load header data from index file, return true if loaded OK
84 virtual bool LoadHeader(void) =0;
85 // position file pointer to first reference begin, return true if skipped OK
86 virtual bool SkipToFirstReference(void) =0;
87 // write index reference data, return true if written OK
88 virtual bool WriteAllReferences(void) =0;
89 // write index header data, return true if written OK
90 virtual bool WriteHeader(void) =0;
94 // rewind index file to beginning of index data, return true if rewound OK
// NOTE(review): the declaration this comment describes does not appear in this listing - confirm it was not lost
98 // return true if FILE* is open
99 bool IsOpen(void) const;
100 // opens index file according to requested mode, return true if opened OK
// @filename - index file to open
// @mode - open mode string (NOTE(review): presumably an fopen()-style mode, given the FILE* mentioned above - confirm)
101 bool OpenIndexFile(const std::string& filename, const std::string& mode);
102 // updates in-memory cache of index data, depending on current cache mode
103 void UpdateCache(void);
105 // factory methods for returning proper BamIndex-derived type based on available index files
108 // returns index based on BAM filename 'stub'
109 // checks first for preferred type, returns that type if found
110 // (if not found, attempts to load other type(s), returns 0 if NONE found)
112 // ** default preferred type is BamToolsIndex ** use this anytime it exists
113 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
// @bamFilename - BAM filename; ".bti" / ".bai" is appended to it to find the index file
// @bgzf, @reader - forwarded to the constructor of the index that is created
// @type - preferred index type (defaults to BamIndex::BAMTOOLS)
114 static BamIndex* FromBamFilename(const std::string& bamFilename,
115 BamTools::BgzfData* bgzf,
116 BamTools::BamReader* reader,
117 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
119 // returns index based on explicitly named index file (or 0 if not found)
// the derived type is chosen from the filename's extension (".bti" or ".bai")
120 static BamIndex* FromIndexFilename(const std::string& indexFilename,
121 BamTools::BgzfData* bgzf,
122 BamTools::BamReader* reader);
// data members
// m_BGZF, m_reader - the BGZF data object and reader associated with this index
// m_references - NOTE(review): presumably the reader's reference sequence list - confirm
// m_cacheMode - current caching behavior (see BamIndexCacheMode / SetCacheMode)
126 BamTools::BgzfData* m_BGZF;
127 BamTools::BamReader* m_reader;
128 BamTools::RefVector m_references;
129 BamIndex::BamIndexCacheMode m_cacheMode;
133 // --------------------------------------------------
134 // BamStandardIndex class
136 // implements standardized (per SAM/BAM spec) index file ops
// Concrete BamIndex for index files with the ".bai" extension (see Extension()).
// All methods declared here implement the virtual interface of BamIndex; the
// index data itself is kept in the forward-declared BamStandardIndexPrivate
// struct, reached through the pointer 'd'.
137 class BamStandardIndex : public BamIndex {
// @bgzf, @reader - same arguments as the BamIndex constructor
142 BamStandardIndex(BamTools::BgzfData* bgzf,
143 BamTools::BamReader* reader);
144 ~BamStandardIndex(void);
146 // interface (implements BamIndex virtual methods)
148 // creates index data (in-memory) from current reader data
// NOTE(review): no Build() declaration follows this comment in this listing - confirm it was not lost
150 // returns supported file extension
151 const std::string Extension(void) const { return std::string(".bai"); }
152 // returns whether the reference identified by referenceID has alignments or not
153 bool HasAlignments(const int& referenceID) const;
154 // attempts to use index to jump to region; returns success/fail
155 // a "successful" jump indicates no error, but not whether this region has data
156 // * thus, the method sets a flag to indicate whether there are alignments
157 // available after the jump position
// @region - region to jump to
// @hasAlignmentsInRegion - output flag set by the method, as described above
158 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
160 // clear all current index offset data in memory
161 void ClearAllData(void);
162 // return file position after header metadata
163 const off_t DataBeginOffset(void) const;
164 // return true if all index data is cached
165 bool HasFullDataCache(void) const;
166 // clears index data from all references except the first
167 void KeepOnlyFirstReferenceOffsets(void);
168 // load index data for all references, return true if loaded OK
169 // @saveData - save data in memory if true, just read & discard if false
170 bool LoadAllReferences(bool saveData = true);
171 // load first reference from file, return true if loaded OK
172 // @saveData - save data in memory if true, just read & discard if false
173 bool LoadFirstReference(bool saveData = true);
174 // load header data from index file, return true if loaded OK
175 bool LoadHeader(void);
176 // position file pointer to first reference begin, return true if skipped OK
177 bool SkipToFirstReference(void);
178 // write index reference data, return true if written OK
179 bool WriteAllReferences(void);
180 // write index header data, return true if written OK
181 bool WriteHeader(void);
183 // internal implementation
// the struct is only forward-declared here; its definition is not visible in this header
185 struct BamStandardIndexPrivate;
186 BamStandardIndexPrivate* d;
189 // --------------------------------------------------
190 // BamToolsIndex class
192 // implements BamTools-specific index file ops
// Concrete BamIndex for index files with the ".bti" extension (see Extension()).
// All methods declared here implement the virtual interface of BamIndex; the
// index data itself is kept in the forward-declared BamToolsIndexPrivate
// struct, reached through the pointer 'd'.
193 class BamToolsIndex : public BamIndex {
// @bgzf, @reader - same arguments as the BamIndex constructor
197 BamToolsIndex(BamTools::BgzfData* bgzf,
198 BamTools::BamReader* reader);
199 ~BamToolsIndex(void);
201 // interface (implements BamIndex virtual methods)
203 // creates index data (in-memory) from current reader data
// NOTE(review): no Build() declaration follows this comment in this listing - confirm it was not lost
205 // returns supported file extension
206 const std::string Extension(void) const { return std::string(".bti"); }
207 // returns whether the reference identified by referenceID has alignments or not
208 bool HasAlignments(const int& referenceID) const;
209 // attempts to use index to jump to region; returns success/fail
210 // a "successful" jump indicates no error, but not whether this region has data
211 // * thus, the method sets a flag to indicate whether there are alignments
212 // available after the jump position
// @region - region to jump to
// @hasAlignmentsInRegion - output flag set by the method, as described above
213 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
215 // clear all current index offset data in memory
216 void ClearAllData(void);
217 // return file position after header metadata
218 const off_t DataBeginOffset(void) const;
219 // return true if all index data is cached
220 bool HasFullDataCache(void) const;
221 // clears index data from all references except the first
222 void KeepOnlyFirstReferenceOffsets(void);
223 // load index data for all references, return true if loaded OK
224 // @saveData - save data in memory if true, just read & discard if false
225 bool LoadAllReferences(bool saveData = true);
226 // load first reference from file, return true if loaded OK
227 // @saveData - save data in memory if true, just read & discard if false
228 bool LoadFirstReference(bool saveData = true);
229 // load header data from index file, return true if loaded OK
230 bool LoadHeader(void);
231 // position file pointer to first reference begin, return true if skipped OK
232 bool SkipToFirstReference(void);
233 // write index reference data, return true if written OK
234 bool WriteAllReferences(void);
235 // write index header data, return true if written OK
236 bool WriteHeader(void);
238 // internal implementation
// the struct is only forward-declared here; its definition is not visible in this header
240 struct BamToolsIndexPrivate;
241 BamToolsIndexPrivate* d;
244 // --------------------------------------------------
245 // BamIndex factory methods
247 // returns index based on BAM filename 'stub'
248 // checks first for preferred type, returns that type if found
249 // (if not found, attempts to load other type(s), returns 0 if NONE found)
251 // ** default preferred type is BamToolsIndex ** use this anytime it exists
253 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
254 BamTools::BgzfData* bgzf,
255 BamTools::BamReader* reader,
256 const BamIndex::PreferredIndexType& type)
258 // ---------------------------------------------------
259 // attempt to load preferred type first
261 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
262 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
263 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
264 return new BamToolsIndex(bgzf, reader);
266 const std::string standardIndexFilename = bamFilename + ".bai";
267 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
268 if ( (type == BamIndex::STANDARD) && standardIndexExists )
269 return new BamStandardIndex(bgzf, reader);
271 // ----------------------------------------------------
272 // preferred type could not be found, try other (non-preferred) types
273 // if none found, return 0
275 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader);
276 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader);
280 // returns index based on explicitly named index file (or 0 if not found)
282 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
283 BamTools::BgzfData* bgzf,
284 BamTools::BamReader* reader)
286 // see if specified file exists
287 const bool indexExists = BamTools::FileExists(indexFilename);
288 if ( !indexExists ) return 0;
290 const std::string bamtoolsIndexExtension(".bti");
291 const std::string standardIndexExtension(".bai");
293 // if has bamtoolsIndexExtension
294 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
295 return new BamToolsIndex(bgzf, reader);
297 // if has standardIndexExtension
298 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
299 return new BamStandardIndex(bgzf, reader);
301 // otherwise, unsupported file type
305 } // namespace BamTools
307 #endif // BAM_INDEX_H