1 // ***************************************************************************
2 // BamIndex.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 20 October 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the standardized BAM index format
9 // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
10 // ***************************************************************************
15 #include <api/api_global.h>
16 #include <api/BamAux.h>
26 // --------------------------------------------------
27 // BamIndex base class
// Abstract base class for BAM index implementations: it declares the
// operations every index format provides (pure virtual where marked =0) and
// the static factory methods that choose a concrete index type.
28 class API_EXPORT BamIndex {
30 // index-caching behavior, selected through SetCacheMode()
32 // @FullIndexCaching - store entire index file contents in memory
33 // @LimitedIndexCaching - store only index data for current reference
35 // @NoIndexCaching - do not store any index data. Load as needed to
36 // calculate jump offset
37 public: enum BamIndexCacheMode { FullIndexCaching = 0
// NOTE(review): only the first enumerator is visible here; the other modes
// named above and the enum's closing brace should be confirmed.
// constructor takes the BGZF stream and the reader this index works with
// (presumably kept in m_BGZF / m_reader - the constructor body is not shown here)
44 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
// virtual, so a derived index can be destroyed through a BamIndex*
45 virtual ~BamIndex(void);
49 // creates index data (in-memory) from current reader data
50 virtual bool Build(void) =0;
51 // returns the file extension supported by the concrete index type
52 virtual const std::string Extension(void) const =0;
53 // returns whether the reference has alignments or not
54 virtual bool HasAlignments(const int& referenceID) const =0;
55 // attempts to use index to jump to region; returns success/fail
56 // a "successful" jump indicates no error, but not whether this region has data
57 // * thus, the method sets the flag behind hasAlignmentsInRegion to indicate
58 // whether there are alignments available after the jump position
59 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
// The next three methods are virtual but not pure: the base class declares
// them without =0, so derived classes may override them but need not.
60 // loads existing data from file into memory
61 virtual bool Load(const std::string& filename);
62 // changes the index caching behavior
63 virtual void SetCacheMode(const BamIndexCacheMode mode);
64 // writes in-memory index data out to file
65 // N.B. - bamFilename is the original BAM filename; the method will modify it to use the applicable extension
66 virtual bool Write(const std::string& bamFilename);
68 // derived classes MUST provide an implementation of everything below marked =0
70 // clear all current index offset data in memory
71 virtual void ClearAllData(void) =0;
72 // return file position after header metadata
// NOTE(review): the top-level const on this by-value return type has no effect
73 virtual const off_t DataBeginOffset(void) const =0;
74 // return true if all index data is cached
75 virtual bool HasFullDataCache(void) const =0;
76 // clears index data from all references except the first
77 virtual void KeepOnlyFirstReferenceOffsets(void) =0;
78 // load index data for all references, return true if loaded OK
79 // @saveData - save data in memory if true, just read & discard if false
// (default arguments of virtual functions are taken from the static type at the call site)
80 virtual bool LoadAllReferences(bool saveData = true) =0;
81 // load first reference from file, return true if loaded OK
82 // @saveData - save data in memory if true, just read & discard if false
83 virtual bool LoadFirstReference(bool saveData = true) =0;
84 // load header data from index file, return true if loaded OK
85 virtual bool LoadHeader(void) =0;
86 // position file pointer to first reference begin, return true if skipped OK
87 virtual bool SkipToFirstReference(void) =0;
88 // write index reference data
89 virtual bool WriteAllReferences(void) =0;
90 // write index header data
91 virtual bool WriteHeader(void) =0;
95 // rewind index file to beginning of index data, return true if rewound OK
// NOTE(review): no declaration follows the comment above in this listing -
// confirm that the rewind method is declared here.
99 // return true if FILE* is open
100 bool IsOpen(void) const;
101 // opens index file according to requested mode, return true if opened OK
102 bool OpenIndexFile(const std::string& filename, const std::string& mode);
103 // updates in-memory cache of index data, depending on current cache mode
104 void UpdateCache(void);
106 // factory methods for returning the proper BamIndex-derived type based on available index files
109 // returns index based on BAM filename 'stub'
110 // checks first for preferred type, returns that type if found
111 // (if not found, attempts to load other type(s), returns 0 if NONE found)
113 // ** default preferred type is BamToolsIndex ** use this anytime it exists
114 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
115 static BamIndex* FromBamFilename(const std::string& bamFilename,
116 BamTools::BgzfData* bgzf,
117 BamTools::BamReader* reader,
118 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
120 // returns index based on explicitly named index file (or 0 if not found)
121 static BamIndex* FromIndexFilename(const std::string& indexFilename,
122 BamTools::BgzfData* bgzf,
123 BamTools::BamReader* reader);
// data members
// m_BGZF / m_reader: raw pointers with the same types as the constructor
// arguments (presumably non-owning - confirm in the implementation file)
127 BamTools::BgzfData* m_BGZF;
128 BamTools::BamReader* m_reader;
// reference sequence entries (presumably obtained from the reader - confirm)
129 BamTools::RefVector m_references;
// current caching behavior, one of the BamIndexCacheMode values
130 BamIndex::BamIndexCacheMode m_cacheMode;
134 // --------------------------------------------------
135 // BamStandardIndex class
137 // implements index file operations for the standardized (per SAM/BAM spec) ".bai" format
138 class BamStandardIndex : public BamIndex {
// constructor takes the same BGZF stream / reader pair as the BamIndex constructor
143 BamStandardIndex(BamTools::BgzfData* bgzf,
144 BamTools::BamReader* reader);
145 ~BamStandardIndex(void);
147 // interface (implements BamIndex virtual methods)
149 // creates index data (in-memory) from current reader data
// NOTE(review): no declaration follows the comment above in this listing -
// confirm that the Build() override is declared here.
151 // returns supported file extension (always ".bai" for this class)
152 const std::string Extension(void) const { return std::string(".bai"); }
153 // returns whether the reference has alignments or not
154 bool HasAlignments(const int& referenceID) const;
155 // attempts to use index to jump to region; returns success/fail
156 // a "successful" jump indicates no error, but not whether this region has data
157 // * thus, the method sets the flag behind hasAlignmentsInRegion to indicate
158 // whether there are alignments available after the jump position
159 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// the methods below match the pure virtual declarations in BamIndex
161 // clear all current index offset data in memory
162 void ClearAllData(void);
163 // return file position after header metadata
164 const off_t DataBeginOffset(void) const;
165 // return true if all index data is cached
166 bool HasFullDataCache(void) const;
167 // clears index data from all references except the first
168 void KeepOnlyFirstReferenceOffsets(void);
169 // load index data for all references, return true if loaded OK
170 // @saveData - save data in memory if true, just read & discard if false
171 bool LoadAllReferences(bool saveData = true);
172 // load first reference from file, return true if loaded OK
173 // @saveData - save data in memory if true, just read & discard if false
174 bool LoadFirstReference(bool saveData = true);
175 // load header data from index file, return true if loaded OK
176 bool LoadHeader(void);
177 // position file pointer to first reference begin, return true if skipped OK
178 bool SkipToFirstReference(void);
179 // write index reference data
180 bool WriteAllReferences(void);
181 // write index header data
182 bool WriteHeader(void);
184 // internal implementation
// BamStandardIndexPrivate is only forward-declared here (its definition lives
// elsewhere); d points to it. Allocation and release of d are not visible in
// this header - confirm they happen in the constructor / destructor.
186 struct BamStandardIndexPrivate;
187 BamStandardIndexPrivate* d;
190 // --------------------------------------------------
191 // BamToolsIndex class
193 // implements index file operations for the BamTools-specific ".bti" format
194 class BamToolsIndex : public BamIndex {
// constructor takes the same BGZF stream / reader pair as the BamIndex constructor
198 BamToolsIndex(BamTools::BgzfData* bgzf,
199 BamTools::BamReader* reader);
200 ~BamToolsIndex(void);
202 // interface (implements BamIndex virtual methods)
204 // creates index data (in-memory) from current reader data
// NOTE(review): no declaration follows the comment above in this listing -
// confirm that the Build() override is declared here.
206 // returns supported file extension (always ".bti" for this class)
207 const std::string Extension(void) const { return std::string(".bti"); }
208 // returns whether the reference has alignments or not
209 bool HasAlignments(const int& referenceID) const;
210 // attempts to use index to jump to region; returns success/fail
211 // a "successful" jump indicates no error, but not whether this region has data
212 // * thus, the method sets the flag behind hasAlignmentsInRegion to indicate
213 // whether there are alignments available after the jump position
214 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
// the methods below match the pure virtual declarations in BamIndex
216 // clear all current index offset data in memory
217 void ClearAllData(void);
218 // return file position after header metadata
219 const off_t DataBeginOffset(void) const;
220 // return true if all index data is cached
221 bool HasFullDataCache(void) const;
222 // clears index data from all references except the first
223 void KeepOnlyFirstReferenceOffsets(void);
224 // load index data for all references, return true if loaded OK
225 // @saveData - save data in memory if true, just read & discard if false
226 bool LoadAllReferences(bool saveData = true);
227 // load first reference from file, return true if loaded OK
228 // @saveData - save data in memory if true, just read & discard if false
229 bool LoadFirstReference(bool saveData = true);
230 // load header data from index file, return true if loaded OK
231 bool LoadHeader(void);
232 // position file pointer to first reference begin, return true if skipped OK
233 bool SkipToFirstReference(void);
234 // write index reference data
235 bool WriteAllReferences(void);
236 // write index header data
237 bool WriteHeader(void);
239 // internal implementation
// BamToolsIndexPrivate is only forward-declared here (its definition lives
// elsewhere); d points to it. Allocation and release of d are not visible in
// this header - confirm they happen in the constructor / destructor.
241 struct BamToolsIndexPrivate;
242 BamToolsIndexPrivate* d;
245 // --------------------------------------------------
246 // BamIndex factory methods
248 // returns index based on BAM filename 'stub'
249 // checks first for preferred type, returns that type if found
250 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
252 // ** default preferred type is BamToolsIndex ** use this anytime it exists
254 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
255 BamTools::BgzfData* bgzf,
256 BamTools::BamReader* reader,
257 const BamIndex::PreferredIndexType& type)
259 // ---------------------------------------------------
260 // attempt to load preferred type first
262 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
263 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
264 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
265 return new BamToolsIndex(bgzf, reader);
267 const std::string standardIndexFilename = bamFilename + ".bai";
268 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
269 if ( (type == BamIndex::STANDARD) && standardIndexExists )
270 return new BamStandardIndex(bgzf, reader);
272 // ----------------------------------------------------
273 // preferred type could not be found, try other (non-preferred) types
274 // if none found, return 0
276 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader);
277 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader);
281 // returns index based on explicitly named index file (or 0 if not found)
283 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
284 BamTools::BgzfData* bgzf,
285 BamTools::BamReader* reader)
287 // see if specified file exists
288 const bool indexExists = BamTools::FileExists(indexFilename);
289 if ( !indexExists ) return 0;
291 const std::string bamtoolsIndexExtension(".bti");
292 const std::string standardIndexExtension(".bai");
294 // if has bamtoolsIndexExtension
295 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
296 return new BamToolsIndex(bgzf, reader);
298 // if has standardIndexExtension
299 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
300 return new BamStandardIndex(bgzf, reader);
302 // otherwise, unsupported file type
306 } // namespace BamTools
308 #endif // BAM_INDEX_H