1 // ***************************************************************************
2 // BamIndex.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 8 October 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the standardized BAM index format
9 // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
10 // ***************************************************************************
18 #include "BamAlignment.h"
25 // --------------------------------------------------
26 // BamIndex base class
31 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian); // takes the BGZF handle, the reader and an endianness flag; pointer ownership is not visible here - TODO confirm
32 virtual ~BamIndex(void) { } // virtual, empty body: lets derived index types be destroyed through a BamIndex*
36 // creates the index data (in-memory) from the current reader data; pure virtual, implemented per index format
37 virtual bool Build(void) =0;
38 // returns the file extension supported by the concrete index type (".bai" or ".bti" for the derived classes in this file)
39 virtual const std::string Extension(void) const =0;
40 // attempts to use the index to jump to the requested region; returns success/fail
41 // a "successful" jump means that no error occurred - it does not say whether the region has data
42 // * for that reason the method also sets a flag (hasAlignmentsInRegion) to indicate whether
43 // alignments are available after the jump position
44 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
45 // returns whether or not the reference identified by referenceID has alignments
46 virtual bool HasAlignments(const int& referenceID) const =0;
47 // loads existing index data from the named file into memory
48 virtual bool Load(const std::string& filename) =0;
49 // writes the in-memory index data out to file
50 // N.B. - the argument is the original BAM filename; the method adapts it to use the applicable index extension
51 virtual bool Write(const std::string& bamFilename) =0;
53 // factory methods for returning the proper BamIndex-derived type based on available index files
56 // returns an index based on the BAM filename 'stub'
57 // checks first for the preferred type and returns that type if found
58 // (if not found, attempts to load other type(s), returns 0 if NONE found)
60 // ** default preferred type is BamToolsIndex ** use this anytime it exists
61 enum PreferredIndexType { BAMTOOLS = 0, STANDARD }; // BAMTOOLS prefers the ".bti" index, STANDARD the ".bai" index
62 static BamIndex* FromBamFilename(const std::string& bamFilename,
63 BamTools::BgzfData* bgzf,
64 BamTools::BamReader* reader,
66 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
68 // returns an index chosen from the extension of an explicitly named index file (or 0 if the file is not found)
69 static BamIndex* FromIndexFilename(const std::string& indexFilename,
70 BamTools::BgzfData* bgzf,
71 BamTools::BamReader* reader,
76 BamTools::BgzfData* m_BGZF; // BGZF handle; presumably the constructor's bgzf argument - TODO confirm
77 BamTools::BamReader* m_reader; // associated reader; presumably the constructor's reader argument - TODO confirm
78 BamTools::RefVector m_references; // reference sequence entries; how this is populated is not visible here
82 // --------------------------------------------------
83 // BamStandardIndex class
85 // implements standardized (per SAM/BAM spec) index file ops
86 class BamStandardIndex : public BamIndex {
91 BamStandardIndex(BamTools::BgzfData* bgzf,
92 BamTools::BamReader* reader,
94 ~BamStandardIndex(void);
96 // interface (implements the BamIndex virtual methods)
98 // creates the index data (in-memory) from the current reader data
100 // returns the supported file extension, always ".bai" for this index type
101 const std::string Extension(void) const { return std::string(".bai"); }
102 // returns whether or not the reference identified by referenceID has alignments
103 bool HasAlignments(const int& referenceID) const;
104 // attempts to use the index to jump to the requested region; returns success/fail
105 // a "successful" jump means that no error occurred - it does not say whether the region has data
106 // * for that reason the method also sets a flag (hasAlignmentsInRegion) to indicate whether
107 // alignments are available after the jump position
108 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
109 // loads existing index data from the named file into memory
110 bool Load(const std::string& filename);
111 // writes the in-memory index data out to file
112 // N.B. - the argument is the original BAM filename; the method adapts it to use the applicable index extension
113 bool Write(const std::string& bamFilename);
115 // internal implementation, kept behind a forward-declared struct
117 struct BamStandardIndexPrivate;
118 BamStandardIndexPrivate* d; // pointer to the implementation struct declared just above; its definition is not in this header excerpt
121 // --------------------------------------------------
122 // BamToolsIndex class
124 // implements BamTools-specific index file ops
125 class BamToolsIndex : public BamIndex {
129 BamToolsIndex(BamTools::BgzfData* bgzf,
130 BamTools::BamReader* reader,
132 ~BamToolsIndex(void);
134 // interface (implements the BamIndex virtual methods)
136 // creates the index data (in-memory) from the current reader data
138 // returns the supported file extension, always ".bti" for this index type
139 const std::string Extension(void) const { return std::string(".bti"); }
140 // returns whether or not the reference identified by referenceID has alignments
141 bool HasAlignments(const int& referenceID) const;
142 // attempts to use the index to jump to the requested region; returns success/fail
143 // a "successful" jump means that no error occurred - it does not say whether the region has data
144 // * for that reason the method also sets a flag (hasAlignmentsInRegion) to indicate whether
145 // alignments are available after the jump position
146 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
147 // loads existing index data from the named file into memory
148 bool Load(const std::string& filename);
149 // writes the in-memory index data out to file
150 // N.B. - the argument is the original BAM filename; the method adapts it to use the applicable index extension
151 bool Write(const std::string& bamFilename);
153 // internal implementation, kept behind a forward-declared struct
155 struct BamToolsIndexPrivate;
156 BamToolsIndexPrivate* d; // pointer to the implementation struct declared just above; its definition is not in this header excerpt
159 // --------------------------------------------------
160 // BamIndex factory methods
162 // return proper BamIndex-derived type based on available index files
165 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, // BAM filename; index names are formed by appending ".bti" / ".bai" to it
166 BamTools::BgzfData* bgzf,
167 BamTools::BamReader* reader,
169 const BamIndex::PreferredIndexType& type)
171 // ---------------------------------------------------
172 // attempt to find the preferred type first; the returned object is allocated with new
174 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
175 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
176 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
177 return new BamToolsIndex(bgzf, reader, isBigEndian); // NOTE(review): only constructed here, no Load() call is visible - presumably the caller loads it; confirm
179 const std::string standardIndexFilename = bamFilename + ".bai";
180 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
181 if ( (type == BamIndex::STANDARD) && standardIndexExists )
182 return new BamStandardIndex(bgzf, reader, isBigEndian);
184 // ----------------------------------------------------
185 // preferred type could not be found, try the other (non-preferred) types, BamTools index first
186 // if none found, return 0
188 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian);
189 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian);
194 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, // explicit path of the index file; its suffix selects the index type
195 BamTools::BgzfData* bgzf,
196 BamTools::BamReader* reader,
199 // see if specified file exists; a missing file yields 0
200 const bool indexExists = BamTools::FileExists(indexFilename);
201 if ( !indexExists ) return 0;
203 const std::string bamtoolsIndexExtension(".bti");
204 const std::string standardIndexExtension(".bai");
206 // if name ends with bamtoolsIndexExtension - length-guarded suffix compare: a find()-based test only sees the FIRST occurrence of the extension, and for a name one character shorter than the extension it would compare npos against a wrapped-around unsigned difference and match by accident
207 if ( (indexFilename.length() >= bamtoolsIndexExtension.length()) && (indexFilename.compare(indexFilename.length() - bamtoolsIndexExtension.length(), bamtoolsIndexExtension.length(), bamtoolsIndexExtension) == 0) )
208 return new BamToolsIndex(bgzf, reader, isBigEndian);
210 // if name ends with standardIndexExtension (same length-guarded suffix compare)
211 if ( (indexFilename.length() >= standardIndexExtension.length()) && (indexFilename.compare(indexFilename.length() - standardIndexExtension.length(), standardIndexExtension.length(), standardIndexExtension) == 0) )
212 return new BamStandardIndex(bgzf, reader, isBigEndian);
214 // otherwise, unsupported file type
218 } // namespace BamTools
220 #endif // BAM_INDEX_H