1 // ***************************************************************************
2 // BamIndex.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 17 September 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the standardized BAM index format
9 // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
10 // ***************************************************************************
25 // --------------------------------------------------
26 // BamIndex base class
//
// Abstract interface for BAM index implementations. Build, Extension, Jump,
// Load and Write are pure virtual; HasAlignments is virtual but not pure.
// Also declares the static factory methods that pick a concrete index type.
// NOTE(review): the class header, access specifiers and closing brace are not
// visible in this excerpt.

// constructor takes the BGZF handle, the owning reader and an endianness flag
// NOTE(review): both pointers are raw; ownership/lifetime is not visible here - confirm
31 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian);
// virtual (empty) destructor
32 virtual ~BamIndex(void) { }
36 // creates index data (in-memory) from current reader data
37 virtual bool Build(void) =0;
38 // returns supported file extension
39 virtual const std::string Extension(void) const =0;
40 // attempts to use index to jump to region; returns success/fail
41 // a "successful" jump indicates no error, but not whether this region has data
42 // * thus, the method sets a flag to indicate whether there are alignments
43 // available after the jump position
44 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
45 // returns whether the reference has alignments or not
// (not pure virtual - presumably defined in the implementation file; confirm)
46 virtual bool HasAlignments(const int& referenceID);
47 // loads existing data from file into memory
48 virtual bool Load(const std::string& filename) =0;
49 // writes in-memory index data out to file
50 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
51 virtual bool Write(const std::string& bamFilename) =0;
53 // factory methods for returning proper BamIndex-derived type based on available index files
56 // returns index based on BAM filename 'stub'
57 // checks first for preferred type, returns that type if found
58 // (if not found, attempts to load other type(s), returns 0 if NONE found)
60 // ** default preferred type is BamToolsIndex ** use this anytime it exists
61 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
// NOTE(review): the definition later in this file returns objects created with
// 'new'; the caller is presumably responsible for deleting them - confirm.
// NOTE(review): one parameter line appears to be missing from this excerpt.
62 static BamIndex* FromBamFilename(const std::string& bamFilename,
63 BamTools::BgzfData* bgzf,
64 BamTools::BamReader* reader,
66 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
68 // returns index based on explicitly named index file (or 0 if not found)
// NOTE(review): the end of this declaration is not visible in this excerpt.
69 static BamIndex* FromIndexFilename(const std::string& indexFilename,
70 BamTools::BgzfData* bgzf,
71 BamTools::BamReader* reader,
// data members
// NOTE(review): presumably initialized from the constructor arguments - confirm in the implementation
76 BamTools::BgzfData* m_BGZF;
77 BamTools::BamReader* m_reader;
78 BamTools::RefVector m_references;
82 // --------------------------------------------------
83 // BamStandardIndex class
85 // implements standardized (per SAM/BAM spec) index file ops
// Derives publicly from BamIndex and reports ".bai" as its file extension.
// NOTE(review): access specifiers, part of the constructor declaration and the
// closing brace are not visible in this excerpt.
86 class BamStandardIndex : public BamIndex {
// constructor / destructor (destructor is defined elsewhere)
91 BamStandardIndex(BamTools::BgzfData* bgzf,
92 BamTools::BamReader* reader,
94 ~BamStandardIndex(void);
96 // interface (implements BamIndex virtual methods)
98 // creates index data (in-memory) from current reader data
// NOTE(review): the Build declaration described above is not visible in this excerpt
100 // returns supported file extension
101 const std::string Extension(void) const { return std::string(".bai"); }
102 // attempts to use index to jump to region; returns success/fail
103 // a "successful" jump indicates no error, but not whether this region has data
104 // * thus, the method sets a flag to indicate whether there are alignments
105 // available after the jump position
106 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
107 // loads existing data from file into memory
108 bool Load(const std::string& filename);
109 // writes in-memory index data out to file
110 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
111 bool Write(const std::string& bamFilename);
113 // internal implementation
// BamStandardIndexPrivate is only forward-declared here; 'd' points to an
// instance of it. NOTE(review): allocation and cleanup of 'd' are not visible
// in this header - presumably handled by the constructor/destructor; confirm.
115 struct BamStandardIndexPrivate;
116 BamStandardIndexPrivate* d;
119 // --------------------------------------------------
120 // BamToolsIndex class
122 // implements BamTools-specific index file ops
// Derives publicly from BamIndex and reports ".bti" as its file extension.
// NOTE(review): access specifiers, part of the constructor declaration and the
// closing brace are not visible in this excerpt.
123 class BamToolsIndex : public BamIndex {
// constructor / destructor (destructor is defined elsewhere)
127 BamToolsIndex(BamTools::BgzfData* bgzf,
128 BamTools::BamReader* reader,
130 ~BamToolsIndex(void);
132 // interface (implements BamIndex virtual methods)
134 // creates index data (in-memory) from current reader data
// NOTE(review): the Build declaration described above is not visible in this excerpt
136 // returns supported file extension
137 const std::string Extension(void) const { return std::string(".bti"); }
138 // attempts to use index to jump to region; returns success/fail
139 // a "successful" jump indicates no error, but not whether this region has data
140 // * thus, the method sets a flag to indicate whether there are alignments
141 // available after the jump position
142 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
143 // loads existing data from file into memory
144 bool Load(const std::string& filename);
145 // writes in-memory index data out to file
146 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
147 bool Write(const std::string& bamFilename);
149 // internal implementation
// BamToolsIndexPrivate is only forward-declared here; 'd' points to an
// instance of it. NOTE(review): allocation and cleanup of 'd' are not visible
// in this header - presumably handled by the constructor/destructor; confirm.
151 struct BamToolsIndexPrivate;
152 BamToolsIndexPrivate* d;
155 // --------------------------------------------------
156 // BamIndex factory methods
158 // return proper BamIndex-derived type based on available index files
//
// FromBamFilename: looks for "<bamFilename>.bti" and "<bamFilename>.bai" via
// BamTools::FileExists and returns a newly allocated index object of the
// matching type. The preferred 'type' wins when its file exists; otherwise
// the fallback order is BamToolsIndex first, then BamStandardIndex.
// The visible return statements only construct the index object; no Load call
// appears here.
// NOTE(review): the 'isBigEndian' parameter line, the braces and the final
// return are not visible in this excerpt.
// NOTE(review): objects are created with 'new'; the caller is presumably
// responsible for deleting them - confirm.
161 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
162 BamTools::BgzfData* bgzf,
163 BamTools::BamReader* reader,
165 const BamIndex::PreferredIndexType& type)
167 // ---------------------------------------------------
168 // attempt to load preferred type first
// BamTools-format candidate: BAM filename with ".bti" appended
170 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
171 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
172 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
173 return new BamToolsIndex(bgzf, reader, isBigEndian);
// standard-format candidate: BAM filename with ".bai" appended
175 const std::string standardIndexFilename = bamFilename + ".bai";
176 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
177 if ( (type == BamIndex::STANDARD) && standardIndexExists )
178 return new BamStandardIndex(bgzf, reader, isBigEndian);
180 // ----------------------------------------------------
181 // preferred type could not be found, try other (non-preferred) types
182 // if none found, return 0
// the existence flags computed above are reused here, so no extra file checks are made
184 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian);
185 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian);
// FromIndexFilename: returns a newly allocated index object chosen from the
// extension of an explicitly named index file. Returns 0 when the file does
// not exist; the comment at the end indicates unsupported extensions are also
// rejected.
// The visible return statements only construct the index object; no Load call
// appears here.
// NOTE(review): the 'isBigEndian' parameter line, the braces and the final
// return are not visible in this excerpt.
// NOTE(review): objects are created with 'new'; the caller is presumably
// responsible for deleting them - confirm.
190 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
191 BamTools::BgzfData* bgzf,
192 BamTools::BamReader* reader,
195 // see if specified file exists
196 const bool indexExists = BamTools::FileExists(indexFilename);
197 if ( !indexExists ) return 0;
199 const std::string bamtoolsIndexExtension(".bti");
200 const std::string standardIndexExtension(".bai");
// NOTE(review): the two suffix tests below compare the FIRST occurrence found
// by find() with the expected suffix position. Consequences:
//   * a name that contains the extension earlier as well as at the end
//     (e.g. "x.bti.bti") is not recognized;
//   * for a name shorter than the extension the unsigned subtraction wraps;
//     with a 3-character name it wraps to std::string::npos, which equals
//     find()'s not-found result, so the test passes spuriously.
// Consider a length check plus compare() (or rfind()) on the suffix instead.
202 // if has bamtoolsIndexExtension
203 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
204 return new BamToolsIndex(bgzf, reader, isBigEndian);
206 // if has standardIndexExtension
207 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
208 return new BamStandardIndex(bgzf, reader, isBigEndian);
210 // otherwise, unsupported file type
214 } // namespace BamTools
216 #endif // BAM_INDEX_H