1 // ***************************************************************************
2 // BamIndex.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 3 September 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the standardized BAM index format
9 // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
10 // ***************************************************************************
25 // --------------------------------------------------
26 // BamIndex base class
// Abstract base for the index implementations declared in this header
// (BamStandardIndex and BamToolsIndex). Build, Extension, Jump, Load and
// Write are pure virtual, so every derived index must supply them.
// takes the BGZF data handle, the reader and an endianness flag
// (presumably kept in m_BGZF / m_reader - confirm in the implementation)
31 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian);
// virtual, empty destructor: allows a derived index to be deleted through a BamIndex*
32 virtual ~BamIndex(void) { }
36 // creates index data (in-memory) from current reader data; returns a bool status
37 virtual bool Build(void) =0;
38 // returns supported file extension (the derived classes in this header return ".bai" / ".bti")
39 virtual const std::string Extension(void) const =0;
40 // calculates offset(s) for a given region
// (disabled in the base interface; the derived classes still declare their own GetOffsets)
41 //virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
// NOTE(review): presumably moves the reader to the start of 'region' and returns
// true on success - confirm against the implementations
42 virtual bool Jump(const BamTools::BamRegion& region) =0;
43 // returns whether the reference with the given ID has alignments or not
// (virtual but not pure: the base class provides a definition outside this section)
44 virtual bool HasAlignments(const int& referenceID);
45 // loads existing index data from the named file into memory
46 virtual bool Load(const std::string& filename) =0;
47 // writes in-memory index data out to file
48 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
49 virtual bool Write(const std::string& bamFilename) =0;
51 // factory methods for returning proper BamIndex-derived type based on available index files
54 // returns index based on BAM filename 'stub'
55 // checks first for preferred type, returns that type if found
56 // (if not found, attempts to load other type(s), returns 0 if NONE found)
58 // ** default preferred type is BamToolsIndex ** use this anytime it exists
// BAMTOOLS selects the ".bti" index (BamToolsIndex), STANDARD the ".bai" index (BamStandardIndex)
59 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
// 'bamFilename' is the BAM file name; the factory appends the index extension itself.
// 'bgzf' and 'reader' are passed on to the constructor of the index that gets created.
// 'type' defaults to BamIndex::BAMTOOLS.
60 static BamIndex* FromBamFilename(const std::string& bamFilename,
61 BamTools::BgzfData* bgzf,
62 BamTools::BamReader* reader,
64 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
66 // returns index based on explicitly named index file (or 0 if not found)
// the index type is chosen from the file name's extension (".bti" or ".bai")
67 static BamIndex* FromIndexFilename(const std::string& indexFilename,
68 BamTools::BgzfData* bgzf,
69 BamTools::BamReader* reader,
// state shared by all index implementations
// BGZF data handle (presumably the 'bgzf' constructor argument; not shown to be owned here - confirm)
74 BamTools::BgzfData* m_BGZF;
// associated reader (presumably the 'reader' constructor argument - confirm)
75 BamTools::BamReader* m_reader;
// reference sequence entries, held by value
// NOTE(review): presumably copied from the reader - confirm in the constructor
76 BamTools::RefVector m_references;
80 // --------------------------------------------------
81 // BamStandardIndex class
83 // implements standardized (per SAM/BAM spec) index file ops
// i.e. the BamIndex implementation for ".bai" index files
84 class BamStandardIndex : public BamIndex {
// constructor takes the same BGZF handle / reader arguments as the BamIndex base constructor
89 BamStandardIndex(BamTools::BgzfData* bgzf,
90 BamTools::BamReader* reader,
// destructor is defined out of line
// NOTE(review): presumably releases the private data behind 'd' - confirm
92 ~BamStandardIndex(void);
94 // interface (implements BamIndex virtual methods)
96 // creates index data (in-memory) from current reader data
98 // returns supported file extension: always ".bai"
99 const std::string Extension(void) const { return std::string(".bai"); }
100 // calculates offset(s) for a given region
// (not part of the BamIndex virtual interface, where the declaration is commented out;
//  'offsets' is a non-const reference, presumably the output - confirm)
101 bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// implements BamIndex::Jump for the standard index format
102 bool Jump(const BamTools::BamRegion& region);
103 // loads existing index data from the named file into memory
104 bool Load(const std::string& filename);
105 // writes in-memory index data out to file
106 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
107 bool Write(const std::string& bamFilename);
109 // internal implementation
// the struct is only forward-declared here; the class reaches it through the pointer 'd'
111 struct BamStandardIndexPrivate;
112 BamStandardIndexPrivate* d;
115 // --------------------------------------------------
116 // BamToolsIndex class
118 // implements BamTools-specific index file ops
// i.e. the BamIndex implementation for the nonstandard ".bti" index files
119 class BamToolsIndex : public BamIndex {
// constructor takes the same BGZF handle / reader arguments as the BamIndex base constructor
123 BamToolsIndex(BamTools::BgzfData* bgzf,
124 BamTools::BamReader* reader,
// destructor is defined out of line
// NOTE(review): presumably releases the private data behind 'd' - confirm
126 ~BamToolsIndex(void);
128 // interface (implements BamIndex virtual methods)
130 // creates index data (in-memory) from current reader data
132 // returns supported file extension: always ".bti"
133 const std::string Extension(void) const { return std::string(".bti"); }
134 // calculates offset(s) for a given region
// (not part of the BamIndex virtual interface, where the declaration is commented out;
//  'offsets' is a non-const reference, presumably the output - confirm)
135 bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// implements BamIndex::Jump for the BamTools index format
136 bool Jump(const BamTools::BamRegion& region);
137 // loads existing index data from the named file into memory
138 bool Load(const std::string& filename);
139 // writes in-memory index data out to file
140 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
141 bool Write(const std::string& bamFilename);
143 // internal implementation
// the struct is only forward-declared here; the class reaches it through the pointer 'd'
145 struct BamToolsIndexPrivate;
146 BamToolsIndexPrivate* d;
149 // --------------------------------------------------
150 // BamIndex factory methods
152 // return proper BamIndex-derived type based on available index files
155 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
156 BamTools::BgzfData* bgzf,
157 BamTools::BamReader* reader,
159 const BamIndex::PreferredIndexType& type)
161 // ---------------------------------------------------
162 // attempt to load preferred type first
164 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
165 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
166 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
167 return new BamToolsIndex(bgzf, reader, isBigEndian);
169 const std::string standardIndexFilename = bamFilename + ".bai";
170 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
171 if ( (type == BamIndex::STANDARD) && standardIndexExists )
172 return new BamStandardIndex(bgzf, reader, isBigEndian);
174 // ----------------------------------------------------
175 // preferred type could not be found, try other (non-preferred) types
176 // if none found, return 0
178 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian);
179 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian);
184 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
185 BamTools::BgzfData* bgzf,
186 BamTools::BamReader* reader,
189 // see if specified file exists
190 const bool indexExists = BamTools::FileExists(indexFilename);
191 if ( !indexExists ) return 0;
193 const std::string bamtoolsIndexExtension(".bti");
194 const std::string standardIndexExtension(".bai");
196 // if has bamtoolsIndexExtension
197 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
198 return new BamToolsIndex(bgzf, reader, isBigEndian);
200 // if has standardIndexExtension
201 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
202 return new BamStandardIndex(bgzf, reader, isBigEndian);
204 // otherwise, unsupported file type
208 } // namespace BamTools
210 #endif // BAM_INDEX_H