// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 17 August 2010 (DB)
+// Last modified: 3 September 2010 (DB)
// ---------------------------------------------------------------------------
-// Provides index functionality - both for the default (standardized) BAM
-// index format (.bai) as well as a BamTools-specific (nonstandard) index
-// format (.bti).
+// Provides index functionality - both for the standardized BAM index format
+// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
// ***************************************************************************
#ifndef BAM_INDEX_H
#define BAM_INDEX_H
+#include <iostream>
#include <string>
#include <vector>
#include "BamAux.h"
// BamIndex base class
class BamIndex {
+ // ctor & dtor
public:
- BamIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
+ BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian);
virtual ~BamIndex(void) { }
+ // index interface (pure virtual unless noted; implemented by the derived index classes)
public:
// creates index data (in-memory) from current reader data
virtual bool Build(void) =0;
+ // returns supported file extension (derived classes in this file return ".bai" or ".bti")
+ virtual const std::string Extension(void) const =0;
// calculates offset(s) for a given region
virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
- // loads existing data from file into memory
- virtual bool Load(const std::string& filename) =0;
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID);
+ // loads existing data from file into memory
+ virtual bool Load(const std::string& filename) =0;
// writes in-memory index data out to file
// N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
virtual bool Write(const std::string& bamFilename) =0;
+ // factory methods for returning proper BamIndex-derived type based on available index files
+ public:
+
+ // returns index based on BAM filename 'stub' (index files are looked up as bamFilename + ".bti" / ".bai")
+ // checks first for preferred type, returns that type if found
+ // (if not found, attempts to use other type(s), returns 0 if NONE found)
+ //
+ // ** default preferred type is BamToolsIndex ** use this anytime it exists
+ enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
+ static BamIndex* FromBamFilename(const std::string& bamFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian,
+ const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
+
+ // returns index based on explicitly named index file (or 0 if file not found or its extension is neither ".bti" nor ".bai")
+ static BamIndex* FromIndexFilename(const std::string& indexFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian);
+
+ // data members
protected:
BamTools::BgzfData* m_BGZF;
BamTools::BamReader* m_reader;
};
// --------------------------------------------------
-// BamDefaultIndex class
+// BamStandardIndex class
//
-// implements default (per SAM/BAM spec) index file ops
-class BamDefaultIndex : public BamIndex {
+// implements standardized (per SAM/BAM spec) index file ops (".bai" index files)
+class BamStandardIndex : public BamIndex {
// ctor & dtor
public:
- BamDefaultIndex(BamTools::BgzfData* bgzf,
+ BamStandardIndex(BamTools::BgzfData* bgzf,
BamTools::BamReader* reader,
bool isBigEndian);
- ~BamDefaultIndex(void);
+ ~BamStandardIndex(void);
// interface (implements BamIndex virtual methods)
public:
// creates index data (in-memory) from current reader data
bool Build(void);
+ // returns supported file extension (always ".bai" for the standard index)
+ const std::string Extension(void) const { return std::string(".bai"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// loads existing data from file into memory
// internal implementation
private:
- struct BamDefaultIndexPrivate;
- BamDefaultIndexPrivate* d;
+ struct BamStandardIndexPrivate;
+ BamStandardIndexPrivate* d;
};
// --------------------------------------------------
public:
// creates index data (in-memory) from current reader data
bool Build(void);
+ // returns supported file extension (always ".bti" for this index type)
+ const std::string Extension(void) const { return std::string(".bti"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// loads existing data from file into memory
BamToolsIndexPrivate* d;
};
+// --------------------------------------------------
+// BamIndex factory methods
+//
+// return proper BamIndex-derived type based on available index files
+
+// Picks the BamIndex-derived type to use for a BAM file, based on which index
+// files sit next to it (bamFilename + ".bti" and bamFilename + ".bai").
+//
+// bamFilename               : BAM filename; the index extension is appended to it as-is
+// bgzf, reader, isBigEndian : forwarded unchanged to the index constructor
+// type                      : index type to favor when both index files exist
+//
+// Returns a 'new'-allocated index object, or 0 if neither index file exists.
+// This function only constructs the object; it does not call Load().
+inline
+BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
+                                    BamTools::BgzfData* bgzf,
+                                    BamTools::BamReader* reader,
+                                    bool isBigEndian,
+                                    const BamIndex::PreferredIndexType& type)
+{
+    // the BamTools index is always probed first; it wins outright when it is the preferred type
+    const std::string btiPath = bamFilename + ".bti";
+    const bool haveBti = BamTools::FileExists(btiPath);
+    if ( haveBti && (type == BamIndex::BAMTOOLS) ) {
+        return new BamToolsIndex(bgzf, reader, isBigEndian);
+    }
+
+    // the standard index is probed second; it wins when it is the preferred type
+    const std::string baiPath = bamFilename + ".bai";
+    const bool haveBai = BamTools::FileExists(baiPath);
+    if ( haveBai && (type == BamIndex::STANDARD) ) {
+        return new BamStandardIndex(bgzf, reader, isBigEndian);
+    }
+
+    // preferred type is unavailable: settle for whichever index file exists
+    // (BamTools index checked before the standard one), or 0 if there is none
+    if ( haveBti ) {
+        return new BamToolsIndex(bgzf, reader, isBigEndian);
+    }
+    if ( haveBai ) {
+        return new BamStandardIndex(bgzf, reader, isBigEndian);
+    }
+    return 0;
+}
+
+// Creates the BamIndex-derived object that matches an explicitly named index file.
+//
+// indexFilename             : path of the index file; its trailing extension selects the type
+//                             (".bti" -> BamToolsIndex, ".bai" -> BamStandardIndex)
+// bgzf, reader, isBigEndian : forwarded unchanged to the index constructor
+//
+// Returns a 'new'-allocated index object, or 0 if the file does not exist or
+// its extension is not supported. This function only constructs the object;
+// it does not call Load().
+inline
+BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
+                                      BamTools::BgzfData* bgzf,
+                                      BamTools::BamReader* reader,
+                                      bool isBigEndian)
+{
+    // see if specified file exists
+    const bool indexExists = BamTools::FileExists(indexFilename);
+    if ( !indexExists ) return 0;
+
+    const std::string bamtoolsIndexExtension(".bti");
+    const std::string standardIndexExtension(".bai");
+    const std::string::size_type nameLength = indexFilename.length();
+
+    // The extension is matched against the TAIL of the filename with compare().
+    // Comparing find() against (length - extensionLength) is not equivalent:
+    //   - find() reports the FIRST occurrence, so a name such as "x.bti.bti" is rejected
+    //   - for a name one character shorter than the extension, the unsigned
+    //     subtraction wraps around to std::string::npos, which equals find()'s
+    //     "not found" result and yields a false match
+    // The length guard keeps the start position passed to compare() in range.
+
+    // if has bamtoolsIndexExtension
+    if ( nameLength >= bamtoolsIndexExtension.length() &&
+         indexFilename.compare(nameLength - bamtoolsIndexExtension.length(), std::string::npos, bamtoolsIndexExtension) == 0 )
+        return new BamToolsIndex(bgzf, reader, isBigEndian);
+
+    // if has standardIndexExtension
+    if ( nameLength >= standardIndexExtension.length() &&
+         indexFilename.compare(nameLength - standardIndexExtension.length(), std::string::npos, standardIndexExtension) == 0 )
+        return new BamStandardIndex(bgzf, reader, isBigEndian);
+
+    // otherwise, unsupported file type
+    return 0;
+}
+
} // namespace BamTools
-#endif // BAM_INDEX_H
\ No newline at end of file
+#endif // BAM_INDEX_H