// ***************************************************************************
// BamIndex.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 3 September 2010 (DB)
+// Last modified: 5 April 2011 (DB)
// ---------------------------------------------------------------------------
-// Provides index functionality - both for the standardized BAM index format
-// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
+// Provides basic BAM index interface
// ***************************************************************************
#ifndef BAM_INDEX_H
#define BAM_INDEX_H
-#include <iostream>
+#include <api/api_global.h>
+#include <api/BamAux.h>
#include <string>
-#include <vector>
-#include "BamAux.h"
namespace BamTools {
-class BamReader;
-class BgzfData;
-
-// --------------------------------------------------
-// BamIndex base class
-class BamIndex {
+namespace Internal {
+ class BamReaderPrivate;
+} // namespace Internal
- // ctor & dtor
- public:
- BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian);
- virtual ~BamIndex(void) { }
+/*! \class BamTools::BamIndex
+ \brief Provides methods for generating & loading BAM index files.
- // index interface
- public:
- // creates index data (in-memory) from current reader data
- virtual bool Build(void) =0;
- // returns supported file extension
- virtual const std::string Extension(void) const =0;
- // calculates offset(s) for a given region
- //virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
- virtual bool Jump(const BamTools::BamRegion& region) =0;
- // returns whether reference has alignments or no
- virtual bool HasAlignments(const int& referenceID);
- // loads existing data from file into memory
- virtual bool Load(const std::string& filename) =0;
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- virtual bool Write(const std::string& bamFilename) =0;
-
- // factory methods for returning proper BamIndex-derived type based on available index files
- public:
-
- // returns index based on BAM filename 'stub'
- // checks first for preferred type, returns that type if found
- // (if not found, attmempts to load other type(s), returns 0 if NONE found)
- //
- // ** default preferred type is BamToolsIndex ** use this anytime it exists
- enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
- static BamIndex* FromBamFilename(const std::string& bamFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian,
- const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
-
- // returns index based on explicitly named index file (or 0 if not found)
- static BamIndex* FromIndexFilename(const std::string& indexFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
+ This class straddles the line between public API and internal
+ implementation detail. Most client code should never have to use this
+ class directly.
- // data members
- protected:
- BamTools::BgzfData* m_BGZF;
- BamTools::BamReader* m_reader;
- BamTools::RefVector m_references;
- bool m_isBigEndian;
-};
+ It is exposed to the public API to allow advanced users to implement
+ their own custom indexing schemes.
-// --------------------------------------------------
-// BamStandardIndex class
-//
-// implements standardized (per SAM/BAM spec) index file ops
-class BamStandardIndex : public BamIndex {
+ More documentation on methods & enums coming soon.
+*/
+class API_EXPORT BamIndex {
+
+ // enums
+ public:
+ // selects how much index data an implementation keeps in memory (passed to SetCacheMode)
+ enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory
+ , LimitedIndexCaching // store only index data for current reference
+ , NoIndexCaching // do not store any index data between jumps
+ };
+
+ // list of supported BamIndex types
+ enum IndexType { BAMTOOLS = 0 // BamTools-specific index format
+ , STANDARD // standardized BAM index format
+ };
// ctor & dtor
public:
- BamStandardIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
- ~BamStandardIndex(void);
+ BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } // stores the reader pointer only; does not take ownership
+ virtual ~BamIndex(void) { } // virtual so derived index types can be destroyed through a BamIndex*; m_reader is not deleted
- // interface (implements BamIndex virtual methods)
+ // index interface - all pure virtual, must be implemented by each derived index type
public:
- // creates index data (in-memory) from current reader data
- bool Build(void);
- // returns supported file extension
- const std::string Extension(void) const { return std::string(".bai"); }
- // calculates offset(s) for a given region
- bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
- bool Jump(const BamTools::BamRegion& region);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- bool Write(const std::string& bamFilename);
-
- // internal implementation
- private:
- struct BamStandardIndexPrivate;
- BamStandardIndexPrivate* d;
-};
-
-// --------------------------------------------------
-// BamToolsIndex class
-//
-// implements BamTools-specific index file ops
-class BamToolsIndex : public BamIndex {
+ // builds index from associated BAM file & writes out to index file
+ virtual bool Create(void) =0; // returns success/fail
+ // returns whether the reference identified by @referenceID has alignments or not
+ virtual bool HasAlignments(const int& referenceID) const =0;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag (@hasAlignmentsInRegion) to indicate whether
+ // there are alignments available after the jump position
+ virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+ // loads existing index data from the file @filename into memory, returns success/fail
+ virtual bool Load(const std::string& filename) =0;
+ // change the index caching behavior (see IndexCacheMode)
+ virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0;
- // ctor & dtor
- public:
- BamToolsIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
- ~BamToolsIndex(void);
-
- // interface (implements BamIndex virtual methods)
- public:
- // creates index data (in-memory) from current reader data
- bool Build(void);
- // returns supported file extension
- const std::string Extension(void) const { return std::string(".bti"); }
- // calculates offset(s) for a given region
- bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
- bool Jump(const BamTools::BamRegion& region);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- bool Write(const std::string& bamFilename);
-
- // internal implementation
- private:
- struct BamToolsIndexPrivate;
- BamToolsIndexPrivate* d;
+ // data members
+ protected:
+ Internal::BamReaderPrivate* m_reader; // copy of the pointer passed to the ctor, not owned
};
-// --------------------------------------------------
-// BamIndex factory methods
-//
-// return proper BamIndex-derived type based on available index files
-
-inline
-BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian,
- const BamIndex::PreferredIndexType& type)
-{
- // ---------------------------------------------------
- // attempt to load preferred type first
-
- const std::string bamtoolsIndexFilename = bamFilename + ".bti";
- const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
- if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
- return new BamToolsIndex(bgzf, reader, isBigEndian);
-
- const std::string standardIndexFilename = bamFilename + ".bai";
- const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
- if ( (type == BamIndex::STANDARD) && standardIndexExists )
- return new BamStandardIndex(bgzf, reader, isBigEndian);
-
- // ----------------------------------------------------
- // preferred type could not be found, try other (non-preferred) types
- // if none found, return 0
-
- if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian);
- if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian);
- return 0;
-}
-
-inline
-BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian)
-{
- // see if specified file exists
- const bool indexExists = BamTools::FileExists(indexFilename);
- if ( !indexExists ) return 0;
-
- const std::string bamtoolsIndexExtension(".bti");
- const std::string standardIndexExtension(".bai");
-
- // if has bamtoolsIndexExtension
- if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
- return new BamToolsIndex(bgzf, reader, isBigEndian);
-
- // if has standardIndexExtension
- if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
- return new BamStandardIndex(bgzf, reader, isBigEndian);
-
- // otherwise, unsupported file type
- return 0;
-}
-
} // namespace BamTools
#endif // BAM_INDEX_H