X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamIndex.h;h=067244ec2d2fd845458d7868dbee553771388be7;hb=2e1822c9ed231b25fd474117a01a1492d4209fa4;hp=fdd0d134bc3fd99cc98e4ed2d6b6aeb6eb21c15f;hpb=824ecac29bfd739f996aea108a5597d9a564df57;p=bamtools.git diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h index fdd0d13..067244e 100644 --- a/src/api/BamIndex.h +++ b/src/api/BamIndex.h @@ -1,215 +1,92 @@ // *************************************************************************** // BamIndex.h (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 17 September 2010 (DB) +// Last modified: 6 October 2011 (DB) // --------------------------------------------------------------------------- -// Provides index functionality - both for the standardized BAM index format -// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti"). +// Provides basic BAM index interface // *************************************************************************** #ifndef BAM_INDEX_H #define BAM_INDEX_H -#include +#include +#include #include -#include -#include "BamAux.h" namespace BamTools { -class BamReader; -class BgzfData; - -// -------------------------------------------------- -// BamIndex base class -class BamIndex { +namespace Internal { + class BamReaderPrivate; +} // namespace Internal + +/*! \class BamTools::BamIndex + \brief Provides methods for generating & loading BAM index files. + + This class straddles the line between public API and internal + implementation detail. Most client code should never have to use this + class directly. + + It is exposed to the public API to allow advanced users to implement + their own custom indexing schemes. + + More documentation on methods & enums coming soon. +*/ + +class API_EXPORT BamIndex { + // enums + public: + // specify index-caching behavior + enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory + , LimitedIndexCaching // store only index data for current reference + , NoIndexCaching // do not store any index data between jumps + }; + + // list of supported BamIndex types + enum IndexType { BAMTOOLS = 0 + , STANDARD + }; + // ctor & dtor public: - BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian); + BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } virtual ~BamIndex(void) { } // index interface public: - // creates index data (in-memory) from current reader data - virtual bool Build(void) =0; - // returns supported file extension - virtual const std::string Extension(void) const =0; - // attempts to use index to jump to region; returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; - // returns whether reference has alignments or no - virtual bool HasAlignments(const int& referenceID); - // loads existing data from file into memory - virtual bool Load(const std::string& filename) =0; - // writes in-memory index data out to file - // N.B. - (this is the original BAM filename, method will modify it to use applicable extension) - virtual bool Write(const std::string& bamFilename) =0; - - // factory methods for returning proper BamIndex-derived type based on available index files - public: - - // returns index based on BAM filename 'stub' - // checks first for preferred type, returns that type if found - // (if not found, attmempts to load other type(s), returns 0 if NONE found) - // - // ** default preferred type is BamToolsIndex ** use this anytime it exists - enum PreferredIndexType { BAMTOOLS = 0, STANDARD }; - static BamIndex* FromBamFilename(const std::string& bamFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian, - const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS); - - // returns index based on explicitly named index file (or 0 if not found) - static BamIndex* FromIndexFilename(const std::string& indexFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian); + // builds index from associated BAM file & writes out to index file + virtual bool Create(void) =0; - // data members - protected: - BamTools::BgzfData* m_BGZF; - BamTools::BamReader* m_reader; - BamTools::RefVector m_references; - bool m_isBigEndian; -}; + // returns a human-readable description of the last error encountered + std::string GetErrorString(void) { return m_errorString; } -// -------------------------------------------------- -// BamStandardIndex class -// -// implements standardized (per SAM/BAM spec) index file ops -class BamStandardIndex : public BamIndex { + // returns whether reference has alignments or no + virtual bool HasAlignments(const int& referenceID) const =0; - - // ctor & dtor - public: - BamStandardIndex(BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian); - ~BamStandardIndex(void); - - // interface (implements BamIndex virtual methods) - public: - // creates index data (in-memory) from current reader data - bool Build(void); - // returns supported file extension - const std::string Extension(void) const { return std::string(".bai"); } - // attempts to use index to jump to region; returns success/fail + // attempts to use index data to jump to @region, returns success/fail // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments + // * thus, the method sets a flag to indicate whether there are alignments // available after the jump position - bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); - // loads existing data from file into memory - bool Load(const std::string& filename); - // writes in-memory index data out to file - // N.B. - (this is the original BAM filename, method will modify it to use applicable extension) - bool Write(const std::string& bamFilename); - - // internal implementation - private: - struct BamStandardIndexPrivate; - BamStandardIndexPrivate* d; -}; + virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; -// -------------------------------------------------- -// BamToolsIndex class -// -// implements BamTools-specific index file ops -class BamToolsIndex : public BamIndex { + // loads existing data from file into memory + virtual bool Load(const std::string& filename) =0; - // ctor & dtor - public: - BamToolsIndex(BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian); - ~BamToolsIndex(void); - - // interface (implements BamIndex virtual methods) - public: - // creates index data (in-memory) from current reader data - bool Build(void); - // returns supported file extension - const std::string Extension(void) const { return std::string(".bti"); } - // attempts to use index to jump to region; returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); - // loads existing data from file into memory - bool Load(const std::string& filename); - // writes in-memory index data out to file - // N.B. - (this is the original BAM filename, method will modify it to use applicable extension) - bool Write(const std::string& bamFilename); - - // internal implementation - private: - struct BamToolsIndexPrivate; - BamToolsIndexPrivate* d; -}; + // change the index caching behavior + virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0; -// -------------------------------------------------- -// BamIndex factory methods -// -// return proper BamIndex-derived type based on available index files - -inline -BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian, - const BamIndex::PreferredIndexType& type) -{ - // --------------------------------------------------- - // attempt to load preferred type first - - const std::string bamtoolsIndexFilename = bamFilename + ".bti"; - const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename); - if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists ) - return new BamToolsIndex(bgzf, reader, isBigEndian); - - const std::string standardIndexFilename = bamFilename + ".bai"; - const bool standardIndexExists = BamTools::FileExists(standardIndexFilename); - if ( (type == BamIndex::STANDARD) && standardIndexExists ) - return new BamStandardIndex(bgzf, reader, isBigEndian); - - // ---------------------------------------------------- - // preferred type could not be found, try other (non-preferred) types - // if none found, return 0 - - if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian); - if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian); - return 0; -} - -inline -BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian) -{ - // see if specified file exists - const bool indexExists = BamTools::FileExists(indexFilename); - if ( !indexExists ) return 0; - - const std::string bamtoolsIndexExtension(".bti"); - const std::string standardIndexExtension(".bai"); - - // if has bamtoolsIndexExtension - if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) ) - return new BamToolsIndex(bgzf, reader, isBigEndian); - - // if has standardIndexExtension - if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) ) - return new BamStandardIndex(bgzf, reader, isBigEndian); - - // otherwise, unsupported file type - return 0; -} + // internal methods + protected: + void SetErrorString(const std::string& where, const std::string& what) const { + m_errorString = where + ": " + what; + } + + // data members + protected: + Internal::BamReaderPrivate* m_reader; // copy, not owned + mutable std::string m_errorString; +}; } // namespace BamTools