X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamIndex.h;h=067244ec2d2fd845458d7868dbee553771388be7;hb=2e1822c9ed231b25fd474117a01a1492d4209fa4;hp=89e9d2b06cd8361e8f7bac4d7c09e783a36909e7;hpb=3a975af617ffe4f1f8077e7e45fa484296f5e704;p=bamtools.git diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h index 89e9d2b..067244e 100644 --- a/src/api/BamIndex.h +++ b/src/api/BamIndex.h @@ -1,306 +1,92 @@ // *************************************************************************** // BamIndex.h (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 20 October 2010 (DB) +// Last modified: 6 October 2011 (DB) // --------------------------------------------------------------------------- -// Provides index functionality - both for the standardized BAM index format -// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti"). +// Provides basic BAM index interface // *************************************************************************** #ifndef BAM_INDEX_H #define BAM_INDEX_H -#include +#include +#include #include -#include -#include "BamAux.h" namespace BamTools { -class BamReader; -class BgzfData; - -// -------------------------------------------------- -// BamIndex base class -class BamIndex { +namespace Internal { + class BamReaderPrivate; +} // namespace Internal - // specify index-caching behavior - // - // @FullIndexCaching - store entire index file contents in memory - // @LimitedIndexCaching - store only index data for current reference - // being processed - // @NoIndexCaching - do not store any index data. Load as needed to - // calculate jump offset - public: enum BamIndexCacheMode { FullIndexCaching = 0 - , LimitedIndexCaching - , NoIndexCaching - }; - - // ctor & dtor - public: - BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader); - virtual ~BamIndex(void); - - // index interface - public: - // creates index data (in-memory) from current reader data - virtual bool Build(void) =0; - // returns supported file extension - virtual const std::string Extension(void) const =0; - // returns whether reference has alignments or no - virtual bool HasAlignments(const int& referenceID) const =0; - // attempts to use index to jump to region; returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; - // loads existing data from file into memory - virtual bool Load(const std::string& filename); - // change the index caching behavior - virtual void SetCacheMode(const BamIndexCacheMode mode); - // writes in-memory index data out to file - // N.B. - (this is the original BAM filename, method will modify it to use applicable extension) - virtual bool Write(const std::string& bamFilename); - - // derived-classes MUST provide implementation - protected: - // clear all current index offset data in memory - virtual void ClearAllData(void) =0; - // return file position after header metadata - virtual const off_t DataBeginOffset(void) const =0; - // return true if all index data is cached - virtual bool HasFullDataCache(void) const =0; - // clears index data from all references except the first - virtual void KeepOnlyFirstReferenceOffsets(void) =0; - // load index data for all references, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - virtual bool LoadAllReferences(bool saveData = true) =0; - // load first reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - virtual bool LoadFirstReference(bool saveData = true) =0; - // load header data from index file, return true if loaded OK - virtual bool LoadHeader(void) =0; - // position file pointer to first reference begin, return true if skipped OK - virtual bool SkipToFirstReference(void) =0; - // write index reference data - virtual bool WriteAllReferences(void) =0; - // write index header data - virtual bool WriteHeader(void) =0; - - // internal methods - protected: - // rewind index file to beginning of index data, return true if rewound OK - bool Rewind(void); +/*! \class BamTools::BamIndex + \brief Provides methods for generating & loading BAM index files. - private: - // return true if FILE* is open - bool IsOpen(void) const; - // opens index file according to requested mode, return true if opened OK - bool OpenIndexFile(const std::string& filename, const std::string& mode); - // updates in-memory cache of index data, depending on current cache mode - void UpdateCache(void); + This class straddles the line between public API and internal + implementation detail. Most client code should never have to use this + class directly. - // factory methods for returning proper BamIndex-derived type based on available index files - public: - - // returns index based on BAM filename 'stub' - // checks first for preferred type, returns that type if found - // (if not found, attmempts to load other type(s), returns 0 if NONE found) - // - // ** default preferred type is BamToolsIndex ** use this anytime it exists - enum PreferredIndexType { BAMTOOLS = 0, STANDARD }; - static BamIndex* FromBamFilename(const std::string& bamFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS); - - // returns index based on explicitly named index file (or 0 if not found) - static BamIndex* FromIndexFilename(const std::string& indexFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader); + It is exposed to the public API to allow advanced users to implement + their own custom indexing schemes. - // data members - protected: - BamTools::BgzfData* m_BGZF; - BamTools::BamReader* m_reader; - BamTools::RefVector m_references; - BamIndex::BamIndexCacheMode m_cacheMode; - FILE* m_indexStream; -}; + More documentation on methods & enums coming soon. +*/ -// -------------------------------------------------- -// BamStandardIndex class -// -// implements standardized (per SAM/BAM spec) index file ops -class BamStandardIndex : public BamIndex { +class API_EXPORT BamIndex { + // enums + public: + // specify index-caching behavior + enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory + , LimitedIndexCaching // store only index data for current reference + , NoIndexCaching // do not store any index data between jumps + }; + + // list of supported BamIndex types + enum IndexType { BAMTOOLS = 0 + , STANDARD + }; // ctor & dtor public: - BamStandardIndex(BamTools::BgzfData* bgzf, - BamTools::BamReader* reader); - ~BamStandardIndex(void); + BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } + virtual ~BamIndex(void) { } - // interface (implements BamIndex virtual methods) + // index interface public: - // creates index data (in-memory) from current reader data - bool Build(void); - // returns supported file extension - const std::string Extension(void) const { return std::string(".bai"); } - // returns whether reference has alignments or no - bool HasAlignments(const int& referenceID) const; - // attempts to use index to jump to region; returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); - protected: - // clear all current index offset data in memory - void ClearAllData(void); - // return file position after header metadata - const off_t DataBeginOffset(void) const; - // return true if all index data is cached - bool HasFullDataCache(void) const; - // clears index data from all references except the first - void KeepOnlyFirstReferenceOffsets(void); - // load index data for all references, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadAllReferences(bool saveData = true); - // load first reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadFirstReference(bool saveData = true); - // load header data from index file, return true if loaded OK - bool LoadHeader(void); - // position file pointer to first reference begin, return true if skipped OK - bool SkipToFirstReference(void); - // write index reference data - bool WriteAllReferences(void); - // write index header data - bool WriteHeader(void); + // builds index from associated BAM file & writes out to index file + virtual bool Create(void) =0; - // internal implementation - private: - struct BamStandardIndexPrivate; - BamStandardIndexPrivate* d; -}; - -// -------------------------------------------------- -// BamToolsIndex class -// -// implements BamTools-specific index file ops -class BamToolsIndex : public BamIndex { + // returns a human-readable description of the last error encountered + std::string GetErrorString(void) { return m_errorString; } - // ctor & dtor - public: - BamToolsIndex(BamTools::BgzfData* bgzf, - BamTools::BamReader* reader); - ~BamToolsIndex(void); - - // interface (implements BamIndex virtual methods) - public: - // creates index data (in-memory) from current reader data - bool Build(void); - // returns supported file extension - const std::string Extension(void) const { return std::string(".bti"); } // returns whether reference has alignments or no - bool HasAlignments(const int& referenceID) const; - // attempts to use index to jump to region; returns success/fail + virtual bool HasAlignments(const int& referenceID) const =0; + + // attempts to use index data to jump to @region, returns success/fail // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments + // * thus, the method sets a flag to indicate whether there are alignments // available after the jump position - bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); - protected: - // clear all current index offset data in memory - void ClearAllData(void); - // return file position after header metadata - const off_t DataBeginOffset(void) const; - // return true if all index data is cached - bool HasFullDataCache(void) const; - // clears index data from all references except the first - void KeepOnlyFirstReferenceOffsets(void); - // load index data for all references, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadAllReferences(bool saveData = true); - // load first reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadFirstReference(bool saveData = true); - // load header data from index file, return true if loaded OK - bool LoadHeader(void); - // position file pointer to first reference begin, return true if skipped OK - bool SkipToFirstReference(void); - // write index reference data - bool WriteAllReferences(void); - // write index header data - bool WriteHeader(void); - - // internal implementation - private: - struct BamToolsIndexPrivate; - BamToolsIndexPrivate* d; -}; - -// -------------------------------------------------- -// BamIndex factory methods + virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; -// returns index based on BAM filename 'stub' -// checks first for preferred type, returns that type if found -// (if not found, attmempts to load other type(s), returns 0 if NONE found) -// -// ** default preferred type is BamToolsIndex ** use this anytime it exists -inline -BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - const BamIndex::PreferredIndexType& type) -{ - // --------------------------------------------------- - // attempt to load preferred type first - - const std::string bamtoolsIndexFilename = bamFilename + ".bti"; - const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename); - if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists ) - return new BamToolsIndex(bgzf, reader); + // loads existing data from file into memory + virtual bool Load(const std::string& filename) =0; - const std::string standardIndexFilename = bamFilename + ".bai"; - const bool standardIndexExists = BamTools::FileExists(standardIndexFilename); - if ( (type == BamIndex::STANDARD) && standardIndexExists ) - return new BamStandardIndex(bgzf, reader); - - // ---------------------------------------------------- - // preferred type could not be found, try other (non-preferred) types - // if none found, return 0 - - if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader); - if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader); - return 0; -} + // change the index caching behavior + virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0; -// returns index based on explicitly named index file (or 0 if not found) -inline -BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader) -{ - // see if specified file exists - const bool indexExists = BamTools::FileExists(indexFilename); - if ( !indexExists ) return 0; - - const std::string bamtoolsIndexExtension(".bti"); - const std::string standardIndexExtension(".bai"); - - // if has bamtoolsIndexExtension - if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) ) - return new BamToolsIndex(bgzf, reader); - - // if has standardIndexExtension - if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) ) - return new BamStandardIndex(bgzf, reader); + // internal methods + protected: + void SetErrorString(const std::string& where, const std::string& what) const { + m_errorString = where + ": " + what; + } - // otherwise, unsupported file type - return 0; -} + // data members + protected: + Internal::BamReaderPrivate* m_reader; // copy, not owned + mutable std::string m_errorString; +}; } // namespace BamTools