X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamIndex.h;h=5e40d87eb517d4fbc8797e8f5d874eb520602f56;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=b54aace432cf839e326a894d091866ae0175e060;hpb=6f3fff23561d95067651931a8768c6466cabe99f;p=bamtools.git diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h index b54aace..5e40d87 100644 --- a/src/api/BamIndex.h +++ b/src/api/BamIndex.h @@ -1,9 +1,8 @@ // *************************************************************************** // BamIndex.h (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 19 November 2010 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides basic BAM index interface // *************************************************************************** @@ -11,132 +10,84 @@ #ifndef BAM_INDEX_H #define BAM_INDEX_H -#include -#include -#include +#include "api/api_global.h" +#include "api/BamAux.h" #include -#include namespace BamTools { -class BamReader; -class BgzfData; - namespace Internal { - class BamStandardIndex; - class BamToolsIndex; + class BamReaderPrivate; } // namespace Internal -// -------------------------------------------------- -// BamIndex base class +/*! \class BamTools::BamIndex + \brief Provides methods for generating & loading BAM index files. + + This class straddles the line between public API and internal + implementation detail. Most client code should never have to use this + class directly. + + It is exposed to the public API to allow advanced users to implement + their own custom indexing schemes. +*/ + class API_EXPORT BamIndex { - // specify index-caching behavior - // - // @FullIndexCaching - store entire index file contents in memory - // @LimitedIndexCaching - store only index data for current reference - // being processed - // @NoIndexCaching - do not store any index data. Load as needed to - // calculate jump offset - public: enum BamIndexCacheMode { FullIndexCaching = 0 - , LimitedIndexCaching - , NoIndexCaching - }; + // enums + public: + // specify index-caching behavior + enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory + , LimitedIndexCaching // store only index data for current reference + , NoIndexCaching // do not store any index data between jumps + }; + + // list of supported BamIndex types + enum IndexType { BAMTOOLS = 0 + , STANDARD + }; // ctor & dtor public: - BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader); - virtual ~BamIndex(void); + BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } + virtual ~BamIndex(void) { } // index interface public: - // creates index data (in-memory) from current reader data - virtual bool Build(void) =0; - // returns supported file extension - virtual const std::string Extension(void) const =0; + // builds index from associated BAM file & writes out to index file + virtual bool Create(void) =0; + + // returns a human-readable description of the last error encountered + std::string GetErrorString(void) { return m_errorString; } + // returns whether reference has alignments or no virtual bool HasAlignments(const int& referenceID) const =0; - // attempts to use index to jump to region; returns success/fail + + // attempts to use index data to jump to @region, returns success/fail // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments + // * thus, the method sets a flag to indicate whether there are alignments // available after the jump position virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; + // loads existing data from file into memory - virtual bool Load(const std::string& filename); - // change the index caching behavior - virtual void SetCacheMode(const BamIndexCacheMode mode); - // writes in-memory index data out to file - // N.B. - (this is the original BAM filename, method will modify it to use applicable extension) - virtual bool Write(const std::string& bamFilename); - - // derived-classes MUST provide implementation - protected: - // clear all current index offset data in memory - virtual void ClearAllData(void) =0; - // return file position after header metadata - virtual const off_t DataBeginOffset(void) const =0; - // return true if all index data is cached - virtual bool HasFullDataCache(void) const =0; - // clears index data from all references except the first - virtual void KeepOnlyFirstReferenceOffsets(void) =0; - // load index data for all references, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - virtual bool LoadAllReferences(bool saveData = true) =0; - // load first reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - virtual bool LoadFirstReference(bool saveData = true) =0; - // load header data from index file, return true if loaded OK - virtual bool LoadHeader(void) =0; - // position file pointer to first reference begin, return true if skipped OK - virtual bool SkipToFirstReference(void) =0; - // write index reference data - virtual bool WriteAllReferences(void) =0; - // write index header data - virtual bool WriteHeader(void) =0; + virtual bool Load(const std::string& filename) =0; + + // change the index caching behavior + virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0; + + //! \cond // internal methods protected: - // rewind index file to beginning of index data, return true if rewound OK - bool Rewind(void); - - private: - // return true if FILE* is open - bool IsOpen(void) const; - // opens index file according to requested mode, return true if opened OK - bool OpenIndexFile(const std::string& filename, const std::string& mode); - // updates in-memory cache of index data, depending on current cache mode - void UpdateCache(void); - - // factory methods for returning proper BamIndex-derived type based on available index files - public: - - // returns index based on BAM filename 'stub' - // checks first for preferred type, returns that type if found - // (if not found, attmempts to load other type(s), returns 0 if NONE found) - // - // ** default preferred type is BamToolsIndex ** use this anytime it exists - enum PreferredIndexType { BAMTOOLS = 0, STANDARD }; - static BamIndex* FromBamFilename(const std::string& bamFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS); - - // returns index based on explicitly named index file (or 0 if not found) - static BamIndex* FromIndexFilename(const std::string& indexFilename, - BamTools::BgzfData* bgzf, - BamTools::BamReader* reader); + void SetErrorString(const std::string& where, const std::string& what) const { + m_errorString = where + ": " + what; + } // data members protected: - BamTools::BgzfData* m_BGZF; - BamTools::BamReader* m_reader; - BamTools::RefVector m_references; - BamIndex::BamIndexCacheMode m_cacheMode; - FILE* m_indexStream; - + Internal::BamReaderPrivate* m_reader; // copy, not owned + mutable std::string m_errorString; - friend class Internal::BamStandardIndex; - friend class Internal::BamToolsIndex; + //! \endcond }; } // namespace BamTools