X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Finternal%2FBamToolsIndex_p.h;h=902be794295ad26bdb6fd35c8a52f5f1a69b974d;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=ee5abbc8ad730bd86bab43cfd01d8a12afb9e095;hpb=8c80d760637f8df39262683cd2570f0589423d36;p=bamtools.git diff --git a/src/api/internal/BamToolsIndex_p.h b/src/api/internal/BamToolsIndex_p.h index ee5abbc..902be79 100644 --- a/src/api/internal/BamToolsIndex_p.h +++ b/src/api/internal/BamToolsIndex_p.h @@ -1,9 +1,8 @@ // *************************************************************************** // BamToolsIndex.h (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 19 January 2011 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides index operations for the BamTools index format (".bti") // *************************************************************************** @@ -21,8 +20,8 @@ // // We mean it. -#include -#include +#include "api/BamAux.h" +#include "api/BamIndex.h" #include #include #include @@ -30,11 +29,8 @@ namespace BamTools { namespace Internal { -// BTI constants -const std::string BTI_EXTENSION = ".bti"; - -// individual index offset entry -struct BamToolsIndexEntry { +// contains data for each 'block' in a BTI index +struct BtiBlock { // data members int32_t MaxEndPosition; @@ -42,30 +38,48 @@ struct BamToolsIndexEntry { int32_t StartPosition; // ctor - BamToolsIndexEntry(const int32_t& maxEndPosition = 0, - const int64_t& startOffset = 0, - const int32_t& startPosition = 0) + BtiBlock(const int32_t& maxEndPosition = 0, + const int64_t& startOffset = 0, + const int32_t& startPosition = 0) : MaxEndPosition(maxEndPosition) , StartOffset(startOffset) , StartPosition(startPosition) { } }; -// reference index entry -struct BamToolsReferenceEntry { +// convenience typedef for describing a a list of BTI blocks on a reference +typedef std::vector BtiBlockVector; + +// contains all fields necessary for building, loading, & writing +// full BTI index data for a single reference +struct BtiReferenceEntry { + + // data members + int32_t ID; + BtiBlockVector Blocks; + + // ctor + BtiReferenceEntry(const int& id = -1) + : ID(id) + { } +}; + +// provides (persistent) summary of BtiReferenceEntry's index data +struct BtiReferenceSummary { // data members - bool HasAlignments; - std::vector Offsets; + int NumBlocks; + uint64_t FirstBlockFilePosition; // ctor - BamToolsReferenceEntry(void) - : HasAlignments(false) + BtiReferenceSummary(void) + : NumBlocks(0) + , FirstBlockFilePosition(0) { } }; -// the actual index data structure -typedef std::map BamToolsIndexData; +// convenience typedef for describing a full BTI index file summary +typedef std::vector BtiFileSummary; class BamToolsIndex : public BamIndex { @@ -73,7 +87,7 @@ class BamToolsIndex : public BamIndex { // (might be useful later to handle any 'legacy' versions if the format changes) // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on // - // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by: + // so a change introduced in BTI_1_2 may be handled from then on by: // // if ( indexVersion >= BTI_1_2 ) // do something new @@ -82,111 +96,89 @@ class BamToolsIndex : public BamIndex { enum Version { BTI_1_0 = 1 , BTI_1_1 , BTI_1_2 + , BTI_2_0 }; - // ctor & dtor public: - BamToolsIndex(void); + BamToolsIndex(Internal::BamReaderPrivate* reader); ~BamToolsIndex(void); - // interface (implements BamIndex virtual methods) + // BamIndex implementation public: - // creates index data (in-memory) from @reader data - bool Build(Internal::BamReaderPrivate* reader); - // returns supported file extension - const std::string Extension(void) { return BTI_EXTENSION; } + // builds index from associated BAM file & writes out to index file + bool Create(void); // returns whether reference has alignments or no bool HasAlignments(const int& referenceID) const; - // attempts to use index to jump to @region in @reader; returns success/fail + // attempts to use index data to jump to @region, returns success/fail // a "successful" jump indicates no error, but not whether this region has data // * thus, the method sets a flag to indicate whether there are alignments // available after the jump position - bool Jump(Internal::BamReaderPrivate* reader, - const BamTools::BamRegion& region, - bool *hasAlignmentsInRegion); - + bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); + // loads existing data from file into memory + bool Load(const std::string& filename); + // change the index caching behavior + void SetCacheMode(const BamIndex::IndexCacheMode& mode); public: - // clear all current index offset data in memory - void ClearAllData(void); - // return file position after header metadata - off_t DataBeginOffset(void) const; - // return true if all index data is cached - bool HasFullDataCache(void) const; - // clears index data from all references except the first - void KeepOnlyFirstReferenceOffsets(void); - // load index data for all references, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadAllReferences(bool saveData = true); - // load first reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadFirstReference(bool saveData = true); - // load header data from index file, return true if loaded OK - bool LoadHeader(void); - // position file pointer to first reference begin, return true if skipped OK - bool SkipToFirstReference(void); - // write index reference data - bool WriteAllReferences(void); - // write index header data - bool WriteHeader(void); + // returns format's file extension + static const std::string Extension(void); // internal methods - public: + private: - // ----------------------- - // index file operations - - // check index file magic number, return true if OK - bool CheckMagicNumber(void); - // check index file version, return true if OK - bool CheckVersion(void); - // load a single index entry from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadIndexEntry(const int& refId, bool saveData = true); - // load a single reference from file, return true if loaded OK - // @saveData - save data in memory if true, just read & discard if false - bool LoadReference(const int& refId, bool saveData = true); - // loads number of references, return true if loaded OK - bool LoadReferenceCount(int& numReferences); - // position file pointer to desired reference begin, return true if skipped OK - bool SkipToReference(const int& refId); - // write current reference index data to new index file - bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry); - // write current index offset entry to new index file - bool WriteIndexEntry(const BamToolsIndexEntry& entry); - - // ----------------------- - // index data operations - - // clear all index offset data for desired reference - void ClearReferenceOffsets(const int& refId); - // calculate BAM file offset for desired region - // return true if no error (*NOT* equivalent to "has alignments or valid offset") - // check @hasAlignmentsInRegion to determine this status - // @region - target region - // @offset - resulting seek target - // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status - bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion); - // returns true if index cache has data for desired reference - bool IsDataLoaded(const int& refId) const; - // clears index data from all references except the one specified - void KeepOnlyReferenceOffsets(const int& refId); - // saves an index offset entry in memory - void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry); - // pre-allocates size for offset vector - void SetOffsetCount(const int& refId, const int& offsetCount); - // initializes index data structure to hold @count references - void SetReferenceCount(const int& count); + // index file ops + void CheckMagicNumber(void); + void CheckVersion(void); + void CloseFile(void); + bool IsFileOpen(void) const; + void OpenFile(const std::string& filename, const char* mode); + void Seek(const int64_t& position, const int& origin); + int64_t Tell(void) const; + + // index-creation methods + void ClearReferenceEntry(BtiReferenceEntry& refEntry); + void WriteBlock(const BtiBlock& block); + void WriteBlocks(const BtiBlockVector& blocks); + void WriteHeader(void); + void WriteReferenceEntry(const BtiReferenceEntry& refEntry); + + // random-access methods + void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion); + void ReadBlock(BtiBlock& block); + void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks); + void ReadReferenceEntry(BtiReferenceEntry& refEntry); + + // BTI summary data methods + void InitializeFileSummary(const int& numReferences); + void LoadFileSummary(void); + void LoadHeader(void); + void LoadNumBlocks(int& numBlocks); + void LoadNumReferences(int& numReferences); + void LoadReferenceSummary(BtiReferenceSummary& refSummary); + void SkipBlocks(const int& numBlocks); // data members private: - int32_t m_blockSize; - BamToolsIndexData m_indexData; - off_t m_dataBeginOffset; - bool m_hasFullDataCache; - bool m_isBigEndian; - int32_t m_inputVersion; // Version is serialized as int - Version m_outputVersion; + bool m_isBigEndian; + BamIndex::IndexCacheMode m_cacheMode; + BtiFileSummary m_indexFileSummary; + uint32_t m_blockSize; + int32_t m_inputVersion; // Version is serialized as int + Version m_outputVersion; + + struct RaiiWrapper { + FILE* IndexStream; + RaiiWrapper(void); + ~RaiiWrapper(void); + }; + RaiiWrapper Resources; + + // static constants + private: + static const uint32_t DEFAULT_BLOCK_LENGTH; + static const std::string BTI_EXTENSION; + static const char* const BTI_MAGIC; + static const int SIZEOF_BLOCK; }; } // namespace Internal