// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 17 August 2010 (DB)
+// Last modified: 24 February 2011 (DB)
// ---------------------------------------------------------------------------
-// Provides index functionality - both for the default (standardized) BAM
-// index format (.bai) as well as a BamTools-specific (nonstandard) index
-// format (.bti).
+// Provides basic BAM index interface
// ***************************************************************************
#ifndef BAM_INDEX_H
#define BAM_INDEX_H
+#include <api/api_global.h>
+#include <api/BamAux.h>
+#include <iostream>
#include <string>
#include <vector>
-#include "BamAux.h"
namespace BamTools {
class BamReader;
-class BgzfData;
-
+
+namespace Internal {
+ class BamReaderPrivate; // forward declaration only; BamIndex::Build() and BamIndex::Jump() take a pointer to this type
+} // namespace Internal
+
// --------------------------------------------------
// BamIndex base class
-class BamIndex {
+class API_EXPORT BamIndex { // abstract base class: declares pure virtual (=0) index operations that derived index types implement
+ // enums (public, so callers can name a cache mode or an index type)
public:
- BamIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
- virtual ~BamIndex(void) { }
-
- public:
- // creates index data (in-memory) from current reader data
- virtual bool Build(void) =0;
- // calculates offset(s) for a given region
- virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
- // loads existing data from file into memory
- virtual bool Load(const std::string& filename) =0;
- // returns whether reference has alignments or no
- virtual bool HasAlignments(const int& referenceID);
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- virtual bool Write(const std::string& bamFilename) =0;
-
- protected:
- BamTools::BgzfData* m_BGZF;
- BamTools::BamReader* m_reader;
- BamTools::RefVector m_references;
- bool m_isBigEndian;
-};
-
-// --------------------------------------------------
-// BamDefaultIndex class
-//
-// implements default (per SAM/BAM spec) index file ops
-class BamDefaultIndex : public BamIndex {
+ // specify index-caching behavior (SetCacheMode() accepts one of these values; m_cacheMode has this type)
+ enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory
+ , LimitedIndexCaching // store only index data for current reference
+ , NoIndexCaching // do not store any index data between jumps
+ };
+ // list of supported BamIndex types (presumably BAMTOOLS = BamTools-specific format, STANDARD = standard BAM index format; confirm against the derived classes)
+ enum IndexType { BAMTOOLS = 0
+ , STANDARD
+ };
// ctor & dtor
public:
- BamDefaultIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
- ~BamDefaultIndex(void);
+ BamIndex(void);
+ virtual ~BamIndex(void); // virtual, so a derived index can be destroyed through a BamIndex pointer
- // interface (implements BamIndex virtual methods)
+ // index interface (public API; methods marked =0 must be implemented by derived classes)
public:
- // creates index data (in-memory) from current reader data
- bool Build(void);
- // calculates offset(s) for a given region
- bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
+ // creates index data (in-memory) from @reader data; returns a bool (presumably true on success; confirm in implementations)
+ virtual bool Build(Internal::BamReaderPrivate* reader) =0;
+ // returns supported file extension (returned by value as a std::string)
+ virtual const std::string Extension(void) =0;
+ // returns whether the reference identified by @referenceID has alignments or not
+ virtual bool HasAlignments(const int& referenceID) const =0;
+ // attempts to use index data to jump to @region in @reader; returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag (presumably through @hasAlignmentsInRegion) to indicate whether there are alignments
+ // available after the jump position
+ virtual bool Jump(Internal::BamReaderPrivate* reader,
+ const BamTools::BamRegion& region,
+ bool* hasAlignmentsInRegion) =0;
+ // loads existing data from @filename into memory (virtual but not pure: declared without =0, so derived classes may override it)
+ virtual bool Load(const std::string& filename);
+ // change the index caching behavior to @mode
+ virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode);
// writes in-memory index data out to file
// N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- bool Write(const std::string& bamFilename);
-
- // internal implementation
- private:
- struct BamDefaultIndexPrivate;
- BamDefaultIndexPrivate* d;
-};
+ virtual bool Write(const std::string& bamFilename);
-// --------------------------------------------------
-// BamToolsIndex class
-//
-// implements BamTools-specific index file ops
-class BamToolsIndex : public BamIndex {
+ // derived-classes MUST provide implementation (every method in this section is pure virtual)
+ protected:
+ // clear all current index offset data in memory
+ virtual void ClearAllData(void) =0;
+ // return file position after header metadata (NOTE(review): off_t is not declared by any standard header named in this file's visible include block; confirm it arrives through an included header)
+ virtual off_t DataBeginOffset(void) const =0;
+ // return true if all index data is cached
+ virtual bool HasFullDataCache(void) const =0;
+ // clears index data from all references except the first
+ virtual void KeepOnlyFirstReferenceOffsets(void) =0;
+ // load index data for all references, return true if loaded OK
+ // @saveData - save data in memory if true, just read & discard if false (defaults to true)
+ virtual bool LoadAllReferences(bool saveData = true) =0;
+ // load first reference from file, return true if loaded OK
+ // @saveData - save data in memory if true, just read & discard if false (defaults to true)
+ virtual bool LoadFirstReference(bool saveData = true) =0;
+ // load header data from index file, return true if loaded OK
+ virtual bool LoadHeader(void) =0;
+ // position file pointer to first reference begin, return true if skipped OK
+ virtual bool SkipToFirstReference(void) =0;
+ // write index reference data; returns a bool (presumably true if written OK; confirm in implementations)
+ virtual bool WriteAllReferences(void) =0;
+ // write index header data; returns a bool (presumably true if written OK; confirm in implementations)
+ virtual bool WriteHeader(void) =0;
+
+ // internal methods (non-virtual, but available to derived classes)
+ protected:
+ // rewind index file to beginning of index data, return true if rewound OK
+ bool Rewind(void);
- // ctor & dtor
- public:
- BamToolsIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
- ~BamToolsIndex(void);
-
- // interface (implements BamIndex virtual methods)
- public:
- // creates index data (in-memory) from current reader data
- bool Build(void);
- // calculates offset(s) for a given region
- bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
- // loads existing data from file into memory
- bool Load(const std::string& filename);
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- bool Write(const std::string& bamFilename);
-
- // internal implementation
private:
- struct BamToolsIndexPrivate;
- BamToolsIndexPrivate* d;
+ // return true if FILE* is open (presumably refers to m_indexStream; confirm in implementation)
+ bool IsOpen(void) const;
+ // opens index file @filename according to requested @mode, return true if opened OK
+ bool OpenIndexFile(const std::string& filename, const std::string& mode);
+ // updates in-memory cache of index data, depending on current cache mode
+ void UpdateCache(void);
+
+ // data members (protected, so derived classes can access them directly)
+ protected:
+ FILE* m_indexStream; // C stdio stream for the index file; NOTE(review): FILE comes from <cstdio>, which the visible include block does not name; confirm it arrives through an included header
+ std::string m_indexFilename; // presumably the name of the index file in use; confirm in implementation
+ BamIndex::IndexCacheMode m_cacheMode; // caching mode value (one of IndexCacheMode)
};
} // namespace BamTools
-#endif // BAM_INDEX_H
\ No newline at end of file
+#endif // BAM_INDEX_H