// ***************************************************************************
// BamIndex.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 19 November 2010 (DB)
+// Last modified: 10 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides basic BAM index interface
// ***************************************************************************
#ifndef BAM_INDEX_H
#define BAM_INDEX_H
-#include <api/api_global.h>
-#include <api/BamAux.h>
-#include <iostream>
+#include "api/api_global.h"
+#include "api/BamAux.h"
#include <string>
-#include <vector>
namespace BamTools {
-class BamReader;
-class BgzfData;
-
namespace Internal {
- class BamStandardIndex;
- class BamToolsIndex;
+ class BamReaderPrivate;
} // namespace Internal
-// --------------------------------------------------
-// BamIndex base class
+/*! \class BamTools::BamIndex
+ \brief Provides methods for generating & loading BAM index files.
+
+ This class straddles the line between public API and internal
+ implementation detail. Most client code should never have to use this
+ class directly.
+
+ It is exposed to the public API to allow advanced users to implement
+ their own custom indexing schemes.
+*/
+
class API_EXPORT BamIndex {
- // specify index-caching behavior
- //
- // @FullIndexCaching - store entire index file contents in memory
- // @LimitedIndexCaching - store only index data for current reference
- // being processed
- // @NoIndexCaching - do not store any index data. Load as needed to
- // calculate jump offset
- public: enum BamIndexCacheMode { FullIndexCaching = 0
- , LimitedIndexCaching
- , NoIndexCaching
- };
+ // enums
+ public:
+
+ // list of supported BamIndex types
+ enum IndexType { BAMTOOLS = 0
+ , STANDARD
+ };
// ctor & dtor
public:
- BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
- virtual ~BamIndex(void);
+ BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { }
+ virtual ~BamIndex(void) { }
// index interface
public:
- // creates index data (in-memory) from current reader data
- virtual bool Build(void) =0;
- // returns supported file extension
- virtual const std::string Extension(void) const =0;
+ // builds index from associated BAM file & writes out to index file
+ virtual bool Create(void) =0;
+
+ // returns a human-readable description of the last error encountered
+ std::string GetErrorString(void) { return m_errorString; }
+
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID) const =0;
- // attempts to use index to jump to region; returns success/fail
+
+ // attempts to use index data to jump to @region, returns success/fail
// a "successful" jump indicates no error, but not whether this region has data
- // * thus, the method sets a flag to indicate whether there are alignments
+ // * thus, the method sets a flag to indicate whether there are alignments
// available after the jump position
virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+
// loads existing data from file into memory
- virtual bool Load(const std::string& filename);
- // change the index caching behavior
- virtual void SetCacheMode(const BamIndexCacheMode mode);
- // writes in-memory index data out to file
- // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
- virtual bool Write(const std::string& bamFilename);
-
- // derived-classes MUST provide implementation
- protected:
- // clear all current index offset data in memory
- virtual void ClearAllData(void) =0;
- // return file position after header metadata
- virtual const off_t DataBeginOffset(void) const =0;
- // return true if all index data is cached
- virtual bool HasFullDataCache(void) const =0;
- // clears index data from all references except the first
- virtual void KeepOnlyFirstReferenceOffsets(void) =0;
- // load index data for all references, return true if loaded OK
- // @saveData - save data in memory if true, just read & discard if false
- virtual bool LoadAllReferences(bool saveData = true) =0;
- // load first reference from file, return true if loaded OK
- // @saveData - save data in memory if true, just read & discard if false
- virtual bool LoadFirstReference(bool saveData = true) =0;
- // load header data from index file, return true if loaded OK
- virtual bool LoadHeader(void) =0;
- // position file pointer to first reference begin, return true if skipped OK
- virtual bool SkipToFirstReference(void) =0;
- // write index reference data
- virtual bool WriteAllReferences(void) =0;
- // write index header data
- virtual bool WriteHeader(void) =0;
+ virtual bool Load(const std::string& filename) =0;
+
+ // returns the 'type' enum for derived index format
+ virtual BamIndex::IndexType Type(void) const =0;
+
+ //! \cond
// internal methods
protected:
- // rewind index file to beginning of index data, return true if rewound OK
- bool Rewind(void);
-
- private:
- // return true if FILE* is open
- bool IsOpen(void) const;
- // opens index file according to requested mode, return true if opened OK
- bool OpenIndexFile(const std::string& filename, const std::string& mode);
- // updates in-memory cache of index data, depending on current cache mode
- void UpdateCache(void);
-
- // factory methods for returning proper BamIndex-derived type based on available index files
- public:
-
- // returns index based on BAM filename 'stub'
- // checks first for preferred type, returns that type if found
- // (if not found, attmempts to load other type(s), returns 0 if NONE found)
- //
- // ** default preferred type is BamToolsIndex ** use this anytime it exists
- enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
- static BamIndex* FromBamFilename(const std::string& bamFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
-
- // returns index based on explicitly named index file (or 0 if not found)
- static BamIndex* FromIndexFilename(const std::string& indexFilename,
- BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader);
+ void SetErrorString(const std::string& where, const std::string& what) const {
+ m_errorString = where + ": " + what;
+ }
// data members
protected:
- BamTools::BgzfData* m_BGZF;
- BamTools::BamReader* m_reader;
- BamTools::RefVector m_references;
- BamIndex::BamIndexCacheMode m_cacheMode;
- FILE* m_indexStream;
-
+ Internal::BamReaderPrivate* m_reader; // copy, not owned
+ mutable std::string m_errorString;
- friend class Internal::BamStandardIndex;
- friend class Internal::BamToolsIndex;
+ //! \endcond
};
} // namespace BamTools