--- /dev/null
+// ***************************************************************************
+// BamStandardIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 10 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#ifndef BAM_STANDARD_INDEX_FORMAT_H
+#define BAM_STANDARD_INDEX_FORMAT_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include "api/BamAux.h"
+#include "api/BamIndex.h"
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// -----------------------------------------------------------------------------
+// BamStandardIndex data structures
+
+// defines start and end of a contiguous run of alignments
+struct BaiAlignmentChunk {
+
+ // data members
+ uint64_t Start;
+ uint64_t Stop;
+
+ // constructor
+ BaiAlignmentChunk(const uint64_t& start = 0,
+ const uint64_t& stop = 0)
+ : Start(start)
+ , Stop(stop)
+ { }
+};
+
+// comparison operator (for sorting)
+inline
+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+ return lhs.Start < rhs.Start;
+}
+
+// convenience typedef for a list of all alignment 'chunks' in a BAI bin
+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+
+// convenience typedef for a map of all BAI bins in a reference (ID => chunks)
+typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+
+// convenience typedef for a list of all 'linear offsets' in a reference
+typedef std::vector<uint64_t> BaiLinearOffsetVector;
+
+// contains all fields necessary for building, loading, & writing
+// full BAI index data for a single reference
+struct BaiReferenceEntry {
+
+ // data members
+ int32_t ID;
+ BaiBinMap Bins;
+ BaiLinearOffsetVector LinearOffsets;
+
+ // ctor
+ BaiReferenceEntry(const int32_t& id = -1)
+ : ID(id)
+ { }
+};
+
+// provides (persistent) summary of BaiReferenceEntry's index data
+struct BaiReferenceSummary {
+
+ // data members
+ int NumBins;
+ int NumLinearOffsets;
+ uint64_t FirstBinFilePosition;
+ uint64_t FirstLinearOffsetFilePosition;
+
+ // ctor
+ BaiReferenceSummary(void)
+ : NumBins(0)
+ , NumLinearOffsets(0)
+ , FirstBinFilePosition(0)
+ , FirstLinearOffsetFilePosition(0)
+ { }
+};
+
+// convenience typedef for describing a full BAI index file summary
+typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+
+// end BamStandardIndex data structures
+// -----------------------------------------------------------------------------
+
+class BamStandardIndex : public BamIndex {
+
+ // ctor & dtor
+ public:
+ BamStandardIndex(Internal::BamReaderPrivate* reader);
+ ~BamStandardIndex(void);
+
+ // BamIndex implementation
+ public:
+ // builds index from associated BAM file & writes out to index file
+ bool Create(void);
+ // returns whether reference has alignments or no
+ bool HasAlignments(const int& referenceID) const;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+ // loads existing data from file into memory
+ bool Load(const std::string& filename);
+ public:
+ // returns format's file extension
+ static const std::string Extension(void);
+
+ // internal methods
+ private:
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CloseFile(void);
+ bool IsFileOpen(void) const;
+ void OpenFile(const std::string& filename, const char* mode);
+ void Seek(const int64_t& position, const int& origin);
+ int64_t Tell(void) const;
+
+ // BAI index building methods
+ void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+ void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+ const uint32_t& currentBin,
+ const uint64_t& currentOffset,
+ const uint64_t& lastOffset);
+ void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+ const int& alignmentStartPosition,
+ const int& alignmentStopPosition,
+ const uint64_t& lastOffset);
+
+ // random-access methods
+ void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+ void CalculateCandidateBins(const uint32_t& begin,
+ const uint32_t& end,
+ std::set<uint16_t>& candidateBins);
+ void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+ const uint64_t& minOffset,
+ std::set<uint16_t>& candidateBins,
+ std::vector<int64_t>& offsets);
+ uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+
+ // BAI summary (create/load) methods
+ void ReserveForSummary(const int& numReferences);
+ void SaveBinsSummary(const int& refId, const int& numBins);
+ void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+ void SkipBins(const int& numBins);
+ void SkipLinearOffsets(const int& numLinearOffsets);
+ void SummarizeBins(BaiReferenceSummary& refSummary);
+ void SummarizeIndexFile(void);
+ void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+ void SummarizeReference(BaiReferenceSummary& refSummary);
+
+ // BAI full index input methods
+ void ReadBinID(uint32_t& binId);
+ void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+ void ReadIntoBuffer(const unsigned int& bytesRequested);
+ void ReadLinearOffset(uint64_t& linearOffset);
+ void ReadNumAlignmentChunks(int& numAlignmentChunks);
+ void ReadNumBins(int& numBins);
+ void ReadNumLinearOffsets(int& numLinearOffsets);
+ void ReadNumReferences(int& numReferences);
+
+ // BAI full index output methods
+ void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+ void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+ void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+ void WriteBins(const int& refId, BaiBinMap& bins);
+ void WriteHeader(void);
+ void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+ void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+
+ // data members
+ private:
+ bool m_isBigEndian;
+ BaiFileSummary m_indexFileSummary;
+
+ // our input buffer
+ unsigned int m_bufferLength;
+
+ struct RaiiWrapper {
+ FILE* IndexStream;
+ char* Buffer;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper Resources;
+
+ // static methods
+ private:
+ // checks if the buffer is large enough to accomodate the requested size
+ static void CheckBufferSize(char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // checks if the buffer is large enough to accomodate the requested size
+ static void CheckBufferSize(unsigned char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // static constants
+ private:
+ static const int MAX_BIN;
+ static const int BAM_LIDX_SHIFT;
+ static const std::string BAI_EXTENSION;
+ static const char* const BAI_MAGIC;
+ static const int SIZEOF_ALIGNMENTCHUNK;
+ static const int SIZEOF_BINCORE;
+ static const int SIZEOF_LINEAROFFSET;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAM_STANDARD_INDEX_FORMAT_H