]> git.donarmstrong.com Git - bamtools.git/blob - src/api/internal/BamStandardIndex_p.h
Cleaned up intra-API includes & moved version numbers to 2.0.0
[bamtools.git] / src / api / internal / BamStandardIndex_p.h
1 // ***************************************************************************
2 // BamStandardIndex_p.h (c) 2010 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides index operations for the standardized BAM index format (".bai")
8 // ***************************************************************************
9
10 #ifndef BAM_STANDARD_INDEX_FORMAT_H
11 #define BAM_STANDARD_INDEX_FORMAT_H
12
13 //  -------------
14 //  W A R N I N G
15 //  -------------
16 //
17 // This file is not part of the BamTools API.  It exists purely as an
18 // implementation detail.  This header file may change from version to
19 // version without notice, or even be removed.
20 //
21 // We mean it.
22
23 #include "api/BamAux.h"
24 #include "api/BamIndex.h"
25 #include <map>
26 #include <set>
27 #include <string>
28 #include <vector>
29
30 namespace BamTools {
31 namespace Internal {
32
33 // -----------------------------------------------------------------------------
34 // BamStandardIndex data structures
35
// Marks where a contiguous run of alignments starts and where it stops.
struct BaiAlignmentChunk {

    // where the run begins
    uint64_t Start;
    // where the run ends
    uint64_t Stop;

    // builds a chunk from @start and @stop; each defaults to 0 when omitted
    BaiAlignmentChunk(const uint64_t& start = 0, const uint64_t& stop = 0)
        : Start(start), Stop(stop)
    { }
};
50
51 // comparison operator (for sorting)
52 inline
53 bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
54     return lhs.Start < rhs.Start;
55 }
56
57 // convenience typedef for a list of all alignment 'chunks' in a BAI bin
58 typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
59
60 // convenience typedef for a map of all BAI bins in a reference (ID => chunks)
61 typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
62
// shorthand: the list of every 'linear offset' recorded for one reference
typedef std::vector<uint64_t> BaiLinearOffsetVector;
65
66 // contains all fields necessary for building, loading, & writing
67 // full BAI index data for a single reference
68 struct BaiReferenceEntry {
69
70     // data members
71     int32_t ID;
72     BaiBinMap Bins;
73     BaiLinearOffsetVector LinearOffsets;
74
75     // ctor
76     BaiReferenceEntry(const int32_t& id = -1)
77         : ID(id)
78     { }
79 };
80
// Lightweight (persistent) summary of one BaiReferenceEntry's index data:
// how many bins / linear offsets it has and the recorded position of the
// first of each (presumably positions within the index file -- confirm
// against the implementation).
struct BaiReferenceSummary {

    int NumBins;                             // number of bins
    int NumLinearOffsets;                    // number of linear offsets
    uint64_t FirstBinFilePosition;           // position of the first bin
    uint64_t FirstLinearOffsetFilePosition;  // position of the first linear offset

    // every field starts out at zero
    BaiReferenceSummary()
        : NumBins(0), NumLinearOffsets(0)
        , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
    { }
};
98
99 // convenience typedef for describing a full BAI index file summary
100 typedef std::vector<BaiReferenceSummary> BaiFileSummary;
101
102 // end BamStandardIndex data structures
103 // -----------------------------------------------------------------------------
104
105 class BamStandardIndex : public BamIndex {
106
107     // ctor & dtor
108     public:
109         BamStandardIndex(Internal::BamReaderPrivate* reader);
110         ~BamStandardIndex(void);
111
112     // BamIndex implementation
113     public:
114         // builds index from associated BAM file & writes out to index file
115         bool Create(void);
116         // returns whether reference has alignments or no
117         bool HasAlignments(const int& referenceID) const;
118         // attempts to use index data to jump to @region, returns success/fail
119         // a "successful" jump indicates no error, but not whether this region has data
120         //   * thus, the method sets a flag to indicate whether there are alignments
121         //     available after the jump position
122         bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
123         // loads existing data from file into memory
124         bool Load(const std::string& filename);
125         // change the index caching behavior
126         void SetCacheMode(const BamIndex::IndexCacheMode& mode);
127     public:
128         // returns format's file extension
129         static const std::string Extension(void);
130
131     // internal methods
132     private:
133
134         // index file ops
135         void CheckMagicNumber(void);
136         void CloseFile(void);
137         bool IsFileOpen(void) const;
138         void OpenFile(const std::string& filename, const char* mode);
139         void Seek(const int64_t& position, const int& origin);
140         int64_t Tell(void) const;
141
142         // BAI index building methods
143         void ClearReferenceEntry(BaiReferenceEntry& refEntry);
144         void SaveAlignmentChunkToBin(BaiBinMap& binMap,
145                                      const uint32_t& currentBin,
146                                      const uint64_t& currentOffset,
147                                      const uint64_t& lastOffset);
148         void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
149                                    const int& alignmentStartPosition,
150                                    const int& alignmentStopPosition,
151                                    const uint64_t& lastOffset);
152
153         // random-access methods
154         void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
155         void CalculateCandidateBins(const uint32_t& begin,
156                                     const uint32_t& end,
157                                     std::set<uint16_t>& candidateBins);
158         void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
159                                        const uint64_t& minOffset,
160                                        std::set<uint16_t>& candidateBins,
161                                        std::vector<int64_t>& offsets);
162         uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
163         void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
164         uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
165
166         // BAI summary (create/load) methods
167         void ReserveForSummary(const int& numReferences);
168         void SaveBinsSummary(const int& refId, const int& numBins);
169         void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
170         void SkipBins(const int& numBins);
171         void SkipLinearOffsets(const int& numLinearOffsets);
172         void SummarizeBins(BaiReferenceSummary& refSummary);
173         void SummarizeIndexFile(void);
174         void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
175         void SummarizeReference(BaiReferenceSummary& refSummary);
176
177         // BAI full index input methods
178         void ReadBinID(uint32_t& binId);
179         void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
180         void ReadIntoBuffer(const unsigned int& bytesRequested);
181         void ReadLinearOffset(uint64_t& linearOffset);
182         void ReadNumAlignmentChunks(int& numAlignmentChunks);
183         void ReadNumBins(int& numBins);
184         void ReadNumLinearOffsets(int& numLinearOffsets);
185         void ReadNumReferences(int& numReferences);
186
187         // BAI full index output methods
188         void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
189         void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
190         void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
191         void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
192         void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
193         void WriteBins(const int& refId, BaiBinMap& bins);
194         void WriteHeader(void);
195         void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
196         void WriteReferenceEntry(BaiReferenceEntry& refEntry);
197
198     // data members
199     private:
200         bool m_isBigEndian;
201         BamIndex::IndexCacheMode m_cacheMode;
202         BaiFileSummary m_indexFileSummary;
203
204         // our input buffer
205         unsigned int m_bufferLength;
206
207         struct RaiiWrapper {
208             FILE* IndexStream;
209             char* Buffer;
210             RaiiWrapper(void);
211             ~RaiiWrapper(void);
212         };
213         RaiiWrapper Resources;
214
215     // static methods
216     private:
217         // checks if the buffer is large enough to accomodate the requested size
218         static void CheckBufferSize(char*& buffer,
219                                     unsigned int& bufferLength,
220                                     const unsigned int& requestedBytes);
221         // checks if the buffer is large enough to accomodate the requested size
222         static void CheckBufferSize(unsigned char*& buffer,
223                                     unsigned int& bufferLength,
224                                     const unsigned int& requestedBytes);
225     // static constants
226     private:
227         static const int MAX_BIN;
228         static const int BAM_LIDX_SHIFT;
229         static const std::string BAI_EXTENSION;
230         static const char* const BAI_MAGIC;
231         static const int SIZEOF_ALIGNMENTCHUNK;
232         static const int SIZEOF_BINCORE;
233         static const int SIZEOF_LINEAROFFSET;
234 };
235
236 } // namespace Internal
237 } // namespace BamTools
238
239 #endif // BAM_STANDARD_INDEX_FORMAT_H