1 // ***************************************************************************
\r
2 // BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
\r
3 // Marth Lab, Department of Biology, Boston College
\r
4 // All rights reserved.
\r
5 // ---------------------------------------------------------------------------
\r
6 // Last modified: 16 August 2010 (DB)
\r
7 // ---------------------------------------------------------------------------
\r
8 // BGZF routines were adapted from the bgzf.c code developed at the Broad Institute.
\r
10 // ---------------------------------------------------------------------------
\r
11 // Provides the basic functionality for reading & writing BGZF files
\r
12 // ***************************************************************************
\r
// Platform-specific large-file support
//
// ftell64(stream) / fseek64(stream, offset, origin) are drop-in replacements for
// ftell / fseek that use the platform's wide-offset variants. Exactly one pair is
// selected per platform; defining both pairs unconditionally would be an
// incompatible macro redefinition.
#ifndef BAMTOOLS_LFS
#define BAMTOOLS_LFS
    #if defined(WIN32) || defined(_WIN32)
        // Microsoft C runtime: explicit 64-bit offset functions
        #define ftell64(a)     _ftelli64(a)
        #define fseek64(a,b,c) _fseeki64(a,b,c)
    #else
        // POSIX: offsets are off_t.
        // NOTE(review): on 32-bit POSIX targets off_t is only 64 bits wide when the
        // build enables large-file support (e.g. -D_FILE_OFFSET_BITS=64) - confirm
        // the build flags.
        #define ftell64(a)     ftello(a)
        #define fseek64(a,b,c) fseeko(a,b,c)
    #endif
#endif // BAMTOOLS_LFS
\r
// Platform-specific type definitions
//
// Provides the fixed-width integer names (int8_t ... uint64_t) used throughout
// this header. Toolchains that ship <stdint.h> get the standard definitions;
// only Visual C++ releases older than 2010 (_MSC_VER < 1600), which have no
// <stdint.h>, fall back to hand-written typedefs. Declaring the typedefs
// unconditionally would clash with the standard header wherever it is present.
#ifndef BAMTOOLS_TYPES
#define BAMTOOLS_TYPES
    #if defined(_MSC_VER) && (_MSC_VER < 1600)
        // 'signed char' rather than plain 'char': the signedness of plain char is
        // implementation-defined (and switchable with /J), int8_t must be signed
        typedef signed char        int8_t;
        typedef unsigned char      uint8_t;
        typedef short              int16_t;
        typedef unsigned short     uint16_t;
        typedef int                int32_t;
        typedef unsigned int       uint32_t;
        typedef long long          int64_t;
        typedef unsigned long long uint64_t;
    #else
        #include <stdint.h>
    #endif
#endif // BAMTOOLS_TYPES
\r
57 namespace BamTools {
\r
// gzip member header values (RFC 1952)
const int GZIP_ID1   = 0x1f;    // first magic byte  (31)
const int GZIP_ID2   = 0x8b;    // second magic byte (139)
const int CM_DEFLATE = 8;       // compression method: deflate
const int FLG_FEXTRA = 1 << 2;  // flag bit announcing an 'extra' field (4)
const int OS_UNKNOWN = 0xff;    // operating-system byte: unknown (255)

// BGZF 'extra' subfield layout
const int BGZF_XLEN = 6;        // total length of the extra field
const int BGZF_ID1  = 66;       // subfield identifier, first byte  (ASCII 'B')
const int BGZF_ID2  = 67;       // subfield identifier, second byte (ASCII 'C')
const int BGZF_LEN  = 2;        // length of the subfield payload

// zlib tuning parameters
const int GZIP_WINDOW_BITS    = -15;  // negative window bits select a raw deflate stream in zlib
const int Z_DEFAULT_MEM_LEVEL = 8;

// BGZF block geometry (all sizes in bytes)
const int BLOCK_HEADER_LENGTH = 18;
const int BLOCK_FOOTER_LENGTH = 8;
const int MAX_BLOCK_SIZE      = 64 * 1024;  // 65536
const int DEFAULT_BLOCK_SIZE  = 64 * 1024;  // 65536
\r
// Interior of the BgzfData declaration: data members first, then the file-level
// interface, the internal block helpers and the static byte (un)packing utilities.
// NOTE(review): the opening line of the type and some member lines are not visible
// in this excerpt - confirm against the complete header before relying on this list.
//
// Sizes, lengths and offsets are plain unsigned int; only BlockAddress is 64 bits wide.
// NOTE(review): presumably BlockAddress is a position in the file and BlockOffset a
// position inside the current block - confirm against the implementation.
82 unsigned int UncompressedBlockSize;
\r
83 unsigned int CompressedBlockSize;
\r
84 unsigned int BlockLength;
\r
85 unsigned int BlockOffset;
\r
86 uint64_t BlockAddress;
\r
89 bool IsWriteUncompressed;
\r
// raw character buffers
// NOTE(review): allocation and ownership are not shown here - confirm in the
// constructor/destructor.
91 char* UncompressedBlock;
\r
92 char* CompressedBlock;
\r
94 // constructor & destructor
\r
99 // main interface methods
\r
101 // closes BGZF file
\r
103 // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
// isWriteUncompressed defaults to false, so it may be omitted by callers
\r
104 bool Open(const std::string& filename, const char* mode, bool isWriteUncompressed = false);
\r
105 // reads BGZF data into a byte buffer
// NOTE(review): the meaning of the int result (byte count vs. error code) is not
// visible in this header - confirm in the implementation
\r
106 int Read(char* data, const unsigned int dataLength);
\r
107 // seek to position in BGZF file
\r
108 bool Seek(int64_t position);
\r
109 // get file position in BGZF file
\r
110 int64_t Tell(void);
\r
111 // writes the supplied data into the BGZF buffer
\r
112 unsigned int Write(const char* data, const unsigned int dataLen);
\r
114 // internal methods
\r
116 // compresses the current block
\r
117 int DeflateBlock(void);
\r
118 // flushes the data in the BGZF block
\r
119 void FlushBlock(void);
\r
120 // de-compresses the current block
\r
121 int InflateBlock(const int& blockLength);
\r
122 // reads a BGZF block
\r
123 bool ReadBlock(void);
\r
125 // static 'utility' methods
// (their definitions follow the type declaration further down in this file;
//  every Unpack* helper is declared for both char* and const char* buffers)
\r
127 // checks BGZF block header
\r
128 static inline bool CheckBlockHeader(char* header);
\r
129 // packs an unsigned integer into the specified buffer
\r
130 static inline void PackUnsignedInt(char* buffer, unsigned int value);
\r
131 // packs an unsigned short into the specified buffer
\r
132 static inline void PackUnsignedShort(char* buffer, unsigned short value);
\r
133 // unpacks a buffer into a double
\r
134 static inline double UnpackDouble(char* buffer);
\r
135 static inline double UnpackDouble(const char* buffer);
\r
136 // unpacks a buffer into a float
\r
137 static inline float UnpackFloat(char* buffer);
\r
138 static inline float UnpackFloat(const char* buffer);
\r
139 // unpacks a buffer into a signed int
\r
140 static inline signed int UnpackSignedInt(char* buffer);
\r
141 static inline signed int UnpackSignedInt(const char* buffer);
\r
142 // unpacks a buffer into a signed short
\r
143 static inline signed short UnpackSignedShort(char* buffer);
\r
144 static inline signed short UnpackSignedShort(const char* buffer);
\r
145 // unpacks a buffer into an unsigned int
\r
146 static inline unsigned int UnpackUnsignedInt(char* buffer);
\r
147 static inline unsigned int UnpackUnsignedInt(const char* buffer);
\r
148 // unpacks a buffer into an unsigned short
\r
149 static inline unsigned short UnpackUnsignedShort(char* buffer);
\r
150 static inline unsigned short UnpackUnsignedShort(const char* buffer);
\r
153 // -------------------------------------------------------------
\r
154 // static 'utility' method implementations
\r
156 // checks BGZF block header
\r
158 bool BgzfData::CheckBlockHeader(char* header) {
\r
159 return (header[0] == GZIP_ID1 &&
\r
160 header[1] == (char)GZIP_ID2 &&
\r
161 header[2] == Z_DEFLATED &&
\r
162 (header[3] & FLG_FEXTRA) != 0 &&
\r
163 BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
\r
164 header[12] == BGZF_ID1 &&
\r
165 header[13] == BGZF_ID2 &&
\r
166 BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
\r
169 // 'packs' an unsigned integer into the specified buffer
\r
171 void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
\r
172 buffer[0] = (char)value;
\r
173 buffer[1] = (char)(value >> 8);
\r
174 buffer[2] = (char)(value >> 16);
\r
175 buffer[3] = (char)(value >> 24);
\r
178 // 'packs' an unsigned short into the specified buffer
\r
180 void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
\r
181 buffer[0] = (char)value;
\r
182 buffer[1] = (char)(value >> 8);
\r
185 // 'unpacks' a buffer into a double (includes both non-const & const char* flavors)
\r
187 double BgzfData::UnpackDouble(char* buffer) {
\r
188 union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
\r
190 un.valueBuffer[0] = buffer[0];
\r
191 un.valueBuffer[1] = buffer[1];
\r
192 un.valueBuffer[2] = buffer[2];
\r
193 un.valueBuffer[3] = buffer[3];
\r
194 un.valueBuffer[4] = buffer[4];
\r
195 un.valueBuffer[5] = buffer[5];
\r
196 un.valueBuffer[6] = buffer[6];
\r
197 un.valueBuffer[7] = buffer[7];
\r
202 double BgzfData::UnpackDouble(const char* buffer) {
\r
203 union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
\r
205 un.valueBuffer[0] = buffer[0];
\r
206 un.valueBuffer[1] = buffer[1];
\r
207 un.valueBuffer[2] = buffer[2];
\r
208 un.valueBuffer[3] = buffer[3];
\r
209 un.valueBuffer[4] = buffer[4];
\r
210 un.valueBuffer[5] = buffer[5];
\r
211 un.valueBuffer[6] = buffer[6];
\r
212 un.valueBuffer[7] = buffer[7];
\r
216 // 'unpacks' a buffer into a float (includes both non-const & const char* flavors)
\r
218 float BgzfData::UnpackFloat(char* buffer) {
\r
219 union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
\r
221 un.valueBuffer[0] = buffer[0];
\r
222 un.valueBuffer[1] = buffer[1];
\r
223 un.valueBuffer[2] = buffer[2];
\r
224 un.valueBuffer[3] = buffer[3];
\r
229 float BgzfData::UnpackFloat(const char* buffer) {
\r
230 union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
\r
232 un.valueBuffer[0] = buffer[0];
\r
233 un.valueBuffer[1] = buffer[1];
\r
234 un.valueBuffer[2] = buffer[2];
\r
235 un.valueBuffer[3] = buffer[3];
\r
239 // 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors)
\r
241 signed int BgzfData::UnpackSignedInt(char* buffer) {
\r
242 union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
\r
244 un.valueBuffer[0] = buffer[0];
\r
245 un.valueBuffer[1] = buffer[1];
\r
246 un.valueBuffer[2] = buffer[2];
\r
247 un.valueBuffer[3] = buffer[3];
\r
252 signed int BgzfData::UnpackSignedInt(const char* buffer) {
\r
253 union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
\r
255 un.valueBuffer[0] = buffer[0];
\r
256 un.valueBuffer[1] = buffer[1];
\r
257 un.valueBuffer[2] = buffer[2];
\r
258 un.valueBuffer[3] = buffer[3];
\r
262 // 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors)
\r
264 signed short BgzfData::UnpackSignedShort(char* buffer) {
\r
265 union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
\r
267 un.valueBuffer[0] = buffer[0];
\r
268 un.valueBuffer[1] = buffer[1];
\r
273 signed short BgzfData::UnpackSignedShort(const char* buffer) {
\r
274 union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
\r
276 un.valueBuffer[0] = buffer[0];
\r
277 un.valueBuffer[1] = buffer[1];
\r
281 // 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors)
\r
283 unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
\r
284 union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
\r
286 un.valueBuffer[0] = buffer[0];
\r
287 un.valueBuffer[1] = buffer[1];
\r
288 un.valueBuffer[2] = buffer[2];
\r
289 un.valueBuffer[3] = buffer[3];
\r
294 unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {
\r
295 union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
\r
297 un.valueBuffer[0] = buffer[0];
\r
298 un.valueBuffer[1] = buffer[1];
\r
299 un.valueBuffer[2] = buffer[2];
\r
300 un.valueBuffer[3] = buffer[3];
\r
304 // 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors)
\r
306 unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
\r
307 union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
\r
309 un.valueBuffer[0] = buffer[0];
\r
310 un.valueBuffer[1] = buffer[1];
\r
315 unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {
\r
316 union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
\r
318 un.valueBuffer[0] = buffer[0];
\r
319 un.valueBuffer[1] = buffer[1];
\r
323 } // namespace BamTools
\r