// ***************************************************************************
// BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 8 December 2009 (DB)
// ---------------------------------------------------------------------------
// BGZF routines were adapted from the bgzf.c code developed at the Broad
// Institute.
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading & writing BGZF files
// ***************************************************************************
\r
// Platform-specific type definitions
//
// Visual C++ releases older than 2010 (_MSC_VER < 1600) do not ship
// <stdint.h>, so the fixed-width integer types are declared by hand for those
// compilers only.  Every other toolchain gets the types from the standard
// header; declaring them unconditionally would clash with it (for example,
// int64_t is `long`, not `long long`, on LP64 platforms).
#if defined(_MSC_VER) && (_MSC_VER < 1600)
    typedef signed char        int8_t;    // plain `char` may be unsigned
    typedef unsigned char      uint8_t;
    typedef short              int16_t;
    typedef unsigned short     uint16_t;
    typedef int                int32_t;
    typedef unsigned int       uint32_t;
    typedef long long          int64_t;
    typedef unsigned long long uint64_t;
#else
    #include <stdint.h>
#endif
\r
42 namespace BamTools {
\r
// ---- gzip member header values ----
// ID1/ID2 are the two gzip magic bytes; CheckBlockHeader compares them with
// header[0] and header[1].
const int GZIP_ID1   = 0x1f;   // 31
const int GZIP_ID2   = 0x8b;   // 139
const int CM_DEFLATE = 8;      // compression-method value
const int FLG_FEXTRA = 4;      // flag bit tested against header[3]
const int OS_UNKNOWN = 255;

// ---- BGZF extra-subfield values ----
// CheckBlockHeader expects BGZF_XLEN at header[10..11], the identifier bytes
// at header[12] and header[13], and BGZF_LEN at header[14..15].
const int BGZF_XLEN = 6;
const int BGZF_ID1  = 66;      // ASCII 'B'
const int BGZF_ID2  = 67;      // ASCII 'C'
const int BGZF_LEN  = 2;

// ---- zlib tuning values ----
// NOTE(review): presumably handed to zlib's deflateInit2/inflateInit2 by the
// implementation file -- confirm there.
const int GZIP_WINDOW_BITS    = -15;
const int Z_DEFAULT_MEM_LEVEL = 8;

// ---- BGZF block geometry (bytes) ----
const int BLOCK_HEADER_LENGTH = 18;
const int BLOCK_FOOTER_LENGTH = 8;
const int MAX_BLOCK_SIZE      = 64 * 1024;   // 65536
const int DEFAULT_BLOCK_SIZE  = 64 * 1024;   // 65536
\r
// Block bookkeeping.
// NOTE(review): the meanings below are inferred from the member names only --
// confirm against the implementation file.
unsigned int UncompressedBlockSize;   // presumably capacity of UncompressedBlock
unsigned int CompressedBlockSize;     // presumably capacity of CompressedBlock
unsigned int BlockLength;
unsigned int BlockOffset;
uint64_t     BlockAddress;

// Raw byte buffers for the current block.
// NOTE(review): allocation and ownership are not visible in this header.
char* UncompressedBlock;
char* CompressedBlock;
\r
// constructor & destructor
// NOTE(review): the declarations this comment introduces are not present in
// the visible source.

// compresses the current block
int DeflateBlock(void);

// flushes the data in the BGZF block
void FlushBlock(void);

// de-compresses the current block
int InflateBlock(const int& blockLength);

// opens the BGZF file; mode is either "rb" (reading) or "wb" (writing)
void Open(const std::string& filename, const char* mode);

// reads BGZF data into a byte buffer
int Read(char* data, const unsigned int dataLength);

// NOTE(review): presumably reads the next BGZF block from the file -- confirm
// against the implementation.
int ReadBlock(void);

// seek to position in BAM file
bool Seek(int64_t position);

// get file position in BAM file
// NOTE(review): the declaration this comment introduces is not present in the
// visible source.

// writes the supplied data into the BGZF buffer
unsigned int Write(const char* data, const unsigned int dataLen);

// ---- static byte-packing helpers ----

// checks BGZF block header
static inline bool CheckBlockHeader(char* header);

// packs an unsigned integer into the specified buffer
static inline void PackUnsignedInt(char* buffer, unsigned int value);

// packs an unsigned short into the specified buffer
static inline void PackUnsignedShort(char* buffer, unsigned short value);

// unpacks a buffer into a signed int
static inline signed int UnpackSignedInt(char* buffer);

// unpacks a buffer into an unsigned int
static inline unsigned int UnpackUnsignedInt(char* buffer);

// unpacks a buffer into an unsigned short
static inline unsigned short UnpackUnsignedShort(char* buffer);
\r
116 // -------------------------------------------------------------
\r
119 bool BgzfData::CheckBlockHeader(char* header) {
\r
121 return (header[0] == GZIP_ID1 &&
\r
122 header[1] == (char)GZIP_ID2 &&
\r
123 header[2] == Z_DEFLATED &&
\r
124 (header[3] & FLG_FEXTRA) != 0 &&
\r
125 BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
\r
126 header[12] == BGZF_ID1 &&
\r
127 header[13] == BGZF_ID2 &&
\r
128 BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
\r
131 // packs an unsigned integer into the specified buffer
\r
133 void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
\r
134 buffer[0] = (char)value;
\r
135 buffer[1] = (char)(value >> 8);
\r
136 buffer[2] = (char)(value >> 16);
\r
137 buffer[3] = (char)(value >> 24);
\r
140 // packs an unsigned short into the specified buffer
\r
142 void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
\r
143 buffer[0] = (char)value;
\r
144 buffer[1] = (char)(value >> 8);
\r
147 // unpacks a buffer into a signed int
\r
149 signed int BgzfData::UnpackSignedInt(char* buffer) {
\r
150 union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
\r
152 un.valueBuffer[0] = buffer[0];
\r
153 un.valueBuffer[1] = buffer[1];
\r
154 un.valueBuffer[2] = buffer[2];
\r
155 un.valueBuffer[3] = buffer[3];
\r
159 // unpacks a buffer into an unsigned int
\r
161 unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
\r
162 union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
\r
164 un.valueBuffer[0] = buffer[0];
\r
165 un.valueBuffer[1] = buffer[1];
\r
166 un.valueBuffer[2] = buffer[2];
\r
167 un.valueBuffer[3] = buffer[3];
\r
171 // unpacks a buffer into an unsigned short
\r
173 unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
\r
174 union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un;
\r
176 un.valueBuffer[0] = buffer[0];
\r
177 un.valueBuffer[1] = buffer[1];
\r
181 } // namespace BamTools
\r