// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 19 July 2010 (DB)\r
+// Last modified: 16 August 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
using std::string;\r
using std::min;\r
\r
-BgzfData::BgzfData(void)\r
+BgzfData::BgzfData(bool writeUncompressed)\r
: UncompressedBlockSize(DEFAULT_BLOCK_SIZE)\r
, CompressedBlockSize(MAX_BLOCK_SIZE)\r
, BlockLength(0)\r
, BlockAddress(0)\r
, IsOpen(false)\r
, IsWriteOnly(false)\r
+ , IsWriteUncompressed(writeUncompressed)\r
, Stream(NULL)\r
, UncompressedBlock(NULL)\r
, CompressedBlock(NULL)\r
\r
// destructor
// releases the compressed and uncompressed block buffers
BgzfData::~BgzfData(void) {
-    if( CompressedBlock )   { delete[] CompressedBlock;   }
-    if( UncompressedBlock ) { delete[] UncompressedBlock; }
+    // delete[] on a null pointer is a no-op, so no null guard is needed
+    delete[] CompressedBlock;
+    delete[] UncompressedBlock;
}
\r
// closes BGZF file\r
buffer[13] = BGZF_ID2;\r
buffer[14] = BGZF_LEN;\r
\r
+ // set compression level\r
+ const int compressionLevel = ( IsWriteUncompressed ? 0 : Z_DEFAULT_COMPRESSION );\r
+ \r
// loop to retry for blocks that do not compress enough\r
int inputLength = BlockOffset;\r
int compressedLength = 0;\r
unsigned int bufferSize = CompressedBlockSize;\r
\r
- while(true) {\r
+ while ( true ) {\r
\r
// initialize zstream values\r
z_stream zs;\r
zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;\r
\r
// initialize the zlib compression algorithm\r
- if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {\r
+ if ( deflateInit2(&zs, compressionLevel, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK ) {\r
printf("BGZF ERROR: zlib deflate initialization failed.\n");\r
exit(1);\r
}\r
\r
// compress the data\r
int status = deflate(&zs, Z_FINISH);\r
- if(status != Z_STREAM_END) {\r
+ if ( status != Z_STREAM_END ) {\r
\r
deflateEnd(&zs);\r
\r
// reduce the input length and try again\r
- if(status == Z_OK) {\r
+ if ( status == Z_OK ) {\r
inputLength -= 1024;\r
- if(inputLength < 0) {\r
+ if( inputLength < 0 ) {\r
printf("BGZF ERROR: input reduction failed.\n");\r
exit(1);\r
}\r
}\r
\r
// finalize the compression routine\r
- if(deflateEnd(&zs) != Z_OK) {\r
+ if ( deflateEnd(&zs) != Z_OK ) {\r
printf("BGZF ERROR: zlib::deflateEnd() failed.\n");\r
exit(1);\r
}\r
\r
compressedLength = zs.total_out;\r
compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;\r
- if(compressedLength > MAX_BLOCK_SIZE) {\r
+ if ( compressedLength > MAX_BLOCK_SIZE ) {\r
printf("BGZF ERROR: deflate overflow.\n");\r
exit(1);\r
}\r
\r
// ensure that we have less than a block of data left\r
int remaining = BlockOffset - inputLength;\r
- if(remaining > 0) {\r
- if(remaining > inputLength) {\r
+ if ( remaining > 0 ) {\r
+ if ( remaining > inputLength ) {\r
printf("BGZF ERROR: after deflate, remainder too large.\n");\r
exit(1);\r
}\r
void BgzfData::FlushBlock(void) {\r
\r
// flush all of the remaining blocks\r
- while(BlockOffset > 0) {\r
+ while ( BlockOffset > 0 ) {\r
\r
// compress the data block\r
int blockLength = DeflateBlock();\r
// flush the data to our output stream\r
int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);\r
\r
- if(numBytesWritten != blockLength) {\r
+ if ( numBytesWritten != blockLength ) {\r
printf("BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
exit(1);\r
}\r
zs.avail_out = UncompressedBlockSize;\r
\r
int status = inflateInit2(&zs, GZIP_WINDOW_BITS);\r
- if (status != Z_OK) {\r
+ if ( status != Z_OK ) {\r
printf("BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");\r
return -1;\r
}\r
\r
status = inflate(&zs, Z_FINISH);\r
- if (status != Z_STREAM_END) {\r
+ if ( status != Z_STREAM_END ) {\r
inflateEnd(&zs);\r
printf("BGZF ERROR: could not decompress block - zlib::inflate() failed\n");\r
return -1;\r
}\r
\r
status = inflateEnd(&zs);\r
- if (status != Z_OK) {\r
+ if ( status != Z_OK ) {\r
printf("BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");\r
return -1;\r
}\r
// opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
bool BgzfData::Open(const string& filename, const char* mode) {\r
\r
- // determine open mode\r
- if ( strcmp(mode, "rb") == 0 ) {\r
+ // determine open mode\r
+ if ( strcmp(mode, "rb") == 0 )\r
IsWriteOnly = false;\r
- } else if ( strcmp(mode, "wb") == 0) {\r
+ else if ( strcmp(mode, "wb") == 0) \r
IsWriteOnly = true;\r
- } else {\r
+ else {\r
printf("BGZF ERROR: unknown file mode: %s\n", mode);\r
return false; \r
}\r
\r
+ // ----------------------------------------------------------------\r
// open Stream to read from/write to file, stdin, or stdout
// stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
- if ( (filename != "stdin") && (filename != "stdout") ) {\r
- // read/write BGZF data to/from a file\r
-// Stream = fopen64(filename.c_str(), mode);\r
+ \r
+ // read/write BGZF data to/from a file\r
+ if ( (filename != "stdin") && (filename != "stdout") )\r
Stream = fopen(filename.c_str(), mode);\r
- }\r
- else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
- // read BGZF data from stdin\r
-// Stream = freopen64(NULL, mode, stdin);\r
+ \r
+ // read BGZF data from stdin\r
+ else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) )\r
Stream = freopen(NULL, mode, stdin);\r
- }\r
- else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
- // write BGZF data to stdout\r
-// Stream = freopen64(NULL, mode, stdout);\r
+ \r
+ // write BGZF data to stdout\r
+ else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) )\r
Stream = freopen(NULL, mode, stdout);\r
- }\r
\r
- if(!Stream) {\r
+ if ( !Stream ) {\r
printf("BGZF ERROR: unable to open file %s\n", filename.c_str() );\r
return false;\r
}\r
// reads BGZF data into a byte buffer\r
int BgzfData::Read(char* data, const unsigned int dataLength) {\r
\r
- if (dataLength == 0) return 0;\r
+ if ( !IsOpen || IsWriteOnly || dataLength == 0 ) return 0;\r
\r
char* output = data;\r
unsigned int numBytesRead = 0;\r
- while (numBytesRead < dataLength) {\r
+ while ( numBytesRead < dataLength ) {\r
\r
int bytesAvailable = BlockLength - BlockOffset;\r
if ( bytesAvailable <= 0 ) {\r
- if (!ReadBlock()) return -1; \r
+ if ( !ReadBlock() ) return -1; \r
bytesAvailable = BlockLength - BlockOffset;\r
- if (bytesAvailable <= 0) break;\r
+ if ( bytesAvailable <= 0 ) break;\r
}\r
\r
char* buffer = UncompressedBlock;\r
int64_t blockAddress = ftell64(Stream);\r
\r
int count = fread(header, 1, sizeof(header), Stream);\r
- if (count == 0) {\r
+ if ( count == 0 ) {\r
BlockLength = 0;\r
return true;\r
}\r
\r
- if (count != sizeof(header)) {\r
+ if ( count != sizeof(header) ) {\r
printf("BGZF ERROR: read block failed - could not read block header\n");\r
return false;\r
}\r
\r
- if (!BgzfData::CheckBlockHeader(header)) {\r
+ if ( !BgzfData::CheckBlockHeader(header) ) {\r
printf("BGZF ERROR: read block failed - invalid block header\n");\r
return false;\r
}\r
int remaining = blockLength - BLOCK_HEADER_LENGTH;\r
\r
count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);\r
- if (count != remaining) {\r
+ if ( count != remaining ) {\r
printf("BGZF ERROR: read block failed - could not read data from block\n");\r
return false;\r
}\r
\r
count = InflateBlock(blockLength);\r
- if (count < 0) { \r
+ if ( count < 0 ) { \r
printf("BGZF ERROR: read block failed - could not decompress block data\n");\r
return false;\r
}\r
// seek to position in BGZF file\r
bool BgzfData::Seek(int64_t position) {\r
\r
+ if ( !IsOpen ) return false;\r
+ \r
int blockOffset = (position & 0xFFFF);\r
int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\r
\r
- if (fseek64(Stream, blockAddress, SEEK_SET) != 0) {\r
+ if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {\r
printf("BGZF ERROR: unable to seek in file\n");\r
return false;\r
}\r
\r
// get file position in BGZF file
// returns (BlockAddress << 16) | (BlockOffset & 0xFFFF): the block address in
// the upper bits and the offset within the block in the low 16 bits (the same
// layout that Seek() decodes); returns 0 if the file is not open
int64_t BgzfData::Tell(void) {
-    return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
+    // return type is int64_t, so report "not open" as integer 0 (not bool false)
+    if ( !IsOpen )
+        return 0;
+    else
+        return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
}
\r
// writes the supplied data into the BGZF buffer\r
unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {\r
\r
+ if ( !IsOpen || !IsWriteOnly ) return false;\r
+ \r
// initialize\r
unsigned int numBytesWritten = 0;\r
const char* input = data;\r
unsigned int blockLength = UncompressedBlockSize;\r
\r
// copy the data to the buffer\r
- while(numBytesWritten < dataLen) {\r
+ while ( numBytesWritten < dataLen ) {\r
\r
unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\r
char* buffer = UncompressedBlock;\r
input += copyLength;\r
numBytesWritten += copyLength;\r
\r
- if(BlockOffset == blockLength)\r
+ if ( BlockOffset == blockLength )\r
FlushBlock();\r
}\r
\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 19 July 2010 (DB)\r
+// Last modified: 16 August 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
\r
// BgzfData: state and operations for reading or writing a BGZF file.
//
// Data members:
//   UncompressedBlockSize, CompressedBlockSize
//       initialized by the constructor to DEFAULT_BLOCK_SIZE and MAX_BLOCK_SIZE
//   BlockLength, BlockOffset
//       Read() treats (BlockLength - BlockOffset) as the bytes still available
//       in the current block; Write() flushes once BlockOffset reaches
//       UncompressedBlockSize
//   BlockAddress
//       combined with BlockOffset by Tell() as (BlockAddress << 16) | BlockOffset
//   IsOpen
//       checked by Read(), Seek(), Tell() and Write() before doing any work
//   IsWriteOnly
//       set by Open(): false for mode "rb", true for mode "wb"
//   IsWriteUncompressed
//       set from the constructor argument; when true, blocks are deflated with
//       zlib compression level 0 instead of Z_DEFAULT_COMPRESSION
//   Stream
//       FILE* obtained in Open() via fopen()/freopen()
//   UncompressedBlock, CompressedBlock
//       block buffers; released with delete[] in the destructor
struct BgzfData {

-    // ---------------------------------
// data members
-    
-    unsigned int UncompressedBlockSize;
-    unsigned int CompressedBlockSize;
-    unsigned int BlockLength;
-    unsigned int BlockOffset;
-    uint64_t BlockAddress;
-    bool     IsOpen;
-    bool     IsWriteOnly;
-    FILE*    Stream;
-    char*    UncompressedBlock;
-    char*    CompressedBlock;
-
-    // ---------------------------------
+    public:
+        unsigned int UncompressedBlockSize;
+        unsigned int CompressedBlockSize;
+        unsigned int BlockLength;
+        unsigned int BlockOffset;
+        uint64_t BlockAddress;
+        bool     IsOpen;
+        bool     IsWriteOnly;
+        bool     IsWriteUncompressed;
+        FILE*    Stream;
+        char*    UncompressedBlock;
+        char*    CompressedBlock;
+
// constructor & destructor
-    
-    BgzfData(void);
-    ~BgzfData(void);
+    public:
+        BgzfData(bool writeUncompressed = false);
+        ~BgzfData(void);

-    // ---------------------------------
// main interface methods
-    
-    // closes BGZF file
-    void Close(void);
-    // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
-    bool Open(const std::string& filename, const char* mode);
-    // reads BGZF data into a byte buffer
-    int Read(char* data, const unsigned int dataLength);
-    // seek to position in BGZF file
-    bool Seek(int64_t position);
-    // get file position in BGZF file
-    int64_t Tell(void);
-    // writes the supplied data into the BGZF buffer
-    unsigned int Write(const char* data, const unsigned int dataLen);
-
-    // ---------------------------------
+    public:       
+        // closes BGZF file
+        void Close(void);
+        // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
+        bool Open(const std::string& filename, const char* mode);
+        // reads BGZF data into a byte buffer
+        int Read(char* data, const unsigned int dataLength);
+        // seek to position in BGZF file
+        bool Seek(int64_t position);
+        // get file position in BGZF file
+        int64_t Tell(void);
+        // writes the supplied data into the BGZF buffer
+        unsigned int Write(const char* data, const unsigned int dataLen);
+
// internal methods
+    private:
+        // compresses the current block
+        int DeflateBlock(void);
+        // flushes the data in the BGZF block
+        void FlushBlock(void);
+        // de-compresses the current block
+        int InflateBlock(const int& blockLength);
+        // reads a BGZF block
+        bool ReadBlock(void);

-    // compresses the current block
-    int DeflateBlock(void);
-    // flushes the data in the BGZF block
-    void FlushBlock(void);
-    // de-compresses the current block
-    int InflateBlock(const int& blockLength);
-    // reads a BGZF block
-    bool ReadBlock(void);
-    
-    // ---------------------------------
// static 'utility' methods
-    
-    // checks BGZF block header
-    static inline bool CheckBlockHeader(char* header);
-    // packs an unsigned integer into the specified buffer
-    static inline void PackUnsignedInt(char* buffer, unsigned int value);
-    // packs an unsigned short into the specified buffer
-    static inline void PackUnsignedShort(char* buffer, unsigned short value);
-    // unpacks a buffer into a double
-    static inline double UnpackDouble(char* buffer);
-    static inline double UnpackDouble(const char* buffer);
-    // unpacks a buffer into a float
-    static inline float UnpackFloat(char* buffer);
-    static inline float UnpackFloat(const char* buffer);
-    // unpacks a buffer into a signed int
-    static inline signed int UnpackSignedInt(char* buffer);
-    static inline signed int UnpackSignedInt(const char* buffer);
-    // unpacks a buffer into a signed short
-    static inline signed short UnpackSignedShort(char* buffer);
-    static inline signed short UnpackSignedShort(const char* buffer);
-    // unpacks a buffer into an unsigned int
-    static inline unsigned int UnpackUnsignedInt(char* buffer);
-    static inline unsigned int UnpackUnsignedInt(const char* buffer);
-    // unpacks a buffer into an unsigned short
-    static inline unsigned short UnpackUnsignedShort(char* buffer);
-    static inline unsigned short UnpackUnsignedShort(const char* buffer);
+    public:
+        // checks BGZF block header
+        static inline bool CheckBlockHeader(char* header);
+        // packs an unsigned integer into the specified buffer
+        static inline void PackUnsignedInt(char* buffer, unsigned int value);
+        // packs an unsigned short into the specified buffer
+        static inline void PackUnsignedShort(char* buffer, unsigned short value);
+        // unpacks a buffer into a double
+        static inline double UnpackDouble(char* buffer);
+        static inline double UnpackDouble(const char* buffer);
+        // unpacks a buffer into a float
+        static inline float UnpackFloat(char* buffer);
+        static inline float UnpackFloat(const char* buffer);
+        // unpacks a buffer into a signed int
+        static inline signed int UnpackSignedInt(char* buffer);
+        static inline signed int UnpackSignedInt(const char* buffer);
+        // unpacks a buffer into a signed short
+        static inline signed short UnpackSignedShort(char* buffer);
+        static inline signed short UnpackSignedShort(const char* buffer);
+        // unpacks a buffer into an unsigned int
+        static inline unsigned int UnpackUnsignedInt(char* buffer);
+        static inline unsigned int UnpackUnsignedInt(const char* buffer);
+        // unpacks a buffer into an unsigned short
+        static inline unsigned short UnpackUnsignedShort(char* buffer);
+        static inline unsigned short UnpackUnsignedShort(const char* buffer);
};
\r
// -------------------------------------------------------------\r