// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 19 July 2010 (DB)\r
+// Last modified: 16 August 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
, BlockAddress(0)\r
, IsOpen(false)\r
, IsWriteOnly(false)\r
+ , IsWriteUncompressed(false)\r
, Stream(NULL)\r
, UncompressedBlock(NULL)\r
, CompressedBlock(NULL)\r
\r
// destructor\r
BgzfData::~BgzfData(void) {\r
- if( CompressedBlock ) { delete[] CompressedBlock; }\r
- if( UncompressedBlock ) { delete[] UncompressedBlock; }\r
+ // release both block buffers; delete[] on a null pointer is a no-op,\r
+ // so no explicit null test is needed before either call\r
+ delete[] CompressedBlock;\r
+ delete[] UncompressedBlock;\r
}\r
\r
// closes BGZF file\r
// flush and close\r
fflush(Stream);\r
fclose(Stream);\r
+ IsWriteUncompressed = false;\r
IsOpen = false;\r
}\r
\r
buffer[13] = BGZF_ID2;\r
buffer[14] = BGZF_LEN;\r
\r
+ // set compression level\r
+ const int compressionLevel = ( IsWriteUncompressed ? 0 : Z_DEFAULT_COMPRESSION );\r
+ \r
// loop to retry for blocks that do not compress enough\r
int inputLength = BlockOffset;\r
int compressedLength = 0;\r
unsigned int bufferSize = CompressedBlockSize;\r
\r
- while(true) {\r
+ while ( true ) {\r
\r
// initialize zstream values\r
z_stream zs;\r
zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;\r
\r
// initialize the zlib compression algorithm\r
- if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {\r
+ if ( deflateInit2(&zs, compressionLevel, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK ) {\r
printf("BGZF ERROR: zlib deflate initialization failed.\n");\r
exit(1);\r
}\r
\r
// compress the data\r
int status = deflate(&zs, Z_FINISH);\r
- if(status != Z_STREAM_END) {\r
+ if ( status != Z_STREAM_END ) {\r
\r
deflateEnd(&zs);\r
\r
// reduce the input length and try again\r
- if(status == Z_OK) {\r
+ if ( status == Z_OK ) {\r
inputLength -= 1024;\r
- if(inputLength < 0) {\r
+ if( inputLength < 0 ) {\r
printf("BGZF ERROR: input reduction failed.\n");\r
exit(1);\r
}\r
}\r
\r
// finalize the compression routine\r
- if(deflateEnd(&zs) != Z_OK) {\r
+ if ( deflateEnd(&zs) != Z_OK ) {\r
printf("BGZF ERROR: zlib::deflateEnd() failed.\n");\r
exit(1);\r
}\r
\r
compressedLength = zs.total_out;\r
compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;\r
- if(compressedLength > MAX_BLOCK_SIZE) {\r
+ if ( compressedLength > MAX_BLOCK_SIZE ) {\r
printf("BGZF ERROR: deflate overflow.\n");\r
exit(1);\r
}\r
\r
// ensure that we have less than a block of data left\r
int remaining = BlockOffset - inputLength;\r
- if(remaining > 0) {\r
- if(remaining > inputLength) {\r
+ if ( remaining > 0 ) {\r
+ if ( remaining > inputLength ) {\r
printf("BGZF ERROR: after deflate, remainder too large.\n");\r
exit(1);\r
}\r
void BgzfData::FlushBlock(void) {\r
\r
// flush all of the remaining blocks\r
- while(BlockOffset > 0) {\r
+ while ( BlockOffset > 0 ) {\r
\r
// compress the data block\r
int blockLength = DeflateBlock();\r
// flush the data to our output stream\r
int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);\r
\r
- if(numBytesWritten != blockLength) {\r
+ if ( numBytesWritten != blockLength ) {\r
printf("BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
exit(1);\r
}\r
zs.avail_out = UncompressedBlockSize;\r
\r
int status = inflateInit2(&zs, GZIP_WINDOW_BITS);\r
- if (status != Z_OK) {\r
+ if ( status != Z_OK ) {\r
printf("BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");\r
return -1;\r
}\r
\r
status = inflate(&zs, Z_FINISH);\r
- if (status != Z_STREAM_END) {\r
+ if ( status != Z_STREAM_END ) {\r
inflateEnd(&zs);\r
printf("BGZF ERROR: could not decompress block - zlib::inflate() failed\n");\r
return -1;\r
}\r
\r
status = inflateEnd(&zs);\r
- if (status != Z_OK) {\r
+ if ( status != Z_OK ) {\r
printf("BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");\r
return -1;\r
}\r
}\r
\r
// opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing)\r
+// filename            - path to open; the literal names "stdin" (read mode only) and\r
+//                       "stdout" (write mode only) select the standard streams instead\r
+// mode                - "rb" or "wb"; any other string is rejected\r
+// isWriteUncompressed - stored in IsWriteUncompressed on success\r
+// returns true on success; false if the mode is unknown or the stream could not be\r
+// opened (including a "stdin"/"stdout" name paired with the wrong mode)\r
-bool BgzfData::Open(const string& filename, const char* mode) {\r
+bool BgzfData::Open(const string& filename, const char* mode, bool isWriteUncompressed ) {\r
\r
- // determine open mode\r
- if ( strcmp(mode, "rb") == 0 ) {\r
+ // determine open mode\r
+ if ( strcmp(mode, "rb") == 0 )\r
IsWriteOnly = false;\r
- } else if ( strcmp(mode, "wb") == 0) {\r
+ else if ( strcmp(mode, "wb") == 0) \r
IsWriteOnly = true;\r
- } else {\r
+ else {\r
printf("BGZF ERROR: unknown file mode: %s\n", mode);\r
return false; \r
}\r
\r
+ // ----------------------------------------------------------------\r
// open Stream to read to/write from file, stdin, or stdout\r
// stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
- if ( (filename != "stdin") && (filename != "stdout") ) {\r
- // read/write BGZF data to/from a file\r
-// Stream = fopen64(filename.c_str(), mode);\r
+ \r
+ // reset Stream first: a mismatched name/mode pair (e.g. "stdin" with "wb")\r
+ // matches none of the branches below, and the failure check that follows\r
+ // must not see a stale handle left over from an earlier open\r
+ Stream = NULL;\r
+ \r
+ // read/write BGZF data to/from a file\r
+ if ( (filename != "stdin") && (filename != "stdout") )\r
Stream = fopen(filename.c_str(), mode);\r
- }\r
- else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
- // read BGZF data from stdin\r
-// Stream = freopen64(NULL, mode, stdin);\r
+ \r
+ // read BGZF data from stdin\r
+ else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) )\r
Stream = freopen(NULL, mode, stdin);\r
- }\r
- else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
- // write BGZF data to stdout\r
-// Stream = freopen64(NULL, mode, stdout);\r
+ \r
+ // write BGZF data to stdout\r
+ else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) )\r
Stream = freopen(NULL, mode, stdout);\r
- }\r
\r
- if(!Stream) {\r
+ if ( !Stream ) {\r
printf("BGZF ERROR: unable to open file %s\n", filename.c_str() );\r
return false;\r
}\r
\r
- // set flag, return success\r
+ // set flags, return success\r
IsOpen = true;\r
+ IsWriteUncompressed = isWriteUncompressed;\r
return true;\r
}\r
\r
// reads BGZF data into a byte buffer\r
int BgzfData::Read(char* data, const unsigned int dataLength) {\r
\r
- if (dataLength == 0) return 0;\r
+ if ( !IsOpen || IsWriteOnly || dataLength == 0 ) return 0;\r
\r
char* output = data;\r
unsigned int numBytesRead = 0;\r
- while (numBytesRead < dataLength) {\r
+ while ( numBytesRead < dataLength ) {\r
\r
int bytesAvailable = BlockLength - BlockOffset;\r
if ( bytesAvailable <= 0 ) {\r
- if (!ReadBlock()) return -1; \r
+ if ( !ReadBlock() ) return -1; \r
bytesAvailable = BlockLength - BlockOffset;\r
- if (bytesAvailable <= 0) break;\r
+ if ( bytesAvailable <= 0 ) break;\r
}\r
\r
char* buffer = UncompressedBlock;\r
int64_t blockAddress = ftell64(Stream);\r
\r
int count = fread(header, 1, sizeof(header), Stream);\r
- if (count == 0) {\r
+ if ( count == 0 ) {\r
BlockLength = 0;\r
return true;\r
}\r
\r
- if (count != sizeof(header)) {\r
+ if ( count != sizeof(header) ) {\r
printf("BGZF ERROR: read block failed - could not read block header\n");\r
return false;\r
}\r
\r
- if (!BgzfData::CheckBlockHeader(header)) {\r
+ if ( !BgzfData::CheckBlockHeader(header) ) {\r
printf("BGZF ERROR: read block failed - invalid block header\n");\r
return false;\r
}\r
int remaining = blockLength - BLOCK_HEADER_LENGTH;\r
\r
count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);\r
- if (count != remaining) {\r
+ if ( count != remaining ) {\r
printf("BGZF ERROR: read block failed - could not read data from block\n");\r
return false;\r
}\r
\r
count = InflateBlock(blockLength);\r
- if (count < 0) { \r
+ if ( count < 0 ) { \r
printf("BGZF ERROR: read block failed - could not decompress block data\n");\r
return false;\r
}\r
// seek to position in BGZF file\r
bool BgzfData::Seek(int64_t position) {\r
\r
+ if ( !IsOpen ) return false;\r
+ \r
int blockOffset = (position & 0xFFFF);\r
int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\r
\r
- if (fseek64(Stream, blockAddress, SEEK_SET) != 0) {\r
+ if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {\r
printf("BGZF ERROR: unable to seek in file\n");\r
return false;\r
}\r
\r
// get file position in BGZF file\r
+// Returns the current position packed as (BlockAddress << 16) | (BlockOffset & 0xFFFF),\r
+// i.e. the block address in the upper bits and the in-block offset in the low 16 bits.\r
+// Returns 0 when no file is open. 0 is also the packed value for block address 0 with\r
+// offset 0, so callers that must tell the two apart need to check the open state.\r
int64_t BgzfData::Tell(void) {\r
- return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\r
+ if ( !IsOpen ) \r
+ return 0;\r
+ else \r
+ return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\r
}\r
\r
// writes the supplied data into the BGZF buffer\r
unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {\r
\r
+ if ( !IsOpen || !IsWriteOnly ) return false;\r
+ \r
// initialize\r
unsigned int numBytesWritten = 0;\r
const char* input = data;\r
unsigned int blockLength = UncompressedBlockSize;\r
\r
// copy the data to the buffer\r
- while(numBytesWritten < dataLen) {\r
+ while ( numBytesWritten < dataLen ) {\r
\r
unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\r
char* buffer = UncompressedBlock;\r
input += copyLength;\r
numBytesWritten += copyLength;\r
\r
- if(BlockOffset == blockLength)\r
+ if ( BlockOffset == blockLength )\r
FlushBlock();\r
}\r
\r