// ***************************************************************************\r
-// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg\r
+// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 11 January 2010 (DB)\r
+// Last modified: 19 July 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
CompressedBlock = new char[CompressedBlockSize];\r
UncompressedBlock = new char[UncompressedBlockSize];\r
} catch( std::bad_alloc& ba ) {\r
- printf("ERROR: Unable to allocate memory for our BGZF object.\n");\r
+ printf("BGZF ERROR: unable to allocate memory for our BGZF object.\n");\r
exit(1);\r
}\r
}\r
\r
// destructor\r
BgzfData::~BgzfData(void) {\r
- if(CompressedBlock) { delete[] CompressedBlock; }\r
- if(UncompressedBlock) { delete[] UncompressedBlock; }\r
+ // release both block buffers; delete[] on a null pointer is a no-op, so no guards are needed\r
+ delete[] CompressedBlock;\r
+ delete[] UncompressedBlock;\r
}\r
\r
// closes BGZF file\r
void BgzfData::Close(void) {\r
\r
// skip if file not open, otherwise set flag\r
- if (!IsOpen) { return; }\r
+ if ( !IsOpen ) return;\r
IsOpen = false;\r
\r
// flush the current BGZF block\r
- if (IsWriteOnly) { FlushBlock(); }\r
+ // ...and, in write mode only, append an empty block (as EOF marker).\r
+ // A stream opened with "rb" must not be compressed into or written to on close.\r
+ if ( IsWriteOnly ) {\r
+ FlushBlock();\r
+ int blockLength = DeflateBlock();\r
+ fwrite(CompressedBlock, 1, blockLength, Stream);\r
+ }\r
\r
- // write an empty block (as EOF marker)\r
- int blockLength = DeflateBlock();\r
- fwrite(CompressedBlock, 1, blockLength, Stream);\r
- \r
// flush and close\r
fflush(Stream);\r
fclose(Stream);\r
// loop to retry for blocks that do not compress enough\r
int inputLength = BlockOffset;\r
int compressedLength = 0;\r
- unsigned int bufferSize = CompressedBlockSize;\r
+ unsigned int bufferSize = CompressedBlockSize;\r
\r
while(true) {\r
- \r
- // initialize zstream values\r
+ \r
+ // initialize zstream values\r
z_stream zs;\r
zs.zalloc = NULL;\r
zs.zfree = NULL;\r
zs.avail_in = inputLength;\r
zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];\r
zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;\r
- \r
+\r
// initialize the zlib compression algorithm\r
if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {\r
- printf("ERROR: zlib deflate initialization failed.\n");\r
+ printf("BGZF ERROR: zlib deflate initialization failed.\n");\r
exit(1);\r
}\r
\r
if(status == Z_OK) {\r
inputLength -= 1024;\r
if(inputLength < 0) {\r
- printf("ERROR: input reduction failed.\n");\r
+ printf("BGZF ERROR: input reduction failed.\n");\r
exit(1);\r
}\r
continue;\r
}\r
\r
- printf("ERROR: zlib deflate failed.\n");\r
+ // the compression step failed with a status other than Z_OK (Z_OK is retried above);\r
+ // a deflateEnd() failure is reported by its own check further down\r
+ printf("BGZF ERROR: zlib::deflate() failed.\n");\r
exit(1);\r
}\r
\r
// finalize the compression routine\r
if(deflateEnd(&zs) != Z_OK) {\r
- printf("ERROR: deflate end failed.\n");\r
+ printf("BGZF ERROR: zlib::deflateEnd() failed.\n");\r
exit(1);\r
}\r
\r
compressedLength = zs.total_out;\r
compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;\r
-\r
if(compressedLength > MAX_BLOCK_SIZE) {\r
- printf("ERROR: deflate overflow.\n");\r
+ printf("BGZF ERROR: deflate overflow.\n");\r
exit(1);\r
}\r
\r
int remaining = BlockOffset - inputLength;\r
if(remaining > 0) {\r
if(remaining > inputLength) {\r
- printf("ERROR: remainder too large.\n");\r
+ printf("BGZF ERROR: after deflate, remainder too large.\n");\r
exit(1);\r
}\r
memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);\r
int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);\r
\r
if(numBytesWritten != blockLength) {\r
- printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
- exit(1);\r
- }\r
- \r
+ printf("BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
+ exit(1);\r
+ }\r
+ \r
BlockAddress += blockLength;\r
}\r
}\r
\r
int status = inflateInit2(&zs, GZIP_WINDOW_BITS);\r
if (status != Z_OK) {\r
- printf("inflateInit failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");\r
+ return -1;\r
}\r
\r
status = inflate(&zs, Z_FINISH);\r
if (status != Z_STREAM_END) {\r
inflateEnd(&zs);\r
- printf("inflate failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflate() failed\n");\r
+ return -1;\r
}\r
\r
status = inflateEnd(&zs);\r
if (status != Z_OK) {\r
- printf("inflateEnd failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");\r
+ return -1;\r
}\r
\r
return zs.total_out;\r
}\r
\r
-void BgzfData::Open(const string& filename, const char* mode) {\r
+// opens the BGZF file for reading or writing (mode is either "rb" for reading, or "wb" for writing)\r
+bool BgzfData::Open(const string& filename, const char* mode) {\r
\r
// determine open mode\r
if ( strcmp(mode, "rb") == 0 ) {\r
} else if ( strcmp(mode, "wb") == 0) {\r
IsWriteOnly = true;\r
} else {\r
- printf("ERROR: Unknown file mode: %s\n", mode);\r
- exit(1);\r
+ printf("BGZF ERROR: unknown file mode: %s\n", mode);\r
+ return false; \r
+ }\r
+\r
+ // open Stream to read from/write to file, stdin, or stdout\r
+ // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
+ if ( (filename != "stdin") && (filename != "stdout") ) {\r
+ // read/write BGZF data to/from a file\r
+// Stream = fopen64(filename.c_str(), mode);\r
+ Stream = fopen(filename.c_str(), mode);\r
+ }\r
+ else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
+ // read BGZF data from stdin\r
+// Stream = freopen64(NULL, mode, stdin);\r
+ Stream = freopen(NULL, mode, stdin);\r
+ }\r
+ else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
+ // write BGZF data to stdout\r
+// Stream = freopen64(NULL, mode, stdout);\r
+ Stream = freopen(NULL, mode, stdout);\r
}\r
\r
- // open Stream to read to/write from file, stdin, or stdout\r
- // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
- if ( (filename != "stdin") && (filename != "stdout") ) {\r
- // read/wrtie BGZF data to/from a file\r
- Stream = fopen(filename.c_str(), mode);\r
- }\r
- else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
- // read BGZF data from stdin\r
- Stream = freopen(NULL, mode, stdin);\r
- }\r
- else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
- // write BGZF data to stdout\r
- Stream = freopen(NULL, mode, stdout);\r
- }\r
- \r
if(!Stream) {\r
- printf("ERROR: Unable to open the BAM file %s\n", filename.c_str() );\r
- exit(1);\r
+ printf("BGZF ERROR: unable to open file %s\n", filename.c_str() );\r
+ return false;\r
}\r
+ \r
+ // set flag, return success\r
IsOpen = true;\r
+ return true;\r
}\r
\r
+// reads BGZF data into a byte buffer\r
int BgzfData::Read(char* data, const unsigned int dataLength) {\r
\r
- if (dataLength == 0) { return 0; }\r
+ if (dataLength == 0) return 0;\r
\r
char* output = data;\r
unsigned int numBytesRead = 0;\r
while (numBytesRead < dataLength) {\r
\r
int bytesAvailable = BlockLength - BlockOffset;\r
- if (bytesAvailable <= 0) {\r
- if ( ReadBlock() != 0 ) { return -1; }\r
+ if ( bytesAvailable <= 0 ) {\r
+ if (!ReadBlock()) return -1; \r
bytesAvailable = BlockLength - BlockOffset;\r
- if ( bytesAvailable <= 0 ) { break; }\r
+ if (bytesAvailable <= 0) break;\r
}\r
\r
char* buffer = UncompressedBlock;\r
}\r
\r
if ( BlockOffset == BlockLength ) {\r
- BlockAddress = ftell(Stream);\r
+ BlockAddress = ftell64(Stream);\r
BlockOffset = 0;\r
BlockLength = 0;\r
}\r
return numBytesRead;\r
}\r
\r
-int BgzfData::ReadBlock(void) {\r
+// reads a BGZF block\r
+bool BgzfData::ReadBlock(void) {\r
\r
char header[BLOCK_HEADER_LENGTH];\r
- int64_t blockAddress = ftell(Stream);\r
-\r
+ int64_t blockAddress = ftell64(Stream);\r
+ \r
int count = fread(header, 1, sizeof(header), Stream);\r
if (count == 0) {\r
BlockLength = 0;\r
- return 0;\r
+ return true;\r
}\r
\r
if (count != sizeof(header)) {\r
- printf("read block failed - count != sizeof(header)\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - could not read block header\n");\r
+ return false;\r
}\r
\r
if (!BgzfData::CheckBlockHeader(header)) {\r
- printf("read block failed - CheckBlockHeader() returned false\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - invalid block header\n");\r
+ return false;\r
}\r
\r
int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;\r
\r
count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);\r
if (count != remaining) {\r
- printf("read block failed - count != remaining\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - could not read data from block\n");\r
+ return false;\r
}\r
\r
count = InflateBlock(blockLength);\r
- if (count < 0) { return -1; }\r
+ if (count < 0) { \r
+ printf("BGZF ERROR: read block failed - could not decompress block data\n");\r
+ return false;\r
+ }\r
\r
- if ( BlockLength != 0 ) {\r
+ if ( BlockLength != 0 )\r
BlockOffset = 0;\r
- }\r
\r
BlockAddress = blockAddress;\r
BlockLength = count;\r
- return 0;\r
+ return true;\r
}\r
\r
+// seek to position in BGZF file\r
bool BgzfData::Seek(int64_t position) {\r
\r
int blockOffset = (position & 0xFFFF);\r
int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\r
\r
- if (fseek(Stream, blockAddress, SEEK_SET) != 0) {\r
- printf("ERROR: Unable to seek in BAM file\n");\r
- exit(1);\r
+ if (fseek64(Stream, blockAddress, SEEK_SET) != 0) {\r
+ printf("BGZF ERROR: unable to seek in file\n");\r
+ return false;\r
}\r
\r
BlockLength = 0;\r
return true;\r
}\r
\r
+// get file position in BGZF file: BlockAddress shifted left 16 bits, OR'd with the low 16 bits of BlockOffset\r
int64_t BgzfData::Tell(void) {\r
return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\r
}\r
\r
// copy the data to the buffer\r
while(numBytesWritten < dataLen) {\r
+ \r
unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\r
char* buffer = UncompressedBlock;\r
memcpy(buffer + BlockOffset, input, copyLength);\r
input += copyLength;\r
numBytesWritten += copyLength;\r
\r
- if(BlockOffset == blockLength) {\r
+ if(BlockOffset == blockLength)\r
FlushBlock();\r
- }\r
}\r
\r
return numBytesWritten;\r