// ***************************************************************************\r
-// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg\r
+// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
CompressedBlock = new char[CompressedBlockSize];\r
UncompressedBlock = new char[UncompressedBlockSize];\r
} catch( std::bad_alloc& ba ) {\r
- printf("ERROR: Unable to allocate memory for our BGZF object.\n");\r
+ printf("BGZF ERROR: unable to allocate memory for our BGZF object.\n");\r
exit(1);\r
}\r
}\r
\r
// destructor\r
BgzfData::~BgzfData(void) {\r
- if(CompressedBlock) { delete[] CompressedBlock; }\r
- if(UncompressedBlock) { delete[] UncompressedBlock; }\r
+ if( CompressedBlock ) { delete[] CompressedBlock; }\r
+ if( UncompressedBlock ) { delete[] UncompressedBlock; }\r
}\r
\r
// closes BGZF file\r
void BgzfData::Close(void) {\r
\r
// skip if file not open, otherwise set flag\r
- if (!IsOpen) { return; }\r
+ if ( !IsOpen ) return;\r
IsOpen = false;\r
\r
// flush the current BGZF block\r
- if (IsWriteOnly) { FlushBlock(); }\r
+ if ( IsWriteOnly ) FlushBlock();\r
\r
- // write an empty block (as EOF marker)\r
- int blockLength = DeflateBlock();\r
- fwrite(CompressedBlock, 1, blockLength, Stream);\r
- \r
+ // write an empty block (as EOF marker)\r
+ int blockLength = DeflateBlock();\r
+ fwrite(CompressedBlock, 1, blockLength, Stream);\r
+ \r
// flush and close\r
fflush(Stream);\r
fclose(Stream);\r
// loop to retry for blocks that do not compress enough\r
int inputLength = BlockOffset;\r
int compressedLength = 0;\r
- unsigned int bufferSize = CompressedBlockSize;\r
+ unsigned int bufferSize = CompressedBlockSize;\r
\r
while(true) {\r
- \r
- // initialize zstream values\r
+ \r
+ // initialize zstream values\r
z_stream zs;\r
zs.zalloc = NULL;\r
zs.zfree = NULL;\r
zs.avail_in = inputLength;\r
zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];\r
zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;\r
- \r
+\r
// initialize the zlib compression algorithm\r
if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {\r
- printf("ERROR: zlib deflate initialization failed.\n");\r
+ printf("BGZF ERROR: zlib deflate initialization failed.\n");\r
exit(1);\r
}\r
\r
if(status == Z_OK) {\r
inputLength -= 1024;\r
if(inputLength < 0) {\r
- printf("ERROR: input reduction failed.\n");\r
+ printf("BGZF ERROR: input reduction failed.\n");\r
exit(1);\r
}\r
continue;\r
}\r
\r
- printf("ERROR: zlib deflate failed.\n");\r
+ printf("BGZF ERROR: zlib::deflateEnd() failed.\n");\r
exit(1);\r
}\r
\r
// finalize the compression routine\r
if(deflateEnd(&zs) != Z_OK) {\r
- printf("ERROR: deflate end failed.\n");\r
+ printf("BGZF ERROR: zlib::deflateEnd() failed.\n");\r
exit(1);\r
}\r
\r
compressedLength = zs.total_out;\r
compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;\r
-\r
if(compressedLength > MAX_BLOCK_SIZE) {\r
- printf("ERROR: deflate overflow.\n");\r
+ printf("BGZF ERROR: deflate overflow.\n");\r
exit(1);\r
}\r
\r
int remaining = BlockOffset - inputLength;\r
if(remaining > 0) {\r
if(remaining > inputLength) {\r
- printf("ERROR: remainder too large.\n");\r
+ printf("BGZF ERROR: after deflate, remainder too large.\n");\r
exit(1);\r
}\r
memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);\r
int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);\r
\r
if(numBytesWritten != blockLength) {\r
- printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
- exit(1);\r
- }\r
- \r
+ printf("BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);\r
+ exit(1);\r
+ }\r
+ \r
BlockAddress += blockLength;\r
}\r
}\r
\r
int status = inflateInit2(&zs, GZIP_WINDOW_BITS);\r
if (status != Z_OK) {\r
- printf("inflateInit failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");\r
+ return -1;\r
}\r
\r
status = inflate(&zs, Z_FINISH);\r
if (status != Z_STREAM_END) {\r
inflateEnd(&zs);\r
- printf("inflate failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflate() failed\n");\r
+ return -1;\r
}\r
\r
status = inflateEnd(&zs);\r
if (status != Z_OK) {\r
- printf("inflateEnd failed\n");\r
- exit(1);\r
+ printf("BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");\r
+ return -1;\r
}\r
\r
return zs.total_out;\r
}\r
\r
-void BgzfData::Open(const string& filename, const char* mode) {\r
+// opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)\r
+bool BgzfData::Open(const string& filename, const char* mode) {\r
\r
// determine open mode\r
if ( strcmp(mode, "rb") == 0 ) {\r
} else if ( strcmp(mode, "wb") == 0) {\r
IsWriteOnly = true;\r
} else {\r
- printf("ERROR: Unknown file mode: %s\n", mode);\r
- exit(1);\r
+ printf("BGZF ERROR: unknown file mode: %s\n", mode);\r
+ return false; \r
+ }\r
+\r
+ // open Stream to read to/write from file, stdin, or stdout\r
+ // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
+ if ( (filename != "stdin") && (filename != "stdout") ) {\r
+ // read/write BGZF data to/from a file\r
+ Stream = fopen(filename.c_str(), mode);\r
+ }\r
+ else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
+ // read BGZF data from stdin\r
+ Stream = freopen(NULL, mode, stdin);\r
+ }\r
+ else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
+ // write BGZF data to stdout\r
+ Stream = freopen(NULL, mode, stdout);\r
}\r
\r
- // open Stream to read to/write from file, stdin, or stdout\r
- // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)\r
- if ( (filename != "stdin") && (filename != "stdout") ) {\r
- // read/wrtie BGZF data to/from a file\r
- Stream = fopen(filename.c_str(), mode);\r
- }\r
- else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { \r
- // read BGZF data from stdin\r
- Stream = freopen(NULL, mode, stdin);\r
- }\r
- else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { \r
- // write BGZF data to stdout\r
- Stream = freopen(NULL, mode, stdout);\r
- }\r
- \r
if(!Stream) {\r
- printf("ERROR: Unable to open the BAM file %s\n", filename.c_str() );\r
- exit(1);\r
+ printf("BGZF ERROR: unable to open file %s\n", filename.c_str() );\r
+ return false;\r
}\r
+ \r
+ // set flag, return success\r
IsOpen = true;\r
+ return true;\r
}\r
\r
+// reads BGZF data into a byte buffer\r
int BgzfData::Read(char* data, const unsigned int dataLength) {\r
\r
- if (dataLength == 0) { return 0; }\r
+ if (dataLength == 0) return 0;\r
\r
char* output = data;\r
unsigned int numBytesRead = 0;\r
while (numBytesRead < dataLength) {\r
\r
int bytesAvailable = BlockLength - BlockOffset;\r
- if (bytesAvailable <= 0) {\r
- if ( ReadBlock() != 0 ) { return -1; }\r
+ if ( bytesAvailable <= 0 ) {\r
+ if (!ReadBlock()) return -1; \r
bytesAvailable = BlockLength - BlockOffset;\r
- if ( bytesAvailable <= 0 ) { break; }\r
+ if (bytesAvailable <= 0) break;\r
}\r
\r
char* buffer = UncompressedBlock;\r
return numBytesRead;\r
}\r
\r
-int BgzfData::ReadBlock(void) {\r
+// reads a BGZF block\r
+bool BgzfData::ReadBlock(void) {\r
\r
char header[BLOCK_HEADER_LENGTH];\r
int64_t blockAddress = ftell(Stream);\r
int count = fread(header, 1, sizeof(header), Stream);\r
if (count == 0) {\r
BlockLength = 0;\r
- return 0;\r
+ return true;\r
}\r
\r
if (count != sizeof(header)) {\r
- printf("read block failed - count != sizeof(header)\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - could not read block header\n");\r
+ return false;\r
}\r
\r
if (!BgzfData::CheckBlockHeader(header)) {\r
- printf("read block failed - CheckBlockHeader() returned false\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - invalid block header\n");\r
+ return false;\r
}\r
\r
int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;\r
\r
count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);\r
if (count != remaining) {\r
- printf("read block failed - count != remaining\n");\r
- return -1;\r
+ printf("BGZF ERROR: read block failed - could not read data from block\n");\r
+ return false;\r
}\r
\r
count = InflateBlock(blockLength);\r
- if (count < 0) { return -1; }\r
+ if (count < 0) { \r
+ printf("BGZF ERROR: read block failed - could not decompress block data\n");\r
+ return false;\r
+ }\r
\r
- if ( BlockLength != 0 ) {\r
+ if ( BlockLength != 0 )\r
BlockOffset = 0;\r
- }\r
\r
BlockAddress = blockAddress;\r
BlockLength = count;\r
- return 0;\r
+ return true;\r
}\r
\r
+// seek to position in BGZF file\r
bool BgzfData::Seek(int64_t position) {\r
\r
int blockOffset = (position & 0xFFFF);\r
int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\r
\r
if (fseek(Stream, blockAddress, SEEK_SET) != 0) {\r
- printf("ERROR: Unable to seek in BAM file\n");\r
- exit(1);\r
+ printf("BGZF ERROR: unable to seek in file\n");\r
+ return false;\r
}\r
\r
BlockLength = 0;\r
return true;\r
}\r
\r
+// get file position in BGZF file\r
int64_t BgzfData::Tell(void) {\r
return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\r
}\r
\r
// copy the data to the buffer\r
while(numBytesWritten < dataLen) {\r
+ \r
unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\r
char* buffer = UncompressedBlock;\r
memcpy(buffer + BlockOffset, input, copyLength);\r
input += copyLength;\r
numBytesWritten += copyLength;\r
\r
- if(BlockOffset == blockLength) {\r
+ if(BlockOffset == blockLength)\r
FlushBlock();\r
- }\r
}\r
\r
return numBytesWritten;\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 11 January 2010 (DB)\r
+// Last modified: 22 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
// Platform-specific type definitions\r
#ifndef BAMTOOLS_TYPES\r
#define BAMTOOLS_TYPES\r
- #ifdef _MSC_VER\r
- typedef char int8_t;\r
- typedef unsigned char uint8_t;\r
- typedef short int16_t;\r
- typedef unsigned short uint16_t;\r
- typedef int int32_t;\r
- typedef unsigned int uint32_t;\r
- typedef long long int64_t;\r
- typedef unsigned long long uint64_t;\r
- #else\r
- #include <stdint.h>\r
- #endif\r
+ #ifdef _MSC_VER\r
+ typedef char int8_t;\r
+ typedef unsigned char uint8_t;\r
+ typedef short int16_t;\r
+ typedef unsigned short uint16_t;\r
+ typedef int int32_t;\r
+ typedef unsigned int uint32_t;\r
+ typedef long long int64_t;\r
+ typedef unsigned long long uint64_t;\r
+ #else \r
+ #include <stdint.h>\r
+ #endif\r
#endif // BAMTOOLS_TYPES\r
\r
namespace BamTools {\r
\r
struct BgzfData {\r
\r
+ // ---------------------------------\r
// data members\r
+ \r
unsigned int UncompressedBlockSize;\r
unsigned int CompressedBlockSize;\r
unsigned int BlockLength;\r
char* UncompressedBlock;\r
char* CompressedBlock;\r
\r
+ // ---------------------------------\r
// constructor & destructor\r
+ \r
BgzfData(void);\r
~BgzfData(void);\r
\r
+ // ---------------------------------\r
+ // main interface methods\r
+ \r
// closes BGZF file\r
void Close(void);\r
- // compresses the current block\r
- int DeflateBlock(void);\r
- // flushes the data in the BGZF block\r
- void FlushBlock(void);\r
- // de-compresses the current block\r
- int InflateBlock(const int& blockLength);\r
// opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing\r
- void Open(const std::string& filename, const char* mode);\r
+ bool Open(const std::string& filename, const char* mode);\r
// reads BGZF data into a byte buffer\r
int Read(char* data, const unsigned int dataLength);\r
- // reads BGZF block\r
- int ReadBlock(void);\r
- // seek to position in BAM file\r
+ // seek to position in BGZF file\r
bool Seek(int64_t position);\r
- // get file position in BAM file\r
+ // get file position in BGZF file\r
int64_t Tell(void);\r
// writes the supplied data into the BGZF buffer\r
unsigned int Write(const char* data, const unsigned int dataLen);\r
\r
+ // ---------------------------------\r
+ // internal methods\r
+ \r
+ // compresses the current block\r
+ int DeflateBlock(void);\r
+ // flushes the data in the BGZF block\r
+ void FlushBlock(void);\r
+ // de-compresses the current block\r
+ int InflateBlock(const int& blockLength);\r
+ // reads a BGZF block\r
+ bool ReadBlock(void);\r
+ \r
+ // ---------------------------------\r
+ // static 'utility' methods\r
+ \r
// checks BGZF block header\r
static inline bool CheckBlockHeader(char* header);\r
// packs an unsigned integer into the specified buffer\r
static inline void PackUnsignedInt(char* buffer, unsigned int value);\r
// packs an unsigned short into the specified buffer\r
static inline void PackUnsignedShort(char* buffer, unsigned short value);\r
- \r
- // unpacks a buffer into a signed int\r
- static inline signed int UnpackSignedInt(char* buffer);\r
- // unpacks a buffer into an unsigned int\r
- static inline unsigned int UnpackUnsignedInt(char* buffer);\r
- // unpacks a buffer into a signed short\r
- static inline signed short UnpackSignedShort(char* buffer);\r
- // unpacks a buffer into an unsigned short\r
- static inline unsigned short UnpackUnsignedShort(char* buffer);\r
// unpacks a buffer into a double\r
static inline double UnpackDouble(char* buffer);\r
+ static inline double UnpackDouble(const char* buffer);\r
// unpacks a buffer into a float\r
- static inline float UnpackFloat(char* buffer); \r
- \r
+ static inline float UnpackFloat(char* buffer);\r
+ static inline float UnpackFloat(const char* buffer);\r
// unpacks a buffer into a signed int\r
+ static inline signed int UnpackSignedInt(char* buffer);\r
static inline signed int UnpackSignedInt(const char* buffer);\r
- // unpacks a buffer into an unsigned int\r
- static inline unsigned int UnpackUnsignedInt(const char* buffer);\r
// unpacks a buffer into a signed short\r
+ static inline signed short UnpackSignedShort(char* buffer);\r
static inline signed short UnpackSignedShort(const char* buffer);\r
+ // unpacks a buffer into an unsigned int\r
+ static inline unsigned int UnpackUnsignedInt(char* buffer);\r
+ static inline unsigned int UnpackUnsignedInt(const char* buffer);\r
// unpacks a buffer into an unsigned short\r
+ static inline unsigned short UnpackUnsignedShort(char* buffer);\r
static inline unsigned short UnpackUnsignedShort(const char* buffer);\r
- // unpacks a buffer into a double\r
- static inline double UnpackDouble(const char* buffer);\r
- // unpacks a buffer into a float\r
- static inline float UnpackFloat(const char* buffer); \r
};\r
\r
// -------------------------------------------------------------\r
+// static 'utility' method implementations\r
\r
+// checks BGZF block header\r
inline\r
bool BgzfData::CheckBlockHeader(char* header) {\r
return (header[0] == GZIP_ID1 &&\r
buffer[1] = (char)(value >> 8);\r
}\r
\r
-// 'unpacks' a buffer into a signed int\r
+// 'unpacks' a buffer into a double (includes both non-const & const char* flavors)\r
inline\r
-signed int BgzfData::UnpackSignedInt(char* buffer) {\r
- union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
+double BgzfData::UnpackDouble(char* buffer) {\r
+ union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
un.valueBuffer[2] = buffer[2];\r
un.valueBuffer[3] = buffer[3];\r
+ un.valueBuffer[4] = buffer[4];\r
+ un.valueBuffer[5] = buffer[5];\r
+ un.valueBuffer[6] = buffer[6];\r
+ un.valueBuffer[7] = buffer[7];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into an unsigned int\r
inline\r
-unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {\r
- union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
+double BgzfData::UnpackDouble(const char* buffer) {\r
+ union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
un.valueBuffer[2] = buffer[2];\r
un.valueBuffer[3] = buffer[3];\r
+ un.valueBuffer[4] = buffer[4];\r
+ un.valueBuffer[5] = buffer[5];\r
+ un.valueBuffer[6] = buffer[6];\r
+ un.valueBuffer[7] = buffer[7];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a signed short\r
-inline\r
-signed short BgzfData::UnpackSignedShort(char* buffer) {\r
- union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
- un.value = 0;\r
- un.valueBuffer[0] = buffer[0];\r
- un.valueBuffer[1] = buffer[1];\r
- return un.value;\r
-}\r
-\r
-// 'unpacks' a buffer into an unsigned short\r
+// 'unpacks' a buffer into a float (includes both non-const & const char* flavors)\r
inline\r
-unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {\r
- union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
+float BgzfData::UnpackFloat(char* buffer) {\r
+ union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a double\r
inline\r
-double BgzfData::UnpackDouble(char* buffer) {\r
- union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
+float BgzfData::UnpackFloat(const char* buffer) {\r
+ union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
un.valueBuffer[2] = buffer[2];\r
un.valueBuffer[3] = buffer[3];\r
- un.valueBuffer[4] = buffer[4];\r
- un.valueBuffer[5] = buffer[5];\r
- un.valueBuffer[6] = buffer[6];\r
- un.valueBuffer[7] = buffer[7];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a float\r
+// 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors)\r
inline\r
-float BgzfData::UnpackFloat(char* buffer) {\r
- union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
+signed int BgzfData::UnpackSignedInt(char* buffer) {\r
+ union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
return un.value;\r
}\r
\r
-// ---------\r
-\r
-// 'unpacks' a buffer into a signed int\r
inline\r
signed int BgzfData::UnpackSignedInt(const char* buffer) {\r
union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into an unsigned int\r
+// 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors)\r
inline\r
-unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {\r
- union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
+signed short BgzfData::UnpackSignedShort(char* buffer) {\r
+ union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
- un.valueBuffer[2] = buffer[2];\r
- un.valueBuffer[3] = buffer[3];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a signed short\r
inline\r
signed short BgzfData::UnpackSignedShort(const char* buffer) {\r
union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into an unsigned short\r
+// 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors)\r
inline\r
-unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {\r
- union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
+unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {\r
+ union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a double\r
inline\r
-double BgzfData::UnpackDouble(const char* buffer) {\r
- union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
+unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {\r
+ union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
un.valueBuffer[2] = buffer[2];\r
un.valueBuffer[3] = buffer[3];\r
- un.valueBuffer[4] = buffer[4];\r
- un.valueBuffer[5] = buffer[5];\r
- un.valueBuffer[6] = buffer[6];\r
- un.valueBuffer[7] = buffer[7];\r
return un.value;\r
}\r
\r
-// 'unpacks' a buffer into a float\r
+// 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors)\r
inline\r
-float BgzfData::UnpackFloat(const char* buffer) {\r
- union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
+unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {\r
+ union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ return un.value;\r
+}\r
+\r
+inline\r
+unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {\r
+ union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
un.value = 0;\r
un.valueBuffer[0] = buffer[0];\r
un.valueBuffer[1] = buffer[1];\r
- un.valueBuffer[2] = buffer[2];\r
- un.valueBuffer[3] = buffer[3];\r
return un.value;\r
}\r
\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 17 June 2010 (DB)\r
+// Last modified: 22 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
// file operations\r
void Close(void);\r
bool Jump(int refID, int position = 0);\r
- void Open(const string& filename, const string& indexFilename = "");\r
+ bool Open(const string& filename, const string& indexFilename = "");\r
bool Rewind(void);\r
bool SetRegion(const BamRegion& region);\r
\r
d->IsRightBoundSpecified = false;\r
return d->Jump(refID, position); \r
}\r
-void BamReader::Open(const string& filename, const string& indexFilename) { d->Open(filename, indexFilename); }\r
+bool BamReader::Open(const string& filename, const string& indexFilename) { return d->Open(filename, indexFilename); }\r
bool BamReader::Rewind(void) { return d->Rewind(); }\r
bool BamReader::SetRegion(const BamRegion& region) { return d->SetRegion(region); }\r
bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) {\r
const char* allCharData = bAlignment.SupportData.AllCharData.data();\r
const char* seqData = ((const char*)allCharData) + seqDataOffset;\r
const char* qualData = ((const char*)allCharData) + qualDataOffset;\r
- char* tagData = ((char*)allCharData) + tagDataOffset;\r
+ char* tagData = ((char*)allCharData) + tagDataOffset;\r
\r
// save query sequence\r
bAlignment.QueryBases.clear();\r
bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);\r
\r
// save CigarOps for BamAlignment\r
+ CigarOp op;\r
bAlignment.CigarData.clear();\r
+ bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations);\r
for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) {\r
\r
// swap if necessary\r
if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }\r
\r
// build CigarOp structure\r
- CigarOp op;\r
op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);\r
op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];\r
\r
}\r
\r
// opens BAM file (and index)\r
-void BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) {\r
+bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) {\r
\r
Filename = filename;\r
IndexFilename = indexFilename;\r
\r
- // open the BGZF file for reading, retrieve header text & reference data\r
- mBGZF.Open(filename, "rb");\r
+ // open the BGZF file for reading, return false on failure\r
+ if ( !mBGZF.Open(filename, "rb") ) \r
+ return false;\r
+ \r
+ // retrieve header text & reference data\r
LoadHeaderData();\r
LoadReferenceData();\r
\r
if ( !IndexFilename.empty() ) {\r
LoadIndex();\r
}\r
+ \r
+ // return success\r
+ return true;\r
}\r
\r
// returns BAM file pointer to beginning of alignment data\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 16 June 2010 (DB)\r
+// Last modified: 22 June 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
// performs random-access jump to reference, position\r
bool Jump(int refID, int position = 0);\r
// opens BAM file (and optional BAM index file, if provided)\r
- void Open(const std::string& filename, const std::string& indexFilename = "");\r
+ bool Open(const std::string& filename, const std::string& indexFilename = "");\r
// returns file pointer to beginning of alignments\r
bool Rewind(void);\r
// sets a region of interest (with left & right bound reference/position)\r
\r
unsigned int cigarOp;\r
vector<CigarOp>::const_iterator coIter;\r
- for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); coIter++) {\r
+ for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); ++coIter) {\r
\r
switch(coIter->Type) {\r
case 'M':\r
\r
// write the SAM header text length\r
uint32_t samHeaderLen = samHeader.size();\r
- if ( IsBigEndian ) { SwapEndian_32(samHeaderLen); }\r
+ if (IsBigEndian) SwapEndian_32(samHeaderLen);\r
mBGZF.Write((char*)&samHeaderLen, BT_SIZEOF_INT);\r
\r
// write the SAM header text\r
- if(samHeaderLen > 0) {\r
+ if(samHeaderLen > 0) \r
mBGZF.Write(samHeader.data(), samHeaderLen);\r
- }\r
\r
// write the number of reference sequences\r
uint32_t numReferenceSequences = referenceSequences.size();\r
- if ( IsBigEndian ) { SwapEndian_32(numReferenceSequences); }\r
+ if (IsBigEndian) SwapEndian_32(numReferenceSequences);\r
mBGZF.Write((char*)&numReferenceSequences, BT_SIZEOF_INT);\r
\r
// =============================\r
\r
// write the reference sequence name length\r
uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;\r
- if ( IsBigEndian ) { SwapEndian_32(referenceSequenceNameLen); }\r
+ if (IsBigEndian) SwapEndian_32(referenceSequenceNameLen);\r
mBGZF.Write((char*)&referenceSequenceNameLen, BT_SIZEOF_INT);\r
\r
// write the reference sequence name\r
\r
// write the reference sequence length\r
int32_t referenceLength = rsIter->RefLength;\r
- if ( IsBigEndian ) { SwapEndian_32(referenceLength); }\r
+ if (IsBigEndian) SwapEndian_32(referenceLength);\r
mBGZF.Write((char*)&referenceLength, BT_SIZEOF_INT);\r
}\r
}\r
\r
// write the block size\r
unsigned int blockSize = al.SupportData.BlockLength;\r
- if ( IsBigEndian ) { SwapEndian_32(blockSize); }\r
+ if (IsBigEndian) SwapEndian_32(blockSize);\r
mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);\r
\r
// swap BAM core endian-ness, if necessary\r
if ( IsBigEndian ) { \r
- for ( int i = 0; i < 8; ++i ) { \r
+ for ( int i = 0; i < 8; ++i )\r
SwapEndian_32(buffer[i]); \r
- } \r
}\r
\r
// write the BAM core\r
else {\r
\r
// initialize\r
- const unsigned int nameLen = al.Name.size() + 1;\r
- const unsigned int queryLen = al.QueryBases.size();\r
- const unsigned int tagDataLength = al.TagData.size();\r
+ const unsigned int nameLen = al.Name.size() + 1;\r
+ const unsigned int queryLen = al.QueryBases.size();\r
+ const unsigned int tagDataLength = al.TagData.size();\r
\r
// create our packed cigar string\r
string packedCigar;\r
memcpy(cigarData, packedCigar.data(), packedCigarLen);\r
\r
for (unsigned int i = 0; i < packedCigarLen; ++i) {\r
- if ( IsBigEndian ) { \r
+ if ( IsBigEndian )\r
SwapEndian_32p(&cigarData[i]); \r
- }\r
}\r
\r
mBGZF.Write(cigarData, packedCigarLen);\r
- free(cigarData);\r
- \r
- } else { \r
+ free(cigarData); \r
+ } \r
+ else \r
mBGZF.Write(packedCigar.data(), packedCigarLen);\r
- }\r
\r
// write the encoded query sequence\r
mBGZF.Write(encodedQuery.data(), encodedQueryLen);\r
\r
// write the base qualities\r
- string baseQualities = al.Qualities;\r
- char* pBaseQualities = (char*)al.Qualities.data();\r
+ string baseQualities(al.Qualities);\r
+ // adjust the local copy (baseQualities) so the caller's alignment is left untouched\r
+ char* pBaseQualities = (char*)baseQualities.data();\r
- for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }\r
+ for(unsigned int i = 0; i < queryLen; i++) { \r
+ pBaseQualities[i] -= 33; \r
+ }\r
mBGZF.Write(pBaseQualities, queryLen);\r
\r
// write the read group tag\r
\r
mBGZF.Write(tagData, tagDataLength);\r
free(tagData);\r
- } else {\r
- mBGZF.Write(al.TagData.data(), tagDataLength);\r
- } \r
+ } \r
+ else \r
+ mBGZF.Write(al.TagData.data(), tagDataLength); \r
}\r
}\r