From: Derek Date: Mon, 28 Jun 2010 19:10:28 +0000 (-0400) Subject: Modified BamReader(and BGZF)::Open() to return bool. Tried to eliminate most exit... X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=5580b247bf3ef7b0cd1afd3aa16dc15cd7523636;p=bamtools.git Modified BamReader(and BGZF)::Open() to return bool. Tried to eliminate most exit() calls. These changes should allow for more graceful error handling. Some 'code cleanup' in BW, but no logic changes. --- diff --git a/BGZF.cpp b/BGZF.cpp index 225ddf0..5aeb26a 100644 --- a/BGZF.cpp +++ b/BGZF.cpp @@ -1,5 +1,5 @@ // *************************************************************************** -// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg +// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- @@ -33,31 +33,31 @@ BgzfData::BgzfData(void) CompressedBlock = new char[CompressedBlockSize]; UncompressedBlock = new char[UncompressedBlockSize]; } catch( std::bad_alloc& ba ) { - printf("ERROR: Unable to allocate memory for our BGZF object.\n"); + printf("BGZF ERROR: unable to allocate memory for our BGZF object.\n"); exit(1); } } // destructor BgzfData::~BgzfData(void) { - if(CompressedBlock) { delete[] CompressedBlock; } - if(UncompressedBlock) { delete[] UncompressedBlock; } + if( CompressedBlock ) { delete[] CompressedBlock; } + if( UncompressedBlock ) { delete[] UncompressedBlock; } } // closes BGZF file void BgzfData::Close(void) { // skip if file not open, otherwise set flag - if (!IsOpen) { return; } + if ( !IsOpen ) return; IsOpen = false; // flush the current BGZF block - if (IsWriteOnly) { FlushBlock(); } + if ( IsWriteOnly ) FlushBlock(); - // write an empty block (as EOF marker) - int blockLength = DeflateBlock(); - fwrite(CompressedBlock, 1, blockLength, Stream); - + // write an empty block (as EOF marker) + int blockLength = 
DeflateBlock(); + fwrite(CompressedBlock, 1, blockLength, Stream); + // flush and close fflush(Stream); fclose(Stream); @@ -82,11 +82,11 @@ int BgzfData::DeflateBlock(void) { // loop to retry for blocks that do not compress enough int inputLength = BlockOffset; int compressedLength = 0; - unsigned int bufferSize = CompressedBlockSize; + unsigned int bufferSize = CompressedBlockSize; while(true) { - - // initialize zstream values + + // initialize zstream values z_stream zs; zs.zalloc = NULL; zs.zfree = NULL; @@ -94,10 +94,10 @@ int BgzfData::DeflateBlock(void) { zs.avail_in = inputLength; zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH]; zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; - + // initialize the zlib compression algorithm if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) { - printf("ERROR: zlib deflate initialization failed.\n"); + printf("BGZF ERROR: zlib deflate initialization failed.\n"); exit(1); } @@ -111,27 +111,26 @@ int BgzfData::DeflateBlock(void) { if(status == Z_OK) { inputLength -= 1024; if(inputLength < 0) { - printf("ERROR: input reduction failed.\n"); + printf("BGZF ERROR: input reduction failed.\n"); exit(1); } continue; } - printf("ERROR: zlib deflate failed.\n"); + printf("BGZF ERROR: zlib::deflateEnd() failed.\n"); exit(1); } // finalize the compression routine if(deflateEnd(&zs) != Z_OK) { - printf("ERROR: deflate end failed.\n"); + printf("BGZF ERROR: zlib::deflateEnd() failed.\n"); exit(1); } compressedLength = zs.total_out; compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - if(compressedLength > MAX_BLOCK_SIZE) { - printf("ERROR: deflate overflow.\n"); + printf("BGZF ERROR: deflate overflow.\n"); exit(1); } @@ -151,7 +150,7 @@ int BgzfData::DeflateBlock(void) { int remaining = BlockOffset - inputLength; if(remaining > 0) { if(remaining > inputLength) { - printf("ERROR: remainder too large.\n"); + printf("BGZF ERROR: 
after deflate, remainder too large.\n"); exit(1); } memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining); @@ -174,10 +173,10 @@ void BgzfData::FlushBlock(void) { int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream); if(numBytesWritten != blockLength) { - printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten); - exit(1); - } - + printf("BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten); + exit(1); + } + BlockAddress += blockLength; } } @@ -196,27 +195,28 @@ int BgzfData::InflateBlock(const int& blockLength) { int status = inflateInit2(&zs, GZIP_WINDOW_BITS); if (status != Z_OK) { - printf("inflateInit failed\n"); - exit(1); + printf("BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n"); + return -1; } status = inflate(&zs, Z_FINISH); if (status != Z_STREAM_END) { inflateEnd(&zs); - printf("inflate failed\n"); - exit(1); + printf("BGZF ERROR: could not decompress block - zlib::inflate() failed\n"); + return -1; } status = inflateEnd(&zs); if (status != Z_OK) { - printf("inflateEnd failed\n"); - exit(1); + printf("BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n"); + return -1; } return zs.total_out; } -void BgzfData::Open(const string& filename, const char* mode) { +// opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing) +bool BgzfData::Open(const string& filename, const char* mode) { // determine open mode if ( strcmp(mode, "rb") == 0 ) { @@ -224,45 +224,49 @@ void BgzfData::Open(const string& filename, const char* mode) { } else if ( strcmp(mode, "wb") == 0) { IsWriteOnly = true; } else { - printf("ERROR: Unknown file mode: %s\n", mode); - exit(1); + printf("BGZF ERROR: unknown file mode: %s\n", mode); + return false; + } + + // open Stream to read to/write from file, stdin, or stdout + // stdin/stdout option contributed by Aaron Quinlan 
(2010-Jan-03) + if ( (filename != "stdin") && (filename != "stdout") ) { + // read/write BGZF data to/from a file + Stream = fopen(filename.c_str(), mode); + } + else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { + // read BGZF data from stdin + Stream = freopen(NULL, mode, stdin); + } + else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { + // write BGZF data to stdout + Stream = freopen(NULL, mode, stdout); } - // open Stream to read to/write from file, stdin, or stdout - // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03) - if ( (filename != "stdin") && (filename != "stdout") ) { - // read/wrtie BGZF data to/from a file - Stream = fopen(filename.c_str(), mode); - } - else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { - // read BGZF data from stdin - Stream = freopen(NULL, mode, stdin); - } - else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { - // write BGZF data to stdout - Stream = freopen(NULL, mode, stdout); - } - if(!Stream) { - printf("ERROR: Unable to open the BAM file %s\n", filename.c_str() ); - exit(1); + printf("BGZF ERROR: unable to open file %s\n", filename.c_str() ); + return false; } + + // set flag, return success IsOpen = true; + return true; } +// reads BGZF data into a byte buffer int BgzfData::Read(char* data, const unsigned int dataLength) { - if (dataLength == 0) { return 0; } + if (dataLength == 0) return 0; char* output = data; unsigned int numBytesRead = 0; while (numBytesRead < dataLength) { int bytesAvailable = BlockLength - BlockOffset; - if (bytesAvailable <= 0) { - if ( ReadBlock() != 0 ) { return -1; } + if ( bytesAvailable <= 0 ) { + if (!ReadBlock()) return -1; bytesAvailable = BlockLength - BlockOffset; - if ( bytesAvailable <= 0 ) { break; } + if (bytesAvailable <= 0) break; } char* buffer = UncompressedBlock; @@ -283,7 +287,8 @@ int BgzfData::Read(char* data, const unsigned int dataLength) { return numBytesRead; } -int BgzfData::ReadBlock(void) { +// reads 
a BGZF block +bool BgzfData::ReadBlock(void) { char header[BLOCK_HEADER_LENGTH]; int64_t blockAddress = ftell(Stream); @@ -291,17 +296,17 @@ int BgzfData::ReadBlock(void) { int count = fread(header, 1, sizeof(header), Stream); if (count == 0) { BlockLength = 0; - return 0; + return true; } if (count != sizeof(header)) { - printf("read block failed - count != sizeof(header)\n"); - return -1; + printf("BGZF ERROR: read block failed - could not read block header\n"); + return false; } if (!BgzfData::CheckBlockHeader(header)) { - printf("read block failed - CheckBlockHeader() returned false\n"); - return -1; + printf("BGZF ERROR: read block failed - invalid block header\n"); + return false; } int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1; @@ -311,30 +316,33 @@ int BgzfData::ReadBlock(void) { count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream); if (count != remaining) { - printf("read block failed - count != remaining\n"); - return -1; + printf("BGZF ERROR: read block failed - could not read data from block\n"); + return false; } count = InflateBlock(blockLength); - if (count < 0) { return -1; } + if (count < 0) { + printf("BGZF ERROR: read block failed - could not decompress block data\n"); + return false; + } - if ( BlockLength != 0 ) { + if ( BlockLength != 0 ) BlockOffset = 0; - } BlockAddress = blockAddress; BlockLength = count; - return 0; + return true; } +// seek to position in BGZF file bool BgzfData::Seek(int64_t position) { int blockOffset = (position & 0xFFFF); int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL; if (fseek(Stream, blockAddress, SEEK_SET) != 0) { - printf("ERROR: Unable to seek in BAM file\n"); - exit(1); + printf("BGZF ERROR: unable to seek in file\n"); + return false; } BlockLength = 0; @@ -343,6 +351,7 @@ bool BgzfData::Seek(int64_t position) { return true; } +// get file position in BGZF file int64_t BgzfData::Tell(void) { return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) ); } @@ 
-357,6 +366,7 @@ unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) { // copy the data to the buffer while(numBytesWritten < dataLen) { + unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten); char* buffer = UncompressedBlock; memcpy(buffer + BlockOffset, input, copyLength); @@ -365,9 +375,8 @@ unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) { input += copyLength; numBytesWritten += copyLength; - if(BlockOffset == blockLength) { + if(BlockOffset == blockLength) FlushBlock(); - } } return numBytesWritten; diff --git a/BGZF.h b/BGZF.h index 0cca24c..29d6d14 100644 --- a/BGZF.h +++ b/BGZF.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 11 January 2010 (DB) +// Last modified: 22 June 2010 (DB) // --------------------------------------------------------------------------- // BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. 
@@ -28,18 +28,18 @@ // Platform-specific type definitions #ifndef BAMTOOLS_TYPES #define BAMTOOLS_TYPES - #ifdef _MSC_VER - typedef char int8_t; - typedef unsigned char uint8_t; - typedef short int16_t; - typedef unsigned short uint16_t; - typedef int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; - #else - #include <stdint.h> - #endif + #ifdef _MSC_VER + typedef char int8_t; + typedef unsigned char uint8_t; + typedef short int16_t; + typedef unsigned short uint16_t; + typedef int int32_t; + typedef unsigned int uint32_t; + typedef long long int64_t; + typedef unsigned long long uint64_t; + #else + #include <stdint.h> + #endif #endif // BAMTOOLS_TYPES namespace BamTools { @@ -65,7 +65,9 @@ const int DEFAULT_BLOCK_SIZE = 65536; struct BgzfData { + // --------------------------------- // data members + unsigned int UncompressedBlockSize; unsigned int CompressedBlockSize; unsigned int BlockLength; @@ -77,67 +79,73 @@ struct BgzfData { char* UncompressedBlock; char* CompressedBlock; + // --------------------------------- // constructor & destructor + BgzfData(void); ~BgzfData(void); + // --------------------------------- + // main interface methods + // closes BGZF file void Close(void); - // compresses the current block - int DeflateBlock(void); - // flushes the data in the BGZF block - void FlushBlock(void); - // de-compresses the current block - int InflateBlock(const int& blockLength); // opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing - void Open(const std::string& filename, const char* mode); + bool Open(const std::string& filename, const char* mode); // reads BGZF data into a byte buffer int Read(char* data, const unsigned int dataLength); - // reads BGZF block - int ReadBlock(void); - // seek to position in BAM file + // seek to position in BGZF file bool Seek(int64_t position); - // get file position in BAM file + // get file position in BGZF file int64_t Tell(void); // writes the 
supplied data into the BGZF buffer unsigned int Write(const char* data, const unsigned int dataLen); + // --------------------------------- + // internal methods + + // compresses the current block + int DeflateBlock(void); + // flushes the data in the BGZF block + void FlushBlock(void); + // de-compresses the current block + int InflateBlock(const int& blockLength); + // reads a BGZF block + bool ReadBlock(void); + + // --------------------------------- + // static 'utility' methods + // checks BGZF block header static inline bool CheckBlockHeader(char* header); // packs an unsigned integer into the specified buffer static inline void PackUnsignedInt(char* buffer, unsigned int value); // packs an unsigned short into the specified buffer static inline void PackUnsignedShort(char* buffer, unsigned short value); - - // unpacks a buffer into a signed int - static inline signed int UnpackSignedInt(char* buffer); - // unpacks a buffer into an unsigned int - static inline unsigned int UnpackUnsignedInt(char* buffer); - // unpacks a buffer into a signed short - static inline signed short UnpackSignedShort(char* buffer); - // unpacks a buffer into an unsigned short - static inline unsigned short UnpackUnsignedShort(char* buffer); // unpacks a buffer into a double static inline double UnpackDouble(char* buffer); + static inline double UnpackDouble(const char* buffer); // unpacks a buffer into a float - static inline float UnpackFloat(char* buffer); - + static inline float UnpackFloat(char* buffer); + static inline float UnpackFloat(const char* buffer); // unpacks a buffer into a signed int + static inline signed int UnpackSignedInt(char* buffer); static inline signed int UnpackSignedInt(const char* buffer); - // unpacks a buffer into an unsigned int - static inline unsigned int UnpackUnsignedInt(const char* buffer); // unpacks a buffer into a signed short + static inline signed short UnpackSignedShort(char* buffer); static inline signed short UnpackSignedShort(const char* 
buffer); + // unpacks a buffer into an unsigned int + static inline unsigned int UnpackUnsignedInt(char* buffer); + static inline unsigned int UnpackUnsignedInt(const char* buffer); // unpacks a buffer into an unsigned short + static inline unsigned short UnpackUnsignedShort(char* buffer); static inline unsigned short UnpackUnsignedShort(const char* buffer); - // unpacks a buffer into a double - static inline double UnpackDouble(const char* buffer); - // unpacks a buffer into a float - static inline float UnpackFloat(const char* buffer); }; // ------------------------------------------------------------- +// static 'utility' method implementations +// checks BGZF block header inline bool BgzfData::CheckBlockHeader(char* header) { return (header[0] == GZIP_ID1 && @@ -166,70 +174,64 @@ void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) { buffer[1] = (char)(value >> 8); } -// 'unpacks' a buffer into a signed int +// 'unpacks' a buffer into a double (includes both non-const & const char* flavors) inline -signed int BgzfData::UnpackSignedInt(char* buffer) { - union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un; +double BgzfData::UnpackDouble(char* buffer) { + union { double value; unsigned char valueBuffer[sizeof(double)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; un.valueBuffer[2] = buffer[2]; un.valueBuffer[3] = buffer[3]; + un.valueBuffer[4] = buffer[4]; + un.valueBuffer[5] = buffer[5]; + un.valueBuffer[6] = buffer[6]; + un.valueBuffer[7] = buffer[7]; return un.value; } -// 'unpacks' a buffer into an unsigned int inline -unsigned int BgzfData::UnpackUnsignedInt(char* buffer) { - union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; +double BgzfData::UnpackDouble(const char* buffer) { + union { double value; unsigned char valueBuffer[sizeof(double)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; un.valueBuffer[2] = 
buffer[2]; un.valueBuffer[3] = buffer[3]; + un.valueBuffer[4] = buffer[4]; + un.valueBuffer[5] = buffer[5]; + un.valueBuffer[6] = buffer[6]; + un.valueBuffer[7] = buffer[7]; return un.value; } -// 'unpacks' a buffer into a signed short -inline -signed short BgzfData::UnpackSignedShort(char* buffer) { - union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; - un.value = 0; - un.valueBuffer[0] = buffer[0]; - un.valueBuffer[1] = buffer[1]; - return un.value; -} - -// 'unpacks' a buffer into an unsigned short +// 'unpacks' a buffer into a float (includes both non-const & const char* flavors) inline -unsigned short BgzfData::UnpackUnsignedShort(char* buffer) { - union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; +float BgzfData::UnpackFloat(char* buffer) { + union { float value; unsigned char valueBuffer[sizeof(float)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; return un.value; } -// 'unpacks' a buffer into a double inline -double BgzfData::UnpackDouble(char* buffer) { - union { double value; unsigned char valueBuffer[sizeof(double)]; } un; +float BgzfData::UnpackFloat(const char* buffer) { + union { float value; unsigned char valueBuffer[sizeof(float)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; un.valueBuffer[2] = buffer[2]; un.valueBuffer[3] = buffer[3]; - un.valueBuffer[4] = buffer[4]; - un.valueBuffer[5] = buffer[5]; - un.valueBuffer[6] = buffer[6]; - un.valueBuffer[7] = buffer[7]; return un.value; } -// 'unpacks' a buffer into a float +// 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors) inline -float BgzfData::UnpackFloat(char* buffer) { - union { float value; unsigned char valueBuffer[sizeof(float)]; } un; +signed int BgzfData::UnpackSignedInt(char* buffer) { + union { signed int value; unsigned char 
valueBuffer[sizeof(signed int)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; @@ -238,9 +240,6 @@ float BgzfData::UnpackFloat(char* buffer) { return un.value; } -// --------- - -// 'unpacks' a buffer into a signed int inline signed int BgzfData::UnpackSignedInt(const char* buffer) { union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un; @@ -252,19 +251,16 @@ signed int BgzfData::UnpackSignedInt(const char* buffer) { return un.value; } -// 'unpacks' a buffer into an unsigned int +// 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors) inline -unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) { - union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; +signed short BgzfData::UnpackSignedShort(char* buffer) { + union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; - un.valueBuffer[2] = buffer[2]; - un.valueBuffer[3] = buffer[3]; return un.value; } -// 'unpacks' a buffer into a signed short inline signed short BgzfData::UnpackSignedShort(const char* buffer) { union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; @@ -274,41 +270,45 @@ signed short BgzfData::UnpackSignedShort(const char* buffer) { return un.value; } -// 'unpacks' a buffer into an unsigned short +// 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors) inline -unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) { - union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; +unsigned int BgzfData::UnpackUnsignedInt(char* buffer) { + union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; return un.value; 
} -// 'unpacks' a buffer into a double inline -double BgzfData::UnpackDouble(const char* buffer) { - union { double value; unsigned char valueBuffer[sizeof(double)]; } un; +unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) { + union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; un.valueBuffer[2] = buffer[2]; un.valueBuffer[3] = buffer[3]; - un.valueBuffer[4] = buffer[4]; - un.valueBuffer[5] = buffer[5]; - un.valueBuffer[6] = buffer[6]; - un.valueBuffer[7] = buffer[7]; return un.value; } -// 'unpacks' a buffer into a float +// 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors) inline -float BgzfData::UnpackFloat(const char* buffer) { - union { float value; unsigned char valueBuffer[sizeof(float)]; } un; +unsigned short BgzfData::UnpackUnsignedShort(char* buffer) { + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} + +inline +unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) { + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; - un.valueBuffer[2] = buffer[2]; - un.valueBuffer[3] = buffer[3]; return un.value; } diff --git a/BGZF.o b/BGZF.o new file mode 100644 index 0000000..b2fc10d Binary files /dev/null and b/BGZF.o differ diff --git a/BamReader.cpp b/BamReader.cpp index 41e3dc2..6ebc488 100644 --- a/BamReader.cpp +++ b/BamReader.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. 
// --------------------------------------------------------------------------- -// Last modified: 17 June 2010 (DB) +// Last modified: 22 June 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -77,7 +77,7 @@ struct BamReader::BamReaderPrivate { // file operations void Close(void); bool Jump(int refID, int position = 0); - void Open(const string& filename, const string& indexFilename = ""); + bool Open(const string& filename, const string& indexFilename = ""); bool Rewind(void); bool SetRegion(const BamRegion& region); @@ -154,7 +154,7 @@ bool BamReader::Jump(int refID, int position) { d->IsRightBoundSpecified = false; return d->Jump(refID, position); } -void BamReader::Open(const string& filename, const string& indexFilename) { d->Open(filename, indexFilename); } +bool BamReader::Open(const string& filename, const string& indexFilename) { return d->Open(filename, indexFilename); } bool BamReader::Rewind(void) { return d->Rewind(); } bool BamReader::SetRegion(const BamRegion& region) { return d->SetRegion(region); } bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) { @@ -244,7 +244,7 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) { const char* allCharData = bAlignment.SupportData.AllCharData.data(); const char* seqData = ((const char*)allCharData) + seqDataOffset; const char* qualData = ((const char*)allCharData) + qualDataOffset; - char* tagData = ((char*)allCharData) + tagDataOffset; + char* tagData = ((char*)allCharData) + tagDataOffset; // save query sequence bAlignment.QueryBases.clear(); @@ -982,14 +982,15 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) { bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength); // save CigarOps for BamAlignment + CigarOp op; 
bAlignment.CigarData.clear(); + bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations); for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) { // swap if necessary if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); } // build CigarOp structure - CigarOp op; op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT); op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ]; @@ -1097,13 +1098,16 @@ void BamReader::BamReaderPrivate::MergeChunks(void) { } // opens BAM file (and index) -void BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) { +bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) { Filename = filename; IndexFilename = indexFilename; - // open the BGZF file for reading, retrieve header text & reference data - mBGZF.Open(filename, "rb"); + // open the BGZF file for reading, return false on failure + if ( !mBGZF.Open(filename, "rb") ) + return false; + + // retrieve header text & reference data LoadHeaderData(); LoadReferenceData(); @@ -1114,6 +1118,9 @@ void BamReader::BamReaderPrivate::Open(const string& filename, const string& ind if ( !IndexFilename.empty() ) { LoadIndex(); } + + // return success + return true; } // returns BAM file pointer to beginning of alignment data diff --git a/BamReader.h b/BamReader.h index fc9a003..92de17e 100644 --- a/BamReader.h +++ b/BamReader.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 16 June 2010 (DB) +// Last modified: 22 June 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. 
@@ -41,7 +41,7 @@ class BamReader { // performs random-access jump to reference, position bool Jump(int refID, int position = 0); // opens BAM file (and optional BAM index file, if provided) - void Open(const std::string& filename, const std::string& indexFilename = ""); + bool Open(const std::string& filename, const std::string& indexFilename = ""); // returns file pointer to beginning of alignments bool Rewind(void); // sets a region of interest (with left & right bound reference/position) diff --git a/BamReader.o b/BamReader.o new file mode 100644 index 0000000..d6895e1 Binary files /dev/null and b/BamReader.o differ diff --git a/BamWriter.cpp b/BamWriter.cpp index 660be5d..12a13e0 100644 --- a/BamWriter.cpp +++ b/BamWriter.cpp @@ -94,7 +94,7 @@ void BamWriter::BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigar unsigned int cigarOp; vector<CigarOp>::const_iterator coIter; - for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); coIter++) { + for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); ++coIter) { switch(coIter->Type) { case 'M': @@ -206,17 +206,16 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam // write the SAM header text length uint32_t samHeaderLen = samHeader.size(); - if ( IsBigEndian ) { SwapEndian_32(samHeaderLen); } + if (IsBigEndian) SwapEndian_32(samHeaderLen); mBGZF.Write((char*)&samHeaderLen, BT_SIZEOF_INT); // write the SAM header text - if(samHeaderLen > 0) { + if(samHeaderLen > 0) mBGZF.Write(samHeader.data(), samHeaderLen); - } // write the number of reference sequences uint32_t numReferenceSequences = referenceSequences.size(); - if ( IsBigEndian ) { SwapEndian_32(numReferenceSequences); } + if (IsBigEndian) SwapEndian_32(numReferenceSequences); mBGZF.Write((char*)&numReferenceSequences, BT_SIZEOF_INT); // ============================= @@ -228,7 +227,7 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam // write the reference sequence name 
length uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1; - if ( IsBigEndian ) { SwapEndian_32(referenceSequenceNameLen); } + if (IsBigEndian) SwapEndian_32(referenceSequenceNameLen); mBGZF.Write((char*)&referenceSequenceNameLen, BT_SIZEOF_INT); // write the reference sequence name @@ -236,7 +235,7 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam // write the reference sequence length int32_t referenceLength = rsIter->RefLength; - if ( IsBigEndian ) { SwapEndian_32(referenceLength); } + if (IsBigEndian) SwapEndian_32(referenceLength); mBGZF.Write((char*)&referenceLength, BT_SIZEOF_INT); } } @@ -257,14 +256,13 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { // write the block size unsigned int blockSize = al.SupportData.BlockLength; - if ( IsBigEndian ) { SwapEndian_32(blockSize); } + if (IsBigEndian) SwapEndian_32(blockSize); mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT); // swap BAM core endian-ness, if necessary if ( IsBigEndian ) { - for ( int i = 0; i < 8; ++i ) { + for ( int i = 0; i < 8; ++i ) SwapEndian_32(buffer[i]); - } } // write the BAM core @@ -279,9 +277,9 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { else { // initialize - const unsigned int nameLen = al.Name.size() + 1; - const unsigned int queryLen = al.QueryBases.size(); - const unsigned int tagDataLength = al.TagData.size(); + const unsigned int nameLen = al.Name.size() + 1; + const unsigned int queryLen = al.QueryBases.size(); + const unsigned int tagDataLength = al.TagData.size(); // create our packed cigar string string packedCigar; @@ -303,25 +301,25 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { memcpy(cigarData, packedCigar.data(), packedCigarLen); for (unsigned int i = 0; i < packedCigarLen; ++i) { - if ( IsBigEndian ) { + if ( IsBigEndian ) SwapEndian_32p(&cigarData[i]); - } } mBGZF.Write(cigarData, packedCigarLen); - free(cigarData); - - } else { + 
free(cigarData); + } + else mBGZF.Write(packedCigar.data(), packedCigarLen); - } // write the encoded query sequence mBGZF.Write(encodedQuery.data(), encodedQueryLen); // write the base qualities - string baseQualities = al.Qualities; + string baseQualities(al.Qualities); char* pBaseQualities = (char*)al.Qualities.data(); - for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; } + for(unsigned int i = 0; i < queryLen; i++) { + pBaseQualities[i] -= 33; + } mBGZF.Write(pBaseQualities, queryLen); // write the read group tag @@ -375,8 +373,8 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { mBGZF.Write(tagData, tagDataLength); free(tagData); - } else { - mBGZF.Write(al.TagData.data(), tagDataLength); - } + } + else + mBGZF.Write(al.TagData.data(), tagDataLength); } }