git.donarmstrong.com Git - bamtools.git/blobdiff - src/api/internal/BamWriter_p.cpp
Cleaned up output/exception/asserts, etc after last merge
[bamtools.git] / src / api / internal / BamWriter_p.cpp
index 8f6c30d5558780240c9b65f0d363d8de3be0081a..100de2d2d25a378c64179e39e71e6fdf874eb847 100644 (file)
@@ -1,20 +1,20 @@
 // ***************************************************************************
 // BamWriter_p.cpp (c) 2010 Derek Barnett
 // Marth Lab, Department of Biology, Boston College
-// All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 7 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Provides the basic functionality for producing BAM files
 // ***************************************************************************
 
 #include <api/BamAlignment.h>
 #include <api/BamConstants.h>
+#include <api/IBamIODevice.h>
+#include <api/internal/BamException_p.h>
 #include <api/internal/BamWriter_p.h>
 using namespace BamTools;
 using namespace BamTools::Internal;
 
-#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 using namespace std;
@@ -26,11 +26,11 @@ BamWriterPrivate::BamWriterPrivate(void)
 
 // dtor
 BamWriterPrivate::~BamWriterPrivate(void) {
-    m_stream.Close();
+    Close();
 }
 
 // calculates minimum bin for a BAM alignment interval
-unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
     --end;
     if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
     if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);
@@ -42,14 +42,23 @@ unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) con
 
 // closes the alignment archive
 void BamWriterPrivate::Close(void) {
-    m_stream.Close();
+
+    // skip if file not open
+    if ( !IsOpen() ) return;
+
+    // close output stream
+    try {
+        m_stream.Close();
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+    }
 }
 
 // creates a cigar string from the supplied alignment
 void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
 
     // initialize
-    const unsigned int numCigarOperations = cigarOperations.size();
+    const size_t numCigarOperations = cigarOperations.size();
     packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
 
     // pack the cigar data into the string
@@ -61,7 +70,7 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
     for ( ; coIter != coEnd; ++coIter ) {
 
         // store op in packedCigar
-        unsigned int cigarOp;
+        uint8_t cigarOp;
         switch ( coIter->Type ) {
             case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;
             case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;
@@ -73,8 +82,8 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
             case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
             case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
             default:
-              fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\n", coIter->Type);
-              exit(1);
+                const string message = string("invalid CIGAR operation type") + coIter->Type;
+                throw BamException("BamWriter::CreatePackedCigar", message);
         }
 
         *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
@@ -86,26 +95,36 @@ void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations,
 void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
 
     // prepare the encoded query string
-    const unsigned int queryLen = query.size();
-    const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);
-    encodedQuery.resize(encodedQueryLen);
+    const size_t queryLength = query.size();
+    const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+    encodedQuery.resize(encodedQueryLength);
     char* pEncodedQuery = (char*)encodedQuery.data();
     const char* pQuery = (const char*)query.data();
 
+    // walk through original query sequence, encoding its bases
     unsigned char nucleotideCode;
     bool useHighWord = true;
-
     while ( *pQuery ) {
         switch ( *pQuery ) {
             case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
             case (Constants::BAM_DNA_A)     : nucleotideCode = Constants::BAM_BASECODE_A;     break;
             case (Constants::BAM_DNA_C)     : nucleotideCode = Constants::BAM_BASECODE_C;     break;
+            case (Constants::BAM_DNA_M)     : nucleotideCode = Constants::BAM_BASECODE_M;     break;
             case (Constants::BAM_DNA_G)     : nucleotideCode = Constants::BAM_BASECODE_G;     break;
+            case (Constants::BAM_DNA_R)     : nucleotideCode = Constants::BAM_BASECODE_R;     break;
+            case (Constants::BAM_DNA_S)     : nucleotideCode = Constants::BAM_BASECODE_S;     break;
+            case (Constants::BAM_DNA_V)     : nucleotideCode = Constants::BAM_BASECODE_V;     break;
             case (Constants::BAM_DNA_T)     : nucleotideCode = Constants::BAM_BASECODE_T;     break;
+            case (Constants::BAM_DNA_W)     : nucleotideCode = Constants::BAM_BASECODE_W;     break;
+            case (Constants::BAM_DNA_Y)     : nucleotideCode = Constants::BAM_BASECODE_Y;     break;
+            case (Constants::BAM_DNA_H)     : nucleotideCode = Constants::BAM_BASECODE_H;     break;
+            case (Constants::BAM_DNA_K)     : nucleotideCode = Constants::BAM_BASECODE_K;     break;
+            case (Constants::BAM_DNA_D)     : nucleotideCode = Constants::BAM_BASECODE_D;     break;
+            case (Constants::BAM_DNA_B)     : nucleotideCode = Constants::BAM_BASECODE_B;     break;
             case (Constants::BAM_DNA_N)     : nucleotideCode = Constants::BAM_BASECODE_N;     break;
             default:
-                fprintf(stderr, "BamWriter ERROR: only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
-                exit(1);
+                const string message = string("invalid base: ") + *pQuery;
+                throw BamException("BamWriter::EncodeQuerySequence", message);
         }
 
         // pack the nucleotide code
@@ -123,9 +142,14 @@ void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQ
     }
 }
 
+// returns a description of the last error that occurred
+std::string BamWriterPrivate::GetErrorString(void) const {
+    return m_errorString;
+}
+
 // returns whether BAM file is open for writing or not
 bool BamWriterPrivate::IsOpen(void) const {
-    return m_stream.IsOpen;
+    return m_stream.IsOpen();
 }
 
 // opens the alignment archive
@@ -133,248 +157,265 @@ bool BamWriterPrivate::Open(const string& filename,
                             const string& samHeaderText,
                             const RefVector& referenceSequences)
 {
-    // open the BGZF file for writing, return failure if error
-    if ( !m_stream.Open(filename, "wb") )
-        return false;
+    try {
 
-    // write BAM file 'metadata' components
-    WriteMagicNumber();
-    WriteSamHeaderText(samHeaderText);
-    WriteReferences(referenceSequences);
-    return true;
-}
+        // open the BGZF file for writing
+        m_stream.Open(filename,  IBamIODevice::WriteOnly);
 
-// saves the alignment to the alignment archive
-void BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
-    // if BamAlignment contains only the core data and a raw char data buffer
-    // (as a result of BamReader::GetNextAlignmentCore())
-    if ( al.SupportData.HasCoreOnly ) {
-
-        // write the block size
-        unsigned int blockSize = al.SupportData.BlockLength;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-        m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-        // assign the BAM core data
-        uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-        buffer[0] = al.RefID;
-        buffer[1] = al.Position;
-        buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
-        buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
-        buffer[4] = al.SupportData.QuerySequenceLength;
-        buffer[5] = al.MateRefID;
-        buffer[6] = al.MatePosition;
-        buffer[7] = al.InsertSize;
-
-        // swap BAM core endian-ness, if necessary
-        if ( m_isBigEndian ) {
-            for ( int i = 0; i < 8; ++i )
-                BamTools::SwapEndian_32(buffer[i]);
-        }
+        // write BAM file 'metadata' components
+        WriteMagicNumber();
+        WriteSamHeaderText(samHeaderText);
+        WriteReferences(referenceSequences);
 
-        // write the BAM core
-        m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+        // return success
+        return true;
 
-        // write the raw char data
-        m_stream.Write((char*)al.SupportData.AllCharData.data(),
-                       al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
     }
+}
 
-    // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
-    // ( resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code )
-    else {
-
-        // calculate char lengths
-        const unsigned int nameLength         = al.Name.size() + 1;
-        const unsigned int numCigarOperations = al.CigarData.size();
-        const unsigned int queryLength        = al.QueryBases.size();
-        const unsigned int tagDataLength      = al.TagData.size();
-
-        // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
-        // force calculation of Bin before storing
-        const int endPosition = al.GetEndPosition();
-        const unsigned int alignmentBin = CalculateMinimumBin(al.Position, endPosition);
-
-        // create our packed cigar string
-        string packedCigar;
-        CreatePackedCigar(al.CigarData, packedCigar);
-        const unsigned int packedCigarLength = packedCigar.size();
-
-        // encode the query
-        string encodedQuery;
-        EncodeQuerySequence(al.QueryBases, encodedQuery);
-        const unsigned int encodedQueryLength = encodedQuery.size();
-
-        // write the block size
-        const unsigned int dataBlockSize = nameLength +
-                                           packedCigarLength +
-                                           encodedQueryLength +
-                                           queryLength +
-                                           tagDataLength;
-        unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
-        if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
-        m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
-        // assign the BAM core data
-        uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
-        buffer[0] = al.RefID;
-        buffer[1] = al.Position;
-        buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
-        buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-        buffer[4] = queryLength;
-        buffer[5] = al.MateRefID;
-        buffer[6] = al.MatePosition;
-        buffer[7] = al.InsertSize;
-
-        // swap BAM core endian-ness, if necessary
-        if ( m_isBigEndian ) {
-            for ( int i = 0; i < 8; ++i )
-                BamTools::SwapEndian_32(buffer[i]);
-        }
-
-        // write the BAM core
-        m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
-        // write the query name
-        m_stream.Write(al.Name.c_str(), nameLength);
-
-        // write the packed cigar
-        if ( m_isBigEndian ) {
-            char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
-            memcpy(cigarData, packedCigar.data(), packedCigarLength);
-            if ( m_isBigEndian ) {
-                for ( unsigned int i = 0; i < packedCigarLength; ++i )
-                    BamTools::SwapEndian_32p(&cigarData[i]);
-            }
-            m_stream.Write(cigarData, packedCigarLength);
-            free(cigarData);
-        }
-        else
-            m_stream.Write(packedCigar.data(), packedCigarLength);
+// saves the alignment to the alignment archive
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
 
-        // write the encoded query sequence
-        m_stream.Write(encodedQuery.data(), encodedQueryLength);
+    try {
 
-        // write the base qualities
-        char* pBaseQualities = (char*)al.Qualities.data();
-        for ( unsigned int i = 0; i < queryLength; i++ )
-            pBaseQualities[i] -= 33; // FASTQ conversion
-        m_stream.Write(pBaseQualities, queryLength);
+        // if BamAlignment contains only the core data and a raw char data buffer
+        // (as a result of BamReader::GetNextAlignmentCore())
+        if ( al.SupportData.HasCoreOnly )
+            WriteCoreAlignment(al);
 
-        // write the read group tag
-        if ( m_isBigEndian ) {
+        // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
+        // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
+        else WriteAlignment(al);
 
-            char* tagData = (char*)calloc(sizeof(char), tagDataLength);
-            memcpy(tagData, al.TagData.data(), tagDataLength);
+        // if we get here, everything OK
+        return true;
 
-            int i = 0;
-            while ( (unsigned int)i < tagDataLength ) {
+    } catch ( BamException& e ) {
+        m_errorString = e.what();
+        return false;
+    }
+}
 
-                i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
-                const char type = tagData[i];     // get tag type at position i
-                ++i;
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+    // modifying compression is not allowed if BAM file is open
+    if ( !IsOpen() )
+        m_stream.SetWriteCompressed(ok);
+}
 
-                switch ( type ) {
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+    // calculate char lengths
+    const unsigned int nameLength         = al.Name.size() + 1;
+    const unsigned int numCigarOperations = al.CigarData.size();
+    const unsigned int queryLength        = al.QueryBases.size();
+    const unsigned int tagDataLength      = al.TagData.size();
+
+    // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
+    // force calculation of Bin before storing
+    const int endPosition = al.GetEndPosition();
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, endPosition);
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLength = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLength = encodedQuery.size();
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLength +
+                                       packedCigarLength +
+                                       encodedQueryLength +
+                                       queryLength +
+                                       tagDataLength;
+    unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
+    }
 
-                    case(Constants::BAM_TAG_TYPE_ASCII) :
-                    case(Constants::BAM_TAG_TYPE_INT8)  :
-                    case(Constants::BAM_TAG_TYPE_UINT8) :
-                        ++i;
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_INT16)  :
-                    case(Constants::BAM_TAG_TYPE_UINT16) :
-                        BamTools::SwapEndian_16p(&tagData[i]);
-                        i += sizeof(uint16_t);
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_FLOAT)  :
-                    case(Constants::BAM_TAG_TYPE_INT32)  :
-                    case(Constants::BAM_TAG_TYPE_UINT32) :
-                        BamTools::SwapEndian_32p(&tagData[i]);
-                        i += sizeof(uint32_t);
-                        break;
-
-                    case(Constants::BAM_TAG_TYPE_HEX) :
-                    case(Constants::BAM_TAG_TYPE_STRING) :
-                        // no endian swapping necessary for hex-string/string data
-                        while ( tagData[i] )
-                            ++i;
-                        // increment one more for null terminator
-                        ++i;
-                        break;
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
 
-                    case(Constants::BAM_TAG_TYPE_ARRAY) :
+    // write the query name
+    m_stream.Write(al.Name.c_str(), nameLength);
 
-                    {
-                        // read array type
-                        const char arrayType = tagData[i];
+    // write the packed cigar
+    if ( m_isBigEndian ) {
+        char* cigarData = new char[packedCigarLength]();
+        memcpy(cigarData, packedCigar.data(), packedCigarLength);
+        if ( m_isBigEndian ) {
+            for ( size_t i = 0; i < packedCigarLength; ++i )
+                BamTools::SwapEndian_32p(&cigarData[i]);
+        }
+        m_stream.Write(cigarData, packedCigarLength);
+        delete[] cigarData; // TODO: cleanup on Write exception thrown?
+    }
+    else
+        m_stream.Write(packedCigar.data(), packedCigarLength);
+
+    // write the encoded query sequence
+    m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+    // write the base qualities
+    char* pBaseQualities = (char*)al.Qualities.data();
+    for ( size_t i = 0; i < queryLength; ++i )
+        pBaseQualities[i] -= 33; // FASTQ conversion
+    m_stream.Write(pBaseQualities, queryLength);
+
+    // write the read group tag
+    if ( m_isBigEndian ) {
+
+        char* tagData = new char[tagDataLength]();
+        memcpy(tagData, al.TagData.data(), tagDataLength);
+
+        size_t i = 0;
+        while ( i < tagDataLength ) {
+
+            i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+            const char type = tagData[i];     // get tag type at position i
+            ++i;
+
+            switch ( type ) {
+
+                case(Constants::BAM_TAG_TYPE_ASCII) :
+                case(Constants::BAM_TAG_TYPE_INT8)  :
+                case(Constants::BAM_TAG_TYPE_UINT8) :
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_INT16)  :
+                case(Constants::BAM_TAG_TYPE_UINT16) :
+                    BamTools::SwapEndian_16p(&tagData[i]);
+                    i += sizeof(uint16_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                case(Constants::BAM_TAG_TYPE_INT32)  :
+                case(Constants::BAM_TAG_TYPE_UINT32) :
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    i += sizeof(uint32_t);
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_HEX) :
+                case(Constants::BAM_TAG_TYPE_STRING) :
+                    // no endian swapping necessary for hex-string/string data
+                    while ( tagData[i] )
                         ++i;
-
-                        // swap endian-ness of number of elements in place, then retrieve for loop
-                        BamTools::SwapEndian_32p(&tagData[i]);
-                        int32_t numElements;
-                        memcpy(&numElements, &tagData[i], sizeof(uint32_t));
-                        i += sizeof(uint32_t);
-
-                        // swap endian-ness of array elements
-                        for ( int j = 0; j < numElements; ++j ) {
-                            switch (arrayType) {
-                                case (Constants::BAM_TAG_TYPE_INT8)  :
-                                case (Constants::BAM_TAG_TYPE_UINT8) :
-                                    // no endian-swapping necessary
-                                    ++i;
-                                    break;
-                                case (Constants::BAM_TAG_TYPE_INT16)  :
-                                case (Constants::BAM_TAG_TYPE_UINT16) :
-                                    BamTools::SwapEndian_16p(&tagData[i]);
-                                    i += sizeof(uint16_t);
-                                    break;
-                                case (Constants::BAM_TAG_TYPE_FLOAT)  :
-                                case (Constants::BAM_TAG_TYPE_INT32)  :
-                                case (Constants::BAM_TAG_TYPE_UINT32) :
-                                    BamTools::SwapEndian_32p(&tagData[i]);
-                                    i += sizeof(uint32_t);
-                                    break;
-                                default:
-                                    // error case
-                                    fprintf(stderr,
-                                            "BamWriter ERROR: unknown binary array type encountered: [%c]\n",
-                                            arrayType);
-                                    exit(1);
-                            }
+                    // increment one more for null terminator
+                    ++i;
+                    break;
+
+                case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                {
+                    // read array type
+                    const char arrayType = tagData[i];
+                    ++i;
+
+                    // swap endian-ness of number of elements in place, then retrieve for loop
+                    BamTools::SwapEndian_32p(&tagData[i]);
+                    int32_t numElements;
+                    memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                    i += sizeof(uint32_t);
+
+                    // swap endian-ness of array elements
+                    for ( int j = 0; j < numElements; ++j ) {
+                        switch (arrayType) {
+                            case (Constants::BAM_TAG_TYPE_INT8)  :
+                            case (Constants::BAM_TAG_TYPE_UINT8) :
+                                // no endian-swapping necessary
+                                ++i;
+                                break;
+                            case (Constants::BAM_TAG_TYPE_INT16)  :
+                            case (Constants::BAM_TAG_TYPE_UINT16) :
+                                BamTools::SwapEndian_16p(&tagData[i]);
+                                i += sizeof(uint16_t);
+                                break;
+                            case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                            case (Constants::BAM_TAG_TYPE_INT32)  :
+                            case (Constants::BAM_TAG_TYPE_UINT32) :
+                                BamTools::SwapEndian_32p(&tagData[i]);
+                                i += sizeof(uint32_t);
+                                break;
+                            default:
+                                delete[] tagData;
+                                const string message = string("invalid binary array type: ") + arrayType;
+                                throw BamException("BamWriter::SaveAlignment", message);
                         }
-
-                        break;
                     }
 
-                    default :
-                        fprintf(stderr, "BamWriter ERROR: invalid tag value type\n"); // shouldn't get here
-                        free(tagData);
-                        exit(1);
+                    break;
                 }
+
+                default :
+                    delete[] tagData;
+                    const string message = string("invalid tag type: ") + type;
+                    throw BamException("BamWriter::SaveAlignment", message);
             }
-            m_stream.Write(tagData, tagDataLength);
-            free(tagData);
         }
-        else
-            m_stream.Write(al.TagData.data(), tagDataLength);
+
+        m_stream.Write(tagData, tagDataLength);
+        delete[] tagData; // TODO: cleanup on Write exception thrown?
     }
+    else
+        m_stream.Write(al.TagData.data(), tagDataLength);
 }
 
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
-
-    // warn if BAM file is already open
-    // modifying compression is not allowed in this case
-    if ( IsOpen() ) {
-        cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "
-             << "Ignoring request." << endl;
-        return;
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+    // write the block size
+    unsigned int blockSize = al.SupportData.BlockLength;
+    if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+    m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+    // re-calculate bin (in case BamAlignment's position has been previously modified)
+    const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+    // assign the BAM core data
+    uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    buffer[4] = al.SupportData.QuerySequenceLength;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // swap BAM core endian-ness, if necessary
+    if ( m_isBigEndian ) {
+        for ( int i = 0; i < 8; ++i )
+            BamTools::SwapEndian_32(buffer[i]);
     }
 
-    // set BgzfStream compression mode
-    m_stream.SetWriteCompressed(ok);
+    // write the BAM core
+    m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+    // write the raw char data
+    m_stream.Write((char*)al.SupportData.AllCharData.data(),
+                   al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
 }
 
 void BamWriterPrivate::WriteMagicNumber(void) {