From 41da74387a2c1db662a416d20d7150ec1a745212 Mon Sep 17 00:00:00 2001
From: Derek <derekwbarnett@gmail.com>
Date: Thu, 10 Jun 2010 00:50:37 -0400
Subject: [PATCH] Moved BamAlignmentSupportData into BamAlignment data type.
 This continues the read/write speedup mentioned in prior commits, but removes
 the need for clients to manage this additional auxilary data object. The
 'BamAlignment lite' is accessed by calling BamReader::GetNextAlignmentCore()
 and written by BamWriter::SaveAlignment() which checks to see how much
 parsing & packing is needed before writing.

---
 BamAux.h           |  40 +++----
 BamReader.cpp      |  73 ++++++-------
 BamReader.h        |   3 +-
 BamWriter.cpp      | 254 +++++++++++++++++++--------------------------
 BamWriter.h        |   2 -
 bamtools_merge.cpp |  12 ++-
 6 files changed, 180 insertions(+), 204 deletions(-)

diff --git a/BamAux.h b/BamAux.h
index 4659249..5ef34fe 100644
--- a/BamAux.h
+++ b/BamAux.h
@@ -133,6 +133,28 @@ struct BamAlignment {
         int32_t      MateRefID;         // ID number for reference sequence where alignment's mate was aligned
         int32_t      MatePosition;      // Position (0-based) where alignment's mate starts
         int32_t      InsertSize;        // Mate-pair insert size
+        
+        struct BamAlignmentSupportData {
+      
+            // data members
+            std::string AllCharData;
+            uint32_t    BlockLength;
+            uint32_t    NumCigarOperations;
+            uint32_t    QueryNameLength;
+            uint32_t    QuerySequenceLength;
+            bool        IsParsed;
+            
+            // constructor
+            BamAlignmentSupportData(void)
+                : BlockLength(0)
+                , NumCigarOperations(0)
+                , QueryNameLength(0)
+                , QuerySequenceLength(0)
+                , IsParsed(false)
+            { }
+        };
+        
+        BamAlignmentSupportData SupportData;  // Contains raw character data & lengths 
 
     // Alignment flag query constants
     // Use the get/set methods above instead
@@ -154,24 +176,6 @@ struct BamAlignment {
 // ----------------------------------------------------------------
 // Auxiliary data structs & typedefs
 
-struct BamAlignmentSupportData {
-      
-    // data members
-    std::string AllCharData;
-    uint32_t    BlockLength;
-    uint32_t    NumCigarOperations;
-    uint32_t    QueryNameLength;
-    uint32_t    QuerySequenceLength;
-    
-    // constructor
-    BamAlignmentSupportData(void)
-        : BlockLength(0)
-        , NumCigarOperations(0)
-        , QueryNameLength(0)
-        , QuerySequenceLength(0)
-    { }
-};
-
 struct CigarOp {
   
     // data members
diff --git a/BamReader.cpp b/BamReader.cpp
index 53c32e9..e5265e0 100644
--- a/BamReader.cpp
+++ b/BamReader.cpp
@@ -69,7 +69,7 @@ struct BamReader::BamReaderPrivate {
 
     // access alignment data
     bool GetNextAlignment(BamAlignment& bAlignment);
-    bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);
+    bool GetNextAlignmentCore(BamAlignment& bAlignment);
 
     // access auxiliary data
     int GetReferenceID(const string& refName) const;
@@ -86,7 +86,7 @@ struct BamReader::BamReaderPrivate {
     // calculate bins that overlap region ( left to reference end for now )
     int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);
     // fills out character data for BamAlignment data
-    bool BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData);
+    bool BuildCharData(BamAlignment& bAlignment);
     // calculate file offset for first alignment chunk overlapping 'left'
     int64_t GetOffset(int refID, int left);
     // checks to see if alignment overlaps current region
@@ -94,7 +94,7 @@ struct BamReader::BamReaderPrivate {
     // retrieves header text from BAM file
     void LoadHeaderData(void);
     // retrieves BAM alignment under file pointer
-    bool LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);
+    bool LoadNextAlignment(BamAlignment& bAlignment);
     // builds reference data structure from BAM file
     void LoadReferenceData(void);
 
@@ -139,7 +139,7 @@ bool BamReader::Rewind(void) { return d->Rewind(); }
 
 // access alignment data
 bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }
-bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); }
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment) { return d->GetNextAlignmentCore(bAlignment); }
 
 // access auxiliary data
 const string BamReader::GetHeaderText(void) const { return d->HeaderText; }
@@ -196,40 +196,40 @@ int BamReader::BamReaderPrivate::BinsFromRegion(int refID, int left, uint16_t li
     return i;
 }
 
-bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData) {
+bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {
   
     // calculate character lengths/offsets
-    const unsigned int dataLength     = supportData.BlockLength - BAM_CORE_SIZE;
-    const unsigned int seqDataOffset  = supportData.QueryNameLength + (supportData.NumCigarOperations * 4);
-    const unsigned int qualDataOffset = seqDataOffset + (supportData.QuerySequenceLength+1)/2;
-    const unsigned int tagDataOffset  = qualDataOffset + supportData.QuerySequenceLength;
+    const unsigned int dataLength     = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;
+    const unsigned int seqDataOffset  = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);
+    const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2;
+    const unsigned int tagDataOffset  = qualDataOffset + bAlignment.SupportData.QuerySequenceLength;
     const unsigned int tagDataLength  = dataLength - tagDataOffset;
       
     // set up char buffers
-    const char* allCharData = supportData.AllCharData.data();
+    const char* allCharData = bAlignment.SupportData.AllCharData.data();
     const char* seqData     = ((const char*)allCharData) + seqDataOffset;
     const char* qualData    = ((const char*)allCharData) + qualDataOffset;
     char* tagData     = ((char*)allCharData) + tagDataOffset;
   
     // save query sequence
     bAlignment.QueryBases.clear();
-    bAlignment.QueryBases.reserve(supportData.QuerySequenceLength);
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {
+    bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength);
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {
         char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
         bAlignment.QueryBases.append(1, singleBase);
     }
   
     // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
     bAlignment.Qualities.clear();
-    bAlignment.Qualities.reserve(supportData.QuerySequenceLength);
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {
+    bAlignment.Qualities.reserve(bAlignment.SupportData.QuerySequenceLength);
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {
         char singleQuality = (char)(qualData[i]+33);
         bAlignment.Qualities.append(1, singleQuality);
     }
     
     // parse CIGAR to build 'AlignedBases'
     bAlignment.AlignedBases.clear();
-    bAlignment.AlignedBases.reserve(supportData.QuerySequenceLength);
+    bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);
     
     int k = 0;
     vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
@@ -323,6 +323,9 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const
     bAlignment.TagData.resize(tagDataLength);
     memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLength);
     
+    // set support data parsed flag
+    bAlignment.SupportData.IsParsed = true;
+    
     // return success
     return true;
 }
@@ -504,25 +507,23 @@ int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
 // get next alignment (from specified region, if given)
 bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 
-    BamAlignmentSupportData supportData;
-  
     // if valid alignment available
-    if ( LoadNextAlignment(bAlignment, supportData) ) {
+    if ( LoadNextAlignment(bAlignment) ) {
 
         // if region not specified, return success
         if ( !IsRegionSpecified ) { 
-          bool ok = BuildCharData(bAlignment, supportData);
+          bool ok = BuildCharData(bAlignment);
           return ok; 
         }
 
         // load next alignment until region overlap is found
         while ( !IsOverlap(bAlignment) ) {
             // if no valid alignment available (likely EOF) return failure
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;
+            if ( !LoadNextAlignment(bAlignment) ) return false;
         }
 
         // return success (alignment found that overlaps region)
-        bool ok = BuildCharData(bAlignment, supportData);
+        bool ok = BuildCharData(bAlignment);
         return ok;
     }
 
@@ -535,10 +536,10 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 // ** DOES NOT parse any character data (bases, qualities, tag data)
 //    these can be accessed, if necessary, from the supportData 
 // useful for operations requiring ONLY positional or other alignment-related information
-bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {
 
     // if valid alignment available
-    if ( LoadNextAlignment(bAlignment, supportData) ) {
+    if ( LoadNextAlignment(bAlignment) ) {
 
         // if region not specified, return success
         if ( !IsRegionSpecified ) return true;
@@ -546,7 +547,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment,
         // load next alignment until region overlap is found
         while ( !IsOverlap(bAlignment) ) {
             // if no valid alignment available (likely EOF) return failure
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;
+            if ( !LoadNextAlignment(bAlignment) ) return false;
         }
 
         // return success (alignment found that overlaps region)
@@ -847,14 +848,14 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
 }
 
 // populates BamAlignment with alignment data under file pointer, returns success/fail
-bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {
+bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {
 
     // read in the 'block length' value, make sure it's not zero
     char buffer[4];
     mBGZF.Read(buffer, 4);
-    supportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);
-    if ( IsBigEndian ) { SwapEndian_32(supportData.BlockLength); }
-    if ( supportData.BlockLength == 0 ) { return false; }
+    bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); }
+    if ( bAlignment.SupportData.BlockLength == 0 ) { return false; }
 
     // read in core alignment data, make sure the right size of data was read
     char x[BAM_CORE_SIZE];
@@ -873,20 +874,20 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
     unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]);
     bAlignment.Bin        = tempValue >> 16;
     bAlignment.MapQuality = tempValue >> 8 & 0xff;
-    supportData.QueryNameLength = tempValue & 0xff;
+    bAlignment.SupportData.QueryNameLength = tempValue & 0xff;
 
     tempValue = BgzfData::UnpackUnsignedInt(&x[12]);
     bAlignment.AlignmentFlag = tempValue >> 16;
-    supportData.NumCigarOperations = tempValue & 0xffff;
+    bAlignment.SupportData.NumCigarOperations = tempValue & 0xffff;
 
-    supportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);
+    bAlignment.SupportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);
     bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);
     bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);
     bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);
     
     // store 'all char data' and cigar ops
-    const unsigned int dataLength      = supportData.BlockLength - BAM_CORE_SIZE;
-    const unsigned int cigarDataOffset = supportData.QueryNameLength;
+    const unsigned int dataLength      = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;
+    const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength;
     
     char*     allCharData = (char*)calloc(sizeof(char), dataLength);
     uint32_t* cigarData   = (uint32_t*)(allCharData + cigarDataOffset);
@@ -897,14 +898,14 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
      
         // store alignment name and length
         bAlignment.Name.assign((const char*)(allCharData));
-        bAlignment.Length = supportData.QuerySequenceLength;
+        bAlignment.Length = bAlignment.SupportData.QuerySequenceLength;
       
         // store remaining 'allCharData' in supportData structure
-        supportData.AllCharData.assign((const char*)allCharData, dataLength);
+        bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);
         
         // save CigarOps for BamAlignment
         bAlignment.CigarData.clear();
-        for (unsigned int i = 0; i < supportData.NumCigarOperations; ++i) {
+        for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) {
 
             // swap if necessary
             if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }
diff --git a/BamReader.h b/BamReader.h
index 88cc74a..0b13786 100644
--- a/BamReader.h
+++ b/BamReader.h
@@ -51,11 +51,12 @@ class BamReader {
 
         // retrieves next available alignment (returns success/fail)
         bool GetNextAlignment(BamAlignment& bAlignment);
+        
         // retrieves next available alignment core data (returns success/fail)
         // ** DOES NOT parse any character data (bases, qualities, tag data)
         //    these can be accessed, if necessary, from the supportData 
         // useful for operations requiring ONLY positional or other alignment-related information
-        bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);
+        bool GetNextAlignmentCore(BamAlignment& bAlignment);
 
         // ----------------------
         // access auxiliary data
diff --git a/BamWriter.cpp b/BamWriter.cpp
index 9d18fae..660be5d 100644
--- a/BamWriter.cpp
+++ b/BamWriter.cpp
@@ -37,7 +37,6 @@ struct BamWriter::BamWriterPrivate {
     void Close(void);
     void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);
     void SaveAlignment(const BamAlignment& al);
-    void SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData);
 
     // internal methods
     void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);
@@ -74,10 +73,6 @@ void BamWriter::SaveAlignment(const BamAlignment& al) {
     d->SaveAlignment(al);
 }
 
-void BamWriter::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {
-    d->SaveAlignment(al, supportData);
-}
-
 // -----------------------------------------------------
 // BamWriterPrivate implementation
 // -----------------------------------------------------
@@ -249,170 +244,139 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam
 // saves the alignment to the alignment archive
 void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
 
-    // initialize
-    const unsigned int nameLen            = al.Name.size() + 1;
-    const unsigned int queryLen           = al.QueryBases.size();
-    const unsigned int numCigarOperations = al.CigarData.size();
-
-    // create our packed cigar string
-    string packedCigar;
-    CreatePackedCigar(al.CigarData, packedCigar);
-    const unsigned int packedCigarLen = packedCigar.size();
-
-    // encode the query
-    string encodedQuery;
-    EncodeQuerySequence(al.QueryBases, encodedQuery);
-    const unsigned int encodedQueryLen = encodedQuery.size();
-
-    // store the tag data length
-    // -------------------------------------------
-    // Modified: 3-25-2010 DWB
-    // Contributed: ARQ
-    // Fixed: "off by one" error when parsing tags in files produced by BamWriter
-    const unsigned int tagDataLength = al.TagData.size();
-    // original line: 
-    // const unsigned int tagDataLength = al.TagData.size() + 1;     
-    // -------------------------------------------
-    
-    // assign the BAM core data
+     // assign the BAM core data
     uint32_t buffer[8];
     buffer[0] = al.RefID;
     buffer[1] = al.Position;
-    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | nameLen;
-    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-    buffer[4] = queryLen;
+    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+    buffer[4] = al.SupportData.QuerySequenceLength;
     buffer[5] = al.MateRefID;
     buffer[6] = al.MatePosition;
     buffer[7] = al.InsertSize;
 
     // write the block size
-    unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;
-    unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
+    unsigned int blockSize = al.SupportData.BlockLength;
     if ( IsBigEndian ) { SwapEndian_32(blockSize); }
     mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
 
-    // write the BAM core
+    // swap BAM core endian-ness, if necessary
     if ( IsBigEndian ) { 
         for ( int i = 0; i < 8; ++i ) { 
             SwapEndian_32(buffer[i]); 
         } 
     }
+    
+    // write the BAM core
     mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
 
-    // write the query name
-    mBGZF.Write(al.Name.c_str(), nameLen);
-
-    // write the packed cigar
-    if ( IsBigEndian ) {
+    // if support data, not parsed out (resulted from BamReader::GetNextAlignmentCore()
+    // write the raw char data
+    if ( !al.SupportData.IsParsed )
+        mBGZF.Write((char*)al.SupportData.AllCharData.data(), al.SupportData.BlockLength-BAM_CORE_SIZE);  
+    
+    // re-pack (as needed) & write the parsed char data
+    else {
       
-        char* cigarData = (char*)calloc(sizeof(char), packedCigarLen);
-        memcpy(cigarData, packedCigar.data(), packedCigarLen);
-        
-        for (unsigned int i = 0; i < packedCigarLen; ++i) {
-            if ( IsBigEndian ) { 
-              SwapEndian_32p(&cigarData[i]); 
-            }
-        }
+        // initialize
+        const unsigned int nameLen            = al.Name.size() + 1;
+        const unsigned int queryLen           = al.QueryBases.size();
+        const unsigned int tagDataLength      = al.TagData.size();
         
-        mBGZF.Write(cigarData, packedCigarLen);
-        free(cigarData);
-        
-    } else { 
-        mBGZF.Write(packedCigar.data(), packedCigarLen);
-    }
-
-    // write the encoded query sequence
-    mBGZF.Write(encodedQuery.data(), encodedQueryLen);
-
-    // write the base qualities
-    string baseQualities = al.Qualities;
-    char* pBaseQualities = (char*)al.Qualities.data();
-    for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }
-    mBGZF.Write(pBaseQualities, queryLen);
-
-    // write the read group tag
-    if ( IsBigEndian ) {
-      
-        char* tagData = (char*)calloc(sizeof(char), tagDataLength);
-        memcpy(tagData, al.TagData.data(), tagDataLength);
+        // create our packed cigar string
+        string packedCigar;
+        CreatePackedCigar(al.CigarData, packedCigar);
+        const unsigned int packedCigarLen = packedCigar.size();
+
+        // encode the query
+        string encodedQuery;
+        EncodeQuerySequence(al.QueryBases, encodedQuery);
+        const unsigned int encodedQueryLen = encodedQuery.size(); 
       
-        int i = 0;
-        while ( (unsigned int)i < tagDataLength ) {
+        // write the query name
+        mBGZF.Write(al.Name.c_str(), nameLen);
+
+        // write the packed cigar
+        if ( IsBigEndian ) {
+          
+            char* cigarData = (char*)calloc(sizeof(char), packedCigarLen);
+            memcpy(cigarData, packedCigar.data(), packedCigarLen);
             
-            i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)
-            uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning 
-            ++i;                                    // skip value type
-            
-            switch (type) {
-              
-                case('A') :
-                case('C') : 
-                    ++i;
-                    break;
-                            
-                case('S') : 
-                    SwapEndian_16p(&tagData[i]); 
-                    i+=2; // sizeof(uint16_t)
-                    break;
-                            
-                case('F') :
-                case('I') : 
-                    SwapEndian_32p(&tagData[i]);
-                    i+=4; // sizeof(uint32_t)
-                    break;
-                            
-                case('D') : 
-                    SwapEndian_64p(&tagData[i]);
-                    i+=8; // sizeof(uint64_t)
-                    break;
-                            
-                case('H') :
-                case('Z') : 
-                    while (tagData[i]) { ++i; }
-                    ++i; // increment one more for null terminator
-                    break;
-                            
-                default : 
-                    printf("ERROR: Invalid tag value type\n"); // shouldn't get here
-                    free(tagData);
-                    exit(1); 
+            for (unsigned int i = 0; i < packedCigarLen; ++i) {
+                if ( IsBigEndian ) { 
+                  SwapEndian_32p(&cigarData[i]); 
+                }
             }
+            
+            mBGZF.Write(cigarData, packedCigarLen);
+            free(cigarData);
+            
+        } else { 
+            mBGZF.Write(packedCigar.data(), packedCigarLen);
         }
-        
-        mBGZF.Write(tagData, tagDataLength);
-        free(tagData);
-    } else {
-        mBGZF.Write(al.TagData.data(), tagDataLength);
-    }
-}
 
-void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {
-  
-    // assign the BAM core data
-    uint32_t buffer[8];
-    buffer[0] = al.RefID;
-    buffer[1] = al.Position;
-    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | supportData.QueryNameLength;
-    buffer[3] = (al.AlignmentFlag << 16) | supportData.NumCigarOperations;
-    buffer[4] = supportData.QuerySequenceLength;
-    buffer[5] = al.MateRefID;
-    buffer[6] = al.MatePosition;
-    buffer[7] = al.InsertSize;
-
-    // write the block size
-    unsigned int blockSize = supportData.BlockLength;
-    if ( IsBigEndian ) { SwapEndian_32(blockSize); }
-    mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
-
-    // write the BAM core
-    if ( IsBigEndian ) { 
-        for ( int i = 0; i < 8; ++i ) { 
-            SwapEndian_32(buffer[i]); 
-        } 
+        // write the encoded query sequence
+        mBGZF.Write(encodedQuery.data(), encodedQueryLen);
+
+        // write the base qualities
+        string baseQualities = al.Qualities;
+        char* pBaseQualities = (char*)al.Qualities.data();
+        for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }
+        mBGZF.Write(pBaseQualities, queryLen);
+
+        // write the read group tag
+        if ( IsBigEndian ) {
+          
+            char* tagData = (char*)calloc(sizeof(char), tagDataLength);
+            memcpy(tagData, al.TagData.data(), tagDataLength);
+          
+            int i = 0;
+            while ( (unsigned int)i < tagDataLength ) {
+                
+                i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)
+                uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning 
+                ++i;                                    // skip value type
+                
+                switch (type) {
+                  
+                    case('A') :
+                    case('C') : 
+                        ++i;
+                        break;
+                                
+                    case('S') : 
+                        SwapEndian_16p(&tagData[i]); 
+                        i+=2; // sizeof(uint16_t)
+                        break;
+                                
+                    case('F') :
+                    case('I') : 
+                        SwapEndian_32p(&tagData[i]);
+                        i+=4; // sizeof(uint32_t)
+                        break;
+                                
+                    case('D') : 
+                        SwapEndian_64p(&tagData[i]);
+                        i+=8; // sizeof(uint64_t)
+                        break;
+                                
+                    case('H') :
+                    case('Z') : 
+                        while (tagData[i]) { ++i; }
+                        ++i; // increment one more for null terminator
+                        break;
+                                
+                    default : 
+                        printf("ERROR: Invalid tag value type\n"); // shouldn't get here
+                        free(tagData);
+                        exit(1); 
+                }
+            }
+            
+            mBGZF.Write(tagData, tagDataLength);
+            free(tagData);
+        } else {
+            mBGZF.Write(al.TagData.data(), tagDataLength);
+        }      
     }
-    mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
-
-    // write the raw char data
-    mBGZF.Write((char*)supportData.AllCharData.data(), supportData.BlockLength-BAM_CORE_SIZE);
 }
-
diff --git a/BamWriter.h b/BamWriter.h
index 31c7d61..2608ddb 100644
--- a/BamWriter.h
+++ b/BamWriter.h
@@ -37,8 +37,6 @@ class BamWriter {
         void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);
         // saves the alignment to the alignment archive
         void SaveAlignment(const BamTools::BamAlignment& al);
-        // saves the (partial) alignment, using support data, to the alignment archive
-        void SaveAlignment(const BamTools::BamAlignment& al, const BamTools::BamAlignmentSupportData& supportData);
 
     // private implementation
     private:
diff --git a/bamtools_merge.cpp b/bamtools_merge.cpp
index cae6134..bfb5de7 100644
--- a/bamtools_merge.cpp
+++ b/bamtools_merge.cpp
@@ -88,8 +88,11 @@ int MergeTool::Run(int argc, char* argv[]) {
     if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn());
     
     // opens the BAM files without checking for indexes
-    BamMultiReader reader;
-    reader.Open(m_settings->InputFiles, false); 
+//     BamMultiReader reader;
+//     reader.Open(m_settings->InputFiles, false); 
+
+    BamReader reader;
+    reader.Open(m_settings->InputFiles.at(0));
 
     // retrieve header & reference dictionary info
     std::string mergedHeader = reader.GetHeaderText();
@@ -100,6 +103,11 @@ int MergeTool::Run(int argc, char* argv[]) {
     writer.Open(m_settings->OutputFilename, mergedHeader, references);
 
     // store alignments to output file
+//     BamAlignment bAlignment;
+//     while (reader.GetNextAlignment(bAlignment)) {
+//         writer.SaveAlignment(bAlignment);
+//     }
+    
     BamAlignment bAlignment;
     while (reader.GetNextAlignment(bAlignment)) {
         writer.SaveAlignment(bAlignment);
-- 
2.39.5