Regression fixed: wasn't properly merging from multiple BAMs

[bamtools.git] / src / api / BamAlignment.cpp
diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp

index c3963386707f3c9b06701c2062075fe8b09fe2f5..013937a99e7bd96fb44e1249d7c7fe4d2a1966ff 100644 (file)
--- a/src/api/BamAlignment.cpp
+++ b/src/api/BamAlignment.cpp
@@ -1,26 +1,75 @@
  // ***************************************************************************
  // BamAlignment.cpp (c) 2009 Derek Barnett
  // Marth Lab, Department of Biology, Boston College
-// All rights reserved.
  // ---------------------------------------------------------------------------
-// Last modified: 19 September 2010 (DB)
+// Last modified: 13 October 2011 (DB)
  // ---------------------------------------------------------------------------
  // Provides the BamAlignment data structure
  // ***************************************************************************
  
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <exception>
-#include <map>
-#include <utility>
-#include "BamAlignment.h"
+#include "api/BamAlignment.h"
+#include "api/BamConstants.h"
  using namespace BamTools;
  using namespace std;
  
-// default ctor
-BamAlignment::BamAlignment(void) 
+/*! \class BamTools::BamAlignment
+    \brief The main BAM alignment data structure.
+
+    Provides methods to query/modify BAM alignment data fields.
+*/
+/*! \var BamAlignment::Name
+    \brief read name
+*/
+/*! \var BamAlignment::Length
+    \brief length of query sequence
+*/
+/*! \var BamAlignment::QueryBases
+    \brief 'original' sequence (as reported from sequencing machine)
+*/
+/*! \var BamAlignment::AlignedBases
+    \brief 'aligned' sequence (includes any indels, padding, clipping)
+*/
+/*! \var BamAlignment::Qualities
+    \brief FASTQ qualities (ASCII characters, not numeric values)
+*/
+/*! \var BamAlignment::TagData
+    \brief tag data (use the provided methods to query/modify)
+*/
+/*! \var BamAlignment::RefID
+    \brief ID number for reference sequence
+*/
+/*! \var BamAlignment::Position
+    \brief position (0-based) where alignment starts
+*/
+/*! \var BamAlignment::Bin
+    \brief BAM (standard) index bin number for this alignment
+*/
+/*! \var BamAlignment::MapQuality
+    \brief mapping quality score
+*/
+/*! \var BamAlignment::AlignmentFlag
+    \brief alignment bit-flag (use the provided methods to query/modify)
+*/
+/*! \var BamAlignment::CigarData
+    \brief CIGAR operations for this alignment
+*/
+/*! \var BamAlignment::MateRefID
+    \brief ID number for reference sequence where alignment's mate was aligned
+*/
+/*! \var BamAlignment::MatePosition
+    \brief position (0-based) where alignment's mate starts
+*/
+/*! \var BamAlignment::InsertSize
+    \brief mate-pair insert size
+*/
+/*! \var BamAlignment::Filename
+    \brief name of BAM file which this alignment comes from
+*/
+
+/*! \fn BamAlignment::BamAlignment(void)
+    \brief constructor
+*/
+BamAlignment::BamAlignment(void)
      : RefID(-1)
      , Position(-1)
      , MateRefID(-1)
@@ -28,7 +77,9 @@ BamAlignment::BamAlignment(void)
      , InsertSize(0)
  { }
  
-// copy ctor
+/*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
+    \brief copy constructor
+*/
  BamAlignment::BamAlignment(const BamAlignment& other)
      : Name(other.Name)
      , Length(other.Length)
@@ -45,539 +96,625 @@ BamAlignment::BamAlignment(const BamAlignment& other)
      , MateRefID(other.MateRefID)
      , MatePosition(other.MatePosition)
      , InsertSize(other.InsertSize)
+    , Filename(other.Filename)
      , SupportData(other.SupportData)
  { }
  
-// dtor
+/*! \fn BamAlignment::~BamAlignment(void)
+    \brief destructor
+*/
  BamAlignment::~BamAlignment(void) { }
  
-// Queries against alignment flags
-bool BamAlignment::IsDuplicate(void) const         { return ( (AlignmentFlag & DUPLICATE)     != 0 ); }
-bool BamAlignment::IsFailedQC(void) const          { return ( (AlignmentFlag & QC_FAILED)     != 0 ); }
-bool BamAlignment::IsFirstMate(void) const         { return ( (AlignmentFlag & READ_1)        != 0 ); }
-bool BamAlignment::IsMapped(void) const            { return ( (AlignmentFlag & UNMAPPED)      == 0 ); }
-bool BamAlignment::IsMateMapped(void) const        { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
-bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
-bool BamAlignment::IsPaired(void) const            { return ( (AlignmentFlag & PAIRED)        != 0 ); }
-bool BamAlignment::IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY)     == 0 ); }
-bool BamAlignment::IsProperPair(void) const        { return ( (AlignmentFlag & PROPER_PAIR)   != 0 ); }
-bool BamAlignment::IsReverseStrand(void) const     { return ( (AlignmentFlag & REVERSE)       != 0 ); }
-bool BamAlignment::IsSecondMate(void) const        { return ( (AlignmentFlag & READ_2)        != 0 ); }
-
-// Manipulate alignment flags 
-void BamAlignment::SetIsDuplicate(bool ok)          { if (ok) AlignmentFlag |= DUPLICATE;     else AlignmentFlag &= ~DUPLICATE; }
-void BamAlignment::SetIsFailedQC(bool ok)           { if (ok) AlignmentFlag |= QC_FAILED;     else AlignmentFlag &= ~QC_FAILED; }
-void BamAlignment::SetIsFirstMate(bool ok)          { if (ok) AlignmentFlag |= READ_1;        else AlignmentFlag &= ~READ_1; }
-void BamAlignment::SetIsMateUnmapped(bool ok)       { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
-void BamAlignment::SetIsMateReverseStrand(bool ok)  { if (ok) AlignmentFlag |= MATE_REVERSE;  else AlignmentFlag &= ~MATE_REVERSE; }
-void BamAlignment::SetIsPaired(bool ok)             { if (ok) AlignmentFlag |= PAIRED;        else AlignmentFlag &= ~PAIRED; }
-void BamAlignment::SetIsProperPair(bool ok)         { if (ok) AlignmentFlag |= PROPER_PAIR;   else AlignmentFlag &= ~PROPER_PAIR; }
-void BamAlignment::SetIsReverseStrand(bool ok)      { if (ok) AlignmentFlag |= REVERSE;       else AlignmentFlag &= ~REVERSE; }
-void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY;     else AlignmentFlag &= ~SECONDARY; }
-void BamAlignment::SetIsSecondMate(bool ok)         { if (ok) AlignmentFlag |= READ_2;        else AlignmentFlag &= ~READ_2; }
-void BamAlignment::SetIsUnmapped(bool ok)           { if (ok) AlignmentFlag |= UNMAPPED;      else AlignmentFlag &= ~UNMAPPED; }
-
-// calculates alignment end position, based on starting position and CIGAR operations
-int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
+/*! \fn bool BamAlignment::BuildCharData(void)
+    \brief Populates alignment string fields (read name, bases, qualities, tag data).
  
-    // initialize alignment end to starting position
-    int alignEnd = Position;
-
-    // iterate over cigar operations
-    vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
-    vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
-    for ( ; cigarIter != cigarEnd; ++cigarIter) {
-        const char cigarType = (*cigarIter).Type;
-        if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
-            alignEnd += (*cigarIter).Length;
-        else if ( usePadded && cigarType == 'I' )
-            alignEnd += (*cigarIter).Length;
-    }
-    
-    // adjust for zeroBased, if necessary
-    if (zeroBased) 
-        return alignEnd - 1;
-    else 
-        return alignEnd;
-}
+    An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
+    Using that method makes parsing much quicker when only positional data is required.
  
-bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type != "Z" && type != "H" ) return false;
-  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag already exists, return false
-    // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
-  
-    // otherwise, copy tag data to temp buffer
-    string newTag = tag + type + value;
-    const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
-    char originalTagData[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
-    
-    // append newTag
-    strcat(originalTagData + tagDataLength, newTag.data());  // removes original null-term, appends newTag + null-term
-    
-    // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
-    TagData.assign(newTagData, newTagDataLength);
-    
-    // return success
-    return true;
-}
+    However, if you later want to access the character data fields from such an alignment,
+    use this method to populate those fields. Provides ability to do 'lazy evaluation' of
+    alignment parsing.
  
-bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type == "f" || type == "Z" || type == "H" ) return false;
-  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag already exists, return false
-    // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
-  
-    // otherwise, convert value to string
-    union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
-    un.value = value;
-
-    // copy original tag data to temp buffer
-    string newTag = tag + type;
-    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
-    char originalTagData[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
-    
-    // append newTag
-    strcat(originalTagData + tagDataLength, newTag.data());
-    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
-    
-    // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
-    TagData.assign(newTagData, newTagDataLength);
-    
-    // return success
-    return true;
-}
+    \return \c true if character data populated successfully (or was already available to begin with)
+*/
+bool BamAlignment::BuildCharData(void) {
  
-bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
-    return AddTag(tag, type, (const uint32_t&)value);
-}
+    // skip if char data already parsed
+    if ( !SupportData.HasCoreOnly )
+        return true;
  
-bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type == "Z" || type == "H" ) return false;
-  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag already exists, return false
-    // use EditTag explicitly instead
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
-  
-    // otherwise, convert value to string
-    union { float value; char valueBuffer[sizeof(float)]; } un;
-    un.value = value;
-
-    // copy original tag data to temp buffer
-    string newTag = tag + type;
-    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
-    char originalTagData[newTagDataLength];
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
-    
-    // append newTag
-    strcat(originalTagData + tagDataLength, newTag.data());
-    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
-    
-    // store temp buffer back in TagData
-    const char* newTagData = (const char*)originalTagData;
-    TagData.assign(newTagData, newTagDataLength);
-    
-    // return success
-    return true;
-}
+    // check system endianness
+    bool IsBigEndian = BamTools::SystemIsBigEndian();
+
+    // calculate character lengths/offsets
+    const unsigned int dataLength     = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+    const unsigned int seqDataOffset  = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
+    const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
+    const unsigned int tagDataOffset  = qualDataOffset + SupportData.QuerySequenceLength;
+    const unsigned int tagDataLength  = dataLength - tagDataOffset;
+
+    // check offsets to see what char data exists
+    const bool hasSeqData  = ( seqDataOffset  < dataLength );
+    const bool hasQualData = ( qualDataOffset < dataLength );
+    const bool hasTagData  = ( tagDataOffset  < dataLength );
+
+    // set up char buffers
+    const char* allCharData = SupportData.AllCharData.data();
+    const char* seqData     = ( hasSeqData  ? (((const char*)allCharData) + seqDataOffset)  : (const char*)0 );
+    const char* qualData    = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
+          char* tagData     = ( hasTagData  ? (((char*)allCharData) + tagDataOffset)        : (char*)0 );
+
+    // store alignment name (relies on null char in name as terminator)
+    Name.assign((const char*)(allCharData));
+
+    // save query sequence
+    QueryBases.clear();
+    if ( hasSeqData ) {
+        QueryBases.reserve(SupportData.QuerySequenceLength);
+        for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+            const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+            QueryBases.append(1, singleBase);
+        }
+    }
  
-bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type != "Z" && type != "H" ) return false;
-  
-    // localize the tag data
-    char* pOriginalTagData = (char*)TagData.data();
-    char* pTagData = pOriginalTagData;
-    const unsigned int originalTagDataLength = TagData.size();
-    
-    unsigned int newTagDataLength = 0;
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
-        
-        // make sure array is more than big enough
-        char newTagData[originalTagDataLength + value.size()];  
-
-        // copy original tag data up til desired tag
-        const unsigned int beginningTagDataLength = numBytesParsed;
-        newTagDataLength += beginningTagDataLength;
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);
-      
-        // copy new VALUE in place of current tag data
-        const unsigned int dataLength = strlen(value.c_str());
-        memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
-        
-        // skip to next tag (if tag for removal is last, return true) 
-        const char* pTagStorageType = pTagData - 1;
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
-         
-        // copy everything from current tag (the next one after tag for removal) to end
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
-        const unsigned int endTagOffset      = beginningTagDataLength + dataLength + 1;
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
-        
-        // ensure null-terminator
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
-        
-        // save new tag data
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);
-        return true;
+    // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
+    Qualities.clear();
+    if ( hasQualData ) {
+        Qualities.reserve(SupportData.QuerySequenceLength);
+        for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+            const char singleQuality = static_cast<const char>(qualData[i]+33);
+            Qualities.append(1, singleQuality);
+        }
      }
-    
-    // tag not found, attempt AddTag
-    else return AddTag(tag, type, value);
-}
  
-bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type == "f" || type == "Z" || type == "H" ) return false;
-    
-     // localize the tag data
-    char* pOriginalTagData = (char*)TagData.data();
-    char* pTagData = pOriginalTagData;
-    const unsigned int originalTagDataLength = TagData.size();
-    
-    unsigned int newTagDataLength = 0;
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
-        
-        // make sure array is more than big enough
-        char newTagData[originalTagDataLength + sizeof(value)];  
-
-        // copy original tag data up til desired tag
-        const unsigned int beginningTagDataLength = numBytesParsed;
-        newTagDataLength += beginningTagDataLength;
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);
-      
-        // copy new VALUE in place of current tag data
-        union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
-        un.value = value;
-        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
-        
-        // skip to next tag (if tag for removal is last, return true) 
-        const char* pTagStorageType = pTagData - 1;
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
-         
-        // copy everything from current tag (the next one after tag for removal) to end
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
-        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(unsigned int);
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
-        
-        // ensure null-terminator
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
-        
-        // save new tag data
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);
-        return true;
+    // clear previous AlignedBases
+    AlignedBases.clear();
+
+    // if QueryBases has data, build AlignedBases using CIGAR data
+    // otherwise, AlignedBases will remain empty (this case IS allowed)
+    if ( !QueryBases.empty() ) {
+
+        // resize AlignedBases
+        AlignedBases.reserve(SupportData.QuerySequenceLength);
+
+        // iterate over CigarOps
+        int k = 0;
+        vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+        vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+            const CigarOp& op = (*cigarIter);
+
+            switch ( op.Type ) {
+
+                // for 'M', 'I', '=', 'X' - write bases
+                case (Constants::BAM_CIGAR_MATCH_CHAR)    :
+                case (Constants::BAM_CIGAR_INS_CHAR)      :
+                case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
+                case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
+                    AlignedBases.append(QueryBases.substr(k, op.Length));
+                    // fall through
+
+                // for 'S' - soft clip, do not write bases
+                // but increment placeholder 'k'
+                case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
+                    k += op.Length;
+                    break;
+
+                // for 'D' - write gap character
+                case (Constants::BAM_CIGAR_DEL_CHAR) :
+                    AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
+                    break;
+
+                // for 'P' - write padding character
+                case (Constants::BAM_CIGAR_PAD_CHAR) :
+                    AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
+                    break;
+
+                // for 'N' - write N's, skip bases in original query sequence
+                case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
+                    AlignedBases.append( op.Length, Constants::BAM_DNA_N );
+                    break;
+
+                // for 'H' - hard clip, do nothing to AlignedBases, move to next op
+                case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
+                    break;
+
+                // invalid CIGAR op-code
+                default:
+                    const string message = string("invalid CIGAR operation type: ") + op.Type;
+                    SetErrorString("BamAlignment::BuildCharData", message);
+                    return false;
+            }
+        }
      }
-    
-    // tag not found, attempt AddTag
-    else return AddTag(tag, type, value);
-}
  
-bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
-    return EditTag(tag, type, (const uint32_t&)value);
-}
+    // save tag data
+    TagData.clear();
+    if ( hasTagData ) {
+        if ( IsBigEndian ) {
+            size_t i = 0;
+            while ( i < tagDataLength ) {
+
+                i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+                const char type = tagData[i];     // get tag type at position i
+                ++i;                              // move i past tag type
+
+                switch (type) {
+
+                    case(Constants::BAM_TAG_TYPE_ASCII) :
+                    case(Constants::BAM_TAG_TYPE_INT8)  :
+                    case(Constants::BAM_TAG_TYPE_UINT8) :
+                        // no endian swapping necessary for single-byte data
+                        ++i;
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_INT16)  :
+                    case(Constants::BAM_TAG_TYPE_UINT16) :
+                        BamTools::SwapEndian_16p(&tagData[i]);
+                        i += sizeof(uint16_t);
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                    case(Constants::BAM_TAG_TYPE_INT32)  :
+                    case(Constants::BAM_TAG_TYPE_UINT32) :
+                        BamTools::SwapEndian_32p(&tagData[i]);
+                        i += sizeof(uint32_t);
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_HEX) :
+                    case(Constants::BAM_TAG_TYPE_STRING) :
+                        // no endian swapping necessary for hex-string/string data
+                        while ( tagData[i] )
+                            ++i;
+                        // increment one more for null terminator
+                        ++i;
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                    {
+                        // read array type
+                        const char arrayType = tagData[i];
+                        ++i;
+
+                        // swap endian-ness of number of elements in place, then retrieve for loop
+                        BamTools::SwapEndian_32p(&tagData[i]);
+                        uint32_t numElements;
+                        memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                        i += sizeof(uint32_t);
+
+                        // swap endian-ness of array elements
+                        for ( size_t j = 0; j < numElements; ++j ) {
+                            switch (arrayType) {
+                                case (Constants::BAM_TAG_TYPE_INT8)  :
+                                case (Constants::BAM_TAG_TYPE_UINT8) :
+                                    // no endian-swapping necessary
+                                    ++i;
+                                    break;
+                                case (Constants::BAM_TAG_TYPE_INT16)  :
+                                case (Constants::BAM_TAG_TYPE_UINT16) :
+                                    BamTools::SwapEndian_16p(&tagData[i]);
+                                    i += sizeof(uint16_t);
+                                    break;
+                                case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                                case (Constants::BAM_TAG_TYPE_INT32)  :
+                                case (Constants::BAM_TAG_TYPE_UINT32) :
+                                    BamTools::SwapEndian_32p(&tagData[i]);
+                                    i += sizeof(uint32_t);
+                                    break;
+                                default:
+                                    const string message = string("invalid binary array type: ") + arrayType;
+                                    SetErrorString("BamAlignment::BuildCharData", message);
+                                    return false;
+                            }
+                        }
+
+                        break;
+                    }
+
+                    // invalid tag type-code
+                    default :
+                        const string message = string("invalid tag type: ") + type;
+                        SetErrorString("BamAlignment::BuildCharData", message);
+                        return false;
+                }
+            }
+        }
  
-bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
-  
-    if ( SupportData.HasCoreOnly ) return false;
-    if ( tag.size() != 2 || type.size() != 1 ) return false;
-    if ( type == "Z" || type == "H" ) return false;
-    
-     // localize the tag data
-    char* pOriginalTagData = (char*)TagData.data();
-    char* pTagData = pOriginalTagData;
-    const unsigned int originalTagDataLength = TagData.size();
-    
-    unsigned int newTagDataLength = 0;
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
-        
-        // make sure array is more than big enough
-        char newTagData[originalTagDataLength + sizeof(value)];  
-
-        // copy original tag data up til desired tag
-        const unsigned int beginningTagDataLength = numBytesParsed;
-        newTagDataLength += beginningTagDataLength;
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);
-      
-        // copy new VALUE in place of current tag data
-        union { float value; char valueBuffer[sizeof(float)]; } un;
-        un.value = value;
-        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
-        
-        // skip to next tag (if tag for removal is last, return true) 
-        const char* pTagStorageType = pTagData - 1;
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
-         
-        // copy everything from current tag (the next one after tag for removal) to end
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
-        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(float);
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
-        
-        // ensure null-terminator
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
-        
-        // save new tag data
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);
-        return true;
+        // store tagData in alignment
+        TagData.resize(tagDataLength);
+        memcpy((char*)(TagData.data()), tagData, tagDataLength);
      }
-    
-    // tag not found, attempt AddTag
-    else return AddTag(tag, type, value);
-}
  
-// get "NM" tag data - originally contributed by Aaron Quinlan
-// stores data in 'editDistance', returns success/fail
-bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { 
-    return GetTag("NM", (uint32_t&)editDistance);
+    // clear core-only flag & return success
+    SupportData.HasCoreOnly = false;
+    return true;
  }
  
-// get "RG" tag data
-// stores data in 'readGroup', returns success/fail
-bool BamAlignment::GetReadGroup(string& readGroup) const {
-    return GetTag("RG", readGroup);
-}
+/*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) const
+    \internal
  
-bool BamAlignment::GetTag(const string& tag, string& destination) const {
+    Searches for requested tag in BAM tag data.
  
-    // make sure tag data exists
-    if ( SupportData.HasCoreOnly || TagData.empty() ) 
-        return false;
+    \param[in]     tag            requested 2-character tag name
+    \param[in,out] pTagData       pointer to current position in BamAlignment::TagData
+    \param[in]     tagDataLength  length of BamAlignment::TagData
+    \param[in,out] numBytesParsed number of bytes parsed so far
  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
-        const unsigned int dataLength = strlen(pTagData);
-        destination.clear();
-        destination.resize(dataLength);
-        memcpy( (char*)destination.data(), pTagData, dataLength );
-        return true;
+    \return \c true if found
+
+    \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
+          \a numBytesParsed will correspond to the position in the full TagData string.
+
+*/
+bool BamAlignment::FindTag(const std::string& tag,
+                           char*& pTagData,
+                           const unsigned int& tagDataLength,
+                           unsigned int& numBytesParsed) const
+{
+
+    while ( numBytesParsed < tagDataLength ) {
+
+        const char* pTagType        = pTagData;
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += 3;
+        numBytesParsed += 3;
+
+        // check the current tag, return true on match
+        if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
+            return true;
+
+        // get the storage class and find the next tag
+        if ( *pTagStorageType == '\0' ) return false;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
+        if ( *pTagData == '\0' ) return false;
      }
-    
-    // tag not found, return failure
+
+    // checked all tags, none match
      return false;
  }
  
-bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
-  
-    // make sure tag data exists
-    if ( SupportData.HasCoreOnly || TagData.empty() ) 
-        return false;
+/*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool closedInterval = false) const
+    \brief Calculates alignment end position, based on its starting position and CIGAR data.
  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, determine data byte-length, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
-        
-        // determine data byte-length
-        const char type = *(pTagData - 1);
-        int destinationLength = 0;
-        switch (type) {
-            // 1 byte data
-            case 'A':
-            case 'c':
-            case 'C':
-                destinationLength = 1;
-                break;
+    \warning The position returned now represents a zero-based, HALF-OPEN interval.
+    In previous versions of BamTools (0.x & 1.x) all intervals were treated
+    as zero-based, CLOSED.
  
-            // 2 byte data
-            case 's':
-            case 'S':
-                destinationLength = 2;
+    \param[in] usePadded      Allow inserted bases to affect the reported position. Default is
+                              false, so that reported position stays synced with reference
+                              coordinates.
+    \param[in] closedInterval Setting this to true will return a 0-based end coordinate. Default is
+                              false, so that his value represents a standard, half-open interval.
+
+    \return alignment end position
+*/
+int BamAlignment::GetEndPosition(bool usePadded, bool closedInterval) const {
+
+    // initialize alignment end to starting position
+    int alignEnd = Position;
+
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+    vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+        const CigarOp& op = (*cigarIter);
+
+        switch ( op.Type ) {
+
+            // increase end position on CIGAR chars [DMXN=]
+            case Constants::BAM_CIGAR_DEL_CHAR      :
+            case Constants::BAM_CIGAR_MATCH_CHAR    :
+            case Constants::BAM_CIGAR_MISMATCH_CHAR :
+            case Constants::BAM_CIGAR_REFSKIP_CHAR  :
+            case Constants::BAM_CIGAR_SEQMATCH_CHAR :
+                alignEnd += op.Length;
                  break;
  
-            // 4 byte data
-            case 'i':
-            case 'I':
-                destinationLength = 4;
+            // increase end position on insertion, only if @usePadded is true
+            case Constants::BAM_CIGAR_INS_CHAR :
+                if ( usePadded )
+                    alignEnd += op.Length;
                  break;
  
-            // unsupported type for integer destination (float or var-length strings)
-            case 'f':
-            case 'Z':
-            case 'H':
-                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
-                return false;
-
-            // unknown tag type
-            default:
-                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
-                return false;
+            // all other CIGAR chars do not affect end position
+            default :
+                break;
          }
-          
-        // store in destination
-        destination = 0;
-        memcpy(&destination, pTagData, destinationLength);
-        return true;
      }
-    
-    // tag not found, return failure
-    return false;
+
+    // adjust for closedInterval, if requested
+    if ( closedInterval )
+        alignEnd -= 1;
+
+    // return result
+    return alignEnd;
  }
  
-bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
-    return GetTag(tag, (uint32_t&)destination);
+/*! \fn std::string BamAlignment::GetErrorString(void) const
+    \brief Returns a human-readable description of the last error that occurred
+
+    This method allows elimination of STDERR pollution. Developers of client code
+    may choose how the messages are displayed to the user, if at all.
+
+    \return error description
+*/
+std::string BamAlignment::GetErrorString(void) const {
+    return ErrorString;
  }
  
-bool BamAlignment::GetTag(const string& tag, float& destination) const {
-  
-    // make sure tag data exists
-    if ( SupportData.HasCoreOnly || TagData.empty() ) 
-        return false;
+/*! \fn bool BamAlignment::GetSoftClips(std::vector<int>& clipSizes, std::vector<int>& readPositions, std::vector<int>& genomePositions, bool usePadded = false) const
+    \brief Identifies if an alignment has a soft clip. If so, identifies the
+           sizes of the soft clips, as well as their positions in the read and reference.
+
+    \param[out] clipSizes       vector of the sizes of each soft clip in the alignment
+    \param[out] readPositions   vector of the 0-based read locations of each soft clip in the alignment.
+                                These positions are basically indexes within the read, not genomic positions.
+    \param[out] genomePositions vector of the 0-based genome locations of each soft clip in the alignment
+    \param[in]  usePadded       inserted bases affect reported position. Default is false, so that
+                                reported position stays 'sync-ed' with reference coordinates.
+
+    \return \c true if any soft clips were found in the alignment
+*/
+bool BamAlignment::GetSoftClips(vector<int>& clipSizes,
+                                vector<int>& readPositions,
+                                vector<int>& genomePositions,
+                                bool usePadded) const
+{
+    // initialize positions & flags
+    int refPosition  = Position;
+    int readPosition = 0;
+    bool softClipFound = false;
+    bool firstCigarOp  = true;
  
-    // localize the tag data
-    char* pTagData = (char*)TagData.data();
-    const unsigned int tagDataLength = TagData.size();
-    unsigned int numBytesParsed = 0;
-    
-    // if tag found, determine data byte-length, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
-        
-        // determine data byte-length
-        const char type = *(pTagData - 1);
-        int destinationLength = 0;
-        switch(type) {
-
-            // 1 byte data
-            case 'A':
-            case 'c':
-            case 'C':
-                destinationLength = 1;
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+    vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+        const CigarOp& op = (*cigarIter);
+
+        switch ( op.Type ) {
+
+            // increase both read & genome positions on CIGAR chars [DMXN=]
+            case Constants::BAM_CIGAR_DEL_CHAR      :
+            case Constants::BAM_CIGAR_MATCH_CHAR    :
+            case Constants::BAM_CIGAR_MISMATCH_CHAR :
+            case Constants::BAM_CIGAR_REFSKIP_CHAR  :
+            case Constants::BAM_CIGAR_SEQMATCH_CHAR :
+                refPosition  += op.Length;
+                readPosition += op.Length;
                  break;
  
-            // 2 byte data
-            case 's':
-            case 'S':
-                destinationLength = 2;
+            // increase read position on insertion, genome position only if @usePadded is true
+            case Constants::BAM_CIGAR_INS_CHAR :
+                readPosition += op.Length;
+                if ( usePadded )
+                    refPosition += op.Length;
                  break;
  
-            // 4 byte data
-            case 'f':
-            case 'i':
-            case 'I':
-                destinationLength = 4;
+            case Constants::BAM_CIGAR_SOFTCLIP_CHAR :
+
+                softClipFound = true;
+
+                //////////////////////////////////////////////////////////////////////////////
+                // if we are dealing with the *first* CIGAR operation
+                // for this alignment, we increment the read position so that
+                // the read and genome position of the clip are referring to the same base.
+                // For example, in the alignment below, the ref position would be 4, yet
+                //              the read position would be 0. Thus, to "sync" the two,
+                //              we need to increment the read position by the length of the
+                //              soft clip.
+                // Read:  ATCGTTTCGTCCCTGC
+                // Ref:   GGGATTTCGTCCCTGC
+                // Cigar: SSSSMMMMMMMMMMMM
+                //
+                // NOTE: This only needs to be done if the soft clip is the _first_ CIGAR op.
+                //////////////////////////////////////////////////////////////////////////////
+                if ( firstCigarOp )
+                    readPosition += op.Length;
+
+                // track the soft clip's size, read position, and genome position
+                clipSizes.push_back(op.Length);
+                readPositions.push_back(readPosition);
+                genomePositions.push_back(refPosition);
+
+            // any other CIGAR operations have no effect
+            default :
                  break;
-            
-            // unsupported type (var-length strings)
-            case 'Z':
-            case 'H':
-                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
-                return false;
-
-            // unknown tag type
-            default:
-                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
-                return false;
          }
-          
-        // store in destination
-        destination = 0.0;
-        memcpy(&destination, pTagData, destinationLength);
-        return true;
+
+        // clear our "first pass" flag
+        firstCigarOp = false;
      }
-    
-    // tag not found, return failure
-    return false;
+
+    // return whether any soft clips found
+    return softClipFound;
  }
  
-bool BamAlignment::GetTagType(const string& tag, char& type) const {
+/*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
+    \brief Retrieves the BAM tag type-code associated with requested tag name.
+
+    \param[in]  tag  2-character tag name
+    \param[out] type retrieved (1-character) type-code
+
+    \return \c true if found
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
    
-    // make sure tag data exists
-    if ( SupportData.HasCoreOnly || TagData.empty() ) 
+    // skip if alignment is core-only
+    if ( SupportData.HasCoreOnly ) {
+        // TODO: set error string?
          return false;
+    }
+
+    // skip if no tags present
+    if ( TagData.empty() ) {
+        // TODO: set error string?
+        return false;
+    }
  
      // localize the tag data
      char* pTagData = (char*)TagData.data();
      const unsigned int tagDataLength = TagData.size();
      unsigned int numBytesParsed = 0;
      
-    // lookup tag
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
-        
-        // retrieve tag type code
-        type = *(pTagData - 1);
-        
-        // validate that type is a proper BAM tag type
-        switch(type) {
-            case 'A':
-            case 'c':
-            case 'C':
-            case 's':
-            case 'S':
-            case 'f':
-            case 'i':
-            case 'I':
-            case 'Z':
-            case 'H':
-                return true;
-
-            // unknown tag type
-            default:
-                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
-                return false;
-        }
+    // if tag not found, return failure
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
+        // TODO: set error string?
+        return false;
      }
-    
-    // tag not found, return failure
-    return false;
+
+    // otherwise, retrieve & validate tag type code
+    type = *(pTagData - 1);
+    switch ( type ) {
+        case (Constants::BAM_TAG_TYPE_ASCII)  :
+        case (Constants::BAM_TAG_TYPE_INT8)   :
+        case (Constants::BAM_TAG_TYPE_UINT8)  :
+        case (Constants::BAM_TAG_TYPE_INT16)  :
+        case (Constants::BAM_TAG_TYPE_UINT16) :
+        case (Constants::BAM_TAG_TYPE_INT32)  :
+        case (Constants::BAM_TAG_TYPE_UINT32) :
+        case (Constants::BAM_TAG_TYPE_FLOAT)  :
+        case (Constants::BAM_TAG_TYPE_STRING) :
+        case (Constants::BAM_TAG_TYPE_HEX)    :
+        case (Constants::BAM_TAG_TYPE_ARRAY)  :
+            return true;
+
+        // unknown tag type
+        default:
+            const string message = string("invalid tag type: ") + type;
+            SetErrorString("BamAlignment::GetTagType", message);
+            return false;
+    }
+}
+
+/*! \fn bool BamAlignment::HasTag(const std::string& tag) const
+    \brief Returns true if alignment has a record for requested tag.
+
+    \param[in] tag 2-character tag name
+    \return \c true if alignment has a record for tag
+*/
+bool BamAlignment::HasTag(const std::string& tag) const {
+
+    // return false if no tag data present
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // localize the tag data for lookup
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if result of tag lookup
+    return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
+}
+
+/*! \fn bool BamAlignment::IsDuplicate(void) const
+    \return \c true if this read is a PCR duplicate
+*/
+bool BamAlignment::IsDuplicate(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsFailedQC(void) const
+    \return \c true if this read failed quality control
+*/
+bool BamAlignment::IsFailedQC(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsFirstMate(void) const
+    \return \c true if alignment is first mate on paired-end read
+*/
+bool BamAlignment::IsFirstMate(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsMapped(void) const
+    \return \c true if alignment is mapped
+*/
+bool BamAlignment::IsMapped(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
+}
+
+/*! \fn bool BamAlignment::IsMateMapped(void) const
+    \return \c true if alignment's mate is mapped
+*/
+bool BamAlignment::IsMateMapped(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
+}
+
+/*! \fn bool BamAlignment::IsMateReverseStrand(void) const
+    \return \c true if alignment's mate mapped to reverse strand
+*/
+bool BamAlignment::IsMateReverseStrand(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
  }
  
-bool BamAlignment::RemoveTag(const string& tag) {
+/*! \fn bool BamAlignment::IsPaired(void) const
+    \return \c true if alignment part of paired-end read
+*/
+bool BamAlignment::IsPaired(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
+    \return \c true if reported position is primary alignment
+*/
+bool BamAlignment::IsPrimaryAlignment(void) const  {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
+}
+
+/*! \fn bool BamAlignment::IsProperPair(void) const
+    \return \c true if alignment is part of read that satisfied paired-end resolution
+*/
+bool BamAlignment::IsProperPair(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsReverseStrand(void) const
+    \return \c true if alignment mapped to reverse strand
+*/
+bool BamAlignment::IsReverseStrand(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsSecondMate(void) const
+    \return \c true if alignment is second mate on read
+*/
+bool BamAlignment::IsSecondMate(void) const {
+    return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
+}
+
+/*! \fn bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const
+    \internal
+
+    Checks that tag name & type strings are expected sizes.
+
+    \param tag[in]  BAM tag name
+    \param type[in] BAM tag type-code
+    \return \c true if both input strings are valid sizes
+*/
+bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
+    return (tag.size()  == Constants::BAM_TAG_TAGSIZE) &&
+           (type.size() == Constants::BAM_TAG_TYPESIZE);
+}
+
+/*! \fn void BamAlignment::RemoveTag(const std::string& tag)
+    \brief Removes field from BAM tags.
+
+    \param[in] tag 2-character name of field to remove
+*/
+void BamAlignment::RemoveTag(const std::string& tag) {
    
-    // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
-    // also, return false if no data present to remove
-    if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
+    // if char data not populated, do that first
+    if ( SupportData.HasCoreOnly )
+        BuildCharData();
+
+    // skip if no tags available
+    if ( TagData.empty() )
+        return;
    
      // localize the tag data
      char* pOriginalTagData = (char*)TagData.data();
@@ -585,89 +722,181 @@ bool BamAlignment::RemoveTag(const string& tag) {
      const unsigned int originalTagDataLength = TagData.size();
      unsigned int newTagDataLength = 0;
      unsigned int numBytesParsed = 0;
-    
-    // if tag found, store data in readGroup, return success
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
-        
-        char newTagData[originalTagDataLength];
-
-        // copy original tag data up til desired tag
-        pTagData -= 3;
-        numBytesParsed -= 3;
-        const unsigned int beginningTagDataLength = numBytesParsed;
-        newTagDataLength += beginningTagDataLength;
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);
-        
-        // skip to next tag (if tag for removal is last, return true) 
-        const char* pTagStorageType = pTagData + 2;
-        pTagData       += 3;
-        numBytesParsed += 3;
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
-         
-        // copy everything from current tag (the next one after tag for removal) to end
+
+    // skip if tag not found
+    if  ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
+        return;
+
+    // otherwise, remove it
+    RaiiBuffer newTagData(originalTagDataLength);
+
+    // copy original tag data up til desired tag
+    pTagData       -= 3;
+    numBytesParsed -= 3;
+    const unsigned int beginningTagDataLength = numBytesParsed;
+    newTagDataLength += beginningTagDataLength;
+    memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
+
+    // attemp to skip to next tag
+    const char* pTagStorageType = pTagData + 2;
+    pTagData       += 3;
+    numBytesParsed += 3;
+    if ( SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
+
+        // squeeze remaining tag data
          const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
          const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
-        memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
-        
-        // save new tag data
-        TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
-        return true;
+        memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
+
+        // save modified tag data in alignment
+        TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
      }
-    
-    // tag not found, no removal - return failure
-    return false;
  }
  
-bool BamAlignment::FindTag(const string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {
+/*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
+    \internal
  
-    while ( numBytesParsed < tagDataLength ) {
+    Sets a formatted error string for this alignment.
  
-        const char* pTagType        = pTagData;
-        const char* pTagStorageType = pTagData + 2;
-        pTagData       += 3;
-        numBytesParsed += 3;
+    \param[in] where class/method where error occurred
+    \param[in] what  description of error
+*/
+void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
+    static const string SEPARATOR = ": ";
+    ErrorString = where + SEPARATOR + what;
+}
  
-        // check the current tag, return true on match
-        if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) 
-            return true;
+/*! \fn void BamAlignment::SetIsDuplicate(bool ok)
+    \brief Sets value of "PCR duplicate" flag to \a ok.
+*/
+void BamAlignment::SetIsDuplicate(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_DUPLICATE;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
+}
  
-        // get the storage class and find the next tag
-        if ( *pTagStorageType == '\0' ) return false; 
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
-        if ( *pTagData == '\0' ) return false;
-    }
-  
-    // checked all tags, none match
-    return false;
+/*! \fn void BamAlignment::SetIsFailedQC(bool ok)
+    \brief Sets "failed quality control" flag to \a ok.
+*/
+void BamAlignment::SetIsFailedQC(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_QC_FAILED;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
  }
  
-bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
-    
-    switch(storageType) {
+/*! \fn void BamAlignment::SetIsFirstMate(bool ok)
+    \brief Sets "alignment is first mate" flag to \a ok.
+*/
+void BamAlignment::SetIsFirstMate(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_READ_1;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
+}
+
+/*! \fn void BamAlignment::SetIsMapped(bool ok)
+    \brief Sets "alignment is mapped" flag to \a ok.
+*/
+void BamAlignment::SetIsMapped(bool ok) {
+    if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
+    else    AlignmentFlag |=  Constants::BAM_ALIGNMENT_UNMAPPED;
+}
+
+/*! \fn void BamAlignment::SetIsMateMapped(bool ok)
+    \brief Sets "alignment's mate is mapped" flag to \a ok.
+*/
+void BamAlignment::SetIsMateMapped(bool ok) {
+    if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
+    else    AlignmentFlag |=  Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
+}
+
+/*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
+    \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
+*/
+void BamAlignment::SetIsMateReverseStrand(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
+}
+
+/*! \fn void BamAlignment::SetIsPaired(bool ok)
+    \brief Sets "alignment part of paired-end read" flag to \a ok.
+*/
+void BamAlignment::SetIsPaired(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_PAIRED;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
+}
+
+/*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
+    \brief Sets "position is primary alignment" flag to \a ok.
+*/
+void BamAlignment::SetIsPrimaryAlignment(bool ok) {
+    if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
+    else    AlignmentFlag |=  Constants::BAM_ALIGNMENT_SECONDARY;
+}
+
+/*! \fn void BamAlignment::SetIsProperPair(bool ok)
+    \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
+*/
+void BamAlignment::SetIsProperPair(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_PROPER_PAIR;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
+}
+
+/*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
+    \brief Sets "alignment mapped to reverse strand" flag to \a ok.
+*/
+void BamAlignment::SetIsReverseStrand(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_REVERSE_STRAND;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
+}
+
+/*! \fn void BamAlignment::SetIsSecondMate(bool ok)
+    \brief Sets "alignment is second mate on read" flag to \a ok.
+*/
+void BamAlignment::SetIsSecondMate(bool ok) {
+    if (ok) AlignmentFlag |=  Constants::BAM_ALIGNMENT_READ_2;
+    else    AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
+}
  
-        case 'A':
-        case 'c':
-        case 'C':
+/*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed) const
+    \internal
+
+    Moves to next available tag in tag data string
+
+    \param[in]     storageType    BAM tag type-code that determines how far to move cursor
+    \param[in,out] pTagData       pointer to current position (cursor) in tag string
+    \param[in,out] numBytesParsed report of how many bytes were parsed (cumulatively)
+
+    \return \c if storageType was a recognized BAM tag type
+
+    \post \a pTagData       will point to the byte where the next tag data begins.
+          \a numBytesParsed will correspond to the cursor's position in the full TagData string.
+*/
+bool BamAlignment::SkipToNextTag(const char storageType,
+                                 char*& pTagData,
+                                 unsigned int& numBytesParsed) const
+{
+    switch (storageType) {
+
+        case (Constants::BAM_TAG_TYPE_ASCII) :
+        case (Constants::BAM_TAG_TYPE_INT8)  :
+        case (Constants::BAM_TAG_TYPE_UINT8) :
              ++numBytesParsed;
              ++pTagData;
              break;
  
-        case 's':
-        case 'S':
-            numBytesParsed += 2;
-            pTagData       += 2;
+        case (Constants::BAM_TAG_TYPE_INT16)  :
+        case (Constants::BAM_TAG_TYPE_UINT16) :
+            numBytesParsed += sizeof(uint16_t);
+            pTagData       += sizeof(uint16_t);
              break;
  
-        case 'f':
-        case 'i':
-        case 'I':
-            numBytesParsed += 4;
-            pTagData       += 4;
+        case (Constants::BAM_TAG_TYPE_FLOAT)  :
+        case (Constants::BAM_TAG_TYPE_INT32)  :
+        case (Constants::BAM_TAG_TYPE_UINT32) :
+            numBytesParsed += sizeof(uint32_t);
+            pTagData       += sizeof(uint32_t);
              break;
  
-        case 'Z':
-        case 'H':
-            while(*pTagData) {
+        case (Constants::BAM_TAG_TYPE_STRING) :
+        case (Constants::BAM_TAG_TYPE_HEX)    :
+            while( *pTagData ) {
                  ++numBytesParsed;
                  ++pTagData;
              }
@@ -676,12 +905,54 @@ bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsign
              ++pTagData;
              break;
  
-        default: 
-            // error case
-            fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
+        case (Constants::BAM_TAG_TYPE_ARRAY) :
+
+        {
+            // read array type
+            const char arrayType = *pTagData;
+            ++numBytesParsed;
+            ++pTagData;
+
+            // read number of elements
+            int32_t numElements;
+            memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
+            numBytesParsed += sizeof(uint32_t);
+            pTagData       += sizeof(uint32_t);
+
+            // calculate number of bytes to skip
+            int bytesToSkip = 0;
+            switch (arrayType) {
+                case (Constants::BAM_TAG_TYPE_INT8)  :
+                case (Constants::BAM_TAG_TYPE_UINT8) :
+                    bytesToSkip = numElements;
+                    break;
+                case (Constants::BAM_TAG_TYPE_INT16)  :
+                case (Constants::BAM_TAG_TYPE_UINT16) :
+                    bytesToSkip = numElements*sizeof(uint16_t);
+                    break;
+                case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                case (Constants::BAM_TAG_TYPE_INT32)  :
+                case (Constants::BAM_TAG_TYPE_UINT32) :
+                    bytesToSkip = numElements*sizeof(uint32_t);
+                    break;
+                default:
+                    const string message = string("invalid binary array type: ") + arrayType;
+                    SetErrorString("BamAlignment::SkipToNextTag", message);
+                    return false;
+            }
+
+            // skip binary array contents
+            numBytesParsed += bytesToSkip;
+            pTagData       += bytesToSkip;
+            break;
+        }
+
+        default:
+            const string message = string("invalid tag type: ") + storageType;
+            SetErrorString("BamAlignment::SkipToNextTag", message);
              return false;
      }
-    
-    // return success
+
+    // if we get here, tag skipped OK - return success
      return true;
-}
-\ No newline at end of file
+}