Moved BamAlignment data structure out to its own .h/.cpp. BamAux.h was getting over...

author Derek <derekwbarnett@gmail.com>

Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)

committer Derek <derekwbarnett@gmail.com>

Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)
author Derek <derekwbarnett@gmail.com>
Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)
committer Derek <derekwbarnett@gmail.com>
Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)
diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp

new file mode 100644 (file)

index 0000000..3dbc7e9
--- /dev/null
+++ b/src/api/BamAlignment.cpp
@@ -0,0 +1,645 @@
+// ***************************************************************************
+// BamAlignment.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 September 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the BamAlignment data structure
+// ***************************************************************************
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <map>
+#include <utility>
+#include "BamAlignment.h"
+using namespace BamTools;
+
+// default ctor
+BamAlignment::BamAlignment(void) 
+    : RefID(-1)
+    , Position(-1)
+    , MateRefID(-1)
+    , MatePosition(-1)
+    , InsertSize(0)
+{ }
+
+// copy ctor
+BamAlignment::BamAlignment(const BamAlignment& other)
+    : Name(other.Name)
+    , Length(other.Length)
+    , QueryBases(other.QueryBases)
+    , AlignedBases(other.AlignedBases)
+    , Qualities(other.Qualities)
+    , TagData(other.TagData)
+    , RefID(other.RefID)
+    , Position(other.Position)
+    , Bin(other.Bin)
+    , MapQuality(other.MapQuality)
+    , AlignmentFlag(other.AlignmentFlag)
+    , CigarData(other.CigarData)
+    , MateRefID(other.MateRefID)
+    , MatePosition(other.MatePosition)
+    , InsertSize(other.InsertSize)
+    , SupportData(other.SupportData)
+{ }
+
+// dtor
+BamAlignment::~BamAlignment(void) { }
+
+// Queries against alignment flags
+bool BamAlignment::IsDuplicate(void) const         { return ( (AlignmentFlag & DUPLICATE)     != 0 ); }
+bool BamAlignment::IsFailedQC(void) const          { return ( (AlignmentFlag & QC_FAILED)     != 0 ); }
+bool BamAlignment::IsFirstMate(void) const         { return ( (AlignmentFlag & READ_1)        != 0 ); }
+bool BamAlignment::IsMapped(void) const            { return ( (AlignmentFlag & UNMAPPED)      == 0 ); }
+bool BamAlignment::IsMateMapped(void) const        { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
+bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
+bool BamAlignment::IsPaired(void) const            { return ( (AlignmentFlag & PAIRED)        != 0 ); }
+bool BamAlignment::IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY)     == 0 ); }
+bool BamAlignment::IsProperPair(void) const        { return ( (AlignmentFlag & PROPER_PAIR)   != 0 ); }
+bool BamAlignment::IsReverseStrand(void) const     { return ( (AlignmentFlag & REVERSE)       != 0 ); }
+bool BamAlignment::IsSecondMate(void) const        { return ( (AlignmentFlag & READ_2)        != 0 ); }
+
+// Manipulate alignment flags 
+void BamAlignment::SetIsDuplicate(bool ok)          { if (ok) AlignmentFlag |= DUPLICATE;     else AlignmentFlag &= ~DUPLICATE; }
+void BamAlignment::SetIsFailedQC(bool ok)           { if (ok) AlignmentFlag |= QC_FAILED;     else AlignmentFlag &= ~QC_FAILED; }
+void BamAlignment::SetIsFirstMate(bool ok)          { if (ok) AlignmentFlag |= READ_1;        else AlignmentFlag &= ~READ_1; }
+void BamAlignment::SetIsMateUnmapped(bool ok)       { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
+void BamAlignment::SetIsMateReverseStrand(bool ok)  { if (ok) AlignmentFlag |= MATE_REVERSE;  else AlignmentFlag &= ~MATE_REVERSE; }
+void BamAlignment::SetIsPaired(bool ok)             { if (ok) AlignmentFlag |= PAIRED;        else AlignmentFlag &= ~PAIRED; }
+void BamAlignment::SetIsProperPair(bool ok)         { if (ok) AlignmentFlag |= PROPER_PAIR;   else AlignmentFlag &= ~PROPER_PAIR; }
+void BamAlignment::SetIsReverseStrand(bool ok)      { if (ok) AlignmentFlag |= REVERSE;       else AlignmentFlag &= ~REVERSE; }
+void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY;     else AlignmentFlag &= ~SECONDARY; }
+void BamAlignment::SetIsSecondMate(bool ok)         { if (ok) AlignmentFlag |= READ_2;        else AlignmentFlag &= ~READ_2; }
+void BamAlignment::SetIsUnmapped(bool ok)           { if (ok) AlignmentFlag |= UNMAPPED;      else AlignmentFlag &= ~UNMAPPED; }
+
+// calculates alignment end position, based on starting position and CIGAR operations
+int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
+
+    // initialize alignment end to starting position
+    int alignEnd = Position;
+
+    // iterate over cigar operations
+    std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+    std::vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+        const char cigarType = (*cigarIter).Type;
+        if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
+            alignEnd += (*cigarIter).Length;
+        else if ( usePadded && cigarType == 'I' )
+            alignEnd += (*cigarIter).Length;
+    }
+    
+    // adjust for zeroBased, if necessary
+    if (zeroBased) 
+        return alignEnd - 1;
+    else 
+        return alignEnd;
+}
+
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "Z" && type != "H" ) return false;
+  
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+  
+    // otherwise, copy tag data to temp buffer
+    std::string newTag = tag + type + value;
+    const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
+    char originalTagData[newTagDataLength];
+    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
+    
+    // append newTag
+    strcat(originalTagData + tagDataLength, newTag.data());  // removes original null-term, appends newTag + null-term
+    
+    // store temp buffer back in TagData
+    const char* newTagData = (const char*)originalTagData;
+    TagData.assign(newTagData, newTagDataLength);
+    
+    // return success
+    return true;
+}
+
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type == "f" || type == "Z" || type == "H" ) return false;
+  
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+  
+    // otherwise, convert value to string
+    union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
+    un.value = value;
+
+    // copy original tag data to temp buffer
+    std::string newTag = tag + type;
+    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
+    char originalTagData[newTagDataLength];
+    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
+    
+    // append newTag
+    strcat(originalTagData + tagDataLength, newTag.data());
+    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
+    
+    // store temp buffer back in TagData
+    const char* newTagData = (const char*)originalTagData;
+    TagData.assign(newTagData, newTagDataLength);
+    
+    // return success
+    return true;
+}
+
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
+    return AddTag(tag, type, (const uint32_t&)value);
+}
+
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type == "Z" || type == "H" ) return false;
+  
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+  
+    // otherwise, convert value to string
+    union { float value; char valueBuffer[sizeof(float)]; } un;
+    un.value = value;
+
+    // copy original tag data to temp buffer
+    std::string newTag = tag + type;
+    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
+    char originalTagData[newTagDataLength];
+    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term
+    
+    // append newTag
+    strcat(originalTagData + tagDataLength, newTag.data());
+    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
+    
+    // store temp buffer back in TagData
+    const char* newTagData = (const char*)originalTagData;
+    TagData.assign(newTagData, newTagDataLength);
+    
+    // return success
+    return true;
+}
+
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "Z" && type != "H" ) return false;
+  
+    // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    
+    unsigned int newTagDataLength = 0;
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
+        
+        // make sure array is more than big enough
+        char newTagData[originalTagDataLength + value.size()];  
+
+        // copy original tag data up til desired tag
+        const unsigned int beginningTagDataLength = numBytesParsed;
+        newTagDataLength += beginningTagDataLength;
+        memcpy(newTagData, pOriginalTagData, numBytesParsed);
+      
+        // copy new VALUE in place of current tag data
+        const unsigned int dataLength = strlen(value.c_str());
+        memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
+        
+        // skip to next tag (if tag for removal is last, return true) 
+        const char* pTagStorageType = pTagData - 1;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
+         
+        // copy everything from current tag (the next one after tag for removal) to end
+        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
+        const unsigned int endTagOffset      = beginningTagDataLength + dataLength + 1;
+        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
+        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
+        
+        // ensure null-terminator
+        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
+        
+        // save new tag data
+        TagData.assign(newTagData, endTagOffset + endTagDataLength);
+        return true;
+    }
+    
+    // tag not found, attempt AddTag
+    else return AddTag(tag, type, value);
+}
+
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type == "f" || type == "Z" || type == "H" ) return false;
+    
+     // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    
+    unsigned int newTagDataLength = 0;
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
+        
+        // make sure array is more than big enough
+        char newTagData[originalTagDataLength + sizeof(value)];  
+
+        // copy original tag data up til desired tag
+        const unsigned int beginningTagDataLength = numBytesParsed;
+        newTagDataLength += beginningTagDataLength;
+        memcpy(newTagData, pOriginalTagData, numBytesParsed);
+      
+        // copy new VALUE in place of current tag data
+        union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
+        un.value = value;
+        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
+        
+        // skip to next tag (if tag for removal is last, return true) 
+        const char* pTagStorageType = pTagData - 1;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
+         
+        // copy everything from current tag (the next one after tag for removal) to end
+        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
+        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(unsigned int);
+        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
+        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
+        
+        // ensure null-terminator
+        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
+        
+        // save new tag data
+        TagData.assign(newTagData, endTagOffset + endTagDataLength);
+        return true;
+    }
+    
+    // tag not found, attempt AddTag
+    else return AddTag(tag, type, value);
+}
+
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
+    return EditTag(tag, type, (const uint32_t&)value);
+}
+
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
+  
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type == "Z" || type == "H" ) return false;
+    
+     // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    
+    unsigned int newTagDataLength = 0;
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
+        
+        // make sure array is more than big enough
+        char newTagData[originalTagDataLength + sizeof(value)];  
+
+        // copy original tag data up til desired tag
+        const unsigned int beginningTagDataLength = numBytesParsed;
+        newTagDataLength += beginningTagDataLength;
+        memcpy(newTagData, pOriginalTagData, numBytesParsed);
+      
+        // copy new VALUE in place of current tag data
+        union { float value; char valueBuffer[sizeof(float)]; } un;
+        un.value = value;
+        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
+        
+        // skip to next tag (if tag for removal is last, return true) 
+        const char* pTagStorageType = pTagData - 1;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
+         
+        // copy everything from current tag (the next one after tag for removal) to end
+        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
+        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(float);
+        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;
+        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
+        
+        // ensure null-terminator
+        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
+        
+        // save new tag data
+        TagData.assign(newTagData, endTagOffset + endTagDataLength);
+        return true;
+    }
+    
+    // tag not found, attempt AddTag
+    else return AddTag(tag, type, value);
+}
+
+// get "NM" tag data - originally contributed by Aaron Quinlan
+// stores data in 'editDistance', returns success/fail
+bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { 
+    return GetTag("NM", (uint32_t&)editDistance);
+}
+
+// get "RG" tag data
+// stores data in 'readGroup', returns success/fail
+bool BamAlignment::GetReadGroup(std::string& readGroup) const {
+    return GetTag("RG", readGroup);
+}
+
+bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
+
+    // make sure tag data exists
+    if ( SupportData.HasCoreOnly || TagData.empty() ) 
+        return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        const unsigned int dataLength = strlen(pTagData);
+        destination.clear();
+        destination.resize(dataLength);
+        memcpy( (char*)destination.data(), pTagData, dataLength );
+        return true;
+    }
+    
+    // tag not found, return failure
+    return false;
+}
+
+bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
+  
+    // make sure tag data exists
+    if ( SupportData.HasCoreOnly || TagData.empty() ) 
+        return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, determine data byte-length, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        
+        // determine data byte-length
+        const char type = *(pTagData - 1);
+        int destinationLength = 0;
+        switch (type) {
+            // 1 byte data
+            case 'A':
+            case 'c':
+            case 'C':
+                destinationLength = 1;
+                break;
+
+            // 2 byte data
+            case 's':
+            case 'S':
+                destinationLength = 2;
+                break;
+
+            // 4 byte data
+            case 'i':
+            case 'I':
+                destinationLength = 4;
+                break;
+
+            // unsupported type for integer destination (float or var-length strings)
+            case 'f':
+            case 'Z':
+            case 'H':
+                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
+                return false;
+
+            // unknown tag type
+            default:
+                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
+                return false;
+        }
+          
+        // store in destination
+        destination = 0;
+        memcpy(&destination, pTagData, destinationLength);
+        return true;
+    }
+    
+    // tag not found, return failure
+    return false;
+}
+
+bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
+    return GetTag(tag, (uint32_t&)destination);
+}
+
+bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
+  
+    // make sure tag data exists
+    if ( SupportData.HasCoreOnly || TagData.empty() ) 
+        return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, determine data byte-length, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+        //pTagData += numBytesParsed;
+        
+        // determine data byte-length
+        const char type = *(pTagData - 1);
+        int destinationLength = 0;
+        switch(type) {
+
+            // 1 byte data
+            case 'A':
+            case 'c':
+            case 'C':
+                destinationLength = 1;
+                break;
+
+            // 2 byte data
+            case 's':
+            case 'S':
+                destinationLength = 2;
+                break;
+
+            // 4 byte data
+            case 'f':
+            case 'i':
+            case 'I':
+                destinationLength = 4;
+                break;
+            
+            // unsupported type (var-length strings)
+            case 'Z':
+            case 'H':
+                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
+                return false;
+
+            // unknown tag type
+            default:
+                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
+                return false;
+        }
+          
+        // store in destination
+        destination = 0.0;
+        memcpy(&destination, pTagData, destinationLength);
+        return true;
+    }
+    
+    // tag not found, return failure
+    return false;
+}
+
+bool BamAlignment::RemoveTag(const std::string& tag) {
+  
+    // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
+    // also, return false if no data present to remove
+    if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
+  
+    // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    unsigned int newTagDataLength = 0;
+    unsigned int numBytesParsed = 0;
+    
+    // if tag found, store data in readGroup, return success
+    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
+        
+        char newTagData[originalTagDataLength];
+
+        // copy original tag data up til desired tag
+        pTagData -= 3;
+        numBytesParsed -= 3;
+        const unsigned int beginningTagDataLength = numBytesParsed;
+        newTagDataLength += beginningTagDataLength;
+        memcpy(newTagData, pOriginalTagData, numBytesParsed);
+        
+        // skip to next tag (if tag for removal is last, return true) 
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += 3;
+        numBytesParsed += 3;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
+         
+        // copy everything from current tag (the next one after tag for removal) to end
+        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
+        const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
+        memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
+        
+        // save new tag data
+        TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
+        return true;
+    }
+    
+    // tag not found, no removal - return failure
+    return false;
+}
+
+bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {
+
+    while ( numBytesParsed < tagDataLength ) {
+
+        const char* pTagType        = pTagData;
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += 3;
+        numBytesParsed += 3;
+
+        // check the current tag, return true on match
+        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) 
+            return true;
+
+        // get the storage class and find the next tag
+        if ( *pTagStorageType == '\0' ) return false; 
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
+        if ( *pTagData == '\0' ) return false;
+    }
+  
+    // checked all tags, none match
+    return false;
+}
+
+bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
+    
+    switch(storageType) {
+
+        case 'A':
+        case 'c':
+        case 'C':
+            ++numBytesParsed;
+            ++pTagData;
+            break;
+
+        case 's':
+        case 'S':
+            numBytesParsed += 2;
+            pTagData       += 2;
+            break;
+
+        case 'f':
+        case 'i':
+        case 'I':
+            numBytesParsed += 4;
+            pTagData       += 4;
+            break;
+
+        case 'Z':
+        case 'H':
+            while(*pTagData) {
+                ++numBytesParsed;
+                ++pTagData;
+            }
+            // increment for null-terminator
+            ++numBytesParsed;
+            ++pTagData;
+            break;
+
+        default: 
+            // error case
+            fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
+            return false;
+    }
+    
+    // return success
+    return true;
+}
+\ No newline at end of file
diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h

new file mode 100644 (file)

index 0000000..ac50552
--- /dev/null
+++ b/src/api/BamAlignment.h
@@ -0,0 +1,191 @@
+// ***************************************************************************
+// BamAlignment.h (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 September 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the BamAlignment data structure
+// ***************************************************************************
+
+#ifndef BAMALIGNMENT_H
+#define BAMALIGNMENT_H
+
+#include <string>
+#include <vector>
+#include "BamAux.h"
+
+namespace BamTools {
+
+// BamAlignment data structure
+// explicitly labeled as 'struct' to indicate that (most of) its fields are public
+struct BamAlignment {
+
+    // constructors & destructor
+    public:
+        BamAlignment(void);
+        BamAlignment(const BamAlignment& other);
+        ~BamAlignment(void);
+
+    // Queries against alignment flags
+    public:        
+        bool IsDuplicate(void) const;           // Returns true if this read is a PCR duplicate       
+        bool IsFailedQC(void) const;            // Returns true if this read failed quality control      
+        bool IsFirstMate(void) const;           // Returns true if alignment is first mate on read        
+        bool IsMapped(void) const;              // Returns true if alignment is mapped        
+        bool IsMateMapped(void) const;          // Returns true if alignment's mate is mapped        
+        bool IsMateReverseStrand(void) const;   // Returns true if alignment's mate mapped to reverse strand        
+        bool IsPaired(void) const;              // Returns true if alignment part of paired-end read        
+        bool IsPrimaryAlignment(void) const;    // Returns true if reported position is primary alignment       
+        bool IsProperPair(void) const;          // Returns true if alignment is part of read that satisfied paired-end resolution     
+        bool IsReverseStrand(void) const;       // Returns true if alignment mapped to reverse strand
+        bool IsSecondMate(void) const;          // Returns true if alignment is second mate on read
+
+    // Manipulate alignment flags
+    public:        
+        void SetIsDuplicate(bool ok);           // Sets "PCR duplicate" flag        
+        void SetIsFailedQC(bool ok);            // Sets "failed quality control" flag        
+        void SetIsFirstMate(bool ok);           // Sets "alignment is first mate" flag        
+        void SetIsMateUnmapped(bool ok);        // Sets "alignment's mate is mapped" flag        
+        void SetIsMateReverseStrand(bool ok);   // Sets "alignment's mate mapped to reverse strand" flag        
+        void SetIsPaired(bool ok);              // Sets "alignment part of paired-end read" flag        
+        void SetIsProperPair(bool ok);          // Sets "alignment is part of read that satisfied paired-end resolution" flag        
+        void SetIsReverseStrand(bool ok);       // Sets "alignment mapped to reverse strand" flag        
+        void SetIsSecondaryAlignment(bool ok);  // Sets "position is primary alignment" flag        
+        void SetIsSecondMate(bool ok);          // Sets "alignment is second mate on read" flag        
+        void SetIsUnmapped(bool ok);            // Sets "alignment is mapped" flag
+
+    // Tag data access methods
+    public:
+        // -------------------------------------------------------------------------------------
+        // N.B. - The following tag access methods may not be used on BamAlignments fetched
+        // using BamReader::GetNextAlignmentCore().  Attempting to use them will not result in 
+        // error message (to keep output clean) but will ALWAYS return false.  Only user-created
+        // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
+
+        // add tag data (create new TAG entry with TYPE and VALUE)
+        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
+        // returns true if new data added, false if error or TAG already exists
+        // N.B. - will NOT modify existing tag. Use EditTag() instead
+        // @tag   - two character tag name
+        // @type  - single character tag type (see SAM/BAM spec for details)
+        // @value - value to associate with tag
+        bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
+        bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i
+        bool AddTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i
+        bool AddTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f
+        
+        // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)
+        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
+        // returns true if edit was successfaul, false if error
+        // @tag   - two character tag name
+        // @type  - single character tag type (see SAM/BAM spec for details)
+        // @value - new value for tag
+        bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
+        bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i
+        bool EditTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i
+        bool EditTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f
+
+        // specific tag data access methods - these only remain for legacy support
+        // returns whether specific tag could be retrieved
+        bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance))
+        bool GetReadGroup(std::string& readGroup) const;    // get "RG" tag data (equivalent to GetTag("RG", readGroup)) 
+        
+        // generic tag data access methods 
+        // returns whether tag is found & tag type is compatible with DESTINATION
+        // @tag - two character tag name
+        // @destination - if found, tag value is stored here
+        bool GetTag(const std::string& tag, std::string& destination) const;    // access variable-length char or hex strings 
+        bool GetTag(const std::string& tag, uint32_t& destination) const;       // access unsigned integer data
+        bool GetTag(const std::string& tag, int32_t& destination) const;        // access signed integer data
+        bool GetTag(const std::string& tag, float& destination) const;          // access floating point data
+        
+        // remove tag data
+        // returns true if removal was successful, false if error
+        // N.B. - returns false if TAG does not exist (no removal can occur)
+        // @tag - two character tag name
+        bool RemoveTag(const std::string& tag);
+
+    // Additional data access methods
+    public:
+        // calculates & returns alignment end position, based on starting position and CIGAR operations
+        // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference
+        // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats.
+        int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;  
+
+    // 'internal' utility methods 
+    private:
+        static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);
+        static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);
+
+    // Data members
+    public:
+        std::string Name;              // Read name
+        int32_t     Length;            // Query length
+        std::string QueryBases;        // 'Original' sequence (as reported from sequencing machine)
+        std::string AlignedBases;      // 'Aligned' sequence (includes any indels, padding, clipping)
+        std::string Qualities;         // FASTQ qualities (ASCII characters, not numeric values)
+        std::string TagData;           // Tag data (accessor methods will pull the requested information out)
+        int32_t     RefID;             // ID number for reference sequence
+        int32_t     Position;          // Position (0-based) where alignment starts
+        uint16_t    Bin;               // Bin in BAM file where this alignment resides
+        uint16_t    MapQuality;        // Mapping quality score
+        uint32_t    AlignmentFlag;     // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate 
+        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
+        int32_t     MateRefID;         // ID number for reference sequence where alignment's mate was aligned
+        int32_t     MatePosition;      // Position (0-based) where alignment's mate starts
+        int32_t     InsertSize;        // Mate-pair insert size
+          
+    // internal data
+    private:
+        struct BamAlignmentSupportData {
+      
+            // data members
+            std::string AllCharData;
+            uint32_t    BlockLength;
+            uint32_t    NumCigarOperations;
+            uint32_t    QueryNameLength;
+            uint32_t    QuerySequenceLength;
+            bool        HasCoreOnly;
+            
+            // constructor
+            BamAlignmentSupportData(void)
+                : BlockLength(0)
+                , NumCigarOperations(0)
+                , QueryNameLength(0)
+                , QuerySequenceLength(0)
+                , HasCoreOnly(false)
+            { }
+        };
+        
+        // contains raw character data & lengths
+        BamAlignmentSupportData SupportData;   
+        
+        // allow these classes access to BamAlignment private members (SupportData)
+        // but client code should not need to touch this data
+        friend class BamReader;
+        friend class BamWriter;
+
+    // Alignment flag query constants
+    // Use the get/set methods above instead
+    private:
+        enum { PAIRED        = 1
+             , PROPER_PAIR   = 2
+             , UNMAPPED      = 4
+             , MATE_UNMAPPED = 8
+             , REVERSE       = 16
+             , MATE_REVERSE  = 32
+             , READ_1        = 64
+             , READ_2        = 128
+             , SECONDARY     = 256
+             , QC_FAILED     = 512
+             , DUPLICATE     = 1024 
+             };
+};
+
+// convenience typedef(s)
+typedef std::vector<BamAlignment> BamAlignmentVector;
+
+} // namespace BamTools
+
+#endif // BAMALIGNMENT_H
diff --git a/src/api/BamAux.h b/src/api/BamAux.h

index 84223a61654eb51eefadba790acb2d1310b97145..3eff2d7e723b3cb25cf72a1c8483207f11975447 100644 (file)
--- a/src/api/BamAux.h
+++ b/src/api/BamAux.h
@@ -3,30 +3,24 @@
  // Marth Lab, Department of Biology, Boston College\r
  // All rights reserved.\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 16 September 2010 (DB)\r
+// Last modified: 18 September 2010 (DB)\r
  // ---------------------------------------------------------------------------\r
-// Provides the basic constants, data structures, etc. for using BAM files\r
+// Provides the basic constants, data structures, utilities etc. \r
+// used throughout the API for handling BAM files\r
  // ***************************************************************************\r
  \r
  #ifndef BAMAUX_H\r
  #define BAMAUX_H\r
  \r
-// C inclues\r
-#include <cctype>\r
-#include <cstdio>\r
-#include <cstdlib>\r
-#include <cstring>\r
-\r
-// C++ includes\r
-#include <exception>\r
-#include <fstream>\r
+#include <fstream> \r
  #include <iostream>\r
-#include <map>\r
  #include <string>\r
-#include <utility>\r
  #include <vector>\r
  \r
+// ----------------------------------------------------------------\r
+// ----------------------------------------------------------------\r
  // Platform-specific type definitions\r
+\r
  #ifndef BAMTOOLS_TYPES\r
  #define BAMTOOLS_TYPES\r
      #ifdef _MSC_VER\r
@@ -45,7 +39,10 @@
  \r
  namespace BamTools {\r
  \r
+// ----------------------------------------------------------------\r
+// ----------------------------------------------------------------\r
  // BAM constants\r
+\r
  const int BAM_CMATCH      = 0;\r
  const int BAM_CINS        = 1;\r
  const int BAM_CDEL        = 2;\r
@@ -58,163 +55,11 @@ const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);
  const int BAM_CORE_SIZE   = 32;\r
  const int BT_SIZEOF_INT   = 4;\r
  \r
-struct CigarOp;\r
-\r
-struct BamAlignment {\r
-\r
-    // constructors & destructor\r
-    public:\r
-        BamAlignment(void);\r
-        BamAlignment(const BamAlignment& other);\r
-        ~BamAlignment(void);\r
-\r
-    // Queries against alignment flags\r
-    public:        \r
-        bool IsDuplicate(void) const;          // Returns true if this read is a PCR duplicate       \r
-        bool IsFailedQC(void) const;           // Returns true if this read failed quality control      \r
-        bool IsFirstMate(void) const;          // Returns true if alignment is first mate on read        \r
-        bool IsMapped(void) const;             // Returns true if alignment is mapped        \r
-        bool IsMateMapped(void) const;         // Returns true if alignment's mate is mapped        \r
-        bool IsMateReverseStrand(void) const;  // Returns true if alignment's mate mapped to reverse strand        \r
-        bool IsPaired(void) const;             // Returns true if alignment part of paired-end read        \r
-        bool IsPrimaryAlignment(void) const;   // Returns true if reported position is primary alignment       \r
-        bool IsProperPair(void) const;         // Returns true if alignment is part of read that satisfied paired-end resolution     \r
-        bool IsReverseStrand(void) const;      // Returns true if alignment mapped to reverse strand\r
-        bool IsSecondMate(void) const;         // Returns true if alignment is second mate on read\r
-\r
-    // Manipulate alignment flags\r
-    public:        \r
-        void SetIsDuplicate(bool ok);          // Sets "PCR duplicate" flag        \r
-        void SetIsFailedQC(bool ok);           // Sets "failed quality control" flag        \r
-        void SetIsFirstMate(bool ok);          // Sets "alignment is first mate" flag        \r
-        void SetIsMateUnmapped(bool ok);       // Sets "alignment's mate is mapped" flag        \r
-        void SetIsMateReverseStrand(bool ok);  // Sets "alignment's mate mapped to reverse strand" flag        \r
-        void SetIsPaired(bool ok);             // Sets "alignment part of paired-end read" flag        \r
-       void SetIsProperPair(bool ok);          // Sets "alignment is part of read that satisfied paired-end resolution" flag        \r
-        void SetIsReverseStrand(bool ok);      // Sets "alignment mapped to reverse strand" flag        \r
-        void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag        \r
-        void SetIsSecondMate(bool ok);         // Sets "alignment is second mate on read" flag        \r
-        void SetIsUnmapped(bool ok);           // Sets "alignment is mapped" flag\r
-\r
-    // Tag data access methods\r
-    public:\r
-        // -------------------------------------------------------------------------------------\r
-        // N.B. - The following tag-modifying methods may not be used on BamAlignments fetched\r
-        // using BamReader::GetNextAlignmentCore().  Attempting to use them will not result in \r
-        // error message (to keep output clean) but will ALWAYS return false.  Only user-\r
-        // generated BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid.\r
-\r
-        // add tag data (create new TAG entry with TYPE and VALUE)\r
-        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
-        // returns true if new data added, false if error or TAG already exists\r
-        // N.B. - will NOT modify existing tag. Use EditTag() instead\r
-        bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
-        bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i\r
-        bool AddTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i\r
-        bool AddTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f\r
-        \r
-        // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)\r
-        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
-        // returns true if edit was successfaul, false if error\r
-        bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
-        bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i\r
-        bool EditTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i\r
-        bool EditTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f\r
-\r
-        // specific tag data access methods - these only remain for legacy support\r
-        bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (implemented as GetTag("NM", editDistance))\r
-        bool GetReadGroup(std::string& readGroup) const;    // get "RG" tag data (implemented as GetTag("RG", readGroup)) \r
-        \r
-        // generic tag data access methods \r
-        bool GetTag(const std::string& tag, std::string& destination) const;    // access variable-length char or hex strings \r
-        bool GetTag(const std::string& tag, uint32_t& destination) const;       // access unsigned integer data\r
-        bool GetTag(const std::string& tag, int32_t& destination) const;        // access signed integer data\r
-        bool GetTag(const std::string& tag, float& destination) const;          // access floating point data\r
-        \r
-        // remove tag data\r
-        // returns true if removal was successful, false if error\r
-        // N.B. - returns false if TAG does not exist (no removal can occur)\r
-        bool RemoveTag(const std::string& tag);\r
-\r
-    // Additional data access methods\r
-    public:\r
-        // calculates alignment end position, based on starting position and CIGAR operations\r
-        // @zeroBased - if true, returns 0-based coordinate; else returns 1-based\r
-       int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;  \r
-\r
-    // 'internal' utility methods \r
-    private:\r
-        static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);\r
-        static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
-\r
-    // Data members\r
-    public:\r
-        std::string Name;              // Read name\r
-        int32_t     Length;            // Query length\r
-        std::string QueryBases;        // 'Original' sequence (as reported from sequencing machine)\r
-        std::string AlignedBases;      // 'Aligned' sequence (includes any indels, padding, clipping)\r
-        std::string Qualities;         // FASTQ qualities (ASCII characters, not numeric values)\r
-        std::string TagData;           // Tag data (accessor methods will pull the requested information out)\r
-        int32_t     RefID;             // ID number for reference sequence\r
-        int32_t     Position;          // Position (0-based) where alignment starts\r
-        uint16_t    Bin;               // Bin in BAM file where this alignment resides\r
-        uint16_t    MapQuality;        // Mapping quality score\r
-        uint32_t    AlignmentFlag;     // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
-        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
-        int32_t     MateRefID;         // ID number for reference sequence where alignment's mate was aligned\r
-        int32_t     MatePosition;      // Position (0-based) where alignment's mate starts\r
-        int32_t     InsertSize;        // Mate-pair insert size\r
-          \r
-    // internal data\r
-    private:\r
-        struct BamAlignmentSupportData {\r
-      \r
-            // data members\r
-            std::string AllCharData;\r
-            uint32_t    BlockLength;\r
-            uint32_t    NumCigarOperations;\r
-            uint32_t    QueryNameLength;\r
-            uint32_t    QuerySequenceLength;\r
-            bool        HasCoreOnly;\r
-            \r
-            // constructor\r
-            BamAlignmentSupportData(void)\r
-                : BlockLength(0)\r
-                , NumCigarOperations(0)\r
-                , QueryNameLength(0)\r
-                , QuerySequenceLength(0)\r
-                , HasCoreOnly(false)\r
-            { }\r
-        };\r
-        \r
-        // contains raw character data & lengths\r
-        BamAlignmentSupportData SupportData;   \r
-        \r
-        // allow these classes access to BamAlignment private members (SupportData)\r
-        // but client code should not need to touch this data\r
-        friend class BamReader;\r
-        friend class BamWriter;\r
-\r
-    // Alignment flag query constants\r
-    // Use the get/set methods above instead\r
-    private:\r
-        enum { PAIRED        = 1\r
-             , PROPER_PAIR   = 2\r
-             , UNMAPPED      = 4\r
-             , MATE_UNMAPPED = 8\r
-             , REVERSE       = 16\r
-             , MATE_REVERSE  = 32\r
-             , READ_1        = 64\r
-             , READ_2        = 128\r
-             , SECONDARY     = 256\r
-             , QC_FAILED     = 512\r
-             , DUPLICATE     = 1024 \r
-             };\r
-};\r
-\r
  // ----------------------------------------------------------------\r
-// Auxiliary data structs & typedefs\r
+// ----------------------------------------------------------------\r
+// Data structs & typedefs\r
  \r
+// CIGAR operation data structure\r
  struct CigarOp {\r
    \r
      // data members\r
@@ -229,6 +74,7 @@ struct CigarOp {
      { }\r
  };\r
  \r
+// Reference data entry\r
  struct RefData {\r
     \r
      // data members\r
@@ -243,10 +89,9 @@ struct RefData {
          , RefHasAlignments(ok)\r
      { }\r
  };\r
+typedef std::vector<RefData> RefVector;\r
  \r
-typedef std::vector<RefData>      RefVector;\r
-typedef std::vector<BamAlignment> BamAlignmentVector;\r
-\r
+// General (sequential) genome region\r
  struct BamRegion {\r
    \r
      // data members\r
@@ -274,9 +119,8 @@ struct BamRegion {
  };\r
  \r
  // ----------------------------------------------------------------\r
-// Added: 3-35-2010 DWB\r
-// Fixed: Routines to provide endian-correctness\r
  // ----------------------------------------------------------------\r
+// General utilities \r
  \r
  // returns true if system is big endian\r
  inline bool SystemIsBigEndian(void) {\r
@@ -353,652 +197,12 @@ inline void SwapEndian_64p(char* data) {
      SwapEndian_64(value);\r
  }\r
  \r
+// returns whether file exists (can be opened OK)\r
  inline bool FileExists(const std::string& filename) {\r
      std::ifstream f(filename.c_str(), std::ifstream::in);\r
      return !f.fail();\r
  }\r
  \r
-// ----------------------------------------------------------------\r
-// BamAlignment member methods\r
-\r
-// constructors & destructor\r
-inline BamAlignment::BamAlignment(void) { }\r
-\r
-inline BamAlignment::BamAlignment(const BamAlignment& other)\r
-    : Name(other.Name)\r
-    , Length(other.Length)\r
-    , QueryBases(other.QueryBases)\r
-    , AlignedBases(other.AlignedBases)\r
-    , Qualities(other.Qualities)\r
-    , TagData(other.TagData)\r
-    , RefID(other.RefID)\r
-    , Position(other.Position)\r
-    , Bin(other.Bin)\r
-    , MapQuality(other.MapQuality)\r
-    , AlignmentFlag(other.AlignmentFlag)\r
-    , CigarData(other.CigarData)\r
-    , MateRefID(other.MateRefID)\r
-    , MatePosition(other.MatePosition)\r
-    , InsertSize(other.InsertSize)\r
-    , SupportData(other.SupportData)\r
-{ }\r
-\r
-inline BamAlignment::~BamAlignment(void) { }\r
-\r
-// Queries against alignment flags\r
-inline bool BamAlignment::IsDuplicate(void) const         { return ( (AlignmentFlag & DUPLICATE)     != 0 ); }\r
-inline bool BamAlignment::IsFailedQC(void) const          { return ( (AlignmentFlag & QC_FAILED)     != 0 ); }\r
-inline bool BamAlignment::IsFirstMate(void) const         { return ( (AlignmentFlag & READ_1)        != 0 ); }\r
-inline bool BamAlignment::IsMapped(void) const            { return ( (AlignmentFlag & UNMAPPED)      == 0 ); }\r
-inline bool BamAlignment::IsMateMapped(void) const        { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }\r
-inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }\r
-inline bool BamAlignment::IsPaired(void) const            { return ( (AlignmentFlag & PAIRED)        != 0 ); }\r
-inline bool BamAlignment::IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY)     == 0 ); }\r
-inline bool BamAlignment::IsProperPair(void) const        { return ( (AlignmentFlag & PROPER_PAIR)   != 0 ); }\r
-inline bool BamAlignment::IsReverseStrand(void) const     { return ( (AlignmentFlag & REVERSE)       != 0 ); }\r
-inline bool BamAlignment::IsSecondMate(void) const        { return ( (AlignmentFlag & READ_2)        != 0 ); }\r
-\r
-// Manipulate alignment flags \r
-inline void BamAlignment::SetIsDuplicate(bool ok)          { if (ok) AlignmentFlag |= DUPLICATE;     else AlignmentFlag &= ~DUPLICATE; }\r
-inline void BamAlignment::SetIsFailedQC(bool ok)           { if (ok) AlignmentFlag |= QC_FAILED;     else AlignmentFlag &= ~QC_FAILED; }\r
-inline void BamAlignment::SetIsFirstMate(bool ok)          { if (ok) AlignmentFlag |= READ_1;        else AlignmentFlag &= ~READ_1; }\r
-inline void BamAlignment::SetIsMateUnmapped(bool ok)       { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }\r
-inline void BamAlignment::SetIsMateReverseStrand(bool ok)  { if (ok) AlignmentFlag |= MATE_REVERSE;  else AlignmentFlag &= ~MATE_REVERSE; }\r
-inline void BamAlignment::SetIsPaired(bool ok)             { if (ok) AlignmentFlag |= PAIRED;        else AlignmentFlag &= ~PAIRED; }\r
-inline void BamAlignment::SetIsProperPair(bool ok)         { if (ok) AlignmentFlag |= PROPER_PAIR;   else AlignmentFlag &= ~PROPER_PAIR; }\r
-inline void BamAlignment::SetIsReverseStrand(bool ok)      { if (ok) AlignmentFlag |= REVERSE;       else AlignmentFlag &= ~REVERSE; }\r
-inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY;     else AlignmentFlag &= ~SECONDARY; }\r
-inline void BamAlignment::SetIsSecondMate(bool ok)         { if (ok) AlignmentFlag |= READ_2;        else AlignmentFlag &= ~READ_2; }\r
-inline void BamAlignment::SetIsUnmapped(bool ok)           { if (ok) AlignmentFlag |= UNMAPPED;      else AlignmentFlag &= ~UNMAPPED; }\r
-\r
-// calculates alignment end position, based on starting position and CIGAR operations\r
-inline \r
-int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {\r
-\r
-    // initialize alignment end to starting position\r
-    int alignEnd = Position;\r
-\r
-    // iterate over cigar operations\r
-    std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();\r
-    std::vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();\r
-    for ( ; cigarIter != cigarEnd; ++cigarIter) {\r
-       const char cigarType = (*cigarIter).Type;\r
-       if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
-           alignEnd += (*cigarIter).Length;\r
-       } \r
-        else if ( usePadded && cigarType == 'I' ) {\r
-            alignEnd += (*cigarIter).Length;\r
-        }\r
-    }\r
-    \r
-    // adjust for zeroBased, if necessary\r
-    if (zeroBased) \r
-        return alignEnd - 1;\r
-    else \r
-        return alignEnd;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type != "Z" && type != "H" ) return false;\r
-  \r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag already exists, return false\r
-    // use EditTag explicitly instead\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
-  \r
-    // otherwise, copy tag data to temp buffer\r
-    std::string newTag = tag + type + value;\r
-    const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term\r
-    char originalTagData[newTagDataLength];\r
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term\r
-    \r
-    // append newTag\r
-    strcat(originalTagData + tagDataLength, newTag.data());  // removes original null-term, appends newTag + null-term\r
-    \r
-    // store temp buffer back in TagData\r
-    const char* newTagData = (const char*)originalTagData;\r
-    TagData.assign(newTagData, newTagDataLength);\r
-    \r
-    // return success\r
-    return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type == "f" || type == "Z" || type == "H" ) return false;\r
-  \r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag already exists, return false\r
-    // use EditTag explicitly instead\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
-  \r
-    // otherwise, convert value to string\r
-    union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;\r
-    un.value = value;\r
-\r
-    // copy original tag data to temp buffer\r
-    std::string newTag = tag + type;\r
-    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer\r
-    char originalTagData[newTagDataLength];\r
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term\r
-    \r
-    // append newTag\r
-    strcat(originalTagData + tagDataLength, newTag.data());\r
-    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));\r
-    \r
-    // store temp buffer back in TagData\r
-    const char* newTagData = (const char*)originalTagData;\r
-    TagData.assign(newTagData, newTagDataLength);\r
-    \r
-    // return success\r
-    return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
-    return AddTag(tag, type, (const uint32_t&)value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type == "Z" || type == "H" ) return false;\r
-  \r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag already exists, return false\r
-    // use EditTag explicitly instead\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
-  \r
-    // otherwise, convert value to string\r
-    union { float value; char valueBuffer[sizeof(float)]; } un;\r
-    un.value = value;\r
-\r
-    // copy original tag data to temp buffer\r
-    std::string newTag = tag + type;\r
-    const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float\r
-    char originalTagData[newTagDataLength];\r
-    memcpy(originalTagData, TagData.c_str(), tagDataLength + 1);    // '+1' for TagData null-term\r
-    \r
-    // append newTag\r
-    strcat(originalTagData + tagDataLength, newTag.data());\r
-    memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));\r
-    \r
-    // store temp buffer back in TagData\r
-    const char* newTagData = (const char*)originalTagData;\r
-    TagData.assign(newTagData, newTagDataLength);\r
-    \r
-    // return success\r
-    return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type != "Z" && type != "H" ) return false;\r
-  \r
-    // localize the tag data\r
-    char* pOriginalTagData = (char*)TagData.data();\r
-    char* pTagData = pOriginalTagData;\r
-    const unsigned int originalTagDataLength = TagData.size();\r
-    \r
-    unsigned int newTagDataLength = 0;\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
-        \r
-        // make sure array is more than big enough\r
-        char newTagData[originalTagDataLength + value.size()];  \r
-\r
-        // copy original tag data up til desired tag\r
-        const unsigned int beginningTagDataLength = numBytesParsed;\r
-        newTagDataLength += beginningTagDataLength;\r
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
-      \r
-        // copy new VALUE in place of current tag data\r
-        const unsigned int dataLength = strlen(value.c_str());\r
-        memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );\r
-        \r
-        // skip to next tag (if tag for removal is last, return true) \r
-        const char* pTagStorageType = pTagData - 1;\r
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
-         \r
-        // copy everything from current tag (the next one after tag for removal) to end\r
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
-        const unsigned int endTagOffset      = beginningTagDataLength + dataLength + 1;\r
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
-        \r
-        // ensure null-terminator\r
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
-        \r
-        // save new tag data\r
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, attempt AddTag\r
-    else return AddTag(tag, type, value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type == "f" || type == "Z" || type == "H" ) return false;\r
-    \r
-     // localize the tag data\r
-    char* pOriginalTagData = (char*)TagData.data();\r
-    char* pTagData = pOriginalTagData;\r
-    const unsigned int originalTagDataLength = TagData.size();\r
-    \r
-    unsigned int newTagDataLength = 0;\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
-        \r
-        // make sure array is more than big enough\r
-        char newTagData[originalTagDataLength + sizeof(value)];  \r
-\r
-        // copy original tag data up til desired tag\r
-        const unsigned int beginningTagDataLength = numBytesParsed;\r
-        newTagDataLength += beginningTagDataLength;\r
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
-      \r
-        // copy new VALUE in place of current tag data\r
-        union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;\r
-        un.value = value;\r
-        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));\r
-        \r
-        // skip to next tag (if tag for removal is last, return true) \r
-        const char* pTagStorageType = pTagData - 1;\r
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
-         \r
-        // copy everything from current tag (the next one after tag for removal) to end\r
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
-        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(unsigned int);\r
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
-        \r
-        // ensure null-terminator\r
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
-        \r
-        // save new tag data\r
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, attempt AddTag\r
-    else return AddTag(tag, type, value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
-    return EditTag(tag, type, (const uint32_t&)value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {\r
-  \r
-    if ( SupportData.HasCoreOnly ) return false;\r
-    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
-    if ( type == "Z" || type == "H" ) return false;\r
-    \r
-     // localize the tag data\r
-    char* pOriginalTagData = (char*)TagData.data();\r
-    char* pTagData = pOriginalTagData;\r
-    const unsigned int originalTagDataLength = TagData.size();\r
-    \r
-    unsigned int newTagDataLength = 0;\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
-        \r
-        // make sure array is more than big enough\r
-        char newTagData[originalTagDataLength + sizeof(value)];  \r
-\r
-        // copy original tag data up til desired tag\r
-        const unsigned int beginningTagDataLength = numBytesParsed;\r
-        newTagDataLength += beginningTagDataLength;\r
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
-      \r
-        // copy new VALUE in place of current tag data\r
-        union { float value; char valueBuffer[sizeof(float)]; } un;\r
-        un.value = value;\r
-        memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));\r
-        \r
-        // skip to next tag (if tag for removal is last, return true) \r
-        const char* pTagStorageType = pTagData - 1;\r
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
-         \r
-        // copy everything from current tag (the next one after tag for removal) to end\r
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
-        const unsigned int endTagOffset      = beginningTagDataLength + sizeof(float);\r
-        const unsigned int endTagDataLength  = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
-        memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
-        \r
-        // ensure null-terminator\r
-        newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
-        \r
-        // save new tag data\r
-        TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, attempt AddTag\r
-    else return AddTag(tag, type, value);\r
-}\r
-\r
-// get "NM" tag data - originally contributed by Aaron Quinlan\r
-// stores data in 'editDistance', returns success/fail\r
-inline \r
-bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { \r
-    return GetTag("NM", (uint32_t&)editDistance);\r
-}\r
-\r
-// get "RG" tag data\r
-// stores data in 'readGroup', returns success/fail\r
-inline \r
-bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
-    return GetTag("RG", readGroup);\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {\r
-\r
-    // make sure tag data exists\r
-    if ( SupportData.HasCoreOnly || TagData.empty() ) \r
-        return false;\r
-\r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
-        const unsigned int dataLength = strlen(pTagData);\r
-        destination.clear();\r
-        destination.resize(dataLength);\r
-        memcpy( (char*)destination.data(), pTagData, dataLength );\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, return failure\r
-    return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {\r
-  \r
-    // make sure tag data exists\r
-    if ( SupportData.HasCoreOnly || TagData.empty() ) \r
-        return false;\r
-\r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, determine data byte-length, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
-        \r
-        // determine data byte-length\r
-        const char type = *(pTagData - 1);\r
-        int destinationLength = 0;\r
-        switch (type) {\r
-            // 1 byte data\r
-            case 'A':\r
-            case 'c':\r
-            case 'C':\r
-                destinationLength = 1;\r
-                break;\r
-\r
-            // 2 byte data\r
-            case 's':\r
-            case 'S':\r
-                destinationLength = 2;\r
-                break;\r
-\r
-            // 4 byte data\r
-            case 'i':\r
-            case 'I':\r
-                destinationLength = 4;\r
-                break;\r
-\r
-            // unsupported type for integer destination (float or var-length strings)\r
-            case 'f':\r
-            case 'Z':\r
-            case 'H':\r
-                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);\r
-                return false;\r
-\r
-            // unknown tag type\r
-            default:\r
-                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
-                return false;\r
-        }\r
-          \r
-        // store in destination\r
-        destination = 0;\r
-        memcpy(&destination, pTagData, destinationLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, return failure\r
-    return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {\r
-    return GetTag(tag, (uint32_t&)destination);\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, float& destination) const {\r
-  \r
-    // make sure tag data exists\r
-    if ( SupportData.HasCoreOnly || TagData.empty() ) \r
-        return false;\r
-\r
-    // localize the tag data\r
-    char* pTagData = (char*)TagData.data();\r
-    const unsigned int tagDataLength = TagData.size();\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, determine data byte-length, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
-        //pTagData += numBytesParsed;\r
-        \r
-        // determine data byte-length\r
-        const char type = *(pTagData - 1);\r
-        int destinationLength = 0;\r
-        switch(type) {\r
-\r
-            // 1 byte data\r
-            case 'A':\r
-            case 'c':\r
-            case 'C':\r
-                destinationLength = 1;\r
-                break;\r
-\r
-            // 2 byte data\r
-            case 's':\r
-            case 'S':\r
-                destinationLength = 2;\r
-                break;\r
-\r
-            // 4 byte data\r
-            case 'f':\r
-            case 'i':\r
-            case 'I':\r
-                destinationLength = 4;\r
-                break;\r
-            \r
-            // unsupported type (var-length strings)\r
-            case 'Z':\r
-            case 'H':\r
-                fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);\r
-                return false;\r
-\r
-            // unknown tag type\r
-            default:\r
-                fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
-                return false;\r
-        }\r
-          \r
-        // store in destination\r
-        destination = 0.0;\r
-        memcpy(&destination, pTagData, destinationLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, return failure\r
-    return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::RemoveTag(const std::string& tag) {\r
-  \r
-    // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed\r
-    // also, return false if no data present to remove\r
-    if ( SupportData.HasCoreOnly || TagData.empty() ) return false;\r
-  \r
-    // localize the tag data\r
-    char* pOriginalTagData = (char*)TagData.data();\r
-    char* pTagData = pOriginalTagData;\r
-    const unsigned int originalTagDataLength = TagData.size();\r
-    unsigned int newTagDataLength = 0;\r
-    unsigned int numBytesParsed = 0;\r
-    \r
-    // if tag found, store data in readGroup, return success\r
-    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
-        \r
-        char newTagData[originalTagDataLength];\r
-\r
-        // copy original tag data up til desired tag\r
-        pTagData -= 3;\r
-        numBytesParsed -= 3;\r
-        const unsigned int beginningTagDataLength = numBytesParsed;\r
-        newTagDataLength += beginningTagDataLength;\r
-        memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
-        \r
-        // skip to next tag (if tag for removal is last, return true) \r
-        const char* pTagStorageType = pTagData + 2;\r
-        pTagData       += 3;\r
-        numBytesParsed += 3;\r
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
-         \r
-        // copy everything from current tag (the next one after tag for removal) to end\r
-        const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
-        const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
-        memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );\r
-        \r
-        // save new tag data\r
-        TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);\r
-        return true;\r
-    }\r
-    \r
-    // tag not found, no removal - return failure\r
-    return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {\r
-\r
-    while ( numBytesParsed < tagDataLength ) {\r
-\r
-        const char* pTagType        = pTagData;\r
-        const char* pTagStorageType = pTagData + 2;\r
-        pTagData       += 3;\r
-        numBytesParsed += 3;\r
-\r
-        // check the current tag, return true on match\r
-        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) \r
-            return true;\r
-\r
-        // get the storage class and find the next tag\r
-        if ( *pTagStorageType == '\0' ) return false; \r
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;\r
-        if ( *pTagData == '\0' ) return false;\r
-    }\r
-  \r
-    // checked all tags, none match\r
-    return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
-    \r
-    switch(storageType) {\r
-\r
-        case 'A':\r
-        case 'c':\r
-        case 'C':\r
-            ++numBytesParsed;\r
-            ++pTagData;\r
-            break;\r
-\r
-        case 's':\r
-        case 'S':\r
-            numBytesParsed += 2;\r
-            pTagData       += 2;\r
-            break;\r
-\r
-        case 'f':\r
-        case 'i':\r
-        case 'I':\r
-            numBytesParsed += 4;\r
-            pTagData       += 4;\r
-            break;\r
-\r
-        case 'Z':\r
-        case 'H':\r
-            while(*pTagData) {\r
-                ++numBytesParsed;\r
-                ++pTagData;\r
-            }\r
-            // increment for null-terminator\r
-            ++numBytesParsed;\r
-            ++pTagData;\r
-            break;\r
-\r
-        default: \r
-            // error case\r
-            fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);\r
-            return false;\r
-    }\r
-    \r
-    // return success\r
-    return true;\r
-}\r
-\r
  } // namespace BamTools\r
  \r
  #endif // BAMAUX_H\r
diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h

index fdd0d134bc3fd99cc98e4ed2d6b6aeb6eb21c15f..710893a0c55361ace7e9cdd7b1597f7edbe3afae 100644 (file)
--- a/src/api/BamIndex.h
+++ b/src/api/BamIndex.h
@@ -3,7 +3,7 @@
  // Marth Lab, Department of Biology, Boston College
  // All rights reserved.
  // ---------------------------------------------------------------------------
-// Last modified: 17 September 2010 (DB)
+// Last modified: 18 September 2010 (DB)
  // ---------------------------------------------------------------------------
  // Provides index functionality - both for the standardized BAM index format
  // (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
@@ -15,7 +15,7 @@
  #include <iostream>
  #include <string>
  #include <vector>
-#include "BamAux.h"
+#include "BamAlignment.h"
  
  namespace BamTools {
  
diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp

index 85905169ef270cd19a398ece14ebe349aca09a4b..a81ac87ee98d2b73b9b2682e976bb636535d12f5 100644 (file)
--- a/src/api/BamMultiReader.cpp
+++ b/src/api/BamMultiReader.cpp
@@ -1,9 +1,9 @@
  // ***************************************************************************
-// BamMultiReader.cpp (c) 2010 Erik Garrison
+// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
  // Marth Lab, Department of Biology, Boston College
  // All rights reserved.
  // ---------------------------------------------------------------------------
-// Last modified: 3 September 2010 (DB)
+// Last modified: 18 September 2010 (DB)
  // ---------------------------------------------------------------------------
  // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
  // Institute.
@@ -16,7 +16,6 @@
  // precludes the need to sort merged files.
  // ***************************************************************************
  
-// C++ includes
  #include <algorithm>
  #include <fstream>
  #include <iostream>
@@ -24,8 +23,6 @@
  #include <sstream>
  #include <string>
  #include <vector>
-
-// BamTools includes
  #include "BGZF.h"
  #include "BamMultiReader.h"
  using namespace BamTools;
@@ -74,11 +71,11 @@ void BamMultiReader::Close(void) {
  }
  
  // saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail
-bool BamMultiReader::CreateIndexes(bool useDefaultIndex) {
+bool BamMultiReader::CreateIndexes(bool useStandardIndex) {
      bool result = true;
      for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
          BamReader* reader = it->first;
-        result &= reader->CreateIndex(useDefaultIndex);
+        result &= reader->CreateIndex(useStandardIndex);
      }
      return result;
  }
@@ -99,11 +96,12 @@ const string BamMultiReader::GetHeaderText(void) const {
      // foreach extraction entry (each BAM file)
      for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
  
-        map<string, bool> currentFileReadGroups;
-
          BamReader* reader = rs->first;
-
-        stringstream header(reader->GetHeaderText());
+        string headerText = reader->GetHeaderText();
+        if ( headerText.empty() ) continue;
+        
+        map<string, bool> currentFileReadGroups;
+        stringstream header(headerText);
          vector<string> lines;
          string item;
          while (getline(header, item))
@@ -290,7 +288,7 @@ bool BamMultiReader::Jump(int refID, int position) {
  }
  
  // opens BAM files
-bool BamMultiReader::Open(const vector<string> filenames, bool openIndexes, bool coreMode, bool useDefaultIndex) {
+bool BamMultiReader::Open(const vector<string>& filenames, bool openIndexes, bool coreMode, bool useDefaultIndex) {
      
      // for filename in filenames
      fileNames = filenames; // save filenames in our multireader
diff --git a/src/api/BamMultiReader.cpp~ b/src/api/BamMultiReader.cpp~

new file mode 100644 (file)

index 0000000..b5c4002
--- /dev/null
+++ b/src/api/BamMultiReader.cpp~
@@ -0,0 +1,450 @@
+// ***************************************************************************
+// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 September 2010 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files.
+//
+// This functionality allows applications to work on very large sets of files
+// without requiring intermediate merge, sort, and index steps for each file
+// subset.  It also improves the performance of our merge system as it
+// precludes the need to sort merged files.
+// ***************************************************************************
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "BGZF.h"
+#include "BamMultiReader.h"
+using namespace BamTools;
+using namespace std;
+
+// -----------------------------------------------------
+// BamMultiReader implementation
+// -----------------------------------------------------
+
+// constructor
+BamMultiReader::BamMultiReader(void)
+    : CurrentRefID(0)
+    , CurrentLeft(0)
+{ }
+
+// destructor
+BamMultiReader::~BamMultiReader(void) {
+    Close(); 
+}
+
+// close the BAM files
+void BamMultiReader::Close(void) {
+  
+    // close all BAM readers and clean up pointers
+    vector<pair<BamReader*, BamAlignment*> >::iterator readerIter = readers.begin();
+    vector<pair<BamReader*, BamAlignment*> >::iterator readerEnd  = readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter) {
+      
+        BamReader* reader = (*readerIter).first;
+        BamAlignment* alignment = (*readerIter).second;
+        
+        // close the reader
+        if ( reader) reader->Close();  
+        
+        // delete reader pointer
+        delete reader;
+        reader = 0;
+
+        // delete alignment pointer
+        delete alignment;
+        alignment = 0;
+    }
+
+    // clear out the container
+    readers.clear();
+}
+
+// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail
+bool BamMultiReader::CreateIndexes(bool useStandardIndex) {
+    bool result = true;
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* reader = it->first;
+        result &= reader->CreateIndex(useStandardIndex);
+    }
+    return result;
+}
+
+// for debugging
+void BamMultiReader::DumpAlignmentIndex(void) {
+    for (AlignmentIndex::const_iterator it = alignments.begin(); it != alignments.end(); ++it) {
+        cerr << it->first.first << ":" << it->first.second << " " << it->second.first->GetFilename() << endl;
+    }
+}
+
+// makes a virtual, unified header for all the bam files in the multireader
+const string BamMultiReader::GetHeaderText(void) const {
+
+    string mergedHeader = "";
+    map<string, bool> readGroups;
+
+    // foreach extraction entry (each BAM file)
+    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
+
+        BamReader* reader = rs->first;
+        const string headerText = reader->GetHeaderText();
+        if ( headerText.empty() ) continue;
+        
+        map<string, bool> currentFileReadGroups;
+        stringstream header(headerText);
+        vector<string> lines;
+        string item;
+        while (getline(header, item))
+            lines.push_back(item);
+
+        for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it) {
+
+            // get next line from header, skip if empty
+            string headerLine = *it;
+            if ( headerLine.empty() ) { continue; }
+
+            // if first file, save HD & SQ entries
+            if ( rs == readers.begin() ) {
+                if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
+                    mergedHeader.append(headerLine.c_str());
+                    mergedHeader.append(1, '\n');
+                }
+            }
+
+            // (for all files) append RG entries if they are unique
+            if ( headerLine.find("@RG") == 0 ) {
+                stringstream headerLineSs(headerLine);
+                string part, readGroupPart, readGroup;
+                while(std::getline(headerLineSs, part, '\t')) {
+                    stringstream partSs(part);
+                    string subtag;
+                    std::getline(partSs, subtag, ':');
+                    if (subtag == "ID") {
+                        std::getline(partSs, readGroup, ':');
+                        break;
+                    }
+                }
+                if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
+                    mergedHeader.append(headerLine.c_str() );
+                    mergedHeader.append(1, '\n');
+                    readGroups[readGroup] = true;
+                    currentFileReadGroups[readGroup] = true;
+                } else {
+                    // warn iff we are reading one file and discover duplicated @RG tags in the header
+                    // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
+                    if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
+                        cerr << "WARNING: duplicate @RG tag " << readGroup 
+                            << " entry in header of " << reader->GetFilename() << endl;
+                    }
+                }
+            }
+        }
+    }
+
+    // return merged header text
+    return mergedHeader;
+}
+
+// get next alignment among all files
+bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
+
+    // bail out if we are at EOF in all files, means no more alignments to process
+    if (!HasOpenReaders())
+        return false;
+
+    // when all alignments have stepped into a new target sequence, update our
+    // current reference sequence id
+    UpdateReferenceID();
+
+    // our lowest alignment and reader will be at the front of our alignment index
+    BamAlignment* alignment = alignments.begin()->second.second;
+    BamReader* reader = alignments.begin()->second.first;
+
+    // now that we have the lowest alignment in the set, save it by copy to our argument
+    nextAlignment = BamAlignment(*alignment);
+
+    // remove this alignment index entry from our alignment index
+    alignments.erase(alignments.begin());
+
+    // and add another entry if we can get another alignment from the reader
+    if (reader->GetNextAlignment(*alignment)) {
+        alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
+                                    make_pair(reader, alignment)));
+    } else { // do nothing
+        //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
+    }
+
+    return true;
+
+}
+
+// get next alignment among all files without parsing character data from alignments
+bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
+
+    // bail out if we are at EOF in all files, means no more alignments to process
+    if (!HasOpenReaders())
+        return false;
+
+    // when all alignments have stepped into a new target sequence, update our
+    // current reference sequence id
+    UpdateReferenceID();
+
+    // our lowest alignment and reader will be at the front of our alignment index
+    BamAlignment* alignment = alignments.begin()->second.second;
+    BamReader* reader = alignments.begin()->second.first;
+
+    // now that we have the lowest alignment in the set, save it by copy to our argument
+    nextAlignment = BamAlignment(*alignment);
+    //memcpy(&nextAlignment, alignment, sizeof(BamAlignment));
+
+    // remove this alignment index entry from our alignment index
+    alignments.erase(alignments.begin());
+
+    // and add another entry if we can get another alignment from the reader
+    if (reader->GetNextAlignmentCore(*alignment)) {
+        alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position), 
+                                    make_pair(reader, alignment)));
+    } else { // do nothing
+        //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
+    }
+
+    return true;
+
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical 
+// references for all BAM files.  We enforce this by invoking the above 
+// validation function (ValidateReaders) to verify that our reference data 
+// is the same across all files on Open, so we will not encounter a situation 
+// in which there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// returns the number of reference sequences
+const int BamMultiReader::GetReferenceCount(void) const {
+    return readers.front().first->GetReferenceCount();
+}
+
+// returns vector of reference objects
+const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
+    return readers.front().first->GetReferenceData();
+}
+
+// returns refID from reference name
+const int BamMultiReader::GetReferenceID(const string& refName) const { 
+    return readers.front().first->GetReferenceID(refName);
+}
+
+// ---------------------------------------------------------------------------------------
+
+// checks if any readers still have alignments
+bool BamMultiReader::HasOpenReaders() {
+    return alignments.size() > 0;
+}
+
+// returns whether underlying BAM readers ALL have an index loaded
+// this is useful to indicate whether Jump() or SetRegion() are possible
+bool BamMultiReader::IsIndexLoaded(void) const {
+    bool ok = true;
+    vector<pair<BamReader*, BamAlignment*> >::const_iterator readerIter = readers.begin();
+    vector<pair<BamReader*, BamAlignment*> >::const_iterator readerEnd  = readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+        const BamReader* reader = (*readerIter).first;
+        if ( reader ) ok &= reader->IsIndexLoaded();
+    }
+    return ok;
+}
+
+// jumps to specified region(refID, leftBound) in BAM files, returns success/fail
+bool BamMultiReader::Jump(int refID, int position) {
+
+    //if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {
+    CurrentRefID = refID;
+    CurrentLeft  = position;
+
+    bool result = true;
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* reader = it->first;
+        result &= reader->Jump(refID, position);
+        if (!result) {
+            cerr << "ERROR: could not jump " << reader->GetFilename() << " to " << refID << ":" << position << endl;
+            exit(1);
+        }
+    }
+    if (result) UpdateAlignments();
+    return result;
+}
+
+// opens BAM files
+bool BamMultiReader::Open(const vector<string>& filenames, bool openIndexes, bool coreMode, bool useDefaultIndex) {
+    
+    // for filename in filenames
+    fileNames = filenames; // save filenames in our multireader
+    for (vector<string>::const_iterator it = filenames.begin(); it != filenames.end(); ++it) {
+        
+        const string filename = *it;
+        BamReader* reader = new BamReader;
+
+        bool openedOK = true;
+        if (openIndexes) {
+            
+            // leave index filename empty 
+            // this allows BamReader & BamIndex to search for any available
+            // useDefaultIndex gives hint to prefer BAI over BTI
+            openedOK = reader->Open(filename, "", true, useDefaultIndex);
+        } 
+        
+        // ignoring index file(s)
+        else openedOK = reader->Open(filename); 
+        
+        // if file opened ok, check that it can be read
+        if ( openedOK ) {
+           
+            bool fileOK = true;
+            BamAlignment* alignment = new BamAlignment;
+            fileOK &= ( coreMode ? reader->GetNextAlignmentCore(*alignment) : reader->GetNextAlignment(*alignment) );
+            
+            if (fileOK) {
+                readers.push_back(make_pair(reader, alignment)); // store pointers to our readers for cleanup
+                alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
+                                            make_pair(reader, alignment)));
+            } else {
+                cerr << "WARNING: could not read first alignment in " << filename << ", ignoring file" << endl;
+                // if only file available & could not be read, return failure
+                if ( filenames.size() == 1 ) return false;
+            }
+        } 
+       
+        // TODO; any further error handling when openedOK is false ??
+        else 
+            return false;
+    }
+
+    // files opened ok, at least one alignment could be read,
+    // now need to check that all files use same reference data
+    ValidateReaders();
+    return true;
+}
+
+void BamMultiReader::PrintFilenames(void) {
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* reader = it->first;
+        cout << reader->GetFilename() << endl;
+    }
+}
+
+// returns BAM file pointers to beginning of alignment data
+bool BamMultiReader::Rewind(void) { 
+    bool result = true;
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* reader = it->first;
+        result &= reader->Rewind();
+    }
+    return result;
+}
+
+bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) {
+    BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);
+    return SetRegion(region);
+}
+
+bool BamMultiReader::SetRegion(const BamRegion& region) {
+
+    Region = region;
+
+    // NB: While it may make sense to track readers in which we can
+    // successfully SetRegion, In practice a failure of SetRegion means "no
+    // alignments here."  It makes sense to simply accept the failure,
+    // UpdateAlignments(), and continue.
+
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        if (!it->first->SetRegion(region)) {
+            cerr << "ERROR: could not jump " << it->first->GetFilename() << " to "
+                << region.LeftRefID << ":" << region.LeftPosition
+                << ".." << region.RightRefID << ":" << region.RightPosition << endl;
+        }
+    }
+
+    UpdateAlignments();
+    return true;
+}
+
+void BamMultiReader::UpdateAlignments(void) {
+    // Update Alignments
+    alignments.clear();
+    for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* br = it->first;
+        BamAlignment* ba = it->second;
+        if (br->GetNextAlignment(*ba)) {
+            alignments.insert(make_pair(make_pair(ba->RefID, ba->Position), 
+                                        make_pair(br, ba)));
+        } else {
+            // assume BamReader end of region / EOF
+        }
+    }
+}
+
+// updates the reference id stored in the BamMultiReader
+// to reflect the current state of the readers
+void BamMultiReader::UpdateReferenceID(void) {
+    // the alignments are sorted by position, so the first alignment will always have the lowest reference ID
+    if (alignments.begin()->second.second->RefID != CurrentRefID) {
+        // get the next reference id
+        // while there aren't any readers at the next ref id
+        // increment the ref id
+        int nextRefID = CurrentRefID;
+        while (alignments.begin()->second.second->RefID != nextRefID) {
+            ++nextRefID;
+        }
+        //cerr << "updating reference id from " << CurrentRefID << " to " << nextRefID << endl;
+        CurrentRefID = nextRefID;
+    }
+}
+
+// ValidateReaders checks that all the readers point to BAM files representing
+// alignments against the same set of reference sequences, and that the
+// sequences are identically ordered.  If these checks fail the operation of
+// the multireader is undefined, so we force program exit.
+void BamMultiReader::ValidateReaders(void) const {
+    int firstRefCount = readers.front().first->GetReferenceCount();
+    BamTools::RefVector firstRefData = readers.front().first->GetReferenceData();
+    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
+        BamReader* reader = it->first;
+        BamTools::RefVector currentRefData = reader->GetReferenceData();
+        BamTools::RefVector::const_iterator f = firstRefData.begin();
+        BamTools::RefVector::const_iterator c = currentRefData.begin();
+        if (reader->GetReferenceCount() != firstRefCount || firstRefData.size() != currentRefData.size()) {
+            cerr << "ERROR: mismatched number of references in " << reader->GetFilename()
+                      << " expected " << firstRefCount 
+                      << " reference sequences but only found " << reader->GetReferenceCount() << endl;
+            exit(1);
+        }
+        // this will be ok; we just checked above that we have identically-sized sets of references
+        // here we simply check if they are all, in fact, equal in content
+        while (f != firstRefData.end()) {
+            if (f->RefName != c->RefName || f->RefLength != c->RefLength) {
+                cerr << "ERROR: mismatched references found in " << reader->GetFilename()
+                          << " expected: " << endl;
+                for (BamTools::RefVector::const_iterator a = firstRefData.begin(); a != firstRefData.end(); ++a)
+                    cerr << a->RefName << " " << a->RefLength << endl;
+                cerr << "but found: " << endl;
+                for (BamTools::RefVector::const_iterator a = currentRefData.begin(); a != currentRefData.end(); ++a)
+                    cerr << a->RefName << " " << a->RefLength << endl;
+                exit(1);
+            }
+            ++f; ++c;
+        }
+    }
+}
diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h

index e2c1dad87fd054aae6299d8480afa01c80e87c68..e1a6e941131e1988ae3e2477d468c2cf8a99d30a 100644 (file)
--- a/src/api/BamMultiReader.h
+++ b/src/api/BamMultiReader.h
@@ -1,9 +1,9 @@
  // ***************************************************************************\r
-// BamMultiReader.h (c) 2010 Erik Garrison\r
+// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett\r
  // Marth Lab, Department of Biology, Boston College\r
  // All rights reserved.\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 3 September 2010 (DB)\r
+// Last modified: 18 September 2010 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Functionality for simultaneously reading multiple BAM files\r
  // ***************************************************************************\r
@@ -11,23 +11,16 @@
  #ifndef BAMMULTIREADER_H\r
  #define BAMMULTIREADER_H\r
  \r
-// C++ includes\r
  #include <string>\r
  #include <map>\r
-#include <utility> // for pair\r
+#include <utility>\r
  #include <sstream>\r
-\r
-using namespace std;\r
-\r
-// BamTools includes\r
-#include "BamAux.h"\r
  #include "BamReader.h"\r
  \r
  namespace BamTools {\r
  \r
  // index mapping reference/position pairings to bamreaders and their alignments\r
-typedef multimap<pair<int, int>, pair<BamReader*, BamAlignment*> > AlignmentIndex;\r
-\r
+typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex;\r
  \r
  class BamMultiReader {\r
  \r
@@ -61,7 +54,7 @@ class BamMultiReader {
          // also useful for merging\r
          // @preferStandardIndex - look for standard BAM index ".bai" first.  If false, \r
          // will look for BamTools index ".bti".  \r
-        bool Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);\r
+        bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);\r
  \r
          // returns whether underlying BAM readers ALL have an index loaded\r
          // this is useful to indicate whether Jump() or SetRegion() are possible\r
@@ -97,7 +90,7 @@ class BamMultiReader {
          // ----------------------\r
  \r
          // returns unified SAM header text for all files\r
-        const string GetHeaderText(void) const;\r
+        const std::string GetHeaderText(void) const;\r
          // returns number of reference sequences\r
          const int GetReferenceCount(void) const;\r
          // returns vector of reference objects\r
@@ -112,7 +105,7 @@ class BamMultiReader {
          // ----------------------\r
  \r
          // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")\r
-        bool CreateIndexes(bool useDefaultIndex = true);\r
+        bool CreateIndexes(bool useStandardIndex = true);\r
  \r
          //const int GetReferenceID(const string& refName) const;\r
  \r
@@ -125,13 +118,13 @@ class BamMultiReader {
      private:\r
  \r
          // the set of readers and alignments which we operate on, maintained throughout the life of this class\r
-        vector<pair<BamReader*, BamAlignment*> > readers;\r
+        std::vector<std::pair<BamReader*, BamAlignment*> > readers;\r
  \r
          // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment\r
          // when a reader reaches EOF, its entry is removed from this index\r
          AlignmentIndex alignments;\r
  \r
-        vector<string> fileNames;\r
+        std::vector<std::string> fileNames;\r
  };\r
  \r
  } // namespace BamTools\r
diff --git a/src/api/BamReader.h b/src/api/BamReader.h

index 6e863b6beded3fbded2b9d70b0442b4c1276dbaf..132c95b826dbe854750c883fe38d20a4a2ab4806 100644 (file)
--- a/src/api/BamReader.h
+++ b/src/api/BamReader.h
@@ -3,7 +3,7 @@
  // Marth Lab, Department of Biology, Boston College\r
  // All rights reserved.\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 3 September 2010 (DB)\r
+// Last modified: 18 September 2010 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
  // Institute.\r
@@ -14,11 +14,8 @@
  #ifndef BAMREADER_H\r
  #define BAMREADER_H\r
  \r
-// C++ includes\r
  #include <string>\r
-\r
-// BamTools includes\r
-#include "BamAux.h"\r
+#include "BamAlignment.h"\r
  \r
  namespace BamTools {\r
    \r
@@ -42,7 +39,7 @@ class BamReader {
          bool IsIndexLoaded(void) const;\r
          // returns whether reader is open for reading or not\r
          bool IsOpen(void) const;\r
-        // performs random-access jump to reference, position\r
+        // performs random-access jump using (reference, position) as a left-bound\r
          bool Jump(int refID, int position = 0);\r
          // opens BAM file (and optional BAM index file, if provided)\r
          // @lookForIndex - if no indexFilename provided, look for an existing index file\r
@@ -67,9 +64,8 @@ class BamReader {
          bool GetNextAlignment(BamAlignment& bAlignment);\r
          \r
          // retrieves next available alignment core data (returns success/fail)\r
-        // ** DOES NOT parse any character data (read name, bases, qualities, tag data)\r
-        //    these can be accessed, if necessary, from the supportData \r
-        // useful for operations requiring ONLY positional or other alignment-related information\r
+        // ** DOES NOT parse any character data (read name, bases, qualities, tag data) **\r
+        // useful for operations requiring ONLY aligner-related information (refId/position, alignment flags, CIGAR, mapQuality, etc)\r
          bool GetNextAlignmentCore(BamAlignment& bAlignment);\r
  \r
          // ----------------------\r
diff --git a/src/api/BamWriter.h b/src/api/BamWriter.h

index b0cb6ceba3457cc58f3f3670522148a394a39a96..106e3e9263a85b0670282b737d49ae1d9adf8c8d 100644 (file)
--- a/src/api/BamWriter.h
+++ b/src/api/BamWriter.h
@@ -3,7 +3,7 @@
  // Marth Lab, Department of Biology, Boston College\r
  // All rights reserved.\r
  // ---------------------------------------------------------------------------\r
-// Last modified: 17 August 2010 (DB)\r
+// Last modified: 18 September 2010 (DB)\r
  // ---------------------------------------------------------------------------\r
  // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
  // Institute.\r
@@ -14,11 +14,8 @@
  #ifndef BAMWRITER_H\r
  #define BAMWRITER_H\r
  \r
-// C++ includes\r
  #include <string>\r
-\r
-// BamTools includes\r
-#include "BamAux.h"\r
+#include "BamAlignment.h"\r
  \r
  namespace BamTools {\r
  \r
author	Derek <derekwbarnett@gmail.com>
	Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)
committer	Derek <derekwbarnett@gmail.com>
	Sat, 18 Sep 2010 20:43:37 +0000 (16:43 -0400)
src/api/BamAlignment.cpp	[new file with mode: 0644]	patch \| blob
src/api/BamAlignment.h	[new file with mode: 0644]	patch \| blob
src/api/BamAux.h		patch \| blob \| history
src/api/BamIndex.h		patch \| blob \| history
src/api/BamMultiReader.cpp		patch \| blob \| history
src/api/BamMultiReader.cpp~	[new file with mode: 0644]	patch \| blob
src/api/BamMultiReader.h		patch \| blob \| history
src/api/BamReader.h		patch \| blob \| history
src/api/BamWriter.h		patch \| blob \| history