X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=src%2Fapi%2FBamAlignment.cpp;h=0077f64a9eb5306033811a6e49261cebfacb5969;hb=dd6648692c398c783958be7e02fc669b16b5014a;hp=7cff4b0da8db0728d7a9fff14bed7d518384e16c;hpb=cdf4bbcb19025398d429035fe672661a8c8d1a80;p=bamtools.git diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp index 7cff4b0..0077f64 100644 --- a/src/api/BamAlignment.cpp +++ b/src/api/BamAlignment.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) +// Last modified: 22 April 2011 (DB) // --------------------------------------------------------------------------- // Provides the BamAlignment data structure // *************************************************************************** @@ -22,173 +22,6 @@ using namespace BamTools; #include using namespace std; -// internal utility methods -namespace BamTools { -namespace Internal { - -/*! \fn bool IsValidSize(const string& tag, const string& type) - \internal - - Checks that tag name & type strings are expected sizes. - \a tag should have length - \a type should have length 1 - - \param tag BAM tag name - \param type BAM tag type-code - - \return \c true if both \a tag and \a type are correct sizes -*/ -bool IsValidSize(const string& tag, const string& type) { - return (tag.size() == Constants::BAM_TAG_TAGSIZE) && - (type.size() == Constants::BAM_TAG_TYPESIZE); -} - -/*! \fn bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) - \internal - - Moves to next available tag in tag data string - - \param storageType BAM tag type-code that determines how far to move cursor - \param pTagData pointer to current position (cursor) in tag string - \param numBytesParsed report of how many bytes were parsed (cumulatively) - - \return \c if storageType was a recognized BAM tag type - \post \a pTagData will point to the byte where the next tag data begins. - \a numBytesParsed will correspond to the cursor's position in the full TagData string. -*/ -bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) { - - switch (storageType) { - - case (Constants::BAM_TAG_TYPE_ASCII) : - case (Constants::BAM_TAG_TYPE_INT8) : - case (Constants::BAM_TAG_TYPE_UINT8) : - ++numBytesParsed; - ++pTagData; - break; - - case (Constants::BAM_TAG_TYPE_INT16) : - case (Constants::BAM_TAG_TYPE_UINT16) : - numBytesParsed += sizeof(uint16_t); - pTagData += sizeof(uint16_t); - break; - - case (Constants::BAM_TAG_TYPE_FLOAT) : - case (Constants::BAM_TAG_TYPE_INT32) : - case (Constants::BAM_TAG_TYPE_UINT32) : - numBytesParsed += sizeof(uint32_t); - pTagData += sizeof(uint32_t); - break; - - case (Constants::BAM_TAG_TYPE_STRING) : - case (Constants::BAM_TAG_TYPE_HEX) : - while( *pTagData ) { - ++numBytesParsed; - ++pTagData; - } - // increment for null-terminator - ++numBytesParsed; - ++pTagData; - break; - - case (Constants::BAM_TAG_TYPE_ARRAY) : - - { - // read array type - const char arrayType = *pTagData; - ++numBytesParsed; - ++pTagData; - - // read number of elements - int32_t numElements; - memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary - numBytesParsed += sizeof(uint32_t); - pTagData += sizeof(uint32_t); - - // calculate number of bytes to skip - int bytesToSkip = 0; - switch (arrayType) { - case (Constants::BAM_TAG_TYPE_INT8) : - case (Constants::BAM_TAG_TYPE_UINT8) : - bytesToSkip = numElements; - break; - case (Constants::BAM_TAG_TYPE_INT16) : - case (Constants::BAM_TAG_TYPE_UINT16) : - bytesToSkip = numElements*sizeof(uint16_t); - break; - case (Constants::BAM_TAG_TYPE_FLOAT) : - case (Constants::BAM_TAG_TYPE_INT32) : - case (Constants::BAM_TAG_TYPE_UINT32) : - bytesToSkip = numElements*sizeof(uint32_t); - break; - default: - cerr << "BamAlignment ERROR: unknown binary array type encountered: " - << arrayType << endl; - return false; - } - - // skip binary array contents - numBytesParsed += bytesToSkip; - pTagData += bytesToSkip; - break; - } - - default: - cerr << "BamAlignment ERROR: unknown tag type encountered" - << storageType << endl; - return false; - } - - // return success - return true; -} - -/*! \fn bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) - \internal - - Searches for requested tag in BAM tag data. - - \param tag requested 2-character tag name - \param pTagData pointer to current position in BamAlignment::TagData - \param tagDataLength length of BamAlignment::TagData - \param numBytesParsed number of bytes parsed so far - - \return \c true if found - - \post If \a tag is found, \a pTagData will point to the byte where the tag data begins. - \a numBytesParsed will correspond to the position in the full TagData string. - -*/ -bool FindTag(const std::string& tag, - char* &pTagData, - const unsigned int& tagDataLength, - unsigned int& numBytesParsed) -{ - - while ( numBytesParsed < tagDataLength ) { - - const char* pTagType = pTagData; - const char* pTagStorageType = pTagData + 2; - pTagData += 3; - numBytesParsed += 3; - - // check the current tag, return true on match - if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) - return true; - - // get the storage class and find the next tag - if ( *pTagStorageType == '\0' ) return false; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false; - if ( *pTagData == '\0' ) return false; - } - - // checked all tags, none match - return false; -} - -} // namespace Internal -} // namespace BamTools - /*! \class BamTools::BamAlignment \brief The main BAM alignment data structure. @@ -300,7 +133,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for string value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING && type.at(0) != Constants::BAM_TAG_TYPE_HEX ) @@ -315,22 +148,24 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // otherwise, copy tag data to temp buffer string newTag = tag + type + value; const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term - + // append newTag strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term - + // store temp buffer back in TagData const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -353,7 +188,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for uint32_t value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT || type.at(0) == Constants::BAM_TAG_TYPE_STRING || type.at(0) == Constants::BAM_TAG_TYPE_HEX || @@ -370,7 +205,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // otherwise, convert value to string @@ -380,7 +215,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // copy original tag data to temp buffer string newTag = tag + type; const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term // append newTag @@ -390,6 +225,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // store temp buffer back in TagData const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; // return success return true; @@ -429,7 +265,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for float value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING || type.at(0) == Constants::BAM_TAG_TYPE_HEX || type.at(0) == Constants::BAM_TAG_TYPE_ARRAY @@ -445,7 +281,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // otherwise, convert value to string @@ -455,7 +291,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const // copy original tag data to temp buffer string newTag = tag + type; const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term // append newTag @@ -466,6 +302,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -496,7 +334,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -513,7 +351,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(uint8_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -531,6 +369,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -561,7 +401,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& val // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -578,7 +418,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& val const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(int8_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -596,6 +436,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& val const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -626,7 +468,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -643,7 +485,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(uint16_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -661,6 +503,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -691,7 +535,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -708,7 +552,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(int16_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -726,6 +570,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -756,7 +602,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -773,7 +619,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(uint32_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -791,6 +637,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& v const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -821,7 +669,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -838,7 +686,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(int32_t); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -856,6 +704,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& va const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -886,7 +736,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& valu // if tag already exists, return false // use EditTag explicitly instead - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // build new tag's base information @@ -903,7 +753,7 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& valu const int newTagDataLength = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE + numElements*sizeof(float); - char originalTagData[newTagDataLength]; + char* originalTagData = new char[newTagDataLength]; memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) @@ -921,6 +771,8 @@ bool BamAlignment::AddTag(const std::string& tag, const std::vector& valu const char* newTagData = (const char*)originalTagData; TagData.assign(newTagData, newTagDataLength); + delete[] originalTagData; + // return success return true; } @@ -1174,7 +1026,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for string value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING && type.at(0) != Constants::BAM_TAG_TYPE_HEX ) return false; @@ -1188,10 +1040,10 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { // make sure array is more than big enough - char newTagData[originalTagDataLength + value.size()]; + char* newTagData = new char[originalTagDataLength + value.size()]; // copy original tag data up til desired tag const unsigned int beginningTagDataLength = numBytesParsed; @@ -1204,7 +1056,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // skip to next tag (if tag for removal is last, return true) const char* pTagStorageType = pTagData - 1; - if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) + if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; // copy everything from current tag (the next one after tag for removal) to end @@ -1218,6 +1070,9 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // save new tag data TagData.assign(newTagData, endTagOffset + endTagDataLength); + + delete[] newTagData; + return true; } @@ -1245,7 +1100,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for uint32_t value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT || type.at(0) == Constants::BAM_TAG_TYPE_STRING || type.at(0) == Constants::BAM_TAG_TYPE_HEX || @@ -1264,10 +1119,10 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { // make sure array is more than big enough - char newTagData[originalTagDataLength + sizeof(value)]; + char* newTagData = new char[originalTagDataLength + sizeof(value)]; // copy original tag data up til desired tag const unsigned int beginningTagDataLength = numBytesParsed; @@ -1281,7 +1136,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // skip to next tag (if tag for removal is last, return true) const char* pTagStorageType = pTagData - 1; - if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) + if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; // copy everything from current tag (the next one after tag for removal) to end @@ -1295,6 +1150,9 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // save new tag data TagData.assign(newTagData, endTagOffset + endTagDataLength); + + delete[] newTagData; + return true; } @@ -1340,7 +1198,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons if ( SupportData.HasCoreOnly ) return false; // validate tag/type size & that type is OK for float value - if ( !Internal::IsValidSize(tag, type) ) return false; + if ( !IsValidSize(tag, type) ) return false; if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING || type.at(0) == Constants::BAM_TAG_TYPE_HEX || type.at(0) == Constants::BAM_TAG_TYPE_ARRAY @@ -1358,10 +1216,10 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { // make sure array is more than big enough - char newTagData[originalTagDataLength + sizeof(value)]; + char* newTagData = new char[originalTagDataLength + sizeof(value)]; // copy original tag data up til desired tag const unsigned int beginningTagDataLength = numBytesParsed; @@ -1375,7 +1233,7 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // skip to next tag (if tag for removal is last, return true) const char* pTagStorageType = pTagData - 1; - if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) + if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; // copy everything from current tag (the next one after tag for removal) to end @@ -1389,6 +1247,9 @@ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, cons // save new tag data TagData.assign(newTagData, endTagOffset + endTagDataLength); + + delete[] newTagData; + return true; } @@ -1571,6 +1432,49 @@ bool BamAlignment::EditTag(const std::string& tag, const std::vector& val return AddTag(tag, values); } +/*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) + \internal + + Searches for requested tag in BAM tag data. + + \param tag requested 2-character tag name + \param pTagData pointer to current position in BamAlignment::TagData + \param tagDataLength length of BamAlignment::TagData + \param numBytesParsed number of bytes parsed so far + + \return \c true if found + + \post If \a tag is found, \a pTagData will point to the byte where the tag data begins. + \a numBytesParsed will correspond to the position in the full TagData string. + +*/ +bool BamAlignment::FindTag(const std::string& tag, + char*& pTagData, + const unsigned int& tagDataLength, + unsigned int& numBytesParsed) const +{ + + while ( numBytesParsed < tagDataLength ) { + + const char* pTagType = pTagData; + const char* pTagStorageType = pTagData + 2; + pTagData += 3; + numBytesParsed += 3; + + // check the current tag, return true on match + if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) + return true; + + // get the storage class and find the next tag + if ( *pTagStorageType == '\0' ) return false; + if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false; + if ( *pTagData == '\0' ) return false; + } + + // checked all tags, none match + return false; +} + /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const \brief Retrieves value of edit distance tag ("NM"). @@ -1660,7 +1564,7 @@ bool BamAlignment::GetTag(const std::string& tag, std::string& destination) cons unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { const unsigned int dataLength = strlen(pTagData); destination.clear(); destination.resize(dataLength); @@ -1692,7 +1596,7 @@ bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const { unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { // determine data byte-length const char type = *(pTagData - 1); @@ -1776,7 +1680,7 @@ bool BamAlignment::GetTag(const std::string& tag, float& destination) const { unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { // determine data byte-length const char type = *(pTagData - 1); @@ -1848,7 +1752,7 @@ bool BamAlignment::GetTag(const std::string& tag, std::vector& destina unsigned int numBytesParsed = 0; // return false if tag not found - if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // check that tag is array type @@ -1935,7 +1839,7 @@ bool BamAlignment::GetTag(const std::string& tag, std::vector& destinat unsigned int numBytesParsed = 0; // return false if tag not found - if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // check that tag is array type @@ -2023,7 +1927,7 @@ bool BamAlignment::GetTag(const std::string& tag, std::vector& destinatio unsigned int numBytesParsed = 0; // return false if tag not found - if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; // check that tag is array type @@ -2111,7 +2015,7 @@ bool BamAlignment::GetTagType(const std::string& tag, char& type) const { unsigned int numBytesParsed = 0; // lookup tag - if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { // retrieve tag type code type = *(pTagData - 1); @@ -2160,7 +2064,7 @@ bool BamAlignment::HasTag(const std::string& tag) const { unsigned int numBytesParsed = 0; // if result of tag lookup - return Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed); + return FindTag(tag, pTagData, tagDataLength, numBytesParsed); } /*! \fn bool BamAlignment::IsDuplicate(void) const @@ -2240,6 +2144,23 @@ bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 ); } +/*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const + \internal + + Checks that tag name & type strings are expected sizes. + \a tag should have length + \a type should have length 1 + + \param tag BAM tag name + \param type BAM tag type-code + + \return \c true if both \a tag and \a type are correct sizes +*/ +bool BamAlignment::IsValidSize(const string& tag, const string& type) const { + return (tag.size() == Constants::BAM_TAG_TAGSIZE) && + (type.size() == Constants::BAM_TAG_TYPESIZE); +} + /*! \fn bool BamAlignment::RemoveTag(const std::string& tag) \brief Removes field from BAM tags. @@ -2259,9 +2180,9 @@ bool BamAlignment::RemoveTag(const std::string& tag) { unsigned int numBytesParsed = 0; // if tag found - if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { + if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { - char newTagData[originalTagDataLength]; + char* newTagData = new char[originalTagDataLength]; // copy original tag data up til desired tag pTagData -= 3; @@ -2274,7 +2195,7 @@ bool BamAlignment::RemoveTag(const std::string& tag) { const char* pTagStorageType = pTagData + 2; pTagData += 3; numBytesParsed += 3; - if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) + if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; // copy everything from current tag (the next one after tag for removal) to end @@ -2284,6 +2205,9 @@ bool BamAlignment::RemoveTag(const std::string& tag) { // save new tag data TagData.assign(newTagData, beginningTagDataLength + endTagDataLength); + + delete[] newTagData; + return true; } @@ -2405,3 +2329,105 @@ void BamAlignment::SetIsSecondMate(bool ok) { void BamAlignment::SetIsUnmapped(bool ok) { SetIsMapped(!ok); } + +/*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed) + \internal + + Moves to next available tag in tag data string + + \param storageType BAM tag type-code that determines how far to move cursor + \param pTagData pointer to current position (cursor) in tag string + \param numBytesParsed report of how many bytes were parsed (cumulatively) + + \return \c if storageType was a recognized BAM tag type + \post \a pTagData will point to the byte where the next tag data begins. + \a numBytesParsed will correspond to the cursor's position in the full TagData string. +*/ +bool BamAlignment::SkipToNextTag(const char storageType, + char*& pTagData, + unsigned int& numBytesParsed) const +{ + switch (storageType) { + + case (Constants::BAM_TAG_TYPE_ASCII) : + case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_UINT8) : + ++numBytesParsed; + ++pTagData; + break; + + case (Constants::BAM_TAG_TYPE_INT16) : + case (Constants::BAM_TAG_TYPE_UINT16) : + numBytesParsed += sizeof(uint16_t); + pTagData += sizeof(uint16_t); + break; + + case (Constants::BAM_TAG_TYPE_FLOAT) : + case (Constants::BAM_TAG_TYPE_INT32) : + case (Constants::BAM_TAG_TYPE_UINT32) : + numBytesParsed += sizeof(uint32_t); + pTagData += sizeof(uint32_t); + break; + + case (Constants::BAM_TAG_TYPE_STRING) : + case (Constants::BAM_TAG_TYPE_HEX) : + while( *pTagData ) { + ++numBytesParsed; + ++pTagData; + } + // increment for null-terminator + ++numBytesParsed; + ++pTagData; + break; + + case (Constants::BAM_TAG_TYPE_ARRAY) : + + { + // read array type + const char arrayType = *pTagData; + ++numBytesParsed; + ++pTagData; + + // read number of elements + int32_t numElements; + memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary + numBytesParsed += sizeof(uint32_t); + pTagData += sizeof(uint32_t); + + // calculate number of bytes to skip + int bytesToSkip = 0; + switch (arrayType) { + case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_UINT8) : + bytesToSkip = numElements; + break; + case (Constants::BAM_TAG_TYPE_INT16) : + case (Constants::BAM_TAG_TYPE_UINT16) : + bytesToSkip = numElements*sizeof(uint16_t); + break; + case (Constants::BAM_TAG_TYPE_FLOAT) : + case (Constants::BAM_TAG_TYPE_INT32) : + case (Constants::BAM_TAG_TYPE_UINT32) : + bytesToSkip = numElements*sizeof(uint32_t); + break; + default: + cerr << "BamAlignment ERROR: unknown binary array type encountered: " + << arrayType << endl; + return false; + } + + // skip binary array contents + numBytesParsed += bytesToSkip; + pTagData += bytesToSkip; + break; + } + + default: + cerr << "BamAlignment ERROR: unknown tag type encountered" + << storageType << endl; + return false; + } + + // return success + return true; +}