// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 21 July 2010 (DB)\r
+// Last modified: 27 July 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic constants, data structures, etc. for using BAM files\r
// ***************************************************************************\r
\r
// Tag data access methods\r
public:\r
- bool GetEditDistance(uint8_t& editDistance) const; // get "NM" tag data - contributed by Aaron Quinlan\r
- bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data\r
+ // -------------------------------------------------------------------------------------\r
+    // N.B. - The following tag-modifying methods may not be used on BamAlignments fetched\r
+    // using BamReader::GetNextAlignmentCore(). Attempting to use them will not produce an\r
+    // error message (to keep output clean), but they will ALWAYS return false. Only user-\r
+    // generated BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid.\r
+\r
+ // add tag data (create new TAG entry with TYPE and VALUE)\r
+ // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
+ // returns true if new data added, false if error or TAG already exists\r
+ // N.B. - will NOT modify existing tag. Use EditTag() instead\r
+ bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
+ bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
+ bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
+ bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
\r
- bool GetTag(const std::string& tag, std::string& destination);\r
- template<typename T> bool GetTag(const std::string& tag, T& destination);\r
+ // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)\r
+ // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
+    // returns true if edit was successful, false if error\r
+ bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
+ bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
+ bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
+ bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
+\r
+ // specific tag data access methods - these only remain for legacy support\r
+ bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (implemented as GetTag("NM", editDistance))\r
+ bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (implemented as GetTag("RG", readGroup)) \r
+ \r
+ // generic tag data access methods \r
+ bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings \r
+ bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data\r
+ bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data\r
+ bool GetTag(const std::string& tag, float& destination) const; // access floating point data\r
+ \r
+ // remove tag data\r
+ // returns true if removal was successful, false if error\r
+ // N.B. - returns false if TAG does not exist (no removal can occur)\r
+ bool RemoveTag(const std::string& tag);\r
\r
// Additional data access methods\r
public:\r
- int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations\r
+ int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations\r
\r
// 'internal' utility methods \r
private:\r
- static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
+ static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);\r
+ static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
\r
// Data members\r
public:\r
- std::string Name; // Read name\r
- int32_t Length; // Query length\r
- std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)\r
- std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)\r
- std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\r
- std::string TagData; // Tag data (accessor methods will pull the requested information out)\r
- int32_t RefID; // ID number for reference sequence\r
- int32_t Position; // Position (0-based) where alignment starts\r
- uint16_t Bin; // Bin in BAM file where this alignment resides\r
- uint16_t MapQuality; // Mapping quality score\r
- uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
+ std::string Name; // Read name\r
+ int32_t Length; // Query length\r
+ std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)\r
+ std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)\r
+ std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\r
+ std::string TagData; // Tag data (accessor methods will pull the requested information out)\r
+ int32_t RefID; // ID number for reference sequence\r
+ int32_t Position; // Position (0-based) where alignment starts\r
+ uint16_t Bin; // Bin in BAM file where this alignment resides\r
+ uint16_t MapQuality; // Mapping quality score\r
+ uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
- int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned\r
- int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
- int32_t InsertSize; // Mate-pair insert size\r
- \r
+ int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned\r
+ int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
+ int32_t InsertSize; // Mate-pair insert size\r
\r
+ // internal data\r
+ private:\r
struct BamAlignmentSupportData {\r
\r
// data members\r
{ }\r
};\r
\r
- BamAlignmentSupportData SupportData; // Contains raw character data & lengths \r
+ // contains raw character data & lengths\r
+ BamAlignmentSupportData SupportData; \r
+ \r
+ // allow these classes access to BamAlignment private members (SupportData)\r
+ // but client code should not need to touch this data\r
+ friend class BamReader;\r
+ friend class BamWriter;\r
\r
// Alignment flag query constants\r
// Use the get/set methods above instead\r
{ }\r
};\r
\r
+// ----------------------------------------------------------------\r
+// Added: 3-35-2010 DWB\r
+// Fixed: Routines to provide endian-correctness\r
+// ----------------------------------------------------------------\r
+\r
+// returns true if system is big endian\r
+// (looks at the lowest-addressed byte of a 16-bit value holding 1; that byte is 0\r
+//  only when the most significant byte is stored first)\r
+inline bool SystemIsBigEndian(void) {\r
+    const uint16_t probe = 0x0001;\r
+    const char* lowestAddressedByte = (const char*)&probe;\r
+    return ( lowestAddressedByte[0] == 0 );\r
+}\r
+\r
+// swaps endianness of 16-bit value 'in place'\r
+// N.B. - the shifts are applied to the unsigned bit pattern: right-shifting the signed\r
+//        value directly sign-extends negative inputs, which would OR 1-bits into the\r
+//        byte that is supposed to receive the original low byte\r
+inline void SwapEndian_16(int16_t& x) {\r
+    const uint16_t bits = (uint16_t)x;\r
+    x = (int16_t)(uint16_t)( (bits >> 8) | (bits << 8) );\r
+}\r
+\r
+// unsigned overload: exchanges the two bytes of 'x' in place\r
+inline void SwapEndian_16(uint16_t& x) {\r
+    const uint16_t movedDown = (uint16_t)(x >> 8);   // old high byte, now in the low position\r
+    const uint16_t movedUp   = (uint16_t)(x << 8);   // old low byte, now in the high position\r
+    x = (uint16_t)(movedDown | movedUp);\r
+}\r
+\r
+// swaps endianness of 32-bit value 'in-place'\r
+// N.B. - works on the unsigned bit pattern: the unmasked (x >> 24) term of a negative\r
+//        signed value is sign-extended and would set the upper three bytes of the result\r
+inline void SwapEndian_32(int32_t& x) {\r
+    const uint32_t bits = (uint32_t)x;\r
+    x = (int32_t)( (bits >> 24) |\r
+                  ((bits << 8) & 0x00FF0000u) |\r
+                  ((bits >> 8) & 0x0000FF00u) |\r
+                   (bits << 24)\r
+                 );\r
+}\r
+\r
+// unsigned overload: reverses the four bytes of 'x' in place\r
+inline void SwapEndian_32(uint32_t& x) {\r
+    const uint32_t byte3ToByte0 = (x >> 24);\r
+    const uint32_t byte2ToByte1 = (x >> 8) & 0x0000FF00;\r
+    const uint32_t byte1ToByte2 = (x << 8) & 0x00FF0000;\r
+    const uint32_t byte0ToByte3 = (x << 24);\r
+    x = byte3ToByte0 | byte2ToByte1 | byte1ToByte2 | byte0ToByte3;\r
+}\r
+\r
+// swaps endianness of 64-bit value 'in-place'\r
+// N.B. - works on the unsigned bit pattern: the unmasked (x >> 56) term of a negative\r
+//        signed value is sign-extended and would set the upper seven bytes of the result\r
+inline void SwapEndian_64(int64_t& x) {\r
+    const uint64_t bits = (uint64_t)x;\r
+    x = (int64_t)( (bits >> 56) |\r
+                  ((bits << 40) & 0x00FF000000000000ull) |\r
+                  ((bits << 24) & 0x0000FF0000000000ull) |\r
+                  ((bits << 8)  & 0x000000FF00000000ull) |\r
+                  ((bits >> 8)  & 0x00000000FF000000ull) |\r
+                  ((bits >> 24) & 0x0000000000FF0000ull) |\r
+                  ((bits >> 40) & 0x000000000000FF00ull) |\r
+                   (bits << 56)\r
+                 );\r
+}\r
+\r
+// unsigned overload: reverses the eight bytes of 'x' in place\r
+inline void SwapEndian_64(uint64_t& x) {\r
+    // peel bytes off the low end of 'x' and push them onto the low end of 'reversed',\r
+    // so the first byte taken ends up in the most significant position\r
+    uint64_t reversed = 0;\r
+    for ( int byteIndex = 0; byteIndex < 8; ++byteIndex ) {\r
+        reversed = (reversed << 8) | (x & 0xFF);\r
+        x >>= 8;\r
+    }\r
+    x = reversed;\r
+}\r
+\r
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
+// N.B. - the bytes are exchanged directly; the buffer is not accessed through a uint16_t\r
+//        reference, so 'data' does not have to be 2-byte aligned\r
+inline void SwapEndian_16p(char* data) {\r
+    const char first = data[0];\r
+    data[0] = data[1];\r
+    data[1] = first;\r
+}\r
+\r
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
+// N.B. - the bytes are exchanged directly; the buffer is not accessed through a uint32_t\r
+//        reference, so 'data' does not have to be 4-byte aligned\r
+inline void SwapEndian_32p(char* data) {\r
+    const char outer = data[0];\r
+    data[0] = data[3];\r
+    data[3] = outer;\r
+\r
+    const char inner = data[1];\r
+    data[1] = data[2];\r
+    data[2] = inner;\r
+}\r
+\r
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
+// N.B. - the bytes are exchanged directly; the buffer is not accessed through a uint64_t\r
+//        reference, so 'data' does not have to be 8-byte aligned\r
+inline void SwapEndian_64p(char* data) {\r
+    // exchange byte i with its mirror image (7 - i)\r
+    for ( int i = 0; i < 4; ++i ) {\r
+        const char mirrored = data[7 - i];\r
+        data[7 - i] = data[i];\r
+        data[i] = mirrored;\r
+    }\r
+}\r
+\r
// ----------------------------------------------------------------\r
// BamAlignment member methods\r
\r
// constructors & destructor\r
-inline \r
-BamAlignment::BamAlignment(void) { }\r
+inline BamAlignment::BamAlignment(void) { }\r
\r
-inline \r
-BamAlignment::BamAlignment(const BamAlignment& other)\r
+inline BamAlignment::BamAlignment(const BamAlignment& other)\r
: Name(other.Name)\r
, Length(other.Length)\r
, QueryBases(other.QueryBases)\r
, SupportData(other.SupportData)\r
{ }\r
\r
-inline \r
-BamAlignment::~BamAlignment(void) { }\r
+inline BamAlignment::~BamAlignment(void) { }\r
\r
// Queries against alignment flags\r
inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
return alignEnd;\r
}\r
\r
-// get "NM" tag data - contributed by Aaron Quinlan\r
-// stores data in 'editDistance', returns success/fail\r
-inline \r
-bool BamAlignment::GetEditDistance(uint8_t& editDistance) const {\r
+// add a new string-valued tag (TAG + TYPE + VALUE + null-term) to the end of TagData\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is not "Z" or "H", or if TAG already exists (use EditTag() instead)\r
+inline\r
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type != "Z" && type != "H" ) return false;\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // if tag already exists, return false\r
+    // use EditTag explicitly instead\r
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
+\r
+    // otherwise, build the new entry and append it to TagData directly\r
+    // (no variable-length stack buffer; VALUE is taken up to its first null byte so the\r
+    //  stored entry always ends in exactly one null terminator)\r
+    std::string newTag = tag + type;\r
+    newTag.append(value.c_str());\r
+    newTag.push_back('\0');\r
+    TagData.append(newTag);\r
+\r
+    // return success\r
+    return true;\r
+}\r
+\r
+// add a new integer-valued tag (TAG + TYPE + value bytes) to the end of TagData\r
+// TYPE selects how many bytes are stored: A/c/C = 1, s/S = 2, i/I = 4 - the same widths the\r
+// integer GetTag() reads back. VALUE is narrowed to that width (high-order bits are dropped).\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is not one of the integer types above, or if TAG already exists\r
+inline\r
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type == "f" || type == "Z" || type == "H" ) return false;\r
+\r
+    // convert value to the byte string matching TYPE\r
+    // (storing 4 bytes under a 1- or 2-byte TYPE would misalign every tag that follows)\r
+    char valueBuffer[sizeof(uint32_t)];\r
+    unsigned int valueLength = 0;\r
+    switch ( type.at(0) ) {\r
+\r
+        // 1 byte data\r
+        case 'A':\r
+        case 'c':\r
+        case 'C': {\r
+            const uint8_t narrowValue = (uint8_t)value;\r
+            memcpy(valueBuffer, &narrowValue, sizeof(narrowValue));\r
+            valueLength = sizeof(narrowValue);\r
+            break;\r
+        }\r
+\r
+        // 2 byte data\r
+        case 's':\r
+        case 'S': {\r
+            const uint16_t narrowValue = (uint16_t)value;\r
+            memcpy(valueBuffer, &narrowValue, sizeof(narrowValue));\r
+            valueLength = sizeof(narrowValue);\r
+            break;\r
+        }\r
+\r
+        // 4 byte data\r
+        case 'i':\r
+        case 'I':\r
+            memcpy(valueBuffer, &value, sizeof(uint32_t));\r
+            valueLength = sizeof(uint32_t);\r
+            break;\r
+\r
+        // not an integer storage type\r
+        default:\r
+            return false;\r
+    }\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // if tag already exists, return false\r
+    // use EditTag explicitly instead\r
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
+\r
+    // append TAG, TYPE and the value bytes (binary data - no null terminator is added)\r
+    TagData.append(tag);\r
+    TagData.append(type);\r
+    TagData.append(valueBuffer, valueLength);\r
+\r
+    // return success\r
+    return true;\r
+}\r
\r
- if ( TagData.empty() ) { return false; }\r
+// signed overload - hands the value, converted to its unsigned 32-bit form, to the uint32_t overload\r
+inline\r
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
+    const uint32_t unsignedValue = (uint32_t)value;\r
+    return AddTag(tag, type, unsignedValue);\r
+}\r
\r
+// add a new tag whose value is the raw bytes of a float (TAG + TYPE + sizeof(float) bytes)\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is "Z" or "H", or if TAG already exists (use EditTag() instead)\r
+// NOTE(review): any other TYPE is accepted and still stores the float's bytes unchanged -\r
+//               confirm whether TYPEs other than "f" should convert the value or be rejected\r
+inline\r
+bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type == "Z" || type == "H" ) return false;\r
+\r
// localize the tag data\r
char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLen = TagData.size();\r
+    const unsigned int tagDataLength = TagData.size();\r
unsigned int numBytesParsed = 0;\r
+\r
+    // if tag already exists, return false\r
+    // use EditTag explicitly instead\r
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
+\r
+    // otherwise, take the value's bytes as stored in memory\r
+    char valueBuffer[sizeof(float)];\r
+    memcpy(valueBuffer, &value, sizeof(float));\r
+\r
+    // append TAG, TYPE and the value bytes to TagData directly\r
+    // (binary data - appended by explicit length, no variable-length stack buffer)\r
+    TagData.append(tag);\r
+    TagData.append(type);\r
+    TagData.append(valueBuffer, sizeof(float));\r
+\r
+    // return success\r
+    return true;\r
+}\r
\r
- bool foundEditDistanceTag = false;\r
- while( numBytesParsed < tagDataLen ) {\r
+// set an existing tag to a string value, or add the tag if it is not present\r
+// the stored TYPE byte is replaced by TYPE and the old value (whatever its length) by\r
+// VALUE + null-term; all other tags are kept in their original order\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is not "Z" or "H", or if the existing value cannot be stepped over\r
+inline\r
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type != "Z" && type != "H" ) return false;\r
+\r
+    // localize the tag data\r
+    char* pOriginalTagData = (char*)TagData.data();\r
+    char* pTagData = pOriginalTagData;\r
+    const unsigned int originalTagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // tag not found, attempt AddTag\r
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )\r
+        return AddTag(tag, type, value);\r
\r
- const char* pTagType = pTagData;\r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
+    // FindTag() stops on the first byte of the existing value, just past the 3-byte TAG/TYPE header\r
+    const unsigned int valueOffset = numBytesParsed;\r
+    const char existingType = *(pTagData - 1);\r
+\r
+    // step over the existing value; numBytesParsed then marks where the following tags begin\r
+    // (a failure here - e.g. an unrecognized storage type - means nothing can be edited safely)\r
+    if ( !SkipToNextTag(existingType, pTagData, numBytesParsed) ) return false;\r
+    if ( numBytesParsed > originalTagDataLength ) return false;\r
+\r
+    // rebuild: everything up to the old value (with the new TYPE), the new value, the remaining tags\r
+    std::string newTagData(pOriginalTagData, valueOffset);\r
+    newTagData[valueOffset - 1] = type.at(0);\r
+    newTagData.append(value.c_str());\r
+    newTagData.push_back('\0');\r
+    newTagData.append(pOriginalTagData + numBytesParsed, originalTagDataLength - numBytesParsed);\r
+\r
+    // save new tag data\r
+    TagData = newTagData;\r
+    return true;\r
+}\r
\r
- // check the current tag\r
- if ( strncmp(pTagType, "NM", 2) == 0 ) {\r
- foundEditDistanceTag = true;\r
- break;\r
- }\r
+// set an existing tag to an integer value, or add the tag if it is not present\r
+// TYPE selects how many bytes are stored: A/c/C = 1, s/S = 2, i/I = 4 - the same widths the\r
+// integer GetTag() reads back. The stored TYPE byte is replaced by TYPE and the old value\r
+// (whatever its width) by the new bytes; all other tags are kept in their original order.\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is not an integer type, or if the existing value cannot be stepped over\r
+inline\r
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type == "f" || type == "Z" || type == "H" ) return false;\r
+\r
+    // convert value to the byte string matching TYPE (high-order bits beyond the width are dropped)\r
+    char valueBuffer[sizeof(uint32_t)];\r
+    unsigned int valueLength = 0;\r
+    switch ( type.at(0) ) {\r
+\r
+        // 1 byte data\r
+        case 'A':\r
+        case 'c':\r
+        case 'C': {\r
+            const uint8_t narrowValue = (uint8_t)value;\r
+            memcpy(valueBuffer, &narrowValue, sizeof(narrowValue));\r
+            valueLength = sizeof(narrowValue);\r
+            break;\r
+        }\r
+\r
+        // 2 byte data\r
+        case 's':\r
+        case 'S': {\r
+            const uint16_t narrowValue = (uint16_t)value;\r
+            memcpy(valueBuffer, &narrowValue, sizeof(narrowValue));\r
+            valueLength = sizeof(narrowValue);\r
+            break;\r
+        }\r
+\r
+        // 4 byte data\r
+        case 'i':\r
+        case 'I':\r
+            memcpy(valueBuffer, &value, sizeof(uint32_t));\r
+            valueLength = sizeof(uint32_t);\r
+            break;\r
+\r
+        // not an integer storage type\r
+        default:\r
+            return false;\r
+    }\r
+\r
+    // localize the tag data\r
+    char* pOriginalTagData = (char*)TagData.data();\r
+    char* pTagData = pOriginalTagData;\r
+    const unsigned int originalTagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // if tag found, replace its TYPE and value, return success\r
+    if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
\r
- // get the storage class and find the next tag\r
- if (*pTagStorageType == '\0') { return false; }\r
- SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
- if (*pTagData == '\0') { return false; }\r
+        // FindTag() stops on the first byte of the existing value, just past the 3-byte TAG/TYPE header\r
+        const unsigned int valueOffset = numBytesParsed;\r
+        const char existingType = *(pTagData - 1);\r
+\r
+        // step over the existing value; numBytesParsed then marks where the following tags begin\r
+        // (a failure here - e.g. an unrecognized storage type - means nothing can be edited safely)\r
+        if ( !SkipToNextTag(existingType, pTagData, numBytesParsed) ) return false;\r
+        if ( numBytesParsed > originalTagDataLength ) return false;\r
+\r
+        // rebuild: everything up to the old value (with the new TYPE), the new value, the remaining tags\r
+        std::string newTagData(pOriginalTagData, valueOffset);\r
+        newTagData[valueOffset - 1] = type.at(0);\r
+        newTagData.append(valueBuffer, valueLength);\r
+        newTagData.append(pOriginalTagData + numBytesParsed, originalTagDataLength - numBytesParsed);\r
+\r
+        // save new tag data\r
+        TagData = newTagData;\r
+        return true;\r
}\r
- // return if the edit distance tag was not present\r
- if ( !foundEditDistanceTag ) { return false; }\r
+\r
+    // tag not found, attempt AddTag\r
+    return AddTag(tag, type, value);\r
+}\r
\r
- // assign the editDistance value\r
- std::memcpy(&editDistance, pTagData, 1);\r
- return true;\r
+// signed overload - hands the value, converted to its unsigned 32-bit form, to the uint32_t overload\r
+inline\r
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
+    const uint32_t unsignedValue = (uint32_t)value;\r
+    return EditTag(tag, type, unsignedValue);\r
+}\r
+\r
+// set an existing tag to the raw bytes of a float, or add the tag if it is not present\r
+// the stored TYPE byte is replaced by TYPE and the old value (whatever its width) by\r
+// sizeof(float) bytes; all other tags are kept in their original order\r
+// returns false if this alignment holds core data only, if TAG is not 2 characters or TYPE is\r
+// not 1 character, if TYPE is "Z" or "H", or if the existing value cannot be stepped over\r
+// NOTE(review): any other TYPE is accepted and still stores the float's bytes unchanged -\r
+//               confirm whether TYPEs other than "f" should convert the value or be rejected\r
+inline\r
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {\r
+\r
+    if ( SupportData.HasCoreOnly ) return false;\r
+    if ( tag.size() != 2 || type.size() != 1 ) return false;\r
+    if ( type == "Z" || type == "H" ) return false;\r
+\r
+    // localize the tag data\r
+    char* pOriginalTagData = (char*)TagData.data();\r
+    char* pTagData = pOriginalTagData;\r
+    const unsigned int originalTagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // tag not found, attempt AddTag\r
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )\r
+        return AddTag(tag, type, value);\r
+\r
+    // FindTag() stops on the first byte of the existing value, just past the 3-byte TAG/TYPE header\r
+    const unsigned int valueOffset = numBytesParsed;\r
+    const char existingType = *(pTagData - 1);\r
+\r
+    // step over the existing value; numBytesParsed then marks where the following tags begin\r
+    // (a failure here - e.g. an unrecognized storage type - means nothing can be edited safely)\r
+    if ( !SkipToNextTag(existingType, pTagData, numBytesParsed) ) return false;\r
+    if ( numBytesParsed > originalTagDataLength ) return false;\r
+\r
+    // take the new value's bytes as stored in memory\r
+    char valueBuffer[sizeof(float)];\r
+    memcpy(valueBuffer, &value, sizeof(float));\r
+\r
+    // rebuild: everything up to the old value (with the new TYPE), the new value, the remaining tags\r
+    std::string newTagData(pOriginalTagData, valueOffset);\r
+    newTagData[valueOffset - 1] = type.at(0);\r
+    newTagData.append(valueBuffer, sizeof(float));\r
+    newTagData.append(pOriginalTagData + numBytesParsed, originalTagDataLength - numBytesParsed);\r
+\r
+    // save new tag data\r
+    TagData = newTagData;\r
+    return true;\r
+}\r
+\r
+// get "NM" tag data - originally contributed by Aaron Quinlan\r
+// stores data in 'editDistance', returns success/fail\r
+// (kept as a thin wrapper over the unsigned-integer GetTag())\r
+inline\r
+bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {\r
+    const std::string editDistanceTag = "NM";\r
+    return GetTag(editDistanceTag, editDistance);\r
}\r
\r
// get "RG" tag data\r
// stores data in 'readGroup', returns success/fail\r
inline \r
bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
+ // thin wrapper: forwards to the string GetTag() with the fixed tag name "RG"\r
+ return GetTag("RG", readGroup);\r
+}\r
+\r
+// access variable-length string tag data (TYPE 'Z' or 'H')\r
+// copies the tag's null-terminated value into 'destination'\r
+// returns false if this alignment holds core data only, if there is no tag data, if TAG is\r
+// not found, or if TAG is stored with a non-string TYPE (its bytes are not null-terminated text)\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {\r
\r
- if ( TagData.empty() ) { return false; }\r
+    // make sure tag data exists\r
+    if ( SupportData.HasCoreOnly || TagData.empty() )\r
+        return false;\r
\r
// localize the tag data\r
char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLen = TagData.size();\r
+    const unsigned int tagDataLength = TagData.size();\r
unsigned int numBytesParsed = 0;\r
+\r
+    // tag not found, return failure\r
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )\r
+        return false;\r
+\r
+    // FindTag() stops on the first value byte; the TYPE byte sits immediately before it\r
+    const char type = *(pTagData - 1);\r
+    if ( type != 'Z' && type != 'H' )\r
+        return false;\r
+\r
+    // store the value (up to, not including, its null terminator), return success\r
+    destination.assign(pTagData);\r
+    return true;\r
+}\r
\r
- bool foundReadGroupTag = false;\r
- while( numBytesParsed < tagDataLen ) {\r
-\r
- const char* pTagType = pTagData;\r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
+// access integer tag data\r
+// reads a 1-byte (A/c/C), 2-byte (s/S) or 4-byte (i/I) value and widens it to 32 bits;\r
+// the signed narrow types 'c' and 's' are sign-extended, so a stored -1 reads back as -1\r
+// through the int32_t overload (and as 0xFFFFFFFF through this one)\r
+// returns false (after printing an error) if TAG is stored as f/Z/H or with an unknown TYPE;\r
+// returns false silently if this alignment holds core data only, has no tag data, or lacks TAG\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {\r
+\r
+    // make sure tag data exists\r
+    if ( SupportData.HasCoreOnly || TagData.empty() )\r
+        return false;\r
\r
- // check the current tag\r
- if ( std::strncmp(pTagType, "RG", 2) == 0 ) {\r
- foundReadGroupTag = true;\r
- break;\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLength = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    // if tag found, determine data byte-length, store data in destination, return success\r
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
+\r
+        // determine data byte-length (the TYPE byte sits immediately before the value)\r
+        const char type = *(pTagData - 1);\r
+        int destinationLength = 0;\r
+        switch (type) {\r
+            // 1 byte data\r
+            case 'A':\r
+            case 'c':\r
+            case 'C':\r
+                destinationLength = 1;\r
+                break;\r
+\r
+            // 2 byte data\r
+            case 's':\r
+            case 'S':\r
+                destinationLength = 2;\r
+                break;\r
+\r
+            // 4 byte data\r
+            case 'i':\r
+            case 'I':\r
+                destinationLength = 4;\r
+                break;\r
+\r
+            // unsupported type for integer destination (float or var-length strings)\r
+            case 'f':\r
+            case 'Z':\r
+            case 'H':\r
+                printf("ERROR: Cannot store tag of type %c in integer destination\n", type);\r
+                return false;\r
+\r
+            // unknown tag type\r
+            default:\r
+                printf("ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
+                return false;\r
}\r
-\r
- // get the storage class and find the next tag\r
- if (*pTagStorageType == '\0') { return false; }\r
- SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
- if (*pTagData == '\0') { return false; }\r
+\r
+        // store in destination\r
+        // narrow values are read into a variable of their own width first, then widened,\r
+        // so the result does not depend on which end of 'destination' a partial copy lands in\r
+        if ( destinationLength == 1 ) {\r
+            uint8_t narrowValue = 0;\r
+            memcpy(&narrowValue, pTagData, sizeof(narrowValue));\r
+            destination = ( type == 'c' ) ? (uint32_t)(int32_t)(int8_t)narrowValue\r
+                                          : (uint32_t)narrowValue;\r
+        }\r
+        else if ( destinationLength == 2 ) {\r
+            uint16_t narrowValue = 0;\r
+            memcpy(&narrowValue, pTagData, sizeof(narrowValue));\r
+            destination = ( type == 's' ) ? (uint32_t)(int32_t)(int16_t)narrowValue\r
+                                          : (uint32_t)narrowValue;\r
+        }\r
+        else {\r
+            memcpy(&destination, pTagData, sizeof(uint32_t));\r
+        }\r
+        return true;\r
}\r
+\r
+    // tag not found, return failure\r
+    return false;\r
+}\r
\r
- // return if the read group tag was not present\r
- if ( !foundReadGroupTag ) { return false; }\r
-\r
- // assign the read group\r
- const unsigned int readGroupLen = std::strlen(pTagData);\r
- readGroup.resize(readGroupLen);\r
- std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
- return true;\r
+// signed overload - lets the uint32_t overload fill 'destination' through an unsigned view of it\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {\r
+    uint32_t& unsignedView = (uint32_t&)destination;\r
+    return GetTag(tag, unsignedView);\r
}\r
\r
+// access floating point tag data\r
+// a tag stored as 'f' is copied as-is; a tag stored with an integer TYPE (A/c/C, s/S, i/I)\r
+// is read at its own width and converted to float by value\r
+// returns false (after printing an error) if TAG is stored as Z/H or with an unknown TYPE;\r
+// returns false silently if this alignment holds core data only, has no tag data, or lacks TAG\r
inline\r
-bool BamAlignment::GetTag(const std::string& tag, std::string& destination) {\r
+bool BamAlignment::GetTag(const std::string& tag, float& destination) const {\r
\r
- if ( TagData.empty() ) { return false; }\r
+    // make sure tag data exists\r
+    if ( SupportData.HasCoreOnly || TagData.empty() )\r
+        return false;\r
\r
// localize the tag data\r
char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLen = TagData.size();\r
+    const unsigned int tagDataLength = TagData.size();\r
unsigned int numBytesParsed = 0;\r
-\r
- bool foundReadGroupTag = false;\r
- while( numBytesParsed < tagDataLen ) {\r
-\r
- const char* pTagType = pTagData;\r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
-\r
- // check the current tag\r
- if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
- foundReadGroupTag = true;\r
- break;\r
+\r
+    // if tag found, read the value according to its stored TYPE, store in destination, return success\r
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
+\r
+        // the TYPE byte sits immediately before the value\r
+        const char type = *(pTagData - 1);\r
+        switch(type) {\r
+\r
+            // floating point data - copy the stored bytes directly\r
+            case 'f':\r
+                memcpy(&destination, pTagData, sizeof(float));\r
+                return true;\r
+\r
+            // 1 byte data\r
+            case 'A':\r
+            case 'C': {\r
+                uint8_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+            case 'c': {\r
+                int8_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+\r
+            // 2 byte data\r
+            case 'S': {\r
+                uint16_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+            case 's': {\r
+                int16_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+\r
+            // 4 byte data\r
+            case 'I': {\r
+                uint32_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+            case 'i': {\r
+                int32_t stored = 0;\r
+                memcpy(&stored, pTagData, sizeof(stored));\r
+                destination = (float)stored;\r
+                return true;\r
+            }\r
+\r
+            // unsupported type (var-length strings)\r
+            case 'Z':\r
+            case 'H':\r
+                printf("ERROR: Cannot store tag of type %c in float destination\n", type);\r
+                return false;\r
+\r
+            // unknown tag type\r
+            default:\r
+                printf("ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
+                return false;\r
+        }\r
-\r
- // get the storage class and find the next tag\r
- if (*pTagStorageType == '\0') { return false; }\r
- SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
- if (*pTagData == '\0') { return false; }\r
+\r
+        // every case of the switch above returns, so control never reaches this point\r
}\r
-\r
- // return if the read group tag was not present\r
- if ( !foundReadGroupTag ) { return false; }\r
-\r
- // assign the read group\r
- const unsigned int dataLen = std::strlen(pTagData);\r
- destination.resize(dataLen);\r
- std::memcpy( (char*)destination.data(), pTagData, dataLen );\r
- return true;\r
+\r
+    // tag not found, return failure\r
+    return false;\r
}\r
\r
-template<typename T> \r
-bool BamAlignment::GetTag(const std::string& tag, T& destination) {\r
+// remove TAG (its 3-byte TAG/TYPE header and its value) from TagData\r
+// all other tags are kept in their original order\r
+// returns true if the tag was removed; returns false if this alignment holds core data only,\r
+// if there is no tag data, if TAG does not exist, or if the tag's value cannot be stepped over\r
+inline\r
+bool BamAlignment::RemoveTag(const std::string& tag) {\r
+\r
+    // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed\r
+    // also, return false if no data present to remove\r
+    if ( SupportData.HasCoreOnly || TagData.empty() ) return false;\r
\r
- if ( TagData.empty() ) { return false; }\r
-\r
// localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLen = TagData.size();\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int originalTagDataLength = TagData.size();\r
unsigned int numBytesParsed = 0;\r
+\r
+    // tag not found, no removal - return failure\r
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )\r
+        return false;\r
+\r
+    // FindTag() stops just past the 3-byte TAG/TYPE header, so the entry begins 3 bytes earlier\r
+    const unsigned int tagOffset = numBytesParsed - 3;\r
+    const char storageType = *(pTagData - 1);\r
+\r
+    // step over the value; numBytesParsed then marks the end of the entry\r
+    // (a failure here - e.g. an unrecognized storage type - means the entry's extent is unknown)\r
+    if ( !SkipToNextTag(storageType, pTagData, numBytesParsed) ) return false;\r
+    if ( numBytesParsed > originalTagDataLength ) return false;\r
+\r
+    // cut the entry out of TagData in place (offsets were computed before this invalidates pTagData)\r
+    TagData.erase(tagOffset, numBytesParsed - tagOffset);\r
+    return true;\r
+}\r
+\r
+// scan tag data for TAG, starting at pTagData / numBytesParsed\r
+// on a match, returns true with pTagData on the first byte of the tag's value (its TYPE byte\r
+// is at pTagData - 1) and numBytesParsed just past the matching 3-byte TAG/TYPE header\r
+// returns false if no tag matches, if fewer than 3 bytes remain for a header, if a TYPE byte\r
+// is null, if a value cannot be stepped over, or if a null byte follows a skipped value\r
+inline\r
+bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {\r
\r
- bool foundDesiredTag = false;\r
- while( numBytesParsed < tagDataLen ) {\r
+    while ( numBytesParsed < tagDataLength ) {\r
\r
- const char* pTagType = pTagData;\r
+        // a complete TAG(2 bytes) + TYPE(1 byte) header must still fit in the remaining data;\r
+        // otherwise the reads below would run past the end of the buffer\r
+        if ( tagDataLength - numBytesParsed < 3 ) return false;\r
+\r
+        const char* pTagType = pTagData;\r
const char* pTagStorageType = pTagData + 2;\r
pTagData += 3;\r
numBytesParsed += 3;\r
\r
- // check the current tag\r
- if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
- foundDesiredTag = true;\r
- break;\r
- }\r
+        // check the current tag, return true on match\r
+        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 )\r
+            return true;\r
\r
// get the storage class and find the next tag\r
- if (*pTagStorageType == '\0') { return false; }\r
- SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
- if (*pTagData == '\0') { return false; }\r
+        if ( *pTagStorageType == '\0' ) return false;\r
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;\r
+        if ( *pTagData == '\0' ) return false;\r
}\r
- // return if the edit distance tag was not present\r
- if ( !foundDesiredTag ) { return false; }\r
-\r
- // assign the editDistance value\r
- std::memcpy(&destination, pTagData, sizeof(T));\r
- return true;\r
+\r
+    // checked all tags, none match\r
+    return false;\r
}\r
\r
inline\r
-void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
+bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
\r
switch(storageType) {\r
\r
++numBytesParsed;\r
++pTagData;\r
}\r
- // ---------------------------\r
- // Added: 3-25-2010 DWB\r
- // Contributed: ARQ\r
- // Fixed: error parsing variable length tag data\r
+ // increment for null-terminator\r
+ ++numBytesParsed;\r
++pTagData;\r
- // ---------------------------\r
break;\r
\r
- default:\r
- printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
- exit(1);\r
+ default: \r
+ // error case\r
+ printf("ERROR: Unknown tag storage class encountered: [%c]\n", storageType);\r
+ return false;\r
}\r
-}\r
-\r
-// ----------------------------------------------------------------\r
-// Added: 3-35-2010 DWB\r
-// Fixed: Routines to provide endian-correctness\r
-// ----------------------------------------------------------------\r
-\r
-// returns true if system is big endian\r
-inline bool SystemIsBigEndian(void) {\r
- const uint16_t one = 0x0001;\r
- return ((*(char*) &one) == 0 );\r
-}\r
-\r
-// swaps endianness of 16-bit value 'in place'\r
-inline void SwapEndian_16(int16_t& x) {\r
- x = ((x >> 8) | (x << 8));\r
-}\r
-\r
-inline void SwapEndian_16(uint16_t& x) {\r
- x = ((x >> 8) | (x << 8));\r
-}\r
-\r
-// swaps endianness of 32-bit value 'in-place'\r
-inline void SwapEndian_32(int32_t& x) {\r
- x = ( (x >> 24) | \r
- ((x << 8) & 0x00FF0000) | \r
- ((x >> 8) & 0x0000FF00) | \r
- (x << 24)\r
- );\r
-}\r
-\r
-inline void SwapEndian_32(uint32_t& x) {\r
- x = ( (x >> 24) | \r
- ((x << 8) & 0x00FF0000) | \r
- ((x >> 8) & 0x0000FF00) | \r
- (x << 24)\r
- );\r
-}\r
-\r
-// swaps endianness of 64-bit value 'in-place'\r
-inline void SwapEndian_64(int64_t& x) {\r
- x = ( (x >> 56) | \r
- ((x << 40) & 0x00FF000000000000ll) |\r
- ((x << 24) & 0x0000FF0000000000ll) |\r
- ((x << 8) & 0x000000FF00000000ll) |\r
- ((x >> 8) & 0x00000000FF000000ll) |\r
- ((x >> 24) & 0x0000000000FF0000ll) |\r
- ((x >> 40) & 0x000000000000FF00ll) |\r
- (x << 56)\r
- );\r
-}\r
-\r
-inline void SwapEndian_64(uint64_t& x) {\r
- x = ( (x >> 56) | \r
- ((x << 40) & 0x00FF000000000000ll) |\r
- ((x << 24) & 0x0000FF0000000000ll) |\r
- ((x << 8) & 0x000000FF00000000ll) |\r
- ((x >> 8) & 0x00000000FF000000ll) |\r
- ((x >> 24) & 0x0000000000FF0000ll) |\r
- ((x >> 40) & 0x000000000000FF00ll) |\r
- (x << 56)\r
- );\r
-}\r
-\r
-// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_16p(char* data) {\r
- uint16_t& value = (uint16_t&)*data; \r
- SwapEndian_16(value);\r
-}\r
-\r
-// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_32p(char* data) {\r
- uint32_t& value = (uint32_t&)*data; \r
- SwapEndian_32(value);\r
-}\r
-\r
-// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_64p(char* data) {\r
- uint64_t& value = (uint64_t&)*data; \r
- SwapEndian_64(value);\r
+ \r
+ // return success\r
+ return true;\r
}\r
\r
} // namespace BamTools\r