X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamAlignment.h;h=9a8e7be0a7351a405317d6c0c281ad01c384a3c0;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=ac50552656ee551e5ded24cb47a3124e72c42cee;hpb=049d6b2501ea9c9afeb8588013d5f536246a2ca8;p=bamtools.git diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h index ac50552..9a8e7be 100644 --- a/src/api/BamAlignment.h +++ b/src/api/BamAlignment.h @@ -1,9 +1,8 @@ // *************************************************************************** // BamAlignment.h (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 18 September 2010 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides the BamAlignment data structure // *************************************************************************** @@ -11,15 +10,26 @@ #ifndef BAMALIGNMENT_H #define BAMALIGNMENT_H +#include "api/api_global.h" +#include "api/BamAux.h" +#include "api/BamConstants.h" +#include +#include #include #include -#include "BamAux.h" namespace BamTools { +//! \cond +// forward declaration of BamAlignment's "friends" +namespace Internal { + class BamReaderPrivate; + class BamWriterPrivate; +} // namespace Internal +//! 
\endcond + // BamAlignment data structure -// explicitly labeled as 'struct' to indicate that (most of) its fields are public -struct BamAlignment { +struct API_EXPORT BamAlignment { // constructors & destructor public: @@ -27,117 +37,104 @@ struct BamAlignment { BamAlignment(const BamAlignment& other); ~BamAlignment(void); - // Queries against alignment flags + // queries against alignment flags public: - bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate - bool IsFailedQC(void) const; // Returns true if this read failed quality control - bool IsFirstMate(void) const; // Returns true if alignment is first mate on read - bool IsMapped(void) const; // Returns true if alignment is mapped - bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped - bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand - bool IsPaired(void) const; // Returns true if alignment part of paired-end read - bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment - bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution - bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand - bool IsSecondMate(void) const; // Returns true if alignment is second mate on read - - // Manipulate alignment flags + bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate + bool IsFailedQC(void) const; // returns true if this read failed quality control + bool IsFirstMate(void) const; // returns true if alignment is first mate on read + bool IsMapped(void) const; // returns true if alignment is mapped + bool IsMateMapped(void) const; // returns true if alignment's mate is mapped + bool IsMateReverseStrand(void) const; // returns true if alignment's mate mapped to reverse strand + bool IsPaired(void) const; // returns true if alignment part of paired-end read + bool 
IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment + bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution + bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand + bool IsSecondMate(void) const; // returns true if alignment is second mate on read + + // manipulate alignment flags public: - void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag - void SetIsFailedQC(bool ok); // Sets "failed quality control" flag - void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag - void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is mapped" flag - void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag - void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag - void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag - void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag - void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag - void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag - void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag - - // Tag data access methods + void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag + void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag + void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag + void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag + void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag + void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag + void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag + void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary 
alignment" flag + void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag + void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag + void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag + + // tag data access methods public: - // ------------------------------------------------------------------------------------- - // N.B. - The following tag access methods may not be used on BamAlignments fetched - // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in - // error message (to keep output clean) but will ALWAYS return false. Only user-created - // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here. - - // add tag data (create new TAG entry with TYPE and VALUE) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if new data added, false if error or TAG already exists - // N.B. - will NOT modify existing tag. 
Use EditTag() instead - // @tag - two character tag name - // @type - single character tag type (see SAM/BAM spec for details) - // @value - value to associate with tag - bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if edit was successfaul, false if error - // @tag - two character tag name - // @type - single character tag type (see SAM/BAM spec for details) - // @value - new value for tag - bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // specific tag data access methods - these only remain for legacy support - // returns whether specific tag could be retrieved - bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance)) - bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (equivalent to GetTag("RG", readGroup)) - - // generic tag data access methods - // returns whether tag is found & tag type is compatible with DESTINATION - // @tag - two character tag name - // @destination - if found, tag value is 
stored here - bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings - bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data - bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data - bool GetTag(const std::string& tag, float& destination) const; // access floating point data - - // remove tag data - // returns true if removal was successful, false if error - // N.B. - returns false if TAG does not exist (no removal can occur) - // @tag - two character tag name - bool RemoveTag(const std::string& tag); - - // Additional data access methods + + // add a new tag + template bool AddTag(const std::string& tag, const std::string& type, const T& value); + template bool AddTag(const std::string& tag, const std::vector& values); + + // edit (or append) tag + template bool EditTag(const std::string& tag, const std::string& type, const T& value); + template bool EditTag(const std::string& tag, const std::vector& values); + + // retrieves tag data + template bool GetTag(const std::string& tag, T& destination) const; + template bool GetTag(const std::string& tag, std::vector& destination) const; + + // retrieves the SAM/BAM type-code for requested tag name + bool GetTagType(const std::string& tag, char& type) const; + + // returns true if alignment has a record for this tag name + bool HasTag(const std::string& tag) const; + + // removes a tag + void RemoveTag(const std::string& tag); + + // additional methods public: - // calculates & returns alignment end position, based on starting position and CIGAR operations - // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference - // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats. 
- int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; + // populates alignment string fields + bool BuildCharData(void); - // 'internal' utility methods - private: - static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed); - static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); + // calculates alignment end position + int GetEndPosition(bool usePadded = false, bool closedInterval = false) const; + + // returns a description of the last error that occurred + std::string GetErrorString(void) const; - // Data members + // public data fields public: - std::string Name; // Read name - int32_t Length; // Query length - std::string QueryBases; // 'Original' sequence (as reported from sequencing machine) - std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping) - std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values) - std::string TagData; // Tag data (accessor methods will pull the requested information out) - int32_t RefID; // ID number for reference sequence - int32_t Position; // Position (0-based) where alignment starts - uint16_t Bin; // Bin in BAM file where this alignment resides - uint16_t MapQuality; // Mapping quality score - uint32_t AlignmentFlag; // Alignment bit-flag - see Is() methods to query this value, SetIs() methods to manipulate + std::string Name; // read name + int32_t Length; // length of query sequence + std::string QueryBases; // 'original' sequence (as reported from sequencing machine) + std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping) + std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values) + std::string TagData; // tag data (use provided methods to query/modify) + int32_t RefID; // ID number for reference sequence + int32_t Position; // position (0-based) where alignment starts + uint16_t 
Bin; // BAM (standard) index bin number for this alignment + uint16_t MapQuality; // mapping quality score + uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify) std::vector CigarData; // CIGAR operations for this alignment - int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned - int32_t MatePosition; // Position (0-based) where alignment's mate starts - int32_t InsertSize; // Mate-pair insert size - + int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned + int32_t MatePosition; // position (0-based) where alignment's mate starts + int32_t InsertSize; // mate-pair insert size + std::string Filename; // name of BAM file which this alignment comes from + + //! \internal + // internal utility methods + private: + bool FindTag(const std::string& tag, + char*& pTagData, + const unsigned int& tagDataLength, + unsigned int& numBytesParsed) const; + bool IsValidSize(const std::string& tag, const std::string& type) const; + void SetErrorString(const std::string& where, const std::string& what) const; + bool SkipToNextTag(const char storageType, + char*& pTagData, + unsigned int& numBytesParsed) const; + // internal data private: + struct BamAlignmentSupportData { // data members @@ -157,33 +154,471 @@ struct BamAlignment { , HasCoreOnly(false) { } }; - - // contains raw character data & lengths - BamAlignmentSupportData SupportData; - - // allow these classes access to BamAlignment private members (SupportData) - // but client code should not need to touch this data - friend class BamReader; - friend class BamWriter; - - // Alignment flag query constants - // Use the get/set methods above instead - private: - enum { PAIRED = 1 - , PROPER_PAIR = 2 - , UNMAPPED = 4 - , MATE_UNMAPPED = 8 - , REVERSE = 16 - , MATE_REVERSE = 32 - , READ_1 = 64 - , READ_2 = 128 - , SECONDARY = 256 - , QC_FAILED = 512 - , DUPLICATE = 1024 - }; + BamAlignmentSupportData SupportData; + friend 
class Internal::BamReaderPrivate; + friend class Internal::BamWriterPrivate; + + mutable std::string ErrorString; // mutable to allow updates even in logically const methods + //! \endinternal }; -// convenience typedef(s) +// --------------------------------------------------------- +// BamAlignment tag access methods + +/*! \fn bool AddTag(const std::string& tag, const std::string& type, const T& value) + \brief Adds a field to the BAM tags. + + Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead. + + \param[in] tag 2-character tag name + \param[in] type 1-character tag type + \param[in] value data to store + \return \c true if the \b new tag was added successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ +template +inline bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const T& value) { + + // if char data not populated, do that first + if ( SupportData.HasCoreOnly ) + BuildCharData(); + + // check tag/type size + if ( !IsValidSize(tag, type) ) { + // TODO: set error string? + return false; + } + + // check that storage type code is OK for T + if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) { + // TODO: set error string? + return false; + } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // if tag already exists, return false + // use EditTag explicitly instead + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? 
+ return false; + } + + // otherwise, convert value to string + union { T value; char valueBuffer[sizeof(T)]; } un; + un.value = value; + + // copy original tag data to temp buffer + const std::string newTag = tag + type; + const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term + + // append newTag over the copied null-term (memcpy with explicit length: no reliance on newTag.data() being null-terminated) + memcpy(originalTagData.Buffer + tagDataLength, newTag.c_str(), newTag.size()); + memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T)); + + // store temp buffer back in TagData + const char* newTagData = (const char*)originalTagData.Buffer; + TagData.assign(newTagData, newTagDataLength); + return true; +} + +template<> +inline bool BamAlignment::AddTag(const std::string& tag, + const std::string& type, + const std::string& value) +{ + // if char data not populated, do that first + if ( SupportData.HasCoreOnly ) + BuildCharData(); + + // check tag/type size + if ( !IsValidSize(tag, type) ) { + // TODO: set error string? + return false; + } + + // check that storage type code is OK for string + if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) { + // TODO: set error string? + return false; + } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // if tag already exists, return false + // use EditTag explicitly instead + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string?
+ return false; + } + + // otherwise, copy tag data to temp buffer + const std::string newTag = tag + type + value; + const size_t newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term + + // append newTag + null-term over the copied null-term (explicit length, so an embedded NUL in value cannot leave the buffer tail unwritten) + memcpy(originalTagData.Buffer + tagDataLength, newTag.c_str(), newTag.size() + 1); + + // store temp buffer back in TagData + const char* newTagData = (const char*)originalTagData.Buffer; + TagData.assign(newTagData, newTagDataLength); + return true; +} + +/*! \fn template bool AddTag(const std::string& tag, const std::vector& values) + \brief Adds a numeric array field to the BAM tags. + + Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead. + + \param[in] tag 2-character tag name + \param[in] values vector of data values to store + \return \c true if the \b new tag was added successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ +template +inline bool BamAlignment::AddTag(const std::string& tag, const std::vector& values) { + + // if char data not populated, do that first + if ( SupportData.HasCoreOnly ) + BuildCharData(); + + // check for valid tag name length + if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) + return false; + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // if tag already exists, return false + // use EditTag explicitly instead + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string?
+ return false; + } + + // build new tag's base information + char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE]; + memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE ); + newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY; + newTagBase[3] = TagTypeHelper::TypeCode(); + + // add number of array elements to newTagBase + const int32_t numElements = values.size(); + memcpy(newTagBase + 4, &numElements, sizeof(int32_t)); + + // copy current TagData string to temp buffer, leaving room for new tag's contents + const size_t newTagDataLength = tagDataLength + + Constants::BAM_TAG_ARRAYBASE_SIZE + + numElements*sizeof(T); + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term + + // write newTagBase over the old null term (memcpy, not strcat: newTagBase is binary, has no null-term, and its count bytes may be zero) + memcpy(originalTagData.Buffer + tagDataLength, newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE); + + // add vector elements to tag + int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE; + for ( int i = 0 ; i < numElements; ++i ) { + const T& value = values.at(i); + memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T)); + } + + // store temp buffer back in TagData + const char* newTagData = (const char*)originalTagData.Buffer; + TagData.assign(newTagData, newTagDataLength); + return true; +} + +/*! \fn template bool EditTag(const std::string& tag, const std::string& type, const T& value) + \brief Edits a BAM tag field. + + If \a tag does not exist, a new entry is created. + + \param[in] tag 2-character tag name + \param[in] type 1-character tag type + \param[in] value new data value + + \return \c true if the tag was modified/created successfully + + \sa BamAlignment::RemoveTag() + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/ +template +inline bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const T& value) { + + // if char data not populated, do that first + if ( SupportData.HasCoreOnly ) + BuildCharData(); + + // remove existing tag if present, then append tag with new value + if ( HasTag(tag) ) + RemoveTag(tag); + return AddTag(tag, type, value); +} + +/*! \fn template bool EditTag(const std::string& tag, const std::vector& values) + \brief Edits a BAM tag field containing a numeric array. + + If \a tag does not exist, a new entry is created. + + \param[in] tag 2-character tag name + \param[in] values vector of data values + + \return \c true if the tag was modified/created successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ +template +inline bool BamAlignment::EditTag(const std::string& tag, const std::vector& values) { + + // if char data not populated, do that first + if ( SupportData.HasCoreOnly ) + BuildCharData(); + + // remove existing tag if present, then append tag with new values + if ( HasTag(tag) ) + RemoveTag(tag); + return AddTag(tag, values); +} + + +/*! \fn template bool GetTag(const std::string& tag, T& destination) const + \brief Retrieves the value associated with a BAM tag. + + \param[in] tag 2-character tag name + \param[out] destination retrieved value + \return \c true if found +*/ +template +inline bool BamAlignment::GetTag(const std::string& tag, T& destination) const { + + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? + return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? + return false; + } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // return failure if tag not found + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string?
+ return false; + } + + // fetch data type + const char type = *(pTagData - 1); + if ( !TagTypeHelper::CanConvertFrom(type) ) { + // TODO: set error string ? + return false; + } + + // determine data length + int destinationLength = 0; + switch ( type ) { + + // 1 byte data + case (Constants::BAM_TAG_TYPE_ASCII) : + case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_UINT8) : + destinationLength = 1; + break; + + // 2 byte data + case (Constants::BAM_TAG_TYPE_INT16) : + case (Constants::BAM_TAG_TYPE_UINT16) : + destinationLength = 2; + break; + + // 4 byte data + case (Constants::BAM_TAG_TYPE_INT32) : + case (Constants::BAM_TAG_TYPE_UINT32) : + case (Constants::BAM_TAG_TYPE_FLOAT) : + destinationLength = 4; + break; + + // var-length types not supported for numeric destination + case (Constants::BAM_TAG_TYPE_STRING) : + case (Constants::BAM_TAG_TYPE_HEX) : + case (Constants::BAM_TAG_TYPE_ARRAY) : + SetErrorString("BamAlignment::GetTag", + "cannot store variable length tag data into a numeric destination"); + return false; + + // unrecognized tag type + default: + const std::string message = std::string("invalid tag type: ") + type; + SetErrorString("BamAlignment::GetTag", message); + return false; + } + + // store data in destination + destination = 0; + memcpy(&destination, pTagData, destinationLength); + + // return success + return true; +} + +template<> +inline bool BamAlignment::GetTag(const std::string& tag, + std::string& destination) const +{ + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? + return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? 
+ return false; + } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // return failure if tag not found + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? + return false; + } + + // otherwise copy data into destination + const unsigned int dataLength = strlen(pTagData); + destination.clear(); + destination.resize(dataLength); + memcpy( (char*)destination.data(), pTagData, dataLength ); + + // return success + return true; +} + +/*! \fn template bool GetTag(const std::string& tag, std::vector& destination) const + \brief Retrieves the numeric array associated with a BAM tag. + + \param[in] tag 2-character tag name + \param[out] destination retrieved values + \return \c true if found +*/ +template +inline bool BamAlignment::GetTag(const std::string& tag, std::vector& destination) const { + + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? + return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? + return false; + } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLength = TagData.size(); + unsigned int numBytesParsed = 0; + + // return false if tag not found + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? + return false; + } + + // check that tag is array type + const char tagType = *(pTagData - 1); + if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) { + SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination"); + return false; + } + + // fetch element type + const char elementType = *pTagData; + if ( !TagTypeHelper::CanConvertFrom(elementType) ) { + // TODO: set error string ?
+ return false; + } + ++pTagData; + + // calculate length of each element in tag's array + int elementLength = 0; + switch ( elementType ) { + case (Constants::BAM_TAG_TYPE_ASCII) : + case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_UINT8) : + elementLength = sizeof(uint8_t); + break; + + case (Constants::BAM_TAG_TYPE_INT16) : + case (Constants::BAM_TAG_TYPE_UINT16) : + elementLength = sizeof(uint16_t); + break; + + case (Constants::BAM_TAG_TYPE_INT32) : + case (Constants::BAM_TAG_TYPE_UINT32) : + case (Constants::BAM_TAG_TYPE_FLOAT) : + elementLength = sizeof(uint32_t); + break; + + // var-length types not supported for numeric destination + case (Constants::BAM_TAG_TYPE_STRING) : + case (Constants::BAM_TAG_TYPE_HEX) : + case (Constants::BAM_TAG_TYPE_ARRAY) : + SetErrorString("BamAlignment::GetTag", + "invalid array data, variable-length elements are not allowed"); + return false; + + // unknown tag type + default: + const std::string message = std::string("invalid array element type: ") + elementType; + SetErrorString("BamAlignment::GetTag", message); + return false; + } + + // never copy more bytes per element than T can hold + if ( elementLength > static_cast<int>(sizeof(T)) ) { + SetErrorString("BamAlignment::GetTag", + "array element type is wider than destination element type"); + return false; + } + + // get number of elements + int32_t numElements; + memcpy(&numElements, pTagData, sizeof(int32_t)); + pTagData += 4; + destination.clear(); + destination.reserve(numElements); + + // read in elements: stored elements are elementLength bytes wide, which can be narrower than T, + // so zero value once and copy/advance by elementLength (same zero-then-copy approach as the scalar GetTag) + T value = T(); + for ( int i = 0 ; i < numElements; ++i ) { + memcpy(&value, pTagData, elementLength); + pTagData += elementLength; + destination.push_back(value); + } + + // return success + return true; +} + typedef std::vector BamAlignmentVector; } // namespace BamTools