X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamAlignment.h;h=9a8e7be0a7351a405317d6c0c281ad01c384a3c0;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=33175b5d10e5b5466da978ff11f0bfd714ea8ed5;hpb=11fabb69eb8c86635dd9498679b72bf78b3af3d1;p=bamtools.git diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h index 33175b5..9a8e7be 100644 --- a/src/api/BamAlignment.h +++ b/src/api/BamAlignment.h @@ -2,7 +2,7 @@ // BamAlignment.h (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 4 October 2011 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides the BamAlignment data structure // *************************************************************************** @@ -10,24 +10,23 @@ #ifndef BAMALIGNMENT_H #define BAMALIGNMENT_H -#include -#include -#include -#include -#include +#include "api/api_global.h" +#include "api/BamAux.h" +#include "api/BamConstants.h" #include #include -#include #include #include namespace BamTools { -// forward declaration of BamAlignment's friend classes +//! \cond +// forward declaration of BamAlignment's "friends" namespace Internal { class BamReaderPrivate; class BamWriterPrivate; } // namespace Internal +//! \endcond // BamAlignment data structure struct API_EXPORT BamAlignment { @@ -40,73 +39,55 @@ struct API_EXPORT BamAlignment { // queries against alignment flags public: - bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate - bool IsFailedQC(void) const; // returns true if this read failed quality control - bool IsFirstMate(void) const; // returns true if alignment is first mate on read - bool IsMapped(void) const; // returns true if alignment is mapped - bool IsMateMapped(void) const; // returns true if alignment's mate is mapped - bool IsMateReverseStrand(void) const; // returns true if alignment's mate mapped to reverse strand - bool IsPaired(void) const; // returns true if alignment part of paired-end read - bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment - bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution - bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand - bool IsSecondMate(void) const; // returns true if alignment is second mate on read + bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate + bool IsFailedQC(void) const; // returns true if this read failed quality control + bool IsFirstMate(void) const; // returns true if alignment is first mate on read + bool IsMapped(void) const; // returns true if alignment is mapped + bool IsMateMapped(void) const; // returns true if alignment's mate is mapped + bool IsMateReverseStrand(void) const; // returns true if alignment's mate mapped to reverse strand + bool IsPaired(void) const; // returns true if alignment part of paired-end read + bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment + bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution + bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand + bool IsSecondMate(void) const; // returns true if alignment is second mate on read // manipulate alignment flags public: - void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag - void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag - void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag - void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag - void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag - void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag - void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag - void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag - void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag - void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag - void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag - - // legacy methods (consider deprecated, but still available) - void SetIsMateUnmapped(bool ok); // complement of using SetIsMateMapped() - void SetIsSecondaryAlignment(bool ok); // complement of using SetIsPrimaryAlignment() - void SetIsUnmapped(bool ok); // complement of using SetIsMapped() + void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag + void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag + void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag + void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag + void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag + void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag + void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag + void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag + void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag + void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag + void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag // tag data access methods public: // add a new tag - template bool AddTag(const std::string& tag, - const std::string& type, - const T& value); - template bool AddTag(const std::string& tag, - const std::vector& values); + template bool AddTag(const std::string& tag, const std::string& type, const T& value); + template bool AddTag(const std::string& tag, const std::vector& values); // edit (or append) tag - template bool EditTag(const std::string& tag, - const std::string& type, - const T& value); - template bool EditTag(const std::string& tag, - const std::vector& values); + template bool EditTag(const std::string& tag, const std::string& type, const T& value); + template bool EditTag(const std::string& tag, const std::vector& values); // retrieves tag data - template bool GetTag(const std::string& tag, - T& destination) const; - template bool GetTag(const std::string& tag, - std::vector& destination) const; + template bool GetTag(const std::string& tag, T& destination) const; + template bool GetTag(const std::string& tag, std::vector& destination) const; - // retrieves the BAM type-code for requested tag - // (returns whether or not tag exists, and type-code is valid) + // retrieves the SAM/BAM type-code for requested tag name bool GetTagType(const std::string& tag, char& type) const; - // legacy methods (consider deprecated, but still available) - bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag - bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag - // returns true if alignment has a record for this tag name bool HasTag(const std::string& tag) const; // removes a tag - bool RemoveTag(const std::string& tag); + void RemoveTag(const std::string& tag); // additional methods public: @@ -114,7 +95,10 @@ struct API_EXPORT BamAlignment { bool BuildCharData(void); // calculates alignment end position - int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; + int GetEndPosition(bool usePadded = false, bool closedInterval = false) const; + + // returns a description of the last error that occurred + std::string GetErrorString(void) const; // public data fields public: @@ -135,18 +119,18 @@ struct API_EXPORT BamAlignment { int32_t InsertSize; // mate-pair insert size std::string Filename; // name of BAM file which this alignment comes from - //! \cond + //! \internal // internal utility methods private: - static bool FindTag(const std::string& tag, - char*& pTagData, - const unsigned int& tagDataLength, - unsigned int& numBytesParsed); - static bool IsValidSize(const std::string& tag, - const std::string& type); - static bool SkipToNextTag(const char storageType, + bool FindTag(const std::string& tag, + char*& pTagData, + const unsigned int& tagDataLength, + unsigned int& numBytesParsed) const; + bool IsValidSize(const std::string& tag, const std::string& type) const; + void SetErrorString(const std::string& where, const std::string& what) const; + bool SkipToNextTag(const char storageType, char*& pTagData, - unsigned int& numBytesParsed); + unsigned int& numBytesParsed) const; // internal data private: @@ -173,25 +157,43 @@ struct API_EXPORT BamAlignment { BamAlignmentSupportData SupportData; friend class Internal::BamReaderPrivate; friend class Internal::BamWriterPrivate; - //! \endcond + + mutable std::string ErrorString; // mutable to allow updates even in logically const methods + //! \endinternal }; // --------------------------------------------------------- // BamAlignment tag access methods +/*! \fn bool AddTag(const std::string& tag, const std::string& type, const T& value) + \brief Adds a field to the BAM tags. + + Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead. + + \param[in] tag 2-character tag name + \param[in] type 1-character tag type + \param[in] value data to store + \return \c true if the \b new tag was added successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ template -inline bool BamAlignment::AddTag(const std::string& tag, - const std::string& type, - const T& value) -{ +inline bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const T& value) { + // if char data not populated, do that first if ( SupportData.HasCoreOnly ) BuildCharData(); - // validate tag/type size & that storage type code is OK for T - if ( !IsValidSize(tag, type) ) return false; - if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) + // check tag/type size + if ( !IsValidSize(tag, type) ) { + // TODO: set error string? + return false; + } + + // check that storage type code is OK for T + if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) { + // TODO: set error string? return false; + } // localize the tag data char* pTagData = (char*)TagData.data(); @@ -200,8 +202,10 @@ inline bool BamAlignment::AddTag(const std::string& tag, // if tag already exists, return false // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } // otherwise, convert value to string union { T value; char valueBuffer[sizeof(T)]; } un; @@ -209,20 +213,17 @@ inline bool BamAlignment::AddTag(const std::string& tag, // copy original tag data to temp buffer const std::string newTag = tag + type; - const int newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T - char* originalTagData = new char[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term + const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term // append newTag - strcat(originalTagData + tagDataLength, newTag.data()); - memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T)); + strcat(originalTagData.Buffer + tagDataLength, newTag.data()); + memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T)); // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; + const char* newTagData = (const char*)originalTagData.Buffer; TagData.assign(newTagData, newTagDataLength); - - // clean up & return success - delete[] originalTagData; return true; } @@ -235,10 +236,17 @@ inline bool BamAlignment::AddTag(const std::string& tag, if ( SupportData.HasCoreOnly ) BuildCharData(); - // validate tag/type size & that storage type code is OK for string - if ( !IsValidSize(tag, type) ) return false; - if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) + // check tag/type size + if ( !IsValidSize(tag, type) ) { + // TODO: set error string? return false; + } + + // check that storage type code is OK for string + if ( !TagTypeHelper::CanConvertTo(type.at(0)) ) { + // TODO: set error string? + return false; + } // localize the tag data char* pTagData = (char*)TagData.data(); @@ -247,31 +255,39 @@ inline bool BamAlignment::AddTag(const std::string& tag, // if tag already exists, return false // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } // otherwise, copy tag data to temp buffer const std::string newTag = tag + type + value; - const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term - char* originalTagData = new char[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term + const size_t newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term - // append newTag - strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term + // append newTag (removes original null-term, then appends newTag + null-term) + strcat(originalTagData.Buffer + tagDataLength, newTag.data()); // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; + const char* newTagData = (const char*)originalTagData.Buffer; TagData.assign(newTagData, newTagDataLength); - - // clean up & return success - delete[] originalTagData; return true; } +/*! \fn template bool AddTag(const std::string& tag, const std::vector& values) + \brief Adds a numeric array field to the BAM tags. + + Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead. + + \param[in] tag 2-character tag name + \param[in] values vector of data values to store + \return \c true if the \b new tag was added successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ template -inline bool BamAlignment::AddTag(const std::string& tag, - const std::vector& values) -{ +inline bool BamAlignment::AddTag(const std::string& tag, const std::vector& values) { + // if char data not populated, do that first if ( SupportData.HasCoreOnly ) BuildCharData(); @@ -287,8 +303,10 @@ inline bool BamAlignment::AddTag(const std::string& tag, // if tag already exists, return false // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } // build new tag's base information char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE]; @@ -301,36 +319,45 @@ inline bool BamAlignment::AddTag(const std::string& tag, memcpy(newTagBase + 4, &numElements, sizeof(int32_t)); // copy current TagData string to temp buffer, leaving room for new tag's contents - const int newTagDataLength = tagDataLength + - Constants::BAM_TAG_ARRAYBASE_SIZE + - numElements*sizeof(T); - char* originalTagData = new char[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term + const size_t newTagDataLength = tagDataLength + + Constants::BAM_TAG_ARRAYBASE_SIZE + + numElements*sizeof(T); + RaiiBuffer originalTagData(newTagDataLength); + memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term // write newTagBase (removes old null term) - strcat(originalTagData + tagDataLength, (const char*)newTagBase); + strcat(originalTagData.Buffer + tagDataLength, (const char*)newTagBase); // add vector elements to tag int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE; for ( int i = 0 ; i < numElements; ++i ) { const T& value = values.at(i); - memcpy(originalTagData + elementsBeginOffset + i*sizeof(T), &value, sizeof(T)); + memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T)); } // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; + const char* newTagData = (const char*)originalTagData.Buffer; TagData.assign(newTagData, newTagDataLength); - - // cleanup & return success - delete[] originalTagData; return true; } +/*! \fn template bool EditTag(const std::string& tag, const std::string& type, const T& value) + \brief Edits a BAM tag field. + + If \a tag does not exist, a new entry is created. + + \param tag[in] 2-character tag name + \param type[in] 1-character tag type (must be "Z" or "H") + \param value[in] new data value + + \return \c true if the tag was modified/created successfully + + \sa BamAlignment::RemoveTag() + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ template -inline bool BamAlignment::EditTag(const std::string& tag, - const std::string& type, - const T& value) -{ +inline bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const T& value) { + // if char data not populated, do that first if ( SupportData.HasCoreOnly ) BuildCharData(); @@ -341,10 +368,20 @@ inline bool BamAlignment::EditTag(const std::string& tag, return AddTag(tag, type, value); } +/*! \fn template bool EditTag(const std::string& tag, const std::vector& values) + \brief Edits a BAM tag field containing a numeric array. + + If \a tag does not exist, a new entry is created. + + \param tag[in] 2-character tag name + \param value[in] vector of data values + + \return \c true if the tag was modified/created successfully + \sa \samSpecURL for more details on reserved tag names, supported tag types, etc. +*/ template -inline bool BamAlignment::EditTag(const std::string& tag, - const std::vector& values) -{ +inline bool BamAlignment::EditTag(const std::string& tag, const std::vector& values) { + // if char data not populated, do that first if ( SupportData.HasCoreOnly ) BuildCharData(); @@ -355,13 +392,28 @@ inline bool BamAlignment::EditTag(const std::string& tag, return AddTag(tag, values); } + +/*! \fn template bool GetTag(const std::string& tag, T& destination) const + \brief Retrieves the value associated with a BAM tag. + + \param tag[in] 2-character tag name + \param destination[out] retrieved value + \return \c true if found +*/ template -inline bool BamAlignment::GetTag(const std::string& tag, - T& destination) const -{ - // skip if core-only or no tags present - if ( SupportData.HasCoreOnly || TagData.empty() ) +inline bool BamAlignment::GetTag(const std::string& tag, T& destination) const { + + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? + return false; + } // localize the tag data char* pTagData = (char*)TagData.data(); @@ -369,13 +421,19 @@ inline bool BamAlignment::GetTag(const std::string& tag, unsigned int numBytesParsed = 0; // return failure if tag not found - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } - // otherwise try to copy data into destination + // fetch data type const char type = *(pTagData - 1); - if ( !TagTypeHelper::CanConvertFrom(type) ) + if ( !TagTypeHelper::CanConvertFrom(type) ) { + // TODO: set error string ? return false; + } + + // determine data length int destinationLength = 0; switch ( type ) { @@ -403,18 +461,18 @@ inline bool BamAlignment::GetTag(const std::string& tag, case (Constants::BAM_TAG_TYPE_STRING) : case (Constants::BAM_TAG_TYPE_HEX) : case (Constants::BAM_TAG_TYPE_ARRAY) : - std::cerr << "BamAlignment ERROR: cannot store tag of type " << type - << " in integer destination" << std::endl; + SetErrorString("BamAlignment::GetTag", + "cannot store variable length tag data into a numeric destination"); return false; // unrecognized tag type default: - std::cerr << "BamAlignment ERROR: unknown tag type encountered: " - << type << std::endl; + const std::string message = std::string("invalid tag type: ") + type; + SetErrorString("BamAlignment::GetTag", message); return false; } - // store in destination + // store data in destination destination = 0; memcpy(&destination, pTagData, destinationLength); @@ -426,9 +484,17 @@ template<> inline bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const { - // skip if core-only or no tags present - if ( SupportData.HasCoreOnly || TagData.empty() ) + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? + return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? return false; + } // localize the tag data char* pTagData = (char*)TagData.data(); @@ -436,8 +502,10 @@ inline bool BamAlignment::GetTag(const std::string& tag, unsigned int numBytesParsed = 0; // return failure if tag not found - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } // otherwise copy data into destination const unsigned int dataLength = strlen(pTagData); @@ -449,14 +517,27 @@ inline bool BamAlignment::GetTag(const std::string& tag, return true; } -// retrieves "binary-array" tag data +/*! \fn template bool GetTag(const std::string& tag, std::vector& destination) const + \brief Retrieves the numeric array associated with a BAM tag. + + \param tag[in] 2-character tag name + \param destination[out] retrieved values + \return \c true if found +*/ template -inline bool BamAlignment::GetTag(const std::string& tag, - std::vector& destination) const -{ - // skip if core-only or no tags present - if ( SupportData.HasCoreOnly || TagData.empty() ) +inline bool BamAlignment::GetTag(const std::string& tag, std::vector& destination) const { + + // skip if alignment is core-only + if ( SupportData.HasCoreOnly ) { + // TODO: set error string? + return false; + } + + // skip if no tags present + if ( TagData.empty() ) { + // TODO: set error string? return false; + } // localize the tag data char* pTagData = (char*)TagData.data(); @@ -464,22 +545,27 @@ inline bool BamAlignment::GetTag(const std::string& tag, unsigned int numBytesParsed = 0; // return false if tag not found - if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) + if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { + // TODO: set error string? return false; + } // check that tag is array type const char tagType = *(pTagData - 1); if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) { - std::cerr << "BamAlignment ERROR: Cannot store non-array data from tag: " - << tag << " in array destination" << std::endl; + SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination"); return false; } - // calculate length of each element in tag's array + // fetch element type const char elementType = *pTagData; - if ( !TagTypeHelper::CanConvertFrom(elementType) ) + if ( !TagTypeHelper::CanConvertFrom(elementType) ) { + // TODO: set error string ? return false; + } ++pTagData; + + // calculate length of each element in tag's array int elementLength = 0; switch ( elementType ) { case (Constants::BAM_TAG_TYPE_ASCII) : @@ -503,14 +589,14 @@ inline bool BamAlignment::GetTag(const std::string& tag, case (Constants::BAM_TAG_TYPE_STRING) : case (Constants::BAM_TAG_TYPE_HEX) : case (Constants::BAM_TAG_TYPE_ARRAY) : - std::cerr << "BamAlignment ERROR: array element type: " << elementType - << " cannot be stored in integer value" << std::endl; + SetErrorString("BamAlignment::GetTag", + "invalid array data, variable-length elements are not allowed"); return false; // unknown tag type default: - std::cerr << "BamAlignment ERROR: unknown element type encountered: " - << elementType << std::endl; + const std::string message = std::string("invalid array element type: ") + elementType; + SetErrorString("BamAlignment::GetTag", message); return false; }