From: Derek Date: Tue, 25 May 2010 17:12:27 +0000 (-0400) Subject: Added templated GetTag() method, compile tested, but not runtime tested X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=8a0328ccbd7db4283b113e1f3480fd5ac334dbbc;p=bamtools.git Added templated GetTag() method, compile tested, but not runtime tested --- diff --git a/BamAux.h b/BamAux.h index 2ce2733..3d14a46 100644 --- a/BamAux.h +++ b/BamAux.h @@ -65,12 +65,12 @@ const int BT_SIZEOF_INT = 4; struct CigarOp; struct BamAlignment { - - // constructors & destructor - public: - BamAlignment(void); - BamAlignment(const BamAlignment& other); - ~BamAlignment(void); + + // constructors & destructor + public: + BamAlignment(void); + BamAlignment(const BamAlignment& other); + ~BamAlignment(void); // Queries against alignment flags public: @@ -99,16 +99,19 @@ struct BamAlignment { void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag - - // Tag data access methods + + // Tag data access methods public: bool GetEditDistance(uint8_t& editDistance) const; // get "NM" tag data - contributed by Aaron Quinlan bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data - + + bool GetTag(const std::string& tag, std::string& destination); + template bool GetTag(const std::string& tag, T& destination); + // Additional data access methods public: - int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations - + int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations + // 'internal' utility methods private: static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); @@ -131,7 +134,7 @@ struct BamAlignment { int32_t MatePosition; // Position (0-based) where alignment's mate starts int32_t InsertSize; // Mate-pair insert size - // Alignment flag query constants + // Alignment flag query constants // Use the get/set methods above instead private: enum { PAIRED = 1 @@ -157,14 +160,14 @@ struct CigarOp { }; struct RefData { - + // data members std::string RefName; // Name of reference sequence int32_t RefLength; // Length of reference sequence bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence - + // constructor - RefData(const int32_t& length = 0, + RefData(const int32_t& length = 0, bool ok = false) : RefLength(length) , RefHasAlignments(ok) @@ -177,14 +180,14 @@ typedef std::vector BamAlignmentVector; // ---------------------------------------------------------------- // Indexing structs & typedefs -struct Chunk { +struct Chunk { // data members uint64_t Start; uint64_t Stop; - + // constructor - Chunk(const uint64_t& start = 0, + Chunk(const uint64_t& start = 0, const uint64_t& stop = 0) : Start(start) , Stop(stop) @@ -213,36 +216,36 @@ struct ReferenceIndex { }; typedef std::vector BamIndex; - -// ---------------------------------------------------------------- -// BamAlignment member methods - -// constructors & destructor -inline -BamAlignment::BamAlignment(void) { } - -inline -BamAlignment::BamAlignment(const BamAlignment& other) - : Name(other.Name) - , Length(other.Length) - , QueryBases(other.QueryBases) - , AlignedBases(other.AlignedBases) - , Qualities(other.Qualities) - , TagData(other.TagData) - , RefID(other.RefID) - , Position(other.Position) - , Bin(other.Bin) - , MapQuality(other.MapQuality) - , AlignmentFlag(other.AlignmentFlag) - , CigarData(other.CigarData) - , MateRefID(other.MateRefID) - , MatePosition(other.MatePosition) - , InsertSize(other.InsertSize) -{ } - -inline -BamAlignment::~BamAlignment(void) { } - + +// ---------------------------------------------------------------- +// BamAlignment member methods + +// constructors & destructor +inline +BamAlignment::BamAlignment(void) { } + +inline +BamAlignment::BamAlignment(const BamAlignment& other) + : Name(other.Name) + , Length(other.Length) + , QueryBases(other.QueryBases) + , AlignedBases(other.AlignedBases) + , Qualities(other.Qualities) + , TagData(other.TagData) + , RefID(other.RefID) + , Position(other.Position) + , Bin(other.Bin) + , MapQuality(other.MapQuality) + , AlignmentFlag(other.AlignmentFlag) + , CigarData(other.CigarData) + , MateRefID(other.MateRefID) + , MatePosition(other.MatePosition) + , InsertSize(other.InsertSize) +{ } + +inline +BamAlignment::~BamAlignment(void) { } + // Queries against alignment flags inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); } inline bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); } @@ -254,8 +257,8 @@ inline bool BamAlignment::IsPaired(void) const { return ( (AlignmentF inline bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); } inline bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); } inline bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); } -inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } - +inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } + // Manipulate alignment flags inline void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; } inline void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; } @@ -267,9 +270,9 @@ inline void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFl inline void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; } inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; } inline void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; } -inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; } - -// calculates alignment end position, based on starting position and CIGAR operations +inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; } + +// calculates alignment end position, based on starting position and CIGAR operations inline int BamAlignment::GetEndPosition(bool usePadded) const { @@ -283,16 +286,16 @@ int BamAlignment::GetEndPosition(bool usePadded) const { const char cigarType = (*cigarIter).Type; if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) { alignEnd += (*cigarIter).Length; - } - else if ( usePadded && cigarType == 'I' ) { - alignEnd += (*cigarIter).Length; + } + else if ( usePadded && cigarType == 'I' ) { + alignEnd += (*cigarIter).Length; } } return alignEnd; } -// get "NM" tag data - contributed by Aaron Quinlan -// stores data in 'editDistance', returns success/fail +// get "NM" tag data - contributed by Aaron Quinlan +// stores data in 'editDistance', returns success/fail inline bool BamAlignment::GetEditDistance(uint8_t& editDistance) const { @@ -328,10 +331,10 @@ bool BamAlignment::GetEditDistance(uint8_t& editDistance) const { // assign the editDistance value std::memcpy(&editDistance, pTagData, 1); return true; -} - -// get "RG" tag data -// stores data in 'readGroup', returns success/fail +} + +// get "RG" tag data +// stores data in 'readGroup', returns success/fail inline bool BamAlignment::GetReadGroup(std::string& readGroup) const { @@ -370,11 +373,88 @@ bool BamAlignment::GetReadGroup(std::string& readGroup) const { readGroup.resize(readGroupLen); std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen ); return true; -} - -inline +} + +inline +bool BamAlignment::GetTag(const std::string& tag, std::string& destination) { + + if ( TagData.empty() ) { return false; } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLen = TagData.size(); + unsigned int numBytesParsed = 0; + + bool foundReadGroupTag = false; + while( numBytesParsed < tagDataLen ) { + + const char* pTagType = pTagData; + const char* pTagStorageType = pTagData + 2; + pTagData += 3; + numBytesParsed += 3; + + // check the current tag + if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) { + foundReadGroupTag = true; + break; + } + + // get the storage class and find the next tag + if (*pTagStorageType == '\0') { return false; } + SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed ); + if (*pTagData == '\0') { return false; } + } + + // return if the read group tag was not present + if ( !foundReadGroupTag ) { return false; } + + // assign the read group + const unsigned int dataLen = std::strlen(pTagData); + destination.resize(dataLen); + std::memcpy( (char*)destination.data(), pTagData, dataLen ); + return true; +} + +template +bool BamAlignment::GetTag(const std::string& tag, T& destination) { + + if ( TagData.empty() ) { return false; } + + // localize the tag data + char* pTagData = (char*)TagData.data(); + const unsigned int tagDataLen = TagData.size(); + unsigned int numBytesParsed = 0; + + bool foundDesiredTag = false; + while( numBytesParsed < tagDataLen ) { + + const char* pTagType = pTagData; + const char* pTagStorageType = pTagData + 2; + pTagData += 3; + numBytesParsed += 3; + + // check the current tag + if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) { + foundDesiredTag = true; + break; + } + + // get the storage class and find the next tag + if (*pTagStorageType == '\0') { return false; } + SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed ); + if (*pTagData == '\0') { return false; } + } + // return if the edit distance tag was not present + if ( !foundDesiredTag ) { return false; } + + // assign the editDistance value + std::memcpy(&destination, pTagData, sizeof(T)); + return true; +} + +inline void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) { - + switch(storageType) { case 'A': @@ -415,7 +495,7 @@ void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsign printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData); exit(1); } -} +} // ---------------------------------------------------------------- // Added: 3-35-2010 DWB @@ -431,7 +511,7 @@ inline bool SystemIsBigEndian(void) { // swaps endianness of 16-bit value 'in place' inline void SwapEndian_16(int16_t& x) { x = ((x >> 8) | (x << 8)); -} +} inline void SwapEndian_16(uint16_t& x) { x = ((x >> 8) | (x << 8)); @@ -444,7 +524,7 @@ inline void SwapEndian_32(int32_t& x) { ((x >> 8) & 0x0000FF00) | (x << 24) ); -} +} inline void SwapEndian_32(uint32_t& x) { x = ( (x >> 24) | @@ -478,19 +558,19 @@ inline void SwapEndian_64(uint64_t& x) { (x << 56) ); } - + // swaps endianness of 'next 2 bytes' in a char buffer (in-place) inline void SwapEndian_16p(char* data) { uint16_t& value = (uint16_t&)*data; SwapEndian_16(value); } - + // swaps endianness of 'next 4 bytes' in a char buffer (in-place) inline void SwapEndian_32p(char* data) { uint32_t& value = (uint32_t&)*data; SwapEndian_32(value); } - + // swaps endianness of 'next 8 bytes' in a char buffer (in-place) inline void SwapEndian_64p(char* data) { uint64_t& value = (uint64_t&)*data;