struct CigarOp;\r
\r
struct BamAlignment {\r
-
- // constructors & destructor
- public:
- BamAlignment(void);
- BamAlignment(const BamAlignment& other);
- ~BamAlignment(void);
+\r
+ // constructors & destructor\r
+ public:\r
+ BamAlignment(void);\r
+ BamAlignment(const BamAlignment& other);\r
+ ~BamAlignment(void);\r
\r
// Queries against alignment flags\r
public: \r
void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag \r
void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag \r
void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag\r
-
- // Tag data access methods
+\r
+ // Tag data access methods\r
public:\r
bool GetEditDistance(uint8_t& editDistance) const; // get "NM" tag data - contributed by Aaron Quinlan\r
bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data\r
-
+ \r
+ bool GetTag(const std::string& tag, std::string& destination);\r
+ template<typename T> bool GetTag(const std::string& tag, T& destination);\r
+\r
// Additional data access methods\r
public:\r
- int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations
-
+ int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations\r
+\r
// 'internal' utility methods \r
private:\r
static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
int32_t InsertSize; // Mate-pair insert size\r
\r
- // Alignment flag query constants
+ // Alignment flag query constants\r
// Use the get/set methods above instead\r
private:\r
enum { PAIRED = 1\r
};\r
\r
struct RefData {\r
-
+ \r
// data members\r
std::string RefName; // Name of reference sequence\r
int32_t RefLength; // Length of reference sequence\r
bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence\r
-
+ \r
// constructor\r
- RefData(const int32_t& length = 0,
+ RefData(const int32_t& length = 0, \r
bool ok = false)\r
: RefLength(length)\r
, RefHasAlignments(ok)\r
// ----------------------------------------------------------------\r
// Indexing structs & typedefs\r
\r
-struct Chunk {
+struct Chunk {\r
\r
// data members\r
uint64_t Start;\r
uint64_t Stop;\r
-
+\r
// constructor\r
- Chunk(const uint64_t& start = 0,
+ Chunk(const uint64_t& start = 0, \r
const uint64_t& stop = 0)\r
: Start(start)\r
, Stop(stop)\r
};\r
\r
typedef std::vector<ReferenceIndex> BamIndex;\r
-
-// ----------------------------------------------------------------
-// BamAlignment member methods
-
-// constructors & destructor
-inline
-BamAlignment::BamAlignment(void) { }
-
-inline
-BamAlignment::BamAlignment(const BamAlignment& other)
- : Name(other.Name)
- , Length(other.Length)
- , QueryBases(other.QueryBases)
- , AlignedBases(other.AlignedBases)
- , Qualities(other.Qualities)
- , TagData(other.TagData)
- , RefID(other.RefID)
- , Position(other.Position)
- , Bin(other.Bin)
- , MapQuality(other.MapQuality)
- , AlignmentFlag(other.AlignmentFlag)
- , CigarData(other.CigarData)
- , MateRefID(other.MateRefID)
- , MatePosition(other.MatePosition)
- , InsertSize(other.InsertSize)
-{ }
-
-inline
-BamAlignment::~BamAlignment(void) { }
-
+\r
+// ----------------------------------------------------------------\r
+// BamAlignment member methods\r
+\r
+// constructors & destructor\r
+inline \r
+BamAlignment::BamAlignment(void) { }\r
+\r
+// copy constructor\r
+// initializes each data member named in the list below from the same-named member of 'other'\r
+inline \r
+BamAlignment::BamAlignment(const BamAlignment& other)\r
+ : Name(other.Name)\r
+ , Length(other.Length)\r
+ , QueryBases(other.QueryBases)\r
+ , AlignedBases(other.AlignedBases)\r
+ , Qualities(other.Qualities)\r
+ , TagData(other.TagData)\r
+ , RefID(other.RefID)\r
+ , Position(other.Position)\r
+ , Bin(other.Bin)\r
+ , MapQuality(other.MapQuality)\r
+ , AlignmentFlag(other.AlignmentFlag)\r
+ , CigarData(other.CigarData)\r
+ , MateRefID(other.MateRefID)\r
+ , MatePosition(other.MatePosition)\r
+ , InsertSize(other.InsertSize)\r
+{ }\r
+\r
+inline \r
+BamAlignment::~BamAlignment(void) { }\r
+\r
// Queries against alignment flags\r
inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
inline bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }\r
inline bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }\r
inline bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }\r
inline bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }\r
-inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
-
+inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }\r
+\r
// Manipulate alignment flags \r
inline void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }\r
inline void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }\r
inline void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }\r
inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }\r
inline void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }\r
-inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
-
-// calculates alignment end position, based on starting position and CIGAR operations
+inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }\r
+\r
+// calculates alignment end position, based on starting position and CIGAR operations\r
inline \r
int BamAlignment::GetEndPosition(bool usePadded) const {\r
\r
const char cigarType = (*cigarIter).Type;\r
if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
alignEnd += (*cigarIter).Length;\r
- }
- else if ( usePadded && cigarType == 'I' ) {
- alignEnd += (*cigarIter).Length;
+ } \r
+ else if ( usePadded && cigarType == 'I' ) {\r
+ alignEnd += (*cigarIter).Length;\r
}\r
}\r
return alignEnd;\r
}\r
\r
-// get "NM" tag data - contributed by Aaron Quinlan
-// stores data in 'editDistance', returns success/fail
+// get "NM" tag data - contributed by Aaron Quinlan\r
+// stores data in 'editDistance', returns success/fail\r
inline \r
bool BamAlignment::GetEditDistance(uint8_t& editDistance) const {\r
\r
// assign the editDistance value\r
std::memcpy(&editDistance, pTagData, 1);\r
return true;\r
-}
-
-// get "RG" tag data
-// stores data in 'readGroup', returns success/fail
+}\r
+\r
+// get "RG" tag data\r
+// stores data in 'readGroup', returns success/fail\r
inline \r
bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
\r
readGroup.resize(readGroupLen);\r
std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
return true;\r
-}
-
-inline
+}\r
+\r
+// get string-type tag data for the two-character 'tag'\r
+// stores data in 'destination', returns success/fail\r
+// fails if there is no tag data, the tag is absent, a tag header is truncated,\r
+// or the tag's storage type is not a string type ('Z' or 'H')\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, std::string& destination) {\r
+\r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    // (SkipToNextTag takes a mutable pointer reference; the data is only read here)\r
+    char* pTagData = const_cast<char*>( TagData.data() );\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    char storageType = '\0';\r
+    bool foundDesiredTag = false;\r
+    while ( numBytesParsed < tagDataLen ) {\r
+\r
+        // each entry starts with a 3-byte header: 2-char tag name + 1-char storage type\r
+        if ( tagDataLen - numBytesParsed < 3 ) { return false; }\r
+\r
+        const char* pTagType = pTagData;\r
+        storageType = pTagData[2];\r
+        pTagData += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+            foundDesiredTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if ( storageType == '\0' ) { return false; }\r
+        SkipToNextTag( storageType, pTagData, numBytesParsed );\r
+\r
+        // test the byte count first so the pointer is never read past the end of the data\r
+        if ( numBytesParsed >= tagDataLen || *pTagData == '\0' ) { return false; }\r
+    }\r
+\r
+    // return if the desired tag was not present\r
+    if ( !foundDesiredTag ) { return false; }\r
+\r
+    // only the string storage types hold NUL-terminated text;\r
+    // reading any other type as a C string would copy raw binary data\r
+    if ( storageType != 'Z' && storageType != 'H' ) { return false; }\r
+\r
+    // copy the string, never scanning past the end of the tag data\r
+    const unsigned int bytesRemaining = tagDataLen - numBytesParsed;\r
+    const void* pTerminator = std::memchr(pTagData, '\0', bytesRemaining);\r
+    const unsigned int dataLen = ( pTerminator\r
+                                   ? static_cast<unsigned int>( static_cast<const char*>(pTerminator) - pTagData )\r
+                                   : bytesRemaining );\r
+    destination.assign(pTagData, dataLen);\r
+    return true;\r
+}\r
+\r
+// get fixed-width tag data for the two-character 'tag'\r
+// copies sizeof(T) raw bytes of the tag's value into 'destination', returns success/fail\r
+// fails if there is no tag data, the tag is absent, a tag header is truncated,\r
+// or fewer than sizeof(T) bytes follow the tag's header\r
+// NOTE: the tag's stored type is not compared against T - the caller must pick a T that matches it\r
+template<typename T>\r
+bool BamAlignment::GetTag(const std::string& tag, T& destination) {\r
+\r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    // (SkipToNextTag takes a mutable pointer reference; the data is only read here)\r
+    char* pTagData = const_cast<char*>( TagData.data() );\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    bool foundDesiredTag = false;\r
+    while ( numBytesParsed < tagDataLen ) {\r
+\r
+        // each entry starts with a 3-byte header: 2-char tag name + 1-char storage type\r
+        if ( tagDataLen - numBytesParsed < 3 ) { return false; }\r
+\r
+        const char* pTagType = pTagData;\r
+        const char storageType = pTagData[2];\r
+        pTagData += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+            foundDesiredTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if ( storageType == '\0' ) { return false; }\r
+        SkipToNextTag( storageType, pTagData, numBytesParsed );\r
+\r
+        // test the byte count first so the pointer is never read past the end of the data\r
+        if ( numBytesParsed >= tagDataLen || *pTagData == '\0' ) { return false; }\r
+    }\r
+\r
+    // return if the desired tag was not present\r
+    if ( !foundDesiredTag ) { return false; }\r
+\r
+    // make sure a whole T is available before copying it out\r
+    if ( tagDataLen - numBytesParsed < sizeof(T) ) { return false; }\r
+\r
+    // assign the tag value\r
+    std::memcpy(&destination, pTagData, sizeof(T));\r
+    return true;\r
+}\r
+\r
+inline\r
void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
-
+ \r
switch(storageType) {\r
\r
case 'A':\r
printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
exit(1);\r
}\r
-}
+}\r
\r
// ----------------------------------------------------------------\r
// Added: 3-35-2010 DWB\r
// swaps endianness of 16-bit value 'in place'\r
inline void SwapEndian_16(int16_t& x) {\r
-x = ((x >> 8) | (x << 8));\r
+    // swap on an unsigned copy: right-shifting a negative int16_t sign-extends,\r
+    // which would force the high byte of the result to 0xFF\r
+    const uint16_t bits = static_cast<uint16_t>(x);\r
+    x = static_cast<int16_t>( static_cast<uint16_t>( (bits >> 8) | (bits << 8) ) );\r
-}
+}\r
\r
inline void SwapEndian_16(uint16_t& x) {\r
x = ((x >> 8) | (x << 8));\r
((x >> 8) & 0x0000FF00) | \r
(x << 24)\r
);\r
-}
+}\r
\r
inline void SwapEndian_32(uint32_t& x) {\r
x = ( (x >> 24) | \r
(x << 56)\r
);\r
}\r
-
+\r
// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
inline void SwapEndian_16p(char* data) {\r
-uint16_t& value = (uint16_t&)*data; \r
-SwapEndian_16(value);\r
+    // copy the 2 bytes out and back instead of aliasing the char buffer as uint16_t:\r
+    // 'data' may point at any byte offset, and accessing it through a uint16_t reference\r
+    // breaks the aliasing rules and may be misaligned\r
+    uint16_t value;\r
+    std::memcpy(&value, data, sizeof(value));\r
+    SwapEndian_16(value);\r
+    std::memcpy(data, &value, sizeof(value));\r
}\r
-
+\r
// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
inline void SwapEndian_32p(char* data) {\r
-uint32_t& value = (uint32_t&)*data; \r
-SwapEndian_32(value);\r
+    // copy the 4 bytes out and back instead of aliasing the char buffer as uint32_t:\r
+    // 'data' may point at any byte offset, and accessing it through a uint32_t reference\r
+    // breaks the aliasing rules and may be misaligned\r
+    uint32_t value;\r
+    std::memcpy(&value, data, sizeof(value));\r
+    SwapEndian_32(value);\r
+    std::memcpy(data, &value, sizeof(value));\r
}\r
-
+\r
// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
inline void SwapEndian_64p(char* data) {\r
uint64_t& value = (uint64_t&)*data; \r