// ***************************************************************************\r
-// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
+// BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 1 October 2009 (DB)\r
+// Last modified: 21 July 2010 (DB)\r
// ---------------------------------------------------------------------------\r
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad\r
-// Institute.\r
-// ---------------------------------------------------------------------------\r
-// Defines common constants, typedefs, & data structures for BamTools.\r
+// Provides the basic constants, data structures, etc. for using BAM files\r
// ***************************************************************************\r
\r
-/*! \r
- \file BamAux.h\r
- \brief BamTools constants, typedefs, & data structures\r
-*/\r
+#ifndef BAMAUX_H\r
+#define BAMAUX_H\r
\r
-#pragma once\r
+// C inclues\r
+#include <cctype>\r
+#include <cstdio>\r
+#include <cstdlib>\r
+#include <cstring>\r
\r
// C++ includes\r
#include <exception>\r
+#include <map>\r
#include <string>\r
#include <utility>\r
#include <vector>\r
\r
-// C includes\r
-#include <cstdio>\r
-#include <cstdlib>\r
-#include <cstring>\r
-\r
// Platform-specific type definitions\r
-#ifdef WIN32\r
- typedef char int8_t;\r
- typedef unsigned char uint8_t;\r
- typedef short int16_t;\r
- typedef unsigned short uint16_t;\r
- typedef int int32_t;\r
- typedef unsigned int uint32_t;\r
- typedef long long int64_t;\r
- typedef unsigned long long uint64_t;\r
-#else\r
- #include <stdint.h>\r
-#endif\r
-\r
-//! \namespace BamTools\r
+#ifndef BAMTOOLS_TYPES\r
+#define BAMTOOLS_TYPES\r
+ #ifdef _MSC_VER\r
+ typedef char int8_t;\r
+ typedef unsigned char uint8_t;\r
+ typedef short int16_t;\r
+ typedef unsigned short uint16_t;\r
+ typedef int int32_t;\r
+ typedef unsigned int uint32_t;\r
+ typedef long long int64_t;\r
+ typedef unsigned long long uint64_t;\r
+ #else\r
+ #include <stdint.h>\r
+ #endif\r
+#endif // BAMTOOLS_TYPES\r
+\r
namespace BamTools {\r
\r
- //! \cond\r
- // --------------------------------------------------------------------------------------\r
- // This section is purely internal and can be excluded from main generated documentation.\r
- \r
- // zlib constants\r
- const int GZIP_ID1 = 31;\r
- const int GZIP_ID2 = 139;\r
- const int CM_DEFLATE = 8;\r
- const int FLG_FEXTRA = 4;\r
- const int OS_UNKNOWN = 255;\r
- const int BGZF_XLEN = 6;\r
- const int BGZF_ID1 = 66;\r
- const int BGZF_ID2 = 67;\r
- const int BGZF_LEN = 2;\r
- const int GZIP_WINDOW_BITS = -15;\r
- const int Z_DEFAULT_MEM_LEVEL = 8;\r
-\r
- // BZGF constants\r
- const int BLOCK_HEADER_LENGTH = 18;\r
- const int BLOCK_FOOTER_LENGTH = 8;\r
- const int MAX_BLOCK_SIZE = 65536;\r
- const int DEFAULT_BLOCK_SIZE = 65536;\r
-\r
- // BAM constants\r
- const unsigned int BAM_CORE_SIZE = 32;\r
- const int BAM_CMATCH = 0;\r
- const int BAM_CINS = 1;\r
- const int BAM_CDEL = 2;\r
- const int BAM_CREF_SKIP = 3;\r
- const int BAM_CSOFT_CLIP = 4;\r
- const int BAM_CHARD_CLIP = 5;\r
- const int BAM_CPAD = 6;\r
- const int BAM_CIGAR_SHIFT = 4;\r
- const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);\r
-\r
- // BAM index constants\r
- const int MAX_BIN = 37450; // =(8^6-1)/7+1\r
- const int BAM_MIN_CHUNK_GAP = 32768;\r
- const int BAM_LIDX_SHIFT = 14;\r
-\r
- // Explicit variable sizes\r
- const int BT_SIZEOF_INT = 4;\r
- \r
- struct BgzfData {\r
- unsigned int UncompressedBlockSize;\r
- unsigned int CompressedBlockSize;\r
- unsigned int BlockLength;\r
- unsigned int BlockOffset;\r
- uint64_t BlockAddress;\r
- bool IsOpen;\r
- FILE* Stream;\r
- char* UncompressedBlock;\r
- char* CompressedBlock;\r
- \r
- // constructor\r
- BgzfData(void)\r
- : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)\r
- , CompressedBlockSize(MAX_BLOCK_SIZE)\r
- , BlockLength(0)\r
- , BlockOffset(0)\r
- , BlockAddress(0)\r
- , IsOpen(false)\r
- , Stream(NULL)\r
- , UncompressedBlock(NULL)\r
- , CompressedBlock(NULL)\r
- {\r
- try {\r
- CompressedBlock = new char[CompressedBlockSize];\r
- UncompressedBlock = new char[UncompressedBlockSize];\r
- } catch( std::bad_alloc& ba ) {\r
- printf("ERROR: Unable to allocate memory for our BGZF object.\n");\r
- exit(1);\r
- }\r
- }\r
- \r
- // destructor\r
- ~BgzfData(void) {\r
- if(CompressedBlock) delete [] CompressedBlock;\r
- if(UncompressedBlock) delete [] UncompressedBlock;\r
- }\r
- };\r
- //! \endcond\r
- \r
- // --------------------------------------------------------------------------------------\r
- // Data structures\r
- \r
- //! \brief Cigar operation data structure\r
- struct CigarOp {\r
- char Type; //!< Operation type (MIDNSHP)\r
- uint32_t Length; //!< Operation length (number of bases)\r
- \r
- };\r
-\r
- //! Reference sequence data structure\r
- struct RefData {\r
- std::string RefName; //!< Name of reference sequence\r
- unsigned int RefLength; //!< Length of reference sequence\r
- bool RefHasAlignments; //!< True if BAM file contains alignments mapped to reference sequence\r
- \r
- // constructor\r
- RefData(void)\r
- : RefLength(0)\r
- , RefHasAlignments(false)\r
- { }\r
- };\r
-\r
- //! BAM alignment data structure\r
- struct BamAlignment {\r
- \r
- // Queries against alignment flag\r
- public:\r
- //! Returns true if this read is a PCR duplicate (determined by external app)\r
- bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
- //! Returns true if this read failed quality control (determined by external app)\r
- bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); } \r
- //! Returns true if alignment is first mate on read\r
- bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }\r
- //! Returns true if alignment is mapped \r
- bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }\r
- //! Returns true if alignment's mate is mapped\r
- bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); } \r
- //! Returns true if alignment's mate mapped to reverse strand\r
- bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }\r
- //! Returns true if alignment part of paired-end read\r
- bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); } \r
- //! Returns true if this position is primary alignment (determined by external app)\r
- bool IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); } \r
- //! Returns true if alignment is part of read that satisfied paired-end resolution (determined by external app)\r
- bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); } \r
- //! Returns true if alignment mapped to reverse strand\r
- bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); } \r
- //! Returns true if alignment is second mate on read\r
- bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } \r
- \r
- public:\r
- /*! \r
- \brief Get alignment's read group text.\r
- \r
- Assigns read group text to readGroup.\r
- \r
- \return True if read group data successfully retrieved.\r
- */\r
- bool GetReadGroup(std::string& readGroup) const {\r
- \r
- if ( TagData.empty() ) { return false; }\r
- \r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLen = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- bool foundReadGroupTag = false;\r
- while( numBytesParsed < tagDataLen ) {\r
- \r
- const char* pTagType = pTagData;\r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
- \r
- // check the current tag\r
- if ( strncmp(pTagType, "RG", 2) == 0 ) {\r
- foundReadGroupTag = true;\r
- break;\r
- }\r
- \r
- // get the storage class and find the next tag\r
- SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
- }\r
- \r
- // return if the read group tag was not present\r
- if ( !foundReadGroupTag ) { return false; }\r
- \r
- // assign the read group\r
- const unsigned int readGroupLen = strlen(pTagData);\r
- readGroup.resize(readGroupLen);\r
- memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
- return true;\r
- }\r
- \r
- private:\r
- // skips to the next tag\r
- static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
- switch(storageType) {\r
- \r
- case 'A':\r
- case 'c':\r
- case 'C':\r
- ++numBytesParsed;\r
- ++pTagData;\r
- break;\r
- \r
- case 's':\r
- case 'S':\r
- case 'f':\r
- numBytesParsed += 2;\r
- pTagData += 2;\r
- break;\r
- \r
- case 'i':\r
- case 'I':\r
- numBytesParsed += 4;\r
- pTagData += 4;\r
- break;\r
- \r
- case 'Z':\r
- case 'H':\r
- while(*pTagData) {\r
- ++numBytesParsed;\r
- ++pTagData;\r
- }\r
- break;\r
- \r
- default:\r
- printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
- exit(1);\r
- }\r
- }\r
- \r
- // Data members\r
- public:\r
- std::string Name; //!< Read name\r
- unsigned int Length; //!< Query length\r
- std::string QueryBases; //!< 'Original' sequence (as reported from sequencing machine)\r
- std::string AlignedBases; //!< 'Aligned' sequence (includes any indels, padding, clipping) \r
- std::string Qualities; //!< FASTQ qualities (ASCII characters, not numeric values)\r
- std::string TagData; //!< Tag data (accessor methods will pull the requested information out)\r
- //unsigned int RefID; //!< ID number for reference sequence\r
- //unsigned int Position; //!< Position (0-based) where alignment starts \r
- signed int RefID; //!< ID number for reference sequence (-1)\r
- signed int Position; //!< Position (0-based) where alignment starts (-1) \r
- unsigned int Bin; //!< Bin in BAM file where this alignment resides\r
- unsigned int MapQuality; //!< Mapping quality score \r
- unsigned int AlignmentFlag; //!< Alignment bit-flag - see Is<something>() methods for available queries\r
- std::vector<CigarOp> CigarData; //!< CIGAR operations for this alignment\r
- //unsigned int MateRefID; //!< ID number for reference sequence where alignment's mate was aligned\r
- //unsigned int MatePosition; //!< Position (0-based) where alignment's mate starts\r
- //unsigned int InsertSize; //!< Mate-pair insert size\r
- signed int MateRefID; //!< ID number for reference sequence where alignment's mate was aligned (-1)\r
- signed int MatePosition; //!< Position (0-based) where alignment's mate starts (-1)\r
- signed int InsertSize; //!< Mate-pair insert size(0)\r
- \r
- // Alignment flag query constants\r
- private:\r
- enum { PAIRED = 1,\r
- PROPER_PAIR = 2,\r
- UNMAPPED = 4,\r
- MATE_UNMAPPED = 8,\r
- REVERSE = 16,\r
- MATE_REVERSE = 32,\r
- READ_1 = 64,\r
- READ_2 = 128,\r
- SECONDARY = 256,\r
- QC_FAILED = 512,\r
- DUPLICATE = 1024\r
- };\r
- };\r
-\r
- // ----------------------------------------------------------------\r
- // Typedefs\r
- \r
- /*!\r
- \typedef RefVector\r
- \brief Vector of RefData objects\r
- */\r
- typedef std::vector<RefData> RefVector;\r
- \r
- /*! \r
- \typedef BamAlignmentVector\r
- \brief Vector of BamAlignments\r
- */\r
- typedef std::vector< BamAlignment > BamAlignmentVector;\r
- \r
- //! \cond\r
- // ----------------------------------------------------------------\r
- // Typedefs (internal - can exclude from main documentation)\r
- \r
- //Offsets for linear indexing\r
- typedef std::vector<uint64_t> LinearOffsetVector;\r
-\r
- // Alignment 'chunk' boundaries\r
- typedef std::pair<uint64_t, uint64_t> ChunkPair;\r
- // Vector of alignment 'chunks'\r
- typedef std::vector<ChunkPair> ChunkVector;\r
-\r
- // BAM bin contains a bin ID & a vector of alignment 'chunks'\r
- typedef std::pair<uint32_t, ChunkVector*> BamBin;\r
- // Vector of BAM bins\r
- typedef std::vector<BamBin> BinVector;\r
-\r
- // Reference sequence index data\r
- typedef std::pair<BinVector*, LinearOffsetVector*> RefIndex;\r
-\r
- // Full BAM file index data structure \r
- typedef std::vector<RefIndex*> BamIndex;\r
- // ----------------------------------------------------------------\r
- //! \endcond\r
+// BAM constants\r
+const int BAM_CORE_SIZE = 32;\r
+const int BAM_CMATCH = 0;\r
+const int BAM_CINS = 1;\r
+const int BAM_CDEL = 2;\r
+const int BAM_CREF_SKIP = 3;\r
+const int BAM_CSOFT_CLIP = 4;\r
+const int BAM_CHARD_CLIP = 5;\r
+const int BAM_CPAD = 6;\r
+const int BAM_CIGAR_SHIFT = 4;\r
+const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);\r
+\r
+// BAM index constants\r
+const int MAX_BIN = 37450; // =(8^6-1)/7+1\r
+const int BAM_MIN_CHUNK_GAP = 32768;\r
+const int BAM_LIDX_SHIFT = 14;\r
+\r
+// Explicit variable sizes\r
+const int BT_SIZEOF_INT = 4;\r
+\r
+struct CigarOp;\r
+\r
+struct BamAlignment {\r
+\r
+ // constructors & destructor\r
+ public:\r
+ BamAlignment(void);\r
+ BamAlignment(const BamAlignment& other);\r
+ ~BamAlignment(void);\r
+\r
+ // Queries against alignment flags\r
+ public: \r
+ bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate \r
+ bool IsFailedQC(void) const; // Returns true if this read failed quality control \r
+ bool IsFirstMate(void) const; // Returns true if alignment is first mate on read \r
+ bool IsMapped(void) const; // Returns true if alignment is mapped \r
+ bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped \r
+ bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand \r
+ bool IsPaired(void) const; // Returns true if alignment part of paired-end read \r
+ bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment \r
+ bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution \r
+ bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand\r
+ bool IsSecondMate(void) const; // Returns true if alignment is second mate on read\r
+\r
+ // Manipulate alignment flags\r
+ public: \r
+ void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag \r
+ void SetIsFailedQC(bool ok); // Sets "failed quality control" flag \r
+ void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag \r
+ void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is mapped" flag \r
+ void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag \r
+ void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag \r
+ void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag \r
+ void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag \r
+ void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag \r
+ void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag \r
+ void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag\r
+\r
+ // Tag data access methods\r
+ public:\r
+ bool GetEditDistance(uint8_t& editDistance) const; // get "NM" tag data - contributed by Aaron Quinlan\r
+ bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data\r
+ \r
+ bool GetTag(const std::string& tag, std::string& destination);\r
+ template<typename T> bool GetTag(const std::string& tag, T& destination);\r
+\r
+ // Additional data access methods\r
+ public:\r
+ int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations\r
+\r
+ // 'internal' utility methods \r
+ private:\r
+ static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
+\r
+ // Data members\r
+ public:\r
+ std::string Name; // Read name\r
+ int32_t Length; // Query length\r
+ std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)\r
+ std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)\r
+ std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\r
+ std::string TagData; // Tag data (accessor methods will pull the requested information out)\r
+ int32_t RefID; // ID number for reference sequence\r
+ int32_t Position; // Position (0-based) where alignment starts\r
+ uint16_t Bin; // Bin in BAM file where this alignment resides\r
+ uint16_t MapQuality; // Mapping quality score\r
+ uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
+ std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
+ int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned\r
+ int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
+ int32_t InsertSize; // Mate-pair insert size\r
+ \r
+ \r
+ struct BamAlignmentSupportData {\r
+ \r
+ // data members\r
+ std::string AllCharData;\r
+ uint32_t BlockLength;\r
+ uint32_t NumCigarOperations;\r
+ uint32_t QueryNameLength;\r
+ uint32_t QuerySequenceLength;\r
+ bool HasCoreOnly;\r
+ \r
+ // constructor\r
+ BamAlignmentSupportData(void)\r
+ : BlockLength(0)\r
+ , NumCigarOperations(0)\r
+ , QueryNameLength(0)\r
+ , QuerySequenceLength(0)\r
+ , HasCoreOnly(false)\r
+ { }\r
+ };\r
+ \r
+ BamAlignmentSupportData SupportData; // Contains raw character data & lengths \r
+\r
+ // Alignment flag query constants\r
+ // Use the get/set methods above instead\r
+ private:\r
+ enum { PAIRED = 1\r
+ , PROPER_PAIR = 2\r
+ , UNMAPPED = 4\r
+ , MATE_UNMAPPED = 8\r
+ , REVERSE = 16\r
+ , MATE_REVERSE = 32\r
+ , READ_1 = 64\r
+ , READ_2 = 128\r
+ , SECONDARY = 256\r
+ , QC_FAILED = 512\r
+ , DUPLICATE = 1024 \r
+ };\r
+};\r
+\r
+// ----------------------------------------------------------------\r
+// Auxiliary data structs & typedefs\r
+\r
+struct CigarOp {\r
+ \r
+ // data members\r
+ char Type; // Operation type (MIDNSHP)\r
+ uint32_t Length; // Operation length (number of bases)\r
+ \r
+ // constructor\r
+ CigarOp(const char type = '\0', \r
+ const uint32_t length = 0) \r
+ : Type(type)\r
+ , Length(length) \r
+ { }\r
+};\r
+\r
+struct RefData {\r
+ \r
+ // data members\r
+ std::string RefName; // Name of reference sequence\r
+ int32_t RefLength; // Length of reference sequence\r
+ bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence\r
+ \r
+ // constructor\r
+ RefData(const int32_t& length = 0, \r
+ bool ok = false)\r
+ : RefLength(length)\r
+ , RefHasAlignments(ok)\r
+ { }\r
+};\r
+\r
+typedef std::vector<RefData> RefVector;\r
+typedef std::vector<BamAlignment> BamAlignmentVector;\r
+\r
+struct BamRegion {\r
+ \r
+ // data members\r
+ int LeftRefID;\r
+ int LeftPosition;\r
+ int RightRefID;\r
+ int RightPosition;\r
+ \r
+ // constructor\r
+ BamRegion(const int& leftID = -1, \r
+ const int& leftPos = -1,\r
+ const int& rightID = -1,\r
+ const int& rightPos = -1)\r
+ : LeftRefID(leftID)\r
+ , LeftPosition(leftPos)\r
+ , RightRefID(rightID)\r
+ , RightPosition(rightPos)\r
+ { }\r
+};\r
+\r
+// ----------------------------------------------------------------\r
+// BamAlignment member methods\r
+\r
+// constructors & destructor\r
+inline \r
+BamAlignment::BamAlignment(void) { }\r
+\r
+inline \r
+BamAlignment::BamAlignment(const BamAlignment& other)\r
+ : Name(other.Name)\r
+ , Length(other.Length)\r
+ , QueryBases(other.QueryBases)\r
+ , AlignedBases(other.AlignedBases)\r
+ , Qualities(other.Qualities)\r
+ , TagData(other.TagData)\r
+ , RefID(other.RefID)\r
+ , Position(other.Position)\r
+ , Bin(other.Bin)\r
+ , MapQuality(other.MapQuality)\r
+ , AlignmentFlag(other.AlignmentFlag)\r
+ , CigarData(other.CigarData)\r
+ , MateRefID(other.MateRefID)\r
+ , MatePosition(other.MatePosition)\r
+ , InsertSize(other.InsertSize)\r
+ , SupportData(other.SupportData)\r
+{ }\r
+\r
+inline \r
+BamAlignment::~BamAlignment(void) { }\r
+\r
+// Queries against alignment flags\r
+inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
+inline bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }\r
+inline bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }\r
+inline bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }\r
+inline bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }\r
+inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }\r
+inline bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }\r
+inline bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }\r
+inline bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }\r
+inline bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }\r
+inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }\r
+\r
+// Manipulate alignment flags \r
+inline void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }\r
+inline void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }\r
+inline void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }\r
+inline void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }\r
+inline void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }\r
+inline void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }\r
+inline void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }\r
+inline void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }\r
+inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }\r
+inline void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }\r
+inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }\r
+\r
+// calculates alignment end position, based on starting position and CIGAR operations\r
+inline \r
+int BamAlignment::GetEndPosition(bool usePadded) const {\r
+\r
+ // initialize alignment end to starting position\r
+ int alignEnd = Position;\r
+\r
+ // iterate over cigar operations\r
+ std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();\r
+ std::vector<CigarOp>::const_iterator cigarEnd = CigarData.end();\r
+ for ( ; cigarIter != cigarEnd; ++cigarIter) {\r
+ const char cigarType = (*cigarIter).Type;\r
+ if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
+ alignEnd += (*cigarIter).Length;\r
+ } \r
+ else if ( usePadded && cigarType == 'I' ) {\r
+ alignEnd += (*cigarIter).Length;\r
+ }\r
+ }\r
+ return alignEnd;\r
}\r
+\r
+// get "NM" tag data - contributed by Aaron Quinlan\r
+// stores data in 'editDistance', returns success/fail\r
+inline \r
+bool BamAlignment::GetEditDistance(uint8_t& editDistance) const {\r
+\r
+ if ( TagData.empty() ) { return false; }\r
+\r
+ // localize the tag data\r
+ char* pTagData = (char*)TagData.data();\r
+ const unsigned int tagDataLen = TagData.size();\r
+ unsigned int numBytesParsed = 0;\r
+\r
+ bool foundEditDistanceTag = false;\r
+ while( numBytesParsed < tagDataLen ) {\r
+\r
+ const char* pTagType = pTagData;\r
+ const char* pTagStorageType = pTagData + 2;\r
+ pTagData += 3;\r
+ numBytesParsed += 3;\r
+\r
+ // check the current tag\r
+ if ( strncmp(pTagType, "NM", 2) == 0 ) {\r
+ foundEditDistanceTag = true;\r
+ break;\r
+ }\r
+\r
+ // get the storage class and find the next tag\r
+ if (*pTagStorageType == '\0') { return false; }\r
+ SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+ if (*pTagData == '\0') { return false; }\r
+ }\r
+ // return if the edit distance tag was not present\r
+ if ( !foundEditDistanceTag ) { return false; }\r
+\r
+ // assign the editDistance value\r
+ std::memcpy(&editDistance, pTagData, 1);\r
+ return true;\r
+}\r
+\r
+// get "RG" tag data\r
+// stores data in 'readGroup', returns success/fail\r
+inline \r
+bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
+\r
+ if ( TagData.empty() ) { return false; }\r
+\r
+ // localize the tag data\r
+ char* pTagData = (char*)TagData.data();\r
+ const unsigned int tagDataLen = TagData.size();\r
+ unsigned int numBytesParsed = 0;\r
+\r
+ bool foundReadGroupTag = false;\r
+ while( numBytesParsed < tagDataLen ) {\r
+\r
+ const char* pTagType = pTagData;\r
+ const char* pTagStorageType = pTagData + 2;\r
+ pTagData += 3;\r
+ numBytesParsed += 3;\r
+\r
+ // check the current tag\r
+ if ( std::strncmp(pTagType, "RG", 2) == 0 ) {\r
+ foundReadGroupTag = true;\r
+ break;\r
+ }\r
+\r
+ // get the storage class and find the next tag\r
+ if (*pTagStorageType == '\0') { return false; }\r
+ SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+ if (*pTagData == '\0') { return false; }\r
+ }\r
+\r
+ // return if the read group tag was not present\r
+ if ( !foundReadGroupTag ) { return false; }\r
+\r
+ // assign the read group\r
+ const unsigned int readGroupLen = std::strlen(pTagData);\r
+ readGroup.resize(readGroupLen);\r
+ std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
+ return true;\r
+}\r
+\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, std::string& destination) {\r
+ \r
+ if ( TagData.empty() ) { return false; }\r
+\r
+ // localize the tag data\r
+ char* pTagData = (char*)TagData.data();\r
+ const unsigned int tagDataLen = TagData.size();\r
+ unsigned int numBytesParsed = 0;\r
+\r
+ bool foundReadGroupTag = false;\r
+ while( numBytesParsed < tagDataLen ) {\r
+\r
+ const char* pTagType = pTagData;\r
+ const char* pTagStorageType = pTagData + 2;\r
+ pTagData += 3;\r
+ numBytesParsed += 3;\r
+\r
+ // check the current tag\r
+ if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+ foundReadGroupTag = true;\r
+ break;\r
+ }\r
+\r
+ // get the storage class and find the next tag\r
+ if (*pTagStorageType == '\0') { return false; }\r
+ SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+ if (*pTagData == '\0') { return false; }\r
+ }\r
+\r
+ // return if the read group tag was not present\r
+ if ( !foundReadGroupTag ) { return false; }\r
+\r
+ // assign the read group\r
+ const unsigned int dataLen = std::strlen(pTagData);\r
+ destination.resize(dataLen);\r
+ std::memcpy( (char*)destination.data(), pTagData, dataLen );\r
+ return true;\r
+}\r
+\r
+template<typename T> \r
+bool BamAlignment::GetTag(const std::string& tag, T& destination) {\r
+ \r
+ if ( TagData.empty() ) { return false; }\r
+\r
+ // localize the tag data\r
+ char* pTagData = (char*)TagData.data();\r
+ const unsigned int tagDataLen = TagData.size();\r
+ unsigned int numBytesParsed = 0;\r
+\r
+ bool foundDesiredTag = false;\r
+ while( numBytesParsed < tagDataLen ) {\r
+\r
+ const char* pTagType = pTagData;\r
+ const char* pTagStorageType = pTagData + 2;\r
+ pTagData += 3;\r
+ numBytesParsed += 3;\r
+\r
+ // check the current tag\r
+ if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+ foundDesiredTag = true;\r
+ break;\r
+ }\r
+\r
+ // get the storage class and find the next tag\r
+ if (*pTagStorageType == '\0') { return false; }\r
+ SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+ if (*pTagData == '\0') { return false; }\r
+ }\r
+ // return if the edit distance tag was not present\r
+ if ( !foundDesiredTag ) { return false; }\r
+\r
+ // assign the editDistance value\r
+ std::memcpy(&destination, pTagData, sizeof(T));\r
+ return true;\r
+}\r
+\r
+inline\r
+void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
+ \r
+ switch(storageType) {\r
+\r
+ case 'A':\r
+ case 'c':\r
+ case 'C':\r
+ ++numBytesParsed;\r
+ ++pTagData;\r
+ break;\r
+\r
+ case 's':\r
+ case 'S':\r
+ numBytesParsed += 2;\r
+ pTagData += 2;\r
+ break;\r
+\r
+ case 'f':\r
+ case 'i':\r
+ case 'I':\r
+ numBytesParsed += 4;\r
+ pTagData += 4;\r
+ break;\r
+\r
+ case 'Z':\r
+ case 'H':\r
+ while(*pTagData) {\r
+ ++numBytesParsed;\r
+ ++pTagData;\r
+ }\r
+ // ---------------------------\r
+ // Added: 3-25-2010 DWB\r
+ // Contributed: ARQ\r
+ // Fixed: error parsing variable length tag data\r
+ ++pTagData;\r
+ // ---------------------------\r
+ break;\r
+\r
+ default:\r
+ printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
+ exit(1);\r
+ }\r
+}\r
+\r
+// ----------------------------------------------------------------\r
+// Added: 3-35-2010 DWB\r
+// Fixed: Routines to provide endian-correctness\r
+// ----------------------------------------------------------------\r
+\r
+// returns true if system is big endian\r
+inline bool SystemIsBigEndian(void) {\r
+ const uint16_t one = 0x0001;\r
+ return ((*(char*) &one) == 0 );\r
+}\r
+\r
+// swaps endianness of 16-bit value 'in place'\r
+inline void SwapEndian_16(int16_t& x) {\r
+ x = ((x >> 8) | (x << 8));\r
+}\r
+\r
+inline void SwapEndian_16(uint16_t& x) {\r
+ x = ((x >> 8) | (x << 8));\r
+}\r
+\r
+// swaps endianness of 32-bit value 'in-place'\r
+inline void SwapEndian_32(int32_t& x) {\r
+ x = ( (x >> 24) | \r
+ ((x << 8) & 0x00FF0000) | \r
+ ((x >> 8) & 0x0000FF00) | \r
+ (x << 24)\r
+ );\r
+}\r
+\r
+inline void SwapEndian_32(uint32_t& x) {\r
+ x = ( (x >> 24) | \r
+ ((x << 8) & 0x00FF0000) | \r
+ ((x >> 8) & 0x0000FF00) | \r
+ (x << 24)\r
+ );\r
+}\r
+\r
+// swaps endianness of 64-bit value 'in-place'\r
+inline void SwapEndian_64(int64_t& x) {\r
+ x = ( (x >> 56) | \r
+ ((x << 40) & 0x00FF000000000000ll) |\r
+ ((x << 24) & 0x0000FF0000000000ll) |\r
+ ((x << 8) & 0x000000FF00000000ll) |\r
+ ((x >> 8) & 0x00000000FF000000ll) |\r
+ ((x >> 24) & 0x0000000000FF0000ll) |\r
+ ((x >> 40) & 0x000000000000FF00ll) |\r
+ (x << 56)\r
+ );\r
+}\r
+\r
+inline void SwapEndian_64(uint64_t& x) {\r
+ x = ( (x >> 56) | \r
+ ((x << 40) & 0x00FF000000000000ll) |\r
+ ((x << 24) & 0x0000FF0000000000ll) |\r
+ ((x << 8) & 0x000000FF00000000ll) |\r
+ ((x >> 8) & 0x00000000FF000000ll) |\r
+ ((x >> 24) & 0x0000000000FF0000ll) |\r
+ ((x >> 40) & 0x000000000000FF00ll) |\r
+ (x << 56)\r
+ );\r
+}\r
+\r
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_16p(char* data) {\r
+ uint16_t& value = (uint16_t&)*data; \r
+ SwapEndian_16(value);\r
+}\r
+\r
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_32p(char* data) {\r
+ uint32_t& value = (uint32_t&)*data; \r
+ SwapEndian_32(value);\r
+}\r
+\r
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_64p(char* data) {\r
+ uint64_t& value = (uint64_t&)*data; \r
+ SwapEndian_64(value);\r
+}\r
+\r
+} // namespace BamTools\r
+\r
+#endif // BAMAUX_H\r