-// BAM constants: BAM_C* operation codes (0-6), CIGAR shift/mask (mask = low BAM_CIGAR_SHIFT bits set), and fixed byte sizes\r
-const int BAM_CMATCH = 0;\r
-const int BAM_CINS = 1;\r
-const int BAM_CDEL = 2;\r
-const int BAM_CREF_SKIP = 3;\r
-const int BAM_CSOFT_CLIP = 4;\r
-const int BAM_CHARD_CLIP = 5;\r
-const int BAM_CPAD = 6;\r
-const int BAM_CIGAR_SHIFT = 4;\r
-const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);\r
-const int BAM_CORE_SIZE = 32;\r
-const int BT_SIZEOF_INT = 4;\r
-\r
-struct CigarOp;\r
-\r
-struct BamAlignment {\r
-\r
- // constructors & destructor\r
- public:\r
- BamAlignment(void);\r
- BamAlignment(const BamAlignment& other);\r
- ~BamAlignment(void);\r
-\r
- // Queries against alignment flags\r
- public: \r
- bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate \r
- bool IsFailedQC(void) const; // Returns true if this read failed quality control \r
- bool IsFirstMate(void) const; // Returns true if alignment is first mate on read \r
- bool IsMapped(void) const; // Returns true if alignment is mapped \r
- bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped \r
- bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand \r
- bool IsPaired(void) const; // Returns true if alignment part of paired-end read \r
- bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment \r
- bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution \r
- bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand\r
- bool IsSecondMate(void) const; // Returns true if alignment is second mate on read\r
-\r
- // Manipulate alignment flags\r
- public: \r
- void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag \r
- void SetIsFailedQC(bool ok); // Sets "failed quality control" flag \r
- void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag \r
- void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is unmapped" flag \r
- void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag \r
- void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag \r
- void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag \r
- void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag \r
- void SetIsSecondaryAlignment(bool ok); // Sets "alignment is secondary (not primary)" flag \r
- void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag \r
- void SetIsUnmapped(bool ok); // Sets "alignment is unmapped" flag\r
-\r
- // Tag data access methods\r
- public:\r
- // -------------------------------------------------------------------------------------\r
- // N.B. - The following tag-modifying methods may not be used on BamAlignments fetched\r
- // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in an \r
- // error message (to keep output clean) but will ALWAYS return false. Only user-\r
- // generated BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid.\r
-\r
- // add tag data (create new TAG entry with TYPE and VALUE)\r
- // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
- // returns true if new data added, false if error or TAG already exists\r
- // N.B. - will NOT modify existing tag. Use EditTag() instead\r
- bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
- bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
- bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
- bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
- \r
- // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)\r
- // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
- // returns true if edit was successful, false if error\r
- bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
- bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
- bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
- bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
-\r
- // specific tag data access methods - these only remain for legacy support\r
- bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (implemented as GetTag("NM", editDistance))\r
- bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (implemented as GetTag("RG", readGroup)) \r
- \r
- // generic tag data access methods \r
- bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings \r
- bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data\r
- bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data\r
- bool GetTag(const std::string& tag, float& destination) const; // access floating point data\r
- \r
- // remove tag data\r
- // returns true if removal was successful, false if error\r
- // N.B. - returns false if TAG does not exist (no removal can occur)\r
- bool RemoveTag(const std::string& tag);\r
-\r
- // Additional data access methods\r
- public:\r
- // calculates alignment end position, based on starting position and CIGAR operations\r
- // @zeroBased - if true, returns 0-based coordinate; else returns 1-based (@usePadded is not described here - TODO confirm its effect against the implementation)\r
- int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; \r
-\r
- // 'internal' utility methods \r
- private:\r
- static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);\r
- static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
-\r
- // Data members\r
- public:\r
- std::string Name; // Read name\r
- int32_t Length; // Query length\r
- std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)\r
- std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)\r
- std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\r
- std::string TagData; // Tag data (accessor methods will pull the requested information out)\r
- int32_t RefID; // ID number for reference sequence\r
- int32_t Position; // Position (0-based) where alignment starts\r
- uint16_t Bin; // Bin in BAM file where this alignment resides\r
- uint16_t MapQuality; // Mapping quality score\r
- uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
- std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
- int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned\r
- int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
- int32_t InsertSize; // Mate-pair insert size\r
- \r
- // internal data\r
- private:\r
- struct BamAlignmentSupportData {\r
- \r
- // data members\r
- std::string AllCharData;\r
- uint32_t BlockLength;\r
- uint32_t NumCigarOperations;\r
- uint32_t QueryNameLength;\r
- uint32_t QuerySequenceLength;\r
- bool HasCoreOnly;\r
- \r
- // constructor (zeroes every length/count and clears HasCoreOnly; AllCharData is default-constructed)\r
- BamAlignmentSupportData(void)\r
- : BlockLength(0)\r
- , NumCigarOperations(0)\r
- , QueryNameLength(0)\r
- , QuerySequenceLength(0)\r
- , HasCoreOnly(false)\r
- { }\r
- };\r
- \r
- // contains raw character data & lengths\r
- BamAlignmentSupportData SupportData; \r
- \r
- // allow these classes access to BamAlignment private members (SupportData)\r
- // but client code should not need to touch this data\r
- friend class BamReader;\r
- friend class BamWriter;\r
-\r
- // Alignment flag bit masks (each value is a distinct single bit)\r
- // Use the Is<something>() / SetIs<something>() methods above instead\r
- private:\r
- enum { PAIRED = 1\r
- , PROPER_PAIR = 2\r
- , UNMAPPED = 4\r
- , MATE_UNMAPPED = 8\r
- , REVERSE = 16\r
- , MATE_REVERSE = 32\r
- , READ_1 = 64\r
- , READ_2 = 128\r
- , SECONDARY = 256\r
- , QC_FAILED = 512\r
- , DUPLICATE = 1024 \r
- };\r
-};\r