X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamAlignment.h;h=fb54b1a6d38ed1a4c5461551c46afef16747eb79;hb=8c80d760637f8df39262683cd2570f0589423d36;hp=6eb7618b2e25544ca2df3db22a6910a544fec7b4;hpb=577b6032aa3d85616047c8aba6061dd8dad20cfc;p=bamtools.git diff --git a/src/api/BamAlignment.h b/src/api/BamAlignment.h index 6eb7618..fb54b1a 100644 --- a/src/api/BamAlignment.h +++ b/src/api/BamAlignment.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 December 2010 (DB) +// Last modified: 21 March 2011 (DB) // --------------------------------------------------------------------------- // Provides the BamAlignment data structure // *************************************************************************** @@ -18,7 +18,7 @@ namespace BamTools { -// forward declare BamAlignment's friend classes +// forward declaration of BamAlignment's friend classes namespace Internal { class BamReaderPrivate; class BamWriterPrivate; @@ -33,132 +33,109 @@ struct API_EXPORT BamAlignment { BamAlignment(const BamAlignment& other); ~BamAlignment(void); - // Queries against alignment flags + // queries against alignment flags public: - bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate - bool IsFailedQC(void) const; // Returns true if this read failed quality control - bool IsFirstMate(void) const; // Returns true if alignment is first mate on read - bool IsMapped(void) const; // Returns true if alignment is mapped - bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped - bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand - bool IsPaired(void) const; // Returns true if alignment part of paired-end read - bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment - bool IsProperPair(void) const; 
// Returns true if alignment is part of read that satisfied paired-end resolution - bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand - bool IsSecondMate(void) const; // Returns true if alignment is second mate on read - - // Manipulate alignment flags + bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate + bool IsFailedQC(void) const; // returns true if this read failed quality control + bool IsFirstMate(void) const; // returns true if alignment is first mate on read + bool IsMapped(void) const; // returns true if alignment is mapped + bool IsMateMapped(void) const; // returns true if alignment's mate is mapped + bool IsMateReverseStrand(void) const; // returns true if alignment's mate mapped to reverse strand + bool IsPaired(void) const; // returns true if alignment part of paired-end read + bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment + bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution + bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand + bool IsSecondMate(void) const; // returns true if alignment is second mate on read + + // manipulate alignment flags public: - void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag - void SetIsFailedQC(bool ok); // Sets "failed quality control" flag - void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag - void SetIsMapped(bool ok); // Sets "alignment is mapped" flag - void SetIsMateMapped(bool ok); // Sets "alignment's mate is mapped" flag - void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag - void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag - void SetIsPrimaryAlignment(bool ok); // Sets "position is primary alignment" flag - void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end 
resolution" flag - void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag - void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag - - // legacy methods (deprecated, but available) - void SetIsMateUnmapped(bool ok); // Complement of IsMateMapped() flag - void SetIsSecondaryAlignment(bool ok); // Complement of IsPrimaryAlignment() flag - void SetIsUnmapped(bool ok); // Complement of IsMapped() flag - - // Tag data access methods + void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag + void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag + void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag + void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag + void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag + void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag + void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag + void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag + void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag + void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag + void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag + + // legacy methods (consider deprecated, but still available) + void SetIsMateUnmapped(bool ok); // complement of using SetIsMateMapped() + void SetIsSecondaryAlignment(bool ok); // complement of using SetIsPrimaryAlignment() + void SetIsUnmapped(bool ok); // complement of using SetIsMapped() + + // tag data access methods public: + // ------------------------------------------------------------------------------------- // N.B. 
- The following tag access methods may not be used on BamAlignments fetched // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in // error message (to keep output clean) but will ALWAYS return false. Only user-created // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here. + // + // You can call BuildCharData() on such an alignment retrieved by GetNextAlignmentCore(). + // This populates all the character data, and will enable subsequent queries on tag data. + // ------------------------------------------------------------------------------------- - // add tag data (create new TAG entry with TYPE and VALUE) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if new data added, false if error or TAG already exists - // N.B. - will NOT modify existing tag. Use EditTag() instead - // @tag - two character tag name - // @type - single character tag type (see SAM/BAM spec for details) - // @value - value to associate with tag - bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if edit was successfaul, false if error - // @tag - two character tag name - // @type - single character tag type (see SAM/BAM spec for details) - // @value - new value for tag - bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool 
EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // specific tag data access methods - these only remain for legacy support - // returns whether specific tag could be retrieved - bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance)) - bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (equivalent to GetTag("RG", readGroup)) + // adds a tag + bool AddTag(const std::string& tag, const std::string& type, const std::string& value); + bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); + bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); + bool AddTag(const std::string& tag, const std::string& type, const float& value); - // generic tag data access methods - // returns whether tag is found & tag type is compatible with DESTINATION - // @tag - two character tag name - // @destination - if found, tag value is stored here - bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings - bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data - bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data - bool GetTag(const std::string& tag, float& destination) const; // access floating point data - - // retrieve the tag type code for TAG - // returns true if tag could be found and type determined + // edits a tag + bool EditTag(const std::string& tag, const std::string& type, const std::string& value); + bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); + bool EditTag(const std::string& tag, 
const std::string& type, const int32_t& value); + bool EditTag(const std::string& tag, const std::string& type, const float& value); + + // retrieves data for a tag + bool GetTag(const std::string& tag, std::string& destination) const; + bool GetTag(const std::string& tag, uint32_t& destination) const; + bool GetTag(const std::string& tag, int32_t& destination) const; + bool GetTag(const std::string& tag, float& destination) const; + + // retrieves the BAM tag-type character for a tag bool GetTagType(const std::string& tag, char& type) const; + + // legacy methods (consider deprecated, but still available) + bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag + bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag - // remove tag data - // returns true if removal was successful, false if error - // N.B. - returns false if TAG does not exist (no removal can occur) - // @tag - two character tag name + // removes a tag bool RemoveTag(const std::string& tag); - // Populate an alignment retrieved by BamAlignment::GetNextAlignmentCore() with full character data - // (read name, bases, qualities, tag data) + // additional methods public: + // populates alignment string fields bool BuildCharData(void); - - // Additional data access methods - public: - // calculates & returns alignment end position, based on starting position and CIGAR operations - // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference - // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats. 
+ // calculates alignment end position int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; - // 'internal' utility methods - private: - static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed); - static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); - - // Data members + // public data fields public: - std::string Name; // Read name - int32_t Length; // Query length - std::string QueryBases; // 'Original' sequence (as reported from sequencing machine) - std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping) + std::string Name; // read name + int32_t Length; // length of query sequence + std::string QueryBases; // 'original' sequence (as reported from sequencing machine) + std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping) std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values) - std::string TagData; // Tag data (accessor methods will pull the requested information out) + std::string TagData; // tag data (use provided methods to query/modify) int32_t RefID; // ID number for reference sequence - int32_t Position; // Position (0-based) where alignment starts - uint16_t Bin; // Bin in BAM file where this alignment resides - uint16_t MapQuality; // Mapping quality score - uint32_t AlignmentFlag; // Alignment bit-flag - see Is() methods to query this value, SetIs() methods to manipulate - std::vector<CigarOp> CigarData; // CIGAR operations for this alignment + int32_t Position; // position (0-based) where alignment starts + uint16_t Bin; // BAM (standard) index bin number for this alignment + uint16_t MapQuality; // mapping quality score + uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify) + std::vector<CigarOp> CigarData; // CIGAR operations for this alignment int32_t MateRefID; // ID number for reference sequence where 
alignment's mate was aligned - int32_t MatePosition; // Position (0-based) where alignment's mate starts - int32_t InsertSize; // Mate-pair insert size - - // Internal data, inaccessible to client code - // but available BamReaderPrivate & BamWriterPrivate + int32_t MatePosition; // position (0-based) where alignment's mate starts + int32_t InsertSize; // mate-pair insert size + std::string Filename; // name of BAM file which this alignment comes from + + // internal data private: + //! \cond struct BamAlignmentSupportData { // data members @@ -178,28 +155,12 @@ struct API_EXPORT BamAlignment { , HasCoreOnly(false) { } }; - BamAlignmentSupportData SupportData; - friend class Internal::BamReaderPrivate; - friend class Internal::BamWriterPrivate; - - // Alignment flag query constants - // Use the get/set methods above instead - private: - enum { PAIRED = 1 - , PROPER_PAIR = 2 - , UNMAPPED = 4 - , MATE_UNMAPPED = 8 - , REVERSE = 16 - , MATE_REVERSE = 32 - , READ_1 = 64 - , READ_2 = 128 - , SECONDARY = 256 - , QC_FAILED = 512 - , DUPLICATE = 1024 - }; + BamAlignmentSupportData SupportData; + friend class Internal::BamReaderPrivate; + friend class Internal::BamWriterPrivate; + //! \endcond }; -// convenience typedef(s) typedef std::vector<BamAlignment> BamAlignmentVector; } // namespace BamTools