1 // ***************************************************************************
2 // BamAlignment.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 December 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #ifndef BAMALIGNMENT_H
12 #define BAMALIGNMENT_H
14 #include <api/api_global.h>
15 #include <api/BamAux.h>
21 // forward declare BamAlignment's friend classes (BamAlignment names both as friends so they can reach its internal SupportData)
23 class BamReaderPrivate;
24 class BamWriterPrivate;
25 } // namespace Internal
27 // BamAlignment data structure
28 struct API_EXPORT BamAlignment {
30 // constructors & destructor
33 BamAlignment(const BamAlignment& other);
36 // Queries against alignment flags (all declared const; they report on the AlignmentFlag bit-flag)
38 bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate
39 bool IsFailedQC(void) const; // Returns true if this read failed quality control
40 bool IsFirstMate(void) const; // Returns true if alignment is the first mate of a read
41 bool IsMapped(void) const; // Returns true if alignment is mapped
42 bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped
43 bool IsMateReverseStrand(void) const; // Returns true if alignment's mate is mapped to the reverse strand
44 bool IsPaired(void) const; // Returns true if alignment is part of a paired-end read
45 bool IsPrimaryAlignment(void) const; // Returns true if the reported position is the primary alignment
46 bool IsProperPair(void) const; // Returns true if alignment is part of a read that satisfied paired-end resolution
47 bool IsReverseStrand(void) const; // Returns true if alignment is mapped to the reverse strand
48 bool IsSecondMate(void) const; // Returns true if alignment is the second mate of a read
50 // Manipulate alignment flags (these update the AlignmentFlag bit-flag; @ok is the new state of the named flag - presumably true sets it and false clears it, confirm against the implementation)
52 void SetIsDuplicate(bool ok); // Sets the "PCR duplicate" flag
53 void SetIsFailedQC(bool ok); // Sets the "failed quality control" flag
54 void SetIsFirstMate(bool ok); // Sets the "alignment is first mate" flag
55 void SetIsMapped(bool ok); // Sets the "alignment is mapped" flag
56 void SetIsMateMapped(bool ok); // Sets the "alignment's mate is mapped" flag
57 void SetIsMateReverseStrand(bool ok); // Sets the "alignment's mate mapped to reverse strand" flag
58 void SetIsPaired(bool ok); // Sets the "alignment part of paired-end read" flag
59 void SetIsPrimaryAlignment(bool ok); // Sets the "position is primary alignment" flag
60 void SetIsProperPair(bool ok); // Sets the "alignment is part of read that satisfied paired-end resolution" flag
61 void SetIsReverseStrand(bool ok); // Sets the "alignment mapped to reverse strand" flag
62 void SetIsSecondMate(bool ok); // Sets the "alignment is second mate on read" flag
64 // legacy methods (deprecated, but still available); each one is expressed as the complement of a current flag
65 void SetIsMateUnmapped(bool ok); // Complement of the IsMateMapped() flag - see SetIsMateMapped()
66 void SetIsSecondaryAlignment(bool ok); // Complement of the IsPrimaryAlignment() flag - see SetIsPrimaryAlignment()
67 void SetIsUnmapped(bool ok); // Complement of the IsMapped() flag - see SetIsMapped()
69 // Tag data access methods
71 // -------------------------------------------------------------------------------------
72 // N.B. - The following tag access methods may not be used on BamAlignments fetched
73 // using BamReader::GetNextAlignmentCore(). Attempting to use them will not produce an
74 // error message (to keep output clean), but they will ALWAYS return false. Only user-created
75 // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
77 // add tag data (create a new TAG entry with TYPE and VALUE)
78 // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
79 // returns true if the new data was added, false on error or if TAG already exists
80 // N.B. - will NOT modify an existing tag. Use EditTag() instead
81 // @tag - two-character tag name
82 // @type - single-character tag type (see SAM/BAM spec for details)
83 // @value - value to associate with tag
84 bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // string VALUE: type must be Z or H
85 bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // unsigned integer VALUE: type must be A or i
86 bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // signed integer VALUE: type must be A or i
87 bool AddTag(const std::string& tag, const std::string& type, const float& value); // floating point VALUE: type must be A, i, or f
89 // edit tag data (sets an existing TAG with TYPE to VALUE, or adds a new TAG if not already present)
90 // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
91 // returns true if the edit was successful, false on error
92 // @tag - two-character tag name
93 // @type - single-character tag type (see SAM/BAM spec for details)
94 // @value - new value for tag
95 bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // string VALUE: type must be Z or H
96 bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // unsigned integer VALUE: type must be A or i
97 bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // signed integer VALUE: type must be A or i
98 bool EditTag(const std::string& tag, const std::string& type, const float& value); // floating point VALUE: type must be A, i, or f
100 // specific tag data access methods - these remain only for legacy support (each is equivalent to a GetTag() call)
101 // returns true if the specific tag could be retrieved, false otherwise
102 bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance))
103 bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (equivalent to GetTag("RG", readGroup))
105 // generic tag data access methods
106 // returns true if the tag is found & its type is compatible with DESTINATION, false otherwise
107 // @tag - two-character tag name
108 // @destination - if found, the tag value is stored here
109 bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings
110 bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data
111 bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data
112 bool GetTag(const std::string& tag, float& destination) const; // access floating point data
114 // retrieve the tag type code for TAG (@tag - two-character tag name)
115 // returns true if the tag could be found and its type determined
116 bool GetTagType(const std::string& tag, char& type) const; // @type - output parameter for the type code (presumably only meaningful when true is returned - confirm)
119 // remove TAG from this alignment: returns true if removal was successful, false on error
120 // N.B. - returns false if TAG does not exist (no removal can occur)
121 // @tag - two-character tag name
122 bool RemoveTag(const std::string& tag);
124 // Populate an alignment retrieved by BamReader::GetNextAlignmentCore() with full character data
125 // (read name, bases, qualities, tag data)
127 bool BuildCharData(void); // NOTE(review): presumably returns true on success - confirm against the implementation
129 // Additional data access methods
131 // calculates & returns the alignment end position, based on the starting position and the CIGAR operations
132 // @usePadded - if true, counts inserted bases. Default is false, so that the alignment end position matches the last base's position in the reference
133 // @zeroBased - if true (the default), returns a 0-based coordinate; else returns a 1-based coordinate. Setting this to false is useful when combining BAM data with other, half-open formats.
134 int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;
136 // 'internal' utility methods (static: they operate on the buffer passed in, not on a particular alignment)
138 static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed); // NOTE(review): presumably scans up to tagDataLength bytes for TAG, advancing pTagData/numBytesParsed, and returns true if found - confirm
139 static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); // NOTE(review): presumably advances pTagData/numBytesParsed past a value of the given storageType - confirm
143 std::string Name; // Read name (character data - see BuildCharData())
144 int32_t Length; // Query length
145 std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)
146 std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)
147 std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
148 std::string TagData; // Tag data (the AddTag/EditTag/GetTag/RemoveTag accessor methods pull the requested information out)
149 int32_t RefID; // ID number for the reference sequence
150 int32_t Position; // Position (0-based) where the alignment starts
151 uint16_t Bin; // Bin in the BAM file where this alignment resides
152 uint16_t MapQuality; // Mapping quality score
153 uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate
154 std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
155 int32_t MateRefID; // ID number for the reference sequence where the alignment's mate was aligned
156 int32_t MatePosition; // Position (0-based) where the alignment's mate starts
157 int32_t InsertSize; // Mate-pair insert size
159 // Internal data, inaccessible to client code,
160 // but available to BamReaderPrivate & BamWriterPrivate (see the friend declarations below)
162 struct BamAlignmentSupportData {
165 std::string AllCharData; // NOTE(review): presumably the raw character data (read name, bases, qualities, tags) that BuildCharData() unpacks - confirm
166 uint32_t BlockLength;
167 uint32_t NumCigarOperations;
168 uint32_t QueryNameLength;
169 uint32_t QuerySequenceLength;
173 BamAlignmentSupportData(void) // constructor: zero-initializes the numeric members named in its initializer list
175 , NumCigarOperations(0)
177 , QuerySequenceLength(0)
181 BamAlignmentSupportData SupportData; // the support-data instance carried by each BamAlignment
182 friend class Internal::BamReaderPrivate; // gives the reader implementation access to BamAlignment internals such as SupportData
183 friend class Internal::BamWriterPrivate; // gives the writer implementation access to BamAlignment internals such as SupportData
185 // Alignment flag query constants
186 // Use the Is<something>()/SetIs<something>() methods above instead
202 // convenience typedef(s): BamAlignmentVector is a std::vector of BamAlignment objects
203 typedef std::vector<BamAlignment> BamAlignmentVector;
205 } // namespace BamTools
207 #endif // BAMALIGNMENT_H