1 // ***************************************************************************
2 // BamAlignment.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 22 April 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #ifndef BAMALIGNMENT_H
11 #define BAMALIGNMENT_H
13 #include <api/api_global.h>
14 #include <api/BamAux.h>
20 // forward declarations of BamAlignment's friend classes
// (both are named in friend declarations inside BamAlignment, later in this header)
22 class BamReaderPrivate;
23 class BamWriterPrivate;
24 } // namespace Internal
26 // BamAlignment data structure
27 struct API_EXPORT BamAlignment {
29 // constructors & destructor
// copy constructor: creates a new BamAlignment from an existing one ('other')
32 BamAlignment(const BamAlignment& other);
35 // queries against alignment flags
// All queries are const and report a single yes/no property of the alignment.
// NOTE(review): presumably each one tests a bit of the AlignmentFlag data field -
// confirm against the implementation file.
37 bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate
38 bool IsFailedQC(void) const; // returns true if this read failed quality control
39 bool IsFirstMate(void) const; // returns true if alignment is the first mate on the read
40 bool IsMapped(void) const; // returns true if alignment is mapped
41 bool IsMateMapped(void) const; // returns true if alignment's mate is mapped
42 bool IsMateReverseStrand(void) const; // returns true if alignment's mate is mapped to the reverse strand
43 bool IsPaired(void) const; // returns true if alignment is part of a paired-end read
44 bool IsPrimaryAlignment(void) const; // returns true if reported position is the primary alignment
45 bool IsProperPair(void) const; // returns true if alignment is part of a read that satisfied paired-end resolution
46 bool IsReverseStrand(void) const; // returns true if alignment is mapped to the reverse strand
47 bool IsSecondMate(void) const; // returns true if alignment is the second mate on the read
49 // manipulate alignment flags
// Each setter takes the new value of the named flag in 'ok' and returns nothing.
51 void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag
52 void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag
53 void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag
54 void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag
55 void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag
56 void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag
57 void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag
58 void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag
59 void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag
60 void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag
61 void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag
63 // legacy methods (consider deprecated, but still available)
// Each legacy setter is the logical complement of the setter named in its comment.
// NOTE(review): presumably SetIsUnmapped(true) has the same effect as SetIsMapped(false),
// and likewise for the other two - confirm against the implementation file.
64 void SetIsMateUnmapped(bool ok); // complement of using SetIsMateMapped()
65 void SetIsSecondaryAlignment(bool ok); // complement of using SetIsPrimaryAlignment()
66 void SetIsUnmapped(bool ok); // complement of using SetIsMapped()
68 // tag data access methods
71 // -------------------------------------------------------------------------------------
72 // N.B. - The following tag access methods may not be used on BamAlignments fetched
73 // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in
74 // an error message (to keep output clean), but they will ALWAYS return false. Only
75 // user-created BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
77 // You can call BuildCharData() on an alignment retrieved by GetNextAlignmentCore().
78 // This populates all the character data, and will enable subsequent queries on tag data.
79 // -------------------------------------------------------------------------------------
// adds a tag holding a single value; the overloads differ only in the C++ type of 'value'
// NOTE(review): 'type' is presumably the BAM tag-type code for the stored value, and the
// bool result presumably reports success - confirm against the implementation file.
82 bool AddTag(const std::string& tag, const std::string& type, const std::string& value);
83 bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value);
84 bool AddTag(const std::string& tag, const std::string& type, const int32_t& value);
85 bool AddTag(const std::string& tag, const std::string& type, const float& value);
87 // adds a "binary array" tag (no 'type' argument; one overload per supported element type)
88 bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
89 bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
90 bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
91 bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
92 bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
93 bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
94 bool AddTag(const std::string& tag, const std::vector<float>& values);
// edits a tag holding a single value; the overloads mirror the single-value AddTag() overloads
// NOTE(review): behavior when the tag is not already present is not visible in this header -
// confirm against the implementation file before relying on it.
97 bool EditTag(const std::string& tag, const std::string& type, const std::string& value);
98 bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value);
99 bool EditTag(const std::string& tag, const std::string& type, const int32_t& value);
100 bool EditTag(const std::string& tag, const std::string& type, const float& value);
102 // edits a "binary array" tag (no 'type' argument; one overload per supported element type)
103 bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
104 bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
105 bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
106 bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
107 bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
108 bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
109 bool EditTag(const std::string& tag, const std::vector<float>& values);
111 // retrieves data for a tag
// The value is handed back through the 'destination' reference; the alignment itself
// is not modified (all overloads are const).
112 bool GetTag(const std::string& tag, std::string& destination) const;
113 bool GetTag(const std::string& tag, uint32_t& destination) const;
114 bool GetTag(const std::string& tag, int32_t& destination) const;
115 bool GetTag(const std::string& tag, float& destination) const;
117 // retrieves data for a "binary array" tag
// Only uint32_t, int32_t and float element types are offered here, although the
// array overloads of AddTag()/EditTag() also accept 8- and 16-bit element types.
118 bool GetTag(const std::string& tag, std::vector<uint32_t>& destination) const;
119 bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;
120 bool GetTag(const std::string& tag, std::vector<float>& destination) const;
122 // retrieves the BAM tag-type character for a tag (written to 'type')
123 bool GetTagType(const std::string& tag, char& type) const;
125 // legacy methods (consider deprecated, but still available)
126 bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag (written to 'editDistance')
127 bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag (written to 'readGroup')
129 // returns true if alignment has a record for this tag name
130 bool HasTag(const std::string& tag) const;
// removes a tag
// NOTE(review): the bool result presumably reports whether the removal succeeded -
// confirm against the implementation file.
133 bool RemoveTag(const std::string& tag);
135 // additional methods
137 // populates alignment string fields
// (needed before tag queries on alignments fetched with BamReader::GetNextAlignmentCore(),
// per the note on the tag data access methods in this header)
138 bool BuildCharData(void);
139 // calculates alignment end position
// defaults: usePadded = false, zeroBased = true
// NOTE(review): the exact meaning of 'usePadded' is not visible in this header - confirm
// against the implementation file.
140 int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;
142 // public data fields
// AlignmentFlag and TagData are meant to be read and changed through the flag and
// tag methods declared in this struct rather than manipulated directly.
144 std::string Name; // read name
145 int32_t Length; // length of query sequence
146 std::string QueryBases; // 'original' sequence (as reported from sequencing machine)
147 std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping)
148 std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
149 std::string TagData; // tag data (use provided methods to query/modify)
150 int32_t RefID; // ID number for reference sequence
151 int32_t Position; // position (0-based) where alignment starts
152 uint16_t Bin; // BAM (standard) index bin number for this alignment
153 uint16_t MapQuality; // mapping quality score
154 uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify)
155 std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
156 int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned
157 int32_t MatePosition; // position (0-based) where alignment's mate starts
158 int32_t InsertSize; // mate-pair insert size
159 std::string Filename; // name of BAM file which this alignment comes from
162 // internal utility methods
// All three are const helpers that return a bool.
// NOTE(review): the parameter lists of FindTag() and SkipToNextTag() appear to be missing
// a line each in this listing - check the complete header before relying on these signatures.
// NOTE(review): FindTag() presumably locates 'tag' within the tag data and reports progress
// through 'numBytesParsed' - confirm against the implementation file.
164 bool FindTag(const std::string& tag,
166 const unsigned int& tagDataLength,
167 unsigned int& numBytesParsed) const;
// NOTE(review): presumably checks that 'tag' and 'type' have the expected lengths - confirm.
168 bool IsValidSize(const std::string& tag,
169 const std::string& type) const;
// NOTE(review): presumably advances past one tag's value, given its storage type,
// updating 'numBytesParsed' - confirm.
170 bool SkipToNextTag(const char storageType,
172 unsigned int& numBytesParsed) const;
// Supporting data kept alongside each alignment: one string plus four unsigned counters.
// NOTE(review): presumably this holds the raw record data and lengths that BuildCharData()
// later expands into the string fields - confirm against the implementation file.
177 struct BamAlignmentSupportData {
180 std::string AllCharData;
181 uint32_t BlockLength;
182 uint32_t NumCigarOperations;
183 uint32_t QueryNameLength;
184 uint32_t QuerySequenceLength;
// default constructor; the initializers visible here set NumCigarOperations and
// QuerySequenceLength to 0
// NOTE(review): the initializer list appears incomplete in this listing - check the
// complete header for the remaining members.
188 BamAlignmentSupportData(void)
190 , NumCigarOperations(0)
192 , QuerySequenceLength(0)
// the support-data instance carried by this alignment
196 BamAlignmentSupportData SupportData;
// the reader and writer implementation classes are friends, so they can access
// every member of BamAlignment, including any that are not public
197 friend class Internal::BamReaderPrivate;
198 friend class Internal::BamWriterPrivate;
// convenience name for a vector of BamAlignment objects
202 typedef std::vector<BamAlignment> BamAlignmentVector;
204 } // namespace BamTools
206 #endif // BAMALIGNMENT_H