1 // ***************************************************************************
2 // BamAlignment.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #ifndef BAMALIGNMENT_H
12 #define BAMALIGNMENT_H
14 #include <api/api_global.h>
15 #include <api/BamAux.h>
21 // forward declarations of the Internal classes that BamAlignment declares as friends
23 class BamReaderPrivate; // named in a friend declaration inside BamAlignment
24 class BamWriterPrivate; // named in a friend declaration inside BamAlignment
25 } // namespace Internal
27 // BamAlignment data structure: one alignment's data fields plus flag and tag accessors
28 struct API_EXPORT BamAlignment {
30 // constructors & destructor
33 BamAlignment(const BamAlignment& other); // copy constructor; 'other' is the alignment to copy from
36 // queries against alignment flags (all are const: they do not modify the alignment)
38 bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate
39 bool IsFailedQC(void) const; // returns true if this read failed quality control
40 bool IsFirstMate(void) const; // returns true if alignment is the first mate of the read
41 bool IsMapped(void) const; // returns true if alignment is mapped
42 bool IsMateMapped(void) const; // returns true if alignment's mate is mapped
43 bool IsMateReverseStrand(void) const; // returns true if alignment's mate is mapped to the reverse strand
44 bool IsPaired(void) const; // returns true if alignment is part of a paired-end read
45 bool IsPrimaryAlignment(void) const; // returns true if reported position is the primary alignment
46 bool IsProperPair(void) const; // returns true if alignment is part of a read that satisfied paired-end resolution
47 bool IsReverseStrand(void) const; // returns true if alignment is mapped to the reverse strand
48 bool IsSecondMate(void) const; // returns true if alignment is the second mate of the read
50 // manipulate alignment flags ('ok' is the new value of the named flag)
52 void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag
53 void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag
54 void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag
55 void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag
56 void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag
57 void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag
58 void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag
59 void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag
60 void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag
61 void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag
62 void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag
64 // legacy methods (consider deprecated, but still available)
// Each is phrased in the opposite sense of the setter named in its comment.
// NOTE(review): presumably SetIsX(ok) here matches calling the named setter with !ok - confirm in the implementation.
65 void SetIsMateUnmapped(bool ok); // complement of SetIsMateMapped()
66 void SetIsSecondaryAlignment(bool ok); // complement of SetIsPrimaryAlignment()
67 void SetIsUnmapped(bool ok); // complement of SetIsMapped()
69 // tag data access methods
72 // -------------------------------------------------------------------------------------
73 // N.B. - The following tag access methods cannot be used on BamAlignments fetched
74 // using BamReader::GetNextAlignmentCore(). Attempting to use them will not produce an
75 // error message (to keep output clean), but they will ALWAYS return false. Only user-created
76 // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
78 // You can call BuildCharData() on an alignment retrieved by GetNextAlignmentCore().
79 // This populates all the character data and enables subsequent queries on tag data.
80 // -------------------------------------------------------------------------------------
// adds a tag named 'tag' carrying a single value; one overload per value type
// 'type' is a string naming the tag's type - presumably the BAM tag-type code (cf. GetTagType()); TODO confirm
// NOTE(review): the bool result presumably reports success - confirm in the implementation
83 bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // string value
84 bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // unsigned 32-bit value
85 bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // signed 32-bit value
86 bool AddTag(const std::string& tag, const std::string& type, const float& value); // float value
88 // adds a "binary array" tag; one overload per element type, no separate 'type' argument
89 bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
90 bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
91 bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
92 bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
93 bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
94 bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
95 bool AddTag(const std::string& tag, const std::vector<float>& values);
// edits the tag named 'tag', giving it a single value; overload set mirrors AddTag
// 'type' is a string naming the tag's type - presumably the BAM tag-type code (cf. GetTagType()); TODO confirm
// NOTE(review): behavior when the tag does not yet exist is not visible in this header - check the implementation
98 bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // string value
99 bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // unsigned 32-bit value
100 bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // signed 32-bit value
101 bool EditTag(const std::string& tag, const std::string& type, const float& value); // float value
103 // edits a "binary array" tag; one overload per element type, no separate 'type' argument
104 bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
105 bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
106 bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
107 bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
108 bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
109 bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
110 bool EditTag(const std::string& tag, const std::vector<float>& values);
112 // retrieves data for a tag into 'destination'; one overload per destination type (const: alignment is not modified)
113 bool GetTag(const std::string& tag, std::string& destination) const;
114 bool GetTag(const std::string& tag, uint32_t& destination) const;
115 bool GetTag(const std::string& tag, int32_t& destination) const;
116 bool GetTag(const std::string& tag, float& destination) const;
118 // retrieves data for a "binary array" tag into 'destination'
// NOTE(review): only uint32_t, int32_t and float element types are declared here,
// whereas AddTag/EditTag also accept 8- and 16-bit element vectors.
119 bool GetTag(const std::string& tag, std::vector<uint32_t>& destination) const;
120 bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;
121 bool GetTag(const std::string& tag, std::vector<float>& destination) const;
123 // retrieves the BAM tag-type character for a tag into 'type'
124 bool GetTagType(const std::string& tag, char& type) const;
126 // legacy methods (consider deprecated, but still available); each reads one fixed tag
127 bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag into 'editDistance'
128 bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag into 'readGroup'
130 // returns true if alignment has a record for this tag name
131 bool HasTag(const std::string& tag) const;
// removes the tag named 'tag' (non-const: modifies the alignment)
// NOTE(review): meaning of the bool result is not visible in this header - confirm in the implementation
134 bool RemoveTag(const std::string& tag);
136 // additional methods
138 // populates alignment string fields (needed before tag queries on alignments
// fetched with BamReader::GetNextAlignmentCore(), per the note on the tag access methods)
139 bool BuildCharData(void);
140 // calculates alignment end position
// defaults: usePadded = false, zeroBased = true
// NOTE(review): the flag names suggest they select padded vs. unpadded and 0- vs. 1-based results - confirm in the implementation
141 int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;
143 // public data fields (directly readable and writable by callers)
145 std::string Name; // read name
146 int32_t Length; // length of query sequence
147 std::string QueryBases; // 'original' sequence (as reported from sequencing machine)
148 std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping)
149 std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
150 std::string TagData; // tag data (use AddTag/EditTag/GetTag/HasTag/RemoveTag to query/modify)
151 int32_t RefID; // ID number for reference sequence
152 int32_t Position; // position (0-based) where alignment starts
153 uint16_t Bin; // BAM (standard) index bin number for this alignment
154 uint16_t MapQuality; // mapping quality score
155 uint32_t AlignmentFlag; // alignment bit-flag (use the Is*/SetIs* methods to query/modify)
156 std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
157 int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned
158 int32_t MatePosition; // position (0-based) where alignment's mate starts
159 int32_t InsertSize; // mate-pair insert size
160 std::string Filename; // name of BAM file which this alignment comes from
// helper record nested in BamAlignment; the friend declarations below name the reader/writer internals
165 struct BamAlignmentSupportData {
168 std::string AllCharData; // NOTE(review): presumably the alignment's raw, unparsed character data - confirm
169 uint32_t BlockLength; // NOTE(review): presumably the size of the alignment's data block - confirm
170 uint32_t NumCigarOperations; // count of CIGAR operations
171 uint32_t QueryNameLength; // length of the query (read) name
172 uint32_t QuerySequenceLength; // length of the query sequence
176 BamAlignmentSupportData(void) // constructor; the initializers visible here set the counters below to 0
178 , NumCigarOperations(0)
180 , QuerySequenceLength(0)
184 BamAlignmentSupportData SupportData; // the support-data instance carried by each BamAlignment
185 friend class Internal::BamReaderPrivate; // friend declaration: reader internals
186 friend class Internal::BamWriterPrivate; // friend declaration: writer internals
190 typedef std::vector<BamAlignment> BamAlignmentVector; // convenience alias for a vector of BamAlignment objects
192 } // namespace BamTools
194 #endif // BAMALIGNMENT_H