1 // ***************************************************************************
2 // BamAlignment.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #ifndef BAMALIGNMENT_H
11 #define BAMALIGNMENT_H
13 #include "api/api_global.h"
14 #include "api/BamAux.h"
15 #include "api/BamConstants.h"
24 // forward declaration of BamAlignment's "friends"
26 class BamReaderPrivate;
27 class BamWriterPrivate;
28 } // namespace Internal
31 // BamAlignment data structure
32 struct API_EXPORT BamAlignment {
34 // constructors & destructor
37 BamAlignment(const BamAlignment& other);
40 // queries against alignment flags
42 bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate
43 bool IsFailedQC(void) const; // returns true if this read failed quality control
44 bool IsFirstMate(void) const; // returns true if alignment is first mate on read
45 bool IsMapped(void) const; // returns true if alignment is mapped
46 bool IsMateMapped(void) const; // returns true if alignment's mate is mapped
47 bool IsMateReverseStrand(void) const; // returns true if alignment's mate mapped to reverse strand
48 bool IsPaired(void) const; // returns true if alignment part of paired-end read
49 bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment
50 bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution
51 bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand
52 bool IsSecondMate(void) const; // returns true if alignment is second mate on read
54 // manipulate alignment flags
56 void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag
57 void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag
58 void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag
59 void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag
60 void SetIsMateMapped(bool ok); // sets value of "alignment's mate is mapped" flag
61 void SetIsMateReverseStrand(bool ok); // sets value of "alignment's mate mapped to reverse strand" flag
62 void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag
63 void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag
64 void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag
65 void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag
66 void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag
68 // tag data access methods
72 template<typename T> bool AddTag(const std::string& tag, const std::string& type, const T& value);
73 template<typename T> bool AddTag(const std::string& tag, const std::vector<T>& values);
75 // edit (or append) tag
76 template<typename T> bool EditTag(const std::string& tag, const std::string& type, const T& value);
77 template<typename T> bool EditTag(const std::string& tag, const std::vector<T>& values);
80 template<typename T> bool GetTag(const std::string& tag, T& destination) const;
81 template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const;
83 // retrieves the SAM/BAM type-code for requested tag name
84 bool GetTagType(const std::string& tag, char& type) const;
86 // returns true if alignment has a record for this tag name
87 bool HasTag(const std::string& tag) const;
90 void RemoveTag(const std::string& tag);
94 // populates alignment string fields
95 bool BuildCharData(void);
97 // calculates alignment end position
98 int GetEndPosition(bool usePadded = false, bool closedInterval = false) const;
100 // returns a description of the last error that occurred
101 std::string GetErrorString(void) const;
103 // public data fields
105 std::string Name; // read name
106 int32_t Length; // length of query sequence
107 std::string QueryBases; // 'original' sequence (as reported from sequencing machine)
108 std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping)
109 std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
110 std::string TagData; // tag data (use provided methods to query/modify)
111 int32_t RefID; // ID number for reference sequence
112 int32_t Position; // position (0-based) where alignment starts
113 uint16_t Bin; // BAM (standard) index bin number for this alignment
114 uint16_t MapQuality; // mapping quality score
115 uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify)
116 std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
117 int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned
118 int32_t MatePosition; // position (0-based) where alignment's mate starts
119 int32_t InsertSize; // mate-pair insert size
120 std::string Filename; // name of BAM file which this alignment comes from
123 // internal utility methods
125 bool FindTag(const std::string& tag,
127 const unsigned int& tagDataLength,
128 unsigned int& numBytesParsed) const;
129 bool IsValidSize(const std::string& tag, const std::string& type) const;
130 void SetErrorString(const std::string& where, const std::string& what) const;
131 bool SkipToNextTag(const char storageType,
133 unsigned int& numBytesParsed) const;
138 struct BamAlignmentSupportData {
141 std::string AllCharData;
142 uint32_t BlockLength;
143 uint32_t NumCigarOperations;
144 uint32_t QueryNameLength;
145 uint32_t QuerySequenceLength;
149 BamAlignmentSupportData(void)
151 , NumCigarOperations(0)
153 , QuerySequenceLength(0)
157 BamAlignmentSupportData SupportData;
158 friend class Internal::BamReaderPrivate;
159 friend class Internal::BamWriterPrivate;
161 mutable std::string ErrorString; // mutable to allow updates even in logically const methods
165 // ---------------------------------------------------------
166 // BamAlignment tag access methods
168 /*! \fn bool AddTag(const std::string& tag, const std::string& type, const T& value)
169 \brief Adds a field to the BAM tags.
171 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
173 \param[in] tag 2-character tag name
174 \param[in] type 1-character tag type
175 \param[in] value data to store
176 \return \c true if the \b new tag was added successfully
177 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
180 inline bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const T& value) {
182 // if char data not populated, do that first
183 if ( SupportData.HasCoreOnly )
186 // check tag/type size
187 if ( !IsValidSize(tag, type) ) {
188 // TODO: set error string?
192 // check that storage type code is OK for T
193 if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) ) {
194 // TODO: set error string?
198 // localize the tag data
199 char* pTagData = (char*)TagData.data();
200 const unsigned int tagDataLength = TagData.size();
201 unsigned int numBytesParsed = 0;
203 // if tag already exists, return false
204 // use EditTag explicitly instead
205 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
206 // TODO: set error string?
210 // otherwise, convert value to string
211 union { T value; char valueBuffer[sizeof(T)]; } un;
214 // copy original tag data to temp buffer
215 const std::string newTag = tag + type;
216 const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T
217 RaiiBuffer originalTagData(newTagDataLength);
218 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
221 strcat(originalTagData.Buffer + tagDataLength, newTag.data());
222 memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
224 // store temp buffer back in TagData
225 const char* newTagData = (const char*)originalTagData.Buffer;
226 TagData.assign(newTagData, newTagDataLength);
231 inline bool BamAlignment::AddTag<std::string>(const std::string& tag,
232 const std::string& type,
233 const std::string& value)
235 // if char data not populated, do that first
236 if ( SupportData.HasCoreOnly )
239 // check tag/type size
240 if ( !IsValidSize(tag, type) ) {
241 // TODO: set error string?
245 // check that storage type code is OK for string
246 if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) ) {
247 // TODO: set error string?
251 // localize the tag data
252 char* pTagData = (char*)TagData.data();
253 const unsigned int tagDataLength = TagData.size();
254 unsigned int numBytesParsed = 0;
256 // if tag already exists, return false
257 // use EditTag explicitly instead
258 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
259 // TODO: set error string?
263 // otherwise, copy tag data to temp buffer
264 const std::string newTag = tag + type + value;
265 const size_t newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
266 RaiiBuffer originalTagData(newTagDataLength);
267 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
269 // append newTag (removes original null-term, then appends newTag + null-term)
270 strcat(originalTagData.Buffer + tagDataLength, newTag.data());
272 // store temp buffer back in TagData
273 const char* newTagData = (const char*)originalTagData.Buffer;
274 TagData.assign(newTagData, newTagDataLength);
278 /*! \fn template<typename T> bool AddTag(const std::string& tag, const std::vector<T>& values)
279 \brief Adds a numeric array field to the BAM tags.
281 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
283 \param[in] tag 2-character tag name
284 \param[in] values vector of data values to store
285 \return \c true if the \b new tag was added successfully
286 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
289 inline bool BamAlignment::AddTag(const std::string& tag, const std::vector<T>& values) {
291 // if char data not populated, do that first
292 if ( SupportData.HasCoreOnly )
295 // check for valid tag name length
296 if ( tag.size() != Constants::BAM_TAG_TAGSIZE )
299 // localize the tag data
300 char* pTagData = (char*)TagData.data();
301 const unsigned int tagDataLength = TagData.size();
302 unsigned int numBytesParsed = 0;
304 // if tag already exists, return false
305 // use EditTag explicitly instead
306 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
307 // TODO: set error string?
311 // build new tag's base information
312 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
313 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
314 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
315 newTagBase[3] = TagTypeHelper<T>::TypeCode();
317 // add number of array elements to newTagBase
318 const int32_t numElements = values.size();
319 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
321 // copy current TagData string to temp buffer, leaving room for new tag's contents
322 const size_t newTagDataLength = tagDataLength +
323 Constants::BAM_TAG_ARRAYBASE_SIZE +
324 numElements*sizeof(T);
325 RaiiBuffer originalTagData(newTagDataLength);
326 memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
328 // write newTagBase (removes old null term)
329 strcat(originalTagData.Buffer + tagDataLength, (const char*)newTagBase);
331 // add vector elements to tag
332 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
333 for ( int i = 0 ; i < numElements; ++i ) {
334 const T& value = values.at(i);
335 memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
338 // store temp buffer back in TagData
339 const char* newTagData = (const char*)originalTagData.Buffer;
340 TagData.assign(newTagData, newTagDataLength);
344 /*! \fn template<typename T> bool EditTag(const std::string& tag, const std::string& type, const T& value)
345 \brief Edits a BAM tag field.
347 If \a tag does not exist, a new entry is created.
349 \param tag[in] 2-character tag name
350 \param type[in] 1-character tag type (must be "Z" or "H")
351 \param value[in] new data value
353 \return \c true if the tag was modified/created successfully
355 \sa BamAlignment::RemoveTag()
356 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
359 inline bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const T& value) {
361 // if char data not populated, do that first
362 if ( SupportData.HasCoreOnly )
365 // remove existing tag if present, then append tag with new value
368 return AddTag(tag, type, value);
371 /*! \fn template<typename T> bool EditTag(const std::string& tag, const std::vector<T>& values)
372 \brief Edits a BAM tag field containing a numeric array.
374 If \a tag does not exist, a new entry is created.
376 \param tag[in] 2-character tag name
377 \param value[in] vector of data values
379 \return \c true if the tag was modified/created successfully
380 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
383 inline bool BamAlignment::EditTag(const std::string& tag, const std::vector<T>& values) {
385 // if char data not populated, do that first
386 if ( SupportData.HasCoreOnly )
389 // remove existing tag if present, then append tag with new values
392 return AddTag(tag, values);
396 /*! \fn template<typename T> bool GetTag(const std::string& tag, T& destination) const
397 \brief Retrieves the value associated with a BAM tag.
399 \param tag[in] 2-character tag name
400 \param destination[out] retrieved value
401 \return \c true if found
404 inline bool BamAlignment::GetTag(const std::string& tag, T& destination) const {
406 // skip if alignment is core-only
407 if ( SupportData.HasCoreOnly ) {
408 // TODO: set error string?
412 // skip if no tags present
413 if ( TagData.empty() ) {
414 // TODO: set error string?
418 // localize the tag data
419 char* pTagData = (char*)TagData.data();
420 const unsigned int tagDataLength = TagData.size();
421 unsigned int numBytesParsed = 0;
423 // return failure if tag not found
424 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
425 // TODO: set error string?
430 const char type = *(pTagData - 1);
431 if ( !TagTypeHelper<T>::CanConvertFrom(type) ) {
432 // TODO: set error string ?
436 // determine data length
437 int destinationLength = 0;
441 case (Constants::BAM_TAG_TYPE_ASCII) :
442 case (Constants::BAM_TAG_TYPE_INT8) :
443 case (Constants::BAM_TAG_TYPE_UINT8) :
444 destinationLength = 1;
448 case (Constants::BAM_TAG_TYPE_INT16) :
449 case (Constants::BAM_TAG_TYPE_UINT16) :
450 destinationLength = 2;
454 case (Constants::BAM_TAG_TYPE_INT32) :
455 case (Constants::BAM_TAG_TYPE_UINT32) :
456 case (Constants::BAM_TAG_TYPE_FLOAT) :
457 destinationLength = 4;
460 // var-length types not supported for numeric destination
461 case (Constants::BAM_TAG_TYPE_STRING) :
462 case (Constants::BAM_TAG_TYPE_HEX) :
463 case (Constants::BAM_TAG_TYPE_ARRAY) :
464 SetErrorString("BamAlignment::GetTag",
465 "cannot store variable length tag data into a numeric destination");
468 // unrecognized tag type
470 const std::string message = std::string("invalid tag type: ") + type;
471 SetErrorString("BamAlignment::GetTag", message);
475 // store data in destination
477 memcpy(&destination, pTagData, destinationLength);
484 inline bool BamAlignment::GetTag<std::string>(const std::string& tag,
485 std::string& destination) const
487 // skip if alignment is core-only
488 if ( SupportData.HasCoreOnly ) {
489 // TODO: set error string?
493 // skip if no tags present
494 if ( TagData.empty() ) {
495 // TODO: set error string?
499 // localize the tag data
500 char* pTagData = (char*)TagData.data();
501 const unsigned int tagDataLength = TagData.size();
502 unsigned int numBytesParsed = 0;
504 // return failure if tag not found
505 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
506 // TODO: set error string?
510 // otherwise copy data into destination
511 const unsigned int dataLength = strlen(pTagData);
513 destination.resize(dataLength);
514 memcpy( (char*)destination.data(), pTagData, dataLength );
520 /*! \fn template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const
521 \brief Retrieves the numeric array associated with a BAM tag.
523 \param tag[in] 2-character tag name
524 \param destination[out] retrieved values
525 \return \c true if found
528 inline bool BamAlignment::GetTag(const std::string& tag, std::vector<T>& destination) const {
530 // skip if alignment is core-only
531 if ( SupportData.HasCoreOnly ) {
532 // TODO: set error string?
536 // skip if no tags present
537 if ( TagData.empty() ) {
538 // TODO: set error string?
542 // localize the tag data
543 char* pTagData = (char*)TagData.data();
544 const unsigned int tagDataLength = TagData.size();
545 unsigned int numBytesParsed = 0;
547 // return false if tag not found
548 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
549 // TODO: set error string?
553 // check that tag is array type
554 const char tagType = *(pTagData - 1);
555 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
556 SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination");
560 // fetch element type
561 const char elementType = *pTagData;
562 if ( !TagTypeHelper<T>::CanConvertFrom(elementType) ) {
563 // TODO: set error string ?
568 // calculate length of each element in tag's array
569 int elementLength = 0;
570 switch ( elementType ) {
571 case (Constants::BAM_TAG_TYPE_ASCII) :
572 case (Constants::BAM_TAG_TYPE_INT8) :
573 case (Constants::BAM_TAG_TYPE_UINT8) :
574 elementLength = sizeof(uint8_t);
577 case (Constants::BAM_TAG_TYPE_INT16) :
578 case (Constants::BAM_TAG_TYPE_UINT16) :
579 elementLength = sizeof(uint16_t);
582 case (Constants::BAM_TAG_TYPE_INT32) :
583 case (Constants::BAM_TAG_TYPE_UINT32) :
584 case (Constants::BAM_TAG_TYPE_FLOAT) :
585 elementLength = sizeof(uint32_t);
588 // var-length types not supported for numeric destination
589 case (Constants::BAM_TAG_TYPE_STRING) :
590 case (Constants::BAM_TAG_TYPE_HEX) :
591 case (Constants::BAM_TAG_TYPE_ARRAY) :
592 SetErrorString("BamAlignment::GetTag",
593 "invalid array data, variable-length elements are not allowed");
598 const std::string message = std::string("invalid array element type: ") + elementType;
599 SetErrorString("BamAlignment::GetTag", message);
603 // get number of elements
605 memcpy(&numElements, pTagData, sizeof(int32_t));
608 destination.reserve(numElements);
612 for ( int i = 0 ; i < numElements; ++i ) {
613 memcpy(&value, pTagData, sizeof(T));
614 pTagData += sizeof(T);
615 destination.push_back(value);
622 typedef std::vector<BamAlignment> BamAlignmentVector;
624 } // namespace BamTools
626 #endif // BAMALIGNMENT_H