1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 #include <api/BamConstants.h>
13 using namespace BamTools;
25 // internal utility methods
29 /*! \fn bool IsValidSize(const string& tag, const string& type)
32 Checks that tag name & type strings are expected sizes.
33 \a tag should have length
34 \a type should have length 1
36 \param tag BAM tag name
37 \param type BAM tag type-code
39 \return \c true if both \a tag and \a type are correct sizes
41 bool IsValidSize(const string& tag, const string& type) {
42 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
43 (type.size() == Constants::BAM_TAG_TYPESIZE);
46 /*! \fn bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed)
49 Moves to next available tag in tag data string
51 \param storageType BAM tag type-code that determines how far to move cursor
52 \param pTagData pointer to current position (cursor) in tag string
53 \param numBytesParsed report of how many bytes were parsed (cumulatively)
55 \return \c if storageType was a recognized BAM tag type
56 \post \a pTagData will point to the byte where the next tag data begins.
57 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
59 bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
61 switch (storageType) {
63 case (Constants::BAM_TAG_TYPE_ASCII) :
64 case (Constants::BAM_TAG_TYPE_INT8) :
65 case (Constants::BAM_TAG_TYPE_UINT8) :
70 case (Constants::BAM_TAG_TYPE_INT16) :
71 case (Constants::BAM_TAG_TYPE_UINT16) :
72 numBytesParsed += sizeof(uint16_t);
73 pTagData += sizeof(uint16_t);
76 case (Constants::BAM_TAG_TYPE_FLOAT) :
77 case (Constants::BAM_TAG_TYPE_INT32) :
78 case (Constants::BAM_TAG_TYPE_UINT32) :
79 numBytesParsed += sizeof(uint32_t);
80 pTagData += sizeof(uint32_t);
83 case (Constants::BAM_TAG_TYPE_STRING) :
84 case (Constants::BAM_TAG_TYPE_HEX) :
89 // increment for null-terminator
94 case (Constants::BAM_TAG_TYPE_ARRAY) :
98 const char arrayType = *pTagData;
102 // read number of elements
104 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
105 numBytesParsed += sizeof(uint32_t);
106 pTagData += sizeof(uint32_t);
108 // calculate number of bytes to skip
111 case (Constants::BAM_TAG_TYPE_INT8) :
112 case (Constants::BAM_TAG_TYPE_UINT8) :
113 bytesToSkip = numElements;
115 case (Constants::BAM_TAG_TYPE_INT16) :
116 case (Constants::BAM_TAG_TYPE_UINT16) :
117 bytesToSkip = numElements*sizeof(uint16_t);
119 case (Constants::BAM_TAG_TYPE_FLOAT) :
120 case (Constants::BAM_TAG_TYPE_INT32) :
121 case (Constants::BAM_TAG_TYPE_UINT32) :
122 bytesToSkip = numElements*sizeof(uint32_t);
125 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
126 << arrayType << endl;
130 // skip binary array contents
131 numBytesParsed += bytesToSkip;
132 pTagData += bytesToSkip;
137 cerr << "BamAlignment ERROR: unknown tag type encountered"
138 << storageType << endl;
146 /*! \fn bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
149 Searches for requested tag in BAM tag data.
151 \param tag requested 2-character tag name
152 \param pTagData pointer to current position in BamAlignment::TagData
153 \param tagDataLength length of BamAlignment::TagData
154 \param numBytesParsed number of bytes parsed so far
156 \return \c true if found
158 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
159 \a numBytesParsed will correspond to the position in the full TagData string.
162 bool FindTag(const std::string& tag,
164 const unsigned int& tagDataLength,
165 unsigned int& numBytesParsed)
168 while ( numBytesParsed < tagDataLength ) {
170 const char* pTagType = pTagData;
171 const char* pTagStorageType = pTagData + 2;
175 // check the current tag, return true on match
176 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
179 // get the storage class and find the next tag
180 if ( *pTagStorageType == '\0' ) return false;
181 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
182 if ( *pTagData == '\0' ) return false;
185 // checked all tags, none match
189 } // namespace Internal
190 } // namespace BamTools
192 /*! \class BamTools::BamAlignment
193 \brief The main BAM alignment data structure.
195 Provides methods to query/modify BAM alignment data fields.
197 /*! \var BamAlignment::Name
200 /*! \var BamAlignment::Length
201 \brief length of query sequence
203 /*! \var BamAlignment::QueryBases
204 \brief 'original' sequence (as reported from sequencing machine)
206 /*! \var BamAlignment::AlignedBases
207 \brief 'aligned' sequence (includes any indels, padding, clipping)
209 /*! \var BamAlignment::Qualities
210 \brief FASTQ qualities (ASCII characters, not numeric values)
212 /*! \var BamAlignment::TagData
213 \brief tag data (use the provided methods to query/modify)
215 /*! \var BamAlignment::RefID
216 \brief ID number for reference sequence
218 /*! \var BamAlignment::Position
219 \brief position (0-based) where alignment starts
221 /*! \var BamAlignment::Bin
222 \brief BAM (standard) index bin number for this alignment
224 /*! \var BamAlignment::MapQuality
225 \brief mapping quality score
227 /*! \var BamAlignment::AlignmentFlag
228 \brief alignment bit-flag (use the provided methods to query/modify)
230 /*! \var BamAlignment::CigarData
231 \brief CIGAR operations for this alignment
233 /*! \var BamAlignment::MateRefID
234 \brief ID number for reference sequence where alignment's mate was aligned
236 /*! \var BamAlignment::MatePosition
237 \brief position (0-based) where alignment's mate starts
239 /*! \var BamAlignment::InsertSize
240 \brief mate-pair insert size
242 /*! \var BamAlignment::Filename
243 \brief name of BAM file which this alignment comes from
246 /*! \fn BamAlignment::BamAlignment(void)
249 BamAlignment::BamAlignment(void)
257 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
258 \brief copy constructor
260 BamAlignment::BamAlignment(const BamAlignment& other)
262 , Length(other.Length)
263 , QueryBases(other.QueryBases)
264 , AlignedBases(other.AlignedBases)
265 , Qualities(other.Qualities)
266 , TagData(other.TagData)
268 , Position(other.Position)
270 , MapQuality(other.MapQuality)
271 , AlignmentFlag(other.AlignmentFlag)
272 , CigarData(other.CigarData)
273 , MateRefID(other.MateRefID)
274 , MatePosition(other.MatePosition)
275 , InsertSize(other.InsertSize)
276 , Filename(other.Filename)
277 , SupportData(other.SupportData)
280 /*! \fn BamAlignment::~BamAlignment(void)
283 BamAlignment::~BamAlignment(void) { }
285 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
286 \brief Adds a field with string data to the BAM tags.
288 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
290 \param tag 2-character tag name
291 \param type 1-character tag type (must be "Z" or "H")
292 \param value string data to store
294 \return \c true if the \b new tag was added successfully
295 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
297 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
299 // skip if core data not parsed
300 if ( SupportData.HasCoreOnly ) return false;
302 // validate tag/type size & that type is OK for string value
303 if ( !Internal::IsValidSize(tag, type) ) return false;
304 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
305 type.at(0) != Constants::BAM_TAG_TYPE_HEX
311 // localize the tag data
312 char* pTagData = (char*)TagData.data();
313 const unsigned int tagDataLength = TagData.size();
314 unsigned int numBytesParsed = 0;
316 // if tag already exists, return false
317 // use EditTag explicitly instead
318 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
321 // otherwise, copy tag data to temp buffer
322 string newTag = tag + type + value;
323 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
324 char originalTagData[newTagDataLength];
325 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
328 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
330 // store temp buffer back in TagData
331 const char* newTagData = (const char*)originalTagData;
332 TagData.assign(newTagData, newTagDataLength);
338 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
339 \brief Adds a field with unsigned integer data to the BAM tags.
341 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
343 \param tag 2-character tag name
344 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
345 \param value unsigned int data to store
347 \return \c true if the \b new tag was added successfully
348 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
350 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
352 // skip if core data not parsed
353 if ( SupportData.HasCoreOnly ) return false;
355 // validate tag/type size & that type is OK for uint32_t value
356 if ( !Internal::IsValidSize(tag, type) ) return false;
357 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
358 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
359 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
360 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
366 // localize the tag data
367 char* pTagData = (char*)TagData.data();
368 const unsigned int tagDataLength = TagData.size();
369 unsigned int numBytesParsed = 0;
371 // if tag already exists, return false
372 // use EditTag explicitly instead
373 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
376 // otherwise, convert value to string
377 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
380 // copy original tag data to temp buffer
381 string newTag = tag + type;
382 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
383 char originalTagData[newTagDataLength];
384 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
387 strcat(originalTagData + tagDataLength, newTag.data());
388 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
390 // store temp buffer back in TagData
391 const char* newTagData = (const char*)originalTagData;
392 TagData.assign(newTagData, newTagDataLength);
398 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
399 \brief Adds a field with signed integer data to the BAM tags.
401 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
403 \param tag 2-character tag name
404 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
405 \param value signed int data to store
407 \return \c true if the \b new tag was added successfully
408 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
410 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
411 return AddTag(tag, type, (const uint32_t&)value);
414 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
415 \brief Adds a field with floating-point data to the BAM tags.
417 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
419 \param tag 2-character tag name
420 \param type 1-character tag type (must NOT be "Z", "H", or "B")
421 \param value float data to store
423 \return \c true if the \b new tag was added successfully
424 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
426 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
428 // skip if core data not parsed
429 if ( SupportData.HasCoreOnly ) return false;
431 // validate tag/type size & that type is OK for float value
432 if ( !Internal::IsValidSize(tag, type) ) return false;
433 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
434 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
435 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
441 // localize the tag data
442 char* pTagData = (char*)TagData.data();
443 const unsigned int tagDataLength = TagData.size();
444 unsigned int numBytesParsed = 0;
446 // if tag already exists, return false
447 // use EditTag explicitly instead
448 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
451 // otherwise, convert value to string
452 union { float value; char valueBuffer[sizeof(float)]; } un;
455 // copy original tag data to temp buffer
456 string newTag = tag + type;
457 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
458 char originalTagData[newTagDataLength];
459 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
462 strcat(originalTagData + tagDataLength, newTag.data());
463 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
465 // store temp buffer back in TagData
466 const char* newTagData = (const char*)originalTagData;
467 TagData.assign(newTagData, newTagDataLength);
473 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
474 \brief Adds a numeric array field to the BAM tags.
476 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
478 \param tag 2-character tag name
479 \param values vector of uint8_t values to store
481 \return \c true if the \b new tag was added successfully
482 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
484 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
486 // skip if core data not parsed
487 if ( SupportData.HasCoreOnly ) return false;
489 // check for valid tag length
490 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
492 // localize the tag data
493 char* pTagData = (char*)TagData.data();
494 const unsigned int tagDataLength = TagData.size();
495 unsigned int numBytesParsed = 0;
497 // if tag already exists, return false
498 // use EditTag explicitly instead
499 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
502 // build new tag's base information
503 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
504 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
505 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
506 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
508 // add number of array elements to newTagBase
509 const int32_t numElements = values.size();
510 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
512 // copy current TagData string to temp buffer, leaving room for new tag's contents
513 const int newTagDataLength = tagDataLength +
514 Constants::BAM_TAG_ARRAYBASE_SIZE +
515 numElements*sizeof(uint8_t);
516 char originalTagData[newTagDataLength];
517 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
519 // write newTagBase (removes old null term)
520 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
522 // add vector elements to tag
523 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
524 for ( int i = 0 ; i < numElements; ++i ) {
525 const uint8_t value = values.at(i);
526 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
527 &value, sizeof(uint8_t));
530 // store temp buffer back in TagData
531 const char* newTagData = (const char*)originalTagData;
532 TagData.assign(newTagData, newTagDataLength);
538 /*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
539 \brief Adds a numeric array field to the BAM tags.
541 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
543 \param tag 2-character tag name
544 \param values vector of int8_t values to store
546 \return \c true if the \b new tag was added successfully
547 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
549 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
551 // skip if core data not parsed
552 if ( SupportData.HasCoreOnly ) return false;
554 // check for valid tag length
555 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
557 // localize the tag data
558 char* pTagData = (char*)TagData.data();
559 const unsigned int tagDataLength = TagData.size();
560 unsigned int numBytesParsed = 0;
562 // if tag already exists, return false
563 // use EditTag explicitly instead
564 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
567 // build new tag's base information
568 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
569 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
570 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
571 newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
573 // add number of array elements to newTagBase
574 const int32_t numElements = values.size();
575 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
577 // copy current TagData string to temp buffer, leaving room for new tag's contents
578 const int newTagDataLength = tagDataLength +
579 Constants::BAM_TAG_ARRAYBASE_SIZE +
580 numElements*sizeof(int8_t);
581 char originalTagData[newTagDataLength];
582 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
584 // write newTagBase (removes old null term)
585 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
587 // add vector elements to tag
588 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
589 for ( int i = 0 ; i < numElements; ++i ) {
590 const int8_t value = values.at(i);
591 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
592 &value, sizeof(int8_t));
595 // store temp buffer back in TagData
596 const char* newTagData = (const char*)originalTagData;
597 TagData.assign(newTagData, newTagDataLength);
603 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
604 \brief Adds a numeric array field to the BAM tags.
606 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
608 \param tag 2-character tag name
609 \param values vector of uint16_t values to store
611 \return \c true if the \b new tag was added successfully
612 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
614 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
616 // skip if core data not parsed
617 if ( SupportData.HasCoreOnly ) return false;
619 // check for valid tag length
620 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
622 // localize the tag data
623 char* pTagData = (char*)TagData.data();
624 const unsigned int tagDataLength = TagData.size();
625 unsigned int numBytesParsed = 0;
627 // if tag already exists, return false
628 // use EditTag explicitly instead
629 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
632 // build new tag's base information
633 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
634 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
635 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
636 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
638 // add number of array elements to newTagBase
639 const int32_t numElements = values.size();
640 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
642 // copy current TagData string to temp buffer, leaving room for new tag's contents
643 const int newTagDataLength = tagDataLength +
644 Constants::BAM_TAG_ARRAYBASE_SIZE +
645 numElements*sizeof(uint16_t);
646 char originalTagData[newTagDataLength];
647 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
649 // write newTagBase (removes old null term)
650 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
652 // add vector elements to tag
653 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
654 for ( int i = 0 ; i < numElements; ++i ) {
655 const uint16_t value = values.at(i);
656 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
657 &value, sizeof(uint16_t));
660 // store temp buffer back in TagData
661 const char* newTagData = (const char*)originalTagData;
662 TagData.assign(newTagData, newTagDataLength);
668 /*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
669 \brief Adds a numeric array field to the BAM tags.
671 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
673 \param tag 2-character tag name
674 \param values vector of int16_t values to store
676 \return \c true if the \b new tag was added successfully
677 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
679 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
681 // skip if core data not parsed
682 if ( SupportData.HasCoreOnly ) return false;
684 // check for valid tag length
685 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
687 // localize the tag data
688 char* pTagData = (char*)TagData.data();
689 const unsigned int tagDataLength = TagData.size();
690 unsigned int numBytesParsed = 0;
692 // if tag already exists, return false
693 // use EditTag explicitly instead
694 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
697 // build new tag's base information
698 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
699 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
700 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
701 newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
703 // add number of array elements to newTagBase
704 const int32_t numElements = values.size();
705 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
707 // copy current TagData string to temp buffer, leaving room for new tag's contents
708 const int newTagDataLength = tagDataLength +
709 Constants::BAM_TAG_ARRAYBASE_SIZE +
710 numElements*sizeof(int16_t);
711 char originalTagData[newTagDataLength];
712 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
714 // write newTagBase (removes old null term)
715 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
717 // add vector elements to tag
718 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
719 for ( int i = 0 ; i < numElements; ++i ) {
720 const int16_t value = values.at(i);
721 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
722 &value, sizeof(int16_t));
725 // store temp buffer back in TagData
726 const char* newTagData = (const char*)originalTagData;
727 TagData.assign(newTagData, newTagDataLength);
733 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
734 \brief Adds a numeric array field to the BAM tags.
736 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
738 \param tag 2-character tag name
739 \param values vector of uint32_t values to store
741 \return \c true if the \b new tag was added successfully
742 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
744 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
746 // skip if core data not parsed
747 if ( SupportData.HasCoreOnly ) return false;
749 // check for valid tag length
750 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
752 // localize the tag data
753 char* pTagData = (char*)TagData.data();
754 const unsigned int tagDataLength = TagData.size();
755 unsigned int numBytesParsed = 0;
757 // if tag already exists, return false
758 // use EditTag explicitly instead
759 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
762 // build new tag's base information
763 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
764 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
765 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
766 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
768 // add number of array elements to newTagBase
769 const int32_t numElements = values.size();
770 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
772 // copy current TagData string to temp buffer, leaving room for new tag's contents
773 const int newTagDataLength = tagDataLength +
774 Constants::BAM_TAG_ARRAYBASE_SIZE +
775 numElements*sizeof(uint32_t);
776 char originalTagData[newTagDataLength];
777 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
779 // write newTagBase (removes old null term)
780 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
782 // add vector elements to tag
783 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
784 for ( int i = 0 ; i < numElements; ++i ) {
785 const uint32_t value = values.at(i);
786 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
787 &value, sizeof(uint32_t));
790 // store temp buffer back in TagData
791 const char* newTagData = (const char*)originalTagData;
792 TagData.assign(newTagData, newTagDataLength);
798 /*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
799 \brief Adds a numeric array field to the BAM tags.
801 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
803 \param tag 2-character tag name
804 \param values vector of int32_t values to store
806 \return \c true if the \b new tag was added successfully
807 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
809 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
811 // skip if core data not parsed
812 if ( SupportData.HasCoreOnly ) return false;
814 // check for valid tag length
815 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
817 // localize the tag data
818 char* pTagData = (char*)TagData.data();
819 const unsigned int tagDataLength = TagData.size();
820 unsigned int numBytesParsed = 0;
822 // if tag already exists, return false
823 // use EditTag explicitly instead
824 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
827 // build new tag's base information
828 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
829 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
830 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
831 newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
833 // add number of array elements to newTagBase
834 const int32_t numElements = values.size();
835 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
837 // copy current TagData string to temp buffer, leaving room for new tag's contents
838 const int newTagDataLength = tagDataLength +
839 Constants::BAM_TAG_ARRAYBASE_SIZE +
840 numElements*sizeof(int32_t);
841 char originalTagData[newTagDataLength];
842 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
844 // write newTagBase (removes old null term)
845 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
847 // add vector elements to tag
848 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
849 for ( int i = 0 ; i < numElements; ++i ) {
850 const int32_t value = values.at(i);
851 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
852 &value, sizeof(int32_t));
855 // store temp buffer back in TagData
856 const char* newTagData = (const char*)originalTagData;
857 TagData.assign(newTagData, newTagDataLength);
863 /*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
864 \brief Adds a numeric array field to the BAM tags.
866 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
868 \param tag 2-character tag name
869 \param values vector of float values to store
871 \return \c true if the \b new tag was added successfully
872 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
874 bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
876 // skip if core data not parsed
877 if ( SupportData.HasCoreOnly ) return false;
879 // check for valid tag length
880 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
882 // localize the tag data
883 char* pTagData = (char*)TagData.data();
884 const unsigned int tagDataLength = TagData.size();
885 unsigned int numBytesParsed = 0;
887 // if tag already exists, return false
888 // use EditTag explicitly instead
889 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
892 // build new tag's base information
893 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
894 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
895 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
896 newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
898 // add number of array elements to newTagBase
899 const int32_t numElements = values.size();
900 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
902 // copy current TagData string to temp buffer, leaving room for new tag's contents
903 const int newTagDataLength = tagDataLength +
904 Constants::BAM_TAG_ARRAYBASE_SIZE +
905 numElements*sizeof(float);
906 char originalTagData[newTagDataLength];
907 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
909 // write newTagBase (removes old null term)
910 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
912 // add vector elements to tag
913 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
914 for ( int i = 0 ; i < numElements; ++i ) {
915 const float value = values.at(i);
916 memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
917 &value, sizeof(float));
920 // store temp buffer back in TagData
921 const char* newTagData = (const char*)originalTagData;
922 TagData.assign(newTagData, newTagDataLength);
928 /*! \fn bool BamAlignment::BuildCharData(void)
929 \brief Populates alignment string fields (read name, bases, qualities, tag data).
931 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
932 Using that method makes parsing much quicker when only positional data is required.
934 However, if you later want to access the character data fields from such an alignment,
935 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
938 \return \c true if character data populated successfully (or was already available to begin with)
940 bool BamAlignment::BuildCharData(void) {
942 // skip if char data already parsed
943 if ( !SupportData.HasCoreOnly )
946 // check system endianness
947 bool IsBigEndian = BamTools::SystemIsBigEndian();
949 // calculate character lengths/offsets
950 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
951 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
952 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
953 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
954 const unsigned int tagDataLength = dataLength - tagDataOffset;
956 // check offsets to see what char data exists
957 const bool hasSeqData = ( seqDataOffset < dataLength );
958 const bool hasQualData = ( qualDataOffset < dataLength );
959 const bool hasTagData = ( tagDataOffset < dataLength );
961 // set up char buffers
962 const char* allCharData = SupportData.AllCharData.data();
963 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
964 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
965 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
967 // store alignment name (relies on null char in name as terminator)
968 Name.assign((const char*)(allCharData));
970 // save query sequence
973 QueryBases.reserve(SupportData.QuerySequenceLength);
974 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
975 char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
976 QueryBases.append(1, singleBase);
980 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
983 Qualities.reserve(SupportData.QuerySequenceLength);
984 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
985 char singleQuality = (char)(qualData[i]+33);
986 Qualities.append(1, singleQuality);
990 // clear previous AlignedBases
991 AlignedBases.clear();
993 // if QueryBases has data, build AlignedBases using CIGAR data
994 // otherwise, AlignedBases will remain empty (this case IS allowed)
995 if ( !QueryBases.empty() ) {
997 // resize AlignedBases
998 AlignedBases.reserve(SupportData.QuerySequenceLength);
1000 // iterate over CigarOps
1002 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1003 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1004 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
1005 const CigarOp& op = (*cigarIter);
1009 // for 'M', 'I', '=', 'X' - write bases
1010 case (Constants::BAM_CIGAR_MATCH_CHAR) :
1011 case (Constants::BAM_CIGAR_INS_CHAR) :
1012 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
1013 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
1014 AlignedBases.append(QueryBases.substr(k, op.Length));
1017 // for 'S' - soft clip, do not write bases
1018 // but increment placeholder 'k'
1019 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
1023 // for 'D' - write gap character
1024 case (Constants::BAM_CIGAR_DEL_CHAR) :
1025 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
1028 // for 'P' - write padding character
1029 case (Constants::BAM_CIGAR_PAD_CHAR) :
1030 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
1033 // for 'N' - write N's, skip bases in original query sequence
1034 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
1035 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
1038 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
1039 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
1042 // shouldn't get here
1044 cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
1054 if ( IsBigEndian ) {
1056 while ( (unsigned int)i < tagDataLength ) {
1058 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
1059 const char type = tagData[i]; // get tag type at position i
1060 ++i; // move i past tag type
1064 case(Constants::BAM_TAG_TYPE_ASCII) :
1065 case(Constants::BAM_TAG_TYPE_INT8) :
1066 case(Constants::BAM_TAG_TYPE_UINT8) :
1067 // no endian swapping necessary for single-byte data
1071 case(Constants::BAM_TAG_TYPE_INT16) :
1072 case(Constants::BAM_TAG_TYPE_UINT16) :
1073 BamTools::SwapEndian_16p(&tagData[i]);
1074 i += sizeof(uint16_t);
1077 case(Constants::BAM_TAG_TYPE_FLOAT) :
1078 case(Constants::BAM_TAG_TYPE_INT32) :
1079 case(Constants::BAM_TAG_TYPE_UINT32) :
1080 BamTools::SwapEndian_32p(&tagData[i]);
1081 i += sizeof(uint32_t);
1084 case(Constants::BAM_TAG_TYPE_HEX) :
1085 case(Constants::BAM_TAG_TYPE_STRING) :
1086 // no endian swapping necessary for hex-string/string data
1087 while ( tagData[i] )
1089 // increment one more for null terminator
1093 case(Constants::BAM_TAG_TYPE_ARRAY) :
1097 const char arrayType = tagData[i];
1100 // swap endian-ness of number of elements in place, then retrieve for loop
1101 BamTools::SwapEndian_32p(&tagData[i]);
1102 int32_t numElements;
1103 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
1104 i += sizeof(uint32_t);
1106 // swap endian-ness of array elements
1107 for ( int j = 0; j < numElements; ++j ) {
1108 switch (arrayType) {
1109 case (Constants::BAM_TAG_TYPE_INT8) :
1110 case (Constants::BAM_TAG_TYPE_UINT8) :
1111 // no endian-swapping necessary
1114 case (Constants::BAM_TAG_TYPE_INT16) :
1115 case (Constants::BAM_TAG_TYPE_UINT16) :
1116 BamTools::SwapEndian_16p(&tagData[i]);
1117 i += sizeof(uint16_t);
1119 case (Constants::BAM_TAG_TYPE_FLOAT) :
1120 case (Constants::BAM_TAG_TYPE_INT32) :
1121 case (Constants::BAM_TAG_TYPE_UINT32) :
1122 BamTools::SwapEndian_32p(&tagData[i]);
1123 i += sizeof(uint32_t);
1127 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
1128 << arrayType << endl;
1136 // shouldn't get here
1138 cerr << "BamAlignment ERROR: invalid tag value type: "
1145 // store tagData in alignment
1146 TagData.resize(tagDataLength);
1147 memcpy((char*)TagData.data(), tagData, tagDataLength);
1150 // clear the core-only flag
1151 SupportData.HasCoreOnly = false;
1157 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
1158 \brief Edits a BAM tag field containing string data.
1160 If \a tag does not exist, a new entry is created.
1162 \param tag 2-character tag name
1163 \param type 1-character tag type (must be "Z" or "H")
1164 \param value string data to store
1166 \return \c true if the tag was modified/created successfully
1168 \sa BamAlignment::RemoveTag()
1169 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1171 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1173 // skip if core data not parsed
1174 if ( SupportData.HasCoreOnly ) return false;
1176 // validate tag/type size & that type is OK for string value
1177 if ( !Internal::IsValidSize(tag, type) ) return false;
1178 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1179 type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1182 // localize the tag data
1183 char* pOriginalTagData = (char*)TagData.data();
1184 char* pTagData = pOriginalTagData;
1185 const unsigned int originalTagDataLength = TagData.size();
1187 unsigned int newTagDataLength = 0;
1188 unsigned int numBytesParsed = 0;
1191 if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1193 // make sure array is more than big enough
1194 char newTagData[originalTagDataLength + value.size()];
1196 // copy original tag data up til desired tag
1197 const unsigned int beginningTagDataLength = numBytesParsed;
1198 newTagDataLength += beginningTagDataLength;
1199 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1201 // copy new @value in place of current tag data
1202 const unsigned int dataLength = strlen(value.c_str());
1203 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1205 // skip to next tag (if tag for removal is last, return true)
1206 const char* pTagStorageType = pTagData - 1;
1207 if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1210 // copy everything from current tag (the next one after tag for removal) to end
1211 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1212 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1213 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1214 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1216 // ensure null-terminator
1217 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1219 // save new tag data
1220 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1224 // tag not found, attempt AddTag
1225 else return AddTag(tag, type, value);
1228 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1229 \brief Edits a BAM tag field containing unsigned integer data.
1231 If \a tag does not exist, a new entry is created.
1233 \param tag 2-character tag name
1234 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1235 \param value unsigned integer data to store
1237 \return \c true if the tag was modified/created successfully
1239 \sa BamAlignment::RemoveTag()
1240 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1242 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1244 // skip if core data not parsed
1245 if ( SupportData.HasCoreOnly ) return false;
1247 // validate tag/type size & that type is OK for uint32_t value
1248 if ( !Internal::IsValidSize(tag, type) ) return false;
1249 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1250 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1251 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1252 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1258 // localize the tag data
1259 char* pOriginalTagData = (char*)TagData.data();
1260 char* pTagData = pOriginalTagData;
1261 const unsigned int originalTagDataLength = TagData.size();
1263 unsigned int newTagDataLength = 0;
1264 unsigned int numBytesParsed = 0;
1267 if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1269 // make sure array is more than big enough
1270 char newTagData[originalTagDataLength + sizeof(value)];
1272 // copy original tag data up til desired tag
1273 const unsigned int beginningTagDataLength = numBytesParsed;
1274 newTagDataLength += beginningTagDataLength;
1275 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1277 // copy new @value in place of current tag data
1278 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1280 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1282 // skip to next tag (if tag for removal is last, return true)
1283 const char* pTagStorageType = pTagData - 1;
1284 if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1287 // copy everything from current tag (the next one after tag for removal) to end
1288 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1289 const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1290 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1291 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1293 // ensure null-terminator
1294 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1296 // save new tag data
1297 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1301 // tag not found, attempt AddTag
1302 else return AddTag(tag, type, value);
1305 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1306 \brief Edits a BAM tag field containing signed integer data.
1308 If \a tag does not exist, a new entry is created.
1310 \param tag 2-character tag name
1311 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1312 \param value signed integer data to store
1314 \return \c true if the tag was modified/created successfully
1316 \sa BamAlignment::RemoveTag()
1317 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1319 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1320 return EditTag(tag, type, (const uint32_t&)value);
1323 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1324 \brief Edits a BAM tag field containing floating-point data.
1326 If \a tag does not exist, a new entry is created.
1328 \param tag 2-character tag name
1329 \param type 1-character tag type (must NOT be "Z", "H", or "B")
1330 \param value float data to store
1332 \return \c true if the tag was modified/created successfully
1334 \sa BamAlignment::RemoveTag()
1335 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1337 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1339 // skip if core data not parsed
1340 if ( SupportData.HasCoreOnly ) return false;
1342 // validate tag/type size & that type is OK for float value
1343 if ( !Internal::IsValidSize(tag, type) ) return false;
1344 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1345 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1346 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1352 // localize the tag data
1353 char* pOriginalTagData = (char*)TagData.data();
1354 char* pTagData = pOriginalTagData;
1355 const unsigned int originalTagDataLength = TagData.size();
1357 unsigned int newTagDataLength = 0;
1358 unsigned int numBytesParsed = 0;
1361 if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1363 // make sure array is more than big enough
1364 char newTagData[originalTagDataLength + sizeof(value)];
1366 // copy original tag data up til desired tag
1367 const unsigned int beginningTagDataLength = numBytesParsed;
1368 newTagDataLength += beginningTagDataLength;
1369 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1371 // copy new @value in place of current tag data
1372 union { float value; char valueBuffer[sizeof(float)]; } un;
1374 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1376 // skip to next tag (if tag for removal is last, return true)
1377 const char* pTagStorageType = pTagData - 1;
1378 if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1381 // copy everything from current tag (the next one after tag for removal) to end
1382 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1383 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1384 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1385 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1387 // ensure null-terminator
1388 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1390 // save new tag data
1391 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1395 // tag not found, attempt AddTag
1396 else return AddTag(tag, type, value);
1399 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1400 \brief Edits a BAM tag field containing a numeric array.
1402 If \a tag does not exist, a new entry is created.
1404 \param tag 2-character tag name
1405 \param value vector of uint8_t values to store
1407 \return \c true if the tag was modified/created successfully
1408 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1410 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1412 // can't do anything if TagData not parsed
1413 if ( SupportData.HasCoreOnly )
1416 // remove existing tag if present
1420 // add tag record with new values
1421 return AddTag(tag, values);
1424 /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1425 \brief Edits a BAM tag field containing a numeric array.
1427 If \a tag does not exist, a new entry is created.
1429 \param tag 2-character tag name
1430 \param value vector of int8_t values to store
1432 \return \c true if the tag was modified/created successfully
1433 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1435 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1437 // can't do anything if TagData not parsed
1438 if ( SupportData.HasCoreOnly )
1441 // remove existing tag if present
1445 // add tag record with new values
1446 return AddTag(tag, values);
1449 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1450 \brief Edits a BAM tag field containing a numeric array.
1452 If \a tag does not exist, a new entry is created.
1454 \param tag 2-character tag name
1455 \param value vector of uint16_t values to store
1457 \return \c true if the tag was modified/created successfully
1458 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1460 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1462 // can't do anything if TagData not parsed
1463 if ( SupportData.HasCoreOnly )
1466 // remove existing tag if present
1470 // add tag record with new values
1471 return AddTag(tag, values);
1474 /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1475 \brief Edits a BAM tag field containing a numeric array.
1477 If \a tag does not exist, a new entry is created.
1479 \param tag 2-character tag name
1480 \param value vector of int16_t values to store
1482 \return \c true if the tag was modified/created successfully
1483 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1485 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1487 // can't do anything if TagData not parsed
1488 if ( SupportData.HasCoreOnly )
1491 // remove existing tag if present
1495 // add tag record with new values
1496 return AddTag(tag, values);
1499 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1500 \brief Edits a BAM tag field containing a numeric array.
1502 If \a tag does not exist, a new entry is created.
1504 \param tag 2-character tag name
1505 \param value vector of uint32_t values to store
1507 \return \c true if the tag was modified/created successfully
1508 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1510 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1512 // can't do anything if TagData not parsed
1513 if ( SupportData.HasCoreOnly )
1516 // remove existing tag if present
1520 // add tag record with new values
1521 return AddTag(tag, values);
1524 /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1525 \brief Edits a BAM tag field containing a numeric array.
1527 If \a tag does not exist, a new entry is created.
1529 \param tag 2-character tag name
1530 \param value vector of int32_t values to store
1532 \return \c true if the tag was modified/created successfully
1533 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1535 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1537 // can't do anything if TagData not parsed
1538 if ( SupportData.HasCoreOnly )
1541 // remove existing tag if present
1545 // add tag record with new values
1546 return AddTag(tag, values);
1549 /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1550 \brief Edits a BAM tag field containing a numeric array.
1552 If \a tag does not exist, a new entry is created.
1554 \param tag 2-character tag name
1555 \param value vector of float values to store
1557 \return \c true if the tag was modified/created successfully
1558 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1560 bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1562 // can't do anything if TagData not parsed
1563 if ( SupportData.HasCoreOnly )
1566 // remove existing tag if present
1570 // add tag record with new values
1571 return AddTag(tag, values);
1574 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1575 \brief Retrieves value of edit distance tag ("NM").
1577 \deprecated Instead use BamAlignment::GetTag()
1579 BamAlignment::GetTag("NM", editDistance);
1582 \param editDistance destination for retrieved value
1584 \return \c true if found
1586 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1587 return GetTag("NM", (uint32_t&)editDistance);
1590 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1591 \brief Calculates alignment end position, based on starting position and CIGAR data.
1593 \param usePadded Inserted bases affect reported position. Default is false, so that reported
1594 position stays 'sync-ed' with reference coordinates.
1595 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1596 when using BAM data with half-open formats (e.g. BED).
1598 \return alignment end position
1600 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1602 // initialize alignment end to starting position
1603 int alignEnd = Position;
1605 // iterate over cigar operations
1606 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1607 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1608 for ( ; cigarIter != cigarEnd; ++cigarIter) {
1609 const char cigarType = (*cigarIter).Type;
1610 const uint32_t& cigarLength = (*cigarIter).Length;
1612 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1613 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1614 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1615 alignEnd += cigarLength;
1616 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1617 alignEnd += cigarLength;
1620 // adjust for zero-based coordinates, if requested
1621 if ( zeroBased ) alignEnd -= 1;
1627 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1628 \brief Retrieves value of read group tag ("RG").
1630 \deprecated Instead use BamAlignment::GetTag()
1632 BamAlignment::GetTag("RG", readGroup);
1635 \param readGroup destination for retrieved value
1637 \return \c true if found
1639 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1640 return GetTag("RG", readGroup);
1643 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1644 \brief Retrieves the string value associated with a BAM tag.
1646 \param tag 2-character tag name
1647 \param destination destination for retrieved value
1649 \return \c true if found
1651 bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1653 // make sure tag data exists
1654 if ( SupportData.HasCoreOnly || TagData.empty() )
1657 // localize the tag data
1658 char* pTagData = (char*)TagData.data();
1659 const unsigned int tagDataLength = TagData.size();
1660 unsigned int numBytesParsed = 0;
1663 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1664 const unsigned int dataLength = strlen(pTagData);
1665 destination.clear();
1666 destination.resize(dataLength);
1667 memcpy( (char*)destination.data(), pTagData, dataLength );
1671 // tag not found, return failure
1675 /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1676 \brief Retrieves the unsigned integer value associated with a BAM tag.
1678 \param tag 2-character tag name
1679 \param destination destination for retrieved value
1681 \return \c true if found
1683 bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1685 // make sure tag data exists
1686 if ( SupportData.HasCoreOnly || TagData.empty() )
1689 // localize the tag data
1690 char* pTagData = (char*)TagData.data();
1691 const unsigned int tagDataLength = TagData.size();
1692 unsigned int numBytesParsed = 0;
1695 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1697 // determine data byte-length
1698 const char type = *(pTagData - 1);
1699 int destinationLength = 0;
1703 case (Constants::BAM_TAG_TYPE_ASCII) :
1704 case (Constants::BAM_TAG_TYPE_INT8) :
1705 case (Constants::BAM_TAG_TYPE_UINT8) :
1706 destinationLength = 1;
1710 case (Constants::BAM_TAG_TYPE_INT16) :
1711 case (Constants::BAM_TAG_TYPE_UINT16) :
1712 destinationLength = 2;
1716 case (Constants::BAM_TAG_TYPE_INT32) :
1717 case (Constants::BAM_TAG_TYPE_UINT32) :
1718 destinationLength = 4;
1721 // unsupported type for integer destination (float or var-length strings)
1722 case (Constants::BAM_TAG_TYPE_FLOAT) :
1723 case (Constants::BAM_TAG_TYPE_STRING) :
1724 case (Constants::BAM_TAG_TYPE_HEX) :
1725 case (Constants::BAM_TAG_TYPE_ARRAY) :
1726 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1727 << " in integer destination" << endl;
1732 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1737 // store in destination
1739 memcpy(&destination, pTagData, destinationLength);
1743 // tag not found, return failure
1747 /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1748 \brief Retrieves the signed integer value associated with a BAM tag.
1750 \param tag 2-character tag name
1751 \param destination destination for retrieved value
1753 \return \c true if found
1755 bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1756 return GetTag(tag, (uint32_t&)destination);
1759 /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1760 \brief Retrieves the floating-point value associated with a BAM tag.
1762 \param tag 2-character tag name
1763 \param destination destination for retrieved value
1765 \return \c true if found
1767 bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1769 // make sure tag data exists
1770 if ( SupportData.HasCoreOnly || TagData.empty() )
1773 // localize the tag data
1774 char* pTagData = (char*)TagData.data();
1775 const unsigned int tagDataLength = TagData.size();
1776 unsigned int numBytesParsed = 0;
1779 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1781 // determine data byte-length
1782 const char type = *(pTagData - 1);
1783 int destinationLength = 0;
1787 case (Constants::BAM_TAG_TYPE_ASCII) :
1788 case (Constants::BAM_TAG_TYPE_INT8) :
1789 case (Constants::BAM_TAG_TYPE_UINT8) :
1790 destinationLength = 1;
1794 case (Constants::BAM_TAG_TYPE_INT16) :
1795 case (Constants::BAM_TAG_TYPE_UINT16) :
1796 destinationLength = 2;
1800 case (Constants::BAM_TAG_TYPE_FLOAT) :
1801 case (Constants::BAM_TAG_TYPE_INT32) :
1802 case (Constants::BAM_TAG_TYPE_UINT32) :
1803 destinationLength = 4;
1806 // unsupported type (var-length strings)
1807 case (Constants::BAM_TAG_TYPE_STRING) :
1808 case (Constants::BAM_TAG_TYPE_HEX) :
1809 case (Constants::BAM_TAG_TYPE_ARRAY) :
1810 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1811 << " in float destination" << endl;
1816 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1821 // store in destination
1823 memcpy(&destination, pTagData, destinationLength);
1827 // tag not found, return failure
1831 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1832 \brief Retrieves the numeric array data associated with a BAM tag
1834 \param tag 2-character tag name
1835 \param destination destination for retrieved data
1837 \return \c true if found
1839 bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1841 // make sure tag data exists
1842 if ( SupportData.HasCoreOnly || TagData.empty() )
1845 // localize the tag data
1846 char* pTagData = (char*)TagData.data();
1847 const unsigned int tagDataLength = TagData.size();
1848 unsigned int numBytesParsed = 0;
1850 // return false if tag not found
1851 if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1854 // check that tag is array type
1855 const char tagType = *(pTagData - 1);
1856 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1857 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1858 << tag << " in array destination" << endl;
1862 // calculate length of each element in tag's array
1863 const char elementType = *pTagData;
1865 int elementLength = 0;
1866 switch ( elementType ) {
1867 case (Constants::BAM_TAG_TYPE_ASCII) :
1868 case (Constants::BAM_TAG_TYPE_INT8) :
1869 case (Constants::BAM_TAG_TYPE_UINT8) :
1870 elementLength = sizeof(uint8_t);
1873 case (Constants::BAM_TAG_TYPE_INT16) :
1874 case (Constants::BAM_TAG_TYPE_UINT16) :
1875 elementLength = sizeof(uint16_t);
1878 case (Constants::BAM_TAG_TYPE_INT32) :
1879 case (Constants::BAM_TAG_TYPE_UINT32) :
1880 elementLength = sizeof(uint32_t);
1883 // unsupported type for integer destination (float or var-length data)
1884 case (Constants::BAM_TAG_TYPE_FLOAT) :
1885 case (Constants::BAM_TAG_TYPE_STRING) :
1886 case (Constants::BAM_TAG_TYPE_HEX) :
1887 case (Constants::BAM_TAG_TYPE_ARRAY) :
1888 cerr << "BamAlignment ERROR: array element type: " << elementType
1889 << " cannot be stored in integer value" << endl;
1894 cerr << "BamAlignment ERROR: unknown element type encountered: "
1895 << elementType << endl;
1899 // get number of elements
1900 int32_t numElements;
1901 memcpy(&numElements, pTagData, sizeof(int32_t));
1903 destination.clear();
1904 destination.reserve(numElements);
1908 for ( int i = 0 ; i < numElements; ++i ) {
1909 memcpy(&value, pTagData, sizeof(uint32_t));
1910 pTagData += sizeof(uint32_t);
1911 destination.push_back(value);
1918 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1919 \brief Retrieves the numeric array data associated with a BAM tag
1921 \param tag 2-character tag name
1922 \param destination destination for retrieved data
1924 \return \c true if found
1926 bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1928 // make sure tag data exists
1929 if ( SupportData.HasCoreOnly || TagData.empty() )
1932 // localize the tag data
1933 char* pTagData = (char*)TagData.data();
1934 const unsigned int tagDataLength = TagData.size();
1935 unsigned int numBytesParsed = 0;
1937 // return false if tag not found
1938 if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1941 // check that tag is array type
1942 const char tagType = *(pTagData - 1);
1943 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1944 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1945 << tag << " in array destination" << endl;
1949 // calculate length of each element in tag's array
1950 const char elementType = *pTagData;
1952 int elementLength = 0;
1953 switch ( elementType ) {
1954 case (Constants::BAM_TAG_TYPE_ASCII) :
1955 case (Constants::BAM_TAG_TYPE_INT8) :
1956 case (Constants::BAM_TAG_TYPE_UINT8) :
1957 elementLength = sizeof(uint8_t);
1960 case (Constants::BAM_TAG_TYPE_INT16) :
1961 case (Constants::BAM_TAG_TYPE_UINT16) :
1962 elementLength = sizeof(uint16_t);
1965 case (Constants::BAM_TAG_TYPE_INT32) :
1966 case (Constants::BAM_TAG_TYPE_UINT32) :
1967 elementLength = sizeof(uint32_t);
1970 // unsupported type for integer destination (float or var-length data)
1971 case (Constants::BAM_TAG_TYPE_FLOAT) :
1972 case (Constants::BAM_TAG_TYPE_STRING) :
1973 case (Constants::BAM_TAG_TYPE_HEX) :
1974 case (Constants::BAM_TAG_TYPE_ARRAY) :
1975 cerr << "BamAlignment ERROR: array element type: " << elementType
1976 << " cannot be stored in integer value" << endl;
1981 cerr << "BamAlignment ERROR: unknown element type encountered: "
1982 << elementType << endl;
1986 // get number of elements
1987 int32_t numElements;
1988 memcpy(&numElements, pTagData, sizeof(int32_t));
1990 destination.clear();
1991 destination.reserve(numElements);
1995 for ( int i = 0 ; i < numElements; ++i ) {
1996 memcpy(&value, pTagData, sizeof(int32_t));
1997 pTagData += sizeof(int32_t);
1998 destination.push_back(value);
2006 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
2007 \brief Retrieves the numeric array data associated with a BAM tag
2009 \param tag 2-character tag name
2010 \param destination destination for retrieved data
2012 \return \c true if found
2014 bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
2016 // make sure tag data exists
2017 if ( SupportData.HasCoreOnly || TagData.empty() )
2020 // localize the tag data
2021 char* pTagData = (char*)TagData.data();
2022 const unsigned int tagDataLength = TagData.size();
2023 unsigned int numBytesParsed = 0;
2025 // return false if tag not found
2026 if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
2029 // check that tag is array type
2030 const char tagType = *(pTagData - 1);
2031 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
2032 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
2033 << tag << " in array destination" << endl;
2037 // calculate length of each element in tag's array
2038 const char elementType = *pTagData;
2040 int elementLength = 0;
2041 switch ( elementType ) {
2042 case (Constants::BAM_TAG_TYPE_ASCII) :
2043 case (Constants::BAM_TAG_TYPE_INT8) :
2044 case (Constants::BAM_TAG_TYPE_UINT8) :
2045 elementLength = sizeof(uint8_t);
2048 case (Constants::BAM_TAG_TYPE_INT16) :
2049 case (Constants::BAM_TAG_TYPE_UINT16) :
2050 elementLength = sizeof(uint16_t);
2053 case (Constants::BAM_TAG_TYPE_INT32) :
2054 case (Constants::BAM_TAG_TYPE_UINT32) :
2055 case (Constants::BAM_TAG_TYPE_FLOAT) :
2056 elementLength = sizeof(uint32_t);
2059 // unsupported type for float destination (var-length data)
2060 case (Constants::BAM_TAG_TYPE_STRING) :
2061 case (Constants::BAM_TAG_TYPE_HEX) :
2062 case (Constants::BAM_TAG_TYPE_ARRAY) :
2063 cerr << "BamAlignment ERROR: array element type: " << elementType
2064 << " cannot be stored in float value" << endl;
2069 cerr << "BamAlignment ERROR: unknown element type encountered: "
2070 << elementType << endl;
2074 // get number of elements
2075 int32_t numElements;
2076 memcpy(&numElements, pTagData, sizeof(int32_t));
2078 destination.clear();
2079 destination.reserve(numElements);
2083 for ( int i = 0 ; i < numElements; ++i ) {
2084 memcpy(&value, pTagData, sizeof(float));
2085 pTagData += sizeof(float);
2086 destination.push_back(value);
2093 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
2094 \brief Retrieves the BAM tag type-code associated with requested tag name.
2096 \param tag 2-character tag name
2097 \param type destination for the retrieved (1-character) tag type
2099 \return \c true if found
2100 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
2102 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
2104 // make sure tag data exists
2105 if ( SupportData.HasCoreOnly || TagData.empty() )
2108 // localize the tag data
2109 char* pTagData = (char*)TagData.data();
2110 const unsigned int tagDataLength = TagData.size();
2111 unsigned int numBytesParsed = 0;
2114 if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
2116 // retrieve tag type code
2117 type = *(pTagData - 1);
2119 // validate that type is a proper BAM tag type
2121 case (Constants::BAM_TAG_TYPE_ASCII) :
2122 case (Constants::BAM_TAG_TYPE_INT8) :
2123 case (Constants::BAM_TAG_TYPE_UINT8) :
2124 case (Constants::BAM_TAG_TYPE_INT16) :
2125 case (Constants::BAM_TAG_TYPE_UINT16) :
2126 case (Constants::BAM_TAG_TYPE_INT32) :
2127 case (Constants::BAM_TAG_TYPE_UINT32) :
2128 case (Constants::BAM_TAG_TYPE_FLOAT) :
2129 case (Constants::BAM_TAG_TYPE_STRING) :
2130 case (Constants::BAM_TAG_TYPE_HEX) :
2131 case (Constants::BAM_TAG_TYPE_ARRAY) :
2136 cerr << "BamAlignment ERROR: unknown tag type encountered: "
2142 // tag not found, return failure
2146 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2147 \brief Returns true if alignment has a record for requested tag.
2148 \param tag 2-character tag name
2149 \return \c true if alignment has a record for tag
2151 bool BamAlignment::HasTag(const std::string& tag) const {
2153 // return false if no tag data present
2154 if ( SupportData.HasCoreOnly || TagData.empty() )
2157 // localize the tag data for lookup
2158 char* pTagData = (char*)TagData.data();
2159 const unsigned int tagDataLength = TagData.size();
2160 unsigned int numBytesParsed = 0;
2162 // if result of tag lookup
2163 return Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2166 /*! \fn bool BamAlignment::IsDuplicate(void) const
2167 \return \c true if this read is a PCR duplicate
2169 bool BamAlignment::IsDuplicate(void) const {
2170 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2173 /*! \fn bool BamAlignment::IsFailedQC(void) const
2174 \return \c true if this read failed quality control
2176 bool BamAlignment::IsFailedQC(void) const {
2177 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2180 /*! \fn bool BamAlignment::IsFirstMate(void) const
2181 \return \c true if alignment is first mate on paired-end read
2183 bool BamAlignment::IsFirstMate(void) const {
2184 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2187 /*! \fn bool BamAlignment::IsMapped(void) const
2188 \return \c true if alignment is mapped
2190 bool BamAlignment::IsMapped(void) const {
2191 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2194 /*! \fn bool BamAlignment::IsMateMapped(void) const
2195 \return \c true if alignment's mate is mapped
2197 bool BamAlignment::IsMateMapped(void) const {
2198 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2201 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2202 \return \c true if alignment's mate mapped to reverse strand
2204 bool BamAlignment::IsMateReverseStrand(void) const {
2205 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2208 /*! \fn bool BamAlignment::IsPaired(void) const
2209 \return \c true if alignment part of paired-end read
2211 bool BamAlignment::IsPaired(void) const {
2212 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2215 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2216 \return \c true if reported position is primary alignment
2218 bool BamAlignment::IsPrimaryAlignment(void) const {
2219 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2222 /*! \fn bool BamAlignment::IsProperPair(void) const
2223 \return \c true if alignment is part of read that satisfied paired-end resolution
2225 bool BamAlignment::IsProperPair(void) const {
2226 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2229 /*! \fn bool BamAlignment::IsReverseStrand(void) const
2230 \return \c true if alignment mapped to reverse strand
2232 bool BamAlignment::IsReverseStrand(void) const {
2233 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2236 /*! \fn bool BamAlignment::IsSecondMate(void) const
2237 \return \c true if alignment is second mate on read
2239 bool BamAlignment::IsSecondMate(void) const {
2240 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2243 /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2244 \brief Removes field from BAM tags.
2246 \return \c true if tag was removed successfully (or didn't exist before)
2248 bool BamAlignment::RemoveTag(const std::string& tag) {
2250 // skip if no tag data available
2251 if ( SupportData.HasCoreOnly || TagData.empty() )
2254 // localize the tag data
2255 char* pOriginalTagData = (char*)TagData.data();
2256 char* pTagData = pOriginalTagData;
2257 const unsigned int originalTagDataLength = TagData.size();
2258 unsigned int newTagDataLength = 0;
2259 unsigned int numBytesParsed = 0;
2262 if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2264 char newTagData[originalTagDataLength];
2266 // copy original tag data up til desired tag
2268 numBytesParsed -= 3;
2269 const unsigned int beginningTagDataLength = numBytesParsed;
2270 newTagDataLength += beginningTagDataLength;
2271 memcpy(newTagData, pOriginalTagData, numBytesParsed);
2273 // skip to next tag (if tag for removal is last, return true)
2274 const char* pTagStorageType = pTagData + 2;
2276 numBytesParsed += 3;
2277 if ( !Internal::SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2280 // copy everything from current tag (the next one after tag for removal) to end
2281 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2282 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2283 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2285 // save new tag data
2286 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2290 // tag not found, no removal - return failure
2294 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2295 \brief Sets value of "PCR duplicate" flag to \a ok.
2297 void BamAlignment::SetIsDuplicate(bool ok) {
2298 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2299 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2302 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2303 \brief Sets "failed quality control" flag to \a ok.
2305 void BamAlignment::SetIsFailedQC(bool ok) {
2306 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2307 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2310 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2311 \brief Sets "alignment is first mate" flag to \a ok.
2313 void BamAlignment::SetIsFirstMate(bool ok) {
2314 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2315 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2318 /*! \fn void BamAlignment::SetIsMapped(bool ok)
2319 \brief Sets "alignment is mapped" flag to \a ok.
2321 void BamAlignment::SetIsMapped(bool ok) {
2322 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2323 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2326 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2327 \brief Sets "alignment's mate is mapped" flag to \a ok.
2329 void BamAlignment::SetIsMateMapped(bool ok) {
2330 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2331 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2334 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2335 \brief Complement of using SetIsMateMapped().
2336 \deprecated For sake of symmetry with the query methods
2337 \sa IsMateMapped(), SetIsMateMapped()
2339 void BamAlignment::SetIsMateUnmapped(bool ok) {
2340 SetIsMateMapped(!ok);
2343 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2344 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2346 void BamAlignment::SetIsMateReverseStrand(bool ok) {
2347 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2348 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2351 /*! \fn void BamAlignment::SetIsPaired(bool ok)
2352 \brief Sets "alignment part of paired-end read" flag to \a ok.
2354 void BamAlignment::SetIsPaired(bool ok) {
2355 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2356 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2359 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2360 \brief Sets "position is primary alignment" flag to \a ok.
2362 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2363 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2364 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2367 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2368 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2370 void BamAlignment::SetIsProperPair(bool ok) {
2371 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2372 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2375 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2376 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2378 void BamAlignment::SetIsReverseStrand(bool ok) {
2379 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2380 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2383 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2384 \brief Complement of using SetIsPrimaryAlignment().
2385 \deprecated For sake of symmetry with the query methods
2386 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2388 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2389 SetIsPrimaryAlignment(!ok);
2392 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2393 \brief Sets "alignment is second mate on read" flag to \a ok.
2395 void BamAlignment::SetIsSecondMate(bool ok) {
2396 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2397 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2400 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2401 \brief Complement of using SetIsMapped().
2402 \deprecated For sake of symmetry with the query methods
2403 \sa IsMapped(), SetIsMapped()
2405 void BamAlignment::SetIsUnmapped(bool ok) {