1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 8 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #include <api/BamAlignment.h>
11 #include <api/BamConstants.h>
12 using namespace BamTools;
15 /*! \class BamTools::BamAlignment
16 \brief The main BAM alignment data structure.
18 Provides methods to query/modify BAM alignment data fields.
20 /*! \var BamAlignment::Name
23 /*! \var BamAlignment::Length
24 \brief length of query sequence
26 /*! \var BamAlignment::QueryBases
27 \brief 'original' sequence (as reported from sequencing machine)
29 /*! \var BamAlignment::AlignedBases
30 \brief 'aligned' sequence (includes any indels, padding, clipping)
32 /*! \var BamAlignment::Qualities
33 \brief FASTQ qualities (ASCII characters, not numeric values)
35 /*! \var BamAlignment::TagData
36 \brief tag data (use the provided methods to query/modify)
38 /*! \var BamAlignment::RefID
39 \brief ID number for reference sequence
41 /*! \var BamAlignment::Position
42 \brief position (0-based) where alignment starts
44 /*! \var BamAlignment::Bin
45 \brief BAM (standard) index bin number for this alignment
47 /*! \var BamAlignment::MapQuality
48 \brief mapping quality score
50 /*! \var BamAlignment::AlignmentFlag
51 \brief alignment bit-flag (use the provided methods to query/modify)
53 /*! \var BamAlignment::CigarData
54 \brief CIGAR operations for this alignment
56 /*! \var BamAlignment::MateRefID
57 \brief ID number for reference sequence where alignment's mate was aligned
59 /*! \var BamAlignment::MatePosition
60 \brief position (0-based) where alignment's mate starts
62 /*! \var BamAlignment::InsertSize
63 \brief mate-pair insert size
65 /*! \var BamAlignment::Filename
66 \brief name of BAM file which this alignment comes from
69 /*! \fn BamAlignment::BamAlignment(void)
72 BamAlignment::BamAlignment(void)
80 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
81 \brief copy constructor
83 BamAlignment::BamAlignment(const BamAlignment& other)
85 , Length(other.Length)
86 , QueryBases(other.QueryBases)
87 , AlignedBases(other.AlignedBases)
88 , Qualities(other.Qualities)
89 , TagData(other.TagData)
91 , Position(other.Position)
93 , MapQuality(other.MapQuality)
94 , AlignmentFlag(other.AlignmentFlag)
95 , CigarData(other.CigarData)
96 , MateRefID(other.MateRefID)
97 , MatePosition(other.MatePosition)
98 , InsertSize(other.InsertSize)
99 , Filename(other.Filename)
100 , SupportData(other.SupportData)
103 /*! \fn BamAlignment::~BamAlignment(void)
106 BamAlignment::~BamAlignment(void) { }
108 ///*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
109 // \brief Adds a field with string data to the BAM tags.
111 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
113 // \param[in] tag 2-character tag name
114 // \param[in] type 1-character tag type (must be "Z" or "H")
115 // \param[in] value string data to store
116 // \return \c true if the \b new tag was added successfully
117 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
121 ///*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
122 // \brief Adds a numeric array field to the BAM tags.
124 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
126 // \param tag 2-character tag name
127 // \param values vector of uint8_t values to store
129 // \return \c true if the \b new tag was added successfully
130 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
133 /*! \fn bool BamAlignment::BuildCharData(void)
134 \brief Populates alignment string fields (read name, bases, qualities, tag data).
136 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
137 Using that method makes parsing much quicker when only positional data is required.
139 However, if you later want to access the character data fields from such an alignment,
140 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
143 \return \c true if character data populated successfully (or was already available to begin with)
145 bool BamAlignment::BuildCharData(void) {
147 // skip if char data already parsed
148 if ( !SupportData.HasCoreOnly )
151 // check system endianness
152 bool IsBigEndian = BamTools::SystemIsBigEndian();
154 // calculate character lengths/offsets
155 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
156 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
157 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
158 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
159 const unsigned int tagDataLength = dataLength - tagDataOffset;
161 // check offsets to see what char data exists
162 const bool hasSeqData = ( seqDataOffset < dataLength );
163 const bool hasQualData = ( qualDataOffset < dataLength );
164 const bool hasTagData = ( tagDataOffset < dataLength );
166 // set up char buffers
167 const char* allCharData = SupportData.AllCharData.data();
168 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
169 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
170 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
172 // store alignment name (relies on null char in name as terminator)
173 Name.assign((const char*)(allCharData));
175 // save query sequence
178 QueryBases.reserve(SupportData.QuerySequenceLength);
179 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
180 const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
181 QueryBases.append(1, singleBase);
185 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
188 Qualities.reserve(SupportData.QuerySequenceLength);
189 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
190 const char singleQuality = static_cast<const char>(qualData[i]+33);
191 Qualities.append(1, singleQuality);
195 // clear previous AlignedBases
196 AlignedBases.clear();
198 // if QueryBases has data, build AlignedBases using CIGAR data
199 // otherwise, AlignedBases will remain empty (this case IS allowed)
200 if ( !QueryBases.empty() ) {
202 // resize AlignedBases
203 AlignedBases.reserve(SupportData.QuerySequenceLength);
205 // iterate over CigarOps
207 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
208 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
209 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
210 const CigarOp& op = (*cigarIter);
214 // for 'M', 'I', '=', 'X' - write bases
215 case (Constants::BAM_CIGAR_MATCH_CHAR) :
216 case (Constants::BAM_CIGAR_INS_CHAR) :
217 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
218 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
219 AlignedBases.append(QueryBases.substr(k, op.Length));
222 // for 'S' - soft clip, do not write bases
223 // but increment placeholder 'k'
224 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
228 // for 'D' - write gap character
229 case (Constants::BAM_CIGAR_DEL_CHAR) :
230 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
233 // for 'P' - write padding character
234 case (Constants::BAM_CIGAR_PAD_CHAR) :
235 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
238 // for 'N' - write N's, skip bases in original query sequence
239 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
240 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
243 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
244 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
247 // invalid CIGAR op-code
249 const string message = string("invalid CIGAR operation type: ") + op.Type;
250 SetErrorString("BamAlignment::BuildCharData", message);
261 while ( i < tagDataLength ) {
263 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
264 const char type = tagData[i]; // get tag type at position i
265 ++i; // move i past tag type
269 case(Constants::BAM_TAG_TYPE_ASCII) :
270 case(Constants::BAM_TAG_TYPE_INT8) :
271 case(Constants::BAM_TAG_TYPE_UINT8) :
272 // no endian swapping necessary for single-byte data
276 case(Constants::BAM_TAG_TYPE_INT16) :
277 case(Constants::BAM_TAG_TYPE_UINT16) :
278 BamTools::SwapEndian_16p(&tagData[i]);
279 i += sizeof(uint16_t);
282 case(Constants::BAM_TAG_TYPE_FLOAT) :
283 case(Constants::BAM_TAG_TYPE_INT32) :
284 case(Constants::BAM_TAG_TYPE_UINT32) :
285 BamTools::SwapEndian_32p(&tagData[i]);
286 i += sizeof(uint32_t);
289 case(Constants::BAM_TAG_TYPE_HEX) :
290 case(Constants::BAM_TAG_TYPE_STRING) :
291 // no endian swapping necessary for hex-string/string data
294 // increment one more for null terminator
298 case(Constants::BAM_TAG_TYPE_ARRAY) :
302 const char arrayType = tagData[i];
305 // swap endian-ness of number of elements in place, then retrieve for loop
306 BamTools::SwapEndian_32p(&tagData[i]);
307 uint32_t numElements;
308 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
309 i += sizeof(uint32_t);
311 // swap endian-ness of array elements
312 for ( size_t j = 0; j < numElements; ++j ) {
314 case (Constants::BAM_TAG_TYPE_INT8) :
315 case (Constants::BAM_TAG_TYPE_UINT8) :
316 // no endian-swapping necessary
319 case (Constants::BAM_TAG_TYPE_INT16) :
320 case (Constants::BAM_TAG_TYPE_UINT16) :
321 BamTools::SwapEndian_16p(&tagData[i]);
322 i += sizeof(uint16_t);
324 case (Constants::BAM_TAG_TYPE_FLOAT) :
325 case (Constants::BAM_TAG_TYPE_INT32) :
326 case (Constants::BAM_TAG_TYPE_UINT32) :
327 BamTools::SwapEndian_32p(&tagData[i]);
328 i += sizeof(uint32_t);
331 const string message = string("invalid binary array type: ") + arrayType;
332 SetErrorString("BamAlignment::BuildCharData", message);
340 // invalid tag type-code
342 const string message = string("invalid tag type: ") + type;
343 SetErrorString("BamAlignment::BuildCharData", message);
349 // store tagData in alignment
350 TagData.resize(tagDataLength);
351 memcpy((char*)(TagData.data()), tagData, tagDataLength);
354 // clear the core-only flag
355 SupportData.HasCoreOnly = false;
361 ///*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
362 // \brief Edits a BAM tag field containing string data.
364 // If \a tag does not exist, a new entry is created.
366 // \param tag 2-character tag name
367 // \param type 1-character tag type (must be "Z" or "H")
368 // \param value string data to store
370 // \return \c true if the tag was modified/created successfully
372 // \sa BamAlignment::RemoveTag()
373 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
376 ///*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
377 // \brief Edits a BAM tag field containing a numeric array.
379 // If \a tag does not exist, a new entry is created.
381 // \param tag 2-character tag name
382 // \param value vector of uint8_t values to store
384 // \return \c true if the tag was modified/created successfully
385 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
388 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
391 Searches for requested tag in BAM tag data.
393 \param tag requested 2-character tag name
394 \param pTagData pointer to current position in BamAlignment::TagData
395 \param tagDataLength length of BamAlignment::TagData
396 \param numBytesParsed number of bytes parsed so far
398 \return \c true if found
400 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
401 \a numBytesParsed will correspond to the position in the full TagData string.
404 bool BamAlignment::FindTag(const std::string& tag,
406 const unsigned int& tagDataLength,
407 unsigned int& numBytesParsed) const
410 while ( numBytesParsed < tagDataLength ) {
412 const char* pTagType = pTagData;
413 const char* pTagStorageType = pTagData + 2;
417 // check the current tag, return true on match
418 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
421 // get the storage class and find the next tag
422 if ( *pTagStorageType == '\0' ) return false;
423 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
424 if ( *pTagData == '\0' ) return false;
427 // checked all tags, none match
431 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
432 \brief Retrieves value of edit distance tag ("NM").
434 \deprecated Instead use BamAlignment::GetTag()
436 BamAlignment::GetTag("NM", editDistance);
439 \param editDistance destination for retrieved value
441 \return \c true if found
444 // TODO : REMOVE THIS METHOD
445 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
446 return GetTag("NM", (uint32_t&)editDistance);
449 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool closedInterval = USE_CLOSED_DEFAULT) const
450 \brief Calculates alignment end position, based on starting position and CIGAR data.
452 \warning The position returned now represents a zero-based, HALF-OPEN interval.
453 In previous versions of BamTools (0.x & 1.x) all intervals were treated
454 as zero-based, CLOSED. I whole-heartedly apologize for any inconsistencies this
455 may have caused if you assumed that BT was always half-open; full aplogies also
456 to those who recognized that BamTools originally used a closed interval, but may
457 need to update their code to reflect this new change.
459 \param usePadded Allow inserted bases to affect the reported position. Default is false, so that
460 reported position stays synced with reference coordinates.
462 \param closedInterval Setting this to true will return a 0-based end coordinate. Default is false,
463 so that his value represents a standard, half-open interval.
465 \return alignment end position
467 int BamAlignment::GetEndPosition(bool usePadded, bool closedInterval) const {
469 // initialize alignment end to starting position
470 int alignEnd = Position;
472 // iterate over cigar operations
473 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
474 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
475 for ( ; cigarIter != cigarEnd; ++cigarIter) {
476 const CigarOp& op = (*cigarIter);
480 // increase end position on CIGAR chars [DMXN=]
481 case Constants::BAM_CIGAR_DEL_CHAR :
482 case Constants::BAM_CIGAR_MATCH_CHAR :
483 case Constants::BAM_CIGAR_MISMATCH_CHAR :
484 case Constants::BAM_CIGAR_REFSKIP_CHAR :
485 case Constants::BAM_CIGAR_SEQMATCH_CHAR :
486 alignEnd += op.Length;
489 // increase end position when CIGAR char is 'I' only if @usePadded is true
490 case Constants::BAM_CIGAR_INS_CHAR :
492 alignEnd += op.Length;
495 // all other CIGAR chars do not affect end position
501 // adjust for closedInterval, if requested
502 if ( closedInterval )
509 /*! \fn std::string BamAlignment::GetErrorString(void) const
510 \brief Returns a description of the last error that occurred
512 This method allows elimnation of STDERR pollution. Developers of client code
513 may choose how the messages are displayed to the user, if at all.
515 \return description of last error that occurred
517 std::string BamAlignment::GetErrorString(void) const {
521 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
522 \brief Retrieves value of read group tag ("RG").
524 \deprecated Instead use BamAlignment::GetTag()
526 BamAlignment::GetTag("RG", readGroup);
529 \param readGroup destination for retrieved value
531 \return \c true if found
534 // TODO : REMOVE THIS METHOD
535 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
536 return GetTag("RG", readGroup);
539 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
540 // \brief Retrieves the string value associated with a BAM tag.
542 // \param tag 2-character tag name
543 // \param destination destination for retrieved value
545 // \return \c true if found
548 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
549 // \brief Retrieves the numeric array data associated with a BAM tag
551 // \param tag 2-character tag name
552 // \param destination destination for retrieved data
554 // \return \c true if found
557 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
558 \brief Retrieves the BAM tag type-code associated with requested tag name.
560 \param tag 2-character tag name
561 \param type destination for the retrieved (1-character) tag type
563 \return \c true if found
564 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
566 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
568 // skip if alignment is core-only
569 if ( SupportData.HasCoreOnly ) {
570 // TODO: set error string?
574 // skip if no tags present
575 if ( TagData.empty() ) {
576 // TODO: set error string?
580 // localize the tag data
581 char* pTagData = (char*)TagData.data();
582 const unsigned int tagDataLength = TagData.size();
583 unsigned int numBytesParsed = 0;
585 // if tag not found, return failure
586 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
587 // TODO: set error string?
591 // otherwise, retrieve & validate tag type code
592 type = *(pTagData - 1);
594 case (Constants::BAM_TAG_TYPE_ASCII) :
595 case (Constants::BAM_TAG_TYPE_INT8) :
596 case (Constants::BAM_TAG_TYPE_UINT8) :
597 case (Constants::BAM_TAG_TYPE_INT16) :
598 case (Constants::BAM_TAG_TYPE_UINT16) :
599 case (Constants::BAM_TAG_TYPE_INT32) :
600 case (Constants::BAM_TAG_TYPE_UINT32) :
601 case (Constants::BAM_TAG_TYPE_FLOAT) :
602 case (Constants::BAM_TAG_TYPE_STRING) :
603 case (Constants::BAM_TAG_TYPE_HEX) :
604 case (Constants::BAM_TAG_TYPE_ARRAY) :
609 const string message = string("invalid tag type: ") + type;
610 SetErrorString("BamAlignment::GetTagType", message);
615 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
616 \brief Returns true if alignment has a record for requested tag.
617 \param tag 2-character tag name
618 \return \c true if alignment has a record for tag
620 bool BamAlignment::HasTag(const std::string& tag) const {
622 // return false if no tag data present
623 if ( SupportData.HasCoreOnly || TagData.empty() )
626 // localize the tag data for lookup
627 char* pTagData = (char*)TagData.data();
628 const unsigned int tagDataLength = TagData.size();
629 unsigned int numBytesParsed = 0;
631 // if result of tag lookup
632 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
635 /*! \fn bool BamAlignment::IsDuplicate(void) const
636 \return \c true if this read is a PCR duplicate
638 bool BamAlignment::IsDuplicate(void) const {
639 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
642 /*! \fn bool BamAlignment::IsFailedQC(void) const
643 \return \c true if this read failed quality control
645 bool BamAlignment::IsFailedQC(void) const {
646 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
649 /*! \fn bool BamAlignment::IsFirstMate(void) const
650 \return \c true if alignment is first mate on paired-end read
652 bool BamAlignment::IsFirstMate(void) const {
653 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
656 /*! \fn bool BamAlignment::IsMapped(void) const
657 \return \c true if alignment is mapped
659 bool BamAlignment::IsMapped(void) const {
660 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
663 /*! \fn bool BamAlignment::IsMateMapped(void) const
664 \return \c true if alignment's mate is mapped
666 bool BamAlignment::IsMateMapped(void) const {
667 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
670 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
671 \return \c true if alignment's mate mapped to reverse strand
673 bool BamAlignment::IsMateReverseStrand(void) const {
674 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
677 /*! \fn bool BamAlignment::IsPaired(void) const
678 \return \c true if alignment part of paired-end read
680 bool BamAlignment::IsPaired(void) const {
681 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
684 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
685 \return \c true if reported position is primary alignment
687 bool BamAlignment::IsPrimaryAlignment(void) const {
688 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
691 /*! \fn bool BamAlignment::IsProperPair(void) const
692 \return \c true if alignment is part of read that satisfied paired-end resolution
694 bool BamAlignment::IsProperPair(void) const {
695 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
698 /*! \fn bool BamAlignment::IsReverseStrand(void) const
699 \return \c true if alignment mapped to reverse strand
701 bool BamAlignment::IsReverseStrand(void) const {
702 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
705 /*! \fn bool BamAlignment::IsSecondMate(void) const
706 \return \c true if alignment is second mate on read
708 bool BamAlignment::IsSecondMate(void) const {
709 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
712 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
715 Checks that tag name & type strings are expected sizes.
716 \a tag should have length
717 \a type should have length 1
719 \param tag BAM tag name
720 \param type BAM tag type-code
722 \return \c true if both \a tag and \a type are correct sizes
724 bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
725 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
726 (type.size() == Constants::BAM_TAG_TYPESIZE);
729 /*! \fn void BamAlignment::RemoveTag(const std::string& tag)
730 \brief Removes field from BAM tags.
732 void BamAlignment::RemoveTag(const std::string& tag) {
734 // if char data not populated, do that first
735 if ( SupportData.HasCoreOnly )
738 // skip if no tags available
739 if ( TagData.empty() )
742 // localize the tag data
743 char* pOriginalTagData = (char*)TagData.data();
744 char* pTagData = pOriginalTagData;
745 const unsigned int originalTagDataLength = TagData.size();
746 unsigned int newTagDataLength = 0;
747 unsigned int numBytesParsed = 0;
749 // skip if tag not found
750 if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
753 // otherwise, remove it
754 RaiiBuffer newTagData(originalTagDataLength);
756 // copy original tag data up til desired tag
759 const unsigned int beginningTagDataLength = numBytesParsed;
760 newTagDataLength += beginningTagDataLength;
761 memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
763 // attemp to skip to next tag
764 const char* pTagStorageType = pTagData + 2;
767 if ( SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
769 // squeeze remaining tag data
770 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
771 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
772 memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
774 // save modified tag data in alignment
775 TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
779 /*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
782 Sets a formatted error string for this alignment.
784 void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
785 static const string SEPARATOR = ": ";
786 ErrorString = where + SEPARATOR + what;
789 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
790 \brief Sets value of "PCR duplicate" flag to \a ok.
792 void BamAlignment::SetIsDuplicate(bool ok) {
793 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
794 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
797 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
798 \brief Sets "failed quality control" flag to \a ok.
800 void BamAlignment::SetIsFailedQC(bool ok) {
801 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
802 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
805 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
806 \brief Sets "alignment is first mate" flag to \a ok.
808 void BamAlignment::SetIsFirstMate(bool ok) {
809 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
810 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
813 /*! \fn void BamAlignment::SetIsMapped(bool ok)
814 \brief Sets "alignment is mapped" flag to \a ok.
816 void BamAlignment::SetIsMapped(bool ok) {
817 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
818 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
821 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
822 \brief Sets "alignment's mate is mapped" flag to \a ok.
824 void BamAlignment::SetIsMateMapped(bool ok) {
825 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
826 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
829 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
830 \brief Complement of using SetIsMateMapped().
831 \deprecated For sake of symmetry with the query methods
832 \sa IsMateMapped(), SetIsMateMapped()
834 void BamAlignment::SetIsMateUnmapped(bool ok) {
835 SetIsMateMapped(!ok);
838 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
839 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
841 void BamAlignment::SetIsMateReverseStrand(bool ok) {
842 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
843 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
846 /*! \fn void BamAlignment::SetIsPaired(bool ok)
847 \brief Sets "alignment part of paired-end read" flag to \a ok.
849 void BamAlignment::SetIsPaired(bool ok) {
850 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
851 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
854 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
855 \brief Sets "position is primary alignment" flag to \a ok.
857 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
858 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
859 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
862 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
863 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
865 void BamAlignment::SetIsProperPair(bool ok) {
866 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
867 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
870 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
871 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
873 void BamAlignment::SetIsReverseStrand(bool ok) {
874 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
875 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
878 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
879 \brief Complement of using SetIsPrimaryAlignment().
880 \deprecated For sake of symmetry with the query methods
881 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
883 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
884 SetIsPrimaryAlignment(!ok);
887 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
888 \brief Sets "alignment is second mate on read" flag to \a ok.
890 void BamAlignment::SetIsSecondMate(bool ok) {
891 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
892 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
895 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
896 \brief Complement of using SetIsMapped().
897 \deprecated For sake of symmetry with the query methods
898 \sa IsMapped(), SetIsMapped()
900 void BamAlignment::SetIsUnmapped(bool ok) {
904 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
907 Moves to next available tag in tag data string
909 \param storageType BAM tag type-code that determines how far to move cursor
910 \param pTagData pointer to current position (cursor) in tag string
911 \param numBytesParsed report of how many bytes were parsed (cumulatively)
913 \return \c if storageType was a recognized BAM tag type
914 \post \a pTagData will point to the byte where the next tag data begins.
915 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
917 bool BamAlignment::SkipToNextTag(const char storageType,
919 unsigned int& numBytesParsed) const
921 switch (storageType) {
923 case (Constants::BAM_TAG_TYPE_ASCII) :
924 case (Constants::BAM_TAG_TYPE_INT8) :
925 case (Constants::BAM_TAG_TYPE_UINT8) :
930 case (Constants::BAM_TAG_TYPE_INT16) :
931 case (Constants::BAM_TAG_TYPE_UINT16) :
932 numBytesParsed += sizeof(uint16_t);
933 pTagData += sizeof(uint16_t);
936 case (Constants::BAM_TAG_TYPE_FLOAT) :
937 case (Constants::BAM_TAG_TYPE_INT32) :
938 case (Constants::BAM_TAG_TYPE_UINT32) :
939 numBytesParsed += sizeof(uint32_t);
940 pTagData += sizeof(uint32_t);
943 case (Constants::BAM_TAG_TYPE_STRING) :
944 case (Constants::BAM_TAG_TYPE_HEX) :
949 // increment for null-terminator
954 case (Constants::BAM_TAG_TYPE_ARRAY) :
958 const char arrayType = *pTagData;
962 // read number of elements
964 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
965 numBytesParsed += sizeof(uint32_t);
966 pTagData += sizeof(uint32_t);
968 // calculate number of bytes to skip
971 case (Constants::BAM_TAG_TYPE_INT8) :
972 case (Constants::BAM_TAG_TYPE_UINT8) :
973 bytesToSkip = numElements;
975 case (Constants::BAM_TAG_TYPE_INT16) :
976 case (Constants::BAM_TAG_TYPE_UINT16) :
977 bytesToSkip = numElements*sizeof(uint16_t);
979 case (Constants::BAM_TAG_TYPE_FLOAT) :
980 case (Constants::BAM_TAG_TYPE_INT32) :
981 case (Constants::BAM_TAG_TYPE_UINT32) :
982 bytesToSkip = numElements*sizeof(uint32_t);
985 const string message = string("invalid binary array type: ") + arrayType;
986 SetErrorString("BamAlignment::SkipToNextTag", message);
990 // skip binary array contents
991 numBytesParsed += bytesToSkip;
992 pTagData += bytesToSkip;
997 const string message = string("invalid tag type: ") + storageType;
998 SetErrorString("BamAlignment::SkipToNextTag", message);
1002 // if we get here, tag skipped OK - return success