1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 7 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #include <api/BamAlignment.h>
11 #include <api/BamConstants.h>
12 using namespace BamTools;
15 /*! \class BamTools::BamAlignment
16 \brief The main BAM alignment data structure.
18 Provides methods to query/modify BAM alignment data fields.
20 /*! \var BamAlignment::Name
23 /*! \var BamAlignment::Length
24 \brief length of query sequence
26 /*! \var BamAlignment::QueryBases
27 \brief 'original' sequence (as reported from sequencing machine)
29 /*! \var BamAlignment::AlignedBases
30 \brief 'aligned' sequence (includes any indels, padding, clipping)
32 /*! \var BamAlignment::Qualities
33 \brief FASTQ qualities (ASCII characters, not numeric values)
35 /*! \var BamAlignment::TagData
36 \brief tag data (use the provided methods to query/modify)
38 /*! \var BamAlignment::RefID
39 \brief ID number for reference sequence
41 /*! \var BamAlignment::Position
42 \brief position (0-based) where alignment starts
44 /*! \var BamAlignment::Bin
45 \brief BAM (standard) index bin number for this alignment
47 /*! \var BamAlignment::MapQuality
48 \brief mapping quality score
50 /*! \var BamAlignment::AlignmentFlag
51 \brief alignment bit-flag (use the provided methods to query/modify)
53 /*! \var BamAlignment::CigarData
54 \brief CIGAR operations for this alignment
56 /*! \var BamAlignment::MateRefID
57 \brief ID number for reference sequence where alignment's mate was aligned
59 /*! \var BamAlignment::MatePosition
60 \brief position (0-based) where alignment's mate starts
62 /*! \var BamAlignment::InsertSize
63 \brief mate-pair insert size
65 /*! \var BamAlignment::Filename
66 \brief name of BAM file which this alignment comes from
69 /*! \fn BamAlignment::BamAlignment(void)
72 BamAlignment::BamAlignment(void)
80 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
81 \brief copy constructor
83 BamAlignment::BamAlignment(const BamAlignment& other)
85 , Length(other.Length)
86 , QueryBases(other.QueryBases)
87 , AlignedBases(other.AlignedBases)
88 , Qualities(other.Qualities)
89 , TagData(other.TagData)
91 , Position(other.Position)
93 , MapQuality(other.MapQuality)
94 , AlignmentFlag(other.AlignmentFlag)
95 , CigarData(other.CigarData)
96 , MateRefID(other.MateRefID)
97 , MatePosition(other.MatePosition)
98 , InsertSize(other.InsertSize)
99 , Filename(other.Filename)
100 , SupportData(other.SupportData)
103 /*! \fn BamAlignment::~BamAlignment(void)
106 BamAlignment::~BamAlignment(void) { }
108 ///*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
109 // \brief Adds a field with string data to the BAM tags.
111 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
113 // \param[in] tag 2-character tag name
114 // \param[in] type 1-character tag type (must be "Z" or "H")
115 // \param[in] value string data to store
116 // \return \c true if the \b new tag was added successfully
117 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
121 ///*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
122 // \brief Adds a numeric array field to the BAM tags.
124 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
126 // \param tag 2-character tag name
127 // \param values vector of uint8_t values to store
129 // \return \c true if the \b new tag was added successfully
130 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
133 /*! \fn bool BamAlignment::BuildCharData(void)
134 \brief Populates alignment string fields (read name, bases, qualities, tag data).
136 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
137 Using that method makes parsing much quicker when only positional data is required.
139 However, if you later want to access the character data fields from such an alignment,
140 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
143 \return \c true if character data populated successfully (or was already available to begin with)
145 bool BamAlignment::BuildCharData(void) {
147 // skip if char data already parsed
148 if ( !SupportData.HasCoreOnly )
151 // check system endianness
152 bool IsBigEndian = BamTools::SystemIsBigEndian();
154 // calculate character lengths/offsets
155 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
156 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
157 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
158 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
159 const unsigned int tagDataLength = dataLength - tagDataOffset;
161 // check offsets to see what char data exists
162 const bool hasSeqData = ( seqDataOffset < dataLength );
163 const bool hasQualData = ( qualDataOffset < dataLength );
164 const bool hasTagData = ( tagDataOffset < dataLength );
166 // set up char buffers
167 const char* allCharData = SupportData.AllCharData.data();
168 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
169 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
170 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
172 // store alignment name (relies on null char in name as terminator)
173 Name.assign((const char*)(allCharData));
175 // save query sequence
178 QueryBases.reserve(SupportData.QuerySequenceLength);
179 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
180 const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
181 QueryBases.append(1, singleBase);
185 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
188 Qualities.reserve(SupportData.QuerySequenceLength);
189 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
190 const char singleQuality = static_cast<const char>(qualData[i]+33);
191 Qualities.append(1, singleQuality);
195 // clear previous AlignedBases
196 AlignedBases.clear();
198 // if QueryBases has data, build AlignedBases using CIGAR data
199 // otherwise, AlignedBases will remain empty (this case IS allowed)
200 if ( !QueryBases.empty() ) {
202 // resize AlignedBases
203 AlignedBases.reserve(SupportData.QuerySequenceLength);
205 // iterate over CigarOps
207 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
208 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
209 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
210 const CigarOp& op = (*cigarIter);
214 // for 'M', 'I', '=', 'X' - write bases
215 case (Constants::BAM_CIGAR_MATCH_CHAR) :
216 case (Constants::BAM_CIGAR_INS_CHAR) :
217 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
218 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
219 AlignedBases.append(QueryBases.substr(k, op.Length));
222 // for 'S' - soft clip, do not write bases
223 // but increment placeholder 'k'
224 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
228 // for 'D' - write gap character
229 case (Constants::BAM_CIGAR_DEL_CHAR) :
230 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
233 // for 'P' - write padding character
234 case (Constants::BAM_CIGAR_PAD_CHAR) :
235 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
238 // for 'N' - write N's, skip bases in original query sequence
239 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
240 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
243 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
244 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
247 // invalid CIGAR op-code
249 const string message = string("invalid CIGAR operation type: ") + op.Type;
250 SetErrorString("BamAlignment::BuildCharData", message);
261 while ( i < tagDataLength ) {
263 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
264 const char type = tagData[i]; // get tag type at position i
265 ++i; // move i past tag type
269 case(Constants::BAM_TAG_TYPE_ASCII) :
270 case(Constants::BAM_TAG_TYPE_INT8) :
271 case(Constants::BAM_TAG_TYPE_UINT8) :
272 // no endian swapping necessary for single-byte data
276 case(Constants::BAM_TAG_TYPE_INT16) :
277 case(Constants::BAM_TAG_TYPE_UINT16) :
278 BamTools::SwapEndian_16p(&tagData[i]);
279 i += sizeof(uint16_t);
282 case(Constants::BAM_TAG_TYPE_FLOAT) :
283 case(Constants::BAM_TAG_TYPE_INT32) :
284 case(Constants::BAM_TAG_TYPE_UINT32) :
285 BamTools::SwapEndian_32p(&tagData[i]);
286 i += sizeof(uint32_t);
289 case(Constants::BAM_TAG_TYPE_HEX) :
290 case(Constants::BAM_TAG_TYPE_STRING) :
291 // no endian swapping necessary for hex-string/string data
294 // increment one more for null terminator
298 case(Constants::BAM_TAG_TYPE_ARRAY) :
302 const char arrayType = tagData[i];
305 // swap endian-ness of number of elements in place, then retrieve for loop
306 BamTools::SwapEndian_32p(&tagData[i]);
307 uint32_t numElements;
308 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
309 i += sizeof(uint32_t);
311 // swap endian-ness of array elements
312 for ( size_t j = 0; j < numElements; ++j ) {
314 case (Constants::BAM_TAG_TYPE_INT8) :
315 case (Constants::BAM_TAG_TYPE_UINT8) :
316 // no endian-swapping necessary
319 case (Constants::BAM_TAG_TYPE_INT16) :
320 case (Constants::BAM_TAG_TYPE_UINT16) :
321 BamTools::SwapEndian_16p(&tagData[i]);
322 i += sizeof(uint16_t);
324 case (Constants::BAM_TAG_TYPE_FLOAT) :
325 case (Constants::BAM_TAG_TYPE_INT32) :
326 case (Constants::BAM_TAG_TYPE_UINT32) :
327 BamTools::SwapEndian_32p(&tagData[i]);
328 i += sizeof(uint32_t);
331 const string message = string("invalid binary array type: ") + arrayType;
332 SetErrorString("BamAlignment::BuildCharData", message);
340 // invalid tag type-code
342 const string message = string("invalid tag type: ") + type;
343 SetErrorString("BamAlignment::BuildCharData", message);
349 // store tagData in alignment
350 TagData.resize(tagDataLength);
351 memcpy((char*)(TagData.data()), tagData, tagDataLength);
354 // clear the core-only flag
355 SupportData.HasCoreOnly = false;
361 ///*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
362 // \brief Edits a BAM tag field containing string data.
364 // If \a tag does not exist, a new entry is created.
366 // \param tag 2-character tag name
367 // \param type 1-character tag type (must be "Z" or "H")
368 // \param value string data to store
370 // \return \c true if the tag was modified/created successfully
372 // \sa BamAlignment::RemoveTag()
373 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
376 ///*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
377 // \brief Edits a BAM tag field containing a numeric array.
379 // If \a tag does not exist, a new entry is created.
381 // \param tag 2-character tag name
382 // \param value vector of uint8_t values to store
384 // \return \c true if the tag was modified/created successfully
385 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
388 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
391 Searches for requested tag in BAM tag data.
393 \param tag requested 2-character tag name
394 \param pTagData pointer to current position in BamAlignment::TagData
395 \param tagDataLength length of BamAlignment::TagData
396 \param numBytesParsed number of bytes parsed so far
398 \return \c true if found
400 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
401 \a numBytesParsed will correspond to the position in the full TagData string.
404 bool BamAlignment::FindTag(const std::string& tag,
406 const unsigned int& tagDataLength,
407 unsigned int& numBytesParsed) const
410 while ( numBytesParsed < tagDataLength ) {
412 const char* pTagType = pTagData;
413 const char* pTagStorageType = pTagData + 2;
417 // check the current tag, return true on match
418 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
421 // get the storage class and find the next tag
422 if ( *pTagStorageType == '\0' ) return false;
423 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
424 if ( *pTagData == '\0' ) return false;
427 // checked all tags, none match
431 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
432 \brief Retrieves value of edit distance tag ("NM").
434 \deprecated Instead use BamAlignment::GetTag()
436 BamAlignment::GetTag("NM", editDistance);
439 \param editDistance destination for retrieved value
441 \return \c true if found
444 // TODO : REMOVE THIS METHOD
445 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
446 return GetTag("NM", (uint32_t&)editDistance);
449 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
450 \brief Calculates alignment end position, based on starting position and CIGAR data.
452 \param usePadded Inserted bases affect reported position. Default is false, so that reported
453 position stays 'sync-ed' with reference coordinates.
454 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
455 when using BAM data with half-open formats (e.g. BED).
457 \return alignment end position
459 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
461 // TODO: Come back to this for coordinate issues !!!
463 // initialize alignment end to starting position
464 int alignEnd = Position;
466 // iterate over cigar operations
467 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
468 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
469 for ( ; cigarIter != cigarEnd; ++cigarIter) {
470 const char cigarType = (*cigarIter).Type;
471 const uint32_t& cigarLength = (*cigarIter).Length;
473 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
474 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
475 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
476 alignEnd += cigarLength;
477 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
478 alignEnd += cigarLength;
481 // adjust for zero-based coordinates, if requested
482 if ( zeroBased ) alignEnd -= 1;
488 /*! \fn std::string BamAlignment::GetErrorString(void) const
489 \brief Returns a description of the last error that occurred
491 This method allows elimnation of STDERR pollution. Developers of client code
492 may choose how the messages are displayed to the user, if at all.
494 \return description of last error that occurred
496 std::string BamAlignment::GetErrorString(void) const {
500 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
501 \brief Retrieves value of read group tag ("RG").
503 \deprecated Instead use BamAlignment::GetTag()
505 BamAlignment::GetTag("RG", readGroup);
508 \param readGroup destination for retrieved value
510 \return \c true if found
513 // TODO : REMOVE THIS METHOD
514 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
515 return GetTag("RG", readGroup);
518 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
519 // \brief Retrieves the string value associated with a BAM tag.
521 // \param tag 2-character tag name
522 // \param destination destination for retrieved value
524 // \return \c true if found
527 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
528 // \brief Retrieves the numeric array data associated with a BAM tag
530 // \param tag 2-character tag name
531 // \param destination destination for retrieved data
533 // \return \c true if found
536 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
537 \brief Retrieves the BAM tag type-code associated with requested tag name.
539 \param tag 2-character tag name
540 \param type destination for the retrieved (1-character) tag type
542 \return \c true if found
543 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
545 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
547 // skip if alignment is core-only
548 if ( SupportData.HasCoreOnly ) {
549 // TODO: set error string?
553 // skip if no tags present
554 if ( TagData.empty() ) {
555 // TODO: set error string?
559 // localize the tag data
560 char* pTagData = (char*)TagData.data();
561 const unsigned int tagDataLength = TagData.size();
562 unsigned int numBytesParsed = 0;
564 // if tag not found, return failure
565 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
566 // TODO: set error string?
570 // otherwise, retrieve & validate tag type code
571 type = *(pTagData - 1);
573 case (Constants::BAM_TAG_TYPE_ASCII) :
574 case (Constants::BAM_TAG_TYPE_INT8) :
575 case (Constants::BAM_TAG_TYPE_UINT8) :
576 case (Constants::BAM_TAG_TYPE_INT16) :
577 case (Constants::BAM_TAG_TYPE_UINT16) :
578 case (Constants::BAM_TAG_TYPE_INT32) :
579 case (Constants::BAM_TAG_TYPE_UINT32) :
580 case (Constants::BAM_TAG_TYPE_FLOAT) :
581 case (Constants::BAM_TAG_TYPE_STRING) :
582 case (Constants::BAM_TAG_TYPE_HEX) :
583 case (Constants::BAM_TAG_TYPE_ARRAY) :
588 const string message = string("invalid tag type: ") + type;
589 SetErrorString("BamAlignment::GetTagType", message);
594 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
595 \brief Returns true if alignment has a record for requested tag.
596 \param tag 2-character tag name
597 \return \c true if alignment has a record for tag
599 bool BamAlignment::HasTag(const std::string& tag) const {
601 // return false if no tag data present
602 if ( SupportData.HasCoreOnly || TagData.empty() )
605 // localize the tag data for lookup
606 char* pTagData = (char*)TagData.data();
607 const unsigned int tagDataLength = TagData.size();
608 unsigned int numBytesParsed = 0;
610 // if result of tag lookup
611 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
614 /*! \fn bool BamAlignment::IsDuplicate(void) const
615 \return \c true if this read is a PCR duplicate
617 bool BamAlignment::IsDuplicate(void) const {
618 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
621 /*! \fn bool BamAlignment::IsFailedQC(void) const
622 \return \c true if this read failed quality control
624 bool BamAlignment::IsFailedQC(void) const {
625 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
628 /*! \fn bool BamAlignment::IsFirstMate(void) const
629 \return \c true if alignment is first mate on paired-end read
631 bool BamAlignment::IsFirstMate(void) const {
632 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
635 /*! \fn bool BamAlignment::IsMapped(void) const
636 \return \c true if alignment is mapped
638 bool BamAlignment::IsMapped(void) const {
639 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
642 /*! \fn bool BamAlignment::IsMateMapped(void) const
643 \return \c true if alignment's mate is mapped
645 bool BamAlignment::IsMateMapped(void) const {
646 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
649 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
650 \return \c true if alignment's mate mapped to reverse strand
652 bool BamAlignment::IsMateReverseStrand(void) const {
653 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
656 /*! \fn bool BamAlignment::IsPaired(void) const
657 \return \c true if alignment part of paired-end read
659 bool BamAlignment::IsPaired(void) const {
660 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
663 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
664 \return \c true if reported position is primary alignment
666 bool BamAlignment::IsPrimaryAlignment(void) const {
667 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
670 /*! \fn bool BamAlignment::IsProperPair(void) const
671 \return \c true if alignment is part of read that satisfied paired-end resolution
673 bool BamAlignment::IsProperPair(void) const {
674 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
677 /*! \fn bool BamAlignment::IsReverseStrand(void) const
678 \return \c true if alignment mapped to reverse strand
680 bool BamAlignment::IsReverseStrand(void) const {
681 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
684 /*! \fn bool BamAlignment::IsSecondMate(void) const
685 \return \c true if alignment is second mate on read
687 bool BamAlignment::IsSecondMate(void) const {
688 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
691 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
694 Checks that tag name & type strings are expected sizes.
695 \a tag should have length
696 \a type should have length 1
698 \param tag BAM tag name
699 \param type BAM tag type-code
701 \return \c true if both \a tag and \a type are correct sizes
703 bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
704 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
705 (type.size() == Constants::BAM_TAG_TYPESIZE);
708 /*! \fn void BamAlignment::RemoveTag(const std::string& tag)
709 \brief Removes field from BAM tags.
711 void BamAlignment::RemoveTag(const std::string& tag) {
713 // if char data not populated, do that first
714 if ( SupportData.HasCoreOnly )
717 // skip if no tags available
718 if ( TagData.empty() )
721 // localize the tag data
722 char* pOriginalTagData = (char*)TagData.data();
723 char* pTagData = pOriginalTagData;
724 const unsigned int originalTagDataLength = TagData.size();
725 unsigned int newTagDataLength = 0;
726 unsigned int numBytesParsed = 0;
728 // skip if tag not found
729 if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
732 // otherwise, remove it
733 RaiiBuffer newTagData(originalTagDataLength);
735 // copy original tag data up til desired tag
738 const unsigned int beginningTagDataLength = numBytesParsed;
739 newTagDataLength += beginningTagDataLength;
740 memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
742 // attemp to skip to next tag
743 const char* pTagStorageType = pTagData + 2;
746 if ( SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
748 // squeeze remaining tag data
749 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
750 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
751 memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
753 // save modified tag data in alignment
754 TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
758 /*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
761 Sets a formatted error string for this alignment.
763 void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
764 static const string SEPARATOR = ": ";
765 ErrorString = where + SEPARATOR + what;
768 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
769 \brief Sets value of "PCR duplicate" flag to \a ok.
771 void BamAlignment::SetIsDuplicate(bool ok) {
772 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
773 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
776 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
777 \brief Sets "failed quality control" flag to \a ok.
779 void BamAlignment::SetIsFailedQC(bool ok) {
780 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
781 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
784 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
785 \brief Sets "alignment is first mate" flag to \a ok.
787 void BamAlignment::SetIsFirstMate(bool ok) {
788 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
789 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
792 /*! \fn void BamAlignment::SetIsMapped(bool ok)
793 \brief Sets "alignment is mapped" flag to \a ok.
795 void BamAlignment::SetIsMapped(bool ok) {
796 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
797 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
800 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
801 \brief Sets "alignment's mate is mapped" flag to \a ok.
803 void BamAlignment::SetIsMateMapped(bool ok) {
804 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
805 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
808 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
809 \brief Complement of using SetIsMateMapped().
810 \deprecated For sake of symmetry with the query methods
811 \sa IsMateMapped(), SetIsMateMapped()
813 void BamAlignment::SetIsMateUnmapped(bool ok) {
814 SetIsMateMapped(!ok);
817 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
818 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
820 void BamAlignment::SetIsMateReverseStrand(bool ok) {
821 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
822 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
825 /*! \fn void BamAlignment::SetIsPaired(bool ok)
826 \brief Sets "alignment part of paired-end read" flag to \a ok.
828 void BamAlignment::SetIsPaired(bool ok) {
829 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
830 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
833 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
834 \brief Sets "position is primary alignment" flag to \a ok.
836 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
837 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
838 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
841 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
842 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
844 void BamAlignment::SetIsProperPair(bool ok) {
845 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
846 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
849 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
850 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
852 void BamAlignment::SetIsReverseStrand(bool ok) {
853 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
854 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
857 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
858 \brief Complement of using SetIsPrimaryAlignment().
859 \deprecated For sake of symmetry with the query methods
860 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
862 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
863 SetIsPrimaryAlignment(!ok);
866 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
867 \brief Sets "alignment is second mate on read" flag to \a ok.
869 void BamAlignment::SetIsSecondMate(bool ok) {
870 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
871 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
874 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
875 \brief Complement of using SetIsMapped().
876 \deprecated For sake of symmetry with the query methods
877 \sa IsMapped(), SetIsMapped()
879 void BamAlignment::SetIsUnmapped(bool ok) {
883 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
886 Moves to next available tag in tag data string
888 \param storageType BAM tag type-code that determines how far to move cursor
889 \param pTagData pointer to current position (cursor) in tag string
890 \param numBytesParsed report of how many bytes were parsed (cumulatively)
892 \return \c if storageType was a recognized BAM tag type
893 \post \a pTagData will point to the byte where the next tag data begins.
894 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
896 bool BamAlignment::SkipToNextTag(const char storageType,
898 unsigned int& numBytesParsed) const
900 switch (storageType) {
902 case (Constants::BAM_TAG_TYPE_ASCII) :
903 case (Constants::BAM_TAG_TYPE_INT8) :
904 case (Constants::BAM_TAG_TYPE_UINT8) :
909 case (Constants::BAM_TAG_TYPE_INT16) :
910 case (Constants::BAM_TAG_TYPE_UINT16) :
911 numBytesParsed += sizeof(uint16_t);
912 pTagData += sizeof(uint16_t);
915 case (Constants::BAM_TAG_TYPE_FLOAT) :
916 case (Constants::BAM_TAG_TYPE_INT32) :
917 case (Constants::BAM_TAG_TYPE_UINT32) :
918 numBytesParsed += sizeof(uint32_t);
919 pTagData += sizeof(uint32_t);
922 case (Constants::BAM_TAG_TYPE_STRING) :
923 case (Constants::BAM_TAG_TYPE_HEX) :
928 // increment for null-terminator
933 case (Constants::BAM_TAG_TYPE_ARRAY) :
937 const char arrayType = *pTagData;
941 // read number of elements
943 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
944 numBytesParsed += sizeof(uint32_t);
945 pTagData += sizeof(uint32_t);
947 // calculate number of bytes to skip
950 case (Constants::BAM_TAG_TYPE_INT8) :
951 case (Constants::BAM_TAG_TYPE_UINT8) :
952 bytesToSkip = numElements;
954 case (Constants::BAM_TAG_TYPE_INT16) :
955 case (Constants::BAM_TAG_TYPE_UINT16) :
956 bytesToSkip = numElements*sizeof(uint16_t);
958 case (Constants::BAM_TAG_TYPE_FLOAT) :
959 case (Constants::BAM_TAG_TYPE_INT32) :
960 case (Constants::BAM_TAG_TYPE_UINT32) :
961 bytesToSkip = numElements*sizeof(uint32_t);
964 const string message = string("invalid binary array type: ") + arrayType;
965 SetErrorString("BamAlignment::SkipToNextTag", message);
969 // skip binary array contents
970 numBytesParsed += bytesToSkip;
971 pTagData += bytesToSkip;
976 const string message = string("invalid tag type: ") + storageType;
977 SetErrorString("BamAlignment::SkipToNextTag", message);
981 // if we get here, tag skipped OK - return success