1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #include <api/BamAlignment.h>
11 #include <api/BamConstants.h>
12 using namespace BamTools;
15 /*! \class BamTools::BamAlignment
16 \brief The main BAM alignment data structure.
18 Provides methods to query/modify BAM alignment data fields.
20 /*! \var BamAlignment::Name
23 /*! \var BamAlignment::Length
24 \brief length of query sequence
26 /*! \var BamAlignment::QueryBases
27 \brief 'original' sequence (as reported from sequencing machine)
29 /*! \var BamAlignment::AlignedBases
30 \brief 'aligned' sequence (includes any indels, padding, clipping)
32 /*! \var BamAlignment::Qualities
33 \brief FASTQ qualities (ASCII characters, not numeric values)
35 /*! \var BamAlignment::TagData
36 \brief tag data (use the provided methods to query/modify)
38 /*! \var BamAlignment::RefID
39 \brief ID number for reference sequence
41 /*! \var BamAlignment::Position
42 \brief position (0-based) where alignment starts
44 /*! \var BamAlignment::Bin
45 \brief BAM (standard) index bin number for this alignment
47 /*! \var BamAlignment::MapQuality
48 \brief mapping quality score
50 /*! \var BamAlignment::AlignmentFlag
51 \brief alignment bit-flag (use the provided methods to query/modify)
53 /*! \var BamAlignment::CigarData
54 \brief CIGAR operations for this alignment
56 /*! \var BamAlignment::MateRefID
57 \brief ID number for reference sequence where alignment's mate was aligned
59 /*! \var BamAlignment::MatePosition
60 \brief position (0-based) where alignment's mate starts
62 /*! \var BamAlignment::InsertSize
63 \brief mate-pair insert size
65 /*! \var BamAlignment::Filename
66 \brief name of BAM file which this alignment comes from
69 /*! \fn BamAlignment::BamAlignment(void)
// Default constructor; takes no arguments.
72 BamAlignment::BamAlignment(void)
80 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
81 \brief copy constructor
// Copy constructor: every member named in the initializer list below is
// copy-initialized from the same-named member of `other`.
83 BamAlignment::BamAlignment(const BamAlignment& other)
85 , Length(other.Length)
86 , QueryBases(other.QueryBases)
87 , AlignedBases(other.AlignedBases)
88 , Qualities(other.Qualities)
89 , TagData(other.TagData)
91 , Position(other.Position)
93 , MapQuality(other.MapQuality)
94 , AlignmentFlag(other.AlignmentFlag)
95 , CigarData(other.CigarData)
96 , MateRefID(other.MateRefID)
97 , MatePosition(other.MatePosition)
98 , InsertSize(other.InsertSize)
99 , Filename(other.Filename)
// SupportData holds HasCoreOnly and the raw AllCharData buffer (both read by
// BuildCharData), so the copy presumably keeps the same parsed/unparsed state.
100 , SupportData(other.SupportData)
103 /*! \fn BamAlignment::~BamAlignment(void)
// Destructor; the body is empty, so no explicit cleanup is done here.
106 BamAlignment::~BamAlignment(void) { }
108 /*! \fn bool BamAlignment::BuildCharData(void)
109 \brief Populates alignment string fields (read name, bases, qualities, tag data).
111 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
112 Using that method makes parsing much quicker when only positional data is required.
114 However, if you later want to access the character data fields from such an alignment,
115 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
118 \return \c true if character data populated successfully (or was already available to begin with)
// Decodes the raw bytes in SupportData.AllCharData into Name, QueryBases,
// Qualities, AlignedBases and TagData, then clears SupportData.HasCoreOnly.
// All offsets into the raw buffer are derived from SupportData length fields.
120 bool BamAlignment::BuildCharData(void) {
122 // skip if char data already parsed
123 if ( !SupportData.HasCoreOnly )
126 // check system endianness
127 bool IsBigEndian = BamTools::SystemIsBigEndian();
129 // calculate character lengths/offsets
// Layout implied by the arithmetic below: query name, then 4 bytes per CIGAR
// operation, then the packed sequence (two bases per byte, rounded up), then
// one quality byte per base, then tag data filling the rest of the block.
130 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
131 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
132 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
133 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
// NOTE(review): these are all unsigned; if tagDataOffset > dataLength the
// subtraction below wraps to a very large value. hasTagData guards the
// pointer, not this length -- confirm the lengths are validated upstream.
134 const unsigned int tagDataLength = dataLength - tagDataOffset;
136 // check offsets to see what char data exists
137 const bool hasSeqData = ( seqDataOffset < dataLength );
138 const bool hasQualData = ( qualDataOffset < dataLength );
139 const bool hasTagData = ( tagDataOffset < dataLength );
141 // set up char buffers
// each section pointer is null when its offset lies at or past dataLength
142 const char* allCharData = SupportData.AllCharData.data();
143 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
144 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
// tagData is non-const (the cast drops the const of data()) because the
// endian swaps further down rewrite the tag bytes in place
145 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
147 // store alignment name (relies on null char in name as terminator)
148 Name.assign((const char*)(allCharData));
150 // save query sequence
153 QueryBases.reserve(SupportData.QuerySequenceLength);
154 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
// two bases per byte: even i reads the high nibble (shift 4), odd i the low
// nibble (shift 0); the 4-bit code indexes BAM_DNA_LOOKUP
155 const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
156 QueryBases.append(1, singleBase);
160 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
163 Qualities.reserve(SupportData.QuerySequenceLength);
164 for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
// ASCII character = numeric quality value + 33
165 const char singleQuality = static_cast<const char>(qualData[i]+33);
166 Qualities.append(1, singleQuality);
170 // clear previous AlignedBases
171 AlignedBases.clear();
173 // if QueryBases has data, build AlignedBases using CIGAR data
174 // otherwise, AlignedBases will remain empty (this case IS allowed)
175 if ( !QueryBases.empty() ) {
177 // resize AlignedBases
// reserve() only pre-allocates; the string grows through the appends below
178 AlignedBases.reserve(SupportData.QuerySequenceLength);
180 // iterate over CigarOps
182 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
183 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
184 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
185 const CigarOp& op = (*cigarIter);
189 // for 'M', 'I', '=', 'X' - write bases
190 case (Constants::BAM_CIGAR_MATCH_CHAR) :
191 case (Constants::BAM_CIGAR_INS_CHAR) :
192 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
193 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
// k is the current read offset into QueryBases
194 AlignedBases.append(QueryBases.substr(k, op.Length));
197 // for 'S' - soft clip, do not write bases
198 // but increment placeholder 'k'
199 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
203 // for 'D' - write gap character
204 case (Constants::BAM_CIGAR_DEL_CHAR) :
205 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
208 // for 'P' - write padding character
209 case (Constants::BAM_CIGAR_PAD_CHAR) :
210 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
213 // for 'N' - write N's, skip bases in original query sequence
214 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
215 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
218 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
219 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
222 // invalid CIGAR op-code
224 const string message = string("invalid CIGAR operation type: ") + op.Type;
225 SetErrorString("BamAlignment::BuildCharData", message);
// Walk the tag records: 2-char tag name, 1-char type code, then a payload
// whose width depends on the type. Multi-byte values are byte-swapped in place.
// NOTE(review): IsBigEndian is computed above; presumably this pass only runs
// on big-endian hosts -- confirm against the full source.
236 while ( i < tagDataLength ) {
238 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
239 const char type = tagData[i]; // get tag type at position i
240 ++i; // move i past tag type
244 case(Constants::BAM_TAG_TYPE_ASCII) :
245 case(Constants::BAM_TAG_TYPE_INT8) :
246 case(Constants::BAM_TAG_TYPE_UINT8) :
247 // no endian swapping necessary for single-byte data
251 case(Constants::BAM_TAG_TYPE_INT16) :
252 case(Constants::BAM_TAG_TYPE_UINT16) :
253 BamTools::SwapEndian_16p(&tagData[i]);
254 i += sizeof(uint16_t);
257 case(Constants::BAM_TAG_TYPE_FLOAT) :
258 case(Constants::BAM_TAG_TYPE_INT32) :
259 case(Constants::BAM_TAG_TYPE_UINT32) :
260 BamTools::SwapEndian_32p(&tagData[i]);
261 i += sizeof(uint32_t);
264 case(Constants::BAM_TAG_TYPE_HEX) :
265 case(Constants::BAM_TAG_TYPE_STRING) :
266 // no endian swapping necessary for hex-string/string data
269 // increment one more for null terminator
// array payload: 1-byte element type, 4-byte element count, then the elements
273 case(Constants::BAM_TAG_TYPE_ARRAY) :
277 const char arrayType = tagData[i];
280 // swap endian-ness of number of elements in place, then retrieve for loop
281 BamTools::SwapEndian_32p(&tagData[i]);
282 uint32_t numElements;
// memcpy avoids reading a uint32_t through a possibly misaligned char pointer
283 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
284 i += sizeof(uint32_t);
286 // swap endian-ness of array elements
// NOTE(review): numElements comes straight from the record and is not checked
// against tagDataLength in the lines shown -- confirm malformed input is
// rejected before this point.
287 for ( size_t j = 0; j < numElements; ++j ) {
289 case (Constants::BAM_TAG_TYPE_INT8) :
290 case (Constants::BAM_TAG_TYPE_UINT8) :
291 // no endian-swapping necessary
294 case (Constants::BAM_TAG_TYPE_INT16) :
295 case (Constants::BAM_TAG_TYPE_UINT16) :
296 BamTools::SwapEndian_16p(&tagData[i]);
297 i += sizeof(uint16_t);
299 case (Constants::BAM_TAG_TYPE_FLOAT) :
300 case (Constants::BAM_TAG_TYPE_INT32) :
301 case (Constants::BAM_TAG_TYPE_UINT32) :
302 BamTools::SwapEndian_32p(&tagData[i]);
303 i += sizeof(uint32_t);
306 const string message = string("invalid binary array type: ") + arrayType;
307 SetErrorString("BamAlignment::BuildCharData", message);
315 // invalid tag type-code
317 const string message = string("invalid tag type: ") + type;
318 SetErrorString("BamAlignment::BuildCharData", message);
324 // store tagData in alignment
// copies the raw (possibly swapped) tag bytes; the cast writes through the
// const pointer returned by TagData.data()
325 TagData.resize(tagDataLength);
326 memcpy((char*)(TagData.data()), tagData, tagDataLength);
329 // clear core-only flag & return success
330 SupportData.HasCoreOnly = false;
334 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) const
337 Searches for requested tag in BAM tag data.
339 \param[in] tag requested 2-character tag name
340 \param[in,out] pTagData pointer to current position in BamAlignment::TagData
341 \param[in] tagDataLength length of BamAlignment::TagData
342 \param[in,out] numBytesParsed number of bytes parsed so far
344 \return \c true if found
346 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
347 \a numBytesParsed will correspond to the position in the full TagData string.
// Scans tag records starting at pTagData until one whose 2-character name
// matches `tag` is found, or until numBytesParsed reaches tagDataLength.
350 bool BamAlignment::FindTag(const std::string& tag,
352 const unsigned int& tagDataLength,
353 unsigned int& numBytesParsed) const
356 while ( numBytesParsed < tagDataLength ) {
// each record starts with a 2-char tag name followed by a 1-char storage type
// NOTE(review): the loop condition guarantees only one unread byte, yet
// pTagData + 2 is dereferenced below -- confirm TagData is always well-formed.
358 const char* pTagType = pTagData;
359 const char* pTagStorageType = pTagData + 2;
363 // check the current tag, return true on match
// only the first two characters of `tag` take part in the comparison
364 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
367 // get the storage class and find the next tag
// the search gives up on a NUL storage type, on SkipToNextTag failing, or on
// a NUL byte at the start of the next record
368 if ( *pTagStorageType == '\0' ) return false;
369 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
370 if ( *pTagData == '\0' ) return false;
373 // checked all tags, none match
377 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool closedInterval = false) const
378 \brief Calculates alignment end position, based on its starting position and CIGAR data.
380 \warning The position returned now represents a zero-based, HALF-OPEN interval.
381 In previous versions of BamTools (0.x & 1.x) all intervals were treated
382 as zero-based, CLOSED.
384 \param[in] usePadded Allow inserted bases to affect the reported position. Default is
385 false, so that reported position stays synced with reference
387 \param[in] closedInterval Setting this to true will return a 0-based end coordinate. Default is
388 false, so that this value represents a standard, half-open interval.
390 \return alignment end position
// Computes the alignment end coordinate: starts from Position and adds the
// length of each CIGAR operation that advances the end position.
392 int BamAlignment::GetEndPosition(bool usePadded, bool closedInterval) const {
394 // initialize alignment end to starting position
395 int alignEnd = Position;
397 // iterate over cigar operations
398 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
399 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
400 for ( ; cigarIter != cigarEnd; ++cigarIter) {
401 const CigarOp& op = (*cigarIter);
405 // increase end position on CIGAR chars [DMXN=]
406 case Constants::BAM_CIGAR_DEL_CHAR :
407 case Constants::BAM_CIGAR_MATCH_CHAR :
408 case Constants::BAM_CIGAR_MISMATCH_CHAR :
409 case Constants::BAM_CIGAR_REFSKIP_CHAR :
410 case Constants::BAM_CIGAR_SEQMATCH_CHAR :
// op.Length is added to a signed int accumulator
411 alignEnd += op.Length;
414 // increase end position on insertion, only if @usePadded is true
415 case Constants::BAM_CIGAR_INS_CHAR :
417 alignEnd += op.Length;
420 // all other CIGAR chars do not affect end position
426 // adjust for closedInterval, if requested
427 if ( closedInterval )
434 /*! \fn std::string BamAlignment::GetErrorString(void) const
435 \brief Returns a human-readable description of the last error that occurred
437 This method allows elimination of STDERR pollution. Developers of client code
438 may choose how the messages are displayed to the user, if at all.
440 \return error description
// Returns the last error description by value.
// NOTE(review): presumably this is the ErrorString written by SetErrorString -- confirm.
442 std::string BamAlignment::GetErrorString(void) const {
446 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
447 \brief Retrieves the BAM tag type-code associated with requested tag name.
449 \param[in] tag 2-character tag name
450 \param[out] type retrieved (1-character) type-code
452 \return \c true if found
453 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Looks up `tag` in TagData and writes its 1-character type code to `type`,
// then checks that code against the known BAM tag type constants.
455 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
457 // skip if alignment is core-only
458 if ( SupportData.HasCoreOnly ) {
459 // TODO: set error string?
463 // skip if no tags present
464 if ( TagData.empty() ) {
465 // TODO: set error string?
469 // localize the tag data
// the cast drops const because FindTag takes its cursor as char*&
470 char* pTagData = (char*)TagData.data();
471 const unsigned int tagDataLength = TagData.size();
472 unsigned int numBytesParsed = 0;
474 // if tag not found, return failure
475 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
476 // TODO: set error string?
480 // otherwise, retrieve & validate tag type code
// FindTag leaves pTagData on the first byte of the tag's value, so the byte
// just before it is the type code. Note `type` is written before it is
// validated, so the caller's variable changes even for an invalid code.
481 type = *(pTagData - 1);
483 case (Constants::BAM_TAG_TYPE_ASCII) :
484 case (Constants::BAM_TAG_TYPE_INT8) :
485 case (Constants::BAM_TAG_TYPE_UINT8) :
486 case (Constants::BAM_TAG_TYPE_INT16) :
487 case (Constants::BAM_TAG_TYPE_UINT16) :
488 case (Constants::BAM_TAG_TYPE_INT32) :
489 case (Constants::BAM_TAG_TYPE_UINT32) :
490 case (Constants::BAM_TAG_TYPE_FLOAT) :
491 case (Constants::BAM_TAG_TYPE_STRING) :
492 case (Constants::BAM_TAG_TYPE_HEX) :
493 case (Constants::BAM_TAG_TYPE_ARRAY) :
// any other code is reported through the error string
498 const string message = string("invalid tag type: ") + type;
499 SetErrorString("BamAlignment::GetTagType", message);
504 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
505 \brief Returns true if alignment has a record for requested tag.
507 \param[in] tag 2-character tag name
508 \return \c true if alignment has a record for tag
// Reports whether TagData contains a record named `tag`; delegates the scan
// to FindTag starting from the beginning of TagData.
510 bool BamAlignment::HasTag(const std::string& tag) const {
512 // return false if no tag data present
513 if ( SupportData.HasCoreOnly || TagData.empty() )
516 // localize the tag data for lookup
// the cast drops const because FindTag takes its cursor as char*&
517 char* pTagData = (char*)TagData.data();
518 const unsigned int tagDataLength = TagData.size();
519 unsigned int numBytesParsed = 0;
521 // return result of tag lookup
522 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
525 /*! \fn bool BamAlignment::IsDuplicate(void) const
526 \return \c true if this read is a PCR duplicate
// True when the BAM_ALIGNMENT_DUPLICATE bit is set in AlignmentFlag.
528 bool BamAlignment::IsDuplicate(void) const {
529 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
532 /*! \fn bool BamAlignment::IsFailedQC(void) const
533 \return \c true if this read failed quality control
// True when the BAM_ALIGNMENT_QC_FAILED bit is set in AlignmentFlag.
535 bool BamAlignment::IsFailedQC(void) const {
536 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
539 /*! \fn bool BamAlignment::IsFirstMate(void) const
540 \return \c true if alignment is first mate on paired-end read
// True when the BAM_ALIGNMENT_READ_1 bit is set in AlignmentFlag.
542 bool BamAlignment::IsFirstMate(void) const {
543 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
546 /*! \fn bool BamAlignment::IsMapped(void) const
547 \return \c true if alignment is mapped
// The flag stores "unmapped", so mapped means the BAM_ALIGNMENT_UNMAPPED bit
// is clear -- hence the == 0 test.
549 bool BamAlignment::IsMapped(void) const {
550 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
553 /*! \fn bool BamAlignment::IsMateMapped(void) const
554 \return \c true if alignment's mate is mapped
// The flag stores "mate unmapped", so this is true when the
// BAM_ALIGNMENT_MATE_UNMAPPED bit is clear.
556 bool BamAlignment::IsMateMapped(void) const {
557 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
560 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
561 \return \c true if alignment's mate mapped to reverse strand
// True when the BAM_ALIGNMENT_MATE_REVERSE_STRAND bit is set in AlignmentFlag.
563 bool BamAlignment::IsMateReverseStrand(void) const {
564 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
567 /*! \fn bool BamAlignment::IsPaired(void) const
568 \return \c true if alignment part of paired-end read
// True when the BAM_ALIGNMENT_PAIRED bit is set in AlignmentFlag.
570 bool BamAlignment::IsPaired(void) const {
571 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
574 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
575 \return \c true if reported position is primary alignment
// The flag stores "secondary", so primary means the BAM_ALIGNMENT_SECONDARY
// bit is clear -- hence the == 0 test.
577 bool BamAlignment::IsPrimaryAlignment(void) const {
578 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
581 /*! \fn bool BamAlignment::IsProperPair(void) const
582 \return \c true if alignment is part of read that satisfied paired-end resolution
// True when the BAM_ALIGNMENT_PROPER_PAIR bit is set in AlignmentFlag.
584 bool BamAlignment::IsProperPair(void) const {
585 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
588 /*! \fn bool BamAlignment::IsReverseStrand(void) const
589 \return \c true if alignment mapped to reverse strand
// True when the BAM_ALIGNMENT_REVERSE_STRAND bit is set in AlignmentFlag.
591 bool BamAlignment::IsReverseStrand(void) const {
592 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
595 /*! \fn bool BamAlignment::IsSecondMate(void) const
596 \return \c true if alignment is second mate on read
// True when the BAM_ALIGNMENT_READ_2 bit is set in AlignmentFlag.
598 bool BamAlignment::IsSecondMate(void) const {
599 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
602 /*! \fn bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const
605 Checks that tag name & type strings are expected sizes.
607 \param[in] tag BAM tag name
608 \param[in] type BAM tag type-code
609 \return \c true if both input strings are valid sizes
// True only when tag.size() equals BAM_TAG_TAGSIZE and type.size() equals
// BAM_TAG_TYPESIZE; the string contents are not inspected.
611 bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
612 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
613 (type.size() == Constants::BAM_TAG_TYPESIZE);
616 /*! \fn void BamAlignment::RemoveTag(const std::string& tag)
617 \brief Removes field from BAM tags.
619 \param[in] tag 2-character name of field to remove
// Removes the record named `tag` from TagData by rebuilding the tag bytes in
// a scratch buffer: everything before the record, then everything after it.
621 void BamAlignment::RemoveTag(const std::string& tag) {
623 // if char data not populated, do that first
624 if ( SupportData.HasCoreOnly )
627 // skip if no tags available
628 if ( TagData.empty() )
631 // localize the tag data
// pOriginalTagData stays at the start of TagData; pTagData is the moving cursor
632 char* pOriginalTagData = (char*)TagData.data();
633 char* pTagData = pOriginalTagData;
634 const unsigned int originalTagDataLength = TagData.size();
635 unsigned int newTagDataLength = 0;
636 unsigned int numBytesParsed = 0;
638 // skip if tag not found
639 if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
642 // otherwise, remove it
// scratch buffer sized for the whole original tag data; the result is never larger
643 RaiiBuffer newTagData(originalTagDataLength);
645 // copy original tag data up until desired tag
648 const unsigned int beginningTagDataLength = numBytesParsed;
// NOTE(review): newTagDataLength is updated here, but the final assign below
// uses beginningTagDataLength + endTagDataLength instead -- check whether
// this variable is still needed.
649 newTagDataLength += beginningTagDataLength;
650 memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
652 // attempt to skip to next tag
653 const char* pTagStorageType = pTagData + 2;
// TagData is only rewritten when SkipToNextTag succeeds
656 if ( SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
658 // squeeze remaining tag data
// skippedDataLength = bytes occupied by the removed record;
// endTagDataLength = bytes that follow it in the original data
659 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
660 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
661 memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
663 // save modified tag data in alignment
664 TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
668 /*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
671 Sets a formatted error string for this alignment.
673 \param[in] where class/method where error occurred
674 \param[in] what description of error
// Stores "<where>: <what>" in ErrorString.
// The method is const yet assigns ErrorString, so that member is presumably
// declared mutable -- confirm in the header.
676 void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
677 static const string SEPARATOR = ": ";
678 ErrorString = where + SEPARATOR + what;
681 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
682 \brief Sets value of "PCR duplicate" flag to \a ok.
// Sets the BAM_ALIGNMENT_DUPLICATE bit when ok is true, clears it otherwise.
684 void BamAlignment::SetIsDuplicate(bool ok) {
685 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
686 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
689 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
690 \brief Sets "failed quality control" flag to \a ok.
// Sets the BAM_ALIGNMENT_QC_FAILED bit when ok is true, clears it otherwise.
692 void BamAlignment::SetIsFailedQC(bool ok) {
693 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
694 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
697 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
698 \brief Sets "alignment is first mate" flag to \a ok.
// Sets the BAM_ALIGNMENT_READ_1 bit when ok is true, clears it otherwise.
700 void BamAlignment::SetIsFirstMate(bool ok) {
701 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
702 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
705 /*! \fn void BamAlignment::SetIsMapped(bool ok)
706 \brief Sets "alignment is mapped" flag to \a ok.
// Inverted sense: the stored bit means "unmapped", so ok == true clears
// BAM_ALIGNMENT_UNMAPPED and ok == false sets it.
708 void BamAlignment::SetIsMapped(bool ok) {
709 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
710 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
713 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
714 \brief Sets "alignment's mate is mapped" flag to \a ok.
// Inverted sense: the stored bit means "mate unmapped", so ok == true clears
// BAM_ALIGNMENT_MATE_UNMAPPED and ok == false sets it.
716 void BamAlignment::SetIsMateMapped(bool ok) {
717 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
718 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
721 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
722 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
// Sets the BAM_ALIGNMENT_MATE_REVERSE_STRAND bit when ok is true, clears it otherwise.
724 void BamAlignment::SetIsMateReverseStrand(bool ok) {
725 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
726 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
729 /*! \fn void BamAlignment::SetIsPaired(bool ok)
730 \brief Sets "alignment part of paired-end read" flag to \a ok.
// Sets the BAM_ALIGNMENT_PAIRED bit when ok is true, clears it otherwise.
732 void BamAlignment::SetIsPaired(bool ok) {
733 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
734 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
737 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
738 \brief Sets "position is primary alignment" flag to \a ok.
// Inverted sense: the stored bit means "secondary", so ok == true clears
// BAM_ALIGNMENT_SECONDARY and ok == false sets it.
740 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
741 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
742 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
745 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
746 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
// Sets the BAM_ALIGNMENT_PROPER_PAIR bit when ok is true, clears it otherwise.
748 void BamAlignment::SetIsProperPair(bool ok) {
749 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
750 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
753 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
754 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
// Sets the BAM_ALIGNMENT_REVERSE_STRAND bit when ok is true, clears it otherwise.
756 void BamAlignment::SetIsReverseStrand(bool ok) {
757 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
758 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
761 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
762 \brief Sets "alignment is second mate on read" flag to \a ok.
// Sets the BAM_ALIGNMENT_READ_2 bit when ok is true, clears it otherwise.
764 void BamAlignment::SetIsSecondMate(bool ok) {
765 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
766 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
769 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed) const
772 Moves to next available tag in tag data string
774 \param[in] storageType BAM tag type-code that determines how far to move cursor
775 \param[in,out] pTagData pointer to current position (cursor) in tag string
776 \param[in,out] numBytesParsed report of how many bytes were parsed (cumulatively)
778 \return \c true if storageType was a recognized BAM tag type
780 \post \a pTagData will point to the byte where the next tag data begins.
781 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
// Advances the cursor (pTagData) and the running byte count (numBytesParsed)
// past one tag value whose width is selected by storageType. An unrecognized
// storage type or array element type is reported through SetErrorString.
783 bool BamAlignment::SkipToNextTag(const char storageType,
785 unsigned int& numBytesParsed) const
787 switch (storageType) {
// single-byte values
789 case (Constants::BAM_TAG_TYPE_ASCII) :
790 case (Constants::BAM_TAG_TYPE_INT8) :
791 case (Constants::BAM_TAG_TYPE_UINT8) :
// two-byte values
796 case (Constants::BAM_TAG_TYPE_INT16) :
797 case (Constants::BAM_TAG_TYPE_UINT16) :
798 numBytesParsed += sizeof(uint16_t);
799 pTagData += sizeof(uint16_t);
// four-byte values
802 case (Constants::BAM_TAG_TYPE_FLOAT) :
803 case (Constants::BAM_TAG_TYPE_INT32) :
804 case (Constants::BAM_TAG_TYPE_UINT32) :
805 numBytesParsed += sizeof(uint32_t);
806 pTagData += sizeof(uint32_t);
// NUL-terminated text values
809 case (Constants::BAM_TAG_TYPE_STRING) :
810 case (Constants::BAM_TAG_TYPE_HEX) :
815 // increment for null-terminator
// array values: 1-byte element type, 4-byte element count, then the elements
820 case (Constants::BAM_TAG_TYPE_ARRAY) :
824 const char arrayType = *pTagData;
828 // read number of elements
830 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
831 numBytesParsed += sizeof(uint32_t);
832 pTagData += sizeof(uint32_t);
834 // calculate number of bytes to skip
// NOTE(review): numElements is read from the tag data and nothing in the
// lines shown bounds bytesToSkip by the remaining TagData length -- confirm
// callers cannot reach this with malformed data.
837 case (Constants::BAM_TAG_TYPE_INT8) :
838 case (Constants::BAM_TAG_TYPE_UINT8) :
839 bytesToSkip = numElements;
841 case (Constants::BAM_TAG_TYPE_INT16) :
842 case (Constants::BAM_TAG_TYPE_UINT16) :
843 bytesToSkip = numElements*sizeof(uint16_t);
845 case (Constants::BAM_TAG_TYPE_FLOAT) :
846 case (Constants::BAM_TAG_TYPE_INT32) :
847 case (Constants::BAM_TAG_TYPE_UINT32) :
848 bytesToSkip = numElements*sizeof(uint32_t);
// unknown array element type
851 const string message = string("invalid binary array type: ") + arrayType;
852 SetErrorString("BamAlignment::SkipToNextTag", message);
856 // skip binary array contents
857 numBytesParsed += bytesToSkip;
858 pTagData += bytesToSkip;
// unknown storage type
863 const string message = string("invalid tag type: ") + storageType;
864 SetErrorString("BamAlignment::SkipToNextTag", message);
868 // if we get here, tag skipped OK - return success