1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 4 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #include <api/BamAlignment.h>
11 #include <api/BamConstants.h>
12 using namespace BamTools;
24 /*! \class BamTools::BamAlignment
25 \brief The main BAM alignment data structure.
27 Provides methods to query/modify BAM alignment data fields.
29 /*! \var BamAlignment::Name
32 /*! \var BamAlignment::Length
33 \brief length of query sequence
35 /*! \var BamAlignment::QueryBases
36 \brief 'original' sequence (as reported from sequencing machine)
38 /*! \var BamAlignment::AlignedBases
39 \brief 'aligned' sequence (includes any indels, padding, clipping)
41 /*! \var BamAlignment::Qualities
42 \brief FASTQ qualities (ASCII characters, not numeric values)
44 /*! \var BamAlignment::TagData
45 \brief tag data (use the provided methods to query/modify)
47 /*! \var BamAlignment::RefID
48 \brief ID number for reference sequence
50 /*! \var BamAlignment::Position
51 \brief position (0-based) where alignment starts
53 /*! \var BamAlignment::Bin
54 \brief BAM (standard) index bin number for this alignment
56 /*! \var BamAlignment::MapQuality
57 \brief mapping quality score
59 /*! \var BamAlignment::AlignmentFlag
60 \brief alignment bit-flag (use the provided methods to query/modify)
62 /*! \var BamAlignment::CigarData
63 \brief CIGAR operations for this alignment
65 /*! \var BamAlignment::MateRefID
66 \brief ID number for reference sequence where alignment's mate was aligned
68 /*! \var BamAlignment::MatePosition
69 \brief position (0-based) where alignment's mate starts
71 /*! \var BamAlignment::InsertSize
72 \brief mate-pair insert size
74 /*! \var BamAlignment::Filename
75 \brief name of BAM file which this alignment comes from
78 /*! \fn BamAlignment::BamAlignment(void)
81 BamAlignment::BamAlignment(void)
89 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
90 \brief copy constructor
92 BamAlignment::BamAlignment(const BamAlignment& other)
94 , Length(other.Length)
95 , QueryBases(other.QueryBases)
96 , AlignedBases(other.AlignedBases)
97 , Qualities(other.Qualities)
98 , TagData(other.TagData)
100 , Position(other.Position)
102 , MapQuality(other.MapQuality)
103 , AlignmentFlag(other.AlignmentFlag)
104 , CigarData(other.CigarData)
105 , MateRefID(other.MateRefID)
106 , MatePosition(other.MatePosition)
107 , InsertSize(other.InsertSize)
108 , Filename(other.Filename)
109 , SupportData(other.SupportData)
112 /*! \fn BamAlignment::~BamAlignment(void)
115 BamAlignment::~BamAlignment(void) { }
117 ///*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
118 // \brief Adds a field with string data to the BAM tags.
120 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
122 // \param[in] tag 2-character tag name
123 // \param[in] type 1-character tag type (must be "Z" or "H")
124 // \param[in] value string data to store
125 // \return \c true if the \b new tag was added successfully
126 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
130 ///*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
131 // \brief Adds a numeric array field to the BAM tags.
133 // Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
135 // \param tag 2-character tag name
136 // \param values vector of uint8_t values to store
138 // \return \c true if the \b new tag was added successfully
139 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
142 /*! \fn bool BamAlignment::BuildCharData(void)
143 \brief Populates alignment string fields (read name, bases, qualities, tag data).
145 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
146 Using that method makes parsing much quicker when only positional data is required.
148 However, if you later want to access the character data fields from such an alignment,
149 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
152 \return \c true if character data populated successfully (or was already available to begin with)
154 bool BamAlignment::BuildCharData(void) {
156 // skip if char data already parsed
157 if ( !SupportData.HasCoreOnly )
160 // check system endianness
161 bool IsBigEndian = BamTools::SystemIsBigEndian();
163 // calculate character lengths/offsets
164 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
165 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
166 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
167 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
168 const unsigned int tagDataLength = dataLength - tagDataOffset;
170 // check offsets to see what char data exists
171 const bool hasSeqData = ( seqDataOffset < dataLength );
172 const bool hasQualData = ( qualDataOffset < dataLength );
173 const bool hasTagData = ( tagDataOffset < dataLength );
175 // set up char buffers
176 const char* allCharData = SupportData.AllCharData.data();
177 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
178 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
179 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
181 // store alignment name (relies on null char in name as terminator)
182 Name.assign((const char*)(allCharData));
184 // save query sequence
187 QueryBases.reserve(SupportData.QuerySequenceLength);
188 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
189 char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
190 QueryBases.append(1, singleBase);
194 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
197 Qualities.reserve(SupportData.QuerySequenceLength);
198 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
199 char singleQuality = (char)(qualData[i]+33);
200 Qualities.append(1, singleQuality);
204 // clear previous AlignedBases
205 AlignedBases.clear();
207 // if QueryBases has data, build AlignedBases using CIGAR data
208 // otherwise, AlignedBases will remain empty (this case IS allowed)
209 if ( !QueryBases.empty() ) {
211 // resize AlignedBases
212 AlignedBases.reserve(SupportData.QuerySequenceLength);
214 // iterate over CigarOps
216 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
217 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
218 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
219 const CigarOp& op = (*cigarIter);
223 // for 'M', 'I', '=', 'X' - write bases
224 case (Constants::BAM_CIGAR_MATCH_CHAR) :
225 case (Constants::BAM_CIGAR_INS_CHAR) :
226 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
227 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
228 AlignedBases.append(QueryBases.substr(k, op.Length));
231 // for 'S' - soft clip, do not write bases
232 // but increment placeholder 'k'
233 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
237 // for 'D' - write gap character
238 case (Constants::BAM_CIGAR_DEL_CHAR) :
239 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
242 // for 'P' - write padding character
243 case (Constants::BAM_CIGAR_PAD_CHAR) :
244 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
247 // for 'N' - write N's, skip bases in original query sequence
248 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
249 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
252 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
253 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
256 // shouldn't get here
258 cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
270 while ( (unsigned int)i < tagDataLength ) {
272 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
273 const char type = tagData[i]; // get tag type at position i
274 ++i; // move i past tag type
278 case(Constants::BAM_TAG_TYPE_ASCII) :
279 case(Constants::BAM_TAG_TYPE_INT8) :
280 case(Constants::BAM_TAG_TYPE_UINT8) :
281 // no endian swapping necessary for single-byte data
285 case(Constants::BAM_TAG_TYPE_INT16) :
286 case(Constants::BAM_TAG_TYPE_UINT16) :
287 BamTools::SwapEndian_16p(&tagData[i]);
288 i += sizeof(uint16_t);
291 case(Constants::BAM_TAG_TYPE_FLOAT) :
292 case(Constants::BAM_TAG_TYPE_INT32) :
293 case(Constants::BAM_TAG_TYPE_UINT32) :
294 BamTools::SwapEndian_32p(&tagData[i]);
295 i += sizeof(uint32_t);
298 case(Constants::BAM_TAG_TYPE_HEX) :
299 case(Constants::BAM_TAG_TYPE_STRING) :
300 // no endian swapping necessary for hex-string/string data
303 // increment one more for null terminator
307 case(Constants::BAM_TAG_TYPE_ARRAY) :
311 const char arrayType = tagData[i];
314 // swap endian-ness of number of elements in place, then retrieve for loop
315 BamTools::SwapEndian_32p(&tagData[i]);
317 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
318 i += sizeof(uint32_t);
320 // swap endian-ness of array elements
321 for ( int j = 0; j < numElements; ++j ) {
323 case (Constants::BAM_TAG_TYPE_INT8) :
324 case (Constants::BAM_TAG_TYPE_UINT8) :
325 // no endian-swapping necessary
328 case (Constants::BAM_TAG_TYPE_INT16) :
329 case (Constants::BAM_TAG_TYPE_UINT16) :
330 BamTools::SwapEndian_16p(&tagData[i]);
331 i += sizeof(uint16_t);
333 case (Constants::BAM_TAG_TYPE_FLOAT) :
334 case (Constants::BAM_TAG_TYPE_INT32) :
335 case (Constants::BAM_TAG_TYPE_UINT32) :
336 BamTools::SwapEndian_32p(&tagData[i]);
337 i += sizeof(uint32_t);
341 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
342 << arrayType << endl;
350 // shouldn't get here
352 cerr << "BamAlignment ERROR: invalid tag value type: "
359 // store tagData in alignment
360 TagData.resize(tagDataLength);
361 memcpy((char*)TagData.data(), tagData, tagDataLength);
364 // clear the core-only flag
365 SupportData.HasCoreOnly = false;
371 ///*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
372 // \brief Edits a BAM tag field containing string data.
374 // If \a tag does not exist, a new entry is created.
376 // \param tag 2-character tag name
377 // \param type 1-character tag type (must be "Z" or "H")
378 // \param value string data to store
380 // \return \c true if the tag was modified/created successfully
382 // \sa BamAlignment::RemoveTag()
383 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
386 ///*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
387 // \brief Edits a BAM tag field containing a numeric array.
389 // If \a tag does not exist, a new entry is created.
391 // \param tag 2-character tag name
392 // \param value vector of uint8_t values to store
394 // \return \c true if the tag was modified/created successfully
395 // \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
398 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
401 Searches for requested tag in BAM tag data.
403 \param tag requested 2-character tag name
404 \param pTagData pointer to current position in BamAlignment::TagData
405 \param tagDataLength length of BamAlignment::TagData
406 \param numBytesParsed number of bytes parsed so far
408 \return \c true if found
410 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
411 \a numBytesParsed will correspond to the position in the full TagData string.
414 bool BamAlignment::FindTag(const std::string& tag,
416 const unsigned int& tagDataLength,
417 unsigned int& numBytesParsed)
420 while ( numBytesParsed < tagDataLength ) {
422 const char* pTagType = pTagData;
423 const char* pTagStorageType = pTagData + 2;
427 // check the current tag, return true on match
428 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
431 // get the storage class and find the next tag
432 if ( *pTagStorageType == '\0' ) return false;
433 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
434 if ( *pTagData == '\0' ) return false;
437 // checked all tags, none match
441 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
442 \brief Retrieves value of edit distance tag ("NM").
444 \deprecated Instead use BamAlignment::GetTag()
446 BamAlignment::GetTag("NM", editDistance);
449 \param editDistance destination for retrieved value
451 \return \c true if found
454 // TODO : REMOVE THIS METHOD
455 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
456 return GetTag("NM", (uint32_t&)editDistance);
459 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
460 \brief Calculates alignment end position, based on starting position and CIGAR data.
462 \param usePadded Inserted bases affect reported position. Default is false, so that reported
463 position stays 'sync-ed' with reference coordinates.
464 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
465 when using BAM data with half-open formats (e.g. BED).
467 \return alignment end position
469 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
471 // initialize alignment end to starting position
472 int alignEnd = Position;
474 // iterate over cigar operations
475 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
476 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
477 for ( ; cigarIter != cigarEnd; ++cigarIter) {
478 const char cigarType = (*cigarIter).Type;
479 const uint32_t& cigarLength = (*cigarIter).Length;
481 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
482 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
483 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
484 alignEnd += cigarLength;
485 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
486 alignEnd += cigarLength;
489 // adjust for zero-based coordinates, if requested
490 if ( zeroBased ) alignEnd -= 1;
496 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
497 \brief Retrieves value of read group tag ("RG").
499 \deprecated Instead use BamAlignment::GetTag()
501 BamAlignment::GetTag("RG", readGroup);
504 \param readGroup destination for retrieved value
506 \return \c true if found
509 // TODO : REMOVE THIS METHOD
510 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
511 return GetTag("RG", readGroup);
514 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
515 // \brief Retrieves the string value associated with a BAM tag.
517 // \param tag 2-character tag name
518 // \param destination destination for retrieved value
520 // \return \c true if found
523 ///*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
524 // \brief Retrieves the numeric array data associated with a BAM tag
526 // \param tag 2-character tag name
527 // \param destination destination for retrieved data
529 // \return \c true if found
532 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
533 \brief Retrieves the BAM tag type-code associated with requested tag name.
535 \param tag 2-character tag name
536 \param type destination for the retrieved (1-character) tag type
538 \return \c true if found
539 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
541 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
543 // make sure tag data exists
544 if ( SupportData.HasCoreOnly || TagData.empty() )
547 // localize the tag data
548 char* pTagData = (char*)TagData.data();
549 const unsigned int tagDataLength = TagData.size();
550 unsigned int numBytesParsed = 0;
552 // if tag not found, return failure
553 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
556 // otherwise, retrieve & validate tag type code
557 type = *(pTagData - 1);
559 case (Constants::BAM_TAG_TYPE_ASCII) :
560 case (Constants::BAM_TAG_TYPE_INT8) :
561 case (Constants::BAM_TAG_TYPE_UINT8) :
562 case (Constants::BAM_TAG_TYPE_INT16) :
563 case (Constants::BAM_TAG_TYPE_UINT16) :
564 case (Constants::BAM_TAG_TYPE_INT32) :
565 case (Constants::BAM_TAG_TYPE_UINT32) :
566 case (Constants::BAM_TAG_TYPE_FLOAT) :
567 case (Constants::BAM_TAG_TYPE_STRING) :
568 case (Constants::BAM_TAG_TYPE_HEX) :
569 case (Constants::BAM_TAG_TYPE_ARRAY) :
574 cerr << "BamAlignment ERROR: unknown tag type encountered: "
580 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
581 \brief Returns true if alignment has a record for requested tag.
582 \param tag 2-character tag name
583 \return \c true if alignment has a record for tag
585 bool BamAlignment::HasTag(const std::string& tag) const {
587 // return false if no tag data present
588 if ( SupportData.HasCoreOnly || TagData.empty() )
591 // localize the tag data for lookup
592 char* pTagData = (char*)TagData.data();
593 const unsigned int tagDataLength = TagData.size();
594 unsigned int numBytesParsed = 0;
596 // if result of tag lookup
597 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
600 /*! \fn bool BamAlignment::IsDuplicate(void) const
601 \return \c true if this read is a PCR duplicate
603 bool BamAlignment::IsDuplicate(void) const {
604 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
607 /*! \fn bool BamAlignment::IsFailedQC(void) const
608 \return \c true if this read failed quality control
610 bool BamAlignment::IsFailedQC(void) const {
611 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
614 /*! \fn bool BamAlignment::IsFirstMate(void) const
615 \return \c true if alignment is first mate on paired-end read
617 bool BamAlignment::IsFirstMate(void) const {
618 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
621 /*! \fn bool BamAlignment::IsMapped(void) const
622 \return \c true if alignment is mapped
624 bool BamAlignment::IsMapped(void) const {
625 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
628 /*! \fn bool BamAlignment::IsMateMapped(void) const
629 \return \c true if alignment's mate is mapped
631 bool BamAlignment::IsMateMapped(void) const {
632 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
635 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
636 \return \c true if alignment's mate mapped to reverse strand
638 bool BamAlignment::IsMateReverseStrand(void) const {
639 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
642 /*! \fn bool BamAlignment::IsPaired(void) const
643 \return \c true if alignment part of paired-end read
645 bool BamAlignment::IsPaired(void) const {
646 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
649 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
650 \return \c true if reported position is primary alignment
652 bool BamAlignment::IsPrimaryAlignment(void) const {
653 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
656 /*! \fn bool BamAlignment::IsProperPair(void) const
657 \return \c true if alignment is part of read that satisfied paired-end resolution
659 bool BamAlignment::IsProperPair(void) const {
660 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
663 /*! \fn bool BamAlignment::IsReverseStrand(void) const
664 \return \c true if alignment mapped to reverse strand
666 bool BamAlignment::IsReverseStrand(void) const {
667 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
670 /*! \fn bool BamAlignment::IsSecondMate(void) const
671 \return \c true if alignment is second mate on read
673 bool BamAlignment::IsSecondMate(void) const {
674 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
677 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
680 Checks that tag name & type strings are expected sizes.
681 \a tag should have length
682 \a type should have length 1
684 \param tag BAM tag name
685 \param type BAM tag type-code
687 \return \c true if both \a tag and \a type are correct sizes
689 bool BamAlignment::IsValidSize(const string& tag, const string& type) {
690 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
691 (type.size() == Constants::BAM_TAG_TYPESIZE);
694 /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
695 \brief Removes field from BAM tags.
697 \return \c true if tag was removed successfully (or didn't exist before)
699 bool BamAlignment::RemoveTag(const std::string& tag) {
701 // if char data not populated, do that first
702 if ( SupportData.HasCoreOnly )
705 // skip if no tags available
706 if ( TagData.empty() )
709 // localize the tag data
710 char* pOriginalTagData = (char*)TagData.data();
711 char* pTagData = pOriginalTagData;
712 const unsigned int originalTagDataLength = TagData.size();
713 unsigned int newTagDataLength = 0;
714 unsigned int numBytesParsed = 0;
716 // if tag not found, simply return true
717 if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
720 // otherwise, remove it
721 char* newTagData = new char[originalTagDataLength];
723 // copy original tag data up til desired tag
726 const unsigned int beginningTagDataLength = numBytesParsed;
727 newTagDataLength += beginningTagDataLength;
728 memcpy(newTagData, pOriginalTagData, numBytesParsed);
730 // attemp to skip to next tag
731 const char* pTagStorageType = pTagData + 2;
734 if ( SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
736 // squeeze remaining tag data
737 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
738 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
739 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
741 // save modified tag data in alignment
742 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
745 // clean up & return success
750 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
751 \brief Sets value of "PCR duplicate" flag to \a ok.
753 void BamAlignment::SetIsDuplicate(bool ok) {
754 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
755 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
758 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
759 \brief Sets "failed quality control" flag to \a ok.
761 void BamAlignment::SetIsFailedQC(bool ok) {
762 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
763 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
766 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
767 \brief Sets "alignment is first mate" flag to \a ok.
769 void BamAlignment::SetIsFirstMate(bool ok) {
770 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
771 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
774 /*! \fn void BamAlignment::SetIsMapped(bool ok)
775 \brief Sets "alignment is mapped" flag to \a ok.
777 void BamAlignment::SetIsMapped(bool ok) {
778 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
779 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
782 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
783 \brief Sets "alignment's mate is mapped" flag to \a ok.
785 void BamAlignment::SetIsMateMapped(bool ok) {
786 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
787 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
790 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
791 \brief Complement of using SetIsMateMapped().
792 \deprecated For sake of symmetry with the query methods
793 \sa IsMateMapped(), SetIsMateMapped()
795 void BamAlignment::SetIsMateUnmapped(bool ok) {
796 SetIsMateMapped(!ok);
799 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
800 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
802 void BamAlignment::SetIsMateReverseStrand(bool ok) {
803 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
804 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
807 /*! \fn void BamAlignment::SetIsPaired(bool ok)
808 \brief Sets "alignment part of paired-end read" flag to \a ok.
810 void BamAlignment::SetIsPaired(bool ok) {
811 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
812 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
815 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
816 \brief Sets "position is primary alignment" flag to \a ok.
818 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
819 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
820 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
823 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
824 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
826 void BamAlignment::SetIsProperPair(bool ok) {
827 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
828 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
831 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
832 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
834 void BamAlignment::SetIsReverseStrand(bool ok) {
835 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
836 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
839 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
840 \brief Complement of using SetIsPrimaryAlignment().
841 \deprecated For sake of symmetry with the query methods
842 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
844 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
845 SetIsPrimaryAlignment(!ok);
848 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
849 \brief Sets "alignment is second mate on read" flag to \a ok.
851 void BamAlignment::SetIsSecondMate(bool ok) {
852 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
853 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
856 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
857 \brief Complement of using SetIsMapped().
858 \deprecated For sake of symmetry with the query methods
859 \sa IsMapped(), SetIsMapped()
861 void BamAlignment::SetIsUnmapped(bool ok) {
865 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
868 Moves to next available tag in tag data string
870 \param storageType BAM tag type-code that determines how far to move cursor
871 \param pTagData pointer to current position (cursor) in tag string
872 \param numBytesParsed report of how many bytes were parsed (cumulatively)
874 \return \c if storageType was a recognized BAM tag type
875 \post \a pTagData will point to the byte where the next tag data begins.
876 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
878 bool BamAlignment::SkipToNextTag(const char storageType,
880 unsigned int& numBytesParsed)
882 switch (storageType) {
884 case (Constants::BAM_TAG_TYPE_ASCII) :
885 case (Constants::BAM_TAG_TYPE_INT8) :
886 case (Constants::BAM_TAG_TYPE_UINT8) :
891 case (Constants::BAM_TAG_TYPE_INT16) :
892 case (Constants::BAM_TAG_TYPE_UINT16) :
893 numBytesParsed += sizeof(uint16_t);
894 pTagData += sizeof(uint16_t);
897 case (Constants::BAM_TAG_TYPE_FLOAT) :
898 case (Constants::BAM_TAG_TYPE_INT32) :
899 case (Constants::BAM_TAG_TYPE_UINT32) :
900 numBytesParsed += sizeof(uint32_t);
901 pTagData += sizeof(uint32_t);
904 case (Constants::BAM_TAG_TYPE_STRING) :
905 case (Constants::BAM_TAG_TYPE_HEX) :
910 // increment for null-terminator
915 case (Constants::BAM_TAG_TYPE_ARRAY) :
919 const char arrayType = *pTagData;
923 // read number of elements
925 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
926 numBytesParsed += sizeof(uint32_t);
927 pTagData += sizeof(uint32_t);
929 // calculate number of bytes to skip
932 case (Constants::BAM_TAG_TYPE_INT8) :
933 case (Constants::BAM_TAG_TYPE_UINT8) :
934 bytesToSkip = numElements;
936 case (Constants::BAM_TAG_TYPE_INT16) :
937 case (Constants::BAM_TAG_TYPE_UINT16) :
938 bytesToSkip = numElements*sizeof(uint16_t);
940 case (Constants::BAM_TAG_TYPE_FLOAT) :
941 case (Constants::BAM_TAG_TYPE_INT32) :
942 case (Constants::BAM_TAG_TYPE_UINT32) :
943 bytesToSkip = numElements*sizeof(uint32_t);
946 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
947 << arrayType << endl;
951 // skip binary array contents
952 numBytesParsed += bytesToSkip;
953 pTagData += bytesToSkip;
958 cerr << "BamAlignment ERROR: unknown tag type encountered"
959 << storageType << endl;