1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 December 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 using namespace BamTools;
23 const char* DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
26 BamAlignment::BamAlignment(void)
35 BamAlignment::BamAlignment(const BamAlignment& other)
37 , Length(other.Length)
38 , QueryBases(other.QueryBases)
39 , AlignedBases(other.AlignedBases)
40 , Qualities(other.Qualities)
41 , TagData(other.TagData)
43 , Position(other.Position)
45 , MapQuality(other.MapQuality)
46 , AlignmentFlag(other.AlignmentFlag)
47 , CigarData(other.CigarData)
48 , MateRefID(other.MateRefID)
49 , MatePosition(other.MatePosition)
50 , InsertSize(other.InsertSize)
51 , SupportData(other.SupportData)
55 BamAlignment::~BamAlignment(void) { }
57 // Queries against alignment flags
58 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
59 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
60 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
61 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
62 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
63 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
64 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
65 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
66 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
67 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
68 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
70 // Manipulate alignment flags
71 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
72 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
73 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
74 void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); }
75 void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); }
76 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
77 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
78 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
79 void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); }
80 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
81 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
82 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
83 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
84 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
86 // fills out character data
87 bool BamAlignment::BuildCharData(void) {
89 // skip if char data already parsed
90 if ( !SupportData.HasCoreOnly ) return true;
92 // check system endianness
93 bool IsBigEndian = BamTools::SystemIsBigEndian();
95 // calculate character lengths/offsets
96 const unsigned int dataLength = SupportData.BlockLength - BAM_CORE_SIZE;
97 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
98 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
99 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
100 const unsigned int tagDataLength = dataLength - tagDataOffset;
102 // check offsets to see what char data exists
103 const bool hasSeqData = ( seqDataOffset < dataLength );
104 const bool hasQualData = ( qualDataOffset < dataLength );
105 const bool hasTagData = ( tagDataOffset < dataLength );
107 // set up char buffers
108 const char* allCharData = SupportData.AllCharData.data();
109 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
110 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
111 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
113 // store alignment name (relies on null char in name as terminator)
114 Name.assign((const char*)(allCharData));
116 // save query sequence
119 QueryBases.reserve(SupportData.QuerySequenceLength);
120 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
121 char singleBase = DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
122 QueryBases.append(1, singleBase);
126 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
129 Qualities.reserve(SupportData.QuerySequenceLength);
130 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
131 char singleQuality = (char)(qualData[i]+33);
132 Qualities.append(1, singleQuality);
136 // clear previous AlignedBases
137 AlignedBases.clear();
139 // if QueryBases has data, build AlignedBases using CIGAR data
140 // otherwise, AlignedBases will remain empty (this case IS allowed)
141 if ( !QueryBases.empty() ) {
143 // resize AlignedBases
144 AlignedBases.reserve(SupportData.QuerySequenceLength);
146 // iterate over CigarOps
148 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
149 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
150 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
152 const CigarOp& op = (*cigarIter);
155 // for 'M', 'I' - write bases
158 AlignedBases.append(QueryBases.substr(k, op.Length));
161 // for 'S' - soft clip, do not write bases
162 // but increment placeholder 'k'
167 // for 'D' - write gap character
169 AlignedBases.append(op.Length, '-');
172 // for 'P' - write padding character
174 AlignedBases.append( op.Length, '*' );
177 // for 'N' - write N's, skip bases in original query sequence
179 AlignedBases.append( op.Length, 'N' );
182 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
186 // shouldn't get here
188 fprintf(stderr, "ERROR: Invalid Cigar op type\n");
199 while ( (unsigned int)i < tagDataLength ) {
201 i += 2; // skip tagType chars (e.g. "RG", "NM", etc.)
202 uint8_t type = toupper(tagData[i]); // lower & upper case letters have same meaning
203 ++i; // skip valueType char (e.g. 'A', 'I', 'Z', etc.)
213 SwapEndian_16p(&tagData[i]);
214 i += sizeof(uint16_t);
219 SwapEndian_32p(&tagData[i]);
220 i += sizeof(uint32_t);
224 SwapEndian_64p(&tagData[i]);
225 i += sizeof(uint64_t);
230 while (tagData[i]) { ++i; }
231 ++i; // increment one more for null terminator
234 // shouldn't get here
236 fprintf(stderr, "ERROR: Invalid tag value type\n");
242 // store tagData in alignment
243 TagData.resize(tagDataLength);
244 memcpy((char*)TagData.data(), tagData, tagDataLength);
247 // clear the core-only flag
248 SupportData.HasCoreOnly = false;
254 // calculates alignment end position, based on starting position and CIGAR operations
255 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
257 // initialize alignment end to starting position
258 int alignEnd = Position;
260 // iterate over cigar operations
261 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
262 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
263 for ( ; cigarIter != cigarEnd; ++cigarIter) {
264 const char cigarType = (*cigarIter).Type;
265 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
266 alignEnd += (*cigarIter).Length;
267 else if ( usePadded && cigarType == 'I' )
268 alignEnd += (*cigarIter).Length;
271 // adjust for zeroBased, if necessary
278 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
280 if ( SupportData.HasCoreOnly ) return false;
281 if ( tag.size() != 2 || type.size() != 1 ) return false;
282 if ( type != "Z" && type != "H" ) return false;
284 // localize the tag data
285 char* pTagData = (char*)TagData.data();
286 const unsigned int tagDataLength = TagData.size();
287 unsigned int numBytesParsed = 0;
289 // if tag already exists, return false
290 // use EditTag explicitly instead
291 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
293 // otherwise, copy tag data to temp buffer
294 string newTag = tag + type + value;
295 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
296 char originalTagData[newTagDataLength];
297 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
300 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
302 // store temp buffer back in TagData
303 const char* newTagData = (const char*)originalTagData;
304 TagData.assign(newTagData, newTagDataLength);
310 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
312 if ( SupportData.HasCoreOnly ) return false;
313 if ( tag.size() != 2 || type.size() != 1 ) return false;
314 if ( type == "f" || type == "Z" || type == "H" ) return false;
316 // localize the tag data
317 char* pTagData = (char*)TagData.data();
318 const unsigned int tagDataLength = TagData.size();
319 unsigned int numBytesParsed = 0;
321 // if tag already exists, return false
322 // use EditTag explicitly instead
323 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
325 // otherwise, convert value to string
326 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
329 // copy original tag data to temp buffer
330 string newTag = tag + type;
331 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
332 char originalTagData[newTagDataLength];
333 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
336 strcat(originalTagData + tagDataLength, newTag.data());
337 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
339 // store temp buffer back in TagData
340 const char* newTagData = (const char*)originalTagData;
341 TagData.assign(newTagData, newTagDataLength);
347 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
348 return AddTag(tag, type, (const uint32_t&)value);
351 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
353 if ( SupportData.HasCoreOnly ) return false;
354 if ( tag.size() != 2 || type.size() != 1 ) return false;
355 if ( type == "Z" || type == "H" ) return false;
357 // localize the tag data
358 char* pTagData = (char*)TagData.data();
359 const unsigned int tagDataLength = TagData.size();
360 unsigned int numBytesParsed = 0;
362 // if tag already exists, return false
363 // use EditTag explicitly instead
364 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
366 // otherwise, convert value to string
367 union { float value; char valueBuffer[sizeof(float)]; } un;
370 // copy original tag data to temp buffer
371 string newTag = tag + type;
372 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
373 char originalTagData[newTagDataLength];
374 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
377 strcat(originalTagData + tagDataLength, newTag.data());
378 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
380 // store temp buffer back in TagData
381 const char* newTagData = (const char*)originalTagData;
382 TagData.assign(newTagData, newTagDataLength);
388 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
390 if ( SupportData.HasCoreOnly ) return false;
391 if ( tag.size() != 2 || type.size() != 1 ) return false;
392 if ( type != "Z" && type != "H" ) return false;
394 // localize the tag data
395 char* pOriginalTagData = (char*)TagData.data();
396 char* pTagData = pOriginalTagData;
397 const unsigned int originalTagDataLength = TagData.size();
399 unsigned int newTagDataLength = 0;
400 unsigned int numBytesParsed = 0;
402 // if tag found, store data in readGroup, return success
403 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
405 // make sure array is more than big enough
406 char newTagData[originalTagDataLength + value.size()];
408 // copy original tag data up til desired tag
409 const unsigned int beginningTagDataLength = numBytesParsed;
410 newTagDataLength += beginningTagDataLength;
411 memcpy(newTagData, pOriginalTagData, numBytesParsed);
413 // copy new VALUE in place of current tag data
414 const unsigned int dataLength = strlen(value.c_str());
415 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
417 // skip to next tag (if tag for removal is last, return true)
418 const char* pTagStorageType = pTagData - 1;
419 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
421 // copy everything from current tag (the next one after tag for removal) to end
422 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
423 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
424 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
425 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
427 // ensure null-terminator
428 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
431 TagData.assign(newTagData, endTagOffset + endTagDataLength);
435 // tag not found, attempt AddTag
436 else return AddTag(tag, type, value);
439 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
441 if ( SupportData.HasCoreOnly ) return false;
442 if ( tag.size() != 2 || type.size() != 1 ) return false;
443 if ( type == "f" || type == "Z" || type == "H" ) return false;
445 // localize the tag data
446 char* pOriginalTagData = (char*)TagData.data();
447 char* pTagData = pOriginalTagData;
448 const unsigned int originalTagDataLength = TagData.size();
450 unsigned int newTagDataLength = 0;
451 unsigned int numBytesParsed = 0;
453 // if tag found, store data in readGroup, return success
454 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
456 // make sure array is more than big enough
457 char newTagData[originalTagDataLength + sizeof(value)];
459 // copy original tag data up til desired tag
460 const unsigned int beginningTagDataLength = numBytesParsed;
461 newTagDataLength += beginningTagDataLength;
462 memcpy(newTagData, pOriginalTagData, numBytesParsed);
464 // copy new VALUE in place of current tag data
465 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
467 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
469 // skip to next tag (if tag for removal is last, return true)
470 const char* pTagStorageType = pTagData - 1;
471 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
473 // copy everything from current tag (the next one after tag for removal) to end
474 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
475 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
476 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
477 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
479 // ensure null-terminator
480 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
483 TagData.assign(newTagData, endTagOffset + endTagDataLength);
487 // tag not found, attempt AddTag
488 else return AddTag(tag, type, value);
491 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
492 return EditTag(tag, type, (const uint32_t&)value);
495 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
497 if ( SupportData.HasCoreOnly ) return false;
498 if ( tag.size() != 2 || type.size() != 1 ) return false;
499 if ( type == "Z" || type == "H" ) return false;
501 // localize the tag data
502 char* pOriginalTagData = (char*)TagData.data();
503 char* pTagData = pOriginalTagData;
504 const unsigned int originalTagDataLength = TagData.size();
506 unsigned int newTagDataLength = 0;
507 unsigned int numBytesParsed = 0;
509 // if tag found, store data in readGroup, return success
510 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
512 // make sure array is more than big enough
513 char newTagData[originalTagDataLength + sizeof(value)];
515 // copy original tag data up til desired tag
516 const unsigned int beginningTagDataLength = numBytesParsed;
517 newTagDataLength += beginningTagDataLength;
518 memcpy(newTagData, pOriginalTagData, numBytesParsed);
520 // copy new VALUE in place of current tag data
521 union { float value; char valueBuffer[sizeof(float)]; } un;
523 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
525 // skip to next tag (if tag for removal is last, return true)
526 const char* pTagStorageType = pTagData - 1;
527 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
529 // copy everything from current tag (the next one after tag for removal) to end
530 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
531 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
532 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
533 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
535 // ensure null-terminator
536 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
539 TagData.assign(newTagData, endTagOffset + endTagDataLength);
543 // tag not found, attempt AddTag
544 else return AddTag(tag, type, value);
547 // get "NM" tag data - originally contributed by Aaron Quinlan
548 // stores data in 'editDistance', returns success/fail
549 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
550 return GetTag("NM", (uint32_t&)editDistance);
554 // stores data in 'readGroup', returns success/fail
555 bool BamAlignment::GetReadGroup(string& readGroup) const {
556 return GetTag("RG", readGroup);
559 bool BamAlignment::GetTag(const string& tag, string& destination) const {
561 // make sure tag data exists
562 if ( SupportData.HasCoreOnly || TagData.empty() )
565 // localize the tag data
566 char* pTagData = (char*)TagData.data();
567 const unsigned int tagDataLength = TagData.size();
568 unsigned int numBytesParsed = 0;
570 // if tag found, store data in readGroup, return success
571 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
572 const unsigned int dataLength = strlen(pTagData);
574 destination.resize(dataLength);
575 memcpy( (char*)destination.data(), pTagData, dataLength );
579 // tag not found, return failure
583 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
585 // make sure tag data exists
586 if ( SupportData.HasCoreOnly || TagData.empty() )
589 // localize the tag data
590 char* pTagData = (char*)TagData.data();
591 const unsigned int tagDataLength = TagData.size();
592 unsigned int numBytesParsed = 0;
594 // if tag found, determine data byte-length, store data in readGroup, return success
595 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
597 // determine data byte-length
598 const char type = *(pTagData - 1);
599 int destinationLength = 0;
606 destinationLength = 1;
612 destinationLength = 2;
618 destinationLength = 4;
621 // unsupported type for integer destination (float or var-length strings)
625 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
630 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
634 // store in destination
636 memcpy(&destination, pTagData, destinationLength);
640 // tag not found, return failure
644 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
645 return GetTag(tag, (uint32_t&)destination);
648 bool BamAlignment::GetTag(const string& tag, float& destination) const {
650 // make sure tag data exists
651 if ( SupportData.HasCoreOnly || TagData.empty() )
654 // localize the tag data
655 char* pTagData = (char*)TagData.data();
656 const unsigned int tagDataLength = TagData.size();
657 unsigned int numBytesParsed = 0;
659 // if tag found, determine data byte-length, store data in readGroup, return success
660 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
662 // determine data byte-length
663 const char type = *(pTagData - 1);
664 int destinationLength = 0;
671 destinationLength = 1;
677 destinationLength = 2;
684 destinationLength = 4;
687 // unsupported type (var-length strings)
690 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
695 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
699 // store in destination
701 memcpy(&destination, pTagData, destinationLength);
705 // tag not found, return failure
709 bool BamAlignment::GetTagType(const string& tag, char& type) const {
711 // make sure tag data exists
712 if ( SupportData.HasCoreOnly || TagData.empty() )
715 // localize the tag data
716 char* pTagData = (char*)TagData.data();
717 const unsigned int tagDataLength = TagData.size();
718 unsigned int numBytesParsed = 0;
721 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
723 // retrieve tag type code
724 type = *(pTagData - 1);
726 // validate that type is a proper BAM tag type
742 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
747 // tag not found, return failure
751 bool BamAlignment::RemoveTag(const string& tag) {
753 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
754 // also, return false if no data present to remove
755 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
757 // localize the tag data
758 char* pOriginalTagData = (char*)TagData.data();
759 char* pTagData = pOriginalTagData;
760 const unsigned int originalTagDataLength = TagData.size();
761 unsigned int newTagDataLength = 0;
762 unsigned int numBytesParsed = 0;
764 // if tag found, store data in readGroup, return success
765 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
767 char newTagData[originalTagDataLength];
769 // copy original tag data up til desired tag
772 const unsigned int beginningTagDataLength = numBytesParsed;
773 newTagDataLength += beginningTagDataLength;
774 memcpy(newTagData, pOriginalTagData, numBytesParsed);
776 // skip to next tag (if tag for removal is last, return true)
777 const char* pTagStorageType = pTagData + 2;
780 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
782 // copy everything from current tag (the next one after tag for removal) to end
783 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
784 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
785 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
788 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
792 // tag not found, no removal - return failure
796 bool BamAlignment::FindTag(const string& tag,
798 const unsigned int& tagDataLength,
799 unsigned int& numBytesParsed)
802 while ( numBytesParsed < tagDataLength ) {
804 const char* pTagType = pTagData;
805 const char* pTagStorageType = pTagData + 2;
809 // check the current tag, return true on match
810 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
813 // get the storage class and find the next tag
814 if ( *pTagStorageType == '\0' ) return false;
815 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
816 if ( *pTagData == '\0' ) return false;
819 // checked all tags, none match
823 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
825 switch(storageType) {
853 // increment for null-terminator
860 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);