1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 13 December 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 using namespace BamTools;
24 BamAlignment::BamAlignment(void)
33 BamAlignment::BamAlignment(const BamAlignment& other)
35 , Length(other.Length)
36 , QueryBases(other.QueryBases)
37 , AlignedBases(other.AlignedBases)
38 , Qualities(other.Qualities)
39 , TagData(other.TagData)
41 , Position(other.Position)
43 , MapQuality(other.MapQuality)
44 , AlignmentFlag(other.AlignmentFlag)
45 , CigarData(other.CigarData)
46 , MateRefID(other.MateRefID)
47 , MatePosition(other.MatePosition)
48 , InsertSize(other.InsertSize)
49 , SupportData(other.SupportData)
53 BamAlignment::~BamAlignment(void) { }
55 // Queries against alignment flags
56 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
57 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
58 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
59 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
60 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
61 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
62 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
63 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
64 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
65 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
66 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
68 // Manipulate alignment flags
69 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
70 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
71 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
72 void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); }
73 void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); }
74 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
75 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
76 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
77 void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); }
78 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
79 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
80 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
81 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
82 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
84 // calculates alignment end position, based on starting position and CIGAR operations
85 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
87 // initialize alignment end to starting position
88 int alignEnd = Position;
90 // iterate over cigar operations
91 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
92 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
93 for ( ; cigarIter != cigarEnd; ++cigarIter) {
94 const char cigarType = (*cigarIter).Type;
95 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
96 alignEnd += (*cigarIter).Length;
97 else if ( usePadded && cigarType == 'I' )
98 alignEnd += (*cigarIter).Length;
101 // adjust for zeroBased, if necessary
108 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
110 if ( SupportData.HasCoreOnly ) return false;
111 if ( tag.size() != 2 || type.size() != 1 ) return false;
112 if ( type != "Z" && type != "H" ) return false;
114 // localize the tag data
115 char* pTagData = (char*)TagData.data();
116 const unsigned int tagDataLength = TagData.size();
117 unsigned int numBytesParsed = 0;
119 // if tag already exists, return false
120 // use EditTag explicitly instead
121 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
123 // otherwise, copy tag data to temp buffer
124 string newTag = tag + type + value;
125 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
126 char originalTagData[newTagDataLength];
127 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
130 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
132 // store temp buffer back in TagData
133 const char* newTagData = (const char*)originalTagData;
134 TagData.assign(newTagData, newTagDataLength);
140 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
142 if ( SupportData.HasCoreOnly ) return false;
143 if ( tag.size() != 2 || type.size() != 1 ) return false;
144 if ( type == "f" || type == "Z" || type == "H" ) return false;
146 // localize the tag data
147 char* pTagData = (char*)TagData.data();
148 const unsigned int tagDataLength = TagData.size();
149 unsigned int numBytesParsed = 0;
151 // if tag already exists, return false
152 // use EditTag explicitly instead
153 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
155 // otherwise, convert value to string
156 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
159 // copy original tag data to temp buffer
160 string newTag = tag + type;
161 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
162 char originalTagData[newTagDataLength];
163 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
166 strcat(originalTagData + tagDataLength, newTag.data());
167 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
169 // store temp buffer back in TagData
170 const char* newTagData = (const char*)originalTagData;
171 TagData.assign(newTagData, newTagDataLength);
177 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
178 return AddTag(tag, type, (const uint32_t&)value);
181 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
183 if ( SupportData.HasCoreOnly ) return false;
184 if ( tag.size() != 2 || type.size() != 1 ) return false;
185 if ( type == "Z" || type == "H" ) return false;
187 // localize the tag data
188 char* pTagData = (char*)TagData.data();
189 const unsigned int tagDataLength = TagData.size();
190 unsigned int numBytesParsed = 0;
192 // if tag already exists, return false
193 // use EditTag explicitly instead
194 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
196 // otherwise, convert value to string
197 union { float value; char valueBuffer[sizeof(float)]; } un;
200 // copy original tag data to temp buffer
201 string newTag = tag + type;
202 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
203 char originalTagData[newTagDataLength];
204 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
207 strcat(originalTagData + tagDataLength, newTag.data());
208 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
210 // store temp buffer back in TagData
211 const char* newTagData = (const char*)originalTagData;
212 TagData.assign(newTagData, newTagDataLength);
218 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
220 if ( SupportData.HasCoreOnly ) return false;
221 if ( tag.size() != 2 || type.size() != 1 ) return false;
222 if ( type != "Z" && type != "H" ) return false;
224 // localize the tag data
225 char* pOriginalTagData = (char*)TagData.data();
226 char* pTagData = pOriginalTagData;
227 const unsigned int originalTagDataLength = TagData.size();
229 unsigned int newTagDataLength = 0;
230 unsigned int numBytesParsed = 0;
232 // if tag found, store data in readGroup, return success
233 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
235 // make sure array is more than big enough
236 char newTagData[originalTagDataLength + value.size()];
238 // copy original tag data up til desired tag
239 const unsigned int beginningTagDataLength = numBytesParsed;
240 newTagDataLength += beginningTagDataLength;
241 memcpy(newTagData, pOriginalTagData, numBytesParsed);
243 // copy new VALUE in place of current tag data
244 const unsigned int dataLength = strlen(value.c_str());
245 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
247 // skip to next tag (if tag for removal is last, return true)
248 const char* pTagStorageType = pTagData - 1;
249 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
251 // copy everything from current tag (the next one after tag for removal) to end
252 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
253 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
254 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
255 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
257 // ensure null-terminator
258 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
261 TagData.assign(newTagData, endTagOffset + endTagDataLength);
265 // tag not found, attempt AddTag
266 else return AddTag(tag, type, value);
269 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
271 if ( SupportData.HasCoreOnly ) return false;
272 if ( tag.size() != 2 || type.size() != 1 ) return false;
273 if ( type == "f" || type == "Z" || type == "H" ) return false;
275 // localize the tag data
276 char* pOriginalTagData = (char*)TagData.data();
277 char* pTagData = pOriginalTagData;
278 const unsigned int originalTagDataLength = TagData.size();
280 unsigned int newTagDataLength = 0;
281 unsigned int numBytesParsed = 0;
283 // if tag found, store data in readGroup, return success
284 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
286 // make sure array is more than big enough
287 char newTagData[originalTagDataLength + sizeof(value)];
289 // copy original tag data up til desired tag
290 const unsigned int beginningTagDataLength = numBytesParsed;
291 newTagDataLength += beginningTagDataLength;
292 memcpy(newTagData, pOriginalTagData, numBytesParsed);
294 // copy new VALUE in place of current tag data
295 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
297 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
299 // skip to next tag (if tag for removal is last, return true)
300 const char* pTagStorageType = pTagData - 1;
301 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
303 // copy everything from current tag (the next one after tag for removal) to end
304 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
305 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
306 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
307 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
309 // ensure null-terminator
310 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
313 TagData.assign(newTagData, endTagOffset + endTagDataLength);
317 // tag not found, attempt AddTag
318 else return AddTag(tag, type, value);
321 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
322 return EditTag(tag, type, (const uint32_t&)value);
325 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
327 if ( SupportData.HasCoreOnly ) return false;
328 if ( tag.size() != 2 || type.size() != 1 ) return false;
329 if ( type == "Z" || type == "H" ) return false;
331 // localize the tag data
332 char* pOriginalTagData = (char*)TagData.data();
333 char* pTagData = pOriginalTagData;
334 const unsigned int originalTagDataLength = TagData.size();
336 unsigned int newTagDataLength = 0;
337 unsigned int numBytesParsed = 0;
339 // if tag found, store data in readGroup, return success
340 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
342 // make sure array is more than big enough
343 char newTagData[originalTagDataLength + sizeof(value)];
345 // copy original tag data up til desired tag
346 const unsigned int beginningTagDataLength = numBytesParsed;
347 newTagDataLength += beginningTagDataLength;
348 memcpy(newTagData, pOriginalTagData, numBytesParsed);
350 // copy new VALUE in place of current tag data
351 union { float value; char valueBuffer[sizeof(float)]; } un;
353 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
355 // skip to next tag (if tag for removal is last, return true)
356 const char* pTagStorageType = pTagData - 1;
357 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
359 // copy everything from current tag (the next one after tag for removal) to end
360 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
361 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
362 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
363 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
365 // ensure null-terminator
366 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
369 TagData.assign(newTagData, endTagOffset + endTagDataLength);
373 // tag not found, attempt AddTag
374 else return AddTag(tag, type, value);
377 // get "NM" tag data - originally contributed by Aaron Quinlan
378 // stores data in 'editDistance', returns success/fail
379 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
380 return GetTag("NM", (uint32_t&)editDistance);
384 // stores data in 'readGroup', returns success/fail
385 bool BamAlignment::GetReadGroup(string& readGroup) const {
386 return GetTag("RG", readGroup);
389 bool BamAlignment::GetTag(const string& tag, string& destination) const {
391 // make sure tag data exists
392 if ( SupportData.HasCoreOnly || TagData.empty() )
395 // localize the tag data
396 char* pTagData = (char*)TagData.data();
397 const unsigned int tagDataLength = TagData.size();
398 unsigned int numBytesParsed = 0;
400 // if tag found, store data in readGroup, return success
401 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
402 const unsigned int dataLength = strlen(pTagData);
404 destination.resize(dataLength);
405 memcpy( (char*)destination.data(), pTagData, dataLength );
409 // tag not found, return failure
413 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
415 // make sure tag data exists
416 if ( SupportData.HasCoreOnly || TagData.empty() )
419 // localize the tag data
420 char* pTagData = (char*)TagData.data();
421 const unsigned int tagDataLength = TagData.size();
422 unsigned int numBytesParsed = 0;
424 // if tag found, determine data byte-length, store data in readGroup, return success
425 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
427 // determine data byte-length
428 const char type = *(pTagData - 1);
429 int destinationLength = 0;
436 destinationLength = 1;
442 destinationLength = 2;
448 destinationLength = 4;
451 // unsupported type for integer destination (float or var-length strings)
455 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
460 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
464 // store in destination
466 memcpy(&destination, pTagData, destinationLength);
470 // tag not found, return failure
474 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
475 return GetTag(tag, (uint32_t&)destination);
478 bool BamAlignment::GetTag(const string& tag, float& destination) const {
480 // make sure tag data exists
481 if ( SupportData.HasCoreOnly || TagData.empty() )
484 // localize the tag data
485 char* pTagData = (char*)TagData.data();
486 const unsigned int tagDataLength = TagData.size();
487 unsigned int numBytesParsed = 0;
489 // if tag found, determine data byte-length, store data in readGroup, return success
490 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
492 // determine data byte-length
493 const char type = *(pTagData - 1);
494 int destinationLength = 0;
501 destinationLength = 1;
507 destinationLength = 2;
514 destinationLength = 4;
517 // unsupported type (var-length strings)
520 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
525 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
529 // store in destination
531 memcpy(&destination, pTagData, destinationLength);
535 // tag not found, return failure
539 bool BamAlignment::GetTagType(const string& tag, char& type) const {
541 // make sure tag data exists
542 if ( SupportData.HasCoreOnly || TagData.empty() )
545 // localize the tag data
546 char* pTagData = (char*)TagData.data();
547 const unsigned int tagDataLength = TagData.size();
548 unsigned int numBytesParsed = 0;
551 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
553 // retrieve tag type code
554 type = *(pTagData - 1);
556 // validate that type is a proper BAM tag type
572 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
577 // tag not found, return failure
581 bool BamAlignment::RemoveTag(const string& tag) {
583 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
584 // also, return false if no data present to remove
585 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
587 // localize the tag data
588 char* pOriginalTagData = (char*)TagData.data();
589 char* pTagData = pOriginalTagData;
590 const unsigned int originalTagDataLength = TagData.size();
591 unsigned int newTagDataLength = 0;
592 unsigned int numBytesParsed = 0;
594 // if tag found, store data in readGroup, return success
595 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
597 char newTagData[originalTagDataLength];
599 // copy original tag data up til desired tag
602 const unsigned int beginningTagDataLength = numBytesParsed;
603 newTagDataLength += beginningTagDataLength;
604 memcpy(newTagData, pOriginalTagData, numBytesParsed);
606 // skip to next tag (if tag for removal is last, return true)
607 const char* pTagStorageType = pTagData + 2;
610 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
612 // copy everything from current tag (the next one after tag for removal) to end
613 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
614 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
615 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
618 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
622 // tag not found, no removal - return failure
626 bool BamAlignment::FindTag(const string& tag,
628 const unsigned int& tagDataLength,
629 unsigned int& numBytesParsed)
632 while ( numBytesParsed < tagDataLength ) {
634 const char* pTagType = pTagData;
635 const char* pTagStorageType = pTagData + 2;
639 // check the current tag, return true on match
640 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
643 // get the storage class and find the next tag
644 if ( *pTagStorageType == '\0' ) return false;
645 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
646 if ( *pTagData == '\0' ) return false;
649 // checked all tags, none match
653 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
655 switch(storageType) {
683 // increment for null-terminator
690 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);