1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 using namespace BamTools;
24 BamAlignment::BamAlignment(void)
33 BamAlignment::BamAlignment(const BamAlignment& other)
35 , Length(other.Length)
36 , QueryBases(other.QueryBases)
37 , AlignedBases(other.AlignedBases)
38 , Qualities(other.Qualities)
39 , TagData(other.TagData)
41 , Position(other.Position)
43 , MapQuality(other.MapQuality)
44 , AlignmentFlag(other.AlignmentFlag)
45 , CigarData(other.CigarData)
46 , MateRefID(other.MateRefID)
47 , MatePosition(other.MatePosition)
48 , InsertSize(other.InsertSize)
49 , SupportData(other.SupportData)
53 BamAlignment::~BamAlignment(void) { }
55 // Queries against alignment flags
56 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
57 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
58 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
59 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
60 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
61 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
62 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
63 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
64 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
65 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
66 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
68 // Manipulate alignment flags
69 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
70 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
71 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
72 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
73 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
74 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
75 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
76 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
77 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
78 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
79 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
81 // calculates alignment end position, based on starting position and CIGAR operations
82 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
84 // initialize alignment end to starting position
85 int alignEnd = Position;
87 // iterate over cigar operations
88 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
89 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
90 for ( ; cigarIter != cigarEnd; ++cigarIter) {
91 const char cigarType = (*cigarIter).Type;
92 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
93 alignEnd += (*cigarIter).Length;
94 else if ( usePadded && cigarType == 'I' )
95 alignEnd += (*cigarIter).Length;
98 // adjust for zeroBased, if necessary
105 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
107 if ( SupportData.HasCoreOnly ) return false;
108 if ( tag.size() != 2 || type.size() != 1 ) return false;
109 if ( type != "Z" && type != "H" ) return false;
111 // localize the tag data
112 char* pTagData = (char*)TagData.data();
113 const unsigned int tagDataLength = TagData.size();
114 unsigned int numBytesParsed = 0;
116 // if tag already exists, return false
117 // use EditTag explicitly instead
118 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
120 // otherwise, copy tag data to temp buffer
121 string newTag = tag + type + value;
122 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
123 char originalTagData[newTagDataLength];
124 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
127 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
129 // store temp buffer back in TagData
130 const char* newTagData = (const char*)originalTagData;
131 TagData.assign(newTagData, newTagDataLength);
137 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
139 if ( SupportData.HasCoreOnly ) return false;
140 if ( tag.size() != 2 || type.size() != 1 ) return false;
141 if ( type == "f" || type == "Z" || type == "H" ) return false;
143 // localize the tag data
144 char* pTagData = (char*)TagData.data();
145 const unsigned int tagDataLength = TagData.size();
146 unsigned int numBytesParsed = 0;
148 // if tag already exists, return false
149 // use EditTag explicitly instead
150 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
152 // otherwise, convert value to string
153 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
156 // copy original tag data to temp buffer
157 string newTag = tag + type;
158 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
159 char originalTagData[newTagDataLength];
160 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
163 strcat(originalTagData + tagDataLength, newTag.data());
164 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
166 // store temp buffer back in TagData
167 const char* newTagData = (const char*)originalTagData;
168 TagData.assign(newTagData, newTagDataLength);
174 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
175 return AddTag(tag, type, (const uint32_t&)value);
178 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
180 if ( SupportData.HasCoreOnly ) return false;
181 if ( tag.size() != 2 || type.size() != 1 ) return false;
182 if ( type == "Z" || type == "H" ) return false;
184 // localize the tag data
185 char* pTagData = (char*)TagData.data();
186 const unsigned int tagDataLength = TagData.size();
187 unsigned int numBytesParsed = 0;
189 // if tag already exists, return false
190 // use EditTag explicitly instead
191 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
193 // otherwise, convert value to string
194 union { float value; char valueBuffer[sizeof(float)]; } un;
197 // copy original tag data to temp buffer
198 string newTag = tag + type;
199 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
200 char originalTagData[newTagDataLength];
201 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
204 strcat(originalTagData + tagDataLength, newTag.data());
205 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
207 // store temp buffer back in TagData
208 const char* newTagData = (const char*)originalTagData;
209 TagData.assign(newTagData, newTagDataLength);
215 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
217 if ( SupportData.HasCoreOnly ) return false;
218 if ( tag.size() != 2 || type.size() != 1 ) return false;
219 if ( type != "Z" && type != "H" ) return false;
221 // localize the tag data
222 char* pOriginalTagData = (char*)TagData.data();
223 char* pTagData = pOriginalTagData;
224 const unsigned int originalTagDataLength = TagData.size();
226 unsigned int newTagDataLength = 0;
227 unsigned int numBytesParsed = 0;
229 // if tag found, store data in readGroup, return success
230 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
232 // make sure array is more than big enough
233 char newTagData[originalTagDataLength + value.size()];
235 // copy original tag data up til desired tag
236 const unsigned int beginningTagDataLength = numBytesParsed;
237 newTagDataLength += beginningTagDataLength;
238 memcpy(newTagData, pOriginalTagData, numBytesParsed);
240 // copy new VALUE in place of current tag data
241 const unsigned int dataLength = strlen(value.c_str());
242 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
244 // skip to next tag (if tag for removal is last, return true)
245 const char* pTagStorageType = pTagData - 1;
246 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
248 // copy everything from current tag (the next one after tag for removal) to end
249 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
250 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
251 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
252 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
254 // ensure null-terminator
255 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
258 TagData.assign(newTagData, endTagOffset + endTagDataLength);
262 // tag not found, attempt AddTag
263 else return AddTag(tag, type, value);
266 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
268 if ( SupportData.HasCoreOnly ) return false;
269 if ( tag.size() != 2 || type.size() != 1 ) return false;
270 if ( type == "f" || type == "Z" || type == "H" ) return false;
272 // localize the tag data
273 char* pOriginalTagData = (char*)TagData.data();
274 char* pTagData = pOriginalTagData;
275 const unsigned int originalTagDataLength = TagData.size();
277 unsigned int newTagDataLength = 0;
278 unsigned int numBytesParsed = 0;
280 // if tag found, store data in readGroup, return success
281 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
283 // make sure array is more than big enough
284 char newTagData[originalTagDataLength + sizeof(value)];
286 // copy original tag data up til desired tag
287 const unsigned int beginningTagDataLength = numBytesParsed;
288 newTagDataLength += beginningTagDataLength;
289 memcpy(newTagData, pOriginalTagData, numBytesParsed);
291 // copy new VALUE in place of current tag data
292 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
294 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
296 // skip to next tag (if tag for removal is last, return true)
297 const char* pTagStorageType = pTagData - 1;
298 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
300 // copy everything from current tag (the next one after tag for removal) to end
301 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
302 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
303 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
304 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
306 // ensure null-terminator
307 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
310 TagData.assign(newTagData, endTagOffset + endTagDataLength);
314 // tag not found, attempt AddTag
315 else return AddTag(tag, type, value);
318 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
319 return EditTag(tag, type, (const uint32_t&)value);
322 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
324 if ( SupportData.HasCoreOnly ) return false;
325 if ( tag.size() != 2 || type.size() != 1 ) return false;
326 if ( type == "Z" || type == "H" ) return false;
328 // localize the tag data
329 char* pOriginalTagData = (char*)TagData.data();
330 char* pTagData = pOriginalTagData;
331 const unsigned int originalTagDataLength = TagData.size();
333 unsigned int newTagDataLength = 0;
334 unsigned int numBytesParsed = 0;
336 // if tag found, store data in readGroup, return success
337 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
339 // make sure array is more than big enough
340 char newTagData[originalTagDataLength + sizeof(value)];
342 // copy original tag data up til desired tag
343 const unsigned int beginningTagDataLength = numBytesParsed;
344 newTagDataLength += beginningTagDataLength;
345 memcpy(newTagData, pOriginalTagData, numBytesParsed);
347 // copy new VALUE in place of current tag data
348 union { float value; char valueBuffer[sizeof(float)]; } un;
350 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
352 // skip to next tag (if tag for removal is last, return true)
353 const char* pTagStorageType = pTagData - 1;
354 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
356 // copy everything from current tag (the next one after tag for removal) to end
357 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
358 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
359 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
360 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
362 // ensure null-terminator
363 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
366 TagData.assign(newTagData, endTagOffset + endTagDataLength);
370 // tag not found, attempt AddTag
371 else return AddTag(tag, type, value);
374 // get "NM" tag data - originally contributed by Aaron Quinlan
375 // stores data in 'editDistance', returns success/fail
376 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
377 return GetTag("NM", (uint32_t&)editDistance);
381 // stores data in 'readGroup', returns success/fail
382 bool BamAlignment::GetReadGroup(string& readGroup) const {
383 return GetTag("RG", readGroup);
386 bool BamAlignment::GetTag(const string& tag, string& destination) const {
388 // make sure tag data exists
389 if ( SupportData.HasCoreOnly || TagData.empty() )
392 // localize the tag data
393 char* pTagData = (char*)TagData.data();
394 const unsigned int tagDataLength = TagData.size();
395 unsigned int numBytesParsed = 0;
397 // if tag found, store data in readGroup, return success
398 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
399 const unsigned int dataLength = strlen(pTagData);
401 destination.resize(dataLength);
402 memcpy( (char*)destination.data(), pTagData, dataLength );
406 // tag not found, return failure
410 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
412 // make sure tag data exists
413 if ( SupportData.HasCoreOnly || TagData.empty() )
416 // localize the tag data
417 char* pTagData = (char*)TagData.data();
418 const unsigned int tagDataLength = TagData.size();
419 unsigned int numBytesParsed = 0;
421 // if tag found, determine data byte-length, store data in readGroup, return success
422 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
424 // determine data byte-length
425 const char type = *(pTagData - 1);
426 int destinationLength = 0;
432 destinationLength = 1;
438 destinationLength = 2;
444 destinationLength = 4;
447 // unsupported type for integer destination (float or var-length strings)
451 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
456 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
460 // store in destination
462 memcpy(&destination, pTagData, destinationLength);
466 // tag not found, return failure
470 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
471 return GetTag(tag, (uint32_t&)destination);
474 bool BamAlignment::GetTag(const string& tag, float& destination) const {
476 // make sure tag data exists
477 if ( SupportData.HasCoreOnly || TagData.empty() )
480 // localize the tag data
481 char* pTagData = (char*)TagData.data();
482 const unsigned int tagDataLength = TagData.size();
483 unsigned int numBytesParsed = 0;
485 // if tag found, determine data byte-length, store data in readGroup, return success
486 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
488 // determine data byte-length
489 const char type = *(pTagData - 1);
490 int destinationLength = 0;
497 destinationLength = 1;
503 destinationLength = 2;
510 destinationLength = 4;
513 // unsupported type (var-length strings)
516 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
521 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
525 // store in destination
527 memcpy(&destination, pTagData, destinationLength);
531 // tag not found, return failure
535 bool BamAlignment::GetTagType(const string& tag, char& type) const {
537 // make sure tag data exists
538 if ( SupportData.HasCoreOnly || TagData.empty() )
541 // localize the tag data
542 char* pTagData = (char*)TagData.data();
543 const unsigned int tagDataLength = TagData.size();
544 unsigned int numBytesParsed = 0;
547 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
549 // retrieve tag type code
550 type = *(pTagData - 1);
552 // validate that type is a proper BAM tag type
568 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
573 // tag not found, return failure
577 bool BamAlignment::RemoveTag(const string& tag) {
579 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
580 // also, return false if no data present to remove
581 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
583 // localize the tag data
584 char* pOriginalTagData = (char*)TagData.data();
585 char* pTagData = pOriginalTagData;
586 const unsigned int originalTagDataLength = TagData.size();
587 unsigned int newTagDataLength = 0;
588 unsigned int numBytesParsed = 0;
590 // if tag found, store data in readGroup, return success
591 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
593 char newTagData[originalTagDataLength];
595 // copy original tag data up til desired tag
598 const unsigned int beginningTagDataLength = numBytesParsed;
599 newTagDataLength += beginningTagDataLength;
600 memcpy(newTagData, pOriginalTagData, numBytesParsed);
602 // skip to next tag (if tag for removal is last, return true)
603 const char* pTagStorageType = pTagData + 2;
606 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
608 // copy everything from current tag (the next one after tag for removal) to end
609 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
610 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
611 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
614 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
618 // tag not found, no removal - return failure
622 bool BamAlignment::FindTag(const string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {
624 while ( numBytesParsed < tagDataLength ) {
626 const char* pTagType = pTagData;
627 const char* pTagStorageType = pTagData + 2;
631 // check the current tag, return true on match
632 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
635 // get the storage class and find the next tag
636 if ( *pTagStorageType == '\0' ) return false;
637 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
638 if ( *pTagData == '\0' ) return false;
641 // checked all tags, none match
645 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
647 switch(storageType) {
675 // increment for null-terminator
682 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);