1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 18 September 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
18 #include "BamAlignment.h"
19 using namespace BamTools;
22 BamAlignment::BamAlignment(void)
31 BamAlignment::BamAlignment(const BamAlignment& other)
33 , Length(other.Length)
34 , QueryBases(other.QueryBases)
35 , AlignedBases(other.AlignedBases)
36 , Qualities(other.Qualities)
37 , TagData(other.TagData)
39 , Position(other.Position)
41 , MapQuality(other.MapQuality)
42 , AlignmentFlag(other.AlignmentFlag)
43 , CigarData(other.CigarData)
44 , MateRefID(other.MateRefID)
45 , MatePosition(other.MatePosition)
46 , InsertSize(other.InsertSize)
47 , SupportData(other.SupportData)
51 BamAlignment::~BamAlignment(void) { }
53 // Queries against alignment flags
54 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
55 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
56 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
57 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
58 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
59 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
60 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
61 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
62 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
63 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
64 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
66 // Manipulate alignment flags
67 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
68 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
69 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
70 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
71 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
72 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
73 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
74 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
75 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
76 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
77 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
79 // calculates alignment end position, based on starting position and CIGAR operations
80 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
82 // initialize alignment end to starting position
83 int alignEnd = Position;
85 // iterate over cigar operations
86 std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
87 std::vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
88 for ( ; cigarIter != cigarEnd; ++cigarIter) {
89 const char cigarType = (*cigarIter).Type;
90 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
91 alignEnd += (*cigarIter).Length;
92 else if ( usePadded && cigarType == 'I' )
93 alignEnd += (*cigarIter).Length;
96 // adjust for zeroBased, if necessary
103 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
105 if ( SupportData.HasCoreOnly ) return false;
106 if ( tag.size() != 2 || type.size() != 1 ) return false;
107 if ( type != "Z" && type != "H" ) return false;
109 // localize the tag data
110 char* pTagData = (char*)TagData.data();
111 const unsigned int tagDataLength = TagData.size();
112 unsigned int numBytesParsed = 0;
114 // if tag already exists, return false
115 // use EditTag explicitly instead
116 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
118 // otherwise, copy tag data to temp buffer
119 std::string newTag = tag + type + value;
120 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
121 char originalTagData[newTagDataLength];
122 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
125 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
127 // store temp buffer back in TagData
128 const char* newTagData = (const char*)originalTagData;
129 TagData.assign(newTagData, newTagDataLength);
135 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
137 if ( SupportData.HasCoreOnly ) return false;
138 if ( tag.size() != 2 || type.size() != 1 ) return false;
139 if ( type == "f" || type == "Z" || type == "H" ) return false;
141 // localize the tag data
142 char* pTagData = (char*)TagData.data();
143 const unsigned int tagDataLength = TagData.size();
144 unsigned int numBytesParsed = 0;
146 // if tag already exists, return false
147 // use EditTag explicitly instead
148 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
150 // otherwise, convert value to string
151 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
154 // copy original tag data to temp buffer
155 std::string newTag = tag + type;
156 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
157 char originalTagData[newTagDataLength];
158 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
161 strcat(originalTagData + tagDataLength, newTag.data());
162 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
164 // store temp buffer back in TagData
165 const char* newTagData = (const char*)originalTagData;
166 TagData.assign(newTagData, newTagDataLength);
172 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
173 return AddTag(tag, type, (const uint32_t&)value);
176 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
178 if ( SupportData.HasCoreOnly ) return false;
179 if ( tag.size() != 2 || type.size() != 1 ) return false;
180 if ( type == "Z" || type == "H" ) return false;
182 // localize the tag data
183 char* pTagData = (char*)TagData.data();
184 const unsigned int tagDataLength = TagData.size();
185 unsigned int numBytesParsed = 0;
187 // if tag already exists, return false
188 // use EditTag explicitly instead
189 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
191 // otherwise, convert value to string
192 union { float value; char valueBuffer[sizeof(float)]; } un;
195 // copy original tag data to temp buffer
196 std::string newTag = tag + type;
197 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
198 char originalTagData[newTagDataLength];
199 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
202 strcat(originalTagData + tagDataLength, newTag.data());
203 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
205 // store temp buffer back in TagData
206 const char* newTagData = (const char*)originalTagData;
207 TagData.assign(newTagData, newTagDataLength);
213 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
215 if ( SupportData.HasCoreOnly ) return false;
216 if ( tag.size() != 2 || type.size() != 1 ) return false;
217 if ( type != "Z" && type != "H" ) return false;
219 // localize the tag data
220 char* pOriginalTagData = (char*)TagData.data();
221 char* pTagData = pOriginalTagData;
222 const unsigned int originalTagDataLength = TagData.size();
224 unsigned int newTagDataLength = 0;
225 unsigned int numBytesParsed = 0;
227 // if tag found, store data in readGroup, return success
228 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
230 // make sure array is more than big enough
231 char newTagData[originalTagDataLength + value.size()];
233 // copy original tag data up til desired tag
234 const unsigned int beginningTagDataLength = numBytesParsed;
235 newTagDataLength += beginningTagDataLength;
236 memcpy(newTagData, pOriginalTagData, numBytesParsed);
238 // copy new VALUE in place of current tag data
239 const unsigned int dataLength = strlen(value.c_str());
240 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
242 // skip to next tag (if tag for removal is last, return true)
243 const char* pTagStorageType = pTagData - 1;
244 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
246 // copy everything from current tag (the next one after tag for removal) to end
247 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
248 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
249 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
250 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
252 // ensure null-terminator
253 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
256 TagData.assign(newTagData, endTagOffset + endTagDataLength);
260 // tag not found, attempt AddTag
261 else return AddTag(tag, type, value);
264 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
266 if ( SupportData.HasCoreOnly ) return false;
267 if ( tag.size() != 2 || type.size() != 1 ) return false;
268 if ( type == "f" || type == "Z" || type == "H" ) return false;
270 // localize the tag data
271 char* pOriginalTagData = (char*)TagData.data();
272 char* pTagData = pOriginalTagData;
273 const unsigned int originalTagDataLength = TagData.size();
275 unsigned int newTagDataLength = 0;
276 unsigned int numBytesParsed = 0;
278 // if tag found, store data in readGroup, return success
279 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
281 // make sure array is more than big enough
282 char newTagData[originalTagDataLength + sizeof(value)];
284 // copy original tag data up til desired tag
285 const unsigned int beginningTagDataLength = numBytesParsed;
286 newTagDataLength += beginningTagDataLength;
287 memcpy(newTagData, pOriginalTagData, numBytesParsed);
289 // copy new VALUE in place of current tag data
290 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
292 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
294 // skip to next tag (if tag for removal is last, return true)
295 const char* pTagStorageType = pTagData - 1;
296 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
298 // copy everything from current tag (the next one after tag for removal) to end
299 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
300 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
301 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
302 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
304 // ensure null-terminator
305 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
308 TagData.assign(newTagData, endTagOffset + endTagDataLength);
312 // tag not found, attempt AddTag
313 else return AddTag(tag, type, value);
316 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
317 return EditTag(tag, type, (const uint32_t&)value);
320 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
322 if ( SupportData.HasCoreOnly ) return false;
323 if ( tag.size() != 2 || type.size() != 1 ) return false;
324 if ( type == "Z" || type == "H" ) return false;
326 // localize the tag data
327 char* pOriginalTagData = (char*)TagData.data();
328 char* pTagData = pOriginalTagData;
329 const unsigned int originalTagDataLength = TagData.size();
331 unsigned int newTagDataLength = 0;
332 unsigned int numBytesParsed = 0;
334 // if tag found, store data in readGroup, return success
335 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
337 // make sure array is more than big enough
338 char newTagData[originalTagDataLength + sizeof(value)];
340 // copy original tag data up til desired tag
341 const unsigned int beginningTagDataLength = numBytesParsed;
342 newTagDataLength += beginningTagDataLength;
343 memcpy(newTagData, pOriginalTagData, numBytesParsed);
345 // copy new VALUE in place of current tag data
346 union { float value; char valueBuffer[sizeof(float)]; } un;
348 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
350 // skip to next tag (if tag for removal is last, return true)
351 const char* pTagStorageType = pTagData - 1;
352 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
354 // copy everything from current tag (the next one after tag for removal) to end
355 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
356 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
357 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
358 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
360 // ensure null-terminator
361 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
364 TagData.assign(newTagData, endTagOffset + endTagDataLength);
368 // tag not found, attempt AddTag
369 else return AddTag(tag, type, value);
372 // get "NM" tag data - originally contributed by Aaron Quinlan
373 // stores data in 'editDistance', returns success/fail
374 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
375 return GetTag("NM", (uint32_t&)editDistance);
379 // stores data in 'readGroup', returns success/fail
380 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
381 return GetTag("RG", readGroup);
384 bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
386 // make sure tag data exists
387 if ( SupportData.HasCoreOnly || TagData.empty() )
390 // localize the tag data
391 char* pTagData = (char*)TagData.data();
392 const unsigned int tagDataLength = TagData.size();
393 unsigned int numBytesParsed = 0;
395 // if tag found, store data in readGroup, return success
396 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
397 const unsigned int dataLength = strlen(pTagData);
399 destination.resize(dataLength);
400 memcpy( (char*)destination.data(), pTagData, dataLength );
404 // tag not found, return failure
408 bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
410 // make sure tag data exists
411 if ( SupportData.HasCoreOnly || TagData.empty() )
414 // localize the tag data
415 char* pTagData = (char*)TagData.data();
416 const unsigned int tagDataLength = TagData.size();
417 unsigned int numBytesParsed = 0;
419 // if tag found, determine data byte-length, store data in readGroup, return success
420 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
422 // determine data byte-length
423 const char type = *(pTagData - 1);
424 int destinationLength = 0;
430 destinationLength = 1;
436 destinationLength = 2;
442 destinationLength = 4;
445 // unsupported type for integer destination (float or var-length strings)
449 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
454 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
458 // store in destination
460 memcpy(&destination, pTagData, destinationLength);
464 // tag not found, return failure
468 bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
469 return GetTag(tag, (uint32_t&)destination);
472 bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
474 // make sure tag data exists
475 if ( SupportData.HasCoreOnly || TagData.empty() )
478 // localize the tag data
479 char* pTagData = (char*)TagData.data();
480 const unsigned int tagDataLength = TagData.size();
481 unsigned int numBytesParsed = 0;
483 // if tag found, determine data byte-length, store data in readGroup, return success
484 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
485 //pTagData += numBytesParsed;
487 // determine data byte-length
488 const char type = *(pTagData - 1);
489 int destinationLength = 0;
496 destinationLength = 1;
502 destinationLength = 2;
509 destinationLength = 4;
512 // unsupported type (var-length strings)
515 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
520 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
524 // store in destination
526 memcpy(&destination, pTagData, destinationLength);
530 // tag not found, return failure
534 bool BamAlignment::RemoveTag(const std::string& tag) {
536 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
537 // also, return false if no data present to remove
538 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
540 // localize the tag data
541 char* pOriginalTagData = (char*)TagData.data();
542 char* pTagData = pOriginalTagData;
543 const unsigned int originalTagDataLength = TagData.size();
544 unsigned int newTagDataLength = 0;
545 unsigned int numBytesParsed = 0;
547 // if tag found, store data in readGroup, return success
548 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
550 char newTagData[originalTagDataLength];
552 // copy original tag data up til desired tag
555 const unsigned int beginningTagDataLength = numBytesParsed;
556 newTagDataLength += beginningTagDataLength;
557 memcpy(newTagData, pOriginalTagData, numBytesParsed);
559 // skip to next tag (if tag for removal is last, return true)
560 const char* pTagStorageType = pTagData + 2;
563 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
565 // copy everything from current tag (the next one after tag for removal) to end
566 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
567 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
568 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
571 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
575 // tag not found, no removal - return failure
579 bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {
581 while ( numBytesParsed < tagDataLength ) {
583 const char* pTagType = pTagData;
584 const char* pTagStorageType = pTagData + 2;
588 // check the current tag, return true on match
589 if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 )
592 // get the storage class and find the next tag
593 if ( *pTagStorageType == '\0' ) return false;
594 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
595 if ( *pTagData == '\0' ) return false;
598 // checked all tags, none match
602 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
604 switch(storageType) {
632 // increment for null-terminator
639 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);