1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 22 April 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides the BamAlignment data structure
8 // ***************************************************************************
10 #include <api/BamAlignment.h>
11 #include <api/BamConstants.h>
12 using namespace BamTools;
24 /*! \class BamTools::BamAlignment
25 \brief The main BAM alignment data structure.
27 Provides methods to query/modify BAM alignment data fields.
29 /*! \var BamAlignment::Name
32 /*! \var BamAlignment::Length
33 \brief length of query sequence
35 /*! \var BamAlignment::QueryBases
36 \brief 'original' sequence (as reported from sequencing machine)
38 /*! \var BamAlignment::AlignedBases
39 \brief 'aligned' sequence (includes any indels, padding, clipping)
41 /*! \var BamAlignment::Qualities
42 \brief FASTQ qualities (ASCII characters, not numeric values)
44 /*! \var BamAlignment::TagData
45 \brief tag data (use the provided methods to query/modify)
47 /*! \var BamAlignment::RefID
48 \brief ID number for reference sequence
50 /*! \var BamAlignment::Position
51 \brief position (0-based) where alignment starts
53 /*! \var BamAlignment::Bin
54 \brief BAM (standard) index bin number for this alignment
56 /*! \var BamAlignment::MapQuality
57 \brief mapping quality score
59 /*! \var BamAlignment::AlignmentFlag
60 \brief alignment bit-flag (use the provided methods to query/modify)
62 /*! \var BamAlignment::CigarData
63 \brief CIGAR operations for this alignment
65 /*! \var BamAlignment::MateRefID
66 \brief ID number for reference sequence where alignment's mate was aligned
68 /*! \var BamAlignment::MatePosition
69 \brief position (0-based) where alignment's mate starts
71 /*! \var BamAlignment::InsertSize
72 \brief mate-pair insert size
74 /*! \var BamAlignment::Filename
75 \brief name of BAM file which this alignment comes from
78 /*! \fn BamAlignment::BamAlignment(void)
// Default constructor.
// NOTE(review): only the signature line is visible in this listing; the member
// initializer list and the body are not shown here - confirm against the full file.
81 BamAlignment::BamAlignment(void)
89 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
90 \brief copy constructor
// Copy constructor: copies each listed member from 'other' through the initializer list.
// NOTE(review): the embedded numbering skips 93, 99 and 101 - presumably the Name, RefID
// and Bin initializers, judging by the member order documented above - and the constructor
// body is not visible in this listing; confirm against the full file.
92 BamAlignment::BamAlignment(const BamAlignment& other)
94 , Length(other.Length)
95 , QueryBases(other.QueryBases)
96 , AlignedBases(other.AlignedBases)
97 , Qualities(other.Qualities)
98 , TagData(other.TagData)
100 , Position(other.Position)
102 , MapQuality(other.MapQuality)
103 , AlignmentFlag(other.AlignmentFlag)
104 , CigarData(other.CigarData)
105 , MateRefID(other.MateRefID)
106 , MatePosition(other.MatePosition)
107 , InsertSize(other.InsertSize)
108 , Filename(other.Filename)
// SupportData is copied as well, so the copy carries the same HasCoreOnly state as 'other'
109 , SupportData(other.SupportData)
112 /*! \fn BamAlignment::~BamAlignment(void)
// Destructor: the body is empty, so no explicit cleanup is performed here.
115 BamAlignment::~BamAlignment(void) { }
117 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
118 \brief Adds a field with string data to the BAM tags.
120 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
122 \param tag 2-character tag name
123 \param type 1-character tag type (must be "Z" or "H")
124 \param value string data to store
126 \return \c true if the \b new tag was added successfully
127 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new string-valued tag (tag name + type code + value + terminating null)
// to the end of TagData.
// Declines when only core data has been parsed, when IsValidSize() rejects tag/type,
// when the type code is neither BAM_TAG_TYPE_STRING nor BAM_TAG_TYPE_HEX, or (per the
// comment below) when FindTag() reports that the tag already exists.
// NOTE(review): several short lines (the early-return bodies, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
129 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
131 // skip if core data not parsed
132 if ( SupportData.HasCoreOnly ) return false;
134 // validate tag/type size & that type is OK for string value
135 if ( !IsValidSize(tag, type) ) return false;
136 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
137 type.at(0) != Constants::BAM_TAG_TYPE_HEX
143 // localize the tag data
// the const-ness of TagData.data() is cast away only to match FindTag()'s parameter
144 char* pTagData = (char*)TagData.data();
145 const unsigned int tagDataLength = TagData.size();
146 unsigned int numBytesParsed = 0;
148 // if tag already exists, return false
149 // use EditTag explicitly instead
150 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
153 // otherwise, copy tag data to temp buffer
154 string newTag = tag + type + value;
155 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
// NOTE(review): raw new[]/delete[] pair - if TagData.assign() below throws, this buffer leaks
156 char* originalTagData = new char[newTagDataLength];
157 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
// strcat starts at offset tagDataLength, i.e. exactly on the null copied by the memcpy above,
// so zero bytes embedded earlier in the binary tag data do not affect where it appends.
// NOTE(review): strcat stops at the first zero byte of its source, so a 'value' containing an
// embedded '\0' would be cut short and the tail of the buffer left unwritten; it also relies
// on newTag.data() being null-terminated (guaranteed only from C++11 on) - confirm.
160 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
162 // store temp buffer back in TagData
163 const char* newTagData = (const char*)originalTagData;
// assign() is given an explicit length, so the trailing null and any zero bytes inside
// the existing tag data are kept in TagData
164 TagData.assign(newTagData, newTagDataLength);
166 delete[] originalTagData;
172 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
173 \brief Adds a field with unsigned integer data to the BAM tags.
175 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
177 \param tag 2-character tag name
178 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
179 \param value unsigned int data to store
181 \return \c true if the \b new tag was added successfully
182 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new integer-valued tag (tag name + type code + 4 raw value bytes) to the
// end of TagData.
// Declines when only core data has been parsed, when IsValidSize() rejects tag/type,
// when the type code is float/string/hex/array, or (per the comment below) when
// FindTag() reports that the tag already exists.
// NOTE(review): several short lines (the early-return bodies, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
184 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
186 // skip if core data not parsed
187 if ( SupportData.HasCoreOnly ) return false;
189 // validate tag/type size & that type is OK for uint32_t value
190 if ( !IsValidSize(tag, type) ) return false;
191 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
192 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
193 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
194 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
200 // localize the tag data
201 char* pTagData = (char*)TagData.data();
202 const unsigned int tagDataLength = TagData.size();
203 unsigned int numBytesParsed = 0;
205 // if tag already exists, return false
206 // use EditTag explicitly instead
207 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
210 // otherwise, convert value to string
// the union exposes the integer's object representation as raw bytes (host byte order)
// NOTE(review): the statement that stores 'value' into un.value is not visible in this
// listing (embedded line 212 is missing) - confirm it exists in the complete file.
211 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
214 // copy original tag data to temp buffer
215 string newTag = tag + type;
// NOTE(review): the payload is always sizeof(uint32_t) bytes regardless of the type code,
// whereas BuildCharData() in this file steps over 1 byte for INT8/UINT8 and 2 bytes for
// INT16/UINT16 tags - confirm callers only pass 32-bit type codes here.
216 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
// NOTE(review): raw new[]/delete[] pair - if TagData.assign() below throws, this buffer leaks
217 char* originalTagData = new char[newTagDataLength];
218 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
// strcat overwrites the null copied above with tag+type (and writes a new null after it);
// the following memcpy then overwrites that null with the first value byte
221 strcat(originalTagData + tagDataLength, newTag.data());
222 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
224 // store temp buffer back in TagData
225 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
226 TagData.assign(newTagData, newTagDataLength);
227 delete[] originalTagData;
233 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
234 \brief Adds a field with signed integer data to the BAM tags.
236 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
238 \param tag 2-character tag name
239 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
240 \param value signed int data to store
242 \return \c true if the \b new tag was added successfully
243 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Signed-integer overload: forwards to the uint32_t overload and returns its result.
245 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
// the reference cast reinterprets the same 32 bits as unsigned; no value conversion
// or temporary is involved, so the stored bytes are those of the original int32_t
246 return AddTag(tag, type, (const uint32_t&)value);
249 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
250 \brief Adds a field with floating-point data to the BAM tags.
252 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
254 \param tag 2-character tag name
255 \param type 1-character tag type (must NOT be "Z", "H", or "B")
256 \param value float data to store
258 \return \c true if the \b new tag was added successfully
259 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new float-valued tag (tag name + type code + raw float bytes) to the end
// of TagData.
// Declines when only core data has been parsed, when IsValidSize() rejects tag/type,
// when the type code is string/hex/array, or (per the comment below) when FindTag()
// reports that the tag already exists.
// NOTE(review): several short lines (the early-return bodies, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
261 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
263 // skip if core data not parsed
264 if ( SupportData.HasCoreOnly ) return false;
266 // validate tag/type size & that type is OK for float value
267 if ( !IsValidSize(tag, type) ) return false;
// note: integer type codes are not rejected by this check
268 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
269 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
270 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
276 // localize the tag data
277 char* pTagData = (char*)TagData.data();
278 const unsigned int tagDataLength = TagData.size();
279 unsigned int numBytesParsed = 0;
281 // if tag already exists, return false
282 // use EditTag explicitly instead
283 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
286 // otherwise, convert value to string
// the union exposes the float's object representation as raw bytes (host byte order)
// NOTE(review): the statement that stores 'value' into un.value is not visible in this
// listing (embedded line 288 is missing) - confirm it exists in the complete file.
287 union { float value; char valueBuffer[sizeof(float)]; } un;
290 // copy original tag data to temp buffer
291 string newTag = tag + type;
// the literal 4 assumes sizeof(float) == 4, matching the memcpy size used below
292 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
// NOTE(review): raw new[]/delete[] pair - if TagData.assign() below throws, this buffer leaks
293 char* originalTagData = new char[newTagDataLength];
294 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
// strcat overwrites the null copied above with tag+type (and writes a new null after it);
// the following memcpy then overwrites that null with the first value byte
297 strcat(originalTagData + tagDataLength, newTag.data());
298 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
300 // store temp buffer back in TagData
301 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
302 TagData.assign(newTagData, newTagDataLength);
304 delete[] originalTagData;
310 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values)
311 \brief Adds a numeric array field to the BAM tags.
313 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
315 \param tag 2-character tag name
316 \param values vector of uint8_t values to store
318 \return \c true if the \b new tag was added successfully
319 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding uint8_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
321 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
323 // skip if core data not parsed
324 if ( SupportData.HasCoreOnly ) return false;
326 // check for valid tag length
327 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
329 // localize the tag data
330 char* pTagData = (char*)TagData.data();
331 const unsigned int tagDataLength = TagData.size();
332 unsigned int numBytesParsed = 0;
334 // if tag already exists, return false
335 // use EditTag explicitly instead
336 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
339 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
340 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
341 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
342 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
343 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
345 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
346 const int32_t numElements = values.size();
347 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
349 // copy current TagData string to temp buffer, leaving room for new tag's contents
350 const int newTagDataLength = tagDataLength +
351 Constants::BAM_TAG_ARRAYBASE_SIZE +
352 numElements*sizeof(uint8_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
353 char* originalTagData = new char[newTagDataLength];
354 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
356 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
357 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
359 // add vector elements to tag
360 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
361 for ( int i = 0 ; i < numElements; ++i ) {
362 const uint8_t value = values.at(i);
363 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
364 &value, sizeof(uint8_t));
367 // store temp buffer back in TagData
368 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
369 TagData.assign(newTagData, newTagDataLength);
371 delete[] originalTagData;
377 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values)
378 \brief Adds a numeric array field to the BAM tags.
380 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
382 \param tag 2-character tag name
383 \param values vector of int8_t values to store
385 \return \c true if the \b new tag was added successfully
386 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding int8_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
388 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
390 // skip if core data not parsed
391 if ( SupportData.HasCoreOnly ) return false;
393 // check for valid tag length
394 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
396 // localize the tag data
397 char* pTagData = (char*)TagData.data();
398 const unsigned int tagDataLength = TagData.size();
399 unsigned int numBytesParsed = 0;
401 // if tag already exists, return false
402 // use EditTag explicitly instead
403 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
406 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
407 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
408 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
409 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
410 newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
412 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
413 const int32_t numElements = values.size();
414 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
416 // copy current TagData string to temp buffer, leaving room for new tag's contents
417 const int newTagDataLength = tagDataLength +
418 Constants::BAM_TAG_ARRAYBASE_SIZE +
419 numElements*sizeof(int8_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
420 char* originalTagData = new char[newTagDataLength];
421 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
423 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
424 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
426 // add vector elements to tag
427 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
428 for ( int i = 0 ; i < numElements; ++i ) {
429 const int8_t value = values.at(i);
430 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
431 &value, sizeof(int8_t));
434 // store temp buffer back in TagData
435 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
436 TagData.assign(newTagData, newTagDataLength);
438 delete[] originalTagData;
444 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values)
445 \brief Adds a numeric array field to the BAM tags.
447 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
449 \param tag 2-character tag name
450 \param values vector of uint16_t values to store
452 \return \c true if the \b new tag was added successfully
453 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding uint16_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
455 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
457 // skip if core data not parsed
458 if ( SupportData.HasCoreOnly ) return false;
460 // check for valid tag length
461 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
463 // localize the tag data
464 char* pTagData = (char*)TagData.data();
465 const unsigned int tagDataLength = TagData.size();
466 unsigned int numBytesParsed = 0;
468 // if tag already exists, return false
469 // use EditTag explicitly instead
470 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
473 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
474 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
475 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
476 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
477 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
479 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
480 const int32_t numElements = values.size();
481 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
483 // copy current TagData string to temp buffer, leaving room for new tag's contents
484 const int newTagDataLength = tagDataLength +
485 Constants::BAM_TAG_ARRAYBASE_SIZE +
486 numElements*sizeof(uint16_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
487 char* originalTagData = new char[newTagDataLength];
488 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
490 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
491 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
493 // add vector elements to tag
494 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
495 for ( int i = 0 ; i < numElements; ++i ) {
496 const uint16_t value = values.at(i);
497 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
498 &value, sizeof(uint16_t));
501 // store temp buffer back in TagData
502 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
503 TagData.assign(newTagData, newTagDataLength);
505 delete[] originalTagData;
511 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values)
512 \brief Adds a numeric array field to the BAM tags.
514 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
516 \param tag 2-character tag name
517 \param values vector of int16_t values to store
519 \return \c true if the \b new tag was added successfully
520 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding int16_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
522 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
524 // skip if core data not parsed
525 if ( SupportData.HasCoreOnly ) return false;
527 // check for valid tag length
528 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
530 // localize the tag data
531 char* pTagData = (char*)TagData.data();
532 const unsigned int tagDataLength = TagData.size();
533 unsigned int numBytesParsed = 0;
535 // if tag already exists, return false
536 // use EditTag explicitly instead
537 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
540 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
541 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
542 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
543 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
544 newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
546 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
547 const int32_t numElements = values.size();
548 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
550 // copy current TagData string to temp buffer, leaving room for new tag's contents
551 const int newTagDataLength = tagDataLength +
552 Constants::BAM_TAG_ARRAYBASE_SIZE +
553 numElements*sizeof(int16_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
554 char* originalTagData = new char[newTagDataLength];
555 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
557 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
558 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
560 // add vector elements to tag
561 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
562 for ( int i = 0 ; i < numElements; ++i ) {
563 const int16_t value = values.at(i);
564 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
565 &value, sizeof(int16_t));
568 // store temp buffer back in TagData
569 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
570 TagData.assign(newTagData, newTagDataLength);
572 delete[] originalTagData;
578 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values)
579 \brief Adds a numeric array field to the BAM tags.
581 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
583 \param tag 2-character tag name
584 \param values vector of uint32_t values to store
586 \return \c true if the \b new tag was added successfully
587 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding uint32_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
589 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
591 // skip if core data not parsed
592 if ( SupportData.HasCoreOnly ) return false;
594 // check for valid tag length
595 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
597 // localize the tag data
598 char* pTagData = (char*)TagData.data();
599 const unsigned int tagDataLength = TagData.size();
600 unsigned int numBytesParsed = 0;
602 // if tag already exists, return false
603 // use EditTag explicitly instead
604 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
607 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
608 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
609 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
610 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
611 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
613 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
614 const int32_t numElements = values.size();
615 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
617 // copy current TagData string to temp buffer, leaving room for new tag's contents
618 const int newTagDataLength = tagDataLength +
619 Constants::BAM_TAG_ARRAYBASE_SIZE +
620 numElements*sizeof(uint32_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
621 char* originalTagData = new char[newTagDataLength];
622 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
624 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
625 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
627 // add vector elements to tag
628 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
629 for ( int i = 0 ; i < numElements; ++i ) {
630 const uint32_t value = values.at(i);
631 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
632 &value, sizeof(uint32_t));
635 // store temp buffer back in TagData
636 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
637 TagData.assign(newTagData, newTagDataLength);
639 delete[] originalTagData;
645 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values)
646 \brief Adds a numeric array field to the BAM tags.
648 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
650 \param tag 2-character tag name
651 \param values vector of int32_t values to store
653 \return \c true if the \b new tag was added successfully
654 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding int32_t elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
656 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
658 // skip if core data not parsed
659 if ( SupportData.HasCoreOnly ) return false;
661 // check for valid tag length
662 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
664 // localize the tag data
665 char* pTagData = (char*)TagData.data();
666 const unsigned int tagDataLength = TagData.size();
667 unsigned int numBytesParsed = 0;
669 // if tag already exists, return false
670 // use EditTag explicitly instead
671 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
674 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
675 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
676 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
677 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
678 newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
680 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
681 const int32_t numElements = values.size();
682 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
684 // copy current TagData string to temp buffer, leaving room for new tag's contents
685 const int newTagDataLength = tagDataLength +
686 Constants::BAM_TAG_ARRAYBASE_SIZE +
687 numElements*sizeof(int32_t);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
688 char* originalTagData = new char[newTagDataLength];
689 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
691 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
692 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
694 // add vector elements to tag
695 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
696 for ( int i = 0 ; i < numElements; ++i ) {
697 const int32_t value = values.at(i);
698 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
699 &value, sizeof(int32_t));
702 // store temp buffer back in TagData
703 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
704 TagData.assign(newTagData, newTagDataLength);
706 delete[] originalTagData;
712 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values)
713 \brief Adds a numeric array field to the BAM tags.
715 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
717 \param tag 2-character tag name
718 \param values vector of float values to store
720 \return \c true if the \b new tag was added successfully
721 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
// Appends a new numeric-array tag holding float elements to the end of TagData.
// Declines when only core data has been parsed, when 'tag' is not exactly
// BAM_TAG_TAGSIZE characters long, or (per the comment below) when FindTag() reports
// that the tag already exists.
// NOTE(review): several short lines (the early-return body, the final return and the
// closing braces) are not visible in this listing; confirm against the complete file.
723 bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
725 // skip if core data not parsed
726 if ( SupportData.HasCoreOnly ) return false;
728 // check for valid tag length
729 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
731 // localize the tag data
732 char* pTagData = (char*)TagData.data();
733 const unsigned int tagDataLength = TagData.size();
734 unsigned int numBytesParsed = 0;
736 // if tag already exists, return false
737 // use EditTag explicitly instead
738 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
741 // build new tag's base information
// header layout as written below: tag name first, byte [2] = array marker,
// byte [3] = element type code, then a 4-byte element count starting at byte [4]
742 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
743 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
744 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
745 newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
747 // add number of array elements to newTagBase
// values.size() is narrowed to int32_t; the count is stored in host byte order
748 const int32_t numElements = values.size();
749 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
751 // copy current TagData string to temp buffer, leaving room for new tag's contents
752 const int newTagDataLength = tagDataLength +
753 Constants::BAM_TAG_ARRAYBASE_SIZE +
754 numElements*sizeof(float);
// NOTE(review): new char[] leaves the buffer uninitialized, and the raw new[]/delete[]
// pair leaks if TagData.assign() below throws
755 char* originalTagData = new char[newTagDataLength];
756 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
758 // write newTagBase (removes old null term)
// NOTE(review): newTagBase is binary and never null-terminated here. strcat stops at the
// first zero byte it meets, so header bytes after that zero (typically the upper bytes of
// the element count) are not copied and stay as whatever the uninitialized buffer held;
// if the header contains no zero byte, strcat reads past the end of newTagBase.
// A memcpy of BAM_TAG_ARRAYBASE_SIZE bytes would copy the header exactly - TODO confirm/fix.
759 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
761 // add vector elements to tag
762 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
// each element is copied byte-for-byte (host byte order) into its slot after the header
763 for ( int i = 0 ; i < numElements; ++i ) {
764 const float value = values.at(i);
765 memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
766 &value, sizeof(float));
769 // store temp buffer back in TagData
770 const char* newTagData = (const char*)originalTagData;
// explicit length keeps zero bytes inside the binary payload
771 TagData.assign(newTagData, newTagDataLength);
773 delete[] originalTagData;
779 /*! \fn bool BamAlignment::BuildCharData(void)
780 \brief Populates alignment string fields (read name, bases, qualities, tag data).
782 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
783 Using that method makes parsing much quicker when only positional data is required.
785 However, if you later want to access the character data fields from such an alignment,
786 use this method to populate those fields. Provides ability to do 'lazy evaluation' of alignment parsing.
789 \return \c true if character data populated successfully (or was already available to begin with)
// Populates Name, QueryBases, Qualities, AlignedBases and TagData from the raw bytes
// held in SupportData.AllCharData, then clears SupportData.HasCoreOnly.
// NOTE(review): many short lines (return statements, switch headers, guards, some
// declarations such as 'k', 'i' and 'numElements', and closing braces) are not visible
// in this listing; the notes below hedge wherever behavior depends on them.
791 bool BamAlignment::BuildCharData(void) {
793 // skip if char data already parsed
794 if ( !SupportData.HasCoreOnly )
797 // check system endianness
798 bool IsBigEndian = BamTools::SystemIsBigEndian();
800 // calculate character lengths/offsets
// layout implied by the offsets below: read name, then CIGAR ops (4 bytes each), then the
// packed sequence ((length+1)/2 bytes), then one quality byte per base, then tag data
801 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
802 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
803 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
804 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
// NOTE(review): unsigned subtraction - wraps to a huge value if tagDataOffset > dataLength
805 const unsigned int tagDataLength = dataLength - tagDataOffset;
807 // check offsets to see what char data exists
808 const bool hasSeqData = ( seqDataOffset < dataLength );
809 const bool hasQualData = ( qualDataOffset < dataLength );
810 const bool hasTagData = ( tagDataOffset < dataLength );
812 // set up char buffers
// each pointer is null when the corresponding section is absent; tagData drops const-ness
// because the tag bytes may be byte-swapped in place further down
813 const char* allCharData = SupportData.AllCharData.data();
814 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
815 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
816 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
818 // store alignment name (relies on null char in name as terminator)
819 Name.assign((const char*)(allCharData));
821 // save query sequence
// NOTE(review): seqData is null when hasSeqData is false; the guard (and any clear() of
// QueryBases) around this loop is not visible in this listing - confirm.
824 QueryBases.reserve(SupportData.QuerySequenceLength);
825 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
// two bases per byte: even i takes the high nibble (shift 4), odd i the low nibble (shift 0)
826 char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
827 QueryBases.append(1, singleBase);
831 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
// NOTE(review): qualData is null when hasQualData is false; the guard around this loop is
// not visible in this listing - confirm.
834 Qualities.reserve(SupportData.QuerySequenceLength);
835 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
// the conversion adds an offset of 33 to the stored numeric quality
836 char singleQuality = (char)(qualData[i]+33);
837 Qualities.append(1, singleQuality);
841 // clear previous AlignedBases
842 AlignedBases.clear();
844 // if QueryBases has data, build AlignedBases using CIGAR data
845 // otherwise, AlignedBases will remain empty (this case IS allowed)
846 if ( !QueryBases.empty() ) {
848 // resize AlignedBases
// (reserve only pre-allocates capacity; the string's size is unchanged)
849 AlignedBases.reserve(SupportData.QuerySequenceLength);
851 // iterate over CigarOps
853 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
854 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
855 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
856 const CigarOp& op = (*cigarIter);
// NOTE(review): the switch header, the 'k' declaration/updates and the break statements are
// not visible in this listing; 'k' is presumably the running offset into QueryBases.
860 // for 'M', 'I', '=', 'X' - write bases
861 case (Constants::BAM_CIGAR_MATCH_CHAR) :
862 case (Constants::BAM_CIGAR_INS_CHAR) :
863 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
864 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
865 AlignedBases.append(QueryBases.substr(k, op.Length));
868 // for 'S' - soft clip, do not write bases
869 // but increment placeholder 'k'
870 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
874 // for 'D' - write gap character
875 case (Constants::BAM_CIGAR_DEL_CHAR) :
876 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
879 // for 'P' - write padding character
880 case (Constants::BAM_CIGAR_PAD_CHAR) :
881 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
884 // for 'N' - write N's, skip bases in original query sequence
885 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
886 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
889 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
890 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
893 // shouldn't get here
895 cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
// Tag walk: each record is a BAM_TAG_TAGSIZE-byte name plus a 1-byte type code followed by
// a type-dependent payload; multi-byte payloads are byte-swapped in place, which mutates
// the buffer owned by SupportData.AllCharData (tagData points into it).
// NOTE(review): presumably this loop only runs when IsBigEndian is true, but the guard and
// the declaration of 'i' are not visible in this listing - confirm.
907 while ( (unsigned int)i < tagDataLength ) {
909 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
910 const char type = tagData[i]; // get tag type at position i
911 ++i; // move i past tag type
915 case(Constants::BAM_TAG_TYPE_ASCII) :
916 case(Constants::BAM_TAG_TYPE_INT8) :
917 case(Constants::BAM_TAG_TYPE_UINT8) :
918 // no endian swapping necessary for single-byte data
922 case(Constants::BAM_TAG_TYPE_INT16) :
923 case(Constants::BAM_TAG_TYPE_UINT16) :
924 BamTools::SwapEndian_16p(&tagData[i]);
925 i += sizeof(uint16_t);
928 case(Constants::BAM_TAG_TYPE_FLOAT) :
929 case(Constants::BAM_TAG_TYPE_INT32) :
930 case(Constants::BAM_TAG_TYPE_UINT32) :
931 BamTools::SwapEndian_32p(&tagData[i]);
932 i += sizeof(uint32_t);
935 case(Constants::BAM_TAG_TYPE_HEX) :
936 case(Constants::BAM_TAG_TYPE_STRING) :
937 // no endian swapping necessary for hex-string/string data
940 // increment one more for null terminator
944 case(Constants::BAM_TAG_TYPE_ARRAY) :
// array payload: 1-byte element type, then a 32-bit element count, then the elements
948 const char arrayType = tagData[i];
951 // swap endian-ness of number of elements in place, then retrieve for loop
952 BamTools::SwapEndian_32p(&tagData[i]);
// memcpy rather than a pointer cast, so no alignment is assumed for tagData + i
954 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
955 i += sizeof(uint32_t);
957 // swap endian-ness of array elements
958 for ( int j = 0; j < numElements; ++j ) {
960 case (Constants::BAM_TAG_TYPE_INT8) :
961 case (Constants::BAM_TAG_TYPE_UINT8) :
962 // no endian-swapping necessary
965 case (Constants::BAM_TAG_TYPE_INT16) :
966 case (Constants::BAM_TAG_TYPE_UINT16) :
967 BamTools::SwapEndian_16p(&tagData[i]);
968 i += sizeof(uint16_t);
970 case (Constants::BAM_TAG_TYPE_FLOAT) :
971 case (Constants::BAM_TAG_TYPE_INT32) :
972 case (Constants::BAM_TAG_TYPE_UINT32) :
973 BamTools::SwapEndian_32p(&tagData[i]);
974 i += sizeof(uint32_t);
978 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
979 << arrayType << endl;
987 // shouldn't get here
989 cerr << "BamAlignment ERROR: invalid tag value type: "
996 // store tagData in alignment
// NOTE(review): tagData is null when hasTagData is false, and tagDataLength may have wrapped
// (see above); whether a guard protects this resize/memcpy is not visible here - confirm.
997 TagData.resize(tagDataLength);
998 memcpy((char*)TagData.data(), tagData, tagDataLength);
1001 // clear the core-only flag
1002 SupportData.HasCoreOnly = false;
1008 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
1009 \brief Edits a BAM tag field containing string data.
1011 If \a tag does not exist, a new entry is created.
1013 \param tag 2-character tag name
1014 \param type 1-character tag type (must be "Z" or "H")
1015 \param value string data to store
1017 \return \c true if the tag was modified/created successfully
1019 \sa BamAlignment::RemoveTag()
1020 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1022 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1024 // skip if core data not parsed
1025 if ( SupportData.HasCoreOnly ) return false;
1027 // validate tag/type size & that type is OK for string value
1028 if ( !IsValidSize(tag, type) ) return false;
1029 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1030 type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1033 // localize the tag data
1034 char* pOriginalTagData = (char*)TagData.data();
1035 char* pTagData = pOriginalTagData;
1036 const unsigned int originalTagDataLength = TagData.size();
1038 unsigned int newTagDataLength = 0;
1039 unsigned int numBytesParsed = 0;
1042 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1044 // make sure array is more than big enough
1045 char* newTagData = new char[originalTagDataLength + value.size()];
1047 // copy original tag data up til desired tag
1048 const unsigned int beginningTagDataLength = numBytesParsed;
1049 newTagDataLength += beginningTagDataLength;
1050 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1052 // copy new @value in place of current tag data
1053 const unsigned int dataLength = strlen(value.c_str());
1054 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1056 // skip to next tag (if tag for removal is last, return true)
1057 const char* pTagStorageType = pTagData - 1;
1058 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1061 // copy everything from current tag (the next one after tag for removal) to end
1062 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1063 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1064 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1065 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1067 // ensure null-terminator
1068 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1070 // save new tag data
1071 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1073 delete[] newTagData;
1078 // tag not found, attempt AddTag
1079 else return AddTag(tag, type, value);
1082 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1083 \brief Edits a BAM tag field containing unsigned integer data.
1085 If \a tag does not exist, a new entry is created.
1087 \param tag 2-character tag name
1088 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1089 \param value unsigned integer data to store
1091 \return \c true if the tag was modified/created successfully
1093 \sa BamAlignment::RemoveTag()
1094 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1096 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1098 // skip if core data not parsed
1099 if ( SupportData.HasCoreOnly ) return false;
1101 // validate tag/type size & that type is OK for uint32_t value
1102 if ( !IsValidSize(tag, type) ) return false;
1103 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1104 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1105 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1106 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1112 // localize the tag data
1113 char* pOriginalTagData = (char*)TagData.data();
1114 char* pTagData = pOriginalTagData;
1115 const unsigned int originalTagDataLength = TagData.size();
1117 unsigned int newTagDataLength = 0;
1118 unsigned int numBytesParsed = 0;
1121 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1123 // make sure array is more than big enough
1124 char* newTagData = new char[originalTagDataLength + sizeof(value)];
1126 // copy original tag data up til desired tag
1127 const unsigned int beginningTagDataLength = numBytesParsed;
1128 newTagDataLength += beginningTagDataLength;
1129 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1131 // copy new @value in place of current tag data
1132 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1134 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1136 // skip to next tag (if tag for removal is last, return true)
1137 const char* pTagStorageType = pTagData - 1;
1138 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1141 // copy everything from current tag (the next one after tag for removal) to end
1142 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1143 const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1144 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1145 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1147 // ensure null-terminator
1148 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1150 // save new tag data
1151 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1153 delete[] newTagData;
1158 // tag not found, attempt AddTag
1159 else return AddTag(tag, type, value);
1162 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1163 \brief Edits a BAM tag field containing signed integer data.
1165 If \a tag does not exist, a new entry is created.
1167 \param tag 2-character tag name
1168 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1169 \param value signed integer data to store
1171 \return \c true if the tag was modified/created successfully
1173 \sa BamAlignment::RemoveTag()
1174 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1176 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1177 return EditTag(tag, type, (const uint32_t&)value);
1180 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1181 \brief Edits a BAM tag field containing floating-point data.
1183 If \a tag does not exist, a new entry is created.
1185 \param tag 2-character tag name
1186 \param type 1-character tag type (must NOT be "Z", "H", or "B")
1187 \param value float data to store
1189 \return \c true if the tag was modified/created successfully
1191 \sa BamAlignment::RemoveTag()
1192 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1194 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1196 // skip if core data not parsed
1197 if ( SupportData.HasCoreOnly ) return false;
1199 // validate tag/type size & that type is OK for float value
1200 if ( !IsValidSize(tag, type) ) return false;
1201 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1202 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1203 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1209 // localize the tag data
1210 char* pOriginalTagData = (char*)TagData.data();
1211 char* pTagData = pOriginalTagData;
1212 const unsigned int originalTagDataLength = TagData.size();
1214 unsigned int newTagDataLength = 0;
1215 unsigned int numBytesParsed = 0;
1218 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1220 // make sure array is more than big enough
1221 char* newTagData = new char[originalTagDataLength + sizeof(value)];
1223 // copy original tag data up til desired tag
1224 const unsigned int beginningTagDataLength = numBytesParsed;
1225 newTagDataLength += beginningTagDataLength;
1226 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1228 // copy new @value in place of current tag data
1229 union { float value; char valueBuffer[sizeof(float)]; } un;
1231 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1233 // skip to next tag (if tag for removal is last, return true)
1234 const char* pTagStorageType = pTagData - 1;
1235 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1238 // copy everything from current tag (the next one after tag for removal) to end
1239 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1240 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1241 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1242 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1244 // ensure null-terminator
1245 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1247 // save new tag data
1248 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1250 delete[] newTagData;
1255 // tag not found, attempt AddTag
1256 else return AddTag(tag, type, value);
1259 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1260 \brief Edits a BAM tag field containing a numeric array.
1262 If \a tag does not exist, a new entry is created.
1264 \param tag 2-character tag name
1265 \param value vector of uint8_t values to store
1267 \return \c true if the tag was modified/created successfully
1268 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1270 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1272 // can't do anything if TagData not parsed
1273 if ( SupportData.HasCoreOnly )
1276 // remove existing tag if present
1280 // add tag record with new values
1281 return AddTag(tag, values);
1284 /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1285 \brief Edits a BAM tag field containing a numeric array.
1287 If \a tag does not exist, a new entry is created.
1289 \param tag 2-character tag name
1290 \param value vector of int8_t values to store
1292 \return \c true if the tag was modified/created successfully
1293 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1295 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1297 // can't do anything if TagData not parsed
1298 if ( SupportData.HasCoreOnly )
1301 // remove existing tag if present
1305 // add tag record with new values
1306 return AddTag(tag, values);
1309 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1310 \brief Edits a BAM tag field containing a numeric array.
1312 If \a tag does not exist, a new entry is created.
1314 \param tag 2-character tag name
1315 \param value vector of uint16_t values to store
1317 \return \c true if the tag was modified/created successfully
1318 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1320 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1322 // can't do anything if TagData not parsed
1323 if ( SupportData.HasCoreOnly )
1326 // remove existing tag if present
1330 // add tag record with new values
1331 return AddTag(tag, values);
1334 /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1335 \brief Edits a BAM tag field containing a numeric array.
1337 If \a tag does not exist, a new entry is created.
1339 \param tag 2-character tag name
1340 \param value vector of int16_t values to store
1342 \return \c true if the tag was modified/created successfully
1343 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1345 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1347 // can't do anything if TagData not parsed
1348 if ( SupportData.HasCoreOnly )
1351 // remove existing tag if present
1355 // add tag record with new values
1356 return AddTag(tag, values);
1359 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1360 \brief Edits a BAM tag field containing a numeric array.
1362 If \a tag does not exist, a new entry is created.
1364 \param tag 2-character tag name
1365 \param value vector of uint32_t values to store
1367 \return \c true if the tag was modified/created successfully
1368 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1370 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1372 // can't do anything if TagData not parsed
1373 if ( SupportData.HasCoreOnly )
1376 // remove existing tag if present
1380 // add tag record with new values
1381 return AddTag(tag, values);
1384 /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1385 \brief Edits a BAM tag field containing a numeric array.
1387 If \a tag does not exist, a new entry is created.
1389 \param tag 2-character tag name
1390 \param value vector of int32_t values to store
1392 \return \c true if the tag was modified/created successfully
1393 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1395 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1397 // can't do anything if TagData not parsed
1398 if ( SupportData.HasCoreOnly )
1401 // remove existing tag if present
1405 // add tag record with new values
1406 return AddTag(tag, values);
1409 /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1410 \brief Edits a BAM tag field containing a numeric array.
1412 If \a tag does not exist, a new entry is created.
1414 \param tag 2-character tag name
1415 \param value vector of float values to store
1417 \return \c true if the tag was modified/created successfully
1418 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1420 bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1422 // can't do anything if TagData not parsed
1423 if ( SupportData.HasCoreOnly )
1426 // remove existing tag if present
1430 // add tag record with new values
1431 return AddTag(tag, values);
1434 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
1437 Searches for requested tag in BAM tag data.
1439 \param tag requested 2-character tag name
1440 \param pTagData pointer to current position in BamAlignment::TagData
1441 \param tagDataLength length of BamAlignment::TagData
1442 \param numBytesParsed number of bytes parsed so far
1444 \return \c true if found
1446 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
1447 \a numBytesParsed will correspond to the position in the full TagData string.
1450 bool BamAlignment::FindTag(const std::string& tag,
1452 const unsigned int& tagDataLength,
1453 unsigned int& numBytesParsed) const
1456 while ( numBytesParsed < tagDataLength ) {
1458 const char* pTagType = pTagData;
1459 const char* pTagStorageType = pTagData + 2;
1461 numBytesParsed += 3;
1463 // check the current tag, return true on match
1464 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
1467 // get the storage class and find the next tag
1468 if ( *pTagStorageType == '\0' ) return false;
1469 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
1470 if ( *pTagData == '\0' ) return false;
1473 // checked all tags, none match
1477 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1478 \brief Retrieves value of edit distance tag ("NM").
1480 \deprecated Instead use BamAlignment::GetTag()
1482 BamAlignment::GetTag("NM", editDistance);
1485 \param editDistance destination for retrieved value
1487 \return \c true if found
1489 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1490 return GetTag("NM", (uint32_t&)editDistance);
1493 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1494 \brief Calculates alignment end position, based on starting position and CIGAR data.
1496 \param usePadded Inserted bases affect reported position. Default is false, so that reported
1497 position stays 'sync-ed' with reference coordinates.
1498 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1499 when using BAM data with half-open formats (e.g. BED).
1501 \return alignment end position
1503 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1505 // initialize alignment end to starting position
1506 int alignEnd = Position;
1508 // iterate over cigar operations
1509 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1510 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1511 for ( ; cigarIter != cigarEnd; ++cigarIter) {
1512 const char cigarType = (*cigarIter).Type;
1513 const uint32_t& cigarLength = (*cigarIter).Length;
1515 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1516 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1517 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1518 alignEnd += cigarLength;
1519 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1520 alignEnd += cigarLength;
1523 // adjust for zero-based coordinates, if requested
1524 if ( zeroBased ) alignEnd -= 1;
1530 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1531 \brief Retrieves value of read group tag ("RG").
1533 \deprecated Instead use BamAlignment::GetTag()
1535 BamAlignment::GetTag("RG", readGroup);
1538 \param readGroup destination for retrieved value
1540 \return \c true if found
1542 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1543 return GetTag("RG", readGroup);
1546 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1547 \brief Retrieves the string value associated with a BAM tag.
1549 \param tag 2-character tag name
1550 \param destination destination for retrieved value
1552 \return \c true if found
1554 bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1556 // make sure tag data exists
1557 if ( SupportData.HasCoreOnly || TagData.empty() )
1560 // localize the tag data
1561 char* pTagData = (char*)TagData.data();
1562 const unsigned int tagDataLength = TagData.size();
1563 unsigned int numBytesParsed = 0;
1566 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1567 const unsigned int dataLength = strlen(pTagData);
1568 destination.clear();
1569 destination.resize(dataLength);
1570 memcpy( (char*)destination.data(), pTagData, dataLength );
1574 // tag not found, return failure
1578 /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1579 \brief Retrieves the unsigned integer value associated with a BAM tag.
1581 \param tag 2-character tag name
1582 \param destination destination for retrieved value
1584 \return \c true if found
1586 bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1588 // make sure tag data exists
1589 if ( SupportData.HasCoreOnly || TagData.empty() )
1592 // localize the tag data
1593 char* pTagData = (char*)TagData.data();
1594 const unsigned int tagDataLength = TagData.size();
1595 unsigned int numBytesParsed = 0;
1598 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1600 // determine data byte-length
1601 const char type = *(pTagData - 1);
1602 int destinationLength = 0;
1606 case (Constants::BAM_TAG_TYPE_ASCII) :
1607 case (Constants::BAM_TAG_TYPE_INT8) :
1608 case (Constants::BAM_TAG_TYPE_UINT8) :
1609 destinationLength = 1;
1613 case (Constants::BAM_TAG_TYPE_INT16) :
1614 case (Constants::BAM_TAG_TYPE_UINT16) :
1615 destinationLength = 2;
1619 case (Constants::BAM_TAG_TYPE_INT32) :
1620 case (Constants::BAM_TAG_TYPE_UINT32) :
1621 destinationLength = 4;
1624 // unsupported type for integer destination (float or var-length strings)
1625 case (Constants::BAM_TAG_TYPE_FLOAT) :
1626 case (Constants::BAM_TAG_TYPE_STRING) :
1627 case (Constants::BAM_TAG_TYPE_HEX) :
1628 case (Constants::BAM_TAG_TYPE_ARRAY) :
1629 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1630 << " in integer destination" << endl;
1635 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1640 // store in destination
1642 memcpy(&destination, pTagData, destinationLength);
1646 // tag not found, return failure
1650 /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1651 \brief Retrieves the signed integer value associated with a BAM tag.
1653 \param tag 2-character tag name
1654 \param destination destination for retrieved value
1656 \return \c true if found
1658 bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1659 return GetTag(tag, (uint32_t&)destination);
1662 /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1663 \brief Retrieves the floating-point value associated with a BAM tag.
1665 \param tag 2-character tag name
1666 \param destination destination for retrieved value
1668 \return \c true if found
1670 bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1672 // make sure tag data exists
1673 if ( SupportData.HasCoreOnly || TagData.empty() )
1676 // localize the tag data
1677 char* pTagData = (char*)TagData.data();
1678 const unsigned int tagDataLength = TagData.size();
1679 unsigned int numBytesParsed = 0;
1682 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1684 // determine data byte-length
1685 const char type = *(pTagData - 1);
1686 int destinationLength = 0;
1690 case (Constants::BAM_TAG_TYPE_ASCII) :
1691 case (Constants::BAM_TAG_TYPE_INT8) :
1692 case (Constants::BAM_TAG_TYPE_UINT8) :
1693 destinationLength = 1;
1697 case (Constants::BAM_TAG_TYPE_INT16) :
1698 case (Constants::BAM_TAG_TYPE_UINT16) :
1699 destinationLength = 2;
1703 case (Constants::BAM_TAG_TYPE_FLOAT) :
1704 case (Constants::BAM_TAG_TYPE_INT32) :
1705 case (Constants::BAM_TAG_TYPE_UINT32) :
1706 destinationLength = 4;
1709 // unsupported type (var-length strings)
1710 case (Constants::BAM_TAG_TYPE_STRING) :
1711 case (Constants::BAM_TAG_TYPE_HEX) :
1712 case (Constants::BAM_TAG_TYPE_ARRAY) :
1713 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1714 << " in float destination" << endl;
1719 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1724 // store in destination
1726 memcpy(&destination, pTagData, destinationLength);
1730 // tag not found, return failure
1734 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1735 \brief Retrieves the numeric array data associated with a BAM tag
1737 \param tag 2-character tag name
1738 \param destination destination for retrieved data
1740 \return \c true if found
1742 bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1744 // make sure tag data exists
1745 if ( SupportData.HasCoreOnly || TagData.empty() )
1748 // localize the tag data
1749 char* pTagData = (char*)TagData.data();
1750 const unsigned int tagDataLength = TagData.size();
1751 unsigned int numBytesParsed = 0;
1753 // return false if tag not found
1754 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1757 // check that tag is array type
1758 const char tagType = *(pTagData - 1);
1759 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1760 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1761 << tag << " in array destination" << endl;
1765 // calculate length of each element in tag's array
1766 const char elementType = *pTagData;
1768 int elementLength = 0;
1769 switch ( elementType ) {
1770 case (Constants::BAM_TAG_TYPE_ASCII) :
1771 case (Constants::BAM_TAG_TYPE_INT8) :
1772 case (Constants::BAM_TAG_TYPE_UINT8) :
1773 elementLength = sizeof(uint8_t);
1776 case (Constants::BAM_TAG_TYPE_INT16) :
1777 case (Constants::BAM_TAG_TYPE_UINT16) :
1778 elementLength = sizeof(uint16_t);
1781 case (Constants::BAM_TAG_TYPE_INT32) :
1782 case (Constants::BAM_TAG_TYPE_UINT32) :
1783 elementLength = sizeof(uint32_t);
1786 // unsupported type for integer destination (float or var-length data)
1787 case (Constants::BAM_TAG_TYPE_FLOAT) :
1788 case (Constants::BAM_TAG_TYPE_STRING) :
1789 case (Constants::BAM_TAG_TYPE_HEX) :
1790 case (Constants::BAM_TAG_TYPE_ARRAY) :
1791 cerr << "BamAlignment ERROR: array element type: " << elementType
1792 << " cannot be stored in integer value" << endl;
1797 cerr << "BamAlignment ERROR: unknown element type encountered: "
1798 << elementType << endl;
1802 // get number of elements
1803 int32_t numElements;
1804 memcpy(&numElements, pTagData, sizeof(int32_t));
1806 destination.clear();
1807 destination.reserve(numElements);
1811 for ( int i = 0 ; i < numElements; ++i ) {
1812 memcpy(&value, pTagData, sizeof(uint32_t));
1813 pTagData += sizeof(uint32_t);
1814 destination.push_back(value);
1821 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1822 \brief Retrieves the numeric array data associated with a BAM tag
1824 \param tag 2-character tag name
1825 \param destination destination for retrieved data
1827 \return \c true if found
1829 bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1831 // make sure tag data exists
1832 if ( SupportData.HasCoreOnly || TagData.empty() )
1835 // localize the tag data
1836 char* pTagData = (char*)TagData.data();
1837 const unsigned int tagDataLength = TagData.size();
1838 unsigned int numBytesParsed = 0;
1840 // return false if tag not found
1841 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1844 // check that tag is array type
1845 const char tagType = *(pTagData - 1);
1846 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1847 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1848 << tag << " in array destination" << endl;
1852 // calculate length of each element in tag's array
1853 const char elementType = *pTagData;
1855 int elementLength = 0;
1856 switch ( elementType ) {
1857 case (Constants::BAM_TAG_TYPE_ASCII) :
1858 case (Constants::BAM_TAG_TYPE_INT8) :
1859 case (Constants::BAM_TAG_TYPE_UINT8) :
1860 elementLength = sizeof(uint8_t);
1863 case (Constants::BAM_TAG_TYPE_INT16) :
1864 case (Constants::BAM_TAG_TYPE_UINT16) :
1865 elementLength = sizeof(uint16_t);
1868 case (Constants::BAM_TAG_TYPE_INT32) :
1869 case (Constants::BAM_TAG_TYPE_UINT32) :
1870 elementLength = sizeof(uint32_t);
1873 // unsupported type for integer destination (float or var-length data)
1874 case (Constants::BAM_TAG_TYPE_FLOAT) :
1875 case (Constants::BAM_TAG_TYPE_STRING) :
1876 case (Constants::BAM_TAG_TYPE_HEX) :
1877 case (Constants::BAM_TAG_TYPE_ARRAY) :
1878 cerr << "BamAlignment ERROR: array element type: " << elementType
1879 << " cannot be stored in integer value" << endl;
1884 cerr << "BamAlignment ERROR: unknown element type encountered: "
1885 << elementType << endl;
1889 // get number of elements
1890 int32_t numElements;
1891 memcpy(&numElements, pTagData, sizeof(int32_t));
1893 destination.clear();
1894 destination.reserve(numElements);
1898 for ( int i = 0 ; i < numElements; ++i ) {
1899 memcpy(&value, pTagData, sizeof(int32_t));
1900 pTagData += sizeof(int32_t);
1901 destination.push_back(value);
1909 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
1910 \brief Retrieves the numeric array data associated with a BAM tag
1912 \param tag 2-character tag name
1913 \param destination destination for retrieved data
1915 \return \c true if found
1917 bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
1919 // make sure tag data exists
1920 if ( SupportData.HasCoreOnly || TagData.empty() )
1923 // localize the tag data
1924 char* pTagData = (char*)TagData.data();
1925 const unsigned int tagDataLength = TagData.size();
1926 unsigned int numBytesParsed = 0;
1928 // return false if tag not found
1929 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1932 // check that tag is array type
1933 const char tagType = *(pTagData - 1);
1934 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1935 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1936 << tag << " in array destination" << endl;
1940 // calculate length of each element in tag's array
1941 const char elementType = *pTagData;
1943 int elementLength = 0;
1944 switch ( elementType ) {
1945 case (Constants::BAM_TAG_TYPE_ASCII) :
1946 case (Constants::BAM_TAG_TYPE_INT8) :
1947 case (Constants::BAM_TAG_TYPE_UINT8) :
1948 elementLength = sizeof(uint8_t);
1951 case (Constants::BAM_TAG_TYPE_INT16) :
1952 case (Constants::BAM_TAG_TYPE_UINT16) :
1953 elementLength = sizeof(uint16_t);
1956 case (Constants::BAM_TAG_TYPE_INT32) :
1957 case (Constants::BAM_TAG_TYPE_UINT32) :
1958 case (Constants::BAM_TAG_TYPE_FLOAT) :
1959 elementLength = sizeof(uint32_t);
1962 // unsupported type for float destination (var-length data)
1963 case (Constants::BAM_TAG_TYPE_STRING) :
1964 case (Constants::BAM_TAG_TYPE_HEX) :
1965 case (Constants::BAM_TAG_TYPE_ARRAY) :
1966 cerr << "BamAlignment ERROR: array element type: " << elementType
1967 << " cannot be stored in float value" << endl;
1972 cerr << "BamAlignment ERROR: unknown element type encountered: "
1973 << elementType << endl;
1977 // get number of elements
1978 int32_t numElements;
1979 memcpy(&numElements, pTagData, sizeof(int32_t));
1981 destination.clear();
1982 destination.reserve(numElements);
1986 for ( int i = 0 ; i < numElements; ++i ) {
1987 memcpy(&value, pTagData, sizeof(float));
1988 pTagData += sizeof(float);
1989 destination.push_back(value);
1996 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
1997 \brief Retrieves the BAM tag type-code associated with requested tag name.
1999 \param tag 2-character tag name
2000 \param type destination for the retrieved (1-character) tag type
2002 \return \c true if found
2003 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
2005 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
2007 // make sure tag data exists
2008 if ( SupportData.HasCoreOnly || TagData.empty() )
2011 // localize the tag data
2012 char* pTagData = (char*)TagData.data();
2013 const unsigned int tagDataLength = TagData.size();
2014 unsigned int numBytesParsed = 0;
2017 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
2019 // retrieve tag type code
2020 type = *(pTagData - 1);
2022 // validate that type is a proper BAM tag type
2024 case (Constants::BAM_TAG_TYPE_ASCII) :
2025 case (Constants::BAM_TAG_TYPE_INT8) :
2026 case (Constants::BAM_TAG_TYPE_UINT8) :
2027 case (Constants::BAM_TAG_TYPE_INT16) :
2028 case (Constants::BAM_TAG_TYPE_UINT16) :
2029 case (Constants::BAM_TAG_TYPE_INT32) :
2030 case (Constants::BAM_TAG_TYPE_UINT32) :
2031 case (Constants::BAM_TAG_TYPE_FLOAT) :
2032 case (Constants::BAM_TAG_TYPE_STRING) :
2033 case (Constants::BAM_TAG_TYPE_HEX) :
2034 case (Constants::BAM_TAG_TYPE_ARRAY) :
2039 cerr << "BamAlignment ERROR: unknown tag type encountered: "
2045 // tag not found, return failure
2049 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2050 \brief Returns true if alignment has a record for requested tag.
2051 \param tag 2-character tag name
2052 \return \c true if alignment has a record for tag
2054 bool BamAlignment::HasTag(const std::string& tag) const {
2056 // return false if no tag data present
2057 if ( SupportData.HasCoreOnly || TagData.empty() )
2060 // localize the tag data for lookup
2061 char* pTagData = (char*)TagData.data();
2062 const unsigned int tagDataLength = TagData.size();
2063 unsigned int numBytesParsed = 0;
2065 // if result of tag lookup
2066 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2069 /*! \fn bool BamAlignment::IsDuplicate(void) const
2070 \return \c true if this read is a PCR duplicate
2072 bool BamAlignment::IsDuplicate(void) const {
2073 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2076 /*! \fn bool BamAlignment::IsFailedQC(void) const
2077 \return \c true if this read failed quality control
2079 bool BamAlignment::IsFailedQC(void) const {
2080 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2083 /*! \fn bool BamAlignment::IsFirstMate(void) const
2084 \return \c true if alignment is first mate on paired-end read
2086 bool BamAlignment::IsFirstMate(void) const {
2087 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2090 /*! \fn bool BamAlignment::IsMapped(void) const
2091 \return \c true if alignment is mapped
2093 bool BamAlignment::IsMapped(void) const {
2094 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2097 /*! \fn bool BamAlignment::IsMateMapped(void) const
2098 \return \c true if alignment's mate is mapped
2100 bool BamAlignment::IsMateMapped(void) const {
2101 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2104 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2105 \return \c true if alignment's mate mapped to reverse strand
2107 bool BamAlignment::IsMateReverseStrand(void) const {
2108 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2111 /*! \fn bool BamAlignment::IsPaired(void) const
2112 \return \c true if alignment part of paired-end read
2114 bool BamAlignment::IsPaired(void) const {
2115 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2118 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2119 \return \c true if reported position is primary alignment
2121 bool BamAlignment::IsPrimaryAlignment(void) const {
2122 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2125 /*! \fn bool BamAlignment::IsProperPair(void) const
2126 \return \c true if alignment is part of read that satisfied paired-end resolution
2128 bool BamAlignment::IsProperPair(void) const {
2129 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2132 /*! \fn bool BamAlignment::IsReverseStrand(void) const
2133 \return \c true if alignment mapped to reverse strand
2135 bool BamAlignment::IsReverseStrand(void) const {
2136 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2139 /*! \fn bool BamAlignment::IsSecondMate(void) const
2140 \return \c true if alignment is second mate on read
2142 bool BamAlignment::IsSecondMate(void) const {
2143 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2146 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
2149 Checks that tag name & type strings are expected sizes.
2150 \a tag should have length
2151 \a type should have length 1
2153 \param tag BAM tag name
2154 \param type BAM tag type-code
2156 \return \c true if both \a tag and \a type are correct sizes
2158 bool BamAlignment::IsValidSize(const string& tag, const string& type) const {
2159 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
2160 (type.size() == Constants::BAM_TAG_TYPESIZE);
2163 /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2164 \brief Removes field from BAM tags.
2166 \return \c true if tag was removed successfully (or didn't exist before)
2168 bool BamAlignment::RemoveTag(const std::string& tag) {
2170 // skip if no tag data available
2171 if ( SupportData.HasCoreOnly || TagData.empty() )
2174 // localize the tag data
2175 char* pOriginalTagData = (char*)TagData.data();
2176 char* pTagData = pOriginalTagData;
2177 const unsigned int originalTagDataLength = TagData.size();
2178 unsigned int newTagDataLength = 0;
2179 unsigned int numBytesParsed = 0;
2182 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2184 char* newTagData = new char[originalTagDataLength];
2186 // copy original tag data up til desired tag
2188 numBytesParsed -= 3;
2189 const unsigned int beginningTagDataLength = numBytesParsed;
2190 newTagDataLength += beginningTagDataLength;
2191 memcpy(newTagData, pOriginalTagData, numBytesParsed);
2193 // skip to next tag (if tag for removal is last, return true)
2194 const char* pTagStorageType = pTagData + 2;
2196 numBytesParsed += 3;
2197 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2200 // copy everything from current tag (the next one after tag for removal) to end
2201 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2202 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2203 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2205 // save new tag data
2206 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2208 delete[] newTagData;
2213 // tag not found, no removal - return failure
2217 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2218 \brief Sets value of "PCR duplicate" flag to \a ok.
2220 void BamAlignment::SetIsDuplicate(bool ok) {
2221 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2222 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2225 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2226 \brief Sets "failed quality control" flag to \a ok.
2228 void BamAlignment::SetIsFailedQC(bool ok) {
2229 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2230 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2233 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2234 \brief Sets "alignment is first mate" flag to \a ok.
2236 void BamAlignment::SetIsFirstMate(bool ok) {
2237 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2238 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2241 /*! \fn void BamAlignment::SetIsMapped(bool ok)
2242 \brief Sets "alignment is mapped" flag to \a ok.
2244 void BamAlignment::SetIsMapped(bool ok) {
2245 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2246 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2249 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2250 \brief Sets "alignment's mate is mapped" flag to \a ok.
2252 void BamAlignment::SetIsMateMapped(bool ok) {
2253 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2254 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2257 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2258 \brief Complement of using SetIsMateMapped().
2259 \deprecated For sake of symmetry with the query methods
2260 \sa IsMateMapped(), SetIsMateMapped()
2262 void BamAlignment::SetIsMateUnmapped(bool ok) {
2263 SetIsMateMapped(!ok);
2266 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2267 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2269 void BamAlignment::SetIsMateReverseStrand(bool ok) {
2270 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2271 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2274 /*! \fn void BamAlignment::SetIsPaired(bool ok)
2275 \brief Sets "alignment part of paired-end read" flag to \a ok.
2277 void BamAlignment::SetIsPaired(bool ok) {
2278 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2279 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2282 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2283 \brief Sets "position is primary alignment" flag to \a ok.
2285 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2286 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2287 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2290 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2291 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2293 void BamAlignment::SetIsProperPair(bool ok) {
2294 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2295 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2298 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2299 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2301 void BamAlignment::SetIsReverseStrand(bool ok) {
2302 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2303 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2306 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2307 \brief Complement of using SetIsPrimaryAlignment().
2308 \deprecated For sake of symmetry with the query methods
2309 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2311 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2312 SetIsPrimaryAlignment(!ok);
2315 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2316 \brief Sets "alignment is second mate on read" flag to \a ok.
2318 void BamAlignment::SetIsSecondMate(bool ok) {
2319 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2320 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2323 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2324 \brief Complement of using SetIsMapped().
2325 \deprecated For sake of symmetry with the query methods
2326 \sa IsMapped(), SetIsMapped()
2328 void BamAlignment::SetIsUnmapped(bool ok) {
2332 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
2335 Moves to next available tag in tag data string
2337 \param storageType BAM tag type-code that determines how far to move cursor
2338 \param pTagData pointer to current position (cursor) in tag string
2339 \param numBytesParsed report of how many bytes were parsed (cumulatively)
    \return \c true if storageType was a recognized BAM tag type
2342 \post \a pTagData will point to the byte where the next tag data begins.
2343 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
2345 bool BamAlignment::SkipToNextTag(const char storageType,
2347 unsigned int& numBytesParsed) const
2349 switch (storageType) {
2351 case (Constants::BAM_TAG_TYPE_ASCII) :
2352 case (Constants::BAM_TAG_TYPE_INT8) :
2353 case (Constants::BAM_TAG_TYPE_UINT8) :
2358 case (Constants::BAM_TAG_TYPE_INT16) :
2359 case (Constants::BAM_TAG_TYPE_UINT16) :
2360 numBytesParsed += sizeof(uint16_t);
2361 pTagData += sizeof(uint16_t);
2364 case (Constants::BAM_TAG_TYPE_FLOAT) :
2365 case (Constants::BAM_TAG_TYPE_INT32) :
2366 case (Constants::BAM_TAG_TYPE_UINT32) :
2367 numBytesParsed += sizeof(uint32_t);
2368 pTagData += sizeof(uint32_t);
2371 case (Constants::BAM_TAG_TYPE_STRING) :
2372 case (Constants::BAM_TAG_TYPE_HEX) :
2373 while( *pTagData ) {
2377 // increment for null-terminator
2382 case (Constants::BAM_TAG_TYPE_ARRAY) :
2386 const char arrayType = *pTagData;
2390 // read number of elements
2391 int32_t numElements;
2392 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
2393 numBytesParsed += sizeof(uint32_t);
2394 pTagData += sizeof(uint32_t);
2396 // calculate number of bytes to skip
2397 int bytesToSkip = 0;
2398 switch (arrayType) {
2399 case (Constants::BAM_TAG_TYPE_INT8) :
2400 case (Constants::BAM_TAG_TYPE_UINT8) :
2401 bytesToSkip = numElements;
2403 case (Constants::BAM_TAG_TYPE_INT16) :
2404 case (Constants::BAM_TAG_TYPE_UINT16) :
2405 bytesToSkip = numElements*sizeof(uint16_t);
2407 case (Constants::BAM_TAG_TYPE_FLOAT) :
2408 case (Constants::BAM_TAG_TYPE_INT32) :
2409 case (Constants::BAM_TAG_TYPE_UINT32) :
2410 bytesToSkip = numElements*sizeof(uint32_t);
2413 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
2414 << arrayType << endl;
2418 // skip binary array contents
2419 numBytesParsed += bytesToSkip;
2420 pTagData += bytesToSkip;
2425 cerr << "BamAlignment ERROR: unknown tag type encountered"
2426 << storageType << endl;