1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 #include <api/BamConstants.h>
13 using namespace BamTools;
/*! \class BamTools::BamAlignment
    \brief The main BAM alignment data structure.

    Provides methods to query/modify BAM alignment data fields.
*/
/*! \var BamAlignment::Name
    \brief read name
*/
/*! \var BamAlignment::Length
    \brief length of query sequence
*/
/*! \var BamAlignment::QueryBases
    \brief 'original' sequence (as reported from sequencing machine)
*/
/*! \var BamAlignment::AlignedBases
    \brief 'aligned' sequence (includes any indels, padding, clipping)
*/
/*! \var BamAlignment::Qualities
    \brief FASTQ qualities (ASCII characters, not numeric values)
*/
/*! \var BamAlignment::TagData
    \brief tag data (use the provided methods to query/modify)
*/
/*! \var BamAlignment::RefID
    \brief ID number for reference sequence
*/
/*! \var BamAlignment::Position
    \brief position (0-based) where alignment starts
*/
/*! \var BamAlignment::Bin
    \brief BAM (standard) index bin number for this alignment
*/
/*! \var BamAlignment::MapQuality
    \brief mapping quality score
*/
/*! \var BamAlignment::AlignmentFlag
    \brief alignment bit-flag (use the provided methods to query/modify)
*/
/*! \var BamAlignment::CigarData
    \brief CIGAR operations for this alignment
*/
/*! \var BamAlignment::MateRefID
    \brief ID number for reference sequence where alignment's mate was aligned
*/
/*! \var BamAlignment::MatePosition
    \brief position (0-based) where alignment's mate starts
*/
/*! \var BamAlignment::InsertSize
    \brief mate-pair insert size
*/
/*! \var BamAlignment::Filename
    \brief name of BAM file which this alignment comes from
*/
/*! \fn BamAlignment::BamAlignment(void)
    \brief constructor
*/
// Default constructor.
// NOTE(review): only the signature is visible in this excerpt; the member-initializer
// list and the body are not shown here, so the initial field values cannot be
// documented from this view - confirm against the full file.
82 BamAlignment::BamAlignment(void)
90 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
91 \brief copy constructor
93 BamAlignment::BamAlignment(const BamAlignment& other)
95 , Length(other.Length)
96 , QueryBases(other.QueryBases)
97 , AlignedBases(other.AlignedBases)
98 , Qualities(other.Qualities)
99 , TagData(other.TagData)
101 , Position(other.Position)
103 , MapQuality(other.MapQuality)
104 , AlignmentFlag(other.AlignmentFlag)
105 , CigarData(other.CigarData)
106 , MateRefID(other.MateRefID)
107 , MatePosition(other.MatePosition)
108 , InsertSize(other.InsertSize)
109 , Filename(other.Filename)
110 , SupportData(other.SupportData)
113 /*! \fn BamAlignment::~BamAlignment(void)
116 BamAlignment::~BamAlignment(void) { }
118 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
119 \brief Adds a field with string data to the BAM tags.
121 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
123 \param tag 2-character tag name
124 \param type 1-character tag type (must be "Z" or "H")
125 \param value string data to store
127 \return \c true if the \b new tag was added successfully
128 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
130 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
132 // skip if core data not parsed
133 if ( SupportData.HasCoreOnly ) return false;
135 // validate tag/type size & that type is OK for string value
136 if ( !IsValidSize(tag, type) ) return false;
137 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
138 type.at(0) != Constants::BAM_TAG_TYPE_HEX
144 // localize the tag data
145 char* pTagData = (char*)TagData.data();
146 const unsigned int tagDataLength = TagData.size();
147 unsigned int numBytesParsed = 0;
149 // if tag already exists, return false
150 // use EditTag explicitly instead
151 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
154 // otherwise, copy tag data to temp buffer
155 string newTag = tag + type + value;
156 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
157 char* originalTagData = new char[newTagDataLength];
158 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
161 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
163 // store temp buffer back in TagData
164 const char* newTagData = (const char*)originalTagData;
165 TagData.assign(newTagData, newTagDataLength);
167 delete[] originalTagData;
173 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
174 \brief Adds a field with unsigned integer data to the BAM tags.
176 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
178 \param tag 2-character tag name
179 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
180 \param value unsigned int data to store
182 \return \c true if the \b new tag was added successfully
183 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
185 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
187 // skip if core data not parsed
188 if ( SupportData.HasCoreOnly ) return false;
190 // validate tag/type size & that type is OK for uint32_t value
191 if ( !IsValidSize(tag, type) ) return false;
192 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
193 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
194 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
195 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
201 // localize the tag data
202 char* pTagData = (char*)TagData.data();
203 const unsigned int tagDataLength = TagData.size();
204 unsigned int numBytesParsed = 0;
206 // if tag already exists, return false
207 // use EditTag explicitly instead
208 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
211 // otherwise, convert value to string
212 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
215 // copy original tag data to temp buffer
216 string newTag = tag + type;
217 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
218 char* originalTagData = new char[newTagDataLength];
219 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
222 strcat(originalTagData + tagDataLength, newTag.data());
223 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
225 // store temp buffer back in TagData
226 const char* newTagData = (const char*)originalTagData;
227 TagData.assign(newTagData, newTagDataLength);
228 delete[] originalTagData;
234 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
235 \brief Adds a field with signed integer data to the BAM tags.
237 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
239 \param tag 2-character tag name
240 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
241 \param value signed int data to store
243 \return \c true if the \b new tag was added successfully
244 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
246 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
247 return AddTag(tag, type, (const uint32_t&)value);
250 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
251 \brief Adds a field with floating-point data to the BAM tags.
253 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
255 \param tag 2-character tag name
256 \param type 1-character tag type (must NOT be "Z", "H", or "B")
257 \param value float data to store
259 \return \c true if the \b new tag was added successfully
260 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
262 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
264 // skip if core data not parsed
265 if ( SupportData.HasCoreOnly ) return false;
267 // validate tag/type size & that type is OK for float value
268 if ( !IsValidSize(tag, type) ) return false;
269 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
270 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
271 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
277 // localize the tag data
278 char* pTagData = (char*)TagData.data();
279 const unsigned int tagDataLength = TagData.size();
280 unsigned int numBytesParsed = 0;
282 // if tag already exists, return false
283 // use EditTag explicitly instead
284 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
287 // otherwise, convert value to string
288 union { float value; char valueBuffer[sizeof(float)]; } un;
291 // copy original tag data to temp buffer
292 string newTag = tag + type;
293 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
294 char* originalTagData = new char[newTagDataLength];
295 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
298 strcat(originalTagData + tagDataLength, newTag.data());
299 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
301 // store temp buffer back in TagData
302 const char* newTagData = (const char*)originalTagData;
303 TagData.assign(newTagData, newTagDataLength);
305 delete[] originalTagData;
311 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
312 \brief Adds a numeric array field to the BAM tags.
314 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
316 \param tag 2-character tag name
317 \param values vector of uint8_t values to store
319 \return \c true if the \b new tag was added successfully
320 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
322 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
324 // skip if core data not parsed
325 if ( SupportData.HasCoreOnly ) return false;
327 // check for valid tag length
328 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
330 // localize the tag data
331 char* pTagData = (char*)TagData.data();
332 const unsigned int tagDataLength = TagData.size();
333 unsigned int numBytesParsed = 0;
335 // if tag already exists, return false
336 // use EditTag explicitly instead
337 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
340 // build new tag's base information
341 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
342 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
343 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
344 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
346 // add number of array elements to newTagBase
347 const int32_t numElements = values.size();
348 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
350 // copy current TagData string to temp buffer, leaving room for new tag's contents
351 const int newTagDataLength = tagDataLength +
352 Constants::BAM_TAG_ARRAYBASE_SIZE +
353 numElements*sizeof(uint8_t);
354 char* originalTagData = new char[newTagDataLength];
355 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
357 // write newTagBase (removes old null term)
358 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
360 // add vector elements to tag
361 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
362 for ( int i = 0 ; i < numElements; ++i ) {
363 const uint8_t value = values.at(i);
364 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
365 &value, sizeof(uint8_t));
368 // store temp buffer back in TagData
369 const char* newTagData = (const char*)originalTagData;
370 TagData.assign(newTagData, newTagDataLength);
372 delete[] originalTagData;
378 /*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
379 \brief Adds a numeric array field to the BAM tags.
381 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
383 \param tag 2-character tag name
384 \param values vector of int8_t values to store
386 \return \c true if the \b new tag was added successfully
387 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
389 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
391 // skip if core data not parsed
392 if ( SupportData.HasCoreOnly ) return false;
394 // check for valid tag length
395 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
397 // localize the tag data
398 char* pTagData = (char*)TagData.data();
399 const unsigned int tagDataLength = TagData.size();
400 unsigned int numBytesParsed = 0;
402 // if tag already exists, return false
403 // use EditTag explicitly instead
404 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
407 // build new tag's base information
408 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
409 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
410 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
411 newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
413 // add number of array elements to newTagBase
414 const int32_t numElements = values.size();
415 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
417 // copy current TagData string to temp buffer, leaving room for new tag's contents
418 const int newTagDataLength = tagDataLength +
419 Constants::BAM_TAG_ARRAYBASE_SIZE +
420 numElements*sizeof(int8_t);
421 char* originalTagData = new char[newTagDataLength];
422 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
424 // write newTagBase (removes old null term)
425 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
427 // add vector elements to tag
428 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
429 for ( int i = 0 ; i < numElements; ++i ) {
430 const int8_t value = values.at(i);
431 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
432 &value, sizeof(int8_t));
435 // store temp buffer back in TagData
436 const char* newTagData = (const char*)originalTagData;
437 TagData.assign(newTagData, newTagDataLength);
439 delete[] originalTagData;
445 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
446 \brief Adds a numeric array field to the BAM tags.
448 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
450 \param tag 2-character tag name
451 \param values vector of uint16_t values to store
453 \return \c true if the \b new tag was added successfully
454 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
456 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
458 // skip if core data not parsed
459 if ( SupportData.HasCoreOnly ) return false;
461 // check for valid tag length
462 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
464 // localize the tag data
465 char* pTagData = (char*)TagData.data();
466 const unsigned int tagDataLength = TagData.size();
467 unsigned int numBytesParsed = 0;
469 // if tag already exists, return false
470 // use EditTag explicitly instead
471 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
474 // build new tag's base information
475 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
476 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
477 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
478 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
480 // add number of array elements to newTagBase
481 const int32_t numElements = values.size();
482 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
484 // copy current TagData string to temp buffer, leaving room for new tag's contents
485 const int newTagDataLength = tagDataLength +
486 Constants::BAM_TAG_ARRAYBASE_SIZE +
487 numElements*sizeof(uint16_t);
488 char* originalTagData = new char[newTagDataLength];
489 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
491 // write newTagBase (removes old null term)
492 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
494 // add vector elements to tag
495 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
496 for ( int i = 0 ; i < numElements; ++i ) {
497 const uint16_t value = values.at(i);
498 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
499 &value, sizeof(uint16_t));
502 // store temp buffer back in TagData
503 const char* newTagData = (const char*)originalTagData;
504 TagData.assign(newTagData, newTagDataLength);
506 delete[] originalTagData;
512 /*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
513 \brief Adds a numeric array field to the BAM tags.
515 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
517 \param tag 2-character tag name
518 \param values vector of int16_t values to store
520 \return \c true if the \b new tag was added successfully
521 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
523 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
525 // skip if core data not parsed
526 if ( SupportData.HasCoreOnly ) return false;
528 // check for valid tag length
529 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
531 // localize the tag data
532 char* pTagData = (char*)TagData.data();
533 const unsigned int tagDataLength = TagData.size();
534 unsigned int numBytesParsed = 0;
536 // if tag already exists, return false
537 // use EditTag explicitly instead
538 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
541 // build new tag's base information
542 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
543 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
544 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
545 newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
547 // add number of array elements to newTagBase
548 const int32_t numElements = values.size();
549 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
551 // copy current TagData string to temp buffer, leaving room for new tag's contents
552 const int newTagDataLength = tagDataLength +
553 Constants::BAM_TAG_ARRAYBASE_SIZE +
554 numElements*sizeof(int16_t);
555 char* originalTagData = new char[newTagDataLength];
556 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
558 // write newTagBase (removes old null term)
559 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
561 // add vector elements to tag
562 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
563 for ( int i = 0 ; i < numElements; ++i ) {
564 const int16_t value = values.at(i);
565 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
566 &value, sizeof(int16_t));
569 // store temp buffer back in TagData
570 const char* newTagData = (const char*)originalTagData;
571 TagData.assign(newTagData, newTagDataLength);
573 delete[] originalTagData;
579 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
580 \brief Adds a numeric array field to the BAM tags.
582 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
584 \param tag 2-character tag name
585 \param values vector of uint32_t values to store
587 \return \c true if the \b new tag was added successfully
588 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
590 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
592 // skip if core data not parsed
593 if ( SupportData.HasCoreOnly ) return false;
595 // check for valid tag length
596 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
598 // localize the tag data
599 char* pTagData = (char*)TagData.data();
600 const unsigned int tagDataLength = TagData.size();
601 unsigned int numBytesParsed = 0;
603 // if tag already exists, return false
604 // use EditTag explicitly instead
605 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
608 // build new tag's base information
609 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
610 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
611 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
612 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
614 // add number of array elements to newTagBase
615 const int32_t numElements = values.size();
616 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
618 // copy current TagData string to temp buffer, leaving room for new tag's contents
619 const int newTagDataLength = tagDataLength +
620 Constants::BAM_TAG_ARRAYBASE_SIZE +
621 numElements*sizeof(uint32_t);
622 char* originalTagData = new char[newTagDataLength];
623 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
625 // write newTagBase (removes old null term)
626 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
628 // add vector elements to tag
629 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
630 for ( int i = 0 ; i < numElements; ++i ) {
631 const uint32_t value = values.at(i);
632 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
633 &value, sizeof(uint32_t));
636 // store temp buffer back in TagData
637 const char* newTagData = (const char*)originalTagData;
638 TagData.assign(newTagData, newTagDataLength);
640 delete[] originalTagData;
646 /*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
647 \brief Adds a numeric array field to the BAM tags.
649 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
651 \param tag 2-character tag name
652 \param values vector of int32_t values to store
654 \return \c true if the \b new tag was added successfully
655 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
657 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
659 // skip if core data not parsed
660 if ( SupportData.HasCoreOnly ) return false;
662 // check for valid tag length
663 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
665 // localize the tag data
666 char* pTagData = (char*)TagData.data();
667 const unsigned int tagDataLength = TagData.size();
668 unsigned int numBytesParsed = 0;
670 // if tag already exists, return false
671 // use EditTag explicitly instead
672 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
675 // build new tag's base information
676 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
677 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
678 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
679 newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
681 // add number of array elements to newTagBase
682 const int32_t numElements = values.size();
683 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
685 // copy current TagData string to temp buffer, leaving room for new tag's contents
686 const int newTagDataLength = tagDataLength +
687 Constants::BAM_TAG_ARRAYBASE_SIZE +
688 numElements*sizeof(int32_t);
689 char* originalTagData = new char[newTagDataLength];
690 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
692 // write newTagBase (removes old null term)
693 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
695 // add vector elements to tag
696 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
697 for ( int i = 0 ; i < numElements; ++i ) {
698 const int32_t value = values.at(i);
699 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
700 &value, sizeof(int32_t));
703 // store temp buffer back in TagData
704 const char* newTagData = (const char*)originalTagData;
705 TagData.assign(newTagData, newTagDataLength);
707 delete[] originalTagData;
713 /*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
714 \brief Adds a numeric array field to the BAM tags.
716 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
718 \param tag 2-character tag name
719 \param values vector of float values to store
721 \return \c true if the \b new tag was added successfully
722 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
724 bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
726 // skip if core data not parsed
727 if ( SupportData.HasCoreOnly ) return false;
729 // check for valid tag length
730 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
732 // localize the tag data
733 char* pTagData = (char*)TagData.data();
734 const unsigned int tagDataLength = TagData.size();
735 unsigned int numBytesParsed = 0;
737 // if tag already exists, return false
738 // use EditTag explicitly instead
739 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
742 // build new tag's base information
743 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
744 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
745 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
746 newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
748 // add number of array elements to newTagBase
749 const int32_t numElements = values.size();
750 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
752 // copy current TagData string to temp buffer, leaving room for new tag's contents
753 const int newTagDataLength = tagDataLength +
754 Constants::BAM_TAG_ARRAYBASE_SIZE +
755 numElements*sizeof(float);
756 char* originalTagData = new char[newTagDataLength];
757 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
759 // write newTagBase (removes old null term)
760 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
762 // add vector elements to tag
763 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
764 for ( int i = 0 ; i < numElements; ++i ) {
765 const float value = values.at(i);
766 memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
767 &value, sizeof(float));
770 // store temp buffer back in TagData
771 const char* newTagData = (const char*)originalTagData;
772 TagData.assign(newTagData, newTagDataLength);
774 delete[] originalTagData;
/*! \fn bool BamAlignment::BuildCharData(void)
    \brief Populates alignment string fields (read name, bases, qualities, tag data).

    An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
    Using that method makes parsing much quicker when only positional data is required.

    However, if you later want to access the character data fields from such an alignment,
    use this method to populate those fields. This provides the ability to do 'lazy evaluation'
    of alignment parsing.

    \return \c true if character data populated successfully (or was already available to begin with)
*/
// Builds Name, QueryBases, Qualities, AlignedBases and TagData from the raw bytes
// held in SupportData.AllCharData, then clears SupportData.HasCoreOnly.
// NOTE(review): this excerpt omits a number of short lines (statements following
// some conditions, switch headers, break statements, closing braces). The comments
// below describe only the statements that are visible here.
792 bool BamAlignment::BuildCharData(void) {
794 // skip if char data already parsed
795 if ( !SupportData.HasCoreOnly )
798 // check system endianness
799 bool IsBigEndian = BamTools::SystemIsBigEndian();
801 // calculate character lengths/offsets
// Layout implied by the arithmetic below: QueryNameLength bytes of read name,
// 4 bytes per CIGAR operation, (QuerySequenceLength+1)/2 bytes of packed bases,
// QuerySequenceLength bytes of qualities; whatever remains is tag data.
802 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
803 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
804 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
805 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
// unsigned subtraction: wraps around if tagDataOffset > dataLength; hasTagData
// (computed below) is false in that case
806 const unsigned int tagDataLength = dataLength - tagDataOffset;
808 // check offsets to see what char data exists
809 const bool hasSeqData = ( seqDataOffset < dataLength );
810 const bool hasQualData = ( qualDataOffset < dataLength );
811 const bool hasTagData = ( tagDataOffset < dataLength );
813 // set up char buffers
// each section pointer is null (0) when the matching has*Data flag is false
814 const char* allCharData = SupportData.AllCharData.data();
815 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
816 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
// tagData is non-const: its address is passed to the SwapEndian_* helpers below,
// so it points into (and casts away the const-ness of) AllCharData's buffer
817 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
819 // store alignment name (relies on null char in name as terminator)
820 Name.assign((const char*)(allCharData));
822 // save query sequence
825 QueryBases.reserve(SupportData.QuerySequenceLength);
826 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
// two bases per byte: even i reads the high 4 bits (shift by 4), odd i the low
// 4 bits (shift by 0); the 4-bit code indexes Constants::BAM_DNA_LOOKUP
827 char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
828 QueryBases.append(1, singleBase);
832 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
835 Qualities.reserve(SupportData.QuerySequenceLength);
836 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
// one quality byte per base; adding 33 turns the numeric value into its ASCII form
837 char singleQuality = (char)(qualData[i]+33);
838 Qualities.append(1, singleQuality);
842 // clear previous AlignedBases
843 AlignedBases.clear();
845 // if QueryBases has data, build AlignedBases using CIGAR data
846 // otherwise, AlignedBases will remain empty (this case IS allowed)
847 if ( !QueryBases.empty() ) {
849 // resize AlignedBases
850 AlignedBases.reserve(SupportData.QuerySequenceLength);
852 // iterate over CigarOps
854 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
855 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
856 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
857 const CigarOp& op = (*cigarIter);
861 // for 'M', 'I', '=', 'X' - write bases
862 case (Constants::BAM_CIGAR_MATCH_CHAR) :
863 case (Constants::BAM_CIGAR_INS_CHAR) :
864 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
865 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
// copies op.Length bases of QueryBases starting at offset k
// NOTE(review): the declaration and updates of 'k' are not visible in this excerpt
866 AlignedBases.append(QueryBases.substr(k, op.Length));
869 // for 'S' - soft clip, do not write bases
870 // but increment placeholder 'k'
871 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
875 // for 'D' - write gap character
876 case (Constants::BAM_CIGAR_DEL_CHAR) :
877 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
880 // for 'P' - write padding character
881 case (Constants::BAM_CIGAR_PAD_CHAR) :
882 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
885 // for 'N' - write N's, skip bases in original query sequence
886 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
887 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
890 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
891 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
894 // shouldn't get here
896 cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
// Walk the tag records; 'i' is the byte offset of the current position in tagData.
// Multi-byte values are passed to the SwapEndian_* helpers.
// NOTE(review): presumably this walk runs only when IsBigEndian is true - the
// guarding condition is not visible in this excerpt; confirm against the full file.
908 while ( (unsigned int)i < tagDataLength ) {
910 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
911 const char type = tagData[i]; // get tag type at position i
912 ++i; // move i past tag type
916 case(Constants::BAM_TAG_TYPE_ASCII) :
917 case(Constants::BAM_TAG_TYPE_INT8) :
918 case(Constants::BAM_TAG_TYPE_UINT8) :
919 // no endian swapping necessary for single-byte data
923 case(Constants::BAM_TAG_TYPE_INT16) :
924 case(Constants::BAM_TAG_TYPE_UINT16) :
925 BamTools::SwapEndian_16p(&tagData[i]);
926 i += sizeof(uint16_t);
929 case(Constants::BAM_TAG_TYPE_FLOAT) :
930 case(Constants::BAM_TAG_TYPE_INT32) :
931 case(Constants::BAM_TAG_TYPE_UINT32) :
932 BamTools::SwapEndian_32p(&tagData[i]);
933 i += sizeof(uint32_t);
936 case(Constants::BAM_TAG_TYPE_HEX) :
937 case(Constants::BAM_TAG_TYPE_STRING) :
938 // no endian swapping necessary for hex-string/string data
941 // increment one more for null terminator
945 case(Constants::BAM_TAG_TYPE_ARRAY) :
// array field: one element-type byte, then a 32-bit element count, then the elements
949 const char arrayType = tagData[i];
952 // swap endian-ness of number of elements in place, then retrieve for loop
953 BamTools::SwapEndian_32p(&tagData[i]);
// NOTE(review): the declaration of 'numElements' is not visible in this excerpt
955 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
956 i += sizeof(uint32_t);
958 // swap endian-ness of array elements
959 for ( int j = 0; j < numElements; ++j ) {
961 case (Constants::BAM_TAG_TYPE_INT8) :
962 case (Constants::BAM_TAG_TYPE_UINT8) :
963 // no endian-swapping necessary
966 case (Constants::BAM_TAG_TYPE_INT16) :
967 case (Constants::BAM_TAG_TYPE_UINT16) :
968 BamTools::SwapEndian_16p(&tagData[i]);
969 i += sizeof(uint16_t);
971 case (Constants::BAM_TAG_TYPE_FLOAT) :
972 case (Constants::BAM_TAG_TYPE_INT32) :
973 case (Constants::BAM_TAG_TYPE_UINT32) :
974 BamTools::SwapEndian_32p(&tagData[i]);
975 i += sizeof(uint32_t);
979 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
980 << arrayType << endl;
988 // shouldn't get here
990 cerr << "BamAlignment ERROR: invalid tag value type: "
997 // store tagData in alignment
// size TagData first, then copy the raw tag bytes into its buffer
998 TagData.resize(tagDataLength);
999 memcpy((char*)TagData.data(), tagData, tagDataLength);
1002 // clear the core-only flag
1003 SupportData.HasCoreOnly = false;
1009 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
1010 \brief Edits a BAM tag field containing string data.
1012 If \a tag does not exist, a new entry is created.
1014 \param tag 2-character tag name
1015 \param type 1-character tag type (must be "Z" or "H")
1016 \param value string data to store
1018 \return \c true if the tag was modified/created successfully
1020 \sa BamAlignment::RemoveTag()
1021 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1023 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1025 // skip if core data not parsed
1026 if ( SupportData.HasCoreOnly ) return false;
1028 // validate tag/type size & that type is OK for string value
1029 if ( !IsValidSize(tag, type) ) return false;
1030 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1031 type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1034 // localize the tag data
1035 char* pOriginalTagData = (char*)TagData.data();
1036 char* pTagData = pOriginalTagData;
1037 const unsigned int originalTagDataLength = TagData.size();
1039 unsigned int newTagDataLength = 0;
1040 unsigned int numBytesParsed = 0;
1043 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1045 // make sure array is more than big enough
1046 char* newTagData = new char[originalTagDataLength + value.size()];
1048 // copy original tag data up til desired tag
1049 const unsigned int beginningTagDataLength = numBytesParsed;
1050 newTagDataLength += beginningTagDataLength;
1051 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1053 // copy new @value in place of current tag data
1054 const unsigned int dataLength = strlen(value.c_str());
1055 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1057 // skip to next tag (if tag for removal is last, return true)
1058 const char* pTagStorageType = pTagData - 1;
1059 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1062 // copy everything from current tag (the next one after tag for removal) to end
1063 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1064 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1065 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1066 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1068 // ensure null-terminator
1069 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1071 // save new tag data
1072 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1074 delete[] newTagData;
1079 // tag not found, attempt AddTag
1080 else return AddTag(tag, type, value);
1083 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1084 \brief Edits a BAM tag field containing unsigned integer data.
1086 If \a tag does not exist, a new entry is created.
1088 \param tag 2-character tag name
1089 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1090 \param value unsigned integer data to store
1092 \return \c true if the tag was modified/created successfully
1094 \sa BamAlignment::RemoveTag()
1095 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1097 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1099 // skip if core data not parsed
1100 if ( SupportData.HasCoreOnly ) return false;
1102 // validate tag/type size & that type is OK for uint32_t value
1103 if ( !IsValidSize(tag, type) ) return false;
1104 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1105 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1106 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1107 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1113 // localize the tag data
1114 char* pOriginalTagData = (char*)TagData.data();
1115 char* pTagData = pOriginalTagData;
1116 const unsigned int originalTagDataLength = TagData.size();
1118 unsigned int newTagDataLength = 0;
1119 unsigned int numBytesParsed = 0;
1122 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1124 // make sure array is more than big enough
1125 char* newTagData = new char[originalTagDataLength + sizeof(value)];
1127 // copy original tag data up til desired tag
1128 const unsigned int beginningTagDataLength = numBytesParsed;
1129 newTagDataLength += beginningTagDataLength;
1130 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1132 // copy new @value in place of current tag data
1133 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1135 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1137 // skip to next tag (if tag for removal is last, return true)
1138 const char* pTagStorageType = pTagData - 1;
1139 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1142 // copy everything from current tag (the next one after tag for removal) to end
1143 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1144 const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1145 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1146 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1148 // ensure null-terminator
1149 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1151 // save new tag data
1152 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1154 delete[] newTagData;
1159 // tag not found, attempt AddTag
1160 else return AddTag(tag, type, value);
1163 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1164 \brief Edits a BAM tag field containing signed integer data.
1166 If \a tag does not exist, a new entry is created.
1168 \param tag 2-character tag name
1169 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1170 \param value signed integer data to store
1172 \return \c true if the tag was modified/created successfully
1174 \sa BamAlignment::RemoveTag()
1175 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1177 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1178 return EditTag(tag, type, (const uint32_t&)value);
1181 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1182 \brief Edits a BAM tag field containing floating-point data.
1184 If \a tag does not exist, a new entry is created.
1186 \param tag 2-character tag name
1187 \param type 1-character tag type (must NOT be "Z", "H", or "B")
1188 \param value float data to store
1190 \return \c true if the tag was modified/created successfully
1192 \sa BamAlignment::RemoveTag()
1193 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1195 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1197 // skip if core data not parsed
1198 if ( SupportData.HasCoreOnly ) return false;
1200 // validate tag/type size & that type is OK for float value
1201 if ( !IsValidSize(tag, type) ) return false;
1202 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1203 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1204 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1210 // localize the tag data
1211 char* pOriginalTagData = (char*)TagData.data();
1212 char* pTagData = pOriginalTagData;
1213 const unsigned int originalTagDataLength = TagData.size();
1215 unsigned int newTagDataLength = 0;
1216 unsigned int numBytesParsed = 0;
1219 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1221 // make sure array is more than big enough
1222 char* newTagData = new char[originalTagDataLength + sizeof(value)];
1224 // copy original tag data up til desired tag
1225 const unsigned int beginningTagDataLength = numBytesParsed;
1226 newTagDataLength += beginningTagDataLength;
1227 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1229 // copy new @value in place of current tag data
1230 union { float value; char valueBuffer[sizeof(float)]; } un;
1232 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1234 // skip to next tag (if tag for removal is last, return true)
1235 const char* pTagStorageType = pTagData - 1;
1236 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1239 // copy everything from current tag (the next one after tag for removal) to end
1240 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1241 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1242 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1243 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1245 // ensure null-terminator
1246 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1248 // save new tag data
1249 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1251 delete[] newTagData;
1256 // tag not found, attempt AddTag
1257 else return AddTag(tag, type, value);
1260 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1261 \brief Edits a BAM tag field containing a numeric array.
1263 If \a tag does not exist, a new entry is created.
1265 \param tag 2-character tag name
1266 \param value vector of uint8_t values to store
1268 \return \c true if the tag was modified/created successfully
1269 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1271 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1273 // can't do anything if TagData not parsed
1274 if ( SupportData.HasCoreOnly )
1277 // remove existing tag if present
1281 // add tag record with new values
1282 return AddTag(tag, values);
1285 /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1286 \brief Edits a BAM tag field containing a numeric array.
1288 If \a tag does not exist, a new entry is created.
1290 \param tag 2-character tag name
1291 \param value vector of int8_t values to store
1293 \return \c true if the tag was modified/created successfully
1294 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1296 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1298 // can't do anything if TagData not parsed
1299 if ( SupportData.HasCoreOnly )
1302 // remove existing tag if present
1306 // add tag record with new values
1307 return AddTag(tag, values);
1310 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1311 \brief Edits a BAM tag field containing a numeric array.
1313 If \a tag does not exist, a new entry is created.
1315 \param tag 2-character tag name
1316 \param value vector of uint16_t values to store
1318 \return \c true if the tag was modified/created successfully
1319 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1321 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1323 // can't do anything if TagData not parsed
1324 if ( SupportData.HasCoreOnly )
1327 // remove existing tag if present
1331 // add tag record with new values
1332 return AddTag(tag, values);
1335 /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1336 \brief Edits a BAM tag field containing a numeric array.
1338 If \a tag does not exist, a new entry is created.
1340 \param tag 2-character tag name
1341 \param value vector of int16_t values to store
1343 \return \c true if the tag was modified/created successfully
1344 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1346 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1348 // can't do anything if TagData not parsed
1349 if ( SupportData.HasCoreOnly )
1352 // remove existing tag if present
1356 // add tag record with new values
1357 return AddTag(tag, values);
1360 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1361 \brief Edits a BAM tag field containing a numeric array.
1363 If \a tag does not exist, a new entry is created.
1365 \param tag 2-character tag name
1366 \param value vector of uint32_t values to store
1368 \return \c true if the tag was modified/created successfully
1369 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1371 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1373 // can't do anything if TagData not parsed
1374 if ( SupportData.HasCoreOnly )
1377 // remove existing tag if present
1381 // add tag record with new values
1382 return AddTag(tag, values);
1385 /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1386 \brief Edits a BAM tag field containing a numeric array.
1388 If \a tag does not exist, a new entry is created.
1390 \param tag 2-character tag name
1391 \param value vector of int32_t values to store
1393 \return \c true if the tag was modified/created successfully
1394 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1396 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1398 // can't do anything if TagData not parsed
1399 if ( SupportData.HasCoreOnly )
1402 // remove existing tag if present
1406 // add tag record with new values
1407 return AddTag(tag, values);
1410 /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1411 \brief Edits a BAM tag field containing a numeric array.
1413 If \a tag does not exist, a new entry is created.
1415 \param tag 2-character tag name
1416 \param value vector of float values to store
1418 \return \c true if the tag was modified/created successfully
1419 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1421 bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1423 // can't do anything if TagData not parsed
1424 if ( SupportData.HasCoreOnly )
1427 // remove existing tag if present
1431 // add tag record with new values
1432 return AddTag(tag, values);
1435 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
1438 Searches for requested tag in BAM tag data.
1440 \param tag requested 2-character tag name
1441 \param pTagData pointer to current position in BamAlignment::TagData
1442 \param tagDataLength length of BamAlignment::TagData
1443 \param numBytesParsed number of bytes parsed so far
1445 \return \c true if found
1447 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
1448 \a numBytesParsed will correspond to the position in the full TagData string.
1451 bool BamAlignment::FindTag(const std::string& tag,
1453 const unsigned int& tagDataLength,
1454 unsigned int& numBytesParsed) const
1457 while ( numBytesParsed < tagDataLength ) {
1459 const char* pTagType = pTagData;
1460 const char* pTagStorageType = pTagData + 2;
1462 numBytesParsed += 3;
1464 // check the current tag, return true on match
1465 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
1468 // get the storage class and find the next tag
1469 if ( *pTagStorageType == '\0' ) return false;
1470 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
1471 if ( *pTagData == '\0' ) return false;
1474 // checked all tags, none match
1478 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1479 \brief Retrieves value of edit distance tag ("NM").
1481 \deprecated Instead use BamAlignment::GetTag()
1483 BamAlignment::GetTag("NM", editDistance);
1486 \param editDistance destination for retrieved value
1488 \return \c true if found
1490 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1491 return GetTag("NM", (uint32_t&)editDistance);
1494 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1495 \brief Calculates alignment end position, based on starting position and CIGAR data.
1497 \param usePadded Inserted bases affect reported position. Default is false, so that reported
1498 position stays 'sync-ed' with reference coordinates.
1499 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1500 when using BAM data with half-open formats (e.g. BED).
1502 \return alignment end position
1504 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1506 // initialize alignment end to starting position
1507 int alignEnd = Position;
1509 // iterate over cigar operations
1510 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1511 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1512 for ( ; cigarIter != cigarEnd; ++cigarIter) {
1513 const char cigarType = (*cigarIter).Type;
1514 const uint32_t& cigarLength = (*cigarIter).Length;
1516 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1517 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1518 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1519 alignEnd += cigarLength;
1520 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1521 alignEnd += cigarLength;
1524 // adjust for zero-based coordinates, if requested
1525 if ( zeroBased ) alignEnd -= 1;
1531 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1532 \brief Retrieves value of read group tag ("RG").
1534 \deprecated Instead use BamAlignment::GetTag()
1536 BamAlignment::GetTag("RG", readGroup);
1539 \param readGroup destination for retrieved value
1541 \return \c true if found
1543 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1544 return GetTag("RG", readGroup);
1547 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1548 \brief Retrieves the string value associated with a BAM tag.
1550 \param tag 2-character tag name
1551 \param destination destination for retrieved value
1553 \return \c true if found
1555 bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1557 // make sure tag data exists
1558 if ( SupportData.HasCoreOnly || TagData.empty() )
1561 // localize the tag data
1562 char* pTagData = (char*)TagData.data();
1563 const unsigned int tagDataLength = TagData.size();
1564 unsigned int numBytesParsed = 0;
1567 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1568 const unsigned int dataLength = strlen(pTagData);
1569 destination.clear();
1570 destination.resize(dataLength);
1571 memcpy( (char*)destination.data(), pTagData, dataLength );
1575 // tag not found, return failure
1579 /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1580 \brief Retrieves the unsigned integer value associated with a BAM tag.
1582 \param tag 2-character tag name
1583 \param destination destination for retrieved value
1585 \return \c true if found
1587 bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1589 // make sure tag data exists
1590 if ( SupportData.HasCoreOnly || TagData.empty() )
1593 // localize the tag data
1594 char* pTagData = (char*)TagData.data();
1595 const unsigned int tagDataLength = TagData.size();
1596 unsigned int numBytesParsed = 0;
1599 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1601 // determine data byte-length
1602 const char type = *(pTagData - 1);
1603 int destinationLength = 0;
1607 case (Constants::BAM_TAG_TYPE_ASCII) :
1608 case (Constants::BAM_TAG_TYPE_INT8) :
1609 case (Constants::BAM_TAG_TYPE_UINT8) :
1610 destinationLength = 1;
1614 case (Constants::BAM_TAG_TYPE_INT16) :
1615 case (Constants::BAM_TAG_TYPE_UINT16) :
1616 destinationLength = 2;
1620 case (Constants::BAM_TAG_TYPE_INT32) :
1621 case (Constants::BAM_TAG_TYPE_UINT32) :
1622 destinationLength = 4;
1625 // unsupported type for integer destination (float or var-length strings)
1626 case (Constants::BAM_TAG_TYPE_FLOAT) :
1627 case (Constants::BAM_TAG_TYPE_STRING) :
1628 case (Constants::BAM_TAG_TYPE_HEX) :
1629 case (Constants::BAM_TAG_TYPE_ARRAY) :
1630 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1631 << " in integer destination" << endl;
1636 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1641 // store in destination
1643 memcpy(&destination, pTagData, destinationLength);
1647 // tag not found, return failure
1651 /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1652 \brief Retrieves the signed integer value associated with a BAM tag.
1654 \param tag 2-character tag name
1655 \param destination destination for retrieved value
1657 \return \c true if found
1659 bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1660 return GetTag(tag, (uint32_t&)destination);
1663 /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1664 \brief Retrieves the floating-point value associated with a BAM tag.
1666 \param tag 2-character tag name
1667 \param destination destination for retrieved value
1669 \return \c true if found
1671 bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1673 // make sure tag data exists
1674 if ( SupportData.HasCoreOnly || TagData.empty() )
1677 // localize the tag data
1678 char* pTagData = (char*)TagData.data();
1679 const unsigned int tagDataLength = TagData.size();
1680 unsigned int numBytesParsed = 0;
1683 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1685 // determine data byte-length
1686 const char type = *(pTagData - 1);
1687 int destinationLength = 0;
1691 case (Constants::BAM_TAG_TYPE_ASCII) :
1692 case (Constants::BAM_TAG_TYPE_INT8) :
1693 case (Constants::BAM_TAG_TYPE_UINT8) :
1694 destinationLength = 1;
1698 case (Constants::BAM_TAG_TYPE_INT16) :
1699 case (Constants::BAM_TAG_TYPE_UINT16) :
1700 destinationLength = 2;
1704 case (Constants::BAM_TAG_TYPE_FLOAT) :
1705 case (Constants::BAM_TAG_TYPE_INT32) :
1706 case (Constants::BAM_TAG_TYPE_UINT32) :
1707 destinationLength = 4;
1710 // unsupported type (var-length strings)
1711 case (Constants::BAM_TAG_TYPE_STRING) :
1712 case (Constants::BAM_TAG_TYPE_HEX) :
1713 case (Constants::BAM_TAG_TYPE_ARRAY) :
1714 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1715 << " in float destination" << endl;
1720 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1725 // store in destination
1727 memcpy(&destination, pTagData, destinationLength);
1731 // tag not found, return failure
1735 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1736 \brief Retrieves the numeric array data associated with a BAM tag
1738 \param tag 2-character tag name
1739 \param destination destination for retrieved data
1741 \return \c true if found
1743 bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1745 // make sure tag data exists
1746 if ( SupportData.HasCoreOnly || TagData.empty() )
1749 // localize the tag data
1750 char* pTagData = (char*)TagData.data();
1751 const unsigned int tagDataLength = TagData.size();
1752 unsigned int numBytesParsed = 0;
1754 // return false if tag not found
1755 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1758 // check that tag is array type
1759 const char tagType = *(pTagData - 1);
1760 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1761 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1762 << tag << " in array destination" << endl;
1766 // calculate length of each element in tag's array
1767 const char elementType = *pTagData;
1769 int elementLength = 0;
1770 switch ( elementType ) {
1771 case (Constants::BAM_TAG_TYPE_ASCII) :
1772 case (Constants::BAM_TAG_TYPE_INT8) :
1773 case (Constants::BAM_TAG_TYPE_UINT8) :
1774 elementLength = sizeof(uint8_t);
1777 case (Constants::BAM_TAG_TYPE_INT16) :
1778 case (Constants::BAM_TAG_TYPE_UINT16) :
1779 elementLength = sizeof(uint16_t);
1782 case (Constants::BAM_TAG_TYPE_INT32) :
1783 case (Constants::BAM_TAG_TYPE_UINT32) :
1784 elementLength = sizeof(uint32_t);
1787 // unsupported type for integer destination (float or var-length data)
1788 case (Constants::BAM_TAG_TYPE_FLOAT) :
1789 case (Constants::BAM_TAG_TYPE_STRING) :
1790 case (Constants::BAM_TAG_TYPE_HEX) :
1791 case (Constants::BAM_TAG_TYPE_ARRAY) :
1792 cerr << "BamAlignment ERROR: array element type: " << elementType
1793 << " cannot be stored in integer value" << endl;
1798 cerr << "BamAlignment ERROR: unknown element type encountered: "
1799 << elementType << endl;
1803 // get number of elements
1804 int32_t numElements;
1805 memcpy(&numElements, pTagData, sizeof(int32_t));
1807 destination.clear();
1808 destination.reserve(numElements);
1812 for ( int i = 0 ; i < numElements; ++i ) {
1813 memcpy(&value, pTagData, sizeof(uint32_t));
1814 pTagData += sizeof(uint32_t);
1815 destination.push_back(value);
1822 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1823 \brief Retrieves the numeric array data associated with a BAM tag
1825 \param tag 2-character tag name
1826 \param destination destination for retrieved data
1828 \return \c true if found
1830 bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1832 // make sure tag data exists
1833 if ( SupportData.HasCoreOnly || TagData.empty() )
1836 // localize the tag data
1837 char* pTagData = (char*)TagData.data();
1838 const unsigned int tagDataLength = TagData.size();
1839 unsigned int numBytesParsed = 0;
1841 // return false if tag not found
1842 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1845 // check that tag is array type
1846 const char tagType = *(pTagData - 1);
1847 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1848 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1849 << tag << " in array destination" << endl;
1853 // calculate length of each element in tag's array
1854 const char elementType = *pTagData;
1856 int elementLength = 0;
1857 switch ( elementType ) {
1858 case (Constants::BAM_TAG_TYPE_ASCII) :
1859 case (Constants::BAM_TAG_TYPE_INT8) :
1860 case (Constants::BAM_TAG_TYPE_UINT8) :
1861 elementLength = sizeof(uint8_t);
1864 case (Constants::BAM_TAG_TYPE_INT16) :
1865 case (Constants::BAM_TAG_TYPE_UINT16) :
1866 elementLength = sizeof(uint16_t);
1869 case (Constants::BAM_TAG_TYPE_INT32) :
1870 case (Constants::BAM_TAG_TYPE_UINT32) :
1871 elementLength = sizeof(uint32_t);
1874 // unsupported type for integer destination (float or var-length data)
1875 case (Constants::BAM_TAG_TYPE_FLOAT) :
1876 case (Constants::BAM_TAG_TYPE_STRING) :
1877 case (Constants::BAM_TAG_TYPE_HEX) :
1878 case (Constants::BAM_TAG_TYPE_ARRAY) :
1879 cerr << "BamAlignment ERROR: array element type: " << elementType
1880 << " cannot be stored in integer value" << endl;
1885 cerr << "BamAlignment ERROR: unknown element type encountered: "
1886 << elementType << endl;
1890 // get number of elements
1891 int32_t numElements;
1892 memcpy(&numElements, pTagData, sizeof(int32_t));
1894 destination.clear();
1895 destination.reserve(numElements);
1899 for ( int i = 0 ; i < numElements; ++i ) {
1900 memcpy(&value, pTagData, sizeof(int32_t));
1901 pTagData += sizeof(int32_t);
1902 destination.push_back(value);
1910 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
1911 \brief Retrieves the numeric array data associated with a BAM tag
1913 \param tag 2-character tag name
1914 \param destination destination for retrieved data
1916 \return \c true if found
1918 bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
1920 // make sure tag data exists
1921 if ( SupportData.HasCoreOnly || TagData.empty() )
1924 // localize the tag data
1925 char* pTagData = (char*)TagData.data();
1926 const unsigned int tagDataLength = TagData.size();
1927 unsigned int numBytesParsed = 0;
1929 // return false if tag not found
1930 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1933 // check that tag is array type
1934 const char tagType = *(pTagData - 1);
1935 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1936 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1937 << tag << " in array destination" << endl;
1941 // calculate length of each element in tag's array
1942 const char elementType = *pTagData;
1944 int elementLength = 0;
1945 switch ( elementType ) {
1946 case (Constants::BAM_TAG_TYPE_ASCII) :
1947 case (Constants::BAM_TAG_TYPE_INT8) :
1948 case (Constants::BAM_TAG_TYPE_UINT8) :
1949 elementLength = sizeof(uint8_t);
1952 case (Constants::BAM_TAG_TYPE_INT16) :
1953 case (Constants::BAM_TAG_TYPE_UINT16) :
1954 elementLength = sizeof(uint16_t);
1957 case (Constants::BAM_TAG_TYPE_INT32) :
1958 case (Constants::BAM_TAG_TYPE_UINT32) :
1959 case (Constants::BAM_TAG_TYPE_FLOAT) :
1960 elementLength = sizeof(uint32_t);
1963 // unsupported type for float destination (var-length data)
1964 case (Constants::BAM_TAG_TYPE_STRING) :
1965 case (Constants::BAM_TAG_TYPE_HEX) :
1966 case (Constants::BAM_TAG_TYPE_ARRAY) :
1967 cerr << "BamAlignment ERROR: array element type: " << elementType
1968 << " cannot be stored in float value" << endl;
1973 cerr << "BamAlignment ERROR: unknown element type encountered: "
1974 << elementType << endl;
1978 // get number of elements
1979 int32_t numElements;
1980 memcpy(&numElements, pTagData, sizeof(int32_t));
1982 destination.clear();
1983 destination.reserve(numElements);
1987 for ( int i = 0 ; i < numElements; ++i ) {
1988 memcpy(&value, pTagData, sizeof(float));
1989 pTagData += sizeof(float);
1990 destination.push_back(value);
1997 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
1998 \brief Retrieves the BAM tag type-code associated with requested tag name.
2000 \param tag 2-character tag name
2001 \param type destination for the retrieved (1-character) tag type
2003 \return \c true if found
2004 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
2006 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
2008 // make sure tag data exists
2009 if ( SupportData.HasCoreOnly || TagData.empty() )
2012 // localize the tag data
2013 char* pTagData = (char*)TagData.data();
2014 const unsigned int tagDataLength = TagData.size();
2015 unsigned int numBytesParsed = 0;
2018 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
2020 // retrieve tag type code
2021 type = *(pTagData - 1);
2023 // validate that type is a proper BAM tag type
2025 case (Constants::BAM_TAG_TYPE_ASCII) :
2026 case (Constants::BAM_TAG_TYPE_INT8) :
2027 case (Constants::BAM_TAG_TYPE_UINT8) :
2028 case (Constants::BAM_TAG_TYPE_INT16) :
2029 case (Constants::BAM_TAG_TYPE_UINT16) :
2030 case (Constants::BAM_TAG_TYPE_INT32) :
2031 case (Constants::BAM_TAG_TYPE_UINT32) :
2032 case (Constants::BAM_TAG_TYPE_FLOAT) :
2033 case (Constants::BAM_TAG_TYPE_STRING) :
2034 case (Constants::BAM_TAG_TYPE_HEX) :
2035 case (Constants::BAM_TAG_TYPE_ARRAY) :
2040 cerr << "BamAlignment ERROR: unknown tag type encountered: "
2046 // tag not found, return failure
2050 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2051 \brief Returns true if alignment has a record for requested tag.
2052 \param tag 2-character tag name
2053 \return \c true if alignment has a record for tag
2055 bool BamAlignment::HasTag(const std::string& tag) const {
2057 // return false if no tag data present
2058 if ( SupportData.HasCoreOnly || TagData.empty() )
2061 // localize the tag data for lookup
2062 char* pTagData = (char*)TagData.data();
2063 const unsigned int tagDataLength = TagData.size();
2064 unsigned int numBytesParsed = 0;
2066 // if result of tag lookup
2067 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2070 /*! \fn bool BamAlignment::IsDuplicate(void) const
2071 \return \c true if this read is a PCR duplicate
2073 bool BamAlignment::IsDuplicate(void) const {
2074 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2077 /*! \fn bool BamAlignment::IsFailedQC(void) const
2078 \return \c true if this read failed quality control
2080 bool BamAlignment::IsFailedQC(void) const {
2081 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2084 /*! \fn bool BamAlignment::IsFirstMate(void) const
2085 \return \c true if alignment is first mate on paired-end read
2087 bool BamAlignment::IsFirstMate(void) const {
2088 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2091 /*! \fn bool BamAlignment::IsMapped(void) const
2092 \return \c true if alignment is mapped
2094 bool BamAlignment::IsMapped(void) const {
2095 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2098 /*! \fn bool BamAlignment::IsMateMapped(void) const
2099 \return \c true if alignment's mate is mapped
2101 bool BamAlignment::IsMateMapped(void) const {
2102 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2105 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2106 \return \c true if alignment's mate mapped to reverse strand
2108 bool BamAlignment::IsMateReverseStrand(void) const {
2109 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2112 /*! \fn bool BamAlignment::IsPaired(void) const
2113 \return \c true if alignment part of paired-end read
2115 bool BamAlignment::IsPaired(void) const {
2116 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2119 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2120 \return \c true if reported position is primary alignment
2122 bool BamAlignment::IsPrimaryAlignment(void) const {
2123 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2126 /*! \fn bool BamAlignment::IsProperPair(void) const
2127 \return \c true if alignment is part of read that satisfied paired-end resolution
2129 bool BamAlignment::IsProperPair(void) const {
2130 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2133 /*! \fn bool BamAlignment::IsReverseStrand(void) const
2134 \return \c true if alignment mapped to reverse strand
2136 bool BamAlignment::IsReverseStrand(void) const {
2137 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2140 /*! \fn bool BamAlignment::IsSecondMate(void) const
2141 \return \c true if alignment is second mate on read
2143 bool BamAlignment::IsSecondMate(void) const {
2144 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2147 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
2150 Checks that tag name & type strings are expected sizes.
2151 \a tag should have length
2152 \a type should have length 1
2154 \param tag BAM tag name
2155 \param type BAM tag type-code
2157 \return \c true if both \a tag and \a type are correct sizes
2159 bool BamAlignment::IsValidSize(const string& tag, const string& type) const {
2160 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
2161 (type.size() == Constants::BAM_TAG_TYPESIZE);
2164 /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2165 \brief Removes field from BAM tags.
2167 \return \c true if tag was removed successfully (or didn't exist before)
2169 bool BamAlignment::RemoveTag(const std::string& tag) {
2171 // skip if no tag data available
2172 if ( SupportData.HasCoreOnly || TagData.empty() )
2175 // localize the tag data
2176 char* pOriginalTagData = (char*)TagData.data();
2177 char* pTagData = pOriginalTagData;
2178 const unsigned int originalTagDataLength = TagData.size();
2179 unsigned int newTagDataLength = 0;
2180 unsigned int numBytesParsed = 0;
2183 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2185 char* newTagData = new char[originalTagDataLength];
2187 // copy original tag data up til desired tag
2189 numBytesParsed -= 3;
2190 const unsigned int beginningTagDataLength = numBytesParsed;
2191 newTagDataLength += beginningTagDataLength;
2192 memcpy(newTagData, pOriginalTagData, numBytesParsed);
2194 // skip to next tag (if tag for removal is last, return true)
2195 const char* pTagStorageType = pTagData + 2;
2197 numBytesParsed += 3;
2198 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2201 // copy everything from current tag (the next one after tag for removal) to end
2202 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2203 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2204 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2206 // save new tag data
2207 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2209 delete[] newTagData;
2214 // tag not found, no removal - return failure
2218 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2219 \brief Sets value of "PCR duplicate" flag to \a ok.
2221 void BamAlignment::SetIsDuplicate(bool ok) {
2222 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2223 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2226 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2227 \brief Sets "failed quality control" flag to \a ok.
2229 void BamAlignment::SetIsFailedQC(bool ok) {
2230 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2231 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2234 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2235 \brief Sets "alignment is first mate" flag to \a ok.
2237 void BamAlignment::SetIsFirstMate(bool ok) {
2238 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2239 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2242 /*! \fn void BamAlignment::SetIsMapped(bool ok)
2243 \brief Sets "alignment is mapped" flag to \a ok.
2245 void BamAlignment::SetIsMapped(bool ok) {
2246 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2247 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2250 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2251 \brief Sets "alignment's mate is mapped" flag to \a ok.
2253 void BamAlignment::SetIsMateMapped(bool ok) {
2254 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2255 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2258 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2259 \brief Complement of using SetIsMateMapped().
2260 \deprecated For sake of symmetry with the query methods
2261 \sa IsMateMapped(), SetIsMateMapped()
2263 void BamAlignment::SetIsMateUnmapped(bool ok) {
2264 SetIsMateMapped(!ok);
2267 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2268 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2270 void BamAlignment::SetIsMateReverseStrand(bool ok) {
2271 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2272 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2275 /*! \fn void BamAlignment::SetIsPaired(bool ok)
2276 \brief Sets "alignment part of paired-end read" flag to \a ok.
2278 void BamAlignment::SetIsPaired(bool ok) {
2279 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2280 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2283 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2284 \brief Sets "position is primary alignment" flag to \a ok.
2286 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2287 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2288 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2291 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2292 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2294 void BamAlignment::SetIsProperPair(bool ok) {
2295 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2296 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2299 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2300 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2302 void BamAlignment::SetIsReverseStrand(bool ok) {
2303 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2304 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2307 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2308 \brief Complement of using SetIsPrimaryAlignment().
2309 \deprecated For sake of symmetry with the query methods
2310 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2312 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2313 SetIsPrimaryAlignment(!ok);
2316 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2317 \brief Sets "alignment is second mate on read" flag to \a ok.
2319 void BamAlignment::SetIsSecondMate(bool ok) {
2320 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2321 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2324 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2325 \brief Complement of using SetIsMapped().
2326 \deprecated For sake of symmetry with the query methods
2327 \sa IsMapped(), SetIsMapped()
2329 void BamAlignment::SetIsUnmapped(bool ok) {
2333 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
2336 Moves to next available tag in tag data string
2338 \param storageType BAM tag type-code that determines how far to move cursor
2339 \param pTagData pointer to current position (cursor) in tag string
2340 \param numBytesParsed report of how many bytes were parsed (cumulatively)
2342 \return \c true if storageType was a recognized BAM tag type
2343 \post \a pTagData will point to the byte where the next tag data begins.
2344 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
2346 bool BamAlignment::SkipToNextTag(const char storageType,
2348 unsigned int& numBytesParsed) const
2350 switch (storageType) {
2352 case (Constants::BAM_TAG_TYPE_ASCII) :
2353 case (Constants::BAM_TAG_TYPE_INT8) :
2354 case (Constants::BAM_TAG_TYPE_UINT8) :
2359 case (Constants::BAM_TAG_TYPE_INT16) :
2360 case (Constants::BAM_TAG_TYPE_UINT16) :
2361 numBytesParsed += sizeof(uint16_t);
2362 pTagData += sizeof(uint16_t);
2365 case (Constants::BAM_TAG_TYPE_FLOAT) :
2366 case (Constants::BAM_TAG_TYPE_INT32) :
2367 case (Constants::BAM_TAG_TYPE_UINT32) :
2368 numBytesParsed += sizeof(uint32_t);
2369 pTagData += sizeof(uint32_t);
2372 case (Constants::BAM_TAG_TYPE_STRING) :
2373 case (Constants::BAM_TAG_TYPE_HEX) :
2374 while( *pTagData ) {
2378 // increment for null-terminator
2383 case (Constants::BAM_TAG_TYPE_ARRAY) :
2387 const char arrayType = *pTagData;
2391 // read number of elements
2392 int32_t numElements;
2393 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
2394 numBytesParsed += sizeof(uint32_t);
2395 pTagData += sizeof(uint32_t);
2397 // calculate number of bytes to skip
2398 int bytesToSkip = 0;
2399 switch (arrayType) {
2400 case (Constants::BAM_TAG_TYPE_INT8) :
2401 case (Constants::BAM_TAG_TYPE_UINT8) :
2402 bytesToSkip = numElements;
2404 case (Constants::BAM_TAG_TYPE_INT16) :
2405 case (Constants::BAM_TAG_TYPE_UINT16) :
2406 bytesToSkip = numElements*sizeof(uint16_t);
2408 case (Constants::BAM_TAG_TYPE_FLOAT) :
2409 case (Constants::BAM_TAG_TYPE_INT32) :
2410 case (Constants::BAM_TAG_TYPE_UINT32) :
2411 bytesToSkip = numElements*sizeof(uint32_t);
2414 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
2415 << arrayType << endl;
2419 // skip binary array contents
2420 numBytesParsed += bytesToSkip;
2421 pTagData += bytesToSkip;
2426 cerr << "BamAlignment ERROR: unknown tag type encountered"
2427 << storageType << endl;