1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 April 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
11 #include <api/BamAlignment.h>
12 #include <api/BamConstants.h>
13 using namespace BamTools;
/*! \class BamTools::BamAlignment
    \brief The main BAM alignment data structure.

    Provides methods to query/modify BAM alignment data fields.
*/
/*! \var BamAlignment::Name
    \brief read name
*/
/*! \var BamAlignment::Length
    \brief length of query sequence
*/
/*! \var BamAlignment::QueryBases
    \brief 'original' sequence (as reported from sequencing machine)
*/
/*! \var BamAlignment::AlignedBases
    \brief 'aligned' sequence (includes any indels, padding, clipping)
*/
/*! \var BamAlignment::Qualities
    \brief FASTQ qualities (ASCII characters, not numeric values)
*/
/*! \var BamAlignment::TagData
    \brief tag data (use the provided methods to query/modify)
*/
/*! \var BamAlignment::RefID
    \brief ID number for reference sequence
*/
/*! \var BamAlignment::Position
    \brief position (0-based) where alignment starts
*/
/*! \var BamAlignment::Bin
    \brief BAM (standard) index bin number for this alignment
*/
/*! \var BamAlignment::MapQuality
    \brief mapping quality score
*/
/*! \var BamAlignment::AlignmentFlag
    \brief alignment bit-flag (use the provided methods to query/modify)
*/
/*! \var BamAlignment::CigarData
    \brief CIGAR operations for this alignment
*/
/*! \var BamAlignment::MateRefID
    \brief ID number for reference sequence where alignment's mate was aligned
*/
/*! \var BamAlignment::MatePosition
    \brief position (0-based) where alignment's mate starts
*/
/*! \var BamAlignment::InsertSize
    \brief mate-pair insert size
*/
/*! \var BamAlignment::Filename
    \brief name of BAM file which this alignment comes from
*/
79 /*! \fn BamAlignment::BamAlignment(void)
82 BamAlignment::BamAlignment(void)
90 /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
91 \brief copy constructor
93 BamAlignment::BamAlignment(const BamAlignment& other)
95 , Length(other.Length)
96 , QueryBases(other.QueryBases)
97 , AlignedBases(other.AlignedBases)
98 , Qualities(other.Qualities)
99 , TagData(other.TagData)
101 , Position(other.Position)
103 , MapQuality(other.MapQuality)
104 , AlignmentFlag(other.AlignmentFlag)
105 , CigarData(other.CigarData)
106 , MateRefID(other.MateRefID)
107 , MatePosition(other.MatePosition)
108 , InsertSize(other.InsertSize)
109 , Filename(other.Filename)
110 , SupportData(other.SupportData)
113 /*! \fn BamAlignment::~BamAlignment(void)
116 BamAlignment::~BamAlignment(void) { }
118 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
119 \brief Adds a field with string data to the BAM tags.
121 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
123 \param tag 2-character tag name
124 \param type 1-character tag type (must be "Z" or "H")
125 \param value string data to store
127 \return \c true if the \b new tag was added successfully
128 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
130 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
132 // skip if core data not parsed
133 if ( SupportData.HasCoreOnly ) return false;
135 // validate tag/type size & that type is OK for string value
136 if ( !IsValidSize(tag, type) ) return false;
137 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
138 type.at(0) != Constants::BAM_TAG_TYPE_HEX
144 // localize the tag data
145 char* pTagData = (char*)TagData.data();
146 const unsigned int tagDataLength = TagData.size();
147 unsigned int numBytesParsed = 0;
149 // if tag already exists, return false
150 // use EditTag explicitly instead
151 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
154 // otherwise, copy tag data to temp buffer
155 string newTag = tag + type + value;
156 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
157 char originalTagData[newTagDataLength];
158 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
161 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
163 // store temp buffer back in TagData
164 const char* newTagData = (const char*)originalTagData;
165 TagData.assign(newTagData, newTagDataLength);
171 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
172 \brief Adds a field with unsigned integer data to the BAM tags.
174 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
176 \param tag 2-character tag name
177 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
178 \param value unsigned int data to store
180 \return \c true if the \b new tag was added successfully
181 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
183 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
185 // skip if core data not parsed
186 if ( SupportData.HasCoreOnly ) return false;
188 // validate tag/type size & that type is OK for uint32_t value
189 if ( !IsValidSize(tag, type) ) return false;
190 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
191 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
192 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
193 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
199 // localize the tag data
200 char* pTagData = (char*)TagData.data();
201 const unsigned int tagDataLength = TagData.size();
202 unsigned int numBytesParsed = 0;
204 // if tag already exists, return false
205 // use EditTag explicitly instead
206 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
209 // otherwise, convert value to string
210 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
213 // copy original tag data to temp buffer
214 string newTag = tag + type;
215 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
216 char originalTagData[newTagDataLength];
217 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
220 strcat(originalTagData + tagDataLength, newTag.data());
221 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
223 // store temp buffer back in TagData
224 const char* newTagData = (const char*)originalTagData;
225 TagData.assign(newTagData, newTagDataLength);
231 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
232 \brief Adds a field with signed integer data to the BAM tags.
234 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
236 \param tag 2-character tag name
237 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
238 \param value signed int data to store
240 \return \c true if the \b new tag was added successfully
241 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
243 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
244 return AddTag(tag, type, (const uint32_t&)value);
247 /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
248 \brief Adds a field with floating-point data to the BAM tags.
250 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
252 \param tag 2-character tag name
253 \param type 1-character tag type (must NOT be "Z", "H", or "B")
254 \param value float data to store
256 \return \c true if the \b new tag was added successfully
257 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
259 bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
261 // skip if core data not parsed
262 if ( SupportData.HasCoreOnly ) return false;
264 // validate tag/type size & that type is OK for float value
265 if ( !IsValidSize(tag, type) ) return false;
266 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
267 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
268 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
274 // localize the tag data
275 char* pTagData = (char*)TagData.data();
276 const unsigned int tagDataLength = TagData.size();
277 unsigned int numBytesParsed = 0;
279 // if tag already exists, return false
280 // use EditTag explicitly instead
281 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
284 // otherwise, convert value to string
285 union { float value; char valueBuffer[sizeof(float)]; } un;
288 // copy original tag data to temp buffer
289 string newTag = tag + type;
290 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
291 char originalTagData[newTagDataLength];
292 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
295 strcat(originalTagData + tagDataLength, newTag.data());
296 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
298 // store temp buffer back in TagData
299 const char* newTagData = (const char*)originalTagData;
300 TagData.assign(newTagData, newTagDataLength);
306 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
307 \brief Adds a numeric array field to the BAM tags.
309 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
311 \param tag 2-character tag name
312 \param values vector of uint8_t values to store
314 \return \c true if the \b new tag was added successfully
315 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
317 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
319 // skip if core data not parsed
320 if ( SupportData.HasCoreOnly ) return false;
322 // check for valid tag length
323 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
325 // localize the tag data
326 char* pTagData = (char*)TagData.data();
327 const unsigned int tagDataLength = TagData.size();
328 unsigned int numBytesParsed = 0;
330 // if tag already exists, return false
331 // use EditTag explicitly instead
332 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
335 // build new tag's base information
336 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
337 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
338 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
339 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
341 // add number of array elements to newTagBase
342 const int32_t numElements = values.size();
343 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
345 // copy current TagData string to temp buffer, leaving room for new tag's contents
346 const int newTagDataLength = tagDataLength +
347 Constants::BAM_TAG_ARRAYBASE_SIZE +
348 numElements*sizeof(uint8_t);
349 char originalTagData[newTagDataLength];
350 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
352 // write newTagBase (removes old null term)
353 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
355 // add vector elements to tag
356 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
357 for ( int i = 0 ; i < numElements; ++i ) {
358 const uint8_t value = values.at(i);
359 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
360 &value, sizeof(uint8_t));
363 // store temp buffer back in TagData
364 const char* newTagData = (const char*)originalTagData;
365 TagData.assign(newTagData, newTagDataLength);
371 /*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
372 \brief Adds a numeric array field to the BAM tags.
374 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
376 \param tag 2-character tag name
377 \param values vector of int8_t values to store
379 \return \c true if the \b new tag was added successfully
380 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
382 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
384 // skip if core data not parsed
385 if ( SupportData.HasCoreOnly ) return false;
387 // check for valid tag length
388 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
390 // localize the tag data
391 char* pTagData = (char*)TagData.data();
392 const unsigned int tagDataLength = TagData.size();
393 unsigned int numBytesParsed = 0;
395 // if tag already exists, return false
396 // use EditTag explicitly instead
397 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
400 // build new tag's base information
401 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
402 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
403 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
404 newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
406 // add number of array elements to newTagBase
407 const int32_t numElements = values.size();
408 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
410 // copy current TagData string to temp buffer, leaving room for new tag's contents
411 const int newTagDataLength = tagDataLength +
412 Constants::BAM_TAG_ARRAYBASE_SIZE +
413 numElements*sizeof(int8_t);
414 char originalTagData[newTagDataLength];
415 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
417 // write newTagBase (removes old null term)
418 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
420 // add vector elements to tag
421 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
422 for ( int i = 0 ; i < numElements; ++i ) {
423 const int8_t value = values.at(i);
424 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
425 &value, sizeof(int8_t));
428 // store temp buffer back in TagData
429 const char* newTagData = (const char*)originalTagData;
430 TagData.assign(newTagData, newTagDataLength);
436 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
437 \brief Adds a numeric array field to the BAM tags.
439 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
441 \param tag 2-character tag name
442 \param values vector of uint16_t values to store
444 \return \c true if the \b new tag was added successfully
445 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
447 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
449 // skip if core data not parsed
450 if ( SupportData.HasCoreOnly ) return false;
452 // check for valid tag length
453 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
455 // localize the tag data
456 char* pTagData = (char*)TagData.data();
457 const unsigned int tagDataLength = TagData.size();
458 unsigned int numBytesParsed = 0;
460 // if tag already exists, return false
461 // use EditTag explicitly instead
462 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
465 // build new tag's base information
466 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
467 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
468 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
469 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
471 // add number of array elements to newTagBase
472 const int32_t numElements = values.size();
473 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
475 // copy current TagData string to temp buffer, leaving room for new tag's contents
476 const int newTagDataLength = tagDataLength +
477 Constants::BAM_TAG_ARRAYBASE_SIZE +
478 numElements*sizeof(uint16_t);
479 char originalTagData[newTagDataLength];
480 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
482 // write newTagBase (removes old null term)
483 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
485 // add vector elements to tag
486 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
487 for ( int i = 0 ; i < numElements; ++i ) {
488 const uint16_t value = values.at(i);
489 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
490 &value, sizeof(uint16_t));
493 // store temp buffer back in TagData
494 const char* newTagData = (const char*)originalTagData;
495 TagData.assign(newTagData, newTagDataLength);
501 /*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
502 \brief Adds a numeric array field to the BAM tags.
504 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
506 \param tag 2-character tag name
507 \param values vector of int16_t values to store
509 \return \c true if the \b new tag was added successfully
510 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
512 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
514 // skip if core data not parsed
515 if ( SupportData.HasCoreOnly ) return false;
517 // check for valid tag length
518 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
520 // localize the tag data
521 char* pTagData = (char*)TagData.data();
522 const unsigned int tagDataLength = TagData.size();
523 unsigned int numBytesParsed = 0;
525 // if tag already exists, return false
526 // use EditTag explicitly instead
527 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
530 // build new tag's base information
531 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
532 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
533 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
534 newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
536 // add number of array elements to newTagBase
537 const int32_t numElements = values.size();
538 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
540 // copy current TagData string to temp buffer, leaving room for new tag's contents
541 const int newTagDataLength = tagDataLength +
542 Constants::BAM_TAG_ARRAYBASE_SIZE +
543 numElements*sizeof(int16_t);
544 char originalTagData[newTagDataLength];
545 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
547 // write newTagBase (removes old null term)
548 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
550 // add vector elements to tag
551 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
552 for ( int i = 0 ; i < numElements; ++i ) {
553 const int16_t value = values.at(i);
554 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
555 &value, sizeof(int16_t));
558 // store temp buffer back in TagData
559 const char* newTagData = (const char*)originalTagData;
560 TagData.assign(newTagData, newTagDataLength);
566 /*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
567 \brief Adds a numeric array field to the BAM tags.
569 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
571 \param tag 2-character tag name
572 \param values vector of uint32_t values to store
574 \return \c true if the \b new tag was added successfully
575 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
577 bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
579 // skip if core data not parsed
580 if ( SupportData.HasCoreOnly ) return false;
582 // check for valid tag length
583 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
585 // localize the tag data
586 char* pTagData = (char*)TagData.data();
587 const unsigned int tagDataLength = TagData.size();
588 unsigned int numBytesParsed = 0;
590 // if tag already exists, return false
591 // use EditTag explicitly instead
592 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
595 // build new tag's base information
596 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
597 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
598 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
599 newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
601 // add number of array elements to newTagBase
602 const int32_t numElements = values.size();
603 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
605 // copy current TagData string to temp buffer, leaving room for new tag's contents
606 const int newTagDataLength = tagDataLength +
607 Constants::BAM_TAG_ARRAYBASE_SIZE +
608 numElements*sizeof(uint32_t);
609 char originalTagData[newTagDataLength];
610 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
612 // write newTagBase (removes old null term)
613 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
615 // add vector elements to tag
616 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
617 for ( int i = 0 ; i < numElements; ++i ) {
618 const uint32_t value = values.at(i);
619 memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
620 &value, sizeof(uint32_t));
623 // store temp buffer back in TagData
624 const char* newTagData = (const char*)originalTagData;
625 TagData.assign(newTagData, newTagDataLength);
631 /*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
632 \brief Adds a numeric array field to the BAM tags.
634 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
636 \param tag 2-character tag name
637 \param values vector of int32_t values to store
639 \return \c true if the \b new tag was added successfully
640 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
642 bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
644 // skip if core data not parsed
645 if ( SupportData.HasCoreOnly ) return false;
647 // check for valid tag length
648 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
650 // localize the tag data
651 char* pTagData = (char*)TagData.data();
652 const unsigned int tagDataLength = TagData.size();
653 unsigned int numBytesParsed = 0;
655 // if tag already exists, return false
656 // use EditTag explicitly instead
657 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
660 // build new tag's base information
661 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
662 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
663 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
664 newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
666 // add number of array elements to newTagBase
667 const int32_t numElements = values.size();
668 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
670 // copy current TagData string to temp buffer, leaving room for new tag's contents
671 const int newTagDataLength = tagDataLength +
672 Constants::BAM_TAG_ARRAYBASE_SIZE +
673 numElements*sizeof(int32_t);
674 char originalTagData[newTagDataLength];
675 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
677 // write newTagBase (removes old null term)
678 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
680 // add vector elements to tag
681 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
682 for ( int i = 0 ; i < numElements; ++i ) {
683 const int32_t value = values.at(i);
684 memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
685 &value, sizeof(int32_t));
688 // store temp buffer back in TagData
689 const char* newTagData = (const char*)originalTagData;
690 TagData.assign(newTagData, newTagDataLength);
696 /*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
697 \brief Adds a numeric array field to the BAM tags.
699 Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
701 \param tag 2-character tag name
702 \param values vector of float values to store
704 \return \c true if the \b new tag was added successfully
705 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
707 bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
709 // skip if core data not parsed
710 if ( SupportData.HasCoreOnly ) return false;
712 // check for valid tag length
713 if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
715 // localize the tag data
716 char* pTagData = (char*)TagData.data();
717 const unsigned int tagDataLength = TagData.size();
718 unsigned int numBytesParsed = 0;
720 // if tag already exists, return false
721 // use EditTag explicitly instead
722 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
725 // build new tag's base information
726 char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
727 memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
728 newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
729 newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
731 // add number of array elements to newTagBase
732 const int32_t numElements = values.size();
733 memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
735 // copy current TagData string to temp buffer, leaving room for new tag's contents
736 const int newTagDataLength = tagDataLength +
737 Constants::BAM_TAG_ARRAYBASE_SIZE +
738 numElements*sizeof(float);
739 char originalTagData[newTagDataLength];
740 memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
742 // write newTagBase (removes old null term)
743 strcat(originalTagData + tagDataLength, (const char*)newTagBase);
745 // add vector elements to tag
746 int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
747 for ( int i = 0 ; i < numElements; ++i ) {
748 const float value = values.at(i);
749 memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
750 &value, sizeof(float));
753 // store temp buffer back in TagData
754 const char* newTagData = (const char*)originalTagData;
755 TagData.assign(newTagData, newTagDataLength);
761 /*! \fn bool BamAlignment::BuildCharData(void)
762 \brief Populates alignment string fields (read name, bases, qualities, tag data).
764 An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
765 Using that method makes parsing much quicker when only positional data is required.
767 However, if you later want to access the character data fields from such an alignment,
768 use this method to populate those fields. Provides ability to do 'lazy evaluation' of
771 \return \c true if character data populated successfully (or was already available to begin with)
773 bool BamAlignment::BuildCharData(void) {
775 // skip if char data already parsed
776 if ( !SupportData.HasCoreOnly )
779 // check system endianness
780 bool IsBigEndian = BamTools::SystemIsBigEndian();
782 // calculate character lengths/offsets
783 const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
784 const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
785 const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
786 const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
787 const unsigned int tagDataLength = dataLength - tagDataOffset;
789 // check offsets to see what char data exists
790 const bool hasSeqData = ( seqDataOffset < dataLength );
791 const bool hasQualData = ( qualDataOffset < dataLength );
792 const bool hasTagData = ( tagDataOffset < dataLength );
794 // set up char buffers
795 const char* allCharData = SupportData.AllCharData.data();
796 const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
797 const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
798 char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
800 // store alignment name (relies on null char in name as terminator)
801 Name.assign((const char*)(allCharData));
803 // save query sequence
806 QueryBases.reserve(SupportData.QuerySequenceLength);
807 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
808 char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
809 QueryBases.append(1, singleBase);
813 // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
816 Qualities.reserve(SupportData.QuerySequenceLength);
817 for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
818 char singleQuality = (char)(qualData[i]+33);
819 Qualities.append(1, singleQuality);
823 // clear previous AlignedBases
824 AlignedBases.clear();
826 // if QueryBases has data, build AlignedBases using CIGAR data
827 // otherwise, AlignedBases will remain empty (this case IS allowed)
828 if ( !QueryBases.empty() ) {
830 // resize AlignedBases
831 AlignedBases.reserve(SupportData.QuerySequenceLength);
833 // iterate over CigarOps
835 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
836 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
837 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
838 const CigarOp& op = (*cigarIter);
842 // for 'M', 'I', '=', 'X' - write bases
843 case (Constants::BAM_CIGAR_MATCH_CHAR) :
844 case (Constants::BAM_CIGAR_INS_CHAR) :
845 case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
846 case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
847 AlignedBases.append(QueryBases.substr(k, op.Length));
850 // for 'S' - soft clip, do not write bases
851 // but increment placeholder 'k'
852 case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
856 // for 'D' - write gap character
857 case (Constants::BAM_CIGAR_DEL_CHAR) :
858 AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
861 // for 'P' - write padding character
862 case (Constants::BAM_CIGAR_PAD_CHAR) :
863 AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
866 // for 'N' - write N's, skip bases in original query sequence
867 case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
868 AlignedBases.append( op.Length, Constants::BAM_DNA_N );
871 // for 'H' - hard clip, do nothing to AlignedBases, move to next op
872 case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
875 // shouldn't get here
877 cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
889 while ( (unsigned int)i < tagDataLength ) {
891 i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
892 const char type = tagData[i]; // get tag type at position i
893 ++i; // move i past tag type
897 case(Constants::BAM_TAG_TYPE_ASCII) :
898 case(Constants::BAM_TAG_TYPE_INT8) :
899 case(Constants::BAM_TAG_TYPE_UINT8) :
900 // no endian swapping necessary for single-byte data
904 case(Constants::BAM_TAG_TYPE_INT16) :
905 case(Constants::BAM_TAG_TYPE_UINT16) :
906 BamTools::SwapEndian_16p(&tagData[i]);
907 i += sizeof(uint16_t);
910 case(Constants::BAM_TAG_TYPE_FLOAT) :
911 case(Constants::BAM_TAG_TYPE_INT32) :
912 case(Constants::BAM_TAG_TYPE_UINT32) :
913 BamTools::SwapEndian_32p(&tagData[i]);
914 i += sizeof(uint32_t);
917 case(Constants::BAM_TAG_TYPE_HEX) :
918 case(Constants::BAM_TAG_TYPE_STRING) :
919 // no endian swapping necessary for hex-string/string data
922 // increment one more for null terminator
926 case(Constants::BAM_TAG_TYPE_ARRAY) :
930 const char arrayType = tagData[i];
933 // swap endian-ness of number of elements in place, then retrieve for loop
934 BamTools::SwapEndian_32p(&tagData[i]);
936 memcpy(&numElements, &tagData[i], sizeof(uint32_t));
937 i += sizeof(uint32_t);
939 // swap endian-ness of array elements
940 for ( int j = 0; j < numElements; ++j ) {
942 case (Constants::BAM_TAG_TYPE_INT8) :
943 case (Constants::BAM_TAG_TYPE_UINT8) :
944 // no endian-swapping necessary
947 case (Constants::BAM_TAG_TYPE_INT16) :
948 case (Constants::BAM_TAG_TYPE_UINT16) :
949 BamTools::SwapEndian_16p(&tagData[i]);
950 i += sizeof(uint16_t);
952 case (Constants::BAM_TAG_TYPE_FLOAT) :
953 case (Constants::BAM_TAG_TYPE_INT32) :
954 case (Constants::BAM_TAG_TYPE_UINT32) :
955 BamTools::SwapEndian_32p(&tagData[i]);
956 i += sizeof(uint32_t);
960 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
961 << arrayType << endl;
969 // shouldn't get here
971 cerr << "BamAlignment ERROR: invalid tag value type: "
978 // store tagData in alignment
979 TagData.resize(tagDataLength);
980 memcpy((char*)TagData.data(), tagData, tagDataLength);
983 // clear the core-only flag
984 SupportData.HasCoreOnly = false;
990 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
991 \brief Edits a BAM tag field containing string data.
993 If \a tag does not exist, a new entry is created.
995 \param tag 2-character tag name
996 \param type 1-character tag type (must be "Z" or "H")
997 \param value string data to store
999 \return \c true if the tag was modified/created successfully
1001 \sa BamAlignment::RemoveTag()
1002 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1004 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1006 // skip if core data not parsed
1007 if ( SupportData.HasCoreOnly ) return false;
1009 // validate tag/type size & that type is OK for string value
1010 if ( !IsValidSize(tag, type) ) return false;
1011 if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1012 type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1015 // localize the tag data
1016 char* pOriginalTagData = (char*)TagData.data();
1017 char* pTagData = pOriginalTagData;
1018 const unsigned int originalTagDataLength = TagData.size();
1020 unsigned int newTagDataLength = 0;
1021 unsigned int numBytesParsed = 0;
1024 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1026 // make sure array is more than big enough
1027 char newTagData[originalTagDataLength + value.size()];
1029 // copy original tag data up til desired tag
1030 const unsigned int beginningTagDataLength = numBytesParsed;
1031 newTagDataLength += beginningTagDataLength;
1032 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1034 // copy new @value in place of current tag data
1035 const unsigned int dataLength = strlen(value.c_str());
1036 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1038 // skip to next tag (if tag for removal is last, return true)
1039 const char* pTagStorageType = pTagData - 1;
1040 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1043 // copy everything from current tag (the next one after tag for removal) to end
1044 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1045 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1046 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1047 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1049 // ensure null-terminator
1050 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1052 // save new tag data
1053 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1057 // tag not found, attempt AddTag
1058 else return AddTag(tag, type, value);
1061 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1062 \brief Edits a BAM tag field containing unsigned integer data.
1064 If \a tag does not exist, a new entry is created.
1066 \param tag 2-character tag name
1067 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1068 \param value unsigned integer data to store
1070 \return \c true if the tag was modified/created successfully
1072 \sa BamAlignment::RemoveTag()
1073 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1075 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1077 // skip if core data not parsed
1078 if ( SupportData.HasCoreOnly ) return false;
1080 // validate tag/type size & that type is OK for uint32_t value
1081 if ( !IsValidSize(tag, type) ) return false;
1082 if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1083 type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1084 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1085 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1091 // localize the tag data
1092 char* pOriginalTagData = (char*)TagData.data();
1093 char* pTagData = pOriginalTagData;
1094 const unsigned int originalTagDataLength = TagData.size();
1096 unsigned int newTagDataLength = 0;
1097 unsigned int numBytesParsed = 0;
1100 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1102 // make sure array is more than big enough
1103 char newTagData[originalTagDataLength + sizeof(value)];
1105 // copy original tag data up til desired tag
1106 const unsigned int beginningTagDataLength = numBytesParsed;
1107 newTagDataLength += beginningTagDataLength;
1108 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1110 // copy new @value in place of current tag data
1111 union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1113 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1115 // skip to next tag (if tag for removal is last, return true)
1116 const char* pTagStorageType = pTagData - 1;
1117 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1120 // copy everything from current tag (the next one after tag for removal) to end
1121 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1122 const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1123 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1124 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1126 // ensure null-terminator
1127 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1129 // save new tag data
1130 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1134 // tag not found, attempt AddTag
1135 else return AddTag(tag, type, value);
1138 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1139 \brief Edits a BAM tag field containing signed integer data.
1141 If \a tag does not exist, a new entry is created.
1143 \param tag 2-character tag name
1144 \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1145 \param value signed integer data to store
1147 \return \c true if the tag was modified/created successfully
1149 \sa BamAlignment::RemoveTag()
1150 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1152 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1153 return EditTag(tag, type, (const uint32_t&)value);
1156 /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1157 \brief Edits a BAM tag field containing floating-point data.
1159 If \a tag does not exist, a new entry is created.
1161 \param tag 2-character tag name
1162 \param type 1-character tag type (must NOT be "Z", "H", or "B")
1163 \param value float data to store
1165 \return \c true if the tag was modified/created successfully
1167 \sa BamAlignment::RemoveTag()
1168 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1170 bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1172 // skip if core data not parsed
1173 if ( SupportData.HasCoreOnly ) return false;
1175 // validate tag/type size & that type is OK for float value
1176 if ( !IsValidSize(tag, type) ) return false;
1177 if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1178 type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1179 type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1185 // localize the tag data
1186 char* pOriginalTagData = (char*)TagData.data();
1187 char* pTagData = pOriginalTagData;
1188 const unsigned int originalTagDataLength = TagData.size();
1190 unsigned int newTagDataLength = 0;
1191 unsigned int numBytesParsed = 0;
1194 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1196 // make sure array is more than big enough
1197 char newTagData[originalTagDataLength + sizeof(value)];
1199 // copy original tag data up til desired tag
1200 const unsigned int beginningTagDataLength = numBytesParsed;
1201 newTagDataLength += beginningTagDataLength;
1202 memcpy(newTagData, pOriginalTagData, numBytesParsed);
1204 // copy new @value in place of current tag data
1205 union { float value; char valueBuffer[sizeof(float)]; } un;
1207 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1209 // skip to next tag (if tag for removal is last, return true)
1210 const char* pTagStorageType = pTagData - 1;
1211 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1214 // copy everything from current tag (the next one after tag for removal) to end
1215 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1216 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1217 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1218 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1220 // ensure null-terminator
1221 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1223 // save new tag data
1224 TagData.assign(newTagData, endTagOffset + endTagDataLength);
1228 // tag not found, attempt AddTag
1229 else return AddTag(tag, type, value);
1232 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1233 \brief Edits a BAM tag field containing a numeric array.
1235 If \a tag does not exist, a new entry is created.
1237 \param tag 2-character tag name
1238 \param value vector of uint8_t values to store
1240 \return \c true if the tag was modified/created successfully
1241 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1243 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1245 // can't do anything if TagData not parsed
1246 if ( SupportData.HasCoreOnly )
1249 // remove existing tag if present
1253 // add tag record with new values
1254 return AddTag(tag, values);
1257 /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1258 \brief Edits a BAM tag field containing a numeric array.
1260 If \a tag does not exist, a new entry is created.
1262 \param tag 2-character tag name
1263 \param value vector of int8_t values to store
1265 \return \c true if the tag was modified/created successfully
1266 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1268 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1270 // can't do anything if TagData not parsed
1271 if ( SupportData.HasCoreOnly )
1274 // remove existing tag if present
1278 // add tag record with new values
1279 return AddTag(tag, values);
1282 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1283 \brief Edits a BAM tag field containing a numeric array.
1285 If \a tag does not exist, a new entry is created.
1287 \param tag 2-character tag name
1288 \param value vector of uint16_t values to store
1290 \return \c true if the tag was modified/created successfully
1291 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1293 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1295 // can't do anything if TagData not parsed
1296 if ( SupportData.HasCoreOnly )
1299 // remove existing tag if present
1303 // add tag record with new values
1304 return AddTag(tag, values);
1307 /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1308 \brief Edits a BAM tag field containing a numeric array.
1310 If \a tag does not exist, a new entry is created.
1312 \param tag 2-character tag name
1313 \param value vector of int16_t values to store
1315 \return \c true if the tag was modified/created successfully
1316 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1318 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1320 // can't do anything if TagData not parsed
1321 if ( SupportData.HasCoreOnly )
1324 // remove existing tag if present
1328 // add tag record with new values
1329 return AddTag(tag, values);
1332 /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1333 \brief Edits a BAM tag field containing a numeric array.
1335 If \a tag does not exist, a new entry is created.
1337 \param tag 2-character tag name
1338 \param value vector of uint32_t values to store
1340 \return \c true if the tag was modified/created successfully
1341 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1343 bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1345 // can't do anything if TagData not parsed
1346 if ( SupportData.HasCoreOnly )
1349 // remove existing tag if present
1353 // add tag record with new values
1354 return AddTag(tag, values);
1357 /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1358 \brief Edits a BAM tag field containing a numeric array.
1360 If \a tag does not exist, a new entry is created.
1362 \param tag 2-character tag name
1363 \param value vector of int32_t values to store
1365 \return \c true if the tag was modified/created successfully
1366 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1368 bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1370 // can't do anything if TagData not parsed
1371 if ( SupportData.HasCoreOnly )
1374 // remove existing tag if present
1378 // add tag record with new values
1379 return AddTag(tag, values);
1382 /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1383 \brief Edits a BAM tag field containing a numeric array.
1385 If \a tag does not exist, a new entry is created.
1387 \param tag 2-character tag name
1388 \param value vector of float values to store
1390 \return \c true if the tag was modified/created successfully
1391 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1393 bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1395 // can't do anything if TagData not parsed
1396 if ( SupportData.HasCoreOnly )
1399 // remove existing tag if present
1403 // add tag record with new values
1404 return AddTag(tag, values);
1407 /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
1410 Searches for requested tag in BAM tag data.
1412 \param tag requested 2-character tag name
1413 \param pTagData pointer to current position in BamAlignment::TagData
1414 \param tagDataLength length of BamAlignment::TagData
1415 \param numBytesParsed number of bytes parsed so far
1417 \return \c true if found
1419 \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
1420 \a numBytesParsed will correspond to the position in the full TagData string.
1423 bool BamAlignment::FindTag(const std::string& tag,
1425 const unsigned int& tagDataLength,
1426 unsigned int& numBytesParsed) const
1429 while ( numBytesParsed < tagDataLength ) {
1431 const char* pTagType = pTagData;
1432 const char* pTagStorageType = pTagData + 2;
1434 numBytesParsed += 3;
1436 // check the current tag, return true on match
1437 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
1440 // get the storage class and find the next tag
1441 if ( *pTagStorageType == '\0' ) return false;
1442 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
1443 if ( *pTagData == '\0' ) return false;
1446 // checked all tags, none match
1450 /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1451 \brief Retrieves value of edit distance tag ("NM").
1453 \deprecated Instead use BamAlignment::GetTag()
1455 BamAlignment::GetTag("NM", editDistance);
1458 \param editDistance destination for retrieved value
1460 \return \c true if found
1462 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1463 return GetTag("NM", (uint32_t&)editDistance);
1466 /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1467 \brief Calculates alignment end position, based on starting position and CIGAR data.
1469 \param usePadded Inserted bases affect reported position. Default is false, so that reported
1470 position stays 'sync-ed' with reference coordinates.
1471 \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1472 when using BAM data with half-open formats (e.g. BED).
1474 \return alignment end position
1476 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1478 // initialize alignment end to starting position
1479 int alignEnd = Position;
1481 // iterate over cigar operations
1482 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1483 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1484 for ( ; cigarIter != cigarEnd; ++cigarIter) {
1485 const char cigarType = (*cigarIter).Type;
1486 const uint32_t& cigarLength = (*cigarIter).Length;
1488 if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1489 cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1490 cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1491 alignEnd += cigarLength;
1492 else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1493 alignEnd += cigarLength;
1496 // adjust for zero-based coordinates, if requested
1497 if ( zeroBased ) alignEnd -= 1;
1503 /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1504 \brief Retrieves value of read group tag ("RG").
1506 \deprecated Instead use BamAlignment::GetTag()
1508 BamAlignment::GetTag("RG", readGroup);
1511 \param readGroup destination for retrieved value
1513 \return \c true if found
1515 bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1516 return GetTag("RG", readGroup);
1519 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1520 \brief Retrieves the string value associated with a BAM tag.
1522 \param tag 2-character tag name
1523 \param destination destination for retrieved value
1525 \return \c true if found
1527 bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1529 // make sure tag data exists
1530 if ( SupportData.HasCoreOnly || TagData.empty() )
1533 // localize the tag data
1534 char* pTagData = (char*)TagData.data();
1535 const unsigned int tagDataLength = TagData.size();
1536 unsigned int numBytesParsed = 0;
1539 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1540 const unsigned int dataLength = strlen(pTagData);
1541 destination.clear();
1542 destination.resize(dataLength);
1543 memcpy( (char*)destination.data(), pTagData, dataLength );
1547 // tag not found, return failure
1551 /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1552 \brief Retrieves the unsigned integer value associated with a BAM tag.
1554 \param tag 2-character tag name
1555 \param destination destination for retrieved value
1557 \return \c true if found
1559 bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1561 // make sure tag data exists
1562 if ( SupportData.HasCoreOnly || TagData.empty() )
1565 // localize the tag data
1566 char* pTagData = (char*)TagData.data();
1567 const unsigned int tagDataLength = TagData.size();
1568 unsigned int numBytesParsed = 0;
1571 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1573 // determine data byte-length
1574 const char type = *(pTagData - 1);
1575 int destinationLength = 0;
1579 case (Constants::BAM_TAG_TYPE_ASCII) :
1580 case (Constants::BAM_TAG_TYPE_INT8) :
1581 case (Constants::BAM_TAG_TYPE_UINT8) :
1582 destinationLength = 1;
1586 case (Constants::BAM_TAG_TYPE_INT16) :
1587 case (Constants::BAM_TAG_TYPE_UINT16) :
1588 destinationLength = 2;
1592 case (Constants::BAM_TAG_TYPE_INT32) :
1593 case (Constants::BAM_TAG_TYPE_UINT32) :
1594 destinationLength = 4;
1597 // unsupported type for integer destination (float or var-length strings)
1598 case (Constants::BAM_TAG_TYPE_FLOAT) :
1599 case (Constants::BAM_TAG_TYPE_STRING) :
1600 case (Constants::BAM_TAG_TYPE_HEX) :
1601 case (Constants::BAM_TAG_TYPE_ARRAY) :
1602 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1603 << " in integer destination" << endl;
1608 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1613 // store in destination
1615 memcpy(&destination, pTagData, destinationLength);
1619 // tag not found, return failure
1623 /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1624 \brief Retrieves the signed integer value associated with a BAM tag.
1626 \param tag 2-character tag name
1627 \param destination destination for retrieved value
1629 \return \c true if found
1631 bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1632 return GetTag(tag, (uint32_t&)destination);
1635 /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1636 \brief Retrieves the floating-point value associated with a BAM tag.
1638 \param tag 2-character tag name
1639 \param destination destination for retrieved value
1641 \return \c true if found
1643 bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1645 // make sure tag data exists
1646 if ( SupportData.HasCoreOnly || TagData.empty() )
1649 // localize the tag data
1650 char* pTagData = (char*)TagData.data();
1651 const unsigned int tagDataLength = TagData.size();
1652 unsigned int numBytesParsed = 0;
1655 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1657 // determine data byte-length
1658 const char type = *(pTagData - 1);
1659 int destinationLength = 0;
1663 case (Constants::BAM_TAG_TYPE_ASCII) :
1664 case (Constants::BAM_TAG_TYPE_INT8) :
1665 case (Constants::BAM_TAG_TYPE_UINT8) :
1666 destinationLength = 1;
1670 case (Constants::BAM_TAG_TYPE_INT16) :
1671 case (Constants::BAM_TAG_TYPE_UINT16) :
1672 destinationLength = 2;
1676 case (Constants::BAM_TAG_TYPE_FLOAT) :
1677 case (Constants::BAM_TAG_TYPE_INT32) :
1678 case (Constants::BAM_TAG_TYPE_UINT32) :
1679 destinationLength = 4;
1682 // unsupported type (var-length strings)
1683 case (Constants::BAM_TAG_TYPE_STRING) :
1684 case (Constants::BAM_TAG_TYPE_HEX) :
1685 case (Constants::BAM_TAG_TYPE_ARRAY) :
1686 cerr << "BamAlignment ERROR: cannot store tag of type " << type
1687 << " in float destination" << endl;
1692 cerr << "BamAlignment ERROR: unknown tag type encountered: "
1697 // store in destination
1699 memcpy(&destination, pTagData, destinationLength);
1703 // tag not found, return failure
1707 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1708 \brief Retrieves the numeric array data associated with a BAM tag
1710 \param tag 2-character tag name
1711 \param destination destination for retrieved data
1713 \return \c true if found
1715 bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1717 // make sure tag data exists
1718 if ( SupportData.HasCoreOnly || TagData.empty() )
1721 // localize the tag data
1722 char* pTagData = (char*)TagData.data();
1723 const unsigned int tagDataLength = TagData.size();
1724 unsigned int numBytesParsed = 0;
1726 // return false if tag not found
1727 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1730 // check that tag is array type
1731 const char tagType = *(pTagData - 1);
1732 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1733 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1734 << tag << " in array destination" << endl;
1738 // calculate length of each element in tag's array
1739 const char elementType = *pTagData;
1741 int elementLength = 0;
1742 switch ( elementType ) {
1743 case (Constants::BAM_TAG_TYPE_ASCII) :
1744 case (Constants::BAM_TAG_TYPE_INT8) :
1745 case (Constants::BAM_TAG_TYPE_UINT8) :
1746 elementLength = sizeof(uint8_t);
1749 case (Constants::BAM_TAG_TYPE_INT16) :
1750 case (Constants::BAM_TAG_TYPE_UINT16) :
1751 elementLength = sizeof(uint16_t);
1754 case (Constants::BAM_TAG_TYPE_INT32) :
1755 case (Constants::BAM_TAG_TYPE_UINT32) :
1756 elementLength = sizeof(uint32_t);
1759 // unsupported type for integer destination (float or var-length data)
1760 case (Constants::BAM_TAG_TYPE_FLOAT) :
1761 case (Constants::BAM_TAG_TYPE_STRING) :
1762 case (Constants::BAM_TAG_TYPE_HEX) :
1763 case (Constants::BAM_TAG_TYPE_ARRAY) :
1764 cerr << "BamAlignment ERROR: array element type: " << elementType
1765 << " cannot be stored in integer value" << endl;
1770 cerr << "BamAlignment ERROR: unknown element type encountered: "
1771 << elementType << endl;
1775 // get number of elements
1776 int32_t numElements;
1777 memcpy(&numElements, pTagData, sizeof(int32_t));
1779 destination.clear();
1780 destination.reserve(numElements);
1784 for ( int i = 0 ; i < numElements; ++i ) {
1785 memcpy(&value, pTagData, sizeof(uint32_t));
1786 pTagData += sizeof(uint32_t);
1787 destination.push_back(value);
1794 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1795 \brief Retrieves the numeric array data associated with a BAM tag
1797 \param tag 2-character tag name
1798 \param destination destination for retrieved data
1800 \return \c true if found
1802 bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1804 // make sure tag data exists
1805 if ( SupportData.HasCoreOnly || TagData.empty() )
1808 // localize the tag data
1809 char* pTagData = (char*)TagData.data();
1810 const unsigned int tagDataLength = TagData.size();
1811 unsigned int numBytesParsed = 0;
1813 // return false if tag not found
1814 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1817 // check that tag is array type
1818 const char tagType = *(pTagData - 1);
1819 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1820 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1821 << tag << " in array destination" << endl;
1825 // calculate length of each element in tag's array
1826 const char elementType = *pTagData;
1828 int elementLength = 0;
1829 switch ( elementType ) {
1830 case (Constants::BAM_TAG_TYPE_ASCII) :
1831 case (Constants::BAM_TAG_TYPE_INT8) :
1832 case (Constants::BAM_TAG_TYPE_UINT8) :
1833 elementLength = sizeof(uint8_t);
1836 case (Constants::BAM_TAG_TYPE_INT16) :
1837 case (Constants::BAM_TAG_TYPE_UINT16) :
1838 elementLength = sizeof(uint16_t);
1841 case (Constants::BAM_TAG_TYPE_INT32) :
1842 case (Constants::BAM_TAG_TYPE_UINT32) :
1843 elementLength = sizeof(uint32_t);
1846 // unsupported type for integer destination (float or var-length data)
1847 case (Constants::BAM_TAG_TYPE_FLOAT) :
1848 case (Constants::BAM_TAG_TYPE_STRING) :
1849 case (Constants::BAM_TAG_TYPE_HEX) :
1850 case (Constants::BAM_TAG_TYPE_ARRAY) :
1851 cerr << "BamAlignment ERROR: array element type: " << elementType
1852 << " cannot be stored in integer value" << endl;
1857 cerr << "BamAlignment ERROR: unknown element type encountered: "
1858 << elementType << endl;
1862 // get number of elements
1863 int32_t numElements;
1864 memcpy(&numElements, pTagData, sizeof(int32_t));
1866 destination.clear();
1867 destination.reserve(numElements);
1871 for ( int i = 0 ; i < numElements; ++i ) {
1872 memcpy(&value, pTagData, sizeof(int32_t));
1873 pTagData += sizeof(int32_t);
1874 destination.push_back(value);
1882 /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
1883 \brief Retrieves the numeric array data associated with a BAM tag
1885 \param tag 2-character tag name
1886 \param destination destination for retrieved data
1888 \return \c true if found
1890 bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
1892 // make sure tag data exists
1893 if ( SupportData.HasCoreOnly || TagData.empty() )
1896 // localize the tag data
1897 char* pTagData = (char*)TagData.data();
1898 const unsigned int tagDataLength = TagData.size();
1899 unsigned int numBytesParsed = 0;
1901 // return false if tag not found
1902 if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1905 // check that tag is array type
1906 const char tagType = *(pTagData - 1);
1907 if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1908 cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1909 << tag << " in array destination" << endl;
1913 // calculate length of each element in tag's array
1914 const char elementType = *pTagData;
1916 int elementLength = 0;
1917 switch ( elementType ) {
1918 case (Constants::BAM_TAG_TYPE_ASCII) :
1919 case (Constants::BAM_TAG_TYPE_INT8) :
1920 case (Constants::BAM_TAG_TYPE_UINT8) :
1921 elementLength = sizeof(uint8_t);
1924 case (Constants::BAM_TAG_TYPE_INT16) :
1925 case (Constants::BAM_TAG_TYPE_UINT16) :
1926 elementLength = sizeof(uint16_t);
1929 case (Constants::BAM_TAG_TYPE_INT32) :
1930 case (Constants::BAM_TAG_TYPE_UINT32) :
1931 case (Constants::BAM_TAG_TYPE_FLOAT) :
1932 elementLength = sizeof(uint32_t);
1935 // unsupported type for float destination (var-length data)
1936 case (Constants::BAM_TAG_TYPE_STRING) :
1937 case (Constants::BAM_TAG_TYPE_HEX) :
1938 case (Constants::BAM_TAG_TYPE_ARRAY) :
1939 cerr << "BamAlignment ERROR: array element type: " << elementType
1940 << " cannot be stored in float value" << endl;
1945 cerr << "BamAlignment ERROR: unknown element type encountered: "
1946 << elementType << endl;
1950 // get number of elements
1951 int32_t numElements;
1952 memcpy(&numElements, pTagData, sizeof(int32_t));
1954 destination.clear();
1955 destination.reserve(numElements);
1959 for ( int i = 0 ; i < numElements; ++i ) {
1960 memcpy(&value, pTagData, sizeof(float));
1961 pTagData += sizeof(float);
1962 destination.push_back(value);
1969 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
1970 \brief Retrieves the BAM tag type-code associated with requested tag name.
1972 \param tag 2-character tag name
1973 \param type destination for the retrieved (1-character) tag type
1975 \return \c true if found
1976 \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1978 bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
1980 // make sure tag data exists
1981 if ( SupportData.HasCoreOnly || TagData.empty() )
1984 // localize the tag data
1985 char* pTagData = (char*)TagData.data();
1986 const unsigned int tagDataLength = TagData.size();
1987 unsigned int numBytesParsed = 0;
1990 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1992 // retrieve tag type code
1993 type = *(pTagData - 1);
1995 // validate that type is a proper BAM tag type
1997 case (Constants::BAM_TAG_TYPE_ASCII) :
1998 case (Constants::BAM_TAG_TYPE_INT8) :
1999 case (Constants::BAM_TAG_TYPE_UINT8) :
2000 case (Constants::BAM_TAG_TYPE_INT16) :
2001 case (Constants::BAM_TAG_TYPE_UINT16) :
2002 case (Constants::BAM_TAG_TYPE_INT32) :
2003 case (Constants::BAM_TAG_TYPE_UINT32) :
2004 case (Constants::BAM_TAG_TYPE_FLOAT) :
2005 case (Constants::BAM_TAG_TYPE_STRING) :
2006 case (Constants::BAM_TAG_TYPE_HEX) :
2007 case (Constants::BAM_TAG_TYPE_ARRAY) :
2012 cerr << "BamAlignment ERROR: unknown tag type encountered: "
2018 // tag not found, return failure
2022 /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2023 \brief Returns true if alignment has a record for requested tag.
2024 \param tag 2-character tag name
2025 \return \c true if alignment has a record for tag
2027 bool BamAlignment::HasTag(const std::string& tag) const {
2029 // return false if no tag data present
2030 if ( SupportData.HasCoreOnly || TagData.empty() )
2033 // localize the tag data for lookup
2034 char* pTagData = (char*)TagData.data();
2035 const unsigned int tagDataLength = TagData.size();
2036 unsigned int numBytesParsed = 0;
2038 // if result of tag lookup
2039 return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2042 /*! \fn bool BamAlignment::IsDuplicate(void) const
2043 \return \c true if this read is a PCR duplicate
2045 bool BamAlignment::IsDuplicate(void) const {
2046 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2049 /*! \fn bool BamAlignment::IsFailedQC(void) const
2050 \return \c true if this read failed quality control
2052 bool BamAlignment::IsFailedQC(void) const {
2053 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2056 /*! \fn bool BamAlignment::IsFirstMate(void) const
2057 \return \c true if alignment is first mate on paired-end read
2059 bool BamAlignment::IsFirstMate(void) const {
2060 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2063 /*! \fn bool BamAlignment::IsMapped(void) const
2064 \return \c true if alignment is mapped
2066 bool BamAlignment::IsMapped(void) const {
2067 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2070 /*! \fn bool BamAlignment::IsMateMapped(void) const
2071 \return \c true if alignment's mate is mapped
2073 bool BamAlignment::IsMateMapped(void) const {
2074 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2077 /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2078 \return \c true if alignment's mate mapped to reverse strand
2080 bool BamAlignment::IsMateReverseStrand(void) const {
2081 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2084 /*! \fn bool BamAlignment::IsPaired(void) const
2085 \return \c true if alignment part of paired-end read
2087 bool BamAlignment::IsPaired(void) const {
2088 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2091 /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2092 \return \c true if reported position is primary alignment
2094 bool BamAlignment::IsPrimaryAlignment(void) const {
2095 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2098 /*! \fn bool BamAlignment::IsProperPair(void) const
2099 \return \c true if alignment is part of read that satisfied paired-end resolution
2101 bool BamAlignment::IsProperPair(void) const {
2102 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2105 /*! \fn bool BamAlignment::IsReverseStrand(void) const
2106 \return \c true if alignment mapped to reverse strand
2108 bool BamAlignment::IsReverseStrand(void) const {
2109 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2112 /*! \fn bool BamAlignment::IsSecondMate(void) const
2113 \return \c true if alignment is second mate on read
2115 bool BamAlignment::IsSecondMate(void) const {
2116 return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2119 /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
2122 Checks that tag name & type strings are expected sizes.
2123 \a tag should have length
2124 \a type should have length 1
2126 \param tag BAM tag name
2127 \param type BAM tag type-code
2129 \return \c true if both \a tag and \a type are correct sizes
2131 bool BamAlignment::IsValidSize(const string& tag, const string& type) const {
2132 return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
2133 (type.size() == Constants::BAM_TAG_TYPESIZE);
2136 /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2137 \brief Removes field from BAM tags.
2139 \return \c true if tag was removed successfully (or didn't exist before)
2141 bool BamAlignment::RemoveTag(const std::string& tag) {
2143 // skip if no tag data available
2144 if ( SupportData.HasCoreOnly || TagData.empty() )
2147 // localize the tag data
2148 char* pOriginalTagData = (char*)TagData.data();
2149 char* pTagData = pOriginalTagData;
2150 const unsigned int originalTagDataLength = TagData.size();
2151 unsigned int newTagDataLength = 0;
2152 unsigned int numBytesParsed = 0;
2155 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2157 char newTagData[originalTagDataLength];
2159 // copy original tag data up til desired tag
2161 numBytesParsed -= 3;
2162 const unsigned int beginningTagDataLength = numBytesParsed;
2163 newTagDataLength += beginningTagDataLength;
2164 memcpy(newTagData, pOriginalTagData, numBytesParsed);
2166 // skip to next tag (if tag for removal is last, return true)
2167 const char* pTagStorageType = pTagData + 2;
2169 numBytesParsed += 3;
2170 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2173 // copy everything from current tag (the next one after tag for removal) to end
2174 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2175 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2176 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2178 // save new tag data
2179 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2183 // tag not found, no removal - return failure
2187 /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2188 \brief Sets value of "PCR duplicate" flag to \a ok.
2190 void BamAlignment::SetIsDuplicate(bool ok) {
2191 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2192 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2195 /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2196 \brief Sets "failed quality control" flag to \a ok.
2198 void BamAlignment::SetIsFailedQC(bool ok) {
2199 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2200 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2203 /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2204 \brief Sets "alignment is first mate" flag to \a ok.
2206 void BamAlignment::SetIsFirstMate(bool ok) {
2207 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2208 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2211 /*! \fn void BamAlignment::SetIsMapped(bool ok)
2212 \brief Sets "alignment is mapped" flag to \a ok.
2214 void BamAlignment::SetIsMapped(bool ok) {
2215 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2216 else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2219 /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2220 \brief Sets "alignment's mate is mapped" flag to \a ok.
2222 void BamAlignment::SetIsMateMapped(bool ok) {
2223 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2224 else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2227 /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2228 \brief Complement of using SetIsMateMapped().
2229 \deprecated For sake of symmetry with the query methods
2230 \sa IsMateMapped(), SetIsMateMapped()
2232 void BamAlignment::SetIsMateUnmapped(bool ok) {
2233 SetIsMateMapped(!ok);
2236 /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2237 \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2239 void BamAlignment::SetIsMateReverseStrand(bool ok) {
2240 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2241 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2244 /*! \fn void BamAlignment::SetIsPaired(bool ok)
2245 \brief Sets "alignment part of paired-end read" flag to \a ok.
2247 void BamAlignment::SetIsPaired(bool ok) {
2248 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2249 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2252 /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2253 \brief Sets "position is primary alignment" flag to \a ok.
2255 void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2256 if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2257 else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2260 /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2261 \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2263 void BamAlignment::SetIsProperPair(bool ok) {
2264 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2265 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2268 /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2269 \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2271 void BamAlignment::SetIsReverseStrand(bool ok) {
2272 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2273 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2276 /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2277 \brief Complement of using SetIsPrimaryAlignment().
2278 \deprecated For sake of symmetry with the query methods
2279 \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2281 void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2282 SetIsPrimaryAlignment(!ok);
2285 /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2286 \brief Sets "alignment is second mate on read" flag to \a ok.
2288 void BamAlignment::SetIsSecondMate(bool ok) {
2289 if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2290 else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2293 /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2294 \brief Complement of using SetIsMapped().
2295 \deprecated For sake of symmetry with the query methods
2296 \sa IsMapped(), SetIsMapped()
2298 void BamAlignment::SetIsUnmapped(bool ok) {
2302 /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
2305 Moves to next available tag in tag data string
2307 \param storageType BAM tag type-code that determines how far to move cursor
2308 \param pTagData pointer to current position (cursor) in tag string
2309 \param numBytesParsed report of how many bytes were parsed (cumulatively)
2311 \return \c if storageType was a recognized BAM tag type
2312 \post \a pTagData will point to the byte where the next tag data begins.
2313 \a numBytesParsed will correspond to the cursor's position in the full TagData string.
2315 bool BamAlignment::SkipToNextTag(const char storageType,
2317 unsigned int& numBytesParsed) const
2319 switch (storageType) {
2321 case (Constants::BAM_TAG_TYPE_ASCII) :
2322 case (Constants::BAM_TAG_TYPE_INT8) :
2323 case (Constants::BAM_TAG_TYPE_UINT8) :
2328 case (Constants::BAM_TAG_TYPE_INT16) :
2329 case (Constants::BAM_TAG_TYPE_UINT16) :
2330 numBytesParsed += sizeof(uint16_t);
2331 pTagData += sizeof(uint16_t);
2334 case (Constants::BAM_TAG_TYPE_FLOAT) :
2335 case (Constants::BAM_TAG_TYPE_INT32) :
2336 case (Constants::BAM_TAG_TYPE_UINT32) :
2337 numBytesParsed += sizeof(uint32_t);
2338 pTagData += sizeof(uint32_t);
2341 case (Constants::BAM_TAG_TYPE_STRING) :
2342 case (Constants::BAM_TAG_TYPE_HEX) :
2343 while( *pTagData ) {
2347 // increment for null-terminator
2352 case (Constants::BAM_TAG_TYPE_ARRAY) :
2356 const char arrayType = *pTagData;
2360 // read number of elements
2361 int32_t numElements;
2362 memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
2363 numBytesParsed += sizeof(uint32_t);
2364 pTagData += sizeof(uint32_t);
2366 // calculate number of bytes to skip
2367 int bytesToSkip = 0;
2368 switch (arrayType) {
2369 case (Constants::BAM_TAG_TYPE_INT8) :
2370 case (Constants::BAM_TAG_TYPE_UINT8) :
2371 bytesToSkip = numElements;
2373 case (Constants::BAM_TAG_TYPE_INT16) :
2374 case (Constants::BAM_TAG_TYPE_UINT16) :
2375 bytesToSkip = numElements*sizeof(uint16_t);
2377 case (Constants::BAM_TAG_TYPE_FLOAT) :
2378 case (Constants::BAM_TAG_TYPE_INT32) :
2379 case (Constants::BAM_TAG_TYPE_UINT32) :
2380 bytesToSkip = numElements*sizeof(uint32_t);
2383 cerr << "BamAlignment ERROR: unknown binary array type encountered: "
2384 << arrayType << endl;
2388 // skip binary array contents
2389 numBytesParsed += bytesToSkip;
2390 pTagData += bytesToSkip;
2395 cerr << "BamAlignment ERROR: unknown tag type encountered"
2396 << storageType << endl;