+/*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
+    \brief Adds a numeric array field to the BAM tags.
+
+    Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+    The new entry is appended to the end of TagData: an array header of
+    Constants::BAM_TAG_ARRAYBASE_SIZE bytes (tag name, array type code, element type
+    code, 32-bit element count) followed by the raw bytes of every element, exactly as
+    they are laid out in \a values.
+
+    \param tag    2-character tag name
+    \param values vector of uint16_t values to store (may be empty)
+
+    \return \c true if the \b new tag was added successfully. Returns \c false if only core
+            data has been parsed, if \a tag does not have the required length, if \a tag is
+            already present, or if \a values holds more elements than a 32-bit signed count
+            can describe.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
+
+    // skip if core data not parsed
+    if ( SupportData.HasCoreOnly ) return false;
+
+    // check for valid tag length
+    if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+    // the element count is written as a signed 32-bit integer;
+    // refuse vectors whose size it cannot represent
+    if ( values.size() > 0x7FFFFFFFu ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // build new tag's base information
+    // (zero-filled so no indeterminate byte can ever reach TagData)
+    char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE] = { 0 };
+    memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+    newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+    newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
+
+    // add number of array elements to newTagBase
+    const int32_t numElements = static_cast<int32_t>(values.size());
+    memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+    // grow TagData once, up front, to its final size
+    const std::string::size_type numElementBytes = values.size() * sizeof(uint16_t);
+    TagData.reserve(TagData.size() + Constants::BAM_TAG_ARRAYBASE_SIZE + numElementBytes);
+
+    // append the header by explicit length: it is binary data (the element count
+    // usually contains zero bytes), so C-string functions such as strcat() would
+    // truncate it or read past its end
+    TagData.append(newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE);
+
+    // append the array elements; vector storage is contiguous, so one copy suffices
+    if ( !values.empty() )
+        TagData.append(reinterpret_cast<const char*>(&values[0]), numElementBytes);
+
+    // return success
+    return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
+    \brief Adds a numeric array field to the BAM tags.
+
+    Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+    The new entry is appended to the end of TagData: an array header of
+    Constants::BAM_TAG_ARRAYBASE_SIZE bytes (tag name, array type code, element type
+    code, 32-bit element count) followed by the raw bytes of every element, exactly as
+    they are laid out in \a values.
+
+    \param tag    2-character tag name
+    \param values vector of int16_t values to store (may be empty)
+
+    \return \c true if the \b new tag was added successfully. Returns \c false if only core
+            data has been parsed, if \a tag does not have the required length, if \a tag is
+            already present, or if \a values holds more elements than a 32-bit signed count
+            can describe.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
+
+    // skip if core data not parsed
+    if ( SupportData.HasCoreOnly ) return false;
+
+    // check for valid tag length
+    if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+    // the element count is written as a signed 32-bit integer;
+    // refuse vectors whose size it cannot represent
+    if ( values.size() > 0x7FFFFFFFu ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // build new tag's base information
+    // (zero-filled so no indeterminate byte can ever reach TagData)
+    char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE] = { 0 };
+    memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+    newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+    newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
+
+    // add number of array elements to newTagBase
+    const int32_t numElements = static_cast<int32_t>(values.size());
+    memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+    // grow TagData once, up front, to its final size
+    const std::string::size_type numElementBytes = values.size() * sizeof(int16_t);
+    TagData.reserve(TagData.size() + Constants::BAM_TAG_ARRAYBASE_SIZE + numElementBytes);
+
+    // append the header by explicit length: it is binary data (the element count
+    // usually contains zero bytes), so C-string functions such as strcat() would
+    // truncate it or read past its end
+    TagData.append(newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE);
+
+    // append the array elements; vector storage is contiguous, so one copy suffices
+    if ( !values.empty() )
+        TagData.append(reinterpret_cast<const char*>(&values[0]), numElementBytes);
+
+    // return success
+    return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
+    \brief Adds a numeric array field to the BAM tags.
+
+    Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+    The new entry is appended to the end of TagData: an array header of
+    Constants::BAM_TAG_ARRAYBASE_SIZE bytes (tag name, array type code, element type
+    code, 32-bit element count) followed by the raw bytes of every element, exactly as
+    they are laid out in \a values.
+
+    \param tag    2-character tag name
+    \param values vector of uint32_t values to store (may be empty)
+
+    \return \c true if the \b new tag was added successfully. Returns \c false if only core
+            data has been parsed, if \a tag does not have the required length, if \a tag is
+            already present, or if \a values holds more elements than a 32-bit signed count
+            can describe.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
+
+    // skip if core data not parsed
+    if ( SupportData.HasCoreOnly ) return false;
+
+    // check for valid tag length
+    if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+    // the element count is written as a signed 32-bit integer;
+    // refuse vectors whose size it cannot represent
+    if ( values.size() > 0x7FFFFFFFu ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // build new tag's base information
+    // (zero-filled so no indeterminate byte can ever reach TagData)
+    char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE] = { 0 };
+    memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+    newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+    newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
+
+    // add number of array elements to newTagBase
+    const int32_t numElements = static_cast<int32_t>(values.size());
+    memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+    // grow TagData once, up front, to its final size
+    const std::string::size_type numElementBytes = values.size() * sizeof(uint32_t);
+    TagData.reserve(TagData.size() + Constants::BAM_TAG_ARRAYBASE_SIZE + numElementBytes);
+
+    // append the header by explicit length: it is binary data (the element count
+    // usually contains zero bytes), so C-string functions such as strcat() would
+    // truncate it or read past its end
+    TagData.append(newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE);
+
+    // append the array elements; vector storage is contiguous, so one copy suffices
+    if ( !values.empty() )
+        TagData.append(reinterpret_cast<const char*>(&values[0]), numElementBytes);
+
+    // return success
+    return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
+    \brief Adds a numeric array field to the BAM tags.
+
+    Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+    The new entry is appended to the end of TagData: an array header of
+    Constants::BAM_TAG_ARRAYBASE_SIZE bytes (tag name, array type code, element type
+    code, 32-bit element count) followed by the raw bytes of every element, exactly as
+    they are laid out in \a values.
+
+    \param tag    2-character tag name
+    \param values vector of int32_t values to store (may be empty)
+
+    \return \c true if the \b new tag was added successfully. Returns \c false if only core
+            data has been parsed, if \a tag does not have the required length, if \a tag is
+            already present, or if \a values holds more elements than a 32-bit signed count
+            can describe.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
+
+    // skip if core data not parsed
+    if ( SupportData.HasCoreOnly ) return false;
+
+    // check for valid tag length
+    if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+    // the element count is written as a signed 32-bit integer;
+    // refuse vectors whose size it cannot represent
+    if ( values.size() > 0x7FFFFFFFu ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // build new tag's base information
+    // (zero-filled so no indeterminate byte can ever reach TagData)
+    char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE] = { 0 };
+    memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+    newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+    newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
+
+    // add number of array elements to newTagBase
+    const int32_t numElements = static_cast<int32_t>(values.size());
+    memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+    // grow TagData once, up front, to its final size
+    const std::string::size_type numElementBytes = values.size() * sizeof(int32_t);
+    TagData.reserve(TagData.size() + Constants::BAM_TAG_ARRAYBASE_SIZE + numElementBytes);
+
+    // append the header by explicit length: it is binary data (the element count
+    // usually contains zero bytes), so C-string functions such as strcat() would
+    // truncate it or read past its end
+    TagData.append(newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE);
+
+    // append the array elements; vector storage is contiguous, so one copy suffices
+    if ( !values.empty() )
+        TagData.append(reinterpret_cast<const char*>(&values[0]), numElementBytes);
+
+    // return success
+    return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
+    \brief Adds a numeric array field to the BAM tags.
+
+    Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+    The new entry is appended to the end of TagData: an array header of
+    Constants::BAM_TAG_ARRAYBASE_SIZE bytes (tag name, array type code, element type
+    code, 32-bit element count) followed by the raw bytes of every element, exactly as
+    they are laid out in \a values.
+
+    \param tag    2-character tag name
+    \param values vector of float values to store (may be empty)
+
+    \return \c true if the \b new tag was added successfully. Returns \c false if only core
+            data has been parsed, if \a tag does not have the required length, if \a tag is
+            already present, or if \a values holds more elements than a 32-bit signed count
+            can describe.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
+
+    // skip if core data not parsed
+    if ( SupportData.HasCoreOnly ) return false;
+
+    // check for valid tag length
+    if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+    // the element count is written as a signed 32-bit integer;
+    // refuse vectors whose size it cannot represent
+    if ( values.size() > 0x7FFFFFFFu ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // build new tag's base information
+    // (zero-filled so no indeterminate byte can ever reach TagData)
+    char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE] = { 0 };
+    memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+    newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+    newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
+
+    // add number of array elements to newTagBase
+    const int32_t numElements = static_cast<int32_t>(values.size());
+    memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+    // grow TagData once, up front, to its final size
+    const std::string::size_type numElementBytes = values.size() * sizeof(float);
+    TagData.reserve(TagData.size() + Constants::BAM_TAG_ARRAYBASE_SIZE + numElementBytes);
+
+    // append the header by explicit length: it is binary data (the element count
+    // usually contains zero bytes), so C-string functions such as strcat() would
+    // truncate it or read past its end
+    TagData.append(newTagBase, Constants::BAM_TAG_ARRAYBASE_SIZE);
+
+    // append the array elements; vector storage is contiguous, so one copy suffices
+    if ( !values.empty() )
+        TagData.append(reinterpret_cast<const char*>(&values[0]), numElementBytes);
+
+    // return success
+    return true;
+}
+
+/*! \fn bool BamAlignment::BuildCharData(void)
+    \brief Populates alignment string fields (read name, bases, qualities, tag data).
+
+    An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
+    Using that method makes parsing much quicker when only positional data is required.
+
+    However, if you later want to access the character data fields from such an alignment,
+    use this method to populate those fields. Provides ability to do 'lazy evaluation' of
+    alignment parsing.
+
+    Name, QueryBases, Qualities, AlignedBases and TagData are all rebuilt from
+    SupportData.AllCharData. When BamTools::SystemIsBigEndian() reports a big-endian host,
+    multi-byte tag values are byte-swapped in place inside SupportData.AllCharData before
+    being copied into TagData.
+
+    \warning An invalid CIGAR operation type, or (on big-endian hosts) an invalid tag value
+             type, prints a message to stderr and terminates the process via exit(1).
+
+    \return \c true if character data populated successfully (or was already available to begin with);
+            \c false if, on a big-endian host, a tag array with an unknown element type is encountered
+*/
+bool BamAlignment::BuildCharData(void) {
+
+    // skip if char data already parsed
+    if ( !SupportData.HasCoreOnly )
+        return true;
+
+    // check system endianness
+    bool IsBigEndian = BamTools::SystemIsBigEndian();
+
+    // calculate character lengths/offsets
+    // (layout of AllCharData: name, CIGAR (4 bytes per op), packed bases (2 per byte), qualities, tags)
+    const unsigned int dataLength     = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+    const unsigned int seqDataOffset  = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
+    const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
+    const unsigned int tagDataOffset  = qualDataOffset + SupportData.QuerySequenceLength;
+    const unsigned int tagDataLength  = dataLength - tagDataOffset; // only meaningful when hasTagData
+
+    // check offsets to see what char data exists
+    const bool hasSeqData  = ( seqDataOffset  < dataLength );
+    const bool hasQualData = ( qualDataOffset < dataLength );
+    const bool hasTagData  = ( tagDataOffset  < dataLength );
+
+    // set up char buffers
+    const char* allCharData = SupportData.AllCharData.data();
+    const char* seqData     = ( hasSeqData  ? (((const char*)allCharData) + seqDataOffset)  : (const char*)0 );
+    const char* qualData    = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
+          char* tagData     = ( hasTagData  ? (((char*)allCharData) + tagDataOffset)        : (char*)0 );
+
+    // store alignment name (relies on null char in name as terminator)
+    Name.assign((const char*)(allCharData));
+
+    // save query sequence
+    // each byte packs two 4-bit base codes, high nibble first
+    QueryBases.clear();
+    if ( hasSeqData ) {
+        QueryBases.reserve(SupportData.QuerySequenceLength);
+        for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
+            char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+            QueryBases.append(1, singleBase);
+        }
+    }
+
+    // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
+    Qualities.clear();
+    if ( hasQualData ) {
+        Qualities.reserve(SupportData.QuerySequenceLength);
+        for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
+            char singleQuality = (char)(qualData[i]+33);
+            Qualities.append(1, singleQuality);
+        }
+    }
+
+    // clear previous AlignedBases
+    AlignedBases.clear();
+
+    // if QueryBases has data, build AlignedBases using CIGAR data
+    // otherwise, AlignedBases will remain empty (this case IS allowed)
+    if ( !QueryBases.empty() ) {
+
+        // resize AlignedBases
+        AlignedBases.reserve(SupportData.QuerySequenceLength);
+
+        // iterate over CigarOps
+        // 'k' tracks the current position within QueryBases
+        int k = 0;
+        vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+        vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+            const CigarOp& op = (*cigarIter);
+
+            switch (op.Type) {
+
+                // for 'M', 'I', '=', 'X' - write bases
+                case (Constants::BAM_CIGAR_MATCH_CHAR)    :
+                case (Constants::BAM_CIGAR_INS_CHAR)      :
+                case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
+                case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
+                    AlignedBases.append(QueryBases.substr(k, op.Length));
+                    // fall through (intentional: these ops also consume query bases)
+
+                // for 'S' - soft clip, do not write bases
+                // but increment placeholder 'k'
+                case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
+                    k += op.Length;
+                    break;
+
+                // for 'D' - write gap character
+                case (Constants::BAM_CIGAR_DEL_CHAR) :
+                    AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
+                    break;
+
+                // for 'P' - write padding character
+                case (Constants::BAM_CIGAR_PAD_CHAR) :
+                    AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
+                    break;
+
+                // for 'N' - write N's, skip bases in original query sequence
+                case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
+                    AlignedBases.append( op.Length, Constants::BAM_DNA_N );
+                    break;
+
+                // for 'H' - hard clip, do nothing to AlignedBases, move to next op
+                case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
+                    break;
+
+                // shouldn't get here
+                default:
+                    cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
+                         << op.Type << endl;
+                    exit(1);
+            }
+        }
+    }
+
+    // save tag data
+    TagData.clear();
+    if ( hasTagData ) {
+
+        // on big-endian hosts, walk every tag and swap multi-byte values in place
+        // NOTE(review): this walk trusts the record layout; the only bound is the
+        //               while-condition checked at the start of each tag
+        if ( IsBigEndian ) {
+            int i = 0;
+            while ( (unsigned int)i < tagDataLength ) {
+
+                i += Constants::BAM_TAG_TAGSIZE;  // skip tag chars (e.g. "RG", "NM", etc.)
+                const char type = tagData[i];     // get tag type at position i
+                ++i;                              // move i past tag type
+
+                switch (type) {
+
+                    case(Constants::BAM_TAG_TYPE_ASCII) :
+                    case(Constants::BAM_TAG_TYPE_INT8)  :
+                    case(Constants::BAM_TAG_TYPE_UINT8) :
+                        // no endian swapping necessary for single-byte data
+                        ++i;
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_INT16)  :
+                    case(Constants::BAM_TAG_TYPE_UINT16) :
+                        BamTools::SwapEndian_16p(&tagData[i]);
+                        i += sizeof(uint16_t);
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_FLOAT)  :
+                    case(Constants::BAM_TAG_TYPE_INT32)  :
+                    case(Constants::BAM_TAG_TYPE_UINT32) :
+                        BamTools::SwapEndian_32p(&tagData[i]);
+                        i += sizeof(uint32_t);
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_HEX) :
+                    case(Constants::BAM_TAG_TYPE_STRING) :
+                        // no endian swapping necessary for hex-string/string data
+                        while ( tagData[i] )
+                            ++i;
+                        // increment one more for null terminator
+                        ++i;
+                        break;
+
+                    case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+                    {
+                        // read array type
+                        const char arrayType = tagData[i];
+                        ++i;
+
+                        // swap endian-ness of number of elements in place, then retrieve for loop
+                        BamTools::SwapEndian_32p(&tagData[i]);
+                        int32_t numElements;
+                        memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+                        i += sizeof(uint32_t);
+
+                        // swap endian-ness of array elements
+                        for ( int j = 0; j < numElements; ++j ) {
+                            switch (arrayType) {
+                                case (Constants::BAM_TAG_TYPE_INT8)  :
+                                case (Constants::BAM_TAG_TYPE_UINT8) :
+                                    // no endian-swapping necessary
+                                    ++i;
+                                    break;
+                                case (Constants::BAM_TAG_TYPE_INT16)  :
+                                case (Constants::BAM_TAG_TYPE_UINT16) :
+                                    BamTools::SwapEndian_16p(&tagData[i]);
+                                    i += sizeof(uint16_t);
+                                    break;
+                                case (Constants::BAM_TAG_TYPE_FLOAT)  :
+                                case (Constants::BAM_TAG_TYPE_INT32)  :
+                                case (Constants::BAM_TAG_TYPE_UINT32) :
+                                    BamTools::SwapEndian_32p(&tagData[i]);
+                                    i += sizeof(uint32_t);
+                                    break;
+                                default:
+                                    // error case
+                                    // NOTE(review): returns with HasCoreOnly still set and the
+                                    //               preceding tags already swapped in place
+                                    cerr << "BamAlignment ERROR: unknown binary array type encountered: "
+                                         << arrayType << endl;
+                                    return false;
+                            }
+                        }
+
+                        break;
+                    }
+
+                    // shouldn't get here
+                    default :
+                        cerr << "BamAlignment ERROR: invalid tag value type: "
+                             << type << endl;
+                        exit(1);
+                }
+            }
+        }
+
+        // store tagData in alignment
+        // assign() copies by explicit length (tag data may contain null bytes) and
+        // avoids writing through the const pointer returned by std::string::data()
+        TagData.assign(tagData, tagDataLength);
+    }
+
+    // clear the core-only flag
+    SupportData.HasCoreOnly = false;
+
+    // return success
+    return true;
+}
+
+/*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
+ \brief Edits a BAM tag field containing string data.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param type 1-character tag type (must be "Z" or "H")
+ \param value string data to store
+
+ \return \c true if the tag was modified/created successfully
+
+ \sa BamAlignment::RemoveTag()
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {