# will result in a user-defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.
-ALIASES =
+ALIASES = samSpecURL=http://samtools.sourceforge.net/SAM1.pdf
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
# sources only. Doxygen will then generate output that is more tailored for C.
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
#include <cstdlib>
#include <cstring>
#include <exception>
+#include <iostream>
#include <map>
#include <utility>
using namespace std;
case (Constants::BAM_TAG_TYPE_INT16) :
case (Constants::BAM_TAG_TYPE_UINT16) :
- numBytesParsed += 2;
- pTagData += 2;
+ numBytesParsed += sizeof(uint16_t);
+ pTagData += sizeof(uint16_t);
break;
case (Constants::BAM_TAG_TYPE_FLOAT) :
case (Constants::BAM_TAG_TYPE_INT32) :
case (Constants::BAM_TAG_TYPE_UINT32) :
- numBytesParsed += 4;
- pTagData += 4;
+ numBytesParsed += sizeof(uint32_t);
+ pTagData += sizeof(uint32_t);
break;
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
- while(*pTagData) {
+ while( *pTagData ) {
++numBytesParsed;
++pTagData;
}
++pTagData;
break;
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+
+ {
+ // read array type
+ const char arrayType = *pTagData;
+ ++numBytesParsed;
+ ++pTagData;
+
+ // read number of elements
+ int32_t numElements;
+ memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
+ numBytesParsed += sizeof(uint32_t);
+ pTagData += sizeof(uint32_t);
+
+ // calculate number of bytes to skip
+ int bytesToSkip = 0;
+ switch (arrayType) {
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ bytesToSkip = numElements;
+ break;
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ bytesToSkip = numElements*sizeof(uint16_t);
+ break;
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ bytesToSkip = numElements*sizeof(uint32_t);
+ break;
+ default:
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
+ << arrayType << endl;
+ return false;
+ }
+
+ // skip binary array contents
+ numBytesParsed += bytesToSkip;
+ pTagData += bytesToSkip;
+ break;
+ }
+
default:
- // error case
- fprintf(stderr, "BamAlignment ERROR: unknown tag type encountered: [%c]\n", storageType);
+ cerr << "BamAlignment ERROR: unknown tag type encountered"
+ << storageType << endl;
return false;
}
\param value string data to store
\return \c true if the \b new tag was added successfully
-
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
// validate tag/type size & that type is OK for string value
if ( !Internal::IsValidSize(tag, type) ) return false;
if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
- type.at(0) != Constants::BAM_TAG_TYPE_HEX )
+ type.at(0) != Constants::BAM_TAG_TYPE_HEX
+ )
+ {
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "f", "Z", or "H")
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
\param value unsigned int data to store
\return \c true if the \b new tag was added successfully
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
// validate tag/type size & that type is OK for uint32_t value
if ( !Internal::IsValidSize(tag, type) ) return false;
- if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
- type.at(0) == Constants::BAM_TAG_TYPE_HEX )
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
+ )
+ {
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "f", "Z", or "H")
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
\param value signed int data to store
\return \c true if the \b new tag was added successfully
-
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
return AddTag(tag, type, (const uint32_t&)value);
Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "Z" or "H")
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
\param value float data to store
\return \c true if the \b new tag was added successfully
-
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
// validate tag/type size & that type is OK for float value
if ( !Internal::IsValidSize(tag, type) ) return false;
if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
- type.at(0) == Constants::BAM_TAG_TYPE_HEX )
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
+ )
+ {
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
return true;
}
+/*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of uint8_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(uint8_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const uint8_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
+ &value, sizeof(uint8_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of int8_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(int8_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const int8_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
+ &value, sizeof(int8_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of uint16_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(uint16_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const uint16_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
+ &value, sizeof(uint16_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of int16_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(int16_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const int16_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
+ &value, sizeof(int16_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of uint32_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(uint32_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const uint32_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
+ &value, sizeof(uint32_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of int32_t values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(int32_t);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const int32_t value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
+ &value, sizeof(int32_t));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
+/*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
+ \brief Adds a numeric array field to the BAM tags.
+
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
+
+ \param tag 2-character tag name
+ \param values vector of float values to store
+
+ \return \c true if the \b new tag was added successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
+
+ // skip if core data not parsed
+ if ( SupportData.HasCoreOnly ) return false;
+
+ // check for valid tag length
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if tag already exists, return false
+ // use EditTag explicitly instead
+ if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // build new tag's base information
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
+ newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
+
+ // add number of array elements to newTagBase
+ const int32_t numElements = values.size();
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
+
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
+ const int newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(float);
+ char originalTagData[newTagDataLength];
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+
+ // write newTagBase (removes old null term)
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+
+ // add vector elements to tag
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ const float value = values.at(i);
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
+ &value, sizeof(float));
+ }
+
+ // store temp buffer back in TagData
+ const char* newTagData = (const char*)originalTagData;
+ TagData.assign(newTagData, newTagDataLength);
+
+ // return success
+ return true;
+}
+
/*! \fn bool BamAlignment::BuildCharData(void)
\brief Populates alignment string fields (read name, bases, qualities, tag data).
switch (op.Type) {
- // for 'M', 'I' - write bases
- case (Constants::BAM_CIGAR_MATCH_CHAR) :
- case (Constants::BAM_CIGAR_INS_CHAR) :
+ // for 'M', 'I', '=', 'X' - write bases
+ case (Constants::BAM_CIGAR_MATCH_CHAR) :
+ case (Constants::BAM_CIGAR_INS_CHAR) :
+ case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
+ case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
AlignedBases.append(QueryBases.substr(k, op.Length));
// fall through
// shouldn't get here
default:
- fprintf(stderr, "BamAlignment ERROR: invalid CIGAR operation type: %c\n", op.Type);
+ cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
+ << op.Type << endl;
exit(1);
}
}
case(Constants::BAM_TAG_TYPE_ASCII) :
case(Constants::BAM_TAG_TYPE_INT8) :
case(Constants::BAM_TAG_TYPE_UINT8) :
+ // no endian swapping necessary for single-byte data
++i;
break;
case(Constants::BAM_TAG_TYPE_HEX) :
case(Constants::BAM_TAG_TYPE_STRING) :
// no endian swapping necessary for hex-string/string data
- while (tagData[i]) { ++i; }
+ while ( tagData[i] )
+ ++i;
// increment one more for null terminator
++i;
break;
+ case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+ {
+ // read array type
+ const char arrayType = tagData[i];
+ ++i;
+
+ // swap endian-ness of number of elements in place, then retrieve for loop
+ BamTools::SwapEndian_32p(&tagData[i]);
+ int32_t numElements;
+ memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+ i += sizeof(uint32_t);
+
+ // swap endian-ness of array elements
+ for ( int j = 0; j < numElements; ++j ) {
+ switch (arrayType) {
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ // no endian-swapping necessary
+ ++i;
+ break;
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ BamTools::SwapEndian_16p(&tagData[i]);
+ i += sizeof(uint16_t);
+ break;
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ BamTools::SwapEndian_32p(&tagData[i]);
+ i += sizeof(uint32_t);
+ break;
+ default:
+ // error case
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
+ << arrayType << endl;
+ return false;
+ }
+ }
+
+ break;
+ }
+
// shouldn't get here
default :
- fprintf(stderr, "BamAlignment ERROR: invalid tag value type: %c\n", type);
+ cerr << "BamAlignment ERROR: invalid tag value type: "
+ << type << endl;
exit(1);
}
}
\return \c true if the tag was modified/created successfully
\sa BamAlignment::RemoveTag()
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
unsigned int newTagDataLength = 0;
unsigned int numBytesParsed = 0;
- // if tag found, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
// make sure array is more than big enough
newTagDataLength += beginningTagDataLength;
memcpy(newTagData, pOriginalTagData, numBytesParsed);
- // copy new VALUE in place of current tag data
+ // copy new @value in place of current tag data
const unsigned int dataLength = strlen(value.c_str());
memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
If \a tag does not exist, a new entry is created.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "f", "Z", or "H")
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
\param value unsigned integer data to store
\return \c true if the tag was modified/created successfully
\sa BamAlignment::RemoveTag()
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
// validate tag/type size & that type is OK for uint32_t value
if ( !Internal::IsValidSize(tag, type) ) return false;
- if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
- type.at(0) == Constants::BAM_TAG_TYPE_HEX )
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
+ )
+ {
return false;
+ }
// localize the tag data
char* pOriginalTagData = (char*)TagData.data();
unsigned int newTagDataLength = 0;
unsigned int numBytesParsed = 0;
- // if tag found, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
// make sure array is more than big enough
newTagDataLength += beginningTagDataLength;
memcpy(newTagData, pOriginalTagData, numBytesParsed);
- // copy new VALUE in place of current tag data
+ // copy new @value in place of current tag data
union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
un.value = value;
memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
If \a tag does not exist, a new entry is created.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "f", "Z", or "H")
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
\param value signed integer data to store
\return \c true if the tag was modified/created successfully
\sa BamAlignment::RemoveTag()
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
return EditTag(tag, type, (const uint32_t&)value);
If \a tag does not exist, a new entry is created.
\param tag 2-character tag name
- \param type 1-character tag type (must NOT be "Z" or "H")
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
\param value float data to store
\return \c true if the tag was modified/created successfully
\sa BamAlignment::RemoveTag()
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
// validate tag/type size & that type is OK for float value
if ( !Internal::IsValidSize(tag, type) ) return false;
if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
- type.at(0) == Constants::BAM_TAG_TYPE_HEX )
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
+ )
+ {
return false;
+ }
// localize the tag data
char* pOriginalTagData = (char*)TagData.data();
unsigned int newTagDataLength = 0;
unsigned int numBytesParsed = 0;
- // if tag found, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
// make sure array is more than big enough
newTagDataLength += beginningTagDataLength;
memcpy(newTagData, pOriginalTagData, numBytesParsed);
- // copy new VALUE in place of current tag data
+ // copy new @value in place of current tag data
union { float value; char valueBuffer[sizeof(float)]; } un;
un.value = value;
memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
else return AddTag(tag, type, value);
}
+/*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of uint8_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of int8_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of uint16_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of int16_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of uint32_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of int32_t values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
+/*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
+ \brief Edits a BAM tag field containing a numeric array.
+
+ If \a tag does not exist, a new entry is created.
+
+ \param tag 2-character tag name
+ \param value vector of float values to store
+
+ \return \c true if the tag was modified/created successfully
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
+
+ // can't do anything if TagData not parsed
+ if ( SupportData.HasCoreOnly )
+ return false;
+
+ // remove existing tag if present
+ if ( HasTag(tag) )
+ RemoveTag(tag);
+
+ // add tag record with new values
+ return AddTag(tag, values);
+}
+
/*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
\brief Retrieves value of edit distance tag ("NM").
const unsigned int tagDataLength = TagData.size();
unsigned int numBytesParsed = 0;
- // if tag found, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
const unsigned int dataLength = strlen(pTagData);
destination.clear();
const unsigned int tagDataLength = TagData.size();
unsigned int numBytesParsed = 0;
- // if tag found, determine data byte-length, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
// determine data byte-length
case (Constants::BAM_TAG_TYPE_FLOAT) :
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
- fprintf(stderr, "BamAlignment ERROR: cannot store tag of type %c in integer destination\n", type);
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
+ << " in integer destination" << endl;
return false;
// unknown tag type
default:
- fprintf(stderr, "BamAlignment ERROR: unknown tag type encountered: [%c]\n", type);
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
+ << type << endl;
return false;
}
const unsigned int tagDataLength = TagData.size();
unsigned int numBytesParsed = 0;
- // if tag found, determine data byte-length, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
// determine data byte-length
// unsupported type (var-length strings)
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
- fprintf(stderr, "BamAlignment ERROR: cannot store tag of type %c in float destination\n", type);
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
+ << " in float destination" << endl;
return false;
// unknown tag type
default:
- fprintf(stderr, "BamAlignment ERROR: unknown tag type encountered: [%c]\n", type);
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
+ << type << endl;
return false;
}
return false;
}
+/*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
+ \brief Retrieves the numeric array data associated with a BAM tag
+
+ \param tag 2-character tag name
+ \param destination destination for retrieved data
+
+ \return \c true if found
+*/
+bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
+
+ // make sure tag data exists
+ if ( SupportData.HasCoreOnly || TagData.empty() )
+ return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // return false if tag not found
+ if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // check that tag is array type
+ const char tagType = *(pTagData - 1);
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
+ << tag << " in array destination" << endl;
+ return false;
+ }
+
+ // calculate length of each element in tag's array
+ const char elementType = *pTagData;
+ ++pTagData;
+ int elementLength = 0;
+ switch ( elementType ) {
+ case (Constants::BAM_TAG_TYPE_ASCII) :
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ elementLength = sizeof(uint8_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ elementLength = sizeof(uint16_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ elementLength = sizeof(uint32_t);
+ break;
+
+ // unsupported type for integer destination (float or var-length data)
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_STRING) :
+ case (Constants::BAM_TAG_TYPE_HEX) :
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "BamAlignment ERROR: array element type: " << elementType
+ << " cannot be stored in integer value" << endl;
+ return false;
+
+ // unknown tag type
+ default:
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
+ << elementType << endl;
+ return false;
+ }
+
+ // get number of elements
+ int32_t numElements;
+ memcpy(&numElements, pTagData, sizeof(int32_t));
+ pTagData += 4;
+ destination.clear();
+ destination.reserve(numElements);
+
+ // read in elements
+ uint32_t value;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ memcpy(&value, pTagData, sizeof(uint32_t));
+ pTagData += sizeof(uint32_t);
+ destination.push_back(value);
+ }
+
+ // return success
+ return false;
+}
+
+/*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
+ \brief Retrieves the numeric array data associated with a BAM tag
+
+ \param tag 2-character tag name
+ \param destination destination for retrieved data
+
+ \return \c true if found
+*/
+bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
+
+ // make sure tag data exists
+ if ( SupportData.HasCoreOnly || TagData.empty() )
+ return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // return false if tag not found
+ if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // check that tag is array type
+ const char tagType = *(pTagData - 1);
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
+ << tag << " in array destination" << endl;
+ return false;
+ }
+
+ // calculate length of each element in tag's array
+ const char elementType = *pTagData;
+ ++pTagData;
+ int elementLength = 0;
+ switch ( elementType ) {
+ case (Constants::BAM_TAG_TYPE_ASCII) :
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ elementLength = sizeof(uint8_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ elementLength = sizeof(uint16_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ elementLength = sizeof(uint32_t);
+ break;
+
+ // unsupported type for integer destination (float or var-length data)
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_STRING) :
+ case (Constants::BAM_TAG_TYPE_HEX) :
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "BamAlignment ERROR: array element type: " << elementType
+ << " cannot be stored in integer value" << endl;
+ return false;
+
+ // unknown tag type
+ default:
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
+ << elementType << endl;
+ return false;
+ }
+
+ // get number of elements
+ int32_t numElements;
+ memcpy(&numElements, pTagData, sizeof(int32_t));
+ pTagData += 4;
+ destination.clear();
+ destination.reserve(numElements);
+
+ // read in elements
+ int32_t value;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ memcpy(&value, pTagData, sizeof(int32_t));
+ pTagData += sizeof(int32_t);
+ destination.push_back(value);
+ }
+
+ // return success
+ return false;
+
+}
+
+/*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
+ \brief Retrieves the numeric array data associated with a BAM tag
+
+ \param tag 2-character tag name
+ \param destination destination for retrieved data
+
+ \return \c true if found
+*/
+bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
+
+ // make sure tag data exists
+ if ( SupportData.HasCoreOnly || TagData.empty() )
+ return false;
+
+ // localize the tag data
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // return false if tag not found
+ if ( !Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ return false;
+
+ // check that tag is array type
+ const char tagType = *(pTagData - 1);
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
+ << tag << " in array destination" << endl;
+ return false;
+ }
+
+ // calculate length of each element in tag's array
+ const char elementType = *pTagData;
+ ++pTagData;
+ int elementLength = 0;
+ switch ( elementType ) {
+ case (Constants::BAM_TAG_TYPE_ASCII) :
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ elementLength = sizeof(uint8_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ elementLength = sizeof(uint16_t);
+ break;
+
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ elementLength = sizeof(uint32_t);
+ break;
+
+ // unsupported type for float destination (var-length data)
+ case (Constants::BAM_TAG_TYPE_STRING) :
+ case (Constants::BAM_TAG_TYPE_HEX) :
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "BamAlignment ERROR: array element type: " << elementType
+ << " cannot be stored in float value" << endl;
+ return false;
+
+ // unknown tag type
+ default:
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
+ << elementType << endl;
+ return false;
+ }
+
+ // get number of elements
+ int32_t numElements;
+ memcpy(&numElements, pTagData, sizeof(int32_t));
+ pTagData += 4;
+ destination.clear();
+ destination.reserve(numElements);
+
+ // read in elements
+ float value;
+ for ( int i = 0 ; i < numElements; ++i ) {
+ memcpy(&value, pTagData, sizeof(float));
+ pTagData += sizeof(float);
+ destination.push_back(value);
+ }
+
+ // return success
+ return false;
+}
+
/*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
\brief Retrieves the BAM tag type-code associated with requested tag name.
\param type destination for the retrieved (1-character) tag type
\return \c true if found
-
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
- for more details on reserved tag names, supported tag types, etc.
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
*/
bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
case (Constants::BAM_TAG_TYPE_FLOAT) :
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
return true;
// unknown tag type
default:
- fprintf(stderr, "BamAlignment ERROR: unknown tag type encountered: [%c]\n", type);
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
+ << type << endl;
return false;
}
}
return false;
}
+/*! \fn bool BamAlignment::HasTag(const std::string& tag) const
+ \brief Returns true if alignment has a record for requested tag.
+ \param tag 2-character tag name
+ \return \c true if alignment has a record for tag
+*/
+bool BamAlignment::HasTag(const std::string& tag) const {
+
+ // return false if no tag data present
+ if ( SupportData.HasCoreOnly || TagData.empty() )
+ return false;
+
+ // localize the tag data for lookup
+ char* pTagData = (char*)TagData.data();
+ const unsigned int tagDataLength = TagData.size();
+ unsigned int numBytesParsed = 0;
+
+ // if result of tag lookup
+ return Internal::FindTag(tag, pTagData, tagDataLength, numBytesParsed);
+}
+
/*! \fn bool BamAlignment::IsDuplicate(void) const
\return \c true if this read is a PCR duplicate
*/
*/
bool BamAlignment::RemoveTag(const std::string& tag) {
- // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
- // also, return false if no data present to remove
+ // skip if no tag data available
if ( SupportData.HasCoreOnly || TagData.empty() )
return false;
unsigned int newTagDataLength = 0;
unsigned int numBytesParsed = 0;
- // if tag found, store data in readGroup, return success
+ // if tag found
if ( Internal::FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
char newTagData[originalTagDataLength];
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value);
bool AddTag(const std::string& tag, const std::string& type, const int32_t& value);
bool AddTag(const std::string& tag, const std::string& type, const float& value);
-
+
+ // adds a "binary array" tag
+ bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
+ bool AddTag(const std::string& tag, const std::vector<float>& values);
+
// edits a tag
bool EditTag(const std::string& tag, const std::string& type, const std::string& value);
bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value);
bool EditTag(const std::string& tag, const std::string& type, const int32_t& value);
bool EditTag(const std::string& tag, const std::string& type, const float& value);
+ // edits a "binary array" tag
+ bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
+ bool EditTag(const std::string& tag, const std::vector<float>& values);
+
// retrieves data for a tag
bool GetTag(const std::string& tag, std::string& destination) const;
bool GetTag(const std::string& tag, uint32_t& destination) const;
bool GetTag(const std::string& tag, int32_t& destination) const;
bool GetTag(const std::string& tag, float& destination) const;
+ // retrieves data for a "binary array" tag
+ bool GetTag(const std::string& tag, std::vector<uint32_t>& destination) const;
+ bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;
+ bool GetTag(const std::string& tag, std::vector<float>& destination) const;
+
// retrieves the BAM tag-type character for a tag
bool GetTagType(const std::string& tag, char& type) const;
bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag
bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag
+ // returns true if alignment has a record for this tag name
+ bool HasTag(const std::string& tag) const;
+
// removes a tag
bool RemoveTag(const std::string& tag);
uint16_t Bin; // BAM (standard) index bin number for this alignment
uint16_t MapQuality; // mapping quality score
uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify)
- std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
+ std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned
int32_t MatePosition; // position (0-based) where alignment's mate starts
int32_t InsertSize; // mate-pair insert size
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides basic constants for handling BAM files.
// ***************************************************************************
const int BAM_CORE_BUFFER_SIZE = 8;
// BAM alignment flags
-const int BAM_ALIGNMENT_PAIRED = 1;
-const int BAM_ALIGNMENT_PROPER_PAIR = 2;
-const int BAM_ALIGNMENT_UNMAPPED = 4;
-const int BAM_ALIGNMENT_MATE_UNMAPPED = 8;
-const int BAM_ALIGNMENT_REVERSE_STRAND = 16;
-const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 32;
-const int BAM_ALIGNMENT_READ_1 = 64;
-const int BAM_ALIGNMENT_READ_2 = 128;
-const int BAM_ALIGNMENT_SECONDARY = 256;
-const int BAM_ALIGNMENT_QC_FAILED = 512;
-const int BAM_ALIGNMENT_DUPLICATE = 1024;
+const int BAM_ALIGNMENT_PAIRED = 0x0001;
+const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002;
+const int BAM_ALIGNMENT_UNMAPPED = 0x0004;
+const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008;
+const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010;
+const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020;
+const int BAM_ALIGNMENT_READ_1 = 0x0040;
+const int BAM_ALIGNMENT_READ_2 = 0x0080;
+const int BAM_ALIGNMENT_SECONDARY = 0x0100;
+const int BAM_ALIGNMENT_QC_FAILED = 0x0200;
+const int BAM_ALIGNMENT_DUPLICATE = 0x0400;
// CIGAR constants
-const char* const BAM_CIGAR_LOOKUP = "MIDNSHP";
+const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
const int BAM_CIGAR_MATCH = 0;
const int BAM_CIGAR_INS = 1;
const int BAM_CIGAR_DEL = 2;
const int BAM_CIGAR_SOFTCLIP = 4;
const int BAM_CIGAR_HARDCLIP = 5;
const int BAM_CIGAR_PAD = 6;
-
-// TODO: Add support for 'X' and '=' ops
+const int BAM_CIGAR_SEQMATCH = 7;
+const int BAM_CIGAR_MISMATCH = 8;
const char BAM_CIGAR_MATCH_CHAR = 'M';
const char BAM_CIGAR_INS_CHAR = 'I';
const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
const char BAM_CIGAR_PAD_CHAR = 'P';
+const char BAM_CIGAR_SEQMATCH_CHAR = '=';
+const char BAM_CIGAR_MISMATCH_CHAR = 'X';
const int BAM_CIGAR_SHIFT = 4;
const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);
const char BAM_TAG_TYPE_FLOAT = 'f';
const char BAM_TAG_TYPE_STRING = 'Z';
const char BAM_TAG_TYPE_HEX = 'H';
+const char BAM_TAG_TYPE_ARRAY = 'B';
const size_t BAM_TAG_TAGSIZE = 2;
const size_t BAM_TAG_TYPESIZE = 1;
+const int BAM_TAG_ARRAYBASE_SIZE = 8;
// DNA bases
const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
BamReader.cpp
BamWriter.cpp
SamHeader.cpp
+ SamProgram.cpp
+ SamProgramChain.cpp
SamReadGroup.cpp
SamReadGroupDictionary.cpp
SamSequence.cpp
# create main BamTools API shared library
add_library( BamTools SHARED ${BamToolsAPISources} )
-set_target_properties( BamTools PROPERTIES SOVERSION "1.0.0" )
+set_target_properties( BamTools PROPERTIES SOVERSION "1.0.1" )
set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" )
# create main BamTools API static library
ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgram.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgramChain.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamSequence.h ${ApiIncludeDir})
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides constants for SAM header
// ***************************************************************************
namespace BamTools {
namespace Constants {
+// basic char constants used in SAM format
const char SAM_COLON = ':';
const char SAM_EQUAL = '=';
const char SAM_PERIOD = '.';
// SQ entries
const std::string SAM_SQ_BEGIN_TOKEN = "@SQ";
-const std::string SAM_SQ_NAME_TAG = "SN";
-const std::string SAM_SQ_LENGTH_TAG = "LN";
const std::string SAM_SQ_ASSEMBLYID_TAG = "AS";
-const std::string SAM_SQ_URI_TAG = "UR";
const std::string SAM_SQ_CHECKSUM_TAG = "M5";
+const std::string SAM_SQ_LENGTH_TAG = "LN";
+const std::string SAM_SQ_NAME_TAG = "SN";
const std::string SAM_SQ_SPECIES_TAG = "SP";
+const std::string SAM_SQ_URI_TAG = "UR";
// RG entries
const std::string SAM_RG_BEGIN_TOKEN = "@RG";
+const std::string SAM_RG_DESCRIPTION_TAG = "DS";
+const std::string SAM_RG_FLOWORDER_TAG = "FO";
const std::string SAM_RG_ID_TAG = "ID";
-const std::string SAM_RG_SAMPLE_TAG = "SM";
+const std::string SAM_RG_KEYSEQUENCE_TAG = "KS";
const std::string SAM_RG_LIBRARY_TAG = "LB";
-const std::string SAM_RG_DESCRIPTION_TAG = "DS";
const std::string SAM_RG_PLATFORMUNIT_TAG = "PU";
const std::string SAM_RG_PREDICTEDINSERTSIZE_TAG = "PI";
-const std::string SAM_RG_SEQCENTER_TAG = "CN";
const std::string SAM_RG_PRODUCTIONDATE_TAG = "DT";
+const std::string SAM_RG_PROGRAM_TAG = "PG";
+const std::string SAM_RG_SAMPLE_TAG = "SM";
+const std::string SAM_RG_SEQCENTER_TAG = "CN";
const std::string SAM_RG_SEQTECHNOLOGY_TAG = "PL";
// PG entries
-const std::string SAM_PG_BEGIN_TOKEN = "@PG";
-const std::string SAM_PG_NAME_TAG = "ID";
-const std::string SAM_PG_VERSION_TAG = "VN";
-const std::string SAM_PG_COMMANDLINE_TAG = "CL";
+const std::string SAM_PG_BEGIN_TOKEN = "@PG";
+const std::string SAM_PG_COMMANDLINE_TAG = "CL";
+const std::string SAM_PG_ID_TAG = "ID";
+const std::string SAM_PG_NAME_TAG = "PN";
+const std::string SAM_PG_PREVIOUSPROGRAM_TAG = "PP";
+const std::string SAM_PG_VERSION_TAG = "VN";
// CO entries
const std::string SAM_CO_BEGIN_TOKEN = "@CO";
// HD:SO values
const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate";
const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname";
+const std::string SAM_HD_SORTORDER_UNKNOWN = "unknown";
const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted";
// HD:GO values
const unsigned int SAM_SQ_LENGTH_MIN = 1;
const unsigned int SAM_SQ_LENGTH_MAX = 536870911; // 2^29 - 1
-// --------------
// RG:PL values
-
-// 454
-const std::string SAM_RG_SEQTECHNOLOGY_454 = "454";
-const std::string SAM_RG_SEQTECHNOLOGY_LS454_LOWER = "ls454";
-const std::string SAM_RG_SEQTECHNOLOGY_LS454_UPPER = "LS454";
-
-// Helicos
-const std::string SAM_RG_SEQTECHNOLOGY_HELICOS_LOWER = "helicos";
-const std::string SAM_RG_SEQTECHNOLOGY_HELICOS_UPPER = "HELICOS";
-
-// Illumina
-const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA_LOWER = "illumina";
-const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA_UPPER = "ILLUMINA";
-
-// PacBio
-const std::string SAM_RG_SEQTECHNOLOGY_PACBIO_LOWER = "pacbio";
-const std::string SAM_RG_SEQTECHNOLOGY_PACBIO_UPPER = "PACBIO";
-
-// SOLiD
-const std::string SAM_RG_SEQTECHNOLOGY_SOLID_LOWER = "solid";
-const std::string SAM_RG_SEQTECHNOLOGY_SOLID_UPPER = "SOLID";
+const std::string SAM_RG_SEQTECHNOLOGY_CAPILLARY = "CAPILLARY";
+const std::string SAM_RG_SEQTECHNOLOGY_HELICOS = "HELICOS";
+const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA = "ILLUMINA";
+const std::string SAM_RG_SEQTECHNOLOGY_IONTORRENT = "IONTORRENT";
+const std::string SAM_RG_SEQTECHNOLOGY_LS454 = "LS454";
+const std::string SAM_RG_SEQTECHNOLOGY_PACBIO = "PACBIO";
+const std::string SAM_RG_SEQTECHNOLOGY_SOLID = "SOLID";
} // namespace Constants
} // namespace BamTools
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM header data fields.
// ***************************************************************************
+#include <api/SamConstants.h>
#include <api/SamHeader.h>
#include <api/internal/SamFormatParser_p.h>
#include <api/internal/SamFormatPrinter_p.h>
Provides direct read/write access to the SAM header data fields.
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
+ \sa \samSpecURL
*/
/*! \var SamHeader::Version
\brief corresponds to \@HD VN:\<Version\>
+
+ Required for valid SAM header, if @HD record is present.
*/
/*! \var SamHeader::SortOrder
\brief corresponds to \@HD SO:\<SortOrder\>
*/
SamHeader::SamHeader(const std::string& headerText)
: Version("")
- , SortOrder("")
+ , SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN)
, GroupOrder("")
- , ProgramName("")
- , ProgramVersion("")
- , ProgramCommandLine("")
{
SamFormatParser parser(*this);
parser.Parse(headerText);
, GroupOrder(other.GroupOrder)
, Sequences(other.Sequences)
, ReadGroups(other.ReadGroups)
- , ProgramName(other.ProgramName)
- , ProgramVersion(other.ProgramVersion)
- , ProgramCommandLine(other.ProgramCommandLine)
+ , Programs(other.Programs)
{ }
/*! \fn SamHeader::~SamHeader(void)
GroupOrder.clear();
Sequences.Clear();
ReadGroups.Clear();
- ProgramName.clear();
- ProgramVersion.clear();
- ProgramCommandLine.clear();
+ Programs.Clear();
Comments.clear();
}
return (!ReadGroups.IsEmpty());
}
-/*! \fn bool SamHeader::HasProgramName(void) const
- \brief Returns \c true if header contains \@PG ID:\<ProgramName\>
-*/
-bool SamHeader::HasProgramName(void) const {
- return (!ProgramName.empty());
-}
-
-/*! \fn bool SamHeader::HasProgramVersion(void) const
- \brief Returns \c true if header contains \@PG VN:\<ProgramVersion\>
-*/
-bool SamHeader::HasProgramVersion(void) const {
- return (!ProgramVersion.empty());
-}
-
-/*! \fn bool SamHeader::HasProgramCommandLine(void) const
- \brief Returns \c true if header contains \@PG CL:\<ProgramCommandLine\>
+/*! \fn bool SamHeader::HasPrograms(void) const
+ \brief Returns \c true if header contains any \@PG entries
*/
-bool SamHeader::HasProgramCommandLine(void) const {
- return (!ProgramCommandLine.empty());
+bool SamHeader::HasPrograms(void) const {
+ return (!Programs.IsEmpty());
}
/*! \fn bool SamHeader::HasComments(void) const
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM header data fields.
// ***************************************************************************
#define SAM_HEADER_H
#include <api/api_global.h>
+#include <api/SamProgramChain.h>
#include <api/SamReadGroupDictionary.h>
#include <api/SamSequenceDictionary.h>
#include <string>
bool HasGroupOrder(void) const; // returns true if header contains group order entry
bool HasSequences(void) const; // returns true if header contains any sequence entries
bool HasReadGroups(void) const; // returns true if header contains any read group entries
- bool HasProgramName(void) const; // returns true if header contains program name
- bool HasProgramVersion(void) const; // returns true if header contains program version
- bool HasProgramCommandLine(void) const; // returns true if header contains program command line
+ bool HasPrograms(void) const; // returns true if header contains any program record entries
bool HasComments(void) const; // returns true if header contains comments
+ // --------------
// data members
+ // --------------
// header metadata (@HD line)
- std::string Version; // VN:<Version>
- std::string SortOrder; // SO:<SortOrder>
- std::string GroupOrder; // GO:<GroupOrder>
+ std::string Version; // VN:<Version> *Required for valid SAM header, if @HD record is present*
+ std::string SortOrder; // SO:<SortOrder>
+ std::string GroupOrder; // GO:<GroupOrder>
// header sequences (@SQ entries)
SamSequenceDictionary Sequences;
SamReadGroupDictionary ReadGroups;
// header program data (@PG entries)
- std::string ProgramName; // ID:<ProgramName>
- std::string ProgramVersion; // VN:<ProgramVersion>
- std::string ProgramCommandLine; // CL:<ProgramCommandLine>
+ SamProgramChain Programs;
// header comments (@CO entries)
std::vector<std::string> Comments;
--- /dev/null
+// ***************************************************************************
+// SamProgram.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header program records.
+// ***************************************************************************
+
+#include <api/SamProgram.h>
+using namespace BamTools;
+using namespace std;
+
+/*! \struct BamTools::SamProgram
+ \brief Represents a SAM program record.
+
+ Provides direct read/write access to the SAM header program records.
+
+ \sa \samSpecURL
+*/
+/*! \var SamProgram::CommandLine
+ \brief corresponds to \@PG CL:\<CommandLine\>
+*/
+/*! \var SamProgram::ID
+ \brief corresponds to \@PG ID:\<ID\>
+
+ Required for valid SAM header.
+*/
+/*! \var SamProgram::Name
+ \brief corresponds to \@PG PN:\<Name\>
+*/
+/*! \var SamProgram::PreviousProgramID
+ \brief corresponds to \@PG PP:\<PreviousProgramID\>
+*/
+/*! \var SamProgram::Version
+ \brief corresponds to \@PG VN:\<Version\>
+*/
+/*! \var SamProgram::NextProgramID
+ \internal
+ Holds ID of the "next" program record in a SamProgramChain
+*/
+
+/*! \fn SamProgram::SamProgram(void)
+ \brief default constructor
+*/
+SamProgram::SamProgram(void)
+ : CommandLine("")
+ , ID("")
+ , Name("")
+ , PreviousProgramID("")
+ , Version("")
+ , NextProgramID("")
+{ }
+
+/*! \fn SamProgram::SamProgram(const std::string& id)
+ \brief constructs program record with \a id
+
+ \param id desired program record ID
+*/
+SamProgram::SamProgram(const std::string& id)
+ : CommandLine("")
+ , ID(id)
+ , Name("")
+ , PreviousProgramID("")
+ , Version("")
+ , NextProgramID("")
+{ }
+
+/*! \fn SamProgram::SamProgram(const SamProgram& other)
+ \brief copy constructor
+*/
+SamProgram::SamProgram(const SamProgram& other)
+ : CommandLine(other.CommandLine)
+ , ID(other.ID)
+ , Name(other.Name)
+ , PreviousProgramID(other.PreviousProgramID)
+ , Version(other.Version)
+ , NextProgramID(other.NextProgramID)
+{ }
+
+/*! \fn SamProgram::~SamProgram(void)
+ \brief destructor
+*/
+SamProgram::~SamProgram(void) { }
+
+/*! \fn void SamProgram::Clear(void)
+ \brief Clears all data fields.
+*/
+void SamProgram::Clear(void) {
+ CommandLine.clear();
+ ID.clear();
+ Name.clear();
+ PreviousProgramID.clear();
+ Version.clear();
+ NextProgramID.clear();
+}
+
+/*! \fn bool SamProgram::HasCommandLine(void) const
+ \brief Returns \c true if program record contains \@PG: CL:\<CommandLine\>
+*/
+bool SamProgram::HasCommandLine(void) const {
+ return (!CommandLine.empty());
+}
+
+/*! \fn bool SamProgram::HasID(void) const
+ \brief Returns \c true if program record contains \@PG: ID:\<ID\>
+*/
+bool SamProgram::HasID(void) const {
+ return (!ID.empty());
+}
+
+/*! \fn bool SamProgram::HasName(void) const
+ \brief Returns \c true if program record contains \@PG: PN:\<Name\>
+*/
+bool SamProgram::HasName(void) const {
+ return (!Name.empty());
+}
+
+/*! \fn bool SamProgram::HasNextProgramID(void) const
+ \internal
+ \return true if program has a "next" record in a SamProgramChain
+*/
+bool SamProgram::HasNextProgramID(void) const {
+ return (!NextProgramID.empty());
+}
+
+/*! \fn bool SamProgram::HasPreviousProgramID(void) const
+ \brief Returns \c true if program record contains \@PG: PP:\<PreviousProgramID\>
+*/
+bool SamProgram::HasPreviousProgramID(void) const {
+ return (!PreviousProgramID.empty());
+}
+
+/*! \fn bool SamProgram::HasVersion(void) const
+ \brief Returns \c true if program record contains \@PG: VN:\<Version\>
+*/
+bool SamProgram::HasVersion(void) const {
+ return (!Version.empty());
+}
--- /dev/null
+// ***************************************************************************
+// SamProgram.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header program records.
+// ***************************************************************************
+
+#ifndef SAM_PROGRAM_H
+#define SAM_PROGRAM_H
+
+#include "api/api_global.h"
+#include <string>
+
+namespace BamTools {
+
+class SamProgramChain;
+
+struct API_EXPORT SamProgram {
+
+ // ctor & dtor
+ SamProgram(void);
+ SamProgram(const std::string& id);
+ SamProgram(const SamProgram& other);
+ ~SamProgram(void);
+
+ // query/modify entire program record
+ void Clear(void); // clears all data fields
+
+ // convenience query methods
+ bool HasCommandLine(void) const; // returns true if program record has a command line entry
+ bool HasID(void) const; // returns true if program record has an ID
+ bool HasName(void) const; // returns true if program record has a name
+ bool HasPreviousProgramID(void) const; // returns true if program record has a 'previous program ID'
+ bool HasVersion(void) const; // returns true if program record has a version
+
+ // data members
+ std::string CommandLine; // CL:<CommandLine>
+ std::string ID; // ID:<ID> *Required for valid SAM header*
+ std::string Name; // PN:<Name>
+ std::string PreviousProgramID; // PP:<PreviousProgramID>
+ std::string Version; // VN:<Version>
+
+ // internal (non-standard) methods & fields
+ private:
+ bool HasNextProgramID(void) const;
+ std::string NextProgramID;
+ friend class BamTools::SamProgramChain;
+};
+
+/*! \fn bool operator==(const SamProgram& lhs, const SamProgram& rhs)
+ \brief tests equality by comparing program IDs
+*/
+API_EXPORT inline bool operator==(const SamProgram& lhs, const SamProgram& rhs) {
+ return lhs.ID == rhs.ID;
+}
+
+} // namespace BamTools
+
+#endif // SAM_PROGRAM_H
--- /dev/null
+// ***************************************************************************
+// SamProgramChain.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides methods for operating on a SamProgram record "chain"
+// ***************************************************************************
+
+#include <api/SamProgramChain.h>
+using namespace BamTools;
+
+#include <algorithm>
+#include <iostream>
+#include <cstdlib>
+using namespace std;
+
+/*! \class BamTools::SamProgramChain
+ \brief Sorted container "chain" of SamProgram records.
+
+ Provides methods for operating on a collection of SamProgram records.
+
+ N.B. - Underlying container is *NOT* ordered by linkage, but by order of
+ appearance in SamHeader and subsequent Add() calls. Using the current
+ iterators will not allow you to step through the header's program history.
+ Instead use First()/Last() to access oldest/newest records, respectively.
+*/
+
+/*! \fn SamProgramChain::SamProgramChain(void)
+ \brief constructor
+*/
+SamProgramChain::SamProgramChain(void) { }
+
+/*! \fn SamProgramChain::SamProgramChain(const SamProgramChain& other)
+ \brief copy constructor
+*/
+SamProgramChain::SamProgramChain(const SamProgramChain& other)
+ : m_data(other.m_data)
+{ }
+
+/*! \fn SamProgramChain::~SamProgramChain(void)
+ \brief destructor
+*/
+SamProgramChain::~SamProgramChain(void) { }
+
+/*! \fn void SamProgramChain::Add(SamProgram& program)
+ \brief Appends a program to program chain.
+
+ Duplicate entries are silently discarded.
+
+ N.B. - Underlying container is *NOT* ordered by linkage, but by order of
+ appearance in SamHeader and subsequent Add() calls. Using the current
+ iterators will not allow you to step through the header's program history.
+ Instead use First()/Last() to access oldest/newest records, respectively.
+
+ \param program entry to be appended
+*/
+void SamProgramChain::Add(SamProgram& program) {
+
+ // ignore duplicated records
+ if ( Contains(program) )
+ return;
+
+ // if other programs already in chain, try to find the "next" record
+ // tries to match another record's PPID with @program's ID
+ if ( !IsEmpty() )
+ program.NextProgramID = NextIdFor(program.ID);
+
+ // store program record
+ m_data.push_back(program);
+}
+
+/*! \fn void SamProgramChain::Add(const std::vector<SamProgram>& programs)
+ \brief Appends a batch of programs to the end of the chain.
+
+ This is an overloaded function.
+
+ \param programs batch of program records to append
+ \sa Add()
+*/
+void SamProgramChain::Add(std::vector<SamProgram>& programs) {
+ vector<SamProgram>::iterator pgIter = programs.begin();
+ vector<SamProgram>::iterator pgEnd = programs.end();
+ for ( ; pgIter != pgEnd; ++pgIter )
+ Add(*pgIter);
+}
+
+/*! \fn SamProgramIterator SamProgramChain::Begin(void)
+ \return an STL iterator pointing to the first (oldest) program record
+ \sa ConstBegin(), End(), First()
+*/
+SamProgramIterator SamProgramChain::Begin(void) {
+ return m_data.begin();
+}
+
+/*! \fn SamProgramConstIterator SamProgramChain::Begin(void) const
+ \return an STL const_iterator pointing to the first (oldest) program record
+
+ This is an overloaded function.
+
+ \sa ConstBegin(), End(), First()
+*/
+SamProgramConstIterator SamProgramChain::Begin(void) const {
+ return m_data.begin();
+}
+
+/*! \fn void SamProgramChain::Clear(void)
+ \brief Clears all program records.
+*/
+void SamProgramChain::Clear(void) {
+ m_data.clear();
+}
+
+/*! \fn SamProgramConstIterator SamProgramChain::ConstBegin(void) const
+ \return an STL const_iterator pointing to the first (oldest) program record
+ \sa Begin(), ConstEnd(), First()
+*/
+SamProgramConstIterator SamProgramChain::ConstBegin(void) const {
+ return m_data.begin();
+}
+
+/*! \fn SamProgramConstIterator SamProgramChain::ConstEnd(void) const
+ \return an STL const_iterator pointing to the imaginary entry after the last (newest) program record
+ \sa ConstBegin(), End(), Last()
+*/
+SamProgramConstIterator SamProgramChain::ConstEnd(void) const {
+ return m_data.end();
+}
+
+/*! \fn bool SamProgramChain::Contains(const SamProgram& program) const
+ \brief Returns true if chains has this program record (matching on ID).
+
+ This is an overloaded function.
+
+ \param program SamProgram to search for
+ \return \c true if chain contains program (matching on ID)
+*/
+bool SamProgramChain::Contains(const SamProgram& program) const {
+ return Contains(program.ID);
+}
+
+/*! \fn bool SamProgramChain::Contains(const std::string& programId) const
+ \brief Returns true if chains has a program record with this ID
+ \param programId search for program matching this ID
+ \return \c true if chain contains a program record with this ID
+*/
+bool SamProgramChain::Contains(const std::string& programId) const {
+ return ( IndexOf(programId) != (int)m_data.size() );
+}
+
+/*! \fn SamProgramIterator SamProgramChain::End(void)
+ \return an STL iterator pointing to the imaginary entry after the last (newest) program record
+ \sa Begin(), ConstEnd(), Last()
+*/
+SamProgramIterator SamProgramChain::End(void) {
+ return m_data.end();
+}
+
+/*! \fn SamProgramConstIterator SamProgramChain::End(void) const
+ \return an STL const_iterator pointing to the imaginary entry after the last (newest) program record
+
+ This is an overloaded function.
+
+ \sa Begin(), ConstEnd(), Last()
+*/
+SamProgramConstIterator SamProgramChain::End(void) const {
+ return m_data.end();
+}
+
+/*! \fn SamProgram& SamProgramChain::First(void)
+ \brief Fetches first (oldest) record in the chain.
+
+ N.B. - This function will fail if the chain is empty. If this is possible,
+ check the result of IsEmpty() before calling this function.
+
+ \return a modifiable reference to the first (oldest) program entry
+ \sa Begin(), Last()
+*/
+SamProgram& SamProgramChain::First(void) {
+
+ // find first record in container that has no PreviousProgramID entry
+ SamProgramIterator iter = Begin();
+ SamProgramIterator end = End();
+ for ( ; iter != end; ++iter ) {
+ SamProgram& current = (*iter);
+ if ( !current.HasPreviousProgramID() )
+ return current;
+ }
+
+ // otherwise error
+ cerr << "SamProgramChain ERROR - could not find any record without a PP tag" << endl;
+ exit(1);
+}
+
+/*! \fn const SamProgram& SamProgramChain::First(void) const
+ \brief Fetches first (oldest) record in the chain.
+
+ This is an overloaded function.
+
+ N.B. - This function will fail if the chain is empty. If this is possible,
+ check the result of IsEmpty() before calling this function.
+
+ \return a read-only reference to the first (oldest) program entry
+ \sa Begin(), ConstBegin(), Last()
+*/
+const SamProgram& SamProgramChain::First(void) const {
+
+ // find first record in container that has no PreviousProgramID entry
+ SamProgramConstIterator iter = ConstBegin();
+ SamProgramConstIterator end = ConstEnd();
+ for ( ; iter != end; ++iter ) {
+ const SamProgram& current = (*iter);
+ if ( !current.HasPreviousProgramID() )
+ return current;
+ }
+
+ // otherwise error
+ cerr << "SamProgramChain ERROR - could not find any record without a PP tag" << endl;
+ exit(1);
+}
+
+/*! \fn int SamProgramChain::IndexOf(const std::string& programId) const
+ \internal
+ \return index of program record if found.
+ Otherwise, returns vector::size() (invalid index).
+*/
+int SamProgramChain::IndexOf(const std::string& programId) const {
+ SamProgramConstIterator begin = ConstBegin();
+ SamProgramConstIterator iter = begin;
+ SamProgramConstIterator end = ConstEnd();
+ for ( ; iter != end; ++iter ) {
+ const SamProgram& current = (*iter);
+ if ( current.ID == programId )
+ break;
+ }
+ return distance( begin, iter );
+}
+
+/*! \fn bool SamProgramChain::IsEmpty(void) const
+ \brief Returns \c true if chain contains no records
+ \sa Size()
+*/
+bool SamProgramChain::IsEmpty(void) const {
+ return m_data.empty();
+}
+
+/*! \fn SamProgram& SamProgramChain::Last(void)
+ \brief Fetches last (newest) record in the chain.
+
+ N.B. - This function will fail if the chain is empty. If this is possible,
+ check the result of IsEmpty() before calling this function.
+
+ \return a modifiable reference to the last (newest) program entry
+ \sa End(), First()
+*/
+SamProgram& SamProgramChain::Last(void) {
+ // find first record in container that has no NextProgramID entry
+ SamProgramIterator iter = Begin();
+ SamProgramIterator end = End();
+ for ( ; iter != end; ++iter ) {
+ SamProgram& current = (*iter);
+ if ( !current.HasNextProgramID() )
+ return current;
+ }
+
+ // otherwise error
+ cerr << "SamProgramChain ERROR - could not determine last record" << endl;
+ exit(1);
+}
+
+/*! \fn const SamProgram& SamProgramChain::Last(void) const
+ \brief Fetches last (newest) record in the chain.
+
+ This is an overloaded function.
+
+ N.B. - This function will fail if the chain is empty. If this is possible,
+ check the result of IsEmpty() before calling this function.
+
+ \return a read-only reference to the last (newest) program entry
+ \sa End(), ConstEnd(), First()
+*/
+const SamProgram& SamProgramChain::Last(void) const {
+ // find first record in container that has no NextProgramID entry
+ SamProgramConstIterator iter = ConstBegin();
+ SamProgramConstIterator end = ConstEnd();
+ for ( ; iter != end; ++iter ) {
+ const SamProgram& current = (*iter);
+ if ( !current.HasNextProgramID() )
+ return current;
+ }
+
+ // otherwise error
+ cerr << "SamProgramChain ERROR - could not determine last record" << endl;
+ exit(1);
+}
+
+/*! \fn const std::string SamProgramChain::NextIdFor(const std::string& programId) const
+ \internal
+ \return ID of program record, whose PreviousProgramID matches \a programId.
+ Otherwise, returns empty string if none found.
+*/
+const std::string SamProgramChain::NextIdFor(const std::string& programId) const {
+
+ // find first record in container whose PreviousProgramID matches @programId
+ SamProgramConstIterator iter = ConstBegin();
+ SamProgramConstIterator end = ConstEnd();
+ for ( ; iter != end; ++iter ) {
+ const SamProgram& current = (*iter);
+ if ( !current.HasPreviousProgramID() &&
+ current.PreviousProgramID == programId
+ )
+ {
+ return current.ID;
+ }
+ }
+
+ // none found
+ return string();
+}
+
+/*! \fn int SamProgramChain::Size(void) const
+ \brief Returns number of program records in the chain.
+ \sa IsEmpty()
+*/
+int SamProgramChain::Size(void) const {
+ return m_data.size();
+}
+
+/*! \fn SamProgram& SamProgramChain::operator[](const std::string& programId)
+ \brief Retrieves the modifiable SamProgram record that matches \a programId.
+
+ NOTE - If the chain contains no read group matching this ID, this function will
+ print an error and terminate.
+
+ \param programId ID of program record to retrieve
+ \return a modifiable reference to the SamProgram associated with the ID
+*/
+SamProgram& SamProgramChain::operator[](const std::string& programId) {
+
+ // look up program record matching this ID
+ int index = IndexOf(programId);
+
+ // if record not found
+ if ( index == (int)m_data.size() ) {
+ cerr << "SamProgramChain ERROR - unknown programId: " << programId << endl;
+ exit(1);
+ }
+
+ // otherwise return program record at index
+ return m_data.at(index);
+}
--- /dev/null
+// ***************************************************************************
+// SamProgramChain.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides methods for operating on a SamProgram record "chain"
+// ***************************************************************************
+
+#ifndef SAM_PROGRAMCHAIN_H
+#define SAM_PROGRAMCHAIN_H
+
+#include <api/api_global.h>
+#include <api/SamProgram.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+// chain is *NOT* sorted in any order
+// use First()/Last() to retrieve oldest/newest programs, respectively
+typedef std::vector<SamProgram> SamProgramContainer;
+typedef SamProgramContainer::iterator SamProgramIterator;
+typedef SamProgramContainer::const_iterator SamProgramConstIterator;
+
+class API_EXPORT SamProgramChain {
+
+ // ctor & dtor
+ public:
+ SamProgramChain(void);
+ SamProgramChain(const SamProgramChain& other);
+ ~SamProgramChain(void);
+
+ // query/modify program data
+ public:
+ // appends a program record to the chain
+ void Add(SamProgram& program);
+ void Add(std::vector<SamProgram>& programs);
+
+ // clears all read group entries
+ void Clear(void);
+
+ // returns true if chain contains this program record (matches on ID)
+ bool Contains(const SamProgram& program) const;
+ bool Contains(const std::string& programId) const;
+
+ // returns the first (oldest) program in the chain
+ SamProgram& First(void);
+ const SamProgram& First(void) const;
+
+ // returns true if chain is empty
+ bool IsEmpty(void) const;
+
+ // returns last (most recent) program in the chain
+ SamProgram& Last(void);
+ const SamProgram& Last(void) const;
+
+ // returns number of program records in the chain
+ int Size(void) const;
+
+ // retrieves a modifiable reference to the SamProgram object associated with this ID
+ SamProgram& operator[](const std::string& programId);
+
+ // retrieve STL-compatible iterators
+ public:
+ SamProgramIterator Begin(void); // returns iterator to begin()
+ SamProgramConstIterator Begin(void) const; // returns const_iterator to begin()
+ SamProgramConstIterator ConstBegin(void) const; // returns const_iterator to begin()
+ SamProgramIterator End(void); // returns iterator to end()
+ SamProgramConstIterator End(void) const; // returns const_iterator to end()
+ SamProgramConstIterator ConstEnd(void) const; // returns const_iterator to end()
+
+ // internal methods
+ private:
+ int IndexOf(const std::string& programId) const;
+ const std::string NextIdFor(const std::string& programId) const;
+
+ // data members
+ private:
+ SamProgramContainer m_data;
+};
+
+} // namespace BamTools
+
+#endif // SAM_PROGRAMCHAIN_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM read group data fields.
// ***************************************************************************
Provides direct read/write access to the SAM read group data fields.
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
+ \sa \samSpecURL
+*/
+/*! \var SamReadGroup::Description
+ \brief corresponds to \@RG DS:\<Description\>
+*/
+/*! \var SamReadGroup::FlowOrder
+ \brief corresponds to \@RG FO:\<FlowOrder\>
*/
/*! \var SamReadGroup::ID
\brief corresponds to \@RG ID:\<ID\>
+
+ Required for valid SAM header.
*/
-/*! \var SamReadGroup::Sample
- \brief corresponds to \@RG SM:\<Sample\>
+/*! \var SamReadGroup::KeySequence
+ \brief corresponds to \@RG KS:\<KeySequence\>
*/
/*! \var SamReadGroup::Library
\brief corresponds to \@RG LB:\<Library\>
*/
-/*! \var SamReadGroup::Description
- \brief corresponds to \@RG DS:\<Description\>
-*/
/*! \var SamReadGroup::PlatformUnit
\brief corresponds to \@RG PU:\<PlatformUnit\>
*/
/*! \var SamReadGroup::PredictedInsertSize
\brief corresponds to \@RG PI:\<PredictedInsertSize\>
*/
-/*! \var SamReadGroup::SequencingCenter
- \brief corresponds to \@RG CN:\<SequencingCenter\>
-*/
/*! \var SamReadGroup::ProductionDate
\brief corresponds to \@RG DT:\<ProductionDate\>
*/
+/*! \var SamReadGroup::Program
+ \brief corresponds to \@RG PG:\<Program\>
+*/
+/*! \var SamReadGroup::Sample
+ \brief corresponds to \@RG SM:\<Sample\>
+*/
+/*! \var SamReadGroup::SequencingCenter
+ \brief corresponds to \@RG CN:\<SequencingCenter\>
+*/
/*! \var SamReadGroup::SequencingTechnology
\brief corresponds to \@RG PL:\<SequencingTechnology\>
*/
\brief default constructor
*/
SamReadGroup::SamReadGroup(void)
- : ID("")
- , Sample("")
+ : Description("")
+ , FlowOrder("")
+ , ID("")
+ , KeySequence("")
, Library("")
- , Description("")
, PlatformUnit("")
, PredictedInsertSize("")
- , SequencingCenter("")
, ProductionDate("")
+ , Program("")
+ , Sample("")
+ , SequencingCenter("")
, SequencingTechnology("")
{ }
\param id desired read group ID
*/
SamReadGroup::SamReadGroup(const std::string& id)
- : ID(id)
- , Sample("")
+ : Description("")
+ , FlowOrder("")
+ , ID(id)
+ , KeySequence("")
, Library("")
- , Description("")
, PlatformUnit("")
, PredictedInsertSize("")
- , SequencingCenter("")
, ProductionDate("")
+ , Program("")
+ , Sample("")
+ , SequencingCenter("")
, SequencingTechnology("")
{ }
\brief copy constructor
*/
SamReadGroup::SamReadGroup(const SamReadGroup& other)
- : ID(other.ID)
- , Sample(other.Sample)
+ : Description(other.Description)
+ , FlowOrder(other.FlowOrder)
+ , ID(other.ID)
+ , KeySequence(other.KeySequence)
, Library(other.Library)
- , Description(other.Description)
, PlatformUnit(other.PlatformUnit)
, PredictedInsertSize(other.PredictedInsertSize)
- , SequencingCenter(other.SequencingCenter)
, ProductionDate(other.ProductionDate)
+ , Program(other.Program)
+ , Sample(other.Sample)
+ , SequencingCenter(other.SequencingCenter)
, SequencingTechnology(other.SequencingTechnology)
{ }
\brief Clears all data fields.
*/
void SamReadGroup::Clear(void) {
+ Description.clear();
+ FlowOrder.clear();
ID.clear();
- Sample.clear();
+ KeySequence.clear();
Library.clear();
- Description.clear();
PlatformUnit.clear();
PredictedInsertSize.clear();
- SequencingCenter.clear();
ProductionDate.clear();
+ Program.clear();
+ Sample.clear();
+ SequencingCenter.clear();
SequencingTechnology.clear();
}
+/*! \fn bool SamReadGroup::HasDescription(void) const
+ \brief Returns \c true if read group contains \@RG DS:\<Description\>
+*/
+bool SamReadGroup::HasDescription(void) const {
+ return (!Description.empty());
+}
+
+/*! \fn bool SamReadGroup::HasFlowOrder(void) const
+ \brief Returns \c true if read group contains \@RG FO:\<FlowOrder\>
+*/
+bool SamReadGroup::HasFlowOrder(void) const {
+ return (!FlowOrder.empty());
+}
+
/*! \fn bool SamReadGroup::HasID(void) const
\brief Returns \c true if read group contains \@RG: ID:\<ID\>
*/
return (!ID.empty());
}
-/*! \fn bool SamReadGroup::HasSample(void) const
- \brief Returns \c true if read group contains \@RG SM:\<Sample\>
+/*! \fn bool SamReadGroup::HasKeySequence(void) const
+ \brief Returns \c true if read group contains \@RG KS:\<KeySequence\>
*/
-bool SamReadGroup::HasSample(void) const {
- return (!Sample.empty());
+bool SamReadGroup::HasKeySequence(void) const {
+ return (!KeySequence.empty());
}
/*! \fn bool SamReadGroup::HasLibrary(void) const
return (!Library.empty());
}
-/*! \fn bool SamReadGroup::HasDescription(void) const
- \brief Returns \c true if read group contains \@RG DS:\<Description\>
-*/
-bool SamReadGroup::HasDescription(void) const {
- return (!Description.empty());
-}
-
/*! \fn bool SamReadGroup::HasPlatformUnit(void) const
\brief Returns \c true if read group contains \@RG PU:\<PlatformUnit\>
*/
return (!PredictedInsertSize.empty());
}
-/*! \fn bool SamReadGroup::HasSequencingCenter(void) const
- \brief Returns \c true if read group contains \@RG CN:\<SequencingCenter\>
-*/
-bool SamReadGroup::HasSequencingCenter(void) const {
- return (!SequencingCenter.empty());
-}
-
/*! \fn bool SamReadGroup::HasProductionDate(void) const
\brief Returns \c true if read group contains \@RG DT:\<ProductionDate\>
*/
return (!ProductionDate.empty());
}
+/*! \fn bool SamReadGroup::HasProgram(void) const
+ \brief Returns \c true if read group contains \@RG PG:\<Program\>
+*/
+bool SamReadGroup::HasProgram(void) const {
+ return (!Program.empty());
+}
+
+/*! \fn bool SamReadGroup::HasSample(void) const
+ \brief Returns \c true if read group contains \@RG SM:\<Sample\>
+*/
+bool SamReadGroup::HasSample(void) const {
+ return (!Sample.empty());
+}
+
+/*! \fn bool SamReadGroup::HasSequencingCenter(void) const
+ \brief Returns \c true if read group contains \@RG CN:\<SequencingCenter\>
+*/
+bool SamReadGroup::HasSequencingCenter(void) const {
+ return (!SequencingCenter.empty());
+}
+
/*! \fn bool SamReadGroup::HasSequencingTechnology(void) const
\brief Returns \c true if read group contains \@RG PL:\<SequencingTechnology\>
*/
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM read group data fields.
// ***************************************************************************
void Clear(void); // clears all data fields
// convenience query methods
+ bool HasDescription(void) const; // returns true if read group has a description
+ bool HasFlowOrder(void) const; // returns true if read group has a flow order entry
bool HasID(void) const; // returns true if read group has a group ID
- bool HasSample(void) const; // returns true if read group has a sample name
+ bool HasKeySequence(void) const; // returns true if read group has a key sequence
bool HasLibrary(void) const; // returns true if read group has a library name
- bool HasDescription(void) const; // returns true if read group has a description
bool HasPlatformUnit(void) const; // returns true if read group has a platform unit ID
bool HasPredictedInsertSize(void) const; // returns true if read group has a predicted insert size
- bool HasSequencingCenter(void) const; // returns true if read group has a sequencing center ID
bool HasProductionDate(void) const; // returns true if read group has a production date
+ bool HasProgram(void) const; // returns true if read group has a program entry
+ bool HasSample(void) const; // returns true if read group has a sample name
+ bool HasSequencingCenter(void) const; // returns true if read group has a sequencing center ID
bool HasSequencingTechnology(void) const; // returns true if read group has a sequencing technology ID
- // data members
- std::string ID; // ID:<ID>
- std::string Sample; // SM:<Sample>
- std::string Library; // LB:<Library>
- std::string Description; // DS:<Description>
- std::string PlatformUnit; // PU:<PlatformUnit>
- std::string PredictedInsertSize; // PI:<PredictedInsertSize>
- std::string SequencingCenter; // CN:<SequencingCenter>
- std::string ProductionDate; // DT:<ProductionDate>
- std::string SequencingTechnology; // PL:<SequencingTechnology>
+
+ // data fields
+ std::string Description; // DS:<Description>
+ std::string FlowOrder; // FO:<FlowOrder>
+ std::string ID; // ID:<ID> *Required for valid SAM header*
+ std::string KeySequence; // KS:<KeySequence>
+ std::string Library; // LB:<Library>
+ std::string PlatformUnit; // PU:<PlatformUnit>
+ std::string PredictedInsertSize; // PI:<PredictedInsertSize>
+ std::string ProductionDate; // DT:<ProductionDate>
+ std::string Program; // PG:<Program>
+ std::string Sample; // SM:<Sample>
+ std::string SequencingCenter; // CN:<SequencingCenter>
+ std::string SequencingTechnology; // PL:<SequencingTechnology>
};
/*! \fn bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs)
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamReadGroup entries.
// ***************************************************************************
/*! \fn void SamReadGroupDictionary::Add(const SamReadGroup& readGroup)
\brief Adds a read group to the dictionary.
- Duplicate entries are discarded.
+ Duplicate entries are silently discarded.
\param readGroup entry to be added
*/
void SamReadGroupDictionary::Add(const SamReadGroup& readGroup) {
+
+ // TODO: report error on attempted duplicate?
+
if ( IsEmpty() || !Contains(readGroup) )
m_data.push_back(readGroup);
}
}
/*! \fn SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const
+ \return an STL const_iterator pointing to the first read group
This is an overloaded function.
- \return a const STL iterator pointing to the first read group
\sa ConstBegin(), End()
*/
SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const {
}
/*! \fn SamReadGroupConstIterator SamReadGroupDictionary::ConstBegin(void) const
- \return a const STL iterator pointing to the first read group
+ \return an STL const_iterator pointing to the first read group
\sa Begin(), ConstEnd()
*/
SamReadGroupConstIterator SamReadGroupDictionary::ConstBegin(void) const {
}
/*! \fn SamReadGroupConstIterator SamReadGroupDictionary::ConstEnd(void) const
- \return a const STL iterator pointing to the imaginary entry after the last read group
+ \return an STL const_iterator pointing to the imaginary entry after the last read group
\sa ConstBegin(), End()
*/
SamReadGroupConstIterator SamReadGroupDictionary::ConstEnd(void) const {
/*! \fn bool SamReadGroupDictionary::Contains(const std::string& readGroupId) const
\brief Returns true if dictionary contains read group.
-
- This is an overloaded function.
-
\param readGroupId search for read group matching this ID
\return \c true if dictionary contains a read group with this ID
*/
bool SamReadGroupDictionary::Contains(const std::string& readGroupId) const {
- return Contains( SamReadGroup(readGroupId) );
+ return ( IndexOf(readGroupId) != (int)m_data.size() );
}
/*! \fn bool SamReadGroupDictionary::Contains(const SamReadGroup& readGroup) const
- \brief Returns true if dictionary contains read group.
+ \brief Returns true if dictionary contains read group (matching on ID).
+
+ This is an overloaded function.
+
\param readGroup search for this read group
- \return \c true if dictionary contains read group
+ \return \c true if dictionary contains read group (matching on ID).
*/
bool SamReadGroupDictionary::Contains(const SamReadGroup& readGroup) const {
- return ( IndexOf(readGroup) != (int)m_data.size() );
+ return Contains( readGroup.ID );
}
/*! \fn SamReadGroupIterator SamReadGroupDictionary::End(void)
}
/*! \fn SamReadGroupConstIterator SamReadGroupDictionary::End(void) const
+ \return an STL const_iterator pointing to the imaginary entry after the last read group
This is an overloaded function.
- \return a const STL iterator pointing to the imaginary entry after the last read group
\sa Begin(), ConstEnd()
*/
SamReadGroupConstIterator SamReadGroupDictionary::End(void) const {
return m_data.end();
}
-/*! \fn int SamReadGroupDictionary::IndexOf(const SamReadGroup& readGroup) const
+/*! \fn int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const
\internal
\return index of read group if found. Otherwise, returns vector::size() (invalid index).
*/
-int SamReadGroupDictionary::IndexOf(const SamReadGroup& readGroup) const {
+int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const {
SamReadGroupConstIterator begin = ConstBegin();
SamReadGroupConstIterator iter = begin;
SamReadGroupConstIterator end = ConstEnd();
for ( ; iter != end; ++iter ) {
- if ( *iter == readGroup )
+ const SamReadGroup& current = (*iter);
+ if ( current.ID == readGroupId )
break;
}
return distance( begin, iter );
}
/*! \fn void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup)
- \brief Removes read group from dictionary, if found.
- \param readGroup read group to remove
+ \brief Removes read group from dictionary, if found (matching on ID).
+
+ This is an overloaded function.
+
+ \param readGroup read group to remove (matches on ID)
*/
void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup) {
- if ( Contains(readGroup) )
- m_data.erase( m_data.begin() + IndexOf(readGroup) );
+ Remove( readGroup.ID );
}
/*! \fn void SamReadGroupDictionary::Remove(const std::string& readGroupId)
\brief Removes read group from dictionary, if found.
-
- This is an overloaded function.
-
\param readGroupId ID of read group to remove
\sa Remove()
*/
void SamReadGroupDictionary::Remove(const std::string& readGroupId) {
- Remove( SamReadGroup(readGroupId) );
+ if ( Contains(readGroupId) )
+ m_data.erase( m_data.begin() + IndexOf(readGroupId) );
}
/*! \fn void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups)
- \brief Removes multiple read groups from dictionary.
+ \brief Removes multiple read groups from dictionary (matching on ID).
This is an overloaded function.
SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId) {
// look up read group ID
- int index = IndexOf( SamReadGroup(readGroupId) );
+ int index = IndexOf(readGroupId);
// if found, return read group at index
if ( index != (int)m_data.size() )
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamReadGroup entries.
// ***************************************************************************
// internal methods
private:
- int IndexOf(const SamReadGroup& readGroup) const;
+ int IndexOf(const std::string& readGroupId) const;
// data members
private:
} // namespace BamTools
-#endif // SAM_READGROUP_DICTIONARY
+#endif // SAM_READGROUP_DICTIONARY_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 20 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM sequence data fields.
// ***************************************************************************
Provides direct read/write access to the SAM sequence data fields.
- \sa http://samtools.sourceforge.net/SAM-1.3.pdf
-*/
-/*! \var SamSequence::Name
- \brief corresponds to \@SQ SN:\<Name\>
-*/
-/*! \var SamSequence::Length
- \brief corresponds to \@SQ LN:\<Length\>
+ \sa \samSpecURL
*/
/*! \var SamSequence::AssemblyID
\brief corresponds to \@SQ AS:\<AssemblyID\>
/*! \var SamSequence::Checksum
\brief corresponds to \@SQ M5:\<Checksum\>
*/
-/*! \var SamSequence::URI
- \brief corresponds to \@SQ UR:\<URI\>
+/*! \var SamSequence::Length
+ \brief corresponds to \@SQ LN:\<Length\>
+
+ Required for valid SAM header.
+*/
+/*! \var SamSequence::Name
+ \brief corresponds to \@SQ SN:\<Name\>
+
+ Required for valid SAM header.
*/
/*! \var SamSequence::Species
\brief corresponds to \@SQ SP:\<Species\>
*/
+/*! \var SamSequence::URI
+ \brief corresponds to \@SQ UR:\<URI\>
+*/
/*! \fn SamSequence::SamSequence(void)
\brief default constructor
*/
SamSequence::SamSequence(void)
- : Name("")
- , Length("")
- , AssemblyID("")
+ : AssemblyID("")
, Checksum("")
- , URI("")
+ , Length("")
+ , Name("")
, Species("")
+ , URI("")
{ }
/*! \fn SamSequence::SamSequence(const std::string& name, const int& length)
\brief constructs sequence with \a name and \a length
- \param name desired sequence name
+ \param name desired sequence name
\param length desired sequence length (numeric value)
*/
-SamSequence::SamSequence(const std::string& name, const int& length)
- : Name(name)
- , AssemblyID("")
+SamSequence::SamSequence(const std::string& name,
+ const int& length)
+ : AssemblyID("")
, Checksum("")
- , URI("")
+ , Name(name)
, Species("")
+ , URI("")
{
stringstream s("");
s << length;
Length = s.str();
}
+/*! \fn SamSequence::SamSequence(const std::string& name, const std::string& length)
+ \brief constructs sequence with \a name and \a length
+
+ \param name desired sequence name
+ \param length desired sequence length (string value)
+*/
+SamSequence::SamSequence(const std::string& name,
+ const std::string& length)
+ : AssemblyID("")
+ , Checksum("")
+ , Length(length)
+ , Name(name)
+ , Species("")
+ , URI("")
+{ }
+
/*! \fn SamSequence::SamSequence(const SamSequence& other)
\brief copy constructor
*/
SamSequence::SamSequence(const SamSequence& other)
- : Name(other.Name)
- , Length(other.Length)
- , AssemblyID(other.AssemblyID)
+ : AssemblyID(other.AssemblyID)
, Checksum(other.Checksum)
- , URI(other.URI)
+ , Length(other.Length)
+ , Name(other.Name)
, Species(other.Species)
+ , URI(other.URI)
{ }
/*! \fn SamSequence::~SamSequence(void)
\brief Clears all data fields.
*/
void SamSequence::Clear(void) {
- Name.clear();
- Length.clear();
AssemblyID.clear();
Checksum.clear();
- URI.clear();
+ Length.clear();
+ Name.clear();
Species.clear();
-}
-
-/*! \fn bool SamSequence::HasName(void) const
- \brief Returns \c true if sequence contains \@SQ SN:\<Name\>
-*/
-bool SamSequence::HasName(void) const {
- return (!Name.empty());
-}
-
-/*! \fn bool SamSequence::HasLength(void) const
- \brief Returns \c true if sequence contains \@SQ LN:\<Length\>
-*/
-bool SamSequence::HasLength(void) const {
- return (!Length.empty());
+ URI.clear();
}
/*! \fn bool SamSequence::HasAssemblyID(void) const
return (!Checksum.empty());
}
-/*! \fn bool SamSequence::HasURI(void) const
- \brief Returns \c true if sequence contains \@SQ UR:\<URI\>
+/*! \fn bool SamSequence::HasLength(void) const
+ \brief Returns \c true if sequence contains \@SQ LN:\<Length\>
*/
-bool SamSequence::HasURI(void) const {
- return (!URI.empty());
+bool SamSequence::HasLength(void) const {
+ return (!Length.empty());
+}
+
+/*! \fn bool SamSequence::HasName(void) const
+ \brief Returns \c true if sequence contains \@SQ SN:\<Name\>
+*/
+bool SamSequence::HasName(void) const {
+ return (!Name.empty());
}
/*! \fn bool SamSequence::HasSpecies(void) const
bool SamSequence::HasSpecies(void) const {
return (!Species.empty());
}
+
+/*! \fn bool SamSequence::HasURI(void) const
+ \brief Returns \c true if sequence contains \@SQ UR:\<URI\>
+*/
+bool SamSequence::HasURI(void) const {
+ return (!URI.empty());
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM sequence data fields.
// ***************************************************************************
// ctor & dtor
SamSequence(void);
SamSequence(const std::string& name, const int& length);
+ SamSequence(const std::string& name, const std::string& length);
SamSequence(const SamSequence& other);
~SamSequence(void);
// query/modify entire sequence
- void Clear(void); // clears all contents
+ void Clear(void); // clears all contents
// convenience query methods
- bool HasName(void) const; // returns true if sequence has a name
- bool HasLength(void) const; // returns true if sequence has a length
- bool HasAssemblyID(void) const; // returns true if sequence has an assembly ID
- bool HasChecksum(void) const; // returns true if sequence has an MD5 checksum
- bool HasURI(void) const; // returns true if sequence has a URI
- bool HasSpecies(void) const; // returns true if sequence has a species ID
+ bool HasAssemblyID(void) const; // returns true if sequence has an assembly ID
+ bool HasChecksum(void) const; // returns true if sequence has an MD5 checksum
+ bool HasLength(void) const; // returns true if sequence has a length
+ bool HasName(void) const; // returns true if sequence has a name
+ bool HasSpecies(void) const; // returns true if sequence has a species ID
+ bool HasURI(void) const; // returns true if sequence has a URI
// data members
- std::string Name; // SN:<Name>
- std::string Length; // LN:<Length>
- std::string AssemblyID; // AS:<AssemblyID>
- std::string Checksum; // M5:<Checksum>
- std::string URI; // UR:<URI>
- std::string Species; // SP:<Species>
+ std::string AssemblyID; // AS:<AssemblyID>
+ std::string Checksum; // M5:<Checksum>
+ std::string Length; // LN:<Length> *Required for valid SAM header*
+ std::string Name; // SN:<Name> *Required for valid SAM header*
+ std::string Species; // SP:<Species>
+ std::string URI; // UR:<URI>
};
/*! \fn bool operator==(const SamSequence& lhs, const SamSequence& rhs)
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 20 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamSequence entries.
// *************************************************************************
/*! \fn void SamSequenceDictionary::Add(const SamSequence& sequence)
\brief Adds a sequence to the dictionary.
- Duplicate entries are discarded.
+ Duplicate entries are silently discarded.
\param sequence entry to be added
*/
void SamSequenceDictionary::Add(const SamSequence& sequence) {
+
+ // TODO: report error on attempted duplicate?
+
if ( IsEmpty() || !Contains(sequence) )
m_data.push_back(sequence);
}
}
/*! \fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const
+ \return an STL const_iterator pointing to the first sequence
This is an overloaded function.
- \return a const STL iterator pointing to the first sequence
\sa ConstBegin(), End()
*/
SamSequenceConstIterator SamSequenceDictionary::Begin(void) const {
}
/*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const
- \return a const STL iterator pointing to the first sequence
+ \return an STL const_iterator pointing to the first sequence
\sa Begin(), ConstEnd()
*/
SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const {
}
/*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const
- \return a const STL iterator pointing to the imaginary entry after the last sequence
+ \return an STL const_iterator pointing to the imaginary entry after the last sequence
\sa End(), ConstBegin()
*/
SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const {
}
/*! \fn bool SamSequenceDictionary::Contains(const SamSequence& sequence) const
- \brief Returns true if dictionary contains sequence.
+ \brief Returns true if dictionary contains sequence (matches on name).
+
+ This is an overloaded function.
+
\param sequence search for this sequence
- \return \c true if dictionary contains sequence
+ \return \c true if dictionary contains sequence (matching on name)
*/
bool SamSequenceDictionary::Contains(const SamSequence& sequence) const {
- return ( IndexOf(sequence) != (int)m_data.size() );
+ return ( IndexOf(sequence.Name) != (int)m_data.size() );
}
/*! \fn SamSequenceIterator SamSequenceDictionary::End(void)
}
/*! \fn SamSequenceConstIterator SamSequenceDictionary::End(void) const
+ \return an STL const_iterator pointing to the imaginary entry after the last sequence
This is an overloaded function.
- \return a const STL iterator pointing to the imaginary entry after the last sequence
\sa Begin(), ConstEnd()
*/
SamSequenceConstIterator SamSequenceDictionary::End(void) const {
return m_data.end();
}
-/*! \fn int SamSequenceDictionary::IndexOf(const SamSequence& sequence) const
- \internal
-
- Uses operator==(SamSequence, SamSequence)
-
- \return index of sequence if found. Otherwise, returns vector::size() (invalid index).
-*/
-int SamSequenceDictionary::IndexOf(const SamSequence& sequence) const {
- SamSequenceConstIterator begin = ConstBegin();
- SamSequenceConstIterator iter = begin;
- SamSequenceConstIterator end = ConstEnd();
- for ( ; iter != end; ++iter ) {
- const SamSequence& currentSeq = (*iter);
- if ( currentSeq == sequence )
- break;
- }
- return distance( begin, iter );
-}
-
/*! \fn int SamSequenceDictionary::IndexOf(const std::string& name) const
\internal
-
- Use comparison of SamSequence::Name to \a name
-
- \return index of sequence if found. Otherwise, returns vector::size() (invalid index).
+ \return index of sequence if found (matching on name). Otherwise, returns vector::size() (invalid index).
*/
int SamSequenceDictionary::IndexOf(const std::string& name) const {
SamSequenceConstIterator begin = ConstBegin();
}
/*! \fn void SamSequenceDictionary::Remove(const SamSequence& sequence)
- \brief Removes sequence from dictionary, if found.
- \param sequence sequence to remove
+ \brief Removes sequence from dictionary, if found (matches on name).
+
+ This is an overloaded function.
+
+ \param sequence SamSequence to remove (matching on name)
*/
void SamSequenceDictionary::Remove(const SamSequence& sequence) {
- if ( Contains(sequence) )
- m_data.erase( m_data.begin() + IndexOf(sequence) );
+ Remove( sequence.Name );
}
/*! \fn void SamSequenceDictionary::Remove(const std::string& sequenceName)
\brief Retrieves the modifiable SamSequence that matches \a sequenceName.
NOTE - If the dictionary contains no sequence matching this name, this function inserts
- a new one with this name, and returns a reference to it.
+ a new one with this name (length:0), and returns a reference to it.
If you want to avoid this insertion behavior, check the result of Contains() before
using this operator.
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 20 March 2011
+// Last modified: 18 April 2011
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamSequence entries.
// ***************************************************************************
// internal methods
private:
- int IndexOf(const SamSequence& sequence) const;
int IndexOf(const std::string& name) const;
// data members
} // namespace BamTools
-#endif // SAM_SEQUENCE_DICTIONARY
+#endif // SAM_SEQUENCE_DICTIONARY_H
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for producing BAM files
// ***************************************************************************
case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;
case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;
case (Constants::BAM_CIGAR_PAD_CHAR) : cigarOp = Constants::BAM_CIGAR_PAD; break;
+ case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
+ case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
default:
fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\n", coIter->Type);
exit(1);
if ( m_isBigEndian ) {
char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
memcpy(cigarData, packedCigar.data(), packedCigarLength);
- for (unsigned int i = 0; i < packedCigarLength; ++i)
- if (m_isBigEndian) BamTools::SwapEndian_32p(&cigarData[i]);
+ if ( m_isBigEndian ) {
+ for ( unsigned int i = 0; i < packedCigarLength; ++i )
+ BamTools::SwapEndian_32p(&cigarData[i]);
+ }
m_stream.Write(cigarData, packedCigarLength);
free(cigarData);
}
// write the base qualities
char* pBaseQualities = (char*)al.Qualities.data();
- for(unsigned int i = 0; i < queryLength; i++)
+ for ( unsigned int i = 0; i < queryLength; i++ )
pBaseQualities[i] -= 33; // FASTQ conversion
m_stream.Write(pBaseQualities, queryLength);
case(Constants::BAM_TAG_TYPE_HEX) :
case(Constants::BAM_TAG_TYPE_STRING) :
// no endian swapping necessary for hex-string/string data
- while (tagData[i]) { ++i; }
+ while ( tagData[i] )
+ ++i;
// increment one more for null terminator
++i;
break;
+ case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+ {
+ // read array type
+ const char arrayType = tagData[i];
+ ++i;
+
+ // swap endian-ness of number of elements in place, then retrieve for loop
+ BamTools::SwapEndian_32p(&tagData[i]);
+ int32_t numElements;
+ memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+ i += sizeof(uint32_t);
+
+ // swap endian-ness of array elements
+ for ( int j = 0; j < numElements; ++j ) {
+ switch (arrayType) {
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ // no endian-swapping necessary
+ ++i;
+ break;
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ BamTools::SwapEndian_16p(&tagData[i]);
+ i += sizeof(uint16_t);
+ break;
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ BamTools::SwapEndian_32p(&tagData[i]);
+ i += sizeof(uint32_t);
+ break;
+ default:
+ // error case
+ fprintf(stderr,
+ "BamWriter ERROR: unknown binary array type encountered: [%c]\n",
+ arrayType);
+ exit(1);
+ }
+ }
+
+ break;
+ }
+
default :
fprintf(stderr, "BamWriter ERROR: invalid tag value type\n"); // shouldn't get here
free(tagData);
m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);
// write the SAM header text
- if (samHeaderLen > 0)
+ if ( samHeaderLen > 0 )
m_stream.Write(samHeaderText.data(), samHeaderLen);
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for parsing SAM header text into SamHeader object
// ***************************************************************************
string headerLine("");
while ( getline(headerStream, headerLine) )
ParseSamLine(headerLine);
- return;
}
void SamFormatParser::ParseSamLine(const string& line) {
else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
else
cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;
-
- return;
}
void SamFormatParser::ParseHDLine(const string& line) {
// set header contents
if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;
- else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;
+ else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
else
cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;
}
// if @HD line exists, VN must be provided
- if ( !m_header.HasVersion() ) {
+ if ( !m_header.HasVersion() )
cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;
- return;
- }
}
void SamFormatParser::ParseSQLine(const string& line) {
if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG ) seq.Length = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG ) seq.Checksum = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG ) seq.Species = tokenValue;
+ else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
else
cerr << "SamFormatParser ERROR: unknown SQ tag: " << tokenTag << endl;
}
+ bool isMissingRequiredFields = false;
+
// if @SQ line exists, SN must be provided
if ( !seq.HasName() ) {
+ isMissingRequiredFields = true;
cerr << "SamFormatParser ERROR: @SQ line is missing SN tag" << endl;
- return;
}
// if @SQ line exists, LN must be provided
if ( !seq.HasLength() ) {
+ isMissingRequiredFields = true;
cerr << "SamFormatParser ERROR: @SQ line is missing LN tag" << endl;
- return;
}
// store SAM sequence entry
- m_header.Sequences.Add(seq);
+ if ( !isMissingRequiredFields )
+ m_header.Sequences.Add(seq);
}
void SamFormatParser::ParseRGLine(const string& line) {
// set read group contents
if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;
else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;
else
cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;
}
+ bool isMissingRequiredFields = false;
+
// if @RG line exists, ID must be provided
if ( !rg.HasID() ) {
+ isMissingRequiredFields = true;
cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;
- return;
- }
-
- // if @RG line exists, SM must be provided
- if ( !rg.HasSample() ) {
- cerr << "SamFormatParser ERROR: @RG line is missing SM tag" << endl;
- return;
}
// store SAM read group entry
- m_header.ReadGroups.Add(rg);
+ if ( !isMissingRequiredFields )
+ m_header.ReadGroups.Add(rg);
}
void SamFormatParser::ParsePGLine(const string& line) {
+ SamProgram pg;
+
// split string into tokens
vector<string> tokens = Split(line, Constants::SAM_TAB);
const string tokenTag = (*tokenIter).substr(0,2);
const string tokenValue = (*tokenIter).substr(3);
- // set header contents
- if ( tokenTag == Constants::SAM_PG_NAME_TAG ) m_header.ProgramName = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) m_header.ProgramVersion = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) m_header.ProgramCommandLine = tokenValue;
+ // set program record contents
+ if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;
else
cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;
}
+ bool isMissingRequiredFields = false;
+
// if @PG line exists, ID must be provided
- if ( !m_header.HasProgramName() ) {
- cerr << "SamFormatParser ERROR:- @PG line is missing ID tag" << endl;
- return;
+ if ( !pg.HasID() ) {
+ isMissingRequiredFields = true;
+ cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;
}
+
+ // store SAM program record
+ if ( !isMissingRequiredFields )
+ m_header.Programs.Add(pg);
}
void SamFormatParser::ParseCOLine(const string& line) {
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 19 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for printing formatted SAM header to string
// ***************************************************************************
if ( seq.HasChecksum() )
out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum);
- // UR:<URI>
- if ( seq.HasURI() )
- out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
-
// SP:<Species>
if ( seq.HasSpecies() )
out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species);
+ // UR:<URI>
+ if ( seq.HasURI() )
+ out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
+
// newline
out << endl;
}
for ( ; rgIter != rgEnd; ++rgIter ) {
const SamReadGroup& rg = (*rgIter);
- // @RG ID:<ID> SM:<Sample>
+ // @RG ID:<ID>
out << Constants::SAM_RG_BEGIN_TOKEN
- << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID)
- << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
+ << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID);
- // LB:<Library>
- if ( rg.HasLibrary() )
- out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
+ // CN:<SequencingCenter>
+ if ( rg.HasSequencingCenter() )
+ out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
// DS:<Description>
if ( rg.HasDescription() )
out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description);
- // PU:<PlatformUnit>
- if ( rg.HasPlatformUnit() )
- out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
+ // DT:<ProductionDate>
+ if ( rg.HasProductionDate() )
+ out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
+
+ // FO:<FlowOrder>
+ if ( rg.HasFlowOrder() )
+ out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder);
+
+ // KS:<KeySequence>
+ if ( rg.HasKeySequence() )
+ out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence);
+
+ // LB:<Library>
+ if ( rg.HasLibrary() )
+ out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
+
+ // PG:<Program>
+ if ( rg.HasProgram() )
+ out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program);
// PI:<PredictedInsertSize>
if ( rg.HasPredictedInsertSize() )
out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize);
- // CN:<SequencingCenter>
- if ( rg.HasSequencingCenter() )
- out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
-
- // DT:<ProductionDate>
- if ( rg.HasProductionDate() )
- out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
-
// PL:<SequencingTechnology>
if ( rg.HasSequencingTechnology() )
out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology);
+ // PU:<PlatformUnit>
+ if ( rg.HasPlatformUnit() )
+ out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
+
+ // SM:<Sample>
+ if ( rg.HasSample() )
+ out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
+
// newline
out << endl;
}
void SamFormatPrinter::PrintPG(std::stringstream& out) const {
- // if header has @PG data
- if ( m_header.HasProgramName() ) {
+ // iterate over program record entries
+ SamProgramConstIterator pgIter = m_header.Programs.ConstBegin();
+ SamProgramConstIterator pgEnd = m_header.Programs.ConstEnd();
+ for ( ; pgIter != pgEnd; ++pgIter ) {
+ const SamProgram& pg = (*pgIter);
- // @PG ID:<ProgramName>
+ // @PG ID:<ID>
out << Constants::SAM_PG_BEGIN_TOKEN
- << FormatTag(Constants::SAM_PG_NAME_TAG, m_header.ProgramName);
+ << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID);
+
+ // PN:<Name>
+ if ( pg.HasName() )
+ out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name);
+
+ // CL:<CommandLine>
+ if ( pg.HasCommandLine() )
+ out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine);
- // VN:<ProgramVersion>
- if ( m_header.HasProgramVersion() )
- out << FormatTag(Constants::SAM_PG_VERSION_TAG, m_header.ProgramVersion);
+ // PP:<PreviousProgramID>
+ if ( pg.HasPreviousProgramID() )
+ out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID);
- // CL:<ProgramCommandLine>
- if ( m_header.HasProgramCommandLine() )
- out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, m_header.ProgramCommandLine);
+ // VN:<Version>
+ if ( pg.HasVersion() )
+ out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version);
// newline
out << endl;
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 18 April 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for validating SamHeader data
// ***************************************************************************
using namespace BamTools;
using namespace BamTools::Internal;
+#include <cctype>
#include <iostream>
#include <set>
#include <sstream>
using namespace std;
+namespace BamTools {
+namespace Internal {
+
+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
+
+ // can omit checking chars if lengths not equal
+ const int lhsLength = lhs.length();
+ const int rhsLength = rhs.length();
+ if ( lhsLength != rhsLength )
+ return false;
+
+ // do *basic* toupper checks on each string char's
+ for ( int i = 0; i < lhsLength; ++i ) {
+ if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )
+ return false;
+ }
+
+ // otherwise OK
+ return true;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
// ------------------------------------------------------------------------
// Allow validation rules to vary, as needed, between SAM header versions
//
// // use rule introduced with version 2.0
static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);
+static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);
+static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);
static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);
+static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);
// TODO: This functionality is currently unused.
// Make validation "version-aware."
isValid &= ValidateMetadata();
isValid &= ValidateSequenceDictionary();
isValid &= ValidateReadGroupDictionary();
- isValid &= ValidateProgramData();
+ isValid &= ValidateProgramChain();
// report errors if desired
if ( verbose ) {
bool SamHeaderValidator::ValidateSequenceDictionary(void) {
- // TODO: warn/error if no sequences ?
-
bool isValid = true;
// check for unique sequence names
SamSequenceConstIterator seqEnd = sequences.ConstEnd();
for ( ; seqIter != seqEnd; ++seqIter ) {
const SamSequence& seq = (*seqIter);
- const string& name = seq.Name;
// lookup sequence name
+ const string& name = seq.Name;
nameIter = sequenceNames.find(name);
// error if found (duplicate entry)
bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
- // TODO: warn/error if no read groups ?
-
bool isValid = true;
// check for unique read group IDs & platform units
return true;
// if technology is valid keyword
- if ( Is454(technology) ||
- IsHelicos(technology) ||
- IsIllumina(technology) ||
- IsPacBio(technology) ||
- IsSolid(technology)
+ if ( caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_CAPILLARY) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO) ||
+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)
)
{
return true;
return false;
}
-bool SamHeaderValidator::Is454(const string& technology) {
- return ( technology == Constants::SAM_RG_SEQTECHNOLOGY_454 ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_LS454_LOWER ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_LS454_UPPER
- );
-}
-
-bool SamHeaderValidator::IsHelicos(const string& technology) {
- return ( technology == Constants::SAM_RG_SEQTECHNOLOGY_HELICOS_LOWER ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_HELICOS_UPPER
- );
-}
-
-bool SamHeaderValidator::IsIllumina(const string& technology) {
- return ( technology == Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA_LOWER ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA_UPPER
- );
-}
-
-bool SamHeaderValidator::IsPacBio(const string& technology) {
- return ( technology == Constants::SAM_RG_SEQTECHNOLOGY_PACBIO_LOWER ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_PACBIO_UPPER
- );
-}
-
-bool SamHeaderValidator::IsSolid(const string& technology) {
- return ( technology == Constants::SAM_RG_SEQTECHNOLOGY_SOLID_LOWER ||
- technology == Constants::SAM_RG_SEQTECHNOLOGY_SOLID_UPPER
- );
-}
-
-bool SamHeaderValidator::ValidateProgramData(void) {
+bool SamHeaderValidator::ValidateProgramChain(void) {
bool isValid = true;
isValid &= ContainsUniqueProgramIds();
isValid &= ValidatePreviousProgramIds();
}
bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
+
bool isValid = true;
- // TODO: once we have ability to handle multiple @PG entries,
- // check here for duplicate ID's
- // but for now, just return true
+ set<string> programIds;
+ set<string>::iterator pgIdIter;
+
+ // iterate over program records
+ const SamProgramChain& programs = m_header.Programs;
+ SamProgramConstIterator pgIter = programs.ConstBegin();
+ SamProgramConstIterator pgEnd = programs.ConstEnd();
+ for ( ; pgIter != pgEnd; ++pgIter ) {
+ const SamProgram& pg = (*pgIter);
+
+ // lookup program ID
+ const string& pgId = pg.ID;
+ pgIdIter = programIds.find(pgId);
+
+ // error if found (duplicate entry)
+ if ( pgIdIter != programIds.end() ) {
+ AddError("Program ID (ID): " + pgId + " is not unique");
+ isValid = false;
+ }
+
+ // otherwise ok, store ID
+ programIds.insert(pgId);
+ }
+
+ // return validation state
return isValid;
}
bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
+
bool isValid = true;
- // TODO: check that PP entries are valid later, after we get multiple @PG-entry handling
- // just return true for now
+
+ // iterate over program records
+ const SamProgramChain& programs = m_header.Programs;
+ SamProgramConstIterator pgIter = programs.ConstBegin();
+ SamProgramConstIterator pgEnd = programs.ConstEnd();
+ for ( ; pgIter != pgEnd; ++pgIter ) {
+ const SamProgram& pg = (*pgIter);
+
+ // ignore record for validation if PreviousProgramID is empty
+ const string& ppId = pg.PreviousProgramID;
+ if ( ppId.empty() )
+ continue;
+
+ // see if program "chain" contains an entry for ppId
+ if ( !programs.Contains(ppId) ) {
+ AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");
+ isValid = false;
+ }
+ }
+
+ // return validation state
return isValid;
}
void SamHeaderValidator::AddError(const string& message) {
// validate read group dictionary
bool ValidateReadGroupDictionary(void);
- bool ValidateReadGroup(const SamReadGroup& rg);
bool ContainsUniqueIDsAndPlatformUnits(void);
+ bool ValidateReadGroup(const SamReadGroup& rg);
bool CheckReadGroupID(const std::string& id);
bool CheckSequencingTechnology(const std::string& technology);
- bool Is454(const std::string& technology);
- bool IsHelicos(const std::string& technology);
- bool IsIllumina(const std::string& technology);
- bool IsPacBio(const std::string& technology);
- bool IsSolid(const std::string& technology);
// validate program data
- bool ValidateProgramData(void);
+ bool ValidateProgramChain(void);
bool ContainsUniqueProgramIds(void);
bool ValidatePreviousProgramIds(void);
--- /dev/null
+// ***************************************************************************
+// bamtools_resolve.cpp (c) 2011
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 7 April 2011
+// ---------------------------------------------------------------------------
+// Resolves paired-end reads (marking the IsProperPair flag as needed)
+// ***************************************************************************
+
+#include "bamtools_resolve.h"
+
+#include <api/BamReader.h>
+#include <api/BamWriter.h>
+#include <utils/bamtools_options.h>
+#include <utils/bamtools_utilities.h>
+using namespace BamTools;
+
+#include <iostream>
+#include <string>
+using namespace std;
+
+// ---------------------------------------------
+// ResolveSettings implementation
+
+struct ResolveTool::ResolveSettings {
+
+ // flags
+ bool HasInput;
+ bool HasOutput;
+ bool IsForceCompression;
+
+ // filenames
+ string InputFilename;
+ string OutputFilename;
+
+ // constructor
+ ResolveSettings(void)
+ : HasInput(false)
+ , HasOutput(false)
+ , IsForceCompression(false)
+ , InputFilename(Options::StandardIn())
+ , OutputFilename(Options::StandardOut())
+ { }
+};
+
+// ---------------------------------------------
+// ResolveToolPrivate implementation
+
+struct ResolveTool::ResolveToolPrivate {
+
+ // ctor & dtor
+ public:
+ ResolveToolPrivate(ResolveTool::ResolveSettings* settings)
+ : m_settings(settings)
+ { }
+
+ ~ResolveToolPrivate(void) { }
+
+ // 'public' interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+
+ // data members
+ private:
+ ResolveTool::ResolveSettings* m_settings;
+};
+
+bool ResolveTool::ResolveToolPrivate::Run(void) {
+ cerr << "Resoling BAM file..." << endl;
+ return true;
+}
+
+// ---------------------------------------------
+// ResolveTool implementation
+
+ResolveTool::ResolveTool(void)
+ : AbstractTool()
+ , m_settings(new ResolveSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools resolve", "resolves paired-end reads (marking the IsProperPair flag as needed)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [resolveOptions]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFilename, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
+}
+
+ResolveTool::~ResolveTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int ResolveTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int ResolveTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize ResolveTool
+ m_impl = new ResolveToolPrivate(m_settings);
+
+ // run ResolveTool, return success/failure
+ if ( m_impl->Run() ) return 0;
+ else return 1;
+}
--- /dev/null
+// ***************************************************************************
+// bamtools_resolve.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 7 April 2011
+// ---------------------------------------------------------------------------
+// Resolves paired-end reads (marking the IsProperPair flag as needed)
+// ***************************************************************************
+
+#ifndef BAMTOOLS_RESOLVE_H
+#define BAMTOOLS_RESOLVE_H
+
+#include "bamtools_tool.h"
+
+namespace BamTools {
+
+class ResolveTool : public AbstractTool {
+
+ public:
+ ResolveTool(void);
+ ~ResolveTool(void);
+
+ public:
+ int Help(void);
+ int Run(int argc, char* argv[]);
+
+ private:
+ struct ResolveSettings;
+ ResolveSettings* m_settings;
+
+ struct ResolveToolPrivate;
+ ResolveToolPrivate* m_impl;
+};
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_RESOLVE_H