// BamAlignment.cpp (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
#include <api/BamAlignment.h>
#include <api/BamConstants.h>
using namespace BamTools;
-
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <exception>
-#include <iostream>
-#include <map>
-#include <utility>
using namespace std;
/*! \class BamTools::BamAlignment
// calculate character lengths/offsets
const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
- const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
+ const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations*4);
const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
const unsigned int tagDataLength = dataLength - tagDataOffset;
QueryBases.clear();
if ( hasSeqData ) {
QueryBases.reserve(SupportData.QuerySequenceLength);
- for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
- char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+ for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+ const char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
QueryBases.append(1, singleBase);
}
}
Qualities.clear();
if ( hasQualData ) {
Qualities.reserve(SupportData.QuerySequenceLength);
- for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
- char singleQuality = (char)(qualData[i]+33);
+ for ( size_t i = 0; i < SupportData.QuerySequenceLength; ++i ) {
+ const char singleQuality = static_cast<const char>(qualData[i]+33);
Qualities.append(1, singleQuality);
}
}
for ( ; cigarIter != cigarEnd; ++cigarIter ) {
const CigarOp& op = (*cigarIter);
- switch (op.Type) {
+ switch ( op.Type ) {
// for 'M', 'I', '=', 'X' - write bases
case (Constants::BAM_CIGAR_MATCH_CHAR) :
case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
break;
- // shouldn't get here
+ // invalid CIGAR op-code
default:
- cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
- << op.Type << endl;
- exit(1);
+ const string message = string("invalid CIGAR operation type: ") + op.Type;
+ SetErrorString("BamAlignment::BuildCharData", message);
+ return false;
}
}
}
TagData.clear();
if ( hasTagData ) {
if ( IsBigEndian ) {
- int i = 0;
- while ( (unsigned int)i < tagDataLength ) {
+ size_t i = 0;
+ while ( i < tagDataLength ) {
i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
const char type = tagData[i]; // get tag type at position i
// swap endian-ness of number of elements in place, then retrieve for loop
BamTools::SwapEndian_32p(&tagData[i]);
- int32_t numElements;
+ uint32_t numElements;
memcpy(&numElements, &tagData[i], sizeof(uint32_t));
i += sizeof(uint32_t);
// swap endian-ness of array elements
- for ( int j = 0; j < numElements; ++j ) {
+ for ( size_t j = 0; j < numElements; ++j ) {
switch (arrayType) {
case (Constants::BAM_TAG_TYPE_INT8) :
case (Constants::BAM_TAG_TYPE_UINT8) :
i += sizeof(uint32_t);
break;
default:
- // error case
- cerr << "BamAlignment ERROR: unknown binary array type encountered: "
- << arrayType << endl;
+ const string message = string("invalid binary array type: ") + arrayType;
+ SetErrorString("BamAlignment::BuildCharData", message);
return false;
}
}
break;
}
- // shouldn't get here
+ // invalid tag type-code
default :
- cerr << "BamAlignment ERROR: invalid tag value type: "
- << type << endl;
- exit(1);
+ const string message = string("invalid tag type: ") + type;
+ SetErrorString("BamAlignment::BuildCharData", message);
+ return false;
}
}
}
// store tagData in alignment
TagData.resize(tagDataLength);
- memcpy((char*)TagData.data(), tagData, tagDataLength);
+ memcpy((char*)(TagData.data()), tagData, tagDataLength);
}
// clear the core-only flag
bool BamAlignment::FindTag(const std::string& tag,
char*& pTagData,
const unsigned int& tagDataLength,
- unsigned int& numBytesParsed)
+ unsigned int& numBytesParsed) const
{
while ( numBytesParsed < tagDataLength ) {
*/
int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
+ // TODO: Come back to this for coordinate issues !!!
+
// initialize alignment end to starting position
int alignEnd = Position;
return alignEnd;
}
+/*! \fn std::string BamAlignment::GetErrorString(void) const
+ \brief Returns a description of the last error that occurred
+
+ This method allows elimnation of STDERR pollution. Developers of client code
+ may choose how the messages are displayed to the user, if at all.
+
+ \return description of last error that occurred
+*/
+std::string BamAlignment::GetErrorString(void) const {
+ return ErrorString;
+}
+
/*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
\brief Retrieves value of read group tag ("RG").
*/
bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
- // make sure tag data exists
- if ( SupportData.HasCoreOnly || TagData.empty() )
+ // skip if alignment is core-only
+ if ( SupportData.HasCoreOnly ) {
+ // TODO: set error string?
+ return false;
+ }
+
+ // skip if no tags present
+ if ( TagData.empty() ) {
+ // TODO: set error string?
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
unsigned int numBytesParsed = 0;
// if tag not found, return failure
- if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ){
+ // TODO: set error string?
return false;
+ }
// otherwise, retrieve & validate tag type code
type = *(pTagData - 1);
// unknown tag type
default:
- cerr << "BamAlignment ERROR: unknown tag type encountered: "
- << type << endl;
+ const string message = string("invalid tag type: ") + type;
+ SetErrorString("BamAlignment::GetTagType", message);
return false;
}
}
\return \c true if both \a tag and \a type are correct sizes
*/
-bool BamAlignment::IsValidSize(const string& tag, const string& type) {
+bool BamAlignment::IsValidSize(const std::string& tag, const std::string& type) const {
return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
(type.size() == Constants::BAM_TAG_TYPESIZE);
}
-/*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
+/*! \fn void BamAlignment::RemoveTag(const std::string& tag)
\brief Removes field from BAM tags.
-
- \return \c true if tag was removed successfully (or didn't exist before)
*/
-bool BamAlignment::RemoveTag(const std::string& tag) {
+void BamAlignment::RemoveTag(const std::string& tag) {
// if char data not populated, do that first
if ( SupportData.HasCoreOnly )
// skip if no tags available
if ( TagData.empty() )
- return false;
+ return;
// localize the tag data
char* pOriginalTagData = (char*)TagData.data();
unsigned int newTagDataLength = 0;
unsigned int numBytesParsed = 0;
- // if tag not found, simply return true
+ // skip if tag not found
if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
- return true;
+ return;
// otherwise, remove it
- char* newTagData = new char[originalTagDataLength];
+ RaiiBuffer newTagData(originalTagDataLength);
// copy original tag data up til desired tag
pTagData -= 3;
numBytesParsed -= 3;
const unsigned int beginningTagDataLength = numBytesParsed;
newTagDataLength += beginningTagDataLength;
- memcpy(newTagData, pOriginalTagData, numBytesParsed);
+ memcpy(newTagData.Buffer, pOriginalTagData, numBytesParsed);
// attemp to skip to next tag
const char* pTagStorageType = pTagData + 2;
// squeeze remaining tag data
const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
- memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
+ memcpy(newTagData.Buffer + beginningTagDataLength, pTagData, endTagDataLength );
// save modified tag data in alignment
- TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
+ TagData.assign(newTagData.Buffer, beginningTagDataLength + endTagDataLength);
}
+}
- // clean up & return success
- delete[] newTagData;
- return true;
+/*! \fn void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const
+ \internal
+
+ Sets a formatted error string for this alignment.
+*/
+void BamAlignment::SetErrorString(const std::string& where, const std::string& what) const {
+ static const string SEPARATOR = ": ";
+ ErrorString = where + SEPARATOR + what;
}
/*! \fn void BamAlignment::SetIsDuplicate(bool ok)
*/
bool BamAlignment::SkipToNextTag(const char storageType,
char*& pTagData,
- unsigned int& numBytesParsed)
+ unsigned int& numBytesParsed) const
{
switch (storageType) {
// read number of elements
int32_t numElements;
- memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
+ memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped, if needed
numBytesParsed += sizeof(uint32_t);
pTagData += sizeof(uint32_t);
bytesToSkip = numElements*sizeof(uint32_t);
break;
default:
- cerr << "BamAlignment ERROR: unknown binary array type encountered: "
- << arrayType << endl;
+ const string message = string("invalid binary array type: ") + arrayType;
+ SetErrorString("BamAlignment::SkipToNextTag", message);
return false;
}
}
default:
- cerr << "BamAlignment ERROR: unknown tag type encountered"
- << storageType << endl;
+ const string message = string("invalid tag type: ") + storageType;
+ SetErrorString("BamAlignment::SkipToNextTag", message);
return false;
}
- // return success
+ // if we get here, tag skipped OK - return success
return true;
}
// BamAlignment.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
#include <api/api_global.h>
#include <api/BamAux.h>
#include <api/BamConstants.h>
-#include <cctype>
-#include <cstdio>
#include <cstdlib>
#include <cstring>
-#include <iostream>
#include <string>
#include <vector>
public:
// add a new tag
- template<typename T> bool AddTag(const std::string& tag,
- const std::string& type,
- const T& value);
- template<typename T> bool AddTag(const std::string& tag,
- const std::vector<T>& values);
+ template<typename T> bool AddTag(const std::string& tag, const std::string& type, const T& value);
+ template<typename T> bool AddTag(const std::string& tag, const std::vector<T>& values);
// edit (or append) tag
- template<typename T> bool EditTag(const std::string& tag,
- const std::string& type,
- const T& value);
- template<typename T> bool EditTag(const std::string& tag,
- const std::vector<T>& values);
+ template<typename T> bool EditTag(const std::string& tag, const std::string& type, const T& value);
+ template<typename T> bool EditTag(const std::string& tag, const std::vector<T>& values);
// retrieves tag data
- template<typename T> bool GetTag(const std::string& tag,
- T& destination) const;
- template<typename T> bool GetTag(const std::string& tag,
- std::vector<T>& destination) const;
+ template<typename T> bool GetTag(const std::string& tag, T& destination) const;
+ template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const;
// retrieves the BAM type-code for requested tag
// (returns whether or not tag exists, and type-code is valid)
// legacy methods (consider deprecated, but still available)
bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag
bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag
-
+
// returns true if alignment has a record for this tag name
bool HasTag(const std::string& tag) const;
// removes a tag
- bool RemoveTag(const std::string& tag);
+ void RemoveTag(const std::string& tag);
// additional methods
public:
// calculates alignment end position
int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;
+ // returns a description of the last error that occurred
+ std::string GetErrorString(void) const;
+
// public data fields
public:
std::string Name; // read name
//! \cond
// internal utility methods
private:
- static bool FindTag(const std::string& tag,
- char*& pTagData,
- const unsigned int& tagDataLength,
- unsigned int& numBytesParsed);
- static bool IsValidSize(const std::string& tag,
- const std::string& type);
- static bool SkipToNextTag(const char storageType,
+ bool FindTag(const std::string& tag,
+ char*& pTagData,
+ const unsigned int& tagDataLength,
+ unsigned int& numBytesParsed) const;
+ bool IsValidSize(const std::string& tag,
+ const std::string& type) const;
+ void SetErrorString(const std::string& where,
+ const std::string& what) const;
+ bool SkipToNextTag(const char storageType,
char*& pTagData,
- unsigned int& numBytesParsed);
+ unsigned int& numBytesParsed) const;
// internal data
private:
BamAlignmentSupportData SupportData;
friend class Internal::BamReaderPrivate;
friend class Internal::BamWriterPrivate;
+
+ mutable std::string ErrorString; // mutable to allow updates even in logically const methods
//! \endcond
};
if ( SupportData.HasCoreOnly )
BuildCharData();
- // validate tag/type size & that storage type code is OK for T
- if ( !IsValidSize(tag, type) ) return false;
- if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) )
+ // check tag/type size
+ if ( !IsValidSize(tag, type) ) {
+ // TODO: set error string?
+ return false;
+ }
+
+ // check that storage type code is OK for T
+ if ( !TagTypeHelper<T>::CanConvertTo(type.at(0)) ) {
+ // TODO: set error string?
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
// if tag already exists, return false
// use EditTag explicitly instead
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
// otherwise, convert value to string
union { T value; char valueBuffer[sizeof(T)]; } un;
// copy original tag data to temp buffer
const std::string newTag = tag + type;
- const int newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T
- char* originalTagData = new char[newTagDataLength];
- memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
+ const size_t newTagDataLength = tagDataLength + newTag.size() + sizeof(T); // leave room for new T
+ RaiiBuffer originalTagData(newTagDataLength);
+ memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
// append newTag
- strcat(originalTagData + tagDataLength, newTag.data());
- memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
+ strcat(originalTagData.Buffer + tagDataLength, newTag.data());
+ memcpy(originalTagData.Buffer + tagDataLength + newTag.size(), un.valueBuffer, sizeof(T));
// store temp buffer back in TagData
- const char* newTagData = (const char*)originalTagData;
+ const char* newTagData = (const char*)originalTagData.Buffer;
TagData.assign(newTagData, newTagDataLength);
-
- // clean up & return success
- delete[] originalTagData;
return true;
}
if ( SupportData.HasCoreOnly )
BuildCharData();
- // validate tag/type size & that storage type code is OK for string
- if ( !IsValidSize(tag, type) ) return false;
- if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) )
+ // check tag/type size
+ if ( !IsValidSize(tag, type) ) {
+ // TODO: set error string?
+ return false;
+ }
+
+ // check that storage type code is OK for string
+ if ( !TagTypeHelper<std::string>::CanConvertTo(type.at(0)) ) {
+ // TODO: set error string?
return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
// if tag already exists, return false
// use EditTag explicitly instead
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
// otherwise, copy tag data to temp buffer
const std::string newTag = tag + type + value;
- const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
- char* originalTagData = new char[newTagDataLength];
- memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
+ const size_t newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
+ RaiiBuffer originalTagData(newTagDataLength);
+ memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
- // append newTag
- strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
+ // append newTag (removes original null-term, then appends newTag + null-term)
+ strcat(originalTagData.Buffer + tagDataLength, newTag.data());
// store temp buffer back in TagData
- const char* newTagData = (const char*)originalTagData;
+ const char* newTagData = (const char*)originalTagData.Buffer;
TagData.assign(newTagData, newTagDataLength);
-
- // clean up & return success
- delete[] originalTagData;
return true;
}
// if tag already exists, return false
// use EditTag explicitly instead
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
// build new tag's base information
char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
// copy current TagData string to temp buffer, leaving room for new tag's contents
- const int newTagDataLength = tagDataLength +
- Constants::BAM_TAG_ARRAYBASE_SIZE +
- numElements*sizeof(T);
- char* originalTagData = new char[newTagDataLength];
- memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
+ const size_t newTagDataLength = tagDataLength +
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
+ numElements*sizeof(T);
+ RaiiBuffer originalTagData(newTagDataLength);
+ memcpy(originalTagData.Buffer, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
// write newTagBase (removes old null term)
- strcat(originalTagData + tagDataLength, (const char*)newTagBase);
+ strcat(originalTagData.Buffer + tagDataLength, (const char*)newTagBase);
// add vector elements to tag
int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
for ( int i = 0 ; i < numElements; ++i ) {
const T& value = values.at(i);
- memcpy(originalTagData + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
+ memcpy(originalTagData.Buffer + elementsBeginOffset + i*sizeof(T), &value, sizeof(T));
}
// store temp buffer back in TagData
- const char* newTagData = (const char*)originalTagData;
+ const char* newTagData = (const char*)originalTagData.Buffer;
TagData.assign(newTagData, newTagDataLength);
-
- // cleanup & return success
- delete[] originalTagData;
return true;
}
inline bool BamAlignment::GetTag(const std::string& tag,
T& destination) const
{
- // skip if core-only or no tags present
- if ( SupportData.HasCoreOnly || TagData.empty() )
+ // skip if alignment is core-only
+ if ( SupportData.HasCoreOnly ) {
+ // TODO: set error string?
return false;
+ }
+
+ // skip if no tags present
+ if ( TagData.empty() ) {
+ // TODO: set error string?
+ return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
unsigned int numBytesParsed = 0;
// return failure if tag not found
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
- // otherwise try to copy data into destination
+ // fetch data type
const char type = *(pTagData - 1);
- if ( !TagTypeHelper<T>::CanConvertFrom(type) )
+ if ( !TagTypeHelper<T>::CanConvertFrom(type) ) {
+ // TODO: set error string ?
return false;
+ }
+
+ // determine data length
int destinationLength = 0;
switch ( type ) {
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
case (Constants::BAM_TAG_TYPE_ARRAY) :
- std::cerr << "BamAlignment ERROR: cannot store tag of type " << type
- << " in integer destination" << std::endl;
+ SetErrorString("BamAlignment::GetTag",
+ "cannot store variable length tag data into a numeric destination");
return false;
// unrecognized tag type
default:
- std::cerr << "BamAlignment ERROR: unknown tag type encountered: "
- << type << std::endl;
+ const std::string message = std::string("invalid tag type: ") + type;
+ SetErrorString("BamAlignment::GetTag", message);
return false;
}
- // store in destination
+ // store data in destination
destination = 0;
memcpy(&destination, pTagData, destinationLength);
inline bool BamAlignment::GetTag<std::string>(const std::string& tag,
std::string& destination) const
{
- // skip if core-only or no tags present
- if ( SupportData.HasCoreOnly || TagData.empty() )
+ // skip if alignment is core-only
+ if ( SupportData.HasCoreOnly ) {
+ // TODO: set error string?
return false;
+ }
+
+ // skip if no tags present
+ if ( TagData.empty() ) {
+ // TODO: set error string?
+ return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
unsigned int numBytesParsed = 0;
// return failure if tag not found
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
// otherwise copy data into destination
const unsigned int dataLength = strlen(pTagData);
inline bool BamAlignment::GetTag(const std::string& tag,
std::vector<T>& destination) const
{
- // skip if core-only or no tags present
- if ( SupportData.HasCoreOnly || TagData.empty() )
+ // skip if alignment is core-only
+ if ( SupportData.HasCoreOnly ) {
+ // TODO: set error string?
return false;
+ }
+
+ // skip if no tags present
+ if ( TagData.empty() ) {
+ // TODO: set error string?
+ return false;
+ }
// localize the tag data
char* pTagData = (char*)TagData.data();
unsigned int numBytesParsed = 0;
// return false if tag not found
- if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
+ // TODO: set error string?
return false;
+ }
// check that tag is array type
const char tagType = *(pTagData - 1);
if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
- std::cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
- << tag << " in array destination" << std::endl;
+ SetErrorString("BamAlignment::GetTag", "cannot store a non-array tag in array destination");
return false;
}
- // calculate length of each element in tag's array
+ // fetch element type
const char elementType = *pTagData;
- if ( !TagTypeHelper<T>::CanConvertFrom(elementType) )
+ if ( !TagTypeHelper<T>::CanConvertFrom(elementType) ) {
+ // TODO: set error string ?
return false;
+ }
++pTagData;
+
+ // calculate length of each element in tag's array
int elementLength = 0;
switch ( elementType ) {
case (Constants::BAM_TAG_TYPE_ASCII) :
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
case (Constants::BAM_TAG_TYPE_ARRAY) :
- std::cerr << "BamAlignment ERROR: array element type: " << elementType
- << " cannot be stored in integer value" << std::endl;
+ SetErrorString("BamAlignment::GetTag",
+ "invalid array data, variable-length elements are not allowed");
return false;
// unknown tag type
default:
- std::cerr << "BamAlignment ERROR: unknown element type encountered: "
- << elementType << std::endl;
+ const std::string message = std::string("invalid array element type: ") + elementType;
+ SetErrorString("BamAlignment::GetTag", message);
return false;
}
// BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 7 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides data structures & utility methods that are used throughout the API.\r
// ***************************************************************************\r
return UnpackUnsignedShort( (const char*)buffer );\r
}\r
\r
+// ----------------------------------------------------------------\r
+// 'internal' helper structs\r
+\r
+struct RaiiBuffer {\r
+ RaiiBuffer(const unsigned int n)\r
+ : Buffer( new char[n]() )\r
+ { }\r
+ ~RaiiBuffer(void) {\r
+ delete[] Buffer;\r
+ }\r
+ char* Buffer;\r
+};\r
+\r
} // namespace BamTools\r
\r
#endif // BAMAUX_H\r
// BamConstants.h (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 October 2011 (DB)
+// Last modified: 5 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides basic constants for handling BAM files.
// ***************************************************************************
namespace BamTools {
namespace Constants {
-const int BAM_SIZEOF_INT = 4;
+const uint8_t BAM_SIZEOF_INT = 4;
// header magic number
-const char* const BAM_HEADER_MAGIC = "BAM\1";
-const unsigned int BAM_HEADER_MAGIC_LENGTH = 4;
+const char* const BAM_HEADER_MAGIC = "BAM\1";
+const uint8_t BAM_HEADER_MAGIC_LENGTH = 4;
// BAM alignment core size
-const int BAM_CORE_SIZE = 32;
-const int BAM_CORE_BUFFER_SIZE = 8;
+const uint8_t BAM_CORE_SIZE = 32;
+const uint8_t BAM_CORE_BUFFER_SIZE = 8;
// BAM alignment flags
const int BAM_ALIGNMENT_PAIRED = 0x0001;
// CIGAR constants
const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
-const int BAM_CIGAR_MATCH = 0;
-const int BAM_CIGAR_INS = 1;
-const int BAM_CIGAR_DEL = 2;
-const int BAM_CIGAR_REFSKIP = 3;
-const int BAM_CIGAR_SOFTCLIP = 4;
-const int BAM_CIGAR_HARDCLIP = 5;
-const int BAM_CIGAR_PAD = 6;
-const int BAM_CIGAR_SEQMATCH = 7;
-const int BAM_CIGAR_MISMATCH = 8;
+const uint8_t BAM_CIGAR_MATCH = 0;
+const uint8_t BAM_CIGAR_INS = 1;
+const uint8_t BAM_CIGAR_DEL = 2;
+const uint8_t BAM_CIGAR_REFSKIP = 3;
+const uint8_t BAM_CIGAR_SOFTCLIP = 4;
+const uint8_t BAM_CIGAR_HARDCLIP = 5;
+const uint8_t BAM_CIGAR_PAD = 6;
+const uint8_t BAM_CIGAR_SEQMATCH = 7;
+const uint8_t BAM_CIGAR_MISMATCH = 8;
const char BAM_CIGAR_MATCH_CHAR = 'M';
const char BAM_CIGAR_INS_CHAR = 'I';
const char BAM_CIGAR_SEQMATCH_CHAR = '=';
const char BAM_CIGAR_MISMATCH_CHAR = 'X';
-const int BAM_CIGAR_SHIFT = 4;
-const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);
+const int BAM_CIGAR_SHIFT = 4;
+const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);
// BAM tag types
const char BAM_TAG_TYPE_ASCII = 'A';
const char BAM_TAG_TYPE_HEX = 'H';
const char BAM_TAG_TYPE_ARRAY = 'B';
-const size_t BAM_TAG_TAGSIZE = 2;
-const size_t BAM_TAG_TYPESIZE = 1;
-const int BAM_TAG_ARRAYBASE_SIZE = 8;
+const uint8_t BAM_TAG_TAGSIZE = 2;
+const uint8_t BAM_TAG_TYPESIZE = 1;
+const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8;
// DNA bases
const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
-const unsigned char BAM_BASECODE_EQUAL = 0;
-const unsigned char BAM_BASECODE_A = 1;
-const unsigned char BAM_BASECODE_C = 2;
-const unsigned char BAM_BASECODE_G = 4;
-const unsigned char BAM_BASECODE_T = 8;
-const unsigned char BAM_BASECODE_N = 15;
+const uint8_t BAM_BASECODE_EQUAL = 0;
+const uint8_t BAM_BASECODE_A = 1;
+const uint8_t BAM_BASECODE_C = 2;
+const uint8_t BAM_BASECODE_M = 3;
+const uint8_t BAM_BASECODE_G = 4;
+const uint8_t BAM_BASECODE_R = 5;
+const uint8_t BAM_BASECODE_S = 6;
+const uint8_t BAM_BASECODE_V = 7;
+const uint8_t BAM_BASECODE_T = 8;
+const uint8_t BAM_BASECODE_W = 9;
+const uint8_t BAM_BASECODE_Y = 10;
+const uint8_t BAM_BASECODE_H = 11;
+const uint8_t BAM_BASECODE_K = 12;
+const uint8_t BAM_BASECODE_D = 13;
+const uint8_t BAM_BASECODE_B = 14;
+const uint8_t BAM_BASECODE_N = 15;
-const char BAM_DNA_EQUAL = '=';
-const char BAM_DNA_A = 'A';
-const char BAM_DNA_C = 'C';
-const char BAM_DNA_G = 'G';
-const char BAM_DNA_T = 'T';
-const char BAM_DNA_N = 'N';
-const char BAM_DNA_DEL = '-';
-const char BAM_DNA_PAD = '*';
+const char BAM_DNA_EQUAL = '=';
+const char BAM_DNA_A = 'A';
+const char BAM_DNA_C = 'C';
+const char BAM_DNA_M = 'M';
+const char BAM_DNA_G = 'G';
+const char BAM_DNA_R = 'R';
+const char BAM_DNA_S = 'S';
+const char BAM_DNA_V = 'V';
+const char BAM_DNA_T = 'T';
+const char BAM_DNA_W = 'W';
+const char BAM_DNA_Y = 'Y';
+const char BAM_DNA_H = 'H';
+const char BAM_DNA_K = 'K';
+const char BAM_DNA_D = 'D';
+const char BAM_DNA_B = 'B';
+const char BAM_DNA_N = 'N';
+const char BAM_DNA_DEL = '-';
+const char BAM_DNA_PAD = '*';
// zlib constants
-const int GZIP_ID1 = 31;
-const int GZIP_ID2 = 139;
-const int CM_DEFLATE = 8;
-const int FLG_FEXTRA = 4;
-const int OS_UNKNOWN = 255;
-const int BGZF_XLEN = 6;
-const int BGZF_ID1 = 66;
-const int BGZF_ID2 = 67;
-const int BGZF_LEN = 2;
-const int GZIP_WINDOW_BITS = -15;
-const int Z_DEFAULT_MEM_LEVEL = 8;
+const char GZIP_ID1 = 31;
+const char GZIP_ID2 = 139;
+const char CM_DEFLATE = 8;
+const char FLG_FEXTRA = 4;
+const char OS_UNKNOWN = 255;
+const char BGZF_XLEN = 6;
+const char BGZF_ID1 = 66;
+const char BGZF_ID2 = 67;
+const char BGZF_LEN = 2;
+const int8_t GZIP_WINDOW_BITS = -15;
+const int8_t Z_DEFAULT_MEM_LEVEL = 8;
// BZGF constants
-const int BGZF_BLOCK_HEADER_LENGTH = 18;
-const int BGZF_BLOCK_FOOTER_LENGTH = 8;
-const int BGZF_MAX_BLOCK_SIZE = 65536;
-const int BGZF_DEFAULT_BLOCK_SIZE = 65536;
+const uint8_t BGZF_BLOCK_HEADER_LENGTH = 18;
+const uint8_t BGZF_BLOCK_FOOTER_LENGTH = 8;
+const uint32_t BGZF_MAX_BLOCK_SIZE = 65536;
+const uint32_t BGZF_DEFAULT_BLOCK_SIZE = 65536;
} // namespace Constants
// BamIndex.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides basic BAM index interface
// ***************************************************************************
// index interface
public:
// builds index from associated BAM file & writes out to index file
- virtual bool Create(void) =0; // creates index file from BAM file
+ virtual bool Create(void) =0;
+
+ // returns a human-readable description of the last error encountered
+ std::string GetErrorString(void) { return m_errorString; }
+
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID) const =0;
+
// attempts to use index data to jump to @region, returns success/fail
// a "successful" jump indicates no error, but not whether this region has data
// * thus, the method sets a flag to indicate whether there are alignments
// available after the jump position
virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+
// loads existing data from file into memory
virtual bool Load(const std::string& filename) =0;
+
// change the index caching behavior
virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0;
+ // internal methods
+ protected:
+ void SetErrorString(const std::string& where, const std::string& what) {
+ m_errorString = where + ": " + what;
+ }
+
// data members
protected:
- Internal::BamReaderPrivate* m_reader; // copy, not ownedprivate:
+ Internal::BamReaderPrivate* m_reader; // copy, not owned
+ std::string m_errorString;
};
} // namespace BamTools
// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 1 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
//
\sa CloseFile(), IsOpen(), Open(), BamReader::Close()
*/
-void BamMultiReader::Close(void) {
- d->Close();
+bool BamMultiReader::Close(void) {
+ return d->Close();
}
/*! \fn void BamMultiReader::CloseFile(const std::string& filename)
\sa Close(), IsOpen(), Open(), BamReader::Close()
*/
-void BamMultiReader::CloseFile(const std::string& filename) {
- d->CloseFile(filename);
+bool BamMultiReader::CloseFile(const std::string& filename) {
+ return d->CloseFile(filename);
}
/*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
return d->Filenames();
}
+// returns a description of the last error that occurred
+std::string BamMultiReader::GetErrorString(void) const {
+ return d->GetErrorString();
+}
+
/*! \fn SamHeader BamMultiReader::GetHeader(void) const
\brief Returns unified SAM-format header for all files
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 1 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
// ***************************************************************************
// ----------------------
// closes all open BAM files
- void Close(void);
+ bool Close(void);
// close only the requested BAM file
- void CloseFile(const std::string& filename);
+ bool CloseFile(const std::string& filename);
// returns list of filenames for all open BAM files
const std::vector<std::string> Filenames(void) const;
// returns true if multireader has any open BAM files
// changes the caching behavior of the index data
void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
+ // ----------------------
+ // error handling
+ // ----------------------
+
+ // returns a human-readable description of the last error that occurred
+ std::string GetErrorString(void) const;
+
// private implementation
private:
Internal::BamMultiReaderPrivate* d;
// BamReader.cpp (c) 2009 Derek Barnett, Michael Str�mberg
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 March 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides read access to BAM files.
// ***************************************************************************
d = 0;
}
-/*! \fn void BamReader::Close(void)
+/*! \fn bool BamReader::Close(void)
\brief Closes the current BAM file.
Also clears out all header and reference data.
+ \return \c true if file closed OK
\sa IsOpen(), Open()
*/
-void BamReader::Close(void) {
- d->Close();
+bool BamReader::Close(void) {
+ return d->Close();
}
/*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
return d->CreateIndex(type);
}
+string BamReader::GetErrorString(void) const {
+ return d->GetErrorString();
+}
+
/*! \fn const std::string BamReader::GetFilename(void) const
\brief Returns name of current BAM file.
// BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 7 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides read access to BAM files.\r
// ***************************************************************************\r
// ----------------------\r
\r
// closes the current BAM file\r
- void Close(void);\r
+ bool Close(void);\r
// returns filename of current BAM file\r
const std::string GetFilename(void) const;\r
// returns true if a BAM file is open for reading\r
// changes the caching behavior of the index data\r
void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);\r
\r
+ // ----------------------\r
+ // error handling\r
+ // ----------------------\r
+\r
+ // returns a human-readable description of the last error that occurred\r
+ std::string GetErrorString(void) const;\r
+\r
// deprecated methods\r
public:\r
// returns true if index data is available\r
// BamWriter.cpp (c) 2009 Michael Str�mberg, Derek Barnett\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 4 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic functionality for producing BAM files\r
// ***************************************************************************\r
#include <api/internal/BamWriter_p.h>\r
using namespace BamTools;\r
using namespace BamTools::Internal;\r
-\r
-#include <iostream>\r
using namespace std;\r
\r
/*! \class BamTools::BamWriter\r
d->Close();\r
}\r
\r
+// returns a human-readable description of the last error that occurred\r
+std::string BamWriter::GetErrorString(void) const {\r
+ return d->GetErrorString();\r
+}\r
+\r
/*! \fn bool BamWriter::IsOpen(void) const\r
\brief Returns \c true if BAM file is open for writing.\r
\sa Open()\r
\param alignment BamAlignment record to save\r
\sa BamReader::GetNextAlignment(), BamReader::GetNextAlignmentCore()\r
*/\r
-void BamWriter::SaveAlignment(const BamAlignment& alignment) {\r
- d->SaveAlignment(alignment);\r
+bool BamWriter::SaveAlignment(const BamAlignment& alignment) {\r
+ return d->SaveAlignment(alignment);\r
}\r
\r
-/*! \fn void BamWriter::SetCompressionMode(const CompressionMode& compressionMode)\r
+/*! \fn void BamWriter::SetCompressionMode(const BamWriter::CompressionMode& compressionMode)\r
\brief Sets the output compression mode.\r
\r
Default mode is BamWriter::Compressed.\r
\param compressionMode desired output compression behavior\r
\sa IsOpen(), Open()\r
*/\r
-void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) {\r
+void BamWriter::SetCompressionMode(const BamWriter::CompressionMode& compressionMode) {\r
d->SetWriteCompressed( compressionMode == BamWriter::Compressed );\r
}\r
// BamWriter.h (c) 2009 Michael Str�mberg, Derek Barnett\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 4 March 2011 (DB)\r
+// Last modified: 5 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic functionality for producing BAM files\r
// ***************************************************************************\r
\r
class API_EXPORT BamWriter {\r
\r
- public: enum CompressionMode { Compressed = 0\r
- , Uncompressed\r
- };\r
+ // enums\r
+ public:\r
+ enum CompressionMode { Compressed = 0\r
+ , Uncompressed\r
+ };\r
\r
// ctor & dtor\r
public:\r
public:\r
// closes the current BAM file\r
void Close(void);\r
+ // returns a human-readable description of the last error that occurred\r
+ std::string GetErrorString(void) const;\r
// returns true if BAM file is open for writing\r
bool IsOpen(void) const;\r
// opens a BAM file for writing\r
const SamHeader& samHeader,\r
const RefVector& referenceSequences);\r
// saves the alignment to the alignment archive\r
- void SaveAlignment(const BamAlignment& alignment);\r
+ bool SaveAlignment(const BamAlignment& alignment);\r
// sets the output compression mode\r
- void SetCompressionMode(const CompressionMode& compressionMode);\r
+ void SetCompressionMode(const BamWriter::CompressionMode& compressionMode);\r
\r
// private implementation\r
private:\r
SamReadGroupDictionary.cpp
SamSequence.cpp
SamSequenceDictionary.cpp
+ internal/BamException_p.cpp
internal/BamHeader_p.cpp
internal/BamIndexFactory_p.cpp
internal/BamMultiReader_p.cpp
// SamHeader.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM header data fields.
// ***************************************************************************
#include <api/SamConstants.h>
#include <api/SamHeader.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/SamFormatParser_p.h>
#include <api/internal/SamFormatPrinter_p.h>
#include <api/internal/SamHeaderValidator_p.h>
, SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN)
, GroupOrder("")
{
- SamFormatParser parser(*this);
- parser.Parse(headerText);
+ SetHeaderText(headerText);
}
/*! \fn SamHeader::SamHeader(const SamHeader& other)
\brief Clears all header contents.
*/
void SamHeader::Clear(void) {
+
+ // clear SAM header components
Version.clear();
SortOrder.clear();
GroupOrder.clear();
ReadGroups.Clear();
Programs.Clear();
Comments.clear();
+
+ // clear error string
+ m_errorString.clear();
+}
+
+/*! \fn std::string SamHeader::GetErrorString(void) const
+ \brief Returns human-readable description of last error encountered
+*/
+std::string SamHeader::GetErrorString(void) const {
+ return m_errorString;
+}
+
+/*! \fn bool SamHeader::HasError(void) const
+ \brief Returns \c true if header encountered an error
+*/
+bool SamHeader::HasError(void) const {
+ return (!m_errorString.empty());
}
/*! \fn bool SamHeader::HasVersion(void) const
/*! \fn bool SamHeader::IsValid(bool verbose = false) const
\brief Checks header contents for required data and proper formatting.
\param verbose If set to true, validation errors & warnings will be printed to stderr.
- Otherwise, output is suppressed and only validation check occurs.
+ Otherwise, messages are available through SamHeader::GetErrorString().
\return \c true if SAM header is well-formed
*/
bool SamHeader::IsValid(bool verbose) const {
+
SamHeaderValidator validator(*this);
- return validator.Validate(verbose);
+
+ // if SAM header is valid, return success
+ if ( validator.Validate() )
+ return true;
+
+ // otherwiser
+ else {
+
+ // print messages to stderr
+ if ( verbose )
+ validator.PrintMessages(std::cerr);
+
+ // or catch in local error string
+ else {
+ stringstream errorStream("");
+ validator.PrintMessages(errorStream);
+ m_errorString = errorStream.str();
+ }
+ return false;
+ }
}
/*! \fn void SamHeader::SetHeaderText(const std::string& headerText)
// clear prior data
Clear();
- // parse header text into data
- SamFormatParser parser(*this);
- parser.Parse(headerText);
+ try {
+ SamFormatParser parser(*this);
+ parser.Parse(headerText);
+ } catch ( BamException& e ) {
+
+ // clear anything parsed so far
+ // no telling what's valid and what's partially parsed
+ Clear();
+
+ // set error string
+ m_errorString = e.what();
+ }
}
/*! \fn std::string SamHeader::ToString(void) const
// SamHeader.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 18 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides direct read/write access to the SAM header data fields.
// ***************************************************************************
// query/modify entire SamHeader
void Clear(void); // clears all header contents
+ std::string GetErrorString(void) const;
+ bool HasError(void) const;
bool IsValid(bool verbose = false) const; // returns true if SAM header is well-formed
void SetHeaderText(const std::string& headerText); // replaces data fields with contents of SAM-formatted text
std::string ToString(void) const; // returns the printable, SAM-formatted header text
// convenience query methods
- bool HasVersion(void) const; // returns true if header contains format version entry
- bool HasSortOrder(void) const; // returns true if header contains sort order entry
- bool HasGroupOrder(void) const; // returns true if header contains group order entry
- bool HasSequences(void) const; // returns true if header contains any sequence entries
- bool HasReadGroups(void) const; // returns true if header contains any read group entries
- bool HasPrograms(void) const; // returns true if header contains any program record entries
- bool HasComments(void) const; // returns true if header contains comments
+ bool HasVersion(void) const; // returns true if header contains format version entry
+ bool HasSortOrder(void) const; // returns true if header contains sort order entry
+ bool HasGroupOrder(void) const; // returns true if header contains group order entry
+ bool HasSequences(void) const; // returns true if header contains any sequence entries
+ bool HasReadGroups(void) const; // returns true if header contains any read group entries
+ bool HasPrograms(void) const; // returns true if header contains any program record entries
+ bool HasComments(void) const; // returns true if header contains comments
// --------------
// data members
// --------------
// header metadata (@HD line)
- std::string Version; // VN:<Version> *Required for valid SAM header, if @HD record is present*
- std::string SortOrder; // SO:<SortOrder>
- std::string GroupOrder; // GO:<GroupOrder>
+ std::string Version; // VN:<Version> *Required, if @HD record is present*
+ std::string SortOrder; // SO:<SortOrder>
+ std::string GroupOrder; // GO:<GroupOrder>
// header sequences (@SQ entries)
SamSequenceDictionary Sequences;
// header comments (@CO entries)
std::vector<std::string> Comments;
+
+ private:
+ mutable std::string m_errorString;
};
} // namespace BamTools
--- /dev/null
+// ***************************************************************************
+// BamException_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 5 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#include <api/internal/BamException_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+const string BamException::SEPARATOR = ": ";
--- /dev/null
+// ***************************************************************************
+// BamException_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 6 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides a basic exception class for BamTools internals
+// ***************************************************************************
+
+#ifndef BAMEXCEPTION_P_H
+#define BAMEXCEPTION_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <exception>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamException : public std::exception {
+
+ public:
+ inline BamException(const std::string& where, const std::string& message)
+ : std::exception()
+ , m_errorString(where + SEPARATOR + message)
+ { }
+
+ inline ~BamException(void) throw() { }
+
+ inline const char* what(void) const throw() {
+ return m_errorString.c_str();
+ }
+
+ private:
+ std::string m_errorString;
+ static const std::string SEPARATOR;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMEXCEPTION_P_H
// BamHeader_p.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for handling BAM headers.
// ***************************************************************************
#include <api/BamAux.h>
#include <api/BamConstants.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamHeader_p.h>
#include <api/internal/BgzfStream_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
-#include <cstdio>
#include <cstdlib>
#include <cstring>
-#include <iostream>
using namespace std;
+// ------------------------
+// static utility methods
+// ------------------------
+
+static inline
+bool isValidMagicNumber(const char* buffer) {
+ return ( strncmp(buffer, Constants::BAM_HEADER_MAGIC,
+ Constants::BAM_HEADER_MAGIC_LENGTH) == 0 );
+}
+
+// --------------------------
+// BamHeader implementation
+// --------------------------
+
// ctor
BamHeader::BamHeader(void) { }
BamHeader::~BamHeader(void) { }
// reads magic number from BGZF stream, returns true if valid
-bool BamHeader::CheckMagicNumber(BgzfStream* stream) {
+void BamHeader::CheckMagicNumber(BgzfStream* stream) {
// try to read magic number
char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
- if ( stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH) != (int)Constants::BAM_HEADER_MAGIC_LENGTH ) {
- fprintf(stderr, "BamHeader ERROR: could not read magic number\n");
- return false;
- }
+ const size_t numBytesRead = stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH);
+ if ( numBytesRead != (int)Constants::BAM_HEADER_MAGIC_LENGTH )
+ throw BamException("BamHeader::CheckMagicNumber", "could not read magic number");
// validate magic number
- if ( strncmp(buffer, Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH) != 0 ) {
- fprintf(stderr, "BamHeader ERROR: invalid magic number\n");
- return false;
- }
-
- // all checks out
- return true;
+ if ( !isValidMagicNumber(buffer) )
+ throw BamException("BamHeader::CheckMagicNumber", "invalid magic number");
}
// clear SamHeader data
}
// load BAM header ('magic number' and SAM header text) from BGZF stream
-// returns true if all OK
-bool BamHeader::Load(BgzfStream* stream) {
+void BamHeader::Load(BgzfStream* stream) {
- // cannot load if invalid stream
- if ( stream == 0 )
- return false;
+ // read & check magic number
+ CheckMagicNumber(stream);
- // cannot load if magic number is invalid
- if ( !CheckMagicNumber(stream) )
- return false;
-
- // cannot load header if cannot read header length
+ // read header (length, then actual text)
uint32_t length(0);
- if ( !ReadHeaderLength(stream, length) )
- return false;
-
- // cannot load header if cannot read header text
- if ( !ReadHeaderText(stream, length) )
- return false;
-
- // otherwise, everything OK
- return true;
+ ReadHeaderLength(stream, length);
+ ReadHeaderText(stream, length);
}
// reads SAM header text length from BGZF stream, stores it in @length
-// returns read success/fail status
-bool BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+void BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
- // attempt to read BAM header text length
+ // read BAM header text length
char buffer[sizeof(uint32_t)];
- if ( stream->Read(buffer, sizeof(uint32_t)) != sizeof(uint32_t) ) {
- fprintf(stderr, "BamHeader ERROR: could not read header length\n");
- return false;
- }
+ const size_t numBytesRead = stream->Read(buffer, sizeof(uint32_t));
+ if ( numBytesRead != sizeof(uint32_t) )
+ throw BamException("BamHeader::ReadHeaderLength", "could not read header length");
- // convert char buffer to length, return success
+ // convert char buffer to length
length = BamTools::UnpackUnsignedInt(buffer);
if ( BamTools::SystemIsBigEndian() )
BamTools::SwapEndian_32(length);
- return true;
}
// reads SAM header text from BGZF stream, stores in SamHeader object
-// returns read success/fail status
-bool BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+void BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
- // set up destination buffer
+ // read header text
char* headerText = (char*)calloc(length + 1, 1);
+ const size_t bytesRead = stream->Read(headerText, length);
- // attempt to read header text
- const unsigned bytesRead = stream->Read(headerText, length);
- const bool readOk = ( bytesRead == length );
- if ( readOk )
- m_header.SetHeaderText( (string)((const char*)headerText) );
- else
- fprintf(stderr, "BamHeader ERROR: could not read header text\n");
+ // if error reading, clean up buffer & throw
+ if ( bytesRead != length ) {
+ free(headerText);
+ throw BamException("BamHeader::ReadHeaderText", "could not read header text");
+ }
- // clean up calloc-ed temp variable (on success or fail)
+ // otherwise, text was read OK
+ // store & cleanup
+ m_header.SetHeaderText( (string)((const char*)headerText) );
free(headerText);
-
- // return read success
- return readOk;
}
// returns *copy* of SamHeader data object
// BamHeader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 26 January 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for handling BAM headers.
// ***************************************************************************
bool IsValid(void) const;
// load BAM header ('magic number' and SAM header text) from BGZF stream
// returns true if all OK
- bool Load(BgzfStream* stream);
+ void Load(BgzfStream* stream);
// returns (editable) copy of SamHeader data object
SamHeader ToSamHeader(void) const;
// returns SAM-formatted string of header data
// internal methods
private:
- // reads magic number from BGZF stream, returns true if valid
- bool CheckMagicNumber(BgzfStream* stream);
+ // reads magic number from BGZF stream
+ void CheckMagicNumber(BgzfStream* stream);
// reads SAM header length from BGZF stream, stores it in @length
- // returns read success/fail status
- bool ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+ void ReadHeaderLength(BgzfStream* stream, uint32_t& length);
// reads SAM header text from BGZF stream, stores in SamHeader object
- // returns read success/fail status
- bool ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+ void ReadHeaderText(BgzfStream* stream, const uint32_t& length);
// data members
private:
// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides interface for generating BamIndex implementations
// ***************************************************************************
#include <api/internal/BamToolsIndex_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
-
-#include <cstdio>
using namespace std;
// generates index filename from BAM filename (depending on requested type)
case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
default :
- cerr << "BamIndexFactory ERROR: unknown index type" << type << endl;
return string();
}
}
case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
default :
- cerr << "BamIndexFactory ERROR: unknown index type " << type << endl;
return 0;
}
}
return string();
// look for last dot in filename
- size_t lastDotPosition = filename.find_last_of('.');
+ const size_t lastDotPosition = filename.find_last_of('.');
// if none found, return empty string
if ( lastDotPosition == string::npos )
const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
const BamIndex::IndexType& preferredType)
{
+ // skip if BAM filename provided is empty
+ if ( bamFilename.empty() )
+ return string();
+
// try to find index of preferred type first
// return index filename if found
string indexFilename = CreateIndexFilename(bamFilename, preferredType);
template <typename Compare>
inline void MultiMerger<Compare>::Add(MergeItem item) {
+
+ // N.B. - any future custom Compare types must define this method
+ // see algorithms/Sort.h
+
if ( CompareType::UsesCharData() )
item.Alignment->BuildCharData();
m_data.insert(item);
// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 3 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// dtor
BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
-
- // close all open BAM readers (& clean up cache)
Close();
}
// close all BAM files
-void BamMultiReaderPrivate::Close(void) {
- CloseFiles( Filenames() );
+bool BamMultiReaderPrivate::Close(void) {
+
+ m_errorString.clear();
+
+ if ( CloseFiles(Filenames()) )
+ return true;
+ else {
+ const string currentError = m_errorString;
+ const string message = string("error encountered while closing all files: \n\t") + currentError;
+ SetErrorString("BamMultiReader::Close", message);
+ return false;
+ }
}
// close requested BAM file
-void BamMultiReaderPrivate::CloseFile(const string& filename) {
+bool BamMultiReaderPrivate::CloseFile(const string& filename) {
+
+ m_errorString.clear();
+
vector<string> filenames(1, filename);
- CloseFiles(filenames);
+ if ( CloseFiles(filenames) )
+ return true;
+ else {
+ const string currentError = m_errorString;
+ const string message = string("error while closing file: ") + filename + "\n" + currentError;
+ SetErrorString("BamMultiReader::CloseFile", message);
+ return false;
+ }
}
// close requested BAM files
-void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+bool BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {
+
+ bool errorsEncountered = false;
+ m_errorString.clear();
// iterate over filenames
vector<string>::const_iterator filesIter = filenames.begin();
m_alignmentCache->Remove(reader);
// clean up reader & its alignment
- reader->Close();
+ if ( !reader->Close() ) {
+ m_errorString.append(1, '\t');
+ m_errorString.append(reader->GetErrorString());
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
+ }
delete reader;
reader = 0;
delete m_alignmentCache;
m_alignmentCache = 0;
}
+
+ // return whether all readers closed OK
+ return !errorsEncountered;
}
// creates index files for BAM files that don't have them
bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {
- bool result = true;
+ bool errorsEncountered = false;
+ m_errorString.clear();
// iterate over readers
vector<MergeItem>::iterator itemIter = m_readers.begin();
if ( reader == 0 ) continue;
// if reader doesn't have an index, create one
- if ( !reader->HasIndex() )
- result &= reader->CreateIndex(type);
+ if ( !reader->HasIndex() ) {
+ if ( !reader->CreateIndex(type) ) {
+ m_errorString.append(1, '\t');
+ m_errorString.append(reader->GetErrorString());
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
+ }
+ }
}
- return result;
+ // check for errors encountered before returning success/fail
+ if ( errorsEncountered ) {
+ const string currentError = m_errorString;
+ const string message = string("error while creating index files: ") + "\n" + currentError;
+ SetErrorString("BamMultiReader::CreateIndexes", message);
+ return false;
+ } else
+ return true;
}
IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
return filenames;
}
+string BamMultiReaderPrivate::GetErrorString(void) const {
+ return m_errorString;
+}
+
SamHeader BamMultiReaderPrivate::GetHeader(void) const {
const string& text = GetHeaderText();
return SamHeader(text);
int BamMultiReaderPrivate::GetReferenceCount(void) const {
// handle empty multireader
- if ( m_readers.empty() )
- return 0;
+ if ( m_readers.empty() ) return 0;
// return reference count from first reader
const MergeItem& item = m_readers.front();
const BamReader* reader = item.Reader;
- if ( reader ) return reader->GetReferenceCount();
-
- // invalid reader
- return 0;
+ if ( reader == 0 ) return 0;
+ else
+ return reader->GetReferenceCount();
}
// returns vector of reference objects
const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
// handle empty multireader
- if ( m_readers.empty() )
- return RefVector();
+ if ( m_readers.empty() ) return RefVector();
// return reference data from first BamReader
const MergeItem& item = m_readers.front();
const BamReader* reader = item.Reader;
- if ( reader ) return reader->GetReferenceData();
-
- // invalid reader
- return RefVector();
+ if ( reader == 0 ) return RefVector();
+ else
+ return reader->GetReferenceData();
}
// returns refID from reference name
int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
// handle empty multireader
- if ( m_readers.empty() )
- return -1;
+ if ( m_readers.empty() ) return -1;
// return reference ID from first BamReader
const MergeItem& item = m_readers.front();
const BamReader* reader = item.Reader;
- if ( reader ) return reader->GetReferenceID(refName);
-
- // invalid reader
- return -1;
+ if ( reader == 0 ) return -1;
+ else
+ return reader->GetReferenceID(refName);
}
// ---------------------------------------------------------------------------------------
BamReader* reader = item.Reader;
if ( reader == 0 ) continue;
- // attempt jump() on each
- if ( !reader->Jump(refID, position) ) {
- cerr << "BamMultiReader ERROR: could not jump " << reader->GetFilename()
- << " to " << refID << ":" << position << endl;
- }
+ // jump in each BamReader to position of interest
+ reader->Jump(refID, position);
}
// returns status of cache update
// locate (& load) index files for BAM readers that don't already have one loaded
bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) {
- bool result = true;
+ bool errorsEncountered = false;
+ m_errorString.clear();
// iterate over readers
vector<MergeItem>::iterator readerIter = m_readers.begin();
if ( reader == 0 ) continue;
// if reader has no index, try to locate one
- if ( !reader->HasIndex() )
- result &= reader->LocateIndex(preferredType);
+ if ( !reader->HasIndex() ) {
+ if ( !reader->LocateIndex(preferredType) ) {
+ m_errorString.append(1, '\t');
+ m_errorString.append(reader->GetErrorString());
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
+ }
+ }
}
- return result;
+ // check for errors encountered before returning success/fail
+ if ( errorsEncountered ) {
+ const string currentError = m_errorString;
+ const string message = string("error while locating index files: ") + "\n" + currentError;
+ SetErrorString("BamMultiReader::LocatingIndexes", message);
+ return false;
+ } else
+ return true;
}
// opens BAM files
bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
- bool openedOk = true;
-
- // put all current readers back at beginning
- openedOk &= Rewind();
+ m_errorString.clear();
// put all current readers back at beginning (refreshes alignment cache)
- Rewind();
+ if ( !Rewind() ) {
+ const string currentError = m_errorString;
+ const string message = string("unable to rewind existing readers: \n\t") + currentError;
+ SetErrorString("BamMultiReader::Open", message);
+ return false;
+ }
// iterate over filenames
+ bool errorsEncountered = false;
vector<string>::const_iterator filenameIter = filenames.begin();
vector<string>::const_iterator filenameEnd = filenames.end();
for ( ; filenameIter != filenameEnd; ++filenameIter ) {
if ( readerOpened )
m_readers.push_back( MergeItem(reader, new BamAlignment) );
- // otherwise clean up invalid reader
- else delete reader;
+ // otherwise store error & clean up invalid reader
+ else {
+ m_errorString.append(1, '\t');
+ m_errorString += string("unable to open file: ") + filename;
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
- // update method return status
- openedOk &= readerOpened;
+ delete reader;
+ reader = 0;
+ }
}
- // if more than one reader open, check for consistency
- if ( m_readers.size() > 1 )
- openedOk &= ValidateReaders();
+ // check for errors while opening
+ if ( errorsEncountered ) {
+ const string currentError = m_errorString;
+ const string message = string("unable to open all files: \t\n") + currentError;
+ SetErrorString("BamMultiReader::Open", message);
+ return false;
+ }
- // update alignment cache
- openedOk &= UpdateAlignmentCache();
+ // check for BAM file consistency
+ if ( !ValidateReaders() ) {
+ const string currentError = m_errorString;
+ const string message = string("unable to open inconsistent files: \t\n") + currentError;
+ SetErrorString("BamMultiReader::Open", message);
+ return false;
+ }
- // return success
- return openedOk;
+ // update alignment cache
+ return UpdateAlignmentCache();
}
bool BamMultiReaderPrivate::OpenFile(const std::string& filename) {
vector<string> filenames(1, filename);
- return Open(filenames);
+ if ( Open(filenames) )
+ return true;
+ else {
+ const string currentError = m_errorString;
+ const string message = string("could not open file: ") + filename + "\n\t" + currentError;
+ SetErrorString("BamMultiReader::OpenFile", message);
+ return false;
+ }
}
bool BamMultiReaderPrivate::OpenIndexes(const vector<string>& indexFilenames) {
// first reader without an index?
// make sure same number of index filenames as readers
- if ( m_readers.size() != indexFilenames.size() )
+ if ( m_readers.size() != indexFilenames.size() ) {
+ const string message("size of index file list does not match current BAM file count");
+ SetErrorString("BamMultiReader::OpenIndexes", message);
return false;
+ }
- // init result flag
- bool result = true;
+ bool errorsEncountered = false;
+ m_errorString.clear();
// iterate over BamReaders
vector<string>::const_iterator indexFilenameIter = indexFilenames.begin();
// open index filename on reader
if ( reader ) {
const string& indexFilename = (*indexFilenameIter);
- result &= reader->OpenIndex(indexFilename);
+ if ( !reader->OpenIndex(indexFilename) ) {
+ m_errorString.append(1, '\t');
+ m_errorString += reader->GetErrorString();
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
+ }
}
// increment filename iterator, skip if no more index files to open
break;
}
- // TODO: any validation needed here??
-
// return success/fail
- return result;
+ if ( errorsEncountered ) {
+ const string currentError = m_errorString;
+ const string message = string("could not open all index files: \n\t") + currentError;
+ SetErrorString("BamMultiReader::OpenIndexes", message);
+ return false;
+ } else
+ return true;
}
-
bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
// skip if no alignments available
// load next alignment from reader & store in cache
SaveNextAlignment(reader, alignment);
-
- // return success
return true;
}
// attempt to rewind files
if ( !RewindReaders() ) {
- cerr << "BamMultiReader ERROR: could not rewind file(s) successfully";
+ const string currentError = m_errorString;
+ const string message = string("could not rewind readers: \n\t") + currentError;
+ SetErrorString("BamMultiReader::Rewind", message);
return false;
}
// returns BAM file pointers to beginning of alignment data
bool BamMultiReaderPrivate::RewindReaders(void) {
- bool result = true;
+ m_errorString.clear();
+ bool errorsEncountered = false;
// iterate over readers
vector<MergeItem>::iterator readerIter = m_readers.begin();
if ( reader == 0 ) continue;
// attempt rewind on BamReader
- result &= reader->Rewind();
+ if ( !reader->Rewind() ) {
+ m_errorString.append(1, '\t');
+ m_errorString.append( reader->GetErrorString() );
+ m_errorString.append(1, '\n');
+ errorsEncountered = true;
+ }
}
- return result;
+ return !errorsEncountered;
}
void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
// if can read alignment from reader, store in cache
- // N.B. - lazy building of alignment's char data,
- // only populated on demand by sorting merger or client call to GetNextAlignment()
+ //
+ // N.B. - lazy building of alignment's char data - populated only:
+ // automatically by alignment cache to maintain its sorting OR
+ // on demand from client call to future call to GetNextAlignment()
+
if ( reader->GetNextAlignmentCore(*alignment) )
- m_alignmentCache->Add(MergeItem(reader, alignment));
+ m_alignmentCache->Add( MergeItem(reader, alignment) );
+}
+
+void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
+ static const string SEPARATOR = ": ";
+ m_errorString = where + SEPARATOR + what;
}
// sets the index caching mode on the readers
BamReader* reader = item.Reader;
if ( reader == 0 ) continue;
- // attempt to set BamReader's region of interest
- if ( !reader->SetRegion(region) ) {
- cerr << "BamMultiReader WARNING: could not jump " << reader->GetFilename() << " to "
- << region.LeftRefID << ":" << region.LeftPosition << ".."
- << region.RightRefID << ":" << region.RightPosition << endl;
- }
+ // set region of interest
+ reader->SetRegion(region);
}
// return status of cache update
if ( m_alignmentCache == 0 ) {
m_alignmentCache = CreateAlignmentCache();
if ( m_alignmentCache == 0 ) {
- // set error string
+ SetErrorString("BamMultiReader::UpdateAlignmentCache", "unable to create new alignment cache");
return false;
}
}
// the multireader is undefined, so we force program exit.
bool BamMultiReaderPrivate::ValidateReaders(void) const {
- // skip if no readers opened
- if ( m_readers.empty() )
+ m_errorString.clear();
+
+ // skip if 0 or 1 readers opened
+ if ( m_readers.empty() || (m_readers.size() == 1) )
return true;
// retrieve first reader
// check compatible sort order
if ( currentReaderSortOrder != firstReaderSortOrder ) {
- // error string
- cerr << "BamMultiReader ERROR: mismatched sort order in " << reader->GetFilename()
- << ", expected " << firstReaderSortOrder
- << ", but found " << currentReaderSortOrder << endl;
+ const string message = string("mismatched sort order in ") + reader->GetFilename() +
+ ", expected " + firstReaderSortOrder +
+ ", but found " + currentReaderSortOrder;
+ SetErrorString("BamMultiReader::ValidateReaders", message);
return false;
}
if ( (currentReaderRefCount != firstReaderRefCount) ||
(firstReaderRefSize != currentReaderRefSize) )
{
- cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename()
- << " expected " << firstReaderRefCount
- << " reference sequences but only found " << currentReaderRefCount << endl;
+ stringstream s("");
+ s << "mismatched reference count in " << reader->GetFilename()
+ << ", expected " << firstReaderRefCount
+ << ", but found " << currentReaderRefCount;
+ SetErrorString("BamMultiReader::ValidateReaders", s.str());
return false;
}
if ( (firstRef.RefName != currentRef.RefName) ||
(firstRef.RefLength != currentRef.RefLength) )
{
- cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename()
- << " expected: " << endl;
+ stringstream s("");
+ s << "mismatched references found in" << reader->GetFilename()
+ << "expected: " << endl;
// print first reader's reference data
RefVector::const_iterator refIter = firstReaderRefData.begin();
RefVector::const_iterator refEnd = firstReaderRefData.end();
for ( ; refIter != refEnd; ++refIter ) {
const RefData& entry = (*refIter);
- cerr << entry.RefName << " " << entry.RefLength << endl;
+ stringstream s("");
+ s << entry.RefName << " " << endl;
}
- cerr << "but found: " << endl;
+ s << "but found: " << endl;
// print current reader's reference data
refIter = currentReaderRefData.begin();
refEnd = currentReaderRefData.end();
for ( ; refIter != refEnd; ++refIter ) {
const RefData& entry = (*refIter);
- cerr << entry.RefName << " " << entry.RefLength << endl;
+ s << entry.RefName << " " << entry.RefLength << endl;
}
+ SetErrorString("BamMultiReader::ValidateReaders", s.str());
return false;
}
// BamMultiReader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 3 October 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
public:
// file operations
- void Close(void);
- void CloseFile(const std::string& filename);
- void CloseFiles(const std::vector<std::string>& filenames);
+ bool Close(void);
+ bool CloseFile(const std::string& filename);
const std::vector<std::string> Filenames(void) const;
bool Jump(int refID, int position = 0);
bool Open(const std::vector<std::string>& filenames);
bool OpenIndexes(const std::vector<std::string>& indexFilenames);
void SetIndexCacheMode(const BamIndex::IndexCacheMode mode);
+ // error handling
+ std::string GetErrorString(void) const;
+
// 'internal' methods
public:
+ bool CloseFiles(const std::vector<std::string>& filenames);
IMultiMerger* CreateAlignmentCache(void) const;
bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
bool RewindReaders(void);
void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
+ void SetErrorString(const std::string& where, const std::string& what) const; //
bool UpdateAlignmentCache(void);
bool ValidateReaders(void) const;
public:
std::vector<MergeItem> m_readers;
IMultiMerger* m_alignmentCache;
+ mutable std::string m_errorString;
};
} // namespace Internal
// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011(DB)
+// Last modified: 6 October 2011(DB)
// ---------------------------------------------------------------------------
// Manages random access operations in a BAM file
// **************************************************************************
#include <api/BamIndex.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamRandomAccessController_p.h>
#include <api/internal/BamReader_p.h>
#include <api/internal/BamIndexFactory_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
-#include <iostream>
+#include <cassert>
+#include <sstream>
using namespace std;
BamRandomAccessController::BamRandomAccessController(void)
}
void BamRandomAccessController::ClearIndex(void) {
- delete m_index;
- m_index = 0;
+ if ( m_index ) {
+ delete m_index;
+ m_index = 0;
+ }
}
void BamRandomAccessController::ClearRegion(void) {
}
bool BamRandomAccessController::CreateIndex(BamReaderPrivate* reader,
- const BamIndex::IndexType& type) {
-
+ const BamIndex::IndexType& type)
+{
// skip if reader is invalid
- if ( reader == 0 )
+ assert(reader);
+ if ( !reader->IsOpen() ) {
+ SetErrorString("BamRandomAccessController::CreateIndex",
+ "cannot create index for unopened reader");
return false;
+ }
// create new index of requested type
BamIndex* newIndex = BamIndexFactory::CreateIndexOfType(type, reader);
if ( newIndex == 0 ) {
- cerr << "BamRandomAccessController ERROR: could not create index of type " << type << endl;
+ stringstream s("");
+ s << "could not create index of type: " << type;
+ SetErrorString("BamRandomAccessController::CreateIndex", s.str());
return false;
}
// attempt to build index from current BamReader file
if ( !newIndex->Create() ) {
- cerr << "BamRandomAccessController ERROR: could not create index for BAM file: "
- << reader->Filename() << endl;
+ const string indexError = newIndex->GetErrorString();
+ const string message = "could not create index: \n\t" + indexError;
+ SetErrorString("BamRandomAccessController::CreateIndex", message);
return false;
}
return true;
}
+string BamRandomAccessController::GetErrorString(void) const {
+ return m_errorString;
+}
+
bool BamRandomAccessController::HasIndex(void) const {
return ( m_index != 0 );
}
const BamIndex::IndexType& preferredType)
{
// look up index filename, deferring to preferredType if possible
+ assert(reader);
const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);
// if no index file found (of any type)
if ( indexFilename.empty() ) {
- cerr << "BamRandomAccessController WARNING: "
- << "could not find index file for BAM: "
- << reader->Filename() << endl;
+ const string message = string("could not find index file for:") + reader->Filename();
+ SetErrorString("BamRandomAccessController::LocateIndex", message);
return false;
}
// attempt create new index of type based on filename
BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);
if ( index == 0 ) {
- cerr << "BamRandomAccessController ERROR: could not create index for file: " << indexFilename << endl;
+ const string message = string("could not open index file: ") + indexFilename;
+ SetErrorString("BamRandomAccessController::OpenIndex", message);
return false;
}
// attempt to load data from index file
if ( !index->Load(indexFilename) ) {
- cerr << "BamRandomAccessController ERROR: could not load index data from file: " << indexFilename << endl;
+ const string indexError = index->GetErrorString();
+ const string message = string("could not load index data from file: ") + indexFilename +
+ "\n\t" + indexError;
+ SetErrorString("BamRandomAccessController::OpenIndex", message);
return false;
}
return m_hasAlignmentsInRegion;
}
+void BamRandomAccessController::SetErrorString(const string& where, const string& what) {
+ m_errorString = where + ": " + what;
+}
+
void BamRandomAccessController::SetIndex(BamIndex* index) {
if ( m_index )
ClearIndex();
m_index->SetCacheMode(mode);
}
-bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,
- const BamRegion& region,
- const int& referenceCount)
-{
+bool BamRandomAccessController::SetRegion(const BamRegion& region, const int& referenceCount) {
+
// store region
m_region = region;
// cannot jump when no index is available
- if ( !HasIndex() )
+ if ( !HasIndex() ) {
+ SetErrorString("BamRandomAccessController", "cannot jump if no index data available");
return false;
+ }
// adjust region as necessary to reflect where data actually begins
AdjustRegion(referenceCount);
// if no data present, return true
// * Not an error, but future attempts to access alignments in this region will not return data
// Returning true is useful in a BamMultiReader setting where some BAM files may
- // lack alignments in regions where other BAMs do have data.
+ // lack alignments in regions where other files still have data available.
if ( !m_hasAlignmentsInRegion )
return true;
// This covers 'corner case' where a region is requested that lies beyond the last
// alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]
// will not return data. BamMultiReader will still be able to successfully pull alignments
- // from a region from multiple files even if one or more have no data.
- return m_index->Jump(m_region, &m_hasAlignmentsInRegion);
+ // from a region from other files even if this one has no data.
+ if ( !m_index->Jump(m_region, &m_hasAlignmentsInRegion) ) {
+ const string indexError = m_index->GetErrorString();
+ const string message = string("could not set region\n\t") + indexError;
+ SetErrorString("BamRandomAccessController::OpenIndex", message);
+ return false;
+ }
+ else
+ return true;
}
// BamRandomAccessController_p.h (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 24 February 2011(DB)
+// Last modified: 6 October 2011(DB)
// ---------------------------------------------------------------------------
// Manages random access operations in a BAM file
// ***************************************************************************
BamRandomAccessController(void);
~BamRandomAccessController(void);
- // general interface
+ // BamRandomAccessController interface
public:
- void Close(void);
- // index operations
- public:
- //
+ // index methods
void ClearIndex(void);
bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);
bool HasIndex(void) const;
void SetIndex(BamIndex* index);
void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
- // region operations
- public:
+ // region methods
void ClearRegion(void);
bool HasRegion(void) const;
RegionState AlignmentState(const BamAlignment& alignment) const;
bool RegionHasAlignments(void) const;
- bool SetRegion(BamReaderPrivate* reader,
- const BamRegion& region,
- const int& referenceCount);
+ bool SetRegion(const BamRegion& region, const int& referenceCount);
- // 'internal' methods
- public:
+ // general methods
+ void Close(void);
+ std::string GetErrorString(void) const;
+
+ // internal methods
+ private:
// adjusts requested region if necessary (depending on where data actually begins)
void AdjustRegion(const int& referenceCount);
+ // error-string handling
+ void SetErrorString(const std::string& where, const std::string& what);
// data members
private:
// index data
- BamIndex* m_index; // owns index, not a copy - responsible for deleting
+ BamIndex* m_index; // owns the index, not a copy - responsible for deleting
BamIndex::IndexCacheMode m_indexCacheMode;
// region data
BamRegion m_region;
bool m_hasAlignmentsInRegion;
+
+ // general data
+ std::string m_errorString;
};
} // namespace Internal
// BamReader_p.cpp (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 May 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading BAM files
// ***************************************************************************
#include <api/BamConstants.h>
#include <api/BamReader.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamHeader_p.h>
#include <api/internal/BamRandomAccessController_p.h>
#include <api/internal/BamReader_p.h>
using namespace BamTools::Internal;
#include <algorithm>
+#include <cassert>
#include <iostream>
#include <iterator>
#include <vector>
}
// closes the BAM file
-void BamReaderPrivate::Close(void) {
+bool BamReaderPrivate::Close(void) {
- // clear header & reference data
+ // clear BAM metadata
m_references.clear();
m_header.Clear();
- // close internal
- m_randomAccessController.Close();
- m_stream.Close();
-
// clear filename
m_filename.clear();
+
+ // close random access controller
+ m_randomAccessController.Close();
+
+ // if stream is open, attempt close
+ if ( IsOpen() ) {
+ try {
+ m_stream.Close();
+ } catch ( BamException& e ) {
+ const string streamError = e.what();
+ const string message = string("encountered error closing BAM file: \n\t") + streamError;
+ SetErrorString("BamReader::Close", message);
+ return false;
+ }
+ }
+
+ // return success
+ return true;
}
// creates an index file of requested type on current BAM file
bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
- if ( !IsOpen() ) return false;
- return m_randomAccessController.CreateIndex(this, type);
+
+ // skip if BAM file not open
+ if ( !IsOpen() ) {
+ SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+ return false;
+ }
+
+ // attempt to create index
+ if ( m_randomAccessController.CreateIndex(this, type) )
+ return true;
+ else {
+ const string bracError = m_randomAccessController.GetErrorString();
+ const string message = string("could not create index: \n\t") + bracError;
+ SetErrorString("BamReader::CreateIndex", message);
+ return false;
+ }
}
// return path & filename of current BAM file
return m_filename;
}
+string BamReaderPrivate::GetErrorString(void) const {
+ return m_errorString;
+}
+
// return header data as std::string
string BamReaderPrivate::GetHeaderText(void) const {
return m_header.ToString();
alignment.Filename = m_filename;
// return success/failure of parsing char data
- return alignment.BuildCharData();
+ if ( alignment.BuildCharData() )
+ return true;
+ else {
+ const string alError = alignment.GetErrorString();
+ const string message = string("could not populate alignment data: \n\t") + alError;
+ SetErrorString("BamReader::GetNextAlignment", message);
+ return false;
+ }
}
// no valid alignment found
// useful for operations requiring ONLY positional or other alignment-related information
bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
- // skip if region is set but has no alignments
- if ( m_randomAccessController.HasRegion() &&
- !m_randomAccessController.RegionHasAlignments() )
- {
- return false;
- }
-
- // if can't read next alignment
- if ( !LoadNextAlignment(alignment) )
- return false;
-
- // check alignment's region-overlap state
- BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
-
- // if alignment starts after region, no need to keep reading
- if ( state == BamRandomAccessController::AfterRegion )
- return false;
+ try {
- // read until overlap is found
- while ( state != BamRandomAccessController::OverlapsRegion ) {
+ // skip if region is set but has no alignments
+ if ( m_randomAccessController.HasRegion() &&
+ !m_randomAccessController.RegionHasAlignments() )
+ {
+ return false;
+ }
// if can't read next alignment
if ( !LoadNextAlignment(alignment) )
return false;
// check alignment's region-overlap state
- state = m_randomAccessController.AlignmentState(alignment);
+ BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
// if alignment starts after region, no need to keep reading
if ( state == BamRandomAccessController::AfterRegion )
return false;
- }
- // if we get here, we found the next 'valid' alignment
- // (e.g. overlaps current region if one was set, simply the next alignment if not)
- alignment.SupportData.HasCoreOnly = true;
- return true;
+ // read until overlap is found
+ while ( state != BamRandomAccessController::OverlapsRegion ) {
+
+ // if can't read next alignment
+ if ( !LoadNextAlignment(alignment) )
+ return false;
+
+ // check alignment's region-overlap state
+ state = m_randomAccessController.AlignmentState(alignment);
+
+ // if alignment starts after region, no need to keep reading
+ if ( state == BamRandomAccessController::AfterRegion )
+ return false;
+ }
+
+ // if we get here, we found the next 'valid' alignment
+ // (e.g. overlaps current region if one was set, simply the next alignment if not)
+ alignment.SupportData.HasCoreOnly = true;
+ return true;
+
+ } catch ( BamException& e ) {
+ const string streamError = e.what();
+ const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
+ SetErrorString("BamReader::GetNextAlignmentCore", message);
+ return false;
+ }
}
int BamReaderPrivate::GetReferenceCount(void) const {
}
// load BAM header data
-bool BamReaderPrivate::LoadHeaderData(void) {
- return m_header.Load(&m_stream);
+void BamReaderPrivate::LoadHeaderData(void) {
+ m_header.Load(&m_stream);
}
// populates BamAlignment with alignment data under file pointer, returns success/fail
m_stream.Read(buffer, sizeof(uint32_t));
alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
- if ( alignment.SupportData.BlockLength == 0 ) return false;
+ if ( alignment.SupportData.BlockLength == 0 )
+ return false;
// read in core alignment data, make sure the right size of data was read
char x[Constants::BAM_CORE_SIZE];
// read in character data - make sure proper data size was read
bool readCharDataOK = false;
const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
- char* allCharData = (char*)calloc(sizeof(char), dataLength);
+ RaiiBuffer allCharData(dataLength);
- if ( m_stream.Read(allCharData, dataLength) == (signed int)dataLength ) {
+ if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
// store 'allCharData' in supportData structure
- alignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);
+ alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
// set success flag
readCharDataOK = true;
// need to calculate this here so that BamAlignment::GetEndPosition() performs correctly,
// even when GetNextAlignmentCore() is called
const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
- uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
+ uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
CigarOp op;
alignment.CigarData.clear();
alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
}
}
- // clean up & return parsing success/failure
- free(allCharData);
+ // return success/failure
return readCharDataOK;
}
m_stream.Read(buffer, sizeof(uint32_t));
uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
- char* refName = (char*)calloc(refNameLength, 1);
+ RaiiBuffer refName(refNameLength);
// get reference name and reference sequence length
- m_stream.Read(refName, refNameLength);
+ m_stream.Read(refName.Buffer, refNameLength);
m_stream.Read(buffer, sizeof(int32_t));
int32_t refLength = BamTools::UnpackSignedInt(buffer);
if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
// store data for reference
RefData aReference;
- aReference.RefName = (string)((const char*)refName);
+ aReference.RefName = (string)((const char*)refName.Buffer);
aReference.RefLength = refLength;
m_references.push_back(aReference);
-
- // clean up calloc-ed temp variable
- free(refName);
}
// return success
}
bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
- return m_randomAccessController.LocateIndex(this, preferredType);
+
+ if ( m_randomAccessController.LocateIndex(this, preferredType) )
+ return true;
+ else {
+ const string bracError = m_randomAccessController.GetErrorString();
+ const string message = string("could not locate index: \n\t") + bracError;
+ SetErrorString("BamReader::LocateIndex", message);
+ return false;
+ }
}
// opens BAM file (and index)
bool BamReaderPrivate::Open(const string& filename) {
- // close current BAM file if open
- if ( m_stream.IsOpen )
- Close();
+ bool result;
- // attempt to open BgzfStream for reading
- if ( !m_stream.Open(filename, "rb") ) {
- cerr << "BamReader ERROR: Could not open BGZF stream for " << filename << endl;
- return false;
- }
+ try {
- // attempt to load header data
- if ( !LoadHeaderData() ) {
- cerr << "BamReader ERROR: Could not load header data for " << filename << endl;
+ // make sure we're starting with fresh state
Close();
- return false;
- }
- // attempt to load reference data
- if ( !LoadReferenceData() ) {
- cerr << "BamReader ERROR: Could not load reference data for " << filename << endl;
- Close();
+ // open BgzfStream
+ m_stream.Open(filename, "rb");
+ assert(m_stream);
+
+ // load BAM metadata
+ LoadHeaderData();
+ LoadReferenceData();
+
+ // store filename & offset of first alignment
+ m_filename = filename;
+ m_alignmentsBeginOffset = m_stream.Tell();
+
+ // set flag
+ result = true;
+
+ } catch ( BamException& e ) {
+ const string error = e.what();
+ const string message = string("could not open file: ") + filename +
+ "\n\t" + error;
+ SetErrorString("BamReader::Open", message);
return false;
}
- // if all OK, store filename & offset of first alignment
- m_filename = filename;
- m_alignmentsBeginOffset = m_stream.Tell();
-
- // return success
- return true;
+ // return success/failure
+ return result;
}
bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
- return m_randomAccessController.OpenIndex(indexFilename, this);
+
+ if ( m_randomAccessController.OpenIndex(indexFilename, this) )
+ return true;
+ else {
+ const string bracError = m_randomAccessController.GetErrorString();
+ const string message = string("could not open index: \n\t") + bracError;
+ SetErrorString("BamReader::OpenIndex", message);
+ return false;
+ }
}
// returns BAM file pointer to beginning of alignment data
bool BamReaderPrivate::Rewind(void) {
- // attempt rewind to first alignment
- if ( !m_stream.Seek(m_alignmentsBeginOffset) )
- return false;
-
- // verify that we can read first alignment
- BamAlignment al;
- if ( !LoadNextAlignment(al) )
- return false;
-
// reset region
m_randomAccessController.ClearRegion();
- // rewind back to beginning of first alignment
- // return success/fail of seek
- return m_stream.Seek(m_alignmentsBeginOffset);
+ // return status of seeking back to first alignment
+ if ( Seek(m_alignmentsBeginOffset) )
+ return true;
+ else {
+ const string currentError = m_errorString;
+ const string message = string("could not rewind: \n\t") + currentError;
+ SetErrorString("BamReader::Rewind", message);
+ return false;
+ }
}
bool BamReaderPrivate::Seek(const int64_t& position) {
- return m_stream.Seek(position);
+
+ // skip if BAM file not open
+ if ( !IsOpen() ) {
+ SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+ return false;
+ }
+
+ try {
+ m_stream.Seek(position);
+ return true;
+ }
+ catch ( BamException& e ) {
+ const string streamError = e.what();
+ const string message = string("could not seek in BAM file: \n\t") + streamError;
+ SetErrorString("BamReader::Seek", message);
+ return false;
+ }
+}
+
+void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+ static const string SEPARATOR = ": ";
+ m_errorString = where + SEPARATOR + what;
}
void BamReaderPrivate::SetIndex(BamIndex* index) {
// sets current region & attempts to jump to it
// returns success/failure
bool BamReaderPrivate::SetRegion(const BamRegion& region) {
- return m_randomAccessController.SetRegion(this, region, m_references.size());
+
+ if ( m_randomAccessController.SetRegion(region, m_references.size()) )
+ return true;
+ else {
+ const string bracError = m_randomAccessController.GetErrorString();
+ const string message = string("could not set region: \n\t") + bracError;
+ SetErrorString("BamReader::SetRegion", message);
+ return false;
+ }
}
int64_t BamReaderPrivate::Tell(void) const {
// BamReader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 7 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading BAM files
// ***************************************************************************
public:
// file operations
- void Close(void);
+ bool Close(void);
const std::string Filename(void) const;
bool IsOpen(void) const;
bool Open(const std::string& filename);
void SetIndex(BamIndex* index);
void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
+ // error handling
+ std::string GetErrorString(void) const;
+ void SetErrorString(const std::string& where, const std::string& what);
+
// internal methods, but available as a BamReaderPrivate 'interface'
//
// these methods should only be used by BamTools::Internal classes
// (currently only used by the BamIndex subclasses)
public:
// retrieves header text from BAM file
- bool LoadHeaderData(void);
+ void LoadHeaderData(void);
// retrieves BAM alignment under file pointer
// (does no overlap checking or character data parsing)
bool LoadNextAlignment(BamAlignment& alignment);
BamHeader m_header;
BamRandomAccessController m_randomAccessController;
BgzfStream m_stream;
+
+ // error handling
+ std::string m_errorString;
};
} // namespace Internal
// BamStandardIndex.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 24 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the standardized BAM index format (".bai")
// ***************************************************************************
#include <api/BamAlignment.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamReader_p.h>
#include <api/internal/BamStandardIndex_p.h>
using namespace BamTools;
#include <cstdlib>
#include <cstring>
#include <algorithm>
-#include <iostream>
+#include <sstream>
using namespace std;
+// -----------------------------------
// static BamStandardIndex constants
+// -----------------------------------
+
const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
const string BamStandardIndex::BAI_EXTENSION = ".bai";
const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BamStandardIndex::RaiiWrapper::RaiiWrapper(void)
+ : IndexStream(0)
+ , Buffer(0)
+{ }
+
+BamStandardIndex::RaiiWrapper::~RaiiWrapper(void) {
+
+ if ( IndexStream ) {
+ fclose(IndexStream);
+ IndexStream = 0;
+ }
+
+ if ( Buffer ) {
+ delete[] Buffer;
+ Buffer = 0;
+ }
+}
+
+// ---------------------------------
+// BamStandardIndex implementation
+// ---------------------------------
+
// ctor
BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
: BamIndex(reader)
- , m_indexStream(0)
, m_cacheMode(BamIndex::LimitedIndexCaching)
- , m_buffer(0)
, m_bufferLength(0)
{
m_isBigEndian = BamTools::SystemIsBigEndian();
CloseFile();
}
-bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
+void BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
// retrieve references from reader
const RefVector& references = m_reader->GetReferenceData();
// make sure left-bound position is valid
if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
- return false;
+ throw BamException("BamStandardIndex::AdjustRegion", "invalid region requested");
// set region 'begin'
begin = (unsigned int)region.LeftPosition;
// otherwise, set region 'end' to last reference base
else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;
-
- // return success
- return true;
}
void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
}
-bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+void BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
const uint64_t& minOffset,
set<uint16_t>& candidateBins,
vector<int64_t>& offsets)
{
- // attempt seek to first bin
- if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )
- return false;
+ // seek to first bin
+ Seek(refSummary.FirstBinFilePosition, SEEK_SET);
// iterate over reference bins
uint32_t binId;
for ( int i = 0; i < refSummary.NumBins; ++i ) {
// read bin contents (if successful, alignment chunks are now in m_buffer)
- if ( !ReadBinIntoBuffer(binId, numAlignmentChunks) )
- return false;
+ ReadBinIntoBuffer(binId, numAlignmentChunks);
// see if bin is a 'candidate bin'
candidateBinIter = candidateBins.find(binId);
// otherwise, check bin's contents against for overlap
else {
- unsigned int offset = 0;
+ size_t offset = 0;
uint64_t chunkStart;
uint64_t chunkStop;
// iterate over alignment chunks
- for (int j = 0; j < numAlignmentChunks; ++j ) {
+ for ( int j = 0; j < numAlignmentChunks; ++j ) {
// read chunk start & stop from buffer
- memcpy((char*)&chunkStart, m_buffer+offset, sizeof(uint64_t));
+ memcpy((char*)&chunkStart, Resources.Buffer+offset, sizeof(uint64_t));
offset += sizeof(uint64_t);
- memcpy((char*)&chunkStop, m_buffer+offset, sizeof(uint64_t));
+ memcpy((char*)&chunkStop, Resources.Buffer+offset, sizeof(uint64_t));
offset += sizeof(uint64_t);
// swap endian-ness if necessary
break;
}
}
-
- // return success
- return true;
}
uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
delete[] buffer;
buffer = new char[bufferLength];
}
- } catch ( std::bad_alloc ) {
- cerr << "BamStandardIndex ERROR: out of memory when allocating "
- << requestedBytes << " byes" << endl;
- exit(1);
+ } catch ( std::bad_alloc& ) {
+ stringstream s("");
+ s << "out of memory when allocating " << requestedBytes << " bytes";
+ throw BamException("BamStandardIndex::CheckBufferSize", s.str());
}
}
delete[] buffer;
buffer = new unsigned char[bufferLength];
}
- } catch ( std::bad_alloc ) {
- cerr << "BamStandardIndex ERROR: out of memory when allocating "
- << requestedBytes << " byes" << endl;
- exit(1);
+ } catch ( std::bad_alloc& ) {
+ stringstream s("");
+ s << "out of memory when allocating " << requestedBytes << " bytes";
+ throw BamException("BamStandardIndex::CheckBufferSize", s.str());
}
}
-bool BamStandardIndex::CheckMagicNumber(void) {
+void BamStandardIndex::CheckMagicNumber(void) {
// check 'magic number' to see if file is BAI index
char magic[4];
- size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
- if ( elementsRead != 4 ) {
- cerr << "BamStandardIndex ERROR: could not read format 'magic number'" << endl;
- return false;
- }
+ const size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
+ if ( elementsRead != 4 )
+ throw BamException("BamStandardIndex::CheckMagicNumber", "could not read BAI magic number");
// compare to expected value
- if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 ) {
- cerr << "BamStandardIndex ERROR: invalid format" << endl;
- return false;
- }
-
- // otherwise OK
- return true;
+ if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 )
+ throw BamException("BamStandardIndex::CheckMagicNumber", "invalid BAI magic number");
}
void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
void BamStandardIndex::CloseFile(void) {
// close file stream
- if ( IsFileOpen() )
- fclose(m_indexStream);
+ if ( IsFileOpen() ) {
+ fclose(Resources.IndexStream);
+ Resources.IndexStream = 0;
+ }
// clear index file summary data
m_indexFileSummary.clear();
// clean up I/O buffer
- delete[] m_buffer;
- m_buffer = 0;
+ delete[] Resources.Buffer;
+ Resources.Buffer = 0;
m_bufferLength = 0;
}
// builds index from associated BAM file & writes out to index file
bool BamStandardIndex::Create(void) {
- // return false if BamReader is invalid or not open
+ // skip if BamReader is invalid or not open
if ( m_reader == 0 || !m_reader->IsOpen() ) {
- cerr << "BamStandardIndex ERROR: BamReader is not open"
- << ", aborting index creation" << endl;
+ SetErrorString("BamStandardIndex::Create", "could not create index: reader is not open");
return false;
}
// rewind BamReader
if ( !m_reader->Rewind() ) {
- cerr << "BamStandardIndex ERROR: could not rewind BamReader to create index"
- << ", aborting index creation" << endl;
+ const string readerError = m_reader->GetErrorString();
+ const string message = "could not create index: \n\t" + readerError;
+ SetErrorString("BamStandardIndex::Create", message);
return false;
}
- // open new index file (read & write)
- string indexFilename = m_reader->Filename() + Extension();
- if ( !OpenFile(indexFilename, "w+b") ) {
- cerr << "BamStandardIndex ERROR: could not open output index file: " << indexFilename
- << ", aborting index creation" << endl;
- return false;
- }
+ try {
- // initialize BaiFileSummary with number of references
- const int& numReferences = m_reader->GetReferenceCount();
- ReserveForSummary(numReferences);
+ // open new index file (read & write)
+ string indexFilename = m_reader->Filename() + Extension();
+ OpenFile(indexFilename, "w+b");
+
+ // initialize BaiFileSummary with number of references
+ const int& numReferences = m_reader->GetReferenceCount();
+ ReserveForSummary(numReferences);
+
+ // initialize output file
+ WriteHeader();
+
+ // set up bin, ID, offset, & coordinate markers
+ const uint32_t defaultValue = 0xffffffffu;
+ uint32_t currentBin = defaultValue;
+ uint32_t lastBin = defaultValue;
+ int32_t currentRefID = defaultValue;
+ int32_t lastRefID = defaultValue;
+ uint64_t currentOffset = (uint64_t)m_reader->Tell();
+ uint64_t lastOffset = currentOffset;
+ int32_t lastPosition = defaultValue;
+
+ // iterate through alignments in BAM file
+ BamAlignment al;
+ BaiReferenceEntry refEntry;
+ while ( m_reader->LoadNextAlignment(al) ) {
+
+ // changed to new reference
+ if ( lastRefID != al.RefID ) {
+
+ // if not first reference, save previous reference data
+ if ( lastRefID != (int32_t)defaultValue ) {
+
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+ WriteReferenceEntry(refEntry);
+ ClearReferenceEntry(refEntry);
+
+ // write any empty references between (but *NOT* including) lastRefID & al.RefID
+ for ( int i = lastRefID+1; i < al.RefID; ++i ) {
+ BaiReferenceEntry emptyEntry(i);
+ WriteReferenceEntry(emptyEntry);
+ }
+
+ // update bin markers
+ currentOffset = lastOffset;
+ currentBin = al.Bin;
+ lastBin = al.Bin;
+ currentRefID = al.RefID;
+ }
- // initialize output file
- bool createdOk = true;
- createdOk &= WriteHeader();
-
- // set up bin, ID, offset, & coordinate markers
- const uint32_t defaultValue = 0xffffffffu;
- uint32_t currentBin = defaultValue;
- uint32_t lastBin = defaultValue;
- int32_t currentRefID = defaultValue;
- int32_t lastRefID = defaultValue;
- uint64_t currentOffset = (uint64_t)m_reader->Tell();
- uint64_t lastOffset = currentOffset;
- int32_t lastPosition = defaultValue;
-
- // iterate through alignments in BAM file
- BamAlignment al;
- BaiReferenceEntry refEntry;
- while ( m_reader->LoadNextAlignment(al) ) {
+ // otherwise, this is first pass
+ // be sure to write any empty references up to (but *NOT* including) current RefID
+ else {
+ for ( int i = 0; i < al.RefID; ++i ) {
+ BaiReferenceEntry emptyEntry(i);
+ WriteReferenceEntry(emptyEntry);
+ }
+ }
- // changed to new reference
- if ( lastRefID != al.RefID ) {
+ // update reference markers
+ refEntry.ID = al.RefID;
+ lastRefID = al.RefID;
+ lastBin = defaultValue;
+ }
- // if not first reference, save previous reference data
- if ( lastRefID != (int32_t)defaultValue ) {
+ // if lastPosition greater than current alignment position - file not sorted properly
+ else if ( lastPosition > al.Position ) {
+ stringstream s("");
+ s << "BAM file is not properly sorted by coordinate" << endl
+ << "Current alignment position: " << al.Position
+ << " < previous alignment position: " << lastPosition
+ << " on reference ID: " << al.RefID << endl;
+ SetErrorString("BamStandardIndex::Create", s.str());
+ return false;
+ }
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
- createdOk &= WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
+ // if alignment's ref ID is valid & its bin is not a 'leaf'
+ if ( (al.RefID >= 0) && (al.Bin < 4681) )
+ SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
- // write any empty references between (but *NOT* including) lastRefID & al.RefID
- for ( int i = lastRefID+1; i < al.RefID; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
- }
+ // changed to new BAI bin
+ if ( al.Bin != lastBin ) {
- // update bin markers
+ // if not first bin on reference, save previous bin data
+ if ( currentBin != defaultValue )
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+
+ // update markers
currentOffset = lastOffset;
currentBin = al.Bin;
lastBin = al.Bin;
currentRefID = al.RefID;
- }
- // first pass
- // write any empty references up to (but *NOT* including) al.RefID
- else {
- for ( int i = 0; i < al.RefID; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
- }
+ // if invalid RefID, break out
+ if ( currentRefID < 0 )
+ break;
}
- // update reference markers
- refEntry.ID = al.RefID;
- lastRefID = al.RefID;
- lastBin = defaultValue;
- }
+ // make sure that current file pointer is beyond lastOffset
+ if ( m_reader->Tell() <= (int64_t)lastOffset ) {
+ SetErrorString("BamStandardIndex::Create", "calculating offsets failed");
+ return false;
+ }
- // if lastPosition greater than current alignment position - file not sorted properly
- else if ( lastPosition > al.Position ) {
- cerr << "BamStandardIndex ERROR: BAM file is not properly sorted by coordinate"
- << ", aborting index creation"
- << endl
- << "At alignment: " << al.Name
- << " : previous position " << lastPosition
- << " > this alignment position " << al.Position
- << " on reference id: " << al.RefID << endl;
- return false;
+ // update lastOffset & lastPosition
+ lastOffset = m_reader->Tell();
+ lastPosition = al.Position;
}
- // if alignment's ref ID is valid & its bin is not a 'leaf'
- if ( (al.RefID >= 0) && (al.Bin < 4681) )
- SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
-
- // changed to new BAI bin
- if ( al.Bin != lastBin ) {
-
- // if not first bin on reference, save previous bin data
- if ( currentBin != defaultValue )
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+ // after finishing alignments, if any data was read, check:
+ if ( currentRefID >= 0 ) {
- // update markers
- currentOffset = lastOffset;
- currentBin = al.Bin;
- lastBin = al.Bin;
- currentRefID = al.RefID;
+ // store last alignment chunk to its bin, then write last reference entry with data
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
+ WriteReferenceEntry(refEntry);
- // if invalid RefID, break out
- if ( currentRefID < 0 )
- break;
- }
-
- // make sure that current file pointer is beyond lastOffset
- if ( m_reader->Tell() <= (int64_t)lastOffset ) {
- cerr << "BamStandardIndex ERROR: calculating offsets failed"
- << ", aborting index creation" << endl;
- return false;
+ // then write any empty references remaining at end of file
+ for ( int i = currentRefID+1; i < numReferences; ++i ) {
+ BaiReferenceEntry emptyEntry(i);
+ WriteReferenceEntry(emptyEntry);
+ }
}
- // update lastOffset & lastPosition
- lastOffset = m_reader->Tell();
- lastPosition = al.Position;
+ } catch ( BamException& e) {
+ m_errorString = e.what();
+ return false;
}
- // after finishing alignments, if any data was read, check:
- if ( currentRefID >= 0 ) {
-
- // store last alignment chunk to its bin, then write last reference entry with data
- SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
- createdOk &= WriteReferenceEntry(refEntry);
-
- // then write any empty references remaining at end of file
- for ( int i = currentRefID+1; i < numReferences; ++i ) {
- BaiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
- }
+ // rewind BamReader
+ if ( !m_reader->Rewind() ) {
+ const string readerError = m_reader->GetErrorString();
+ const string message = "could not create index: \n\t" + readerError;
+ SetErrorString("BamStandardIndex::Create", message);
+ return false;
}
- // rewind reader now that we're done building
- createdOk &= m_reader->Rewind();
-
- // return result
- return createdOk;
+ // return success
+ return true;
}
// returns format's file extension
return BamStandardIndex::BAI_EXTENSION;
}
-bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+void BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
// cannot calculate offsets if unknown/invalid reference ID requested
if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
- return false;
+ throw BamException("BamStandardIndex::GetOffset", "invalid reference ID requested");
// retrieve index summary for left bound reference
const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
// set up region boundaries based on actual BamReader data
uint32_t begin;
uint32_t end;
- if ( !AdjustRegion(region, begin, end) ) {
- cerr << "BamStandardIndex ERROR: cannot calculate offsets on invalid region" << endl;
- return false;
- }
+ AdjustRegion(region, begin, end);
// retrieve all candidate bin IDs for region
set<uint16_t> candidateBins;
const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
// attempt to use reference summary, minOffset, & candidateBins to calculate offsets
- // no data should not be error
+ // no data should not be error, just bail
vector<int64_t> offsets;
- if ( !CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets) ) {
- cerr << "BamStandardIndex ERROR: could not calculate candidate offsets for requested region" << endl;
- return false;
- }
-
- // if not candidate offsets are present in the indexed (most likely sparce coverage)
- // then silently bail
- if( offsets.size() == 0 ) {
- return false;
- }
+ CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets);
+ if ( offsets.empty() )
+ return;
// ensure that offsets are sorted before processing
sort( offsets.begin(), offsets.end() );
// attempt seek to candidate offset
const int64_t& candidateOffset = (*offsetIter);
if ( !m_reader->Seek(candidateOffset) ) {
- cerr << "BamStandardIndex ERROR: could not jump"
- << ", there was a problem seeking in BAM file" << endl;
- return false;
+ const string readerError = m_reader->GetErrorString();
+ const string message = "could not seek in BAM file: \n\t" + readerError;
+ throw BamException("BamToolsIndex::GetOffset", message);
}
// load first available alignment, setting flag to true if data exists
} else count = step;
}
- // seek back to the offset before the 'current offset' (to cover overlaps)
+ // step back to the offset before the 'current offset' (to make sure we cover overlaps)
if ( offsetIter != offsets.begin() )
--offsetIter;
offset = (*offsetIter);
-
- // return succes
- return true;
}
// returns whether reference has alignments or no
}
bool BamStandardIndex::IsFileOpen(void) const {
- return ( m_indexStream != 0 );
+ return ( Resources.IndexStream != 0 );
}
// attempts to use index data to jump to @region, returns success/fail
// clear out flag
*hasAlignmentsInRegion = false;
- // skip if reader is not valid or is not open
- if ( m_reader == 0 || !m_reader->IsOpen() )
+ // skip if invalid reader or not open
+ if ( m_reader == 0 || !m_reader->IsOpen() ) {
+ SetErrorString("BamStandardIndex::Jump", "could not jump: reader is not open");
return false;
+ }
// calculate nearest offset to jump to
int64_t offset;
- if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
- cerr << "BamStandardIndex ERROR: could not jump"
- << ", unable to calculate offset for specified region" << endl;
+ try {
+ GetOffset(region, offset, hasAlignmentsInRegion);
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
return false;
}
// loads existing data from file into memory
bool BamStandardIndex::Load(const std::string& filename) {
- // attempt open index file (read-only)
- if ( !OpenFile(filename, "rb") ) {
- cerr << "BamStandardIndex ERROR: could not open input index file: " << filename
- << ", aborting index load" << endl;
- return false;
- }
+ try {
- // if invalid format 'magic number', close & return failure
- if ( !CheckMagicNumber() ) {
- cerr << "BamStandardIndex ERROR: unexpected format for index file: " << filename
- << ", aborting index load" << endl;
- CloseFile();
- return false;
- }
+ // attempt to open file (read-only)
+ OpenFile(filename, "rb");
+
+ // validate format
+ CheckMagicNumber();
+
+ // load in-memory summary of index data
+ SummarizeIndexFile();
- // attempt to load index file summary, return success/failure
- if ( !SummarizeIndexFile() ) {
- cerr << "BamStandardIndex ERROR: could not generate a summary of index file " << filename
- << ", aborting index load" << endl;
- CloseFile();
+ // return success
+ return true;
+
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
return false;
}
-
- // if we get here, index summary is loaded OK
- return true;
}
uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
// attempt seek to proper index file position
const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
index*BamStandardIndex::SIZEOF_LINEAROFFSET;
- if ( !Seek(linearOffsetFilePosition, SEEK_SET) )
- return 0;
+ Seek(linearOffsetFilePosition, SEEK_SET);
// read linear offset from BAI file
- uint64_t linearOffset(0);
- if ( !ReadLinearOffset(linearOffset) )
- return 0;
+ uint64_t linearOffset;
+ ReadLinearOffset(linearOffset);
return linearOffset;
}
chunks = mergedChunks;
}
-bool BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
+void BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
// make sure any previous index file is closed
CloseFile();
// attempt to open file
- m_indexStream = fopen(filename.c_str(), mode);
- return IsFileOpen();
+ Resources.IndexStream = fopen(filename.c_str(), mode);
+ if ( !IsFileOpen() ) {
+ const string message = string("could not open file: ") + filename;
+ throw BamException("BamStandardIndex::OpenFile", message);
+ }
}
-bool BamStandardIndex::ReadBinID(uint32_t& binId) {
- size_t elementsRead = 0;
- elementsRead += fread(&binId, sizeof(binId), 1, m_indexStream);
+void BamStandardIndex::ReadBinID(uint32_t& binId) {
+ const size_t elementsRead = fread(&binId, sizeof(binId), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(binId);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadBinID", "could not read BAI bin ID");
}
-bool BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
-
- bool readOk = true;
+void BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
// read bin header
- readOk &= ReadBinID(binId);
- readOk &= ReadNumAlignmentChunks(numAlignmentChunks);
+ ReadBinID(binId);
+ ReadNumAlignmentChunks(numAlignmentChunks);
// read bin contents
const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
- readOk &= ReadIntoBuffer(bytesRequested);
-
- // return success/failure
- return readOk;
+ ReadIntoBuffer(bytesRequested);
}
-bool BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
+void BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
// ensure that our buffer is big enough for request
- BamStandardIndex::CheckBufferSize(m_buffer, m_bufferLength, bytesRequested);
+ BamStandardIndex::CheckBufferSize(Resources.Buffer, m_bufferLength, bytesRequested);
// read from BAI file stream
- size_t bytesRead = fread( m_buffer, sizeof(char), bytesRequested, m_indexStream );
- return ( bytesRead == (size_t)bytesRequested );
+ const size_t bytesRead = fread( Resources.Buffer, sizeof(char), bytesRequested, Resources.IndexStream );
+ if ( bytesRead != (size_t)bytesRequested ) {
+ stringstream s("");
+ s << "expected to read: " << bytesRequested << " bytes, "
+ << "but instead read: " << bytesRead;
+ throw BamException("BamStandardIndex::ReadIntoBuffer", s.str());
+ }
}
-bool BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
- size_t elementsRead = 0;
- elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+void BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
+ const size_t elementsRead = fread(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadLinearOffset", "could not read BAI linear offset");
}
-bool BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
- size_t elementsRead = 0;
- elementsRead += fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, m_indexStream);
+void BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
+ const size_t elementsRead = fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI chunk count");
}
-bool BamStandardIndex::ReadNumBins(int& numBins) {
- size_t elementsRead = 0;
- elementsRead += fread(&numBins, sizeof(numBins), 1, m_indexStream);
+void BamStandardIndex::ReadNumBins(int& numBins) {
+ const size_t elementsRead = fread(&numBins, sizeof(numBins), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numBins);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadNumBins", "could not read BAI bin count");
}
-bool BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
- size_t elementsRead = 0;
- elementsRead += fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream);
+void BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
+ const size_t elementsRead = fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadNumAlignmentChunks", "could not read BAI linear offset count");
}
-bool BamStandardIndex::ReadNumReferences(int& numReferences) {
- size_t elementsRead = 0;
- elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+void BamStandardIndex::ReadNumReferences(int& numReferences) {
+ const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numReferences);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamStandardIndex::ReadNumReferences", "could not read reference count");
}
void BamStandardIndex::ReserveForSummary(const int& numReferences) {
}
void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
- const uint32_t& currentBin,
- const uint64_t& currentOffset,
- const uint64_t& lastOffset)
+ const uint32_t& currentBin,
+ const uint64_t& currentOffset,
+ const uint64_t& lastOffset)
{
// create new alignment chunk
BaiAlignmentChunk newChunk(currentOffset, lastOffset);
-
-
// if no entry exists yet for this bin, create one and store alignment chunk
BaiBinMap::iterator binIter = binMap.find(currentBin);
if ( binIter == binMap.end() ) {
}
// seek to position in index file stream
-bool BamStandardIndex::Seek(const int64_t& position, const int& origin) {
- return ( fseek64(m_indexStream, position, origin) == 0 );
+void BamStandardIndex::Seek(const int64_t& position, const int& origin) {
+ if ( fseek64(Resources.IndexStream, position, origin) != 0 )
+ throw BamException("BamStandardIndex::Seek", "could not seek in BAI file");
}
// change the index caching behavior
// do nothing else here ? cache mode will be ignored from now on, most likely
}
-bool BamStandardIndex::SkipBins(const int& numBins) {
+void BamStandardIndex::SkipBins(const int& numBins) {
uint32_t binId;
int32_t numAlignmentChunks;
- bool skippedOk = true;
for (int i = 0; i < numBins; ++i)
- skippedOk &= ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
- return skippedOk;
+ ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
}
-bool BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
+void BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
- return ReadIntoBuffer(bytesRequested);
+ ReadIntoBuffer(bytesRequested);
}
void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
sort( linearOffsets.begin(), linearOffsets.end() );
}
-bool BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
+void BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
// load number of bins
int numBins;
- if ( !ReadNumBins(numBins) )
- return false;
+ ReadNumBins(numBins);
// store bins summary for this reference
refSummary.NumBins = numBins;
refSummary.FirstBinFilePosition = Tell();
- // attempt skip reference bins, return success/failure
- if ( !SkipBins(numBins) )
- return false;
-
- // if we get here, bin summarized OK
- return true;
+ // skip this reference's bins
+ SkipBins(numBins);
}
-bool BamStandardIndex::SummarizeIndexFile(void) {
+void BamStandardIndex::SummarizeIndexFile(void) {
// load number of reference sequences
int numReferences;
- if ( !ReadNumReferences(numReferences) )
- return false;
+ ReadNumReferences(numReferences);
// initialize file summary data
ReserveForSummary(numReferences);
// iterate over reference entries
- bool loadedOk = true;
BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
BaiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
- loadedOk &= SummarizeReference(*summaryIter);
-
- // return result
- return loadedOk;
+ SummarizeReference(*summaryIter);
}
-bool BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
+void BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
// load number of linear offsets
int numLinearOffsets;
- if ( !ReadNumLinearOffsets(numLinearOffsets) )
- return false;
+ ReadNumLinearOffsets(numLinearOffsets);
// store bin summary data for this reference
refSummary.NumLinearOffsets = numLinearOffsets;
refSummary.FirstLinearOffsetFilePosition = Tell();
// skip linear offsets in index file
- if ( !SkipLinearOffsets(numLinearOffsets) )
- return false;
-
- // if get here, linear offsets summarized OK
- return true;
+ SkipLinearOffsets(numLinearOffsets);
}
-bool BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
-
- bool loadedOk = true;
- loadedOk &= SummarizeBins(refSummary);
- loadedOk &= SummarizeLinearOffsets(refSummary);
- return loadedOk;
+void BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
+ SummarizeBins(refSummary);
+ SummarizeLinearOffsets(refSummary);
}
// return position of file pointer in index file stream
int64_t BamStandardIndex::Tell(void) const {
- return ftell64(m_indexStream);
+ return ftell64(Resources.IndexStream);
}
-bool BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
-
- size_t elementsWritten = 0;
+void BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
// localize alignment chunk offsets
uint64_t start = chunk.Start;
}
// write to index file
- elementsWritten += fwrite(&start, sizeof(start), 1, m_indexStream);
- elementsWritten += fwrite(&stop, sizeof(stop), 1, m_indexStream);
-
- // return success/failure of write
- return ( elementsWritten == 2 );
+ size_t elementsWritten = 0;
+ elementsWritten += fwrite(&start, sizeof(start), 1, Resources.IndexStream);
+ elementsWritten += fwrite(&stop, sizeof(stop), 1, Resources.IndexStream);
+ if ( elementsWritten != 2 )
+ throw BamException("BamStandardIndex::WriteAlignmentChunk", "could not write BAI alignment chunk");
}
-bool BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
+void BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
// make sure chunks are merged (simplified) before writing & saving summary
MergeAlignmentChunks(chunks);
- size_t elementsWritten = 0;
-
// write chunks
int32_t chunkCount = chunks.size();
if ( m_isBigEndian ) SwapEndian_32(chunkCount);
- elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);
+ const size_t elementsWritten = fwrite(&chunkCount, sizeof(chunkCount), 1, Resources.IndexStream);
+ if ( elementsWritten != 1 )
+ throw BamException("BamStandardIndex::WriteAlignmentChunks", "could not write BAI chunk count");
// iterate over chunks
- bool chunksOk = true;
BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();
for ( ; chunkIter != chunkEnd; ++chunkIter )
- chunksOk &= WriteAlignmentChunk( (*chunkIter) );
-
- // return success/failure of write
- return ( (elementsWritten == 1) && chunksOk );
+ WriteAlignmentChunk( (*chunkIter) );
}
-bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
-
- size_t elementsWritten = 0;
+void BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
// write BAM bin ID
uint32_t binKey = binId;
if ( m_isBigEndian ) SwapEndian_32(binKey);
- elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);
+ const size_t elementsWritten = fwrite(&binKey, sizeof(binKey), 1, Resources.IndexStream);
+ if ( elementsWritten != 1 )
+ throw BamException("BamStandardIndex::WriteBin", "could not write bin ID");
// write bin's alignment chunks
- bool chunksOk = WriteAlignmentChunks(chunks);
-
- // return success/failure of write
- return ( (elementsWritten == 1) && chunksOk );
+ WriteAlignmentChunks(chunks);
}
-bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
-
- size_t elementsWritten = 0;
+void BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
// write number of bins
int32_t binCount = bins.size();
if ( m_isBigEndian ) SwapEndian_32(binCount);
- elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);
+ const size_t elementsWritten = fwrite(&binCount, sizeof(binCount), 1, Resources.IndexStream);
+ if ( elementsWritten != 1 )
+ throw BamException("BamStandardIndex::WriteBins", "could not write bin count");
// save summary for reference's bins
SaveBinsSummary(refId, bins.size());
// iterate over bins
- bool binsOk = true;
BaiBinMap::iterator binIter = bins.begin();
BaiBinMap::iterator binEnd = bins.end();
for ( ; binIter != binEnd; ++binIter )
- binsOk &= WriteBin( (*binIter).first, (*binIter).second );
-
- // return success/failure of write
- return ( (elementsWritten == 1) && binsOk );
+ WriteBin( (*binIter).first, (*binIter).second );
}
-bool BamStandardIndex::WriteHeader(void) {
+void BamStandardIndex::WriteHeader(void) {
size_t elementsWritten = 0;
// write magic number
- elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);
+ elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, Resources.IndexStream);
// write number of reference sequences
int32_t numReferences = m_indexFileSummary.size();
if ( m_isBigEndian ) SwapEndian_32(numReferences);
- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
+ elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
- // return success/failure of write
- return (elementsWritten == 5);
+ if ( elementsWritten != 5 )
+ throw BamException("BamStandardIndex::WriteHeader", "could not write BAI header");
}
-bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
+void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
// make sure linear offsets are sorted before writing & saving summary
SortLinearOffsets(linearOffsets);
// write number of linear offsets
int32_t offsetCount = linearOffsets.size();
if ( m_isBigEndian ) SwapEndian_32(offsetCount);
- elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);
+ elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, Resources.IndexStream);
// save summary for reference's linear offsets
SaveLinearOffsetsSummary(refId, linearOffsets.size());
// write linear offset
uint64_t linearOffset = (*offsetIter);
if ( m_isBigEndian ) SwapEndian_64(linearOffset);
- elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+ elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, Resources.IndexStream);
}
- // return success/failure of write
- return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );
+ if ( elementsWritten != (linearOffsets.size() + 1) )
+ throw BamException("BamStandardIndex::WriteLinearOffsets", "could not write BAI linear offsets");
}
-bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
- bool refOk = true;
- refOk &= WriteBins(refEntry.ID, refEntry.Bins);
- refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
- return refOk;
+void BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
+ WriteBins(refEntry.ID, refEntry.Bins);
+ WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
}
// BamStandardIndex.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 24 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the standardized BAM index format (".bai")
// ***************************************************************************
// returns format's file extension
static const std::string Extension(void);
- // internal file ops
+ // internal methods
private:
- bool CheckMagicNumber(void);
+
+ // index file ops
+ void CheckMagicNumber(void);
void CloseFile(void);
bool IsFileOpen(void) const;
- bool OpenFile(const std::string& filename, const char* mode);
- bool Seek(const int64_t& position, const int& origin);
+ void OpenFile(const std::string& filename, const char* mode);
+ void Seek(const int64_t& position, const int& origin);
int64_t Tell(void) const;
- // internal BAI index building methods
- private:
+ // BAI index building methods
void ClearReferenceEntry(BaiReferenceEntry& refEntry);
void SaveAlignmentChunkToBin(BaiBinMap& binMap,
const uint32_t& currentBin,
const int& alignmentStopPosition,
const uint64_t& lastOffset);
- // internal random-access methods
- private:
- bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+ // random-access methods
+ void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
void CalculateCandidateBins(const uint32_t& begin,
const uint32_t& end,
std::set<uint16_t>& candidateBins);
- bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+ void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
const uint64_t& minOffset,
std::set<uint16_t>& candidateBins,
std::vector<int64_t>& offsets);
uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
- bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
- // internal BAI summary (create/load) methods
- private:
+ // BAI summary (create/load) methods
void ReserveForSummary(const int& numReferences);
void SaveBinsSummary(const int& refId, const int& numBins);
void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
- bool SkipBins(const int& numBins);
- bool SkipLinearOffsets(const int& numLinearOffsets);
- bool SummarizeBins(BaiReferenceSummary& refSummary);
- bool SummarizeIndexFile(void);
- bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
- bool SummarizeReference(BaiReferenceSummary& refSummary);
-
- // internal BAI full index input methods
- private:
- bool ReadBinID(uint32_t& binId);
- bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
- bool ReadIntoBuffer(const unsigned int& bytesRequested);
- bool ReadLinearOffset(uint64_t& linearOffset);
- bool ReadNumAlignmentChunks(int& numAlignmentChunks);
- bool ReadNumBins(int& numBins);
- bool ReadNumLinearOffsets(int& numLinearOffsets);
- bool ReadNumReferences(int& numReferences);
-
- // internal BAI full index output methods
- private:
+ void SkipBins(const int& numBins);
+ void SkipLinearOffsets(const int& numLinearOffsets);
+ void SummarizeBins(BaiReferenceSummary& refSummary);
+ void SummarizeIndexFile(void);
+ void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+ void SummarizeReference(BaiReferenceSummary& refSummary);
+
+ // BAI full index input methods
+ void ReadBinID(uint32_t& binId);
+ void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+ void ReadIntoBuffer(const unsigned int& bytesRequested);
+ void ReadLinearOffset(uint64_t& linearOffset);
+ void ReadNumAlignmentChunks(int& numAlignmentChunks);
+ void ReadNumBins(int& numBins);
+ void ReadNumLinearOffsets(int& numLinearOffsets);
+ void ReadNumReferences(int& numReferences);
+
+ // BAI full index output methods
void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
- bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
- bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
- bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
- bool WriteBins(const int& refId, BaiBinMap& bins);
- bool WriteHeader(void);
- bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
- bool WriteReferenceEntry(BaiReferenceEntry& refEntry);
+ void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+ void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+ void WriteBins(const int& refId, BaiBinMap& bins);
+ void WriteHeader(void);
+ void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+ void WriteReferenceEntry(BaiReferenceEntry& refEntry);
// data members
private:
- FILE* m_indexStream;
- bool m_isBigEndian;
+ bool m_isBigEndian;
BamIndex::IndexCacheMode m_cacheMode;
BaiFileSummary m_indexFileSummary;
// our input buffer
- char* m_buffer;
unsigned int m_bufferLength;
+ struct RaiiWrapper {
+ FILE* IndexStream;
+ char* Buffer;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper Resources;
+
// static methods
private:
// checks if the buffer is large enough to accomodate the requested size
// BamToolsIndex.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 27 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the BamTools index format (".bti")
// ***************************************************************************
#include <api/BamAlignment.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamReader_p.h>
#include <api/internal/BamToolsIndex_p.h>
#include <api/internal/BgzfStream_p.h>
#include <map>
using namespace std;
+// --------------------------------
// static BamToolsIndex constants
-const int BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
+// --------------------------------
+
+const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
const string BamToolsIndex::BTI_EXTENSION = ".bti";
const char* const BamToolsIndex::BTI_MAGIC = "BTI\1";
const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t)*2 + sizeof(int64_t);
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BamToolsIndex::RaiiWrapper::RaiiWrapper(void)
+ : IndexStream(0)
+{ }
+
+BamToolsIndex::RaiiWrapper::~RaiiWrapper(void) {
+ if ( IndexStream )
+ fclose(IndexStream);
+}
+
+// ------------------------------
+// BamToolsIndex implementation
+// ------------------------------
+
// ctor
BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)
: BamIndex(reader)
- , m_indexStream(0)
, m_cacheMode(BamIndex::LimitedIndexCaching)
, m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)
, m_inputVersion(0)
CloseFile();
}
-bool BamToolsIndex::CheckMagicNumber(void) {
+void BamToolsIndex::CheckMagicNumber(void) {
- // check 'magic number' to see if file is BTI index
+ // read magic number
char magic[4];
- size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
- if ( elementsRead != 4 ) {
- cerr << "BamToolsIndex ERROR: could not read format 'magic' number" << endl;
- return false;
- }
-
- if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 ) {
- cerr << "BamToolsIndex ERROR: invalid format" << endl;
- return false;
- }
+ size_t elementsRead = fread(magic, sizeof(char), 4, Resources.IndexStream);
+ if ( elementsRead != 4 )
+ throw BamException("BamToolsIndex::CheckMagicNumber", "could not read BTI magic number");
- // otherwise ok
- return true;
+ // validate expected magic number
+ if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 )
+ throw BamException("BamToolsIndex::CheckMagicNumber", "invalid BTI magic number");
}
// check index file version, return true if OK
-bool BamToolsIndex::CheckVersion(void) {
+void BamToolsIndex::CheckVersion(void) {
// read version from file
- size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);
- if ( elementsRead != 1 ) return false;
+ size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, Resources.IndexStream);
+ if ( elementsRead != 1 )
+ throw BamException("BamToolsIndex::CheckVersion", "could not read format version");
if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
// if version is negative, or zero
- if ( m_inputVersion <= 0 ) {
- cerr << "BamToolsIndex ERROR: could not load index file: invalid version."
- << endl;
- return false;
- }
+ if ( m_inputVersion <= 0 )
+ throw BamException("BamToolsIndex::CheckVersion", "invalid format version");
// if version is newer than can be supported by this version of bamtools
else if ( m_inputVersion > m_outputVersion ) {
- cerr << "BamToolsIndex ERROR: could not load index file. This version of BamTools does not recognize new index file version"
- << endl
- << "Please update BamTools to a more recent version to support this index file."
- << endl;
- return false;
+ const string message = "unsupported format: this index was created by a newer version of BamTools. "
+ "Update your local version of BamTools to use the index file.";
+ throw BamException("BamToolsIndex::CheckVersion", message);
}
// ------------------------------------------------------------------
// check for deprecated, unsupported versions
- // (typically whose format did not accomodate a particular bug fix)
+ // (the format had to be modified to accomodate a particular bug fix)
else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) {
- cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to accessing data near reference ends."
- << endl << endl
- << "Please run 'bamtools index -bti -in yourData.bam' to generate an up-to-date, fixed BTI file."
- << endl << endl;
- return false;
+ const string message = "unsupported format: this version of the index may not properly handle "
+ "reference ends. Please run 'bamtools index -bti -in yourData.bam' to "
+ "generate an up-to-date, fixed BTI file.";
+ throw BamException("BamToolsIndex::CheckVersion", message);
}
else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) {
- cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to handling empty references."
- << endl << endl
- << "Please run 'bamtools index -bti -in yourData.bam' to generate an up-to-date, fixed BTI file."
- << endl << endl;
- return false;
+ const string message = "unsupported format: this version of the index may not properly handle "
+ "empty references. Please run 'bamtools index -bti -in yourData.bam to "
+ "generate an up-to-date, fixed BTI file.";
+ throw BamException("BamToolsIndex::CheckVersion", message);
}
-
- // otherwise ok
- else return true;
}
void BamToolsIndex::ClearReferenceEntry(BtiReferenceEntry& refEntry) {
}
void BamToolsIndex::CloseFile(void) {
- if ( IsFileOpen() )
- fclose(m_indexStream);
+ if ( IsFileOpen() ) {
+ fclose(Resources.IndexStream);
+ Resources.IndexStream = 0;
+ }
m_indexFileSummary.clear();
}
// builds index from associated BAM file & writes out to index file
bool BamToolsIndex::Create(void) {
- // return false if BamReader is invalid or not open
+ // skip if BamReader is invalid or not open
if ( m_reader == 0 || !m_reader->IsOpen() ) {
- cerr << "BamToolsIndex ERROR: BamReader is not open"
- << ", aborting index creation" << endl;
+ SetErrorString("BamToolsIndex::Create", "could not create index: reader is not open");
return false;
}
// rewind BamReader
if ( !m_reader->Rewind() ) {
- cerr << "BamToolsIndex ERROR: could not rewind BamReader to create index"
- << ", aborting index creation" << endl;
+ const string readerError = m_reader->GetErrorString();
+ const string message = "could not create index: \n\t" + readerError;
+ SetErrorString("BamToolsIndex::Create", message);
return false;
}
- // open new index file (read & write)
- string indexFilename = m_reader->Filename() + Extension();
- if ( !OpenFile(indexFilename, "w+b") ) {
- cerr << "BamToolsIndex ERROR: could not open output index file " << indexFilename
- << ", aborting index creation" << endl;
- return false;
- }
+ try {
+ // open new index file (read & write)
+ string indexFilename = m_reader->Filename() + Extension();
+ OpenFile(indexFilename, "w+b");
- // initialize BtiFileSummary with number of references
- const int& numReferences = m_reader->GetReferenceCount();
- InitializeFileSummary(numReferences);
+ // initialize BtiFileSummary with number of references
+ const int& numReferences = m_reader->GetReferenceCount();
+ InitializeFileSummary(numReferences);
- // initialize output file
- bool createdOk = true;
- createdOk &= WriteHeader();
+ // intialize output file header
+ WriteHeader();
- // index building markers
- int32_t currentBlockCount = 0;
- int64_t currentAlignmentOffset = m_reader->Tell();
- int32_t blockRefId = -1;
- int32_t blockMaxEndPosition = -1;
- int64_t blockStartOffset = currentAlignmentOffset;
- int32_t blockStartPosition = -1;
+ // index building markers
+ uint32_t currentBlockCount = 0;
+ int64_t currentAlignmentOffset = m_reader->Tell();
+ int32_t blockRefId = -1;
+ int32_t blockMaxEndPosition = -1;
+ int64_t blockStartOffset = currentAlignmentOffset;
+ int32_t blockStartPosition = -1;
- // plow through alignments, storing index entries
- BamAlignment al;
- BtiReferenceEntry refEntry;
- while ( m_reader->LoadNextAlignment(al) ) {
+ // plow through alignments, storing index entries
+ BamAlignment al;
+ BtiReferenceEntry refEntry;
+ while ( m_reader->LoadNextAlignment(al) ) {
- // if moved to new reference
- if ( al.RefID != blockRefId ) {
+ // if moved to new reference
+ if ( al.RefID != blockRefId ) {
- // if first pass, check:
- if ( currentBlockCount == 0 ) {
+ // if first pass, check:
+ if ( currentBlockCount == 0 ) {
- // write any empty references up to (but not including) al.RefID
- for ( int i = 0; i < al.RefID; ++i ) {
- BtiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
+ // write any empty references up to (but not including) al.RefID
+ for ( int i = 0; i < al.RefID; ++i )
+ WriteReferenceEntry( BtiReferenceEntry(i) );
}
- }
- // not first pass:
- else {
+ // not first pass:
+ else {
- // store previous BTI block data in reference entry
- BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
+ // store previous BTI block data in reference entry
+ BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+ refEntry.Blocks.push_back(block);
- // write reference entry, then clear
- createdOk &= WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
+ // write reference entry, then clear
+ WriteReferenceEntry(refEntry);
+ ClearReferenceEntry(refEntry);
- // write any empty references between (but not including) the last blockRefID and current al.RefID
- for ( int i = blockRefId+1; i < al.RefID; ++i ) {
- BtiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
+ // write any empty references between (but not including)
+ // the last blockRefID and current al.RefID
+ for ( int i = blockRefId+1; i < al.RefID; ++i )
+ WriteReferenceEntry( BtiReferenceEntry(i) );
+
+ // reset block count
+ currentBlockCount = 0;
}
- // reset block count
- currentBlockCount = 0;
+ // set ID for new reference entry
+ refEntry.ID = al.RefID;
}
- // set ID for new reference entry
- refEntry.ID = al.RefID;
- }
+ // if beginning of block, update counters
+ if ( currentBlockCount == 0 ) {
+ blockRefId = al.RefID;
+ blockStartOffset = currentAlignmentOffset;
+ blockStartPosition = al.Position;
+ blockMaxEndPosition = al.GetEndPosition();
+ }
- // if beginning of block, update counters
- if ( currentBlockCount == 0 ) {
- blockRefId = al.RefID;
- blockStartOffset = currentAlignmentOffset;
- blockStartPosition = al.Position;
- blockMaxEndPosition = al.GetEndPosition();
- }
+ // increment block counter
+ ++currentBlockCount;
- // increment block counter
- ++currentBlockCount;
+ // check end position
+ int32_t alignmentEndPosition = al.GetEndPosition();
+ if ( alignmentEndPosition > blockMaxEndPosition )
+ blockMaxEndPosition = alignmentEndPosition;
- // check end position
- int32_t alignmentEndPosition = al.GetEndPosition();
- if ( alignmentEndPosition > blockMaxEndPosition )
- blockMaxEndPosition = alignmentEndPosition;
+ // if block is full, get offset for next block, reset currentBlockCount
+ if ( currentBlockCount == m_blockSize ) {
- // if block is full, get offset for next block, reset currentBlockCount
- if ( currentBlockCount == m_blockSize ) {
+ // store previous block data in reference entry
+ BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+ refEntry.Blocks.push_back(block);
- // store previous block data in reference entry
- BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
+ // update markers
+ blockStartOffset = m_reader->Tell();
+ currentBlockCount = 0;
+ }
- // update markers
- blockStartOffset = m_reader->Tell();
- currentBlockCount = 0;
+ // not the best name, but for the next iteration, this value will be the offset of the
+ // *current* alignment. this is necessary because we won't know if this next alignment
+ // is on a new reference until we actually read it
+ currentAlignmentOffset = m_reader->Tell();
}
- // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
- // necessary because we won't know if this next alignment is on a new reference until we actually read it
- currentAlignmentOffset = m_reader->Tell();
- }
+ // after finishing alignments, if any data was read, check:
+ if ( blockRefId >= 0 ) {
- // after finishing alignments, if any data was read, check:
- if ( blockRefId >= 0 ) {
-
- // store last BTI block data in reference entry
- BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
- refEntry.Blocks.push_back(block);
+ // store last BTI block data in reference entry
+ BtiBlock block(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+ refEntry.Blocks.push_back(block);
- // write last reference entry, then clear
- createdOk &= WriteReferenceEntry(refEntry);
- ClearReferenceEntry(refEntry);
+ // write last reference entry, then clear
+ WriteReferenceEntry(refEntry);
+ ClearReferenceEntry(refEntry);
- // then write any empty references remaining at end of file
- for ( int i = blockRefId+1; i < numReferences; ++i ) {
- BtiReferenceEntry emptyEntry(i);
- createdOk &= WriteReferenceEntry(emptyEntry);
+ // then write any empty references remaining at end of file
+ for ( int i = blockRefId+1; i < numReferences; ++i )
+ WriteReferenceEntry( BtiReferenceEntry(i) );
}
+
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
+ return false;
}
- // rewind reader & return result
- createdOk &= m_reader->Rewind();
+ // rewind BamReader
+ if ( !m_reader->Rewind() ) {
+ const string readerError = m_reader->GetErrorString();
+ const string message = "could not create index: \n\t" + readerError;
+ SetErrorString("BamToolsIndex::Create", message);
+ return false;
+ }
- // return result
- return createdOk;
+ // return success
+ return true;
}
// returns format's file extension
return BamToolsIndex::BTI_EXTENSION;
}
-bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+void BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
// return false ref ID is not a valid index in file summary data
if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
- return false;
+ throw BamException("BamToolsIndex::GetOffset", "invalid region requested");
// retrieve reference index data for left bound reference
BtiReferenceEntry refEntry(region.LeftRefID);
- if ( !ReadReferenceEntry(refEntry) ) {
- cerr << "BamToolsIndex ERROR: could not retrieve index data from BTI file" << endl;
- return false;
- }
+ ReadReferenceEntry(refEntry);
// binary search for an overlapping block (may not be first one though)
bool found = false;
// sets to false if blocks container is empty, or if no matching block could be found
*hasAlignmentsInRegion = found;
-
- // return success
- return true;
}
// returns whether reference has alignments or no
return ( refSummary.NumBlocks > 0 );
}
+// pre-allocates space for each reference's summary data
void BamToolsIndex::InitializeFileSummary(const int& numReferences) {
m_indexFileSummary.clear();
for ( int i = 0; i < numReferences; ++i )
m_indexFileSummary.push_back( BtiReferenceSummary() );
}
+// returns true if the index stream is open
bool BamToolsIndex::IsFileOpen(void) const {
- return ( m_indexStream != 0 );
+ return ( Resources.IndexStream != 0 );
}
// attempts to use index data to jump to @region, returns success/fail
*hasAlignmentsInRegion = false;
// skip if invalid reader or not open
- if ( m_reader == 0 || !m_reader->IsOpen() )
+ if ( m_reader == 0 || !m_reader->IsOpen() ) {
+ SetErrorString("BamToolsIndex::Jump", "could not jump: reader is not open");
return false;
+ }
// make sure left-bound position is valid
const RefVector& references = m_reader->GetReferenceData();
- if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
+ if ( region.LeftPosition > references.at(region.LeftRefID).RefLength ) {
+ SetErrorString("BamToolsIndex::Jump", "could not create index: invalid region requested");
return false;
+ }
// calculate nearest offset to jump to
int64_t offset;
- if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
- cerr << "BamToolsIndex ERROR: could not jump"
- << ", unable to calculate offset for specified region" << endl;
+ try {
+ GetOffset(region, offset, hasAlignmentsInRegion);
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
return false;
}
// loads existing data from file into memory
bool BamToolsIndex::Load(const std::string& filename) {
- // attempt open index file (read-only)
- if ( !OpenFile(filename, "rb") ) {
- cerr << "BamToolsIndex ERROR: could not open input index file " << filename
- << ", aborting index load" << endl;
- return false;
- }
+ try {
- // attempt to load & validate BTI header data
- if ( !LoadHeader() ) {
- cerr << "BamToolsIndex ERROR: could load header from index file " << filename
- << ", aborting index load" << endl;
- CloseFile();
- return false;
- }
+ // attempt to open file (read-only)
+ OpenFile(filename, "rb");
+
+ // load metadata & generate in-memory summary
+ LoadHeader();
+ LoadFileSummary();
+
+ // return success
+ return true;
- // attempt to load index file summary
- if ( !LoadFileSummary() ) {
- cerr << "BamToolsIndex ERROR: could not generate a summary of index file " << filename
- << ", aborting index load" << endl;
- CloseFile();
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
return false;
}
-
- // if we get here, index summary is loaded OK
- return true;
}
-bool BamToolsIndex::LoadFileSummary(void) {
+void BamToolsIndex::LoadFileSummary(void) {
// load number of reference sequences
int numReferences;
- if ( !LoadNumReferences(numReferences) )
- return false;
+ LoadNumReferences(numReferences);
// initialize file summary data
InitializeFileSummary(numReferences);
- // iterate over reference entries
- bool loadedOk = true;
+ // load summary for each reference
BtiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
BtiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
for ( ; summaryIter != summaryEnd; ++summaryIter )
- loadedOk &= LoadReferenceSummary(*summaryIter);
-
- // return result
- return loadedOk;
+ LoadReferenceSummary(*summaryIter);
}
-bool BamToolsIndex::LoadHeader(void) {
+void BamToolsIndex::LoadHeader(void) {
- // if invalid format 'magic number'
- if ( !CheckMagicNumber() )
- return false;
-
- // if invalid BTI version
- if ( !CheckVersion() )
- return false;
+ // check BTI file metadata
+ CheckMagicNumber();
+ CheckVersion();
// use file's BTI block size to set member variable
- size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, m_indexStream);
+ const size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamToolsIndex::LoadHeader", "could not read BTI block size");
}
-bool BamToolsIndex::LoadNumBlocks(int& numBlocks) {
- size_t elementsRead = 0;
- elementsRead += fread(&numBlocks, sizeof(numBlocks), 1, m_indexStream);
+void BamToolsIndex::LoadNumBlocks(int& numBlocks) {
+ const size_t elementsRead = fread(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numBlocks);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamToolsIndex::LoadNumBlocks", "could not read number of BTI blocks");
}
-bool BamToolsIndex::LoadNumReferences(int& numReferences) {
- size_t elementsRead = 0;
- elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+void BamToolsIndex::LoadNumReferences(int& numReferences) {
+ const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
if ( m_isBigEndian ) SwapEndian_32(numReferences);
- return ( elementsRead == 1 );
+ if ( elementsRead != 1 )
+ throw BamException("BamToolsIndex::LoadNumReferences", "could not read number of references");
}
-bool BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
+void BamToolsIndex::LoadReferenceSummary(BtiReferenceSummary& refSummary) {
// load number of blocks
int numBlocks;
- if ( !LoadNumBlocks(numBlocks) )
- return false;
+ LoadNumBlocks(numBlocks);
// store block summary data for this reference
refSummary.NumBlocks = numBlocks;
refSummary.FirstBlockFilePosition = Tell();
- // skip blocks in index file (and return status)
- return SkipBlocks(numBlocks);
+ // skip reference's blocks
+ SkipBlocks(numBlocks);
}
-bool BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
+void BamToolsIndex::OpenFile(const std::string& filename, const char* mode) {
// make sure any previous index file is closed
CloseFile();
// attempt to open file
- m_indexStream = fopen(filename.c_str(), mode);
- return IsFileOpen();
+ Resources.IndexStream = fopen(filename.c_str(), mode);
+ if ( !IsFileOpen() ) {
+ const string message = string("could not open file: ") + filename;
+ throw BamException("BamToolsIndex::OpenFile", message);
+ }
}
-bool BamToolsIndex::ReadBlock(BtiBlock& block) {
+void BamToolsIndex::ReadBlock(BtiBlock& block) {
// read in block data members
size_t elementsRead = 0;
- elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, m_indexStream);
- elementsRead += fread(&block.StartOffset, sizeof(block.StartOffset), 1, m_indexStream);
- elementsRead += fread(&block.StartPosition, sizeof(block.StartPosition), 1, m_indexStream);
+ elementsRead += fread(&block.MaxEndPosition, sizeof(block.MaxEndPosition), 1, Resources.IndexStream);
+ elementsRead += fread(&block.StartOffset, sizeof(block.StartOffset), 1, Resources.IndexStream);
+ elementsRead += fread(&block.StartPosition, sizeof(block.StartPosition), 1, Resources.IndexStream);
// swap endian-ness if necessary
if ( m_isBigEndian ) {
SwapEndian_32(block.StartPosition);
}
- // return success/failure
- return ( elementsRead == 3 );
+ if ( elementsRead != 3 )
+ throw BamException("BamToolsIndex::ReadBlock", "could not read block");
}
-bool BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
+void BamToolsIndex::ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks) {
// prep blocks container
blocks.clear();
blocks.reserve(refSummary.NumBlocks);
// skip to first block entry
- if ( !Seek( refSummary.FirstBlockFilePosition, SEEK_SET ) ) {
- cerr << "BamToolsIndex ERROR: could not seek to position "
- << refSummary.FirstBlockFilePosition << endl;
- return false;
- }
+ Seek( refSummary.FirstBlockFilePosition, SEEK_SET );
// read & store block entries
- bool readOk = true;
BtiBlock block;
for ( int i = 0; i < refSummary.NumBlocks; ++i ) {
- readOk &= ReadBlock(block);
+ ReadBlock(block);
blocks.push_back(block);
}
- return readOk;
}
-bool BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
+void BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {
// return false if refId not valid index in file summary structure
if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )
- return false;
+ throw BamException("BamToolsIndex::ReadReferenceEntry", "invalid reference requested");
// use index summary to assist reading the reference's BTI blocks
const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);
- return ReadBlocks(refSummary, refEntry.Blocks);
+ ReadBlocks(refSummary, refEntry.Blocks);
}
-bool BamToolsIndex::Seek(const int64_t& position, const int& origin) {
- return ( fseek64(m_indexStream, position, origin) == 0 );
+void BamToolsIndex::Seek(const int64_t& position, const int& origin) {
+ if ( fseek64(Resources.IndexStream, position, origin) != 0 )
+ throw BamException("BamToolsIndex::Seek", "could not seek in BAI file");
}
// change the index caching behavior
// do nothing else here ? cache mode will be ignored from now on, most likely
}
-bool BamToolsIndex::SkipBlocks(const int& numBlocks) {
- return Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
+void BamToolsIndex::SkipBlocks(const int& numBlocks) {
+ Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );
}
int64_t BamToolsIndex::Tell(void) const {
- return ftell64(m_indexStream);
+ return ftell64(Resources.IndexStream);
}
-bool BamToolsIndex::WriteBlock(const BtiBlock& block) {
+void BamToolsIndex::WriteBlock(const BtiBlock& block) {
// copy entry data
int32_t maxEndPosition = block.MaxEndPosition;
// write the reference index entry
size_t elementsWritten = 0;
- elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);
- elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, m_indexStream);
- elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, m_indexStream);
- return ( elementsWritten == 3 );
+ elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, Resources.IndexStream);
+ elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, Resources.IndexStream);
+ elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, Resources.IndexStream);
+ if ( elementsWritten != 3 )
+ throw BamException("BamToolsIndex::WriteBlock", "could not write BTI block");
}
-bool BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
- bool writtenOk = true;
+void BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {
BtiBlockVector::const_iterator blockIter = blocks.begin();
BtiBlockVector::const_iterator blockEnd = blocks.end();
for ( ; blockIter != blockEnd; ++blockIter )
- writtenOk &= WriteBlock(*blockIter);
- return writtenOk;
+ WriteBlock(*blockIter);
}
-bool BamToolsIndex::WriteHeader(void) {
+void BamToolsIndex::WriteHeader(void) {
size_t elementsWritten = 0;
// write BTI index format 'magic number'
- elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, m_indexStream);
+ elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, Resources.IndexStream);
// write BTI index format version
int32_t currentVersion = (int32_t)m_outputVersion;
if ( m_isBigEndian ) SwapEndian_32(currentVersion);
- elementsWritten += fwrite(¤tVersion, sizeof(currentVersion), 1, m_indexStream);
+ elementsWritten += fwrite(¤tVersion, sizeof(currentVersion), 1, Resources.IndexStream);
// write block size
- int32_t blockSize = m_blockSize;
+ uint32_t blockSize = m_blockSize;
if ( m_isBigEndian ) SwapEndian_32(blockSize);
- elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);
+ elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, Resources.IndexStream);
// write number of references
int32_t numReferences = m_indexFileSummary.size();
if ( m_isBigEndian ) SwapEndian_32(numReferences);
- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
+ elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, Resources.IndexStream);
- // return success/failure of write
- return ( elementsWritten == 7 );
+ if ( elementsWritten != 7 )
+ throw BamException("BamToolsIndex::WriteHeader", "could not write BTI header");
}
-bool BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
-
- size_t elementsWritten = 0;
+void BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {
// write number of blocks this reference
uint32_t numBlocks = refEntry.Blocks.size();
if ( m_isBigEndian ) SwapEndian_32(numBlocks);
- elementsWritten += fwrite(&numBlocks, sizeof(numBlocks), 1, m_indexStream);
+ const size_t elementsWritten = fwrite(&numBlocks, sizeof(numBlocks), 1, Resources.IndexStream);
+ if ( elementsWritten != 1 )
+ throw BamException("BamToolsIndex::WriteReferenceEntry", "could not write number of blocks");
// write actual block entries
- const bool blocksOk = WriteBlocks(refEntry.Blocks);
-
- // return success/fail
- return ( elementsWritten == 1) && blocksOk;
+ WriteBlocks(refEntry.Blocks);
}
// BamToolsIndex.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the BamTools index format (".bti")
// ***************************************************************************
// returns format's file extension
static const std::string Extension(void);
- // internal file ops
+ // internal methods
private:
- bool CheckMagicNumber(void);
- bool CheckVersion(void);
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CheckVersion(void);
void CloseFile(void);
bool IsFileOpen(void) const;
- bool OpenFile(const std::string& filename, const char* mode);
- bool Seek(const int64_t& position, const int& origin);
+ void OpenFile(const std::string& filename, const char* mode);
+ void Seek(const int64_t& position, const int& origin);
int64_t Tell(void) const;
- // internal BTI index building methods
- private:
+ // index-creation methods
void ClearReferenceEntry(BtiReferenceEntry& refEntry);
-
- // internal random-access methods
- private:
- bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
-
- // internal BTI summary data methods
- private:
+ void WriteBlock(const BtiBlock& block);
+ void WriteBlocks(const BtiBlockVector& blocks);
+ void WriteHeader(void);
+ void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+ // random-access methods
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ void ReadBlock(BtiBlock& block);
+ void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+ void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+ // BTI summary data methods
void InitializeFileSummary(const int& numReferences);
- bool LoadFileSummary(void);
- bool LoadHeader(void);
- bool LoadNumBlocks(int& numBlocks);
- bool LoadNumReferences(int& numReferences);
- bool LoadReferenceSummary(BtiReferenceSummary& refSummary);
- bool SkipBlocks(const int& numBlocks);
-
- // internal BTI full index input methods
- private:
- bool ReadBlock(BtiBlock& block);
- bool ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
- bool ReadReferenceEntry(BtiReferenceEntry& refEntry);
-
- // internal BTI full index output methods
- private:
- bool WriteBlock(const BtiBlock& block);
- bool WriteBlocks(const BtiBlockVector& blocks);
- bool WriteHeader(void);
- bool WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+ void LoadFileSummary(void);
+ void LoadHeader(void);
+ void LoadNumBlocks(int& numBlocks);
+ void LoadNumReferences(int& numReferences);
+ void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+ void SkipBlocks(const int& numBlocks);
// data members
private:
- FILE* m_indexStream;
bool m_isBigEndian;
BamIndex::IndexCacheMode m_cacheMode;
BtiFileSummary m_indexFileSummary;
- int m_blockSize;
+ uint32_t m_blockSize;
int32_t m_inputVersion; // Version is serialized as int
Version m_outputVersion;
+ struct RaiiWrapper {
+ FILE* IndexStream;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper Resources;
+
// static constants
private:
- static const int DEFAULT_BLOCK_LENGTH;
+ static const uint32_t DEFAULT_BLOCK_LENGTH;
static const std::string BTI_EXTENSION;
static const char* const BTI_MAGIC;
static const int SIZEOF_BLOCK;
// BamWriter_p.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 16 June 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for producing BAM files
// ***************************************************************************
#include <api/BamAlignment.h>
#include <api/BamConstants.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/BamWriter_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
-#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
// dtor
BamWriterPrivate::~BamWriterPrivate(void) {
- m_stream.Close();
+ Close();
}
// calculates minimum bin for a BAM alignment interval
-unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+uint32_t BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
--end;
if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);
if ( (begin >> 17) == (end >> 17) ) return 585 + (begin >> 17);
// closes the alignment archive
void BamWriterPrivate::Close(void) {
- m_stream.Close();
+
+ // skip if file not open
+ if ( !IsOpen() ) return;
+
+ // close output stream
+ try {
+ m_stream.Close();
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
+ }
}
// creates a cigar string from the supplied alignment
void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
// initialize
- const unsigned int numCigarOperations = cigarOperations.size();
+ const size_t numCigarOperations = cigarOperations.size();
packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);
// pack the cigar data into the string
for ( ; coIter != coEnd; ++coIter ) {
// store op in packedCigar
- unsigned int cigarOp;
+ uint8_t cigarOp;
switch ( coIter->Type ) {
case (Constants::BAM_CIGAR_MATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MATCH; break;
case (Constants::BAM_CIGAR_INS_CHAR) : cigarOp = Constants::BAM_CIGAR_INS; break;
case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;
case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;
default:
- fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\n", coIter->Type);
- exit(1);
+ const string message = string("invalid CIGAR operation type") + coIter->Type;
+ throw BamException("BamWriter::CreatePackedCigar", message);
}
*pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;
void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
// prepare the encoded query string
- const unsigned int queryLen = query.size();
- const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);
- encodedQuery.resize(encodedQueryLen);
+ const size_t queryLength = query.size();
+ const size_t encodedQueryLength = static_cast<size_t>((queryLength+1)/2);
+ encodedQuery.resize(encodedQueryLength);
char* pEncodedQuery = (char*)encodedQuery.data();
const char* pQuery = (const char*)query.data();
+ // walk through original query sequence, encoding its bases
unsigned char nucleotideCode;
bool useHighWord = true;
-
while ( *pQuery ) {
switch ( *pQuery ) {
case (Constants::BAM_DNA_EQUAL) : nucleotideCode = Constants::BAM_BASECODE_EQUAL; break;
case (Constants::BAM_DNA_A) : nucleotideCode = Constants::BAM_BASECODE_A; break;
case (Constants::BAM_DNA_C) : nucleotideCode = Constants::BAM_BASECODE_C; break;
+ case (Constants::BAM_DNA_M) : nucleotideCode = Constants::BAM_BASECODE_M; break;
case (Constants::BAM_DNA_G) : nucleotideCode = Constants::BAM_BASECODE_G; break;
+ case (Constants::BAM_DNA_R) : nucleotideCode = Constants::BAM_BASECODE_R; break;
+ case (Constants::BAM_DNA_S) : nucleotideCode = Constants::BAM_BASECODE_S; break;
+ case (Constants::BAM_DNA_V) : nucleotideCode = Constants::BAM_BASECODE_V; break;
case (Constants::BAM_DNA_T) : nucleotideCode = Constants::BAM_BASECODE_T; break;
+ case (Constants::BAM_DNA_W) : nucleotideCode = Constants::BAM_BASECODE_W; break;
+ case (Constants::BAM_DNA_Y) : nucleotideCode = Constants::BAM_BASECODE_Y; break;
+ case (Constants::BAM_DNA_H) : nucleotideCode = Constants::BAM_BASECODE_H; break;
+ case (Constants::BAM_DNA_K) : nucleotideCode = Constants::BAM_BASECODE_K; break;
+ case (Constants::BAM_DNA_D) : nucleotideCode = Constants::BAM_BASECODE_D; break;
+ case (Constants::BAM_DNA_B) : nucleotideCode = Constants::BAM_BASECODE_B; break;
case (Constants::BAM_DNA_N) : nucleotideCode = Constants::BAM_BASECODE_N; break;
default:
- fprintf(stderr, "BamWriter ERROR: only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
- exit(1);
+ const string message = string("invalid base: ") + *pQuery;
+ throw BamException("BamWriter::EncodeQuerySequence", message);
}
// pack the nucleotide code
}
}
+// returns a description of the last error that occurred
+std::string BamWriterPrivate::GetErrorString(void) const {
+ return m_errorString;
+}
+
// returns whether BAM file is open for writing or not
bool BamWriterPrivate::IsOpen(void) const {
return m_stream.IsOpen;
const string& samHeaderText,
const RefVector& referenceSequences)
{
- // open the BGZF file for writing, return failure if error
- if ( !m_stream.Open(filename, "wb") )
- return false;
+ try {
- // write BAM file 'metadata' components
- WriteMagicNumber();
- WriteSamHeaderText(samHeaderText);
- WriteReferences(referenceSequences);
- return true;
-}
+ // open the BGZF file for writing, return failure if error
+ m_stream.Open(filename, "wb");
-// saves the alignment to the alignment archive
-void BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
-
- // if BamAlignment contains only the core data and a raw char data buffer
- // (as a result of BamReader::GetNextAlignmentCore())
- if ( al.SupportData.HasCoreOnly ) {
-
- // write the block size
- unsigned int blockSize = al.SupportData.BlockLength;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
- m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
- // re-calculate bin (in case BamAlignment's position has been previously modified)
- const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
-
- // assign the BAM core data
- uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
- buffer[0] = al.RefID;
- buffer[1] = al.Position;
- buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
- buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
- buffer[4] = al.SupportData.QuerySequenceLength;
- buffer[5] = al.MateRefID;
- buffer[6] = al.MatePosition;
- buffer[7] = al.InsertSize;
-
- // swap BAM core endian-ness, if necessary
- if ( m_isBigEndian ) {
- for ( int i = 0; i < 8; ++i )
- BamTools::SwapEndian_32(buffer[i]);
- }
+ // write BAM file 'metadata' components
+ WriteMagicNumber();
+ WriteSamHeaderText(samHeaderText);
+ WriteReferences(referenceSequences);
- // write the BAM core
- m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+ // return success
+ return true;
- // write the raw char data
- m_stream.Write((char*)al.SupportData.AllCharData.data(),
- al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
+ return false;
}
+}
- // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
- // ( resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code )
- else {
-
- // calculate char lengths
- const unsigned int nameLength = al.Name.size() + 1;
- const unsigned int numCigarOperations = al.CigarData.size();
- const unsigned int queryLength = al.QueryBases.size();
- const unsigned int tagDataLength = al.TagData.size();
-
- // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
- // force calculation of Bin before storing
- const int endPosition = al.GetEndPosition();
- const unsigned int alignmentBin = CalculateMinimumBin(al.Position, endPosition);
-
- // create our packed cigar string
- string packedCigar;
- CreatePackedCigar(al.CigarData, packedCigar);
- const unsigned int packedCigarLength = packedCigar.size();
-
- // encode the query
- string encodedQuery;
- EncodeQuerySequence(al.QueryBases, encodedQuery);
- const unsigned int encodedQueryLength = encodedQuery.size();
-
- // write the block size
- const unsigned int dataBlockSize = nameLength +
- packedCigarLength +
- encodedQueryLength +
- queryLength +
- tagDataLength;
- unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
- if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
- m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
-
- // assign the BAM core data
- uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
- buffer[0] = al.RefID;
- buffer[1] = al.Position;
- buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
- buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
- buffer[4] = queryLength;
- buffer[5] = al.MateRefID;
- buffer[6] = al.MatePosition;
- buffer[7] = al.InsertSize;
-
- // swap BAM core endian-ness, if necessary
- if ( m_isBigEndian ) {
- for ( int i = 0; i < 8; ++i )
- BamTools::SwapEndian_32(buffer[i]);
- }
-
- // write the BAM core
- m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
-
- // write the query name
- m_stream.Write(al.Name.c_str(), nameLength);
-
- // write the packed cigar
- if ( m_isBigEndian ) {
- char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
- memcpy(cigarData, packedCigar.data(), packedCigarLength);
- if ( m_isBigEndian ) {
- for ( unsigned int i = 0; i < packedCigarLength; ++i )
- BamTools::SwapEndian_32p(&cigarData[i]);
- }
- m_stream.Write(cigarData, packedCigarLength);
- free(cigarData);
- }
- else
- m_stream.Write(packedCigar.data(), packedCigarLength);
+// saves the alignment to the alignment archive
+bool BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
- // write the encoded query sequence
- m_stream.Write(encodedQuery.data(), encodedQueryLength);
+ try {
- // write the base qualities
- char* pBaseQualities = (char*)al.Qualities.data();
- for ( unsigned int i = 0; i < queryLength; ++i )
- pBaseQualities[i] -= 33; // FASTQ conversion
- m_stream.Write(pBaseQualities, queryLength);
+ // if BamAlignment contains only the core data and a raw char data buffer
+ // (as a result of BamReader::GetNextAlignmentCore())
+ if ( al.SupportData.HasCoreOnly )
+ WriteCoreAlignment(al);
- // write the read group tag
- if ( m_isBigEndian ) {
+ // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
+ // (resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code)
+ else WriteAlignment(al);
- char* tagData = (char*)calloc(sizeof(char), tagDataLength);
- memcpy(tagData, al.TagData.data(), tagDataLength);
+ // if we get here, everything OK
+ return true;
- int i = 0;
- while ( (unsigned int)i < tagDataLength ) {
+ } catch ( BamException& e ) {
+ m_errorString = e.what();
+ return false;
+ }
+}
- i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
- const char type = tagData[i]; // get tag type at position i
- ++i;
+void BamWriterPrivate::SetWriteCompressed(bool ok) {
+ // modifying compression is not allowed if BAM file is open
+ if ( !IsOpen() )
+ m_stream.SetWriteCompressed(ok);
+}
- switch ( type ) {
+void BamWriterPrivate::WriteAlignment(const BamAlignment& al) {
+
+ // calculate char lengths
+ const unsigned int nameLength = al.Name.size() + 1;
+ const unsigned int numCigarOperations = al.CigarData.size();
+ const unsigned int queryLength = al.QueryBases.size();
+ const unsigned int tagDataLength = al.TagData.size();
+
+ // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
+ // force calculation of Bin before storing
+ const int endPosition = al.GetEndPosition();
+ const uint32_t alignmentBin = CalculateMinimumBin(al.Position, endPosition);
+
+ // create our packed cigar string
+ string packedCigar;
+ CreatePackedCigar(al.CigarData, packedCigar);
+ const unsigned int packedCigarLength = packedCigar.size();
+
+ // encode the query
+ string encodedQuery;
+ EncodeQuerySequence(al.QueryBases, encodedQuery);
+ const unsigned int encodedQueryLength = encodedQuery.size();
+
+ // write the block size
+ const unsigned int dataBlockSize = nameLength +
+ packedCigarLength +
+ encodedQueryLength +
+ queryLength +
+ tagDataLength;
+ unsigned int blockSize = Constants::BAM_CORE_SIZE + dataBlockSize;
+ if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+ m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+ // assign the BAM core data
+ uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+ buffer[0] = al.RefID;
+ buffer[1] = al.Position;
+ buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+ buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+ buffer[4] = queryLength;
+ buffer[5] = al.MateRefID;
+ buffer[6] = al.MatePosition;
+ buffer[7] = al.InsertSize;
+
+ // swap BAM core endian-ness, if necessary
+ if ( m_isBigEndian ) {
+ for ( int i = 0; i < 8; ++i )
+ BamTools::SwapEndian_32(buffer[i]);
+ }
- case(Constants::BAM_TAG_TYPE_ASCII) :
- case(Constants::BAM_TAG_TYPE_INT8) :
- case(Constants::BAM_TAG_TYPE_UINT8) :
- ++i;
- break;
-
- case(Constants::BAM_TAG_TYPE_INT16) :
- case(Constants::BAM_TAG_TYPE_UINT16) :
- BamTools::SwapEndian_16p(&tagData[i]);
- i += sizeof(uint16_t);
- break;
-
- case(Constants::BAM_TAG_TYPE_FLOAT) :
- case(Constants::BAM_TAG_TYPE_INT32) :
- case(Constants::BAM_TAG_TYPE_UINT32) :
- BamTools::SwapEndian_32p(&tagData[i]);
- i += sizeof(uint32_t);
- break;
-
- case(Constants::BAM_TAG_TYPE_HEX) :
- case(Constants::BAM_TAG_TYPE_STRING) :
- // no endian swapping necessary for hex-string/string data
- while ( tagData[i] )
- ++i;
- // increment one more for null terminator
- ++i;
- break;
+ // write the BAM core
+ m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
- case(Constants::BAM_TAG_TYPE_ARRAY) :
+ // write the query name
+ m_stream.Write(al.Name.c_str(), nameLength);
- {
- // read array type
- const char arrayType = tagData[i];
+ // write the packed cigar
+ if ( m_isBigEndian ) {
+ char* cigarData = new char[packedCigarLength]();
+ memcpy(cigarData, packedCigar.data(), packedCigarLength);
+ if ( m_isBigEndian ) {
+ for ( size_t i = 0; i < packedCigarLength; ++i )
+ BamTools::SwapEndian_32p(&cigarData[i]);
+ }
+ m_stream.Write(cigarData, packedCigarLength);
+ delete[] cigarData; // TODO: cleanup on Write exception thrown?
+ }
+ else
+ m_stream.Write(packedCigar.data(), packedCigarLength);
+
+ // write the encoded query sequence
+ m_stream.Write(encodedQuery.data(), encodedQueryLength);
+
+ // write the base qualities
+ char* pBaseQualities = (char*)al.Qualities.data();
+ for ( size_t i = 0; i < queryLength; ++i )
+ pBaseQualities[i] -= 33; // FASTQ conversion
+ m_stream.Write(pBaseQualities, queryLength);
+
+ // write the read group tag
+ if ( m_isBigEndian ) {
+
+ char* tagData = new char[tagDataLength]();
+ memcpy(tagData, al.TagData.data(), tagDataLength);
+
+ size_t i = 0;
+ while ( i < tagDataLength ) {
+
+ i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
+ const char type = tagData[i]; // get tag type at position i
+ ++i;
+
+ switch ( type ) {
+
+ case(Constants::BAM_TAG_TYPE_ASCII) :
+ case(Constants::BAM_TAG_TYPE_INT8) :
+ case(Constants::BAM_TAG_TYPE_UINT8) :
+ ++i;
+ break;
+
+ case(Constants::BAM_TAG_TYPE_INT16) :
+ case(Constants::BAM_TAG_TYPE_UINT16) :
+ BamTools::SwapEndian_16p(&tagData[i]);
+ i += sizeof(uint16_t);
+ break;
+
+ case(Constants::BAM_TAG_TYPE_FLOAT) :
+ case(Constants::BAM_TAG_TYPE_INT32) :
+ case(Constants::BAM_TAG_TYPE_UINT32) :
+ BamTools::SwapEndian_32p(&tagData[i]);
+ i += sizeof(uint32_t);
+ break;
+
+ case(Constants::BAM_TAG_TYPE_HEX) :
+ case(Constants::BAM_TAG_TYPE_STRING) :
+ // no endian swapping necessary for hex-string/string data
+ while ( tagData[i] )
++i;
-
- // swap endian-ness of number of elements in place, then retrieve for loop
- BamTools::SwapEndian_32p(&tagData[i]);
- int32_t numElements;
- memcpy(&numElements, &tagData[i], sizeof(uint32_t));
- i += sizeof(uint32_t);
-
- // swap endian-ness of array elements
- for ( int j = 0; j < numElements; ++j ) {
- switch (arrayType) {
- case (Constants::BAM_TAG_TYPE_INT8) :
- case (Constants::BAM_TAG_TYPE_UINT8) :
- // no endian-swapping necessary
- ++i;
- break;
- case (Constants::BAM_TAG_TYPE_INT16) :
- case (Constants::BAM_TAG_TYPE_UINT16) :
- BamTools::SwapEndian_16p(&tagData[i]);
- i += sizeof(uint16_t);
- break;
- case (Constants::BAM_TAG_TYPE_FLOAT) :
- case (Constants::BAM_TAG_TYPE_INT32) :
- case (Constants::BAM_TAG_TYPE_UINT32) :
- BamTools::SwapEndian_32p(&tagData[i]);
- i += sizeof(uint32_t);
- break;
- default:
- // error case
- fprintf(stderr,
- "BamWriter ERROR: unknown binary array type encountered: [%c]\n",
- arrayType);
- exit(1);
- }
+ // increment one more for null terminator
+ ++i;
+ break;
+
+ case(Constants::BAM_TAG_TYPE_ARRAY) :
+
+ {
+ // read array type
+ const char arrayType = tagData[i];
+ ++i;
+
+ // swap endian-ness of number of elements in place, then retrieve for loop
+ BamTools::SwapEndian_32p(&tagData[i]);
+ int32_t numElements;
+ memcpy(&numElements, &tagData[i], sizeof(uint32_t));
+ i += sizeof(uint32_t);
+
+ // swap endian-ness of array elements
+ for ( int j = 0; j < numElements; ++j ) {
+ switch (arrayType) {
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_UINT8) :
+ // no endian-swapping necessary
+ ++i;
+ break;
+ case (Constants::BAM_TAG_TYPE_INT16) :
+ case (Constants::BAM_TAG_TYPE_UINT16) :
+ BamTools::SwapEndian_16p(&tagData[i]);
+ i += sizeof(uint16_t);
+ break;
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
+ case (Constants::BAM_TAG_TYPE_INT32) :
+ case (Constants::BAM_TAG_TYPE_UINT32) :
+ BamTools::SwapEndian_32p(&tagData[i]);
+ i += sizeof(uint32_t);
+ break;
+ default:
+ delete[] tagData;
+ const string message = string("invalid binary array type: ") + arrayType;
+ throw BamException("BamWriter::SaveAlignment", message);
}
-
- break;
}
- default :
- fprintf(stderr, "BamWriter ERROR: invalid tag value type\n"); // shouldn't get here
- free(tagData);
- exit(1);
+ break;
}
+
+ default :
+ delete[] tagData;
+ const string message = string("invalid tag type: ") + type;
+ throw BamException("BamWriter::SaveAlignment", message);
}
- m_stream.Write(tagData, tagDataLength);
- free(tagData);
}
- else
- m_stream.Write(al.TagData.data(), tagDataLength);
+
+ m_stream.Write(tagData, tagDataLength);
+ delete[] tagData; // TODO: cleanup on Write exception thrown?
}
+ else
+ m_stream.Write(al.TagData.data(), tagDataLength);
}
-void BamWriterPrivate::SetWriteCompressed(bool ok) {
-
- // warn if BAM file is already open
- // modifying compression is not allowed in this case
- if ( IsOpen() ) {
- cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "
- << "Ignoring request." << endl;
- return;
+void BamWriterPrivate::WriteCoreAlignment(const BamAlignment& al) {
+
+ // write the block size
+ unsigned int blockSize = al.SupportData.BlockLength;
+ if ( m_isBigEndian ) BamTools::SwapEndian_32(blockSize);
+ m_stream.Write((char*)&blockSize, Constants::BAM_SIZEOF_INT);
+
+ // re-calculate bin (in case BamAlignment's position has been previously modified)
+ const uint32_t alignmentBin = CalculateMinimumBin(al.Position, al.GetEndPosition());
+
+ // assign the BAM core data
+ uint32_t buffer[Constants::BAM_CORE_BUFFER_SIZE];
+ buffer[0] = al.RefID;
+ buffer[1] = al.Position;
+ buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+ buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+ buffer[4] = al.SupportData.QuerySequenceLength;
+ buffer[5] = al.MateRefID;
+ buffer[6] = al.MatePosition;
+ buffer[7] = al.InsertSize;
+
+ // swap BAM core endian-ness, if necessary
+ if ( m_isBigEndian ) {
+ for ( int i = 0; i < 8; ++i )
+ BamTools::SwapEndian_32(buffer[i]);
}
- // set BgzfStream compression mode
- m_stream.SetWriteCompressed(ok);
+ // write the BAM core
+ m_stream.Write((char*)&buffer, Constants::BAM_CORE_SIZE);
+
+ // write the raw char data
+ m_stream.Write((char*)al.SupportData.AllCharData.data(),
+ al.SupportData.BlockLength-Constants::BAM_CORE_SIZE);
}
void BamWriterPrivate::WriteMagicNumber(void) {
// BamWriter_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 24 February 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for producing BAM files
// ***************************************************************************
#include <vector>
namespace BamTools {
+
+class BamAlignment;
+
namespace Internal {
class BamWriterPrivate {
// interface methods
public:
void Close(void);
+ std::string GetErrorString(void) const;
bool IsOpen(void) const;
bool Open(const std::string& filename,
const std::string& samHeaderText,
const BamTools::RefVector& referenceSequences);
- void SaveAlignment(const BamAlignment& al);
+ bool SaveAlignment(const BamAlignment& al);
void SetWriteCompressed(bool ok);
// 'internal' methods
public:
- unsigned int CalculateMinimumBin(const int begin, int end) const;
+ uint32_t CalculateMinimumBin(const int begin, int end) const;
void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+ void WriteAlignment(const BamAlignment& al);
+ void WriteCoreAlignment(const BamAlignment& al);
void WriteMagicNumber(void);
void WriteReferences(const BamTools::RefVector& referenceSequences);
void WriteSamHeaderText(const std::string& samHeaderText);
private:
BgzfStream m_stream;
bool m_isBigEndian;
+ std::string m_errorString;
};
} // namespace Internal
// BgzfStream_p.cpp (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 2 September 2011(DB)
+// Last modified: 6 October 2011(DB)
// ---------------------------------------------------------------------------
// Based on BGZF routines developed at the Broad Institute.
// Provides the basic functionality for reading & writing BGZF files
// Replaces the old BGZF.* files to avoid clashing with other toolkits
// ***************************************************************************
+#include <api/internal/BamException_p.h>
#include <api/internal/BgzfStream_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
#include <cstring>
#include <algorithm>
+#include <sstream>
using namespace std;
+// ----------------------------
+// RaiiWrapper implementation
+// ----------------------------
+
+BgzfStream::RaiiWrapper::RaiiWrapper(void)
+ : Stream(0)
+{
+ CompressedBlock = new char[Constants::BGZF_MAX_BLOCK_SIZE];
+ UncompressedBlock = new char[Constants::BGZF_DEFAULT_BLOCK_SIZE];
+}
+
+BgzfStream::RaiiWrapper::~RaiiWrapper(void) {
+
+ // clean up buffers
+ delete[] CompressedBlock;
+ delete[] UncompressedBlock;
+ CompressedBlock = 0;
+ UncompressedBlock = 0;
+
+ if ( Stream ) {
+ fflush(Stream);
+ fclose(Stream);
+ Stream = 0;
+ }
+}
+
+// ---------------------------
+// BgzfStream implementation
+// ---------------------------
+
// constructor
BgzfStream::BgzfStream(void)
- : UncompressedBlockSize(Constants::BGZF_DEFAULT_BLOCK_SIZE)
- , CompressedBlockSize(Constants::BGZF_MAX_BLOCK_SIZE)
- , BlockLength(0)
+ : BlockLength(0)
, BlockOffset(0)
, BlockAddress(0)
, IsOpen(false)
, IsWriteOnly(false)
, IsWriteCompressed(true)
- , Stream(NULL)
- , UncompressedBlock(NULL)
- , CompressedBlock(NULL)
-{
- try {
- CompressedBlock = new char[CompressedBlockSize];
- UncompressedBlock = new char[UncompressedBlockSize];
- } catch( std::bad_alloc& ba ) {
- fprintf(stderr, "BgzfStream ERROR: unable to allocate memory\n");
- exit(1);
- }
-}
+{ }
// destructor
BgzfStream::~BgzfStream(void) {
- if( CompressedBlock ) delete[] CompressedBlock;
- if( UncompressedBlock ) delete[] UncompressedBlock;
+ Close();
+}
+
+// checks BGZF block header
+bool BgzfStream::CheckBlockHeader(char* header) {
+ return (header[0] == Constants::GZIP_ID1 &&
+ header[1] == Constants::GZIP_ID2 &&
+ header[2] == Z_DEFLATED &&
+ (header[3] & Constants::FLG_FEXTRA) != 0 &&
+ BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
+ header[12] == Constants::BGZF_ID1 &&
+ header[13] == Constants::BGZF_ID2 &&
+ BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
}
// closes BGZF file
void BgzfStream::Close(void) {
// skip if file not open
- if ( !IsOpen ) return;
+ if ( !IsOpen )
+ return;
// if writing to file, flush the current BGZF block,
// then write an empty block (as EOF marker)
if ( IsWriteOnly ) {
FlushBlock();
- int blockLength = DeflateBlock();
- fwrite(CompressedBlock, 1, blockLength, Stream);
+ const size_t blockLength = DeflateBlock();
+ fwrite(Resources.CompressedBlock, 1, blockLength, Resources.Stream);
}
// flush and close stream
- fflush(Stream);
- fclose(Stream);
-
- // reset flags
- IsWriteCompressed = true;
+ fflush(Resources.Stream);
+ fclose(Resources.Stream);
+ Resources.Stream = 0;
+
+ // reset initial state
+ BlockLength = 0;
+ BlockOffset = 0;
+ BlockAddress = 0;
IsOpen = false;
+ IsWriteOnly = false;
+ IsWriteCompressed = true;
}
// compresses the current block
-int BgzfStream::DeflateBlock(void) {
+size_t BgzfStream::DeflateBlock(void) {
// initialize the gzip header
- char* buffer = CompressedBlock;
+ char* buffer = Resources.CompressedBlock;
memset(buffer, 0, 18);
buffer[0] = Constants::GZIP_ID1;
- buffer[1] = (char)Constants::GZIP_ID2;
+ buffer[1] = Constants::GZIP_ID2;
buffer[2] = Constants::CM_DEFLATE;
buffer[3] = Constants::FLG_FEXTRA;
- buffer[9] = (char)Constants::OS_UNKNOWN;
+ buffer[9] = Constants::OS_UNKNOWN;
buffer[10] = Constants::BGZF_XLEN;
buffer[12] = Constants::BGZF_ID1;
buffer[13] = Constants::BGZF_ID2;
// loop to retry for blocks that do not compress enough
int inputLength = BlockOffset;
- int compressedLength = 0;
- unsigned int bufferSize = CompressedBlockSize;
+ size_t compressedLength = 0;
+ const unsigned int bufferSize = Constants::BGZF_MAX_BLOCK_SIZE;
while ( true ) {
z_stream zs;
zs.zalloc = NULL;
zs.zfree = NULL;
- zs.next_in = (Bytef*)UncompressedBlock;
+ zs.next_in = (Bytef*)Resources.UncompressedBlock;
zs.avail_in = inputLength;
zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];
- zs.avail_out = bufferSize - Constants::BGZF_BLOCK_HEADER_LENGTH - Constants::BGZF_BLOCK_FOOTER_LENGTH;
+ zs.avail_out = bufferSize -
+ Constants::BGZF_BLOCK_HEADER_LENGTH -
+ Constants::BGZF_BLOCK_FOOTER_LENGTH;
// initialize the zlib compression algorithm
- if ( deflateInit2(&zs,
- compressionLevel,
- Z_DEFLATED,
- Constants::GZIP_WINDOW_BITS,
- Constants::Z_DEFAULT_MEM_LEVEL,
- Z_DEFAULT_STRATEGY) != Z_OK )
- {
- fprintf(stderr, "BgzfStream ERROR: zlib deflate initialization failed\n");
- exit(1);
- }
+ int status = deflateInit2(&zs,
+ compressionLevel,
+ Z_DEFLATED,
+ Constants::GZIP_WINDOW_BITS,
+ Constants::Z_DEFAULT_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY);
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflateInit2 failed");
// compress the data
- int status = deflate(&zs, Z_FINISH);
+ status = deflate(&zs, Z_FINISH);
+
+ // if not at stream end
if ( status != Z_STREAM_END ) {
deflateEnd(&zs);
- // reduce the input length and try again
- if ( status == Z_OK ) {
- inputLength -= 1024;
- if ( inputLength < 0 ) {
- fprintf(stderr, "BgzfStream ERROR: input reduction failed\n");
- exit(1);
- }
- continue;
- }
-
- fprintf(stderr, "BgzfStream ERROR: zlib::deflateEnd() failed\n");
- exit(1);
- }
-
- // finalize the compression routine
- if ( deflateEnd(&zs) != Z_OK ) {
- fprintf(stderr, "BgzfStream ERROR: zlib::deflateEnd() failed\n");
- exit(1);
- }
+ // if error status
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflate failed");
- compressedLength = zs.total_out;
- compressedLength += Constants::BGZF_BLOCK_HEADER_LENGTH + Constants::BGZF_BLOCK_FOOTER_LENGTH;
- if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE ) {
- fprintf(stderr, "BgzfStream ERROR: deflate overflow\n");
- exit(1);
+ // not enough space available in buffer
+ // try to reduce the input length & re-start loop
+ inputLength -= 1024;
+ if ( inputLength <= 0 )
+ throw BamException("BgzfStream::DeflateBlock", "input reduction failed");
+ continue;
}
+ // finalize the compression routine
+ status = deflateEnd(&zs);
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::DeflateBlock", "zlib deflateEnd failed");
+
+ // update compressedLength
+ compressedLength = zs.total_out +
+ Constants::BGZF_BLOCK_HEADER_LENGTH +
+ Constants::BGZF_BLOCK_FOOTER_LENGTH;
+ if ( compressedLength > Constants::BGZF_MAX_BLOCK_SIZE )
+ throw BamException("BgzfStream::DeflateBlock", "deflate overflow");
+
+ // quit while loop
break;
}
// store the compressed length
- BamTools::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
+ BamTools::PackUnsignedShort(&buffer[16], static_cast<uint16_t>(compressedLength - 1));
// store the CRC32 checksum
- unsigned int crc = crc32(0, NULL, 0);
- crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
+ uint32_t crc = crc32(0, NULL, 0);
+ crc = crc32(crc, (Bytef*)Resources.UncompressedBlock, inputLength);
BamTools::PackUnsignedInt(&buffer[compressedLength - 8], crc);
BamTools::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
// ensure that we have less than a block of data left
int remaining = BlockOffset - inputLength;
if ( remaining > 0 ) {
- if ( remaining > inputLength ) {
- fprintf(stderr, "BgzfStream ERROR: after deflate, remainder too large\n");
- exit(1);
- }
- memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
+ if ( remaining > inputLength )
+ throw BamException("BgzfStream::DeflateBlock", "after deflate, remainder too large");
+ memcpy(Resources.UncompressedBlock, Resources.UncompressedBlock + inputLength, remaining);
}
- // update block data
+ // update block data & return compressedlength
BlockOffset = remaining;
-
- // return result
return compressedLength;
}
while ( BlockOffset > 0 ) {
// compress the data block
- int blockLength = DeflateBlock();
+ const size_t blockLength = DeflateBlock();
// flush the data to our output stream
- int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);
+ const size_t numBytesWritten = fwrite(Resources.CompressedBlock, 1, blockLength, Resources.Stream);
if ( numBytesWritten != blockLength ) {
- fprintf(stderr, "BgzfStream ERROR: expected to write %u bytes during flushing, but wrote %u bytes\n",
- blockLength, numBytesWritten);
- exit(1);
+ stringstream s("");
+ s << "expected to write " << blockLength
+ << " bytes during flushing, but wrote " << numBytesWritten;
+ throw BamException("BgzfStream::FlushBlock", s.str());
}
// update block data
}
// decompresses the current block
-int BgzfStream::InflateBlock(const int& blockLength) {
+size_t BgzfStream::InflateBlock(const size_t& blockLength) {
- // inflate the data from compressed buffer into uncompressed buffer
+ // setup zlib stream object
z_stream zs;
zs.zalloc = NULL;
zs.zfree = NULL;
- zs.next_in = (Bytef*)CompressedBlock + 18;
+ zs.next_in = (Bytef*)Resources.CompressedBlock + 18;
zs.avail_in = blockLength - 16;
- zs.next_out = (Bytef*)UncompressedBlock;
- zs.avail_out = UncompressedBlockSize;
+ zs.next_out = (Bytef*)Resources.UncompressedBlock;
+ zs.avail_out = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+ // initialize
int status = inflateInit2(&zs, Constants::GZIP_WINDOW_BITS);
- if ( status != Z_OK ) {
- fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflateInit() failed\n");
- return -1;
- }
+ if ( status != Z_OK )
+ throw BamException("BgzfStream::InflateBlock", "zlib inflateInit failed");
+ // decompress
status = inflate(&zs, Z_FINISH);
if ( status != Z_STREAM_END ) {
inflateEnd(&zs);
- fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflate() failed\n");
- return -1;
+ throw BamException("BgzfStream::InflateBlock", "zlib inflate failed");
}
+ // finalize
status = inflateEnd(&zs);
if ( status != Z_OK ) {
- fprintf(stderr, "BgzfStream ERROR: could not decompress block - zlib::inflateEnd() failed\n");
- return -1;
+ inflateEnd(&zs);
+ throw BamException("BgzfStream::InflateBlock", "zlib inflateEnd failed");
}
// return result
}
// opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing)
-bool BgzfStream::Open(const string& filename, const char* mode) {
+void BgzfStream::Open(const string& filename, const char* mode) {
- // close current stream, if necessary, before opening next
- if ( IsOpen ) Close();
+ // make sure we're starting with fresh state
+ if ( IsOpen )
+ Close();
// determine open mode
if ( strcmp(mode, "rb") == 0 )
else if ( strcmp(mode, "wb") == 0)
IsWriteOnly = true;
else {
- fprintf(stderr, "BgzfStream ERROR: unknown file mode: %s\n", mode);
- return false;
+ const string message = string("unknown file mode: ") + mode;
+ throw BamException("BgzfStream::Open", message);
}
// open BGZF stream on a file
if ( (filename != "stdin") && (filename != "stdout") && (filename != "-"))
- Stream = fopen(filename.c_str(), mode);
+ Resources.Stream = fopen(filename.c_str(), mode);
// open BGZF stream on stdin
else if ( (filename == "stdin" || filename == "-") && (strcmp(mode, "rb") == 0 ) )
- Stream = freopen(NULL, mode, stdin);
+ Resources.Stream = freopen(NULL, mode, stdin);
// open BGZF stream on stdout
else if ( (filename == "stdout" || filename == "-") && (strcmp(mode, "wb") == 0) )
- Stream = freopen(NULL, mode, stdout);
+ Resources.Stream = freopen(NULL, mode, stdout);
- if ( !Stream ) {
- fprintf(stderr, "BgzfStream ERROR: unable to open file %s\n", filename.c_str() );
- return false;
+ // ensure valid Stream
+ if ( !Resources.Stream ) {
+ const string message = string("unable to open file: ") + filename;
+ throw BamException("BgzfStream::Open", message);
}
- // set flag & return success
+ // set flag
IsOpen = true;
- return true;
}
// reads BGZF data into a byte buffer
-int BgzfStream::Read(char* data, const unsigned int dataLength) {
+size_t BgzfStream::Read(char* data, const size_t dataLength) {
// if stream not open for reading (or empty request)
if ( !IsOpen || IsWriteOnly || dataLength == 0 )
return 0;
// read blocks as needed until desired data length is retrieved
- char* output = data;
- unsigned int numBytesRead = 0;
+ size_t numBytesRead = 0;
while ( numBytesRead < dataLength ) {
// determine bytes available in current block
// read (and decompress) next block if needed
if ( bytesAvailable <= 0 ) {
- if ( !ReadBlock() ) return -1;
+ ReadBlock();
bytesAvailable = BlockLength - BlockOffset;
- if ( bytesAvailable <= 0 ) break;
+ if ( bytesAvailable <= 0 )
+ break;
}
// copy data from uncompressed source buffer into data destination buffer
- char* buffer = UncompressedBlock;
- int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
- memcpy(output, buffer + BlockOffset, copyLength);
+ const size_t copyLength = min( (dataLength-numBytesRead), (size_t)bytesAvailable );
+ memcpy(data, Resources.UncompressedBlock + BlockOffset, copyLength);
// update counters
BlockOffset += copyLength;
- output += copyLength;
+ data += copyLength;
numBytesRead += copyLength;
}
// update block data
if ( BlockOffset == BlockLength ) {
- BlockAddress = ftell64(Stream);
+ BlockAddress = ftell64(Resources.Stream);
BlockOffset = 0;
BlockLength = 0;
}
+ // return actual number of bytes read
return numBytesRead;
}
// reads a BGZF block
-bool BgzfStream::ReadBlock(void) {
+void BgzfStream::ReadBlock(void) {
- char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
- int64_t blockAddress = ftell64(Stream);
+ // store block start
+ int64_t blockAddress = ftell64(Resources.Stream);
// read block header from file
- int count = fread(header, 1, sizeof(header), Stream);
+ char header[Constants::BGZF_BLOCK_HEADER_LENGTH];
+ size_t count = fread(header, 1, Constants::BGZF_BLOCK_HEADER_LENGTH, Resources.Stream);
- // if block header empty
+ // if block header empty, set marker & skip rest of method
if ( count == 0 ) {
BlockLength = 0;
- return true;
+ return;
}
// if block header invalid size
- if ( count != sizeof(header) ) {
- fprintf(stderr, "BgzfStream ERROR: read block failed - could not read block header\n");
- return false;
- }
+ if ( count != sizeof(header) )
+ throw BamException("BgzfStream::ReadBlock", "invalid block header size");
// validate block header contents
- if ( !BgzfStream::CheckBlockHeader(header) ) {
- fprintf(stderr, "BgzfStream ERROR: read block failed - invalid block header\n");
- return false;
- }
+ if ( !BgzfStream::CheckBlockHeader(header) )
+ throw BamException("BgzfStream::ReadBlock", "invalid block header contents");
// copy header contents to compressed buffer
- int blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
- char* compressedBlock = CompressedBlock;
- memcpy(compressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
- int remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+ const size_t blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;
+ memcpy(Resources.CompressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);
// read remainder of block
- count = fread(&compressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Stream);
- if ( count != remaining ) {
- fprintf(stderr, "BgzfStream ERROR: read block failed - could not read data from block\n");
- return false;
- }
+ const size_t remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;
+ count = fread(&Resources.CompressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Resources.Stream);
+ if ( count != remaining )
+ throw BamException("BgzfStream::ReadBlock", "could not read data from block");
// decompress block data
count = InflateBlock(blockLength);
- if ( count < 0 ) {
- fprintf(stderr, "BgzfStream ERROR: read block failed - could not decompress block data\n");
- return false;
- }
- // update block data
+ // update block metadata
if ( BlockLength != 0 )
BlockOffset = 0;
BlockAddress = blockAddress;
BlockLength = count;
-
- // return success
- return true;
}
// seek to position in BGZF file
-bool BgzfStream::Seek(const int64_t& position) {
-
- // skip if not open
- if ( !IsOpen ) return false;
+void BgzfStream::Seek(const int64_t& position) {
// determine adjusted offset & address
int blockOffset = (position & 0xFFFF);
int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
// attempt seek in file
- if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {
- fprintf(stderr, "BgzfStream ERROR: unable to seek in file\n");
- return false;
+ if ( fseek64(Resources.Stream, blockAddress, SEEK_SET) != 0 ) {
+ stringstream s("");
+ s << "unable to seek to position: " << position;
+ throw BamException("BgzfStream::Seek", s.str());
}
- // update block data & return success
+ // if successful, update block metadata
BlockLength = 0;
BlockAddress = blockAddress;
BlockOffset = blockOffset;
- return true;
}
void BgzfStream::SetWriteCompressed(bool ok) {
}
// writes the supplied data into the BGZF buffer
-unsigned int BgzfStream::Write(const char* data, const unsigned int dataLen) {
+size_t BgzfStream::Write(const char* data, const size_t dataLength) {
// skip if file not open for writing
- if ( !IsOpen || !IsWriteOnly ) return false;
+ if ( !IsOpen || !IsWriteOnly )
+ return false;
// write blocks as needed til all data is written
- unsigned int numBytesWritten = 0;
+ size_t numBytesWritten = 0;
const char* input = data;
- unsigned int blockLength = UncompressedBlockSize;
- while ( numBytesWritten < dataLen ) {
+ const size_t blockLength = Constants::BGZF_DEFAULT_BLOCK_SIZE;
+ while ( numBytesWritten < dataLength ) {
// copy data contents to uncompressed output buffer
- unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);
- char* buffer = UncompressedBlock;
+ const size_t copyLength = min(blockLength - BlockOffset, dataLength - numBytesWritten);
+ char* buffer = Resources.UncompressedBlock;
memcpy(buffer + BlockOffset, input, copyLength);
- // update counter
+ // update counters
BlockOffset += copyLength;
input += copyLength;
numBytesWritten += copyLength;
// flush (& compress) output buffer when full
- if ( BlockOffset == blockLength ) FlushBlock();
+ if ( BlockOffset == blockLength )
+ FlushBlock();
}
- // return result
+ // return actual number of bytes written
return numBytesWritten;
}
#include <api/BamConstants.h>
#include "zlib.h"
#include <cstdio>
+#include <memory>
#include <string>
namespace BamTools {
// closes BGZF file
void Close(void);
// opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
- bool Open(const std::string& filename, const char* mode);
+ void Open(const std::string& filename, const char* mode);
// reads BGZF data into a byte buffer
- int Read(char* data, const unsigned int dataLength);
+ size_t Read(char* data, const size_t dataLength);
// seek to position in BGZF file
- bool Seek(const int64_t& position);
+ void Seek(const int64_t& position);
// enable/disable compressed output
void SetWriteCompressed(bool ok);
// get file position in BGZF file
int64_t Tell(void) const;
// writes the supplied data into the BGZF buffer
- unsigned int Write(const char* data, const unsigned int dataLen);
+ size_t Write(const char* data, const size_t dataLength);
// internal methods
private:
// compresses the current block
- int DeflateBlock(void);
+ size_t DeflateBlock(void);
// flushes the data in the BGZF block
void FlushBlock(void);
// de-compresses the current block
- int InflateBlock(const int& blockLength);
+ size_t InflateBlock(const size_t& blockLength);
// reads a BGZF block
- bool ReadBlock(void);
+ void ReadBlock(void);
// static 'utility' methods
public:
// checks BGZF block header
- static inline bool CheckBlockHeader(char* header);
+ static bool CheckBlockHeader(char* header);
// data members
public:
- unsigned int UncompressedBlockSize;
- unsigned int CompressedBlockSize;
unsigned int BlockLength;
unsigned int BlockOffset;
- uint64_t BlockAddress;
+ int64_t BlockAddress;
bool IsOpen;
bool IsWriteOnly;
bool IsWriteCompressed;
- FILE* Stream;
- char* UncompressedBlock;
- char* CompressedBlock;
-};
-// -------------------------------------------------------------
-// static 'utility' method implementations
+ struct RaiiWrapper {
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ char* UncompressedBlock;
+ char* CompressedBlock;
+ FILE* Stream;
+ };
+ RaiiWrapper Resources;
-// checks BGZF block header
-inline
-bool BgzfStream::CheckBlockHeader(char* header) {
- return (header[0] == Constants::GZIP_ID1 &&
- header[1] == (char)Constants::GZIP_ID2 &&
- header[2] == Z_DEFLATED &&
- (header[3] & Constants::FLG_FEXTRA) != 0 &&
- BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
- header[12] == Constants::BGZF_ID1 &&
- header[13] == Constants::BGZF_ID2 &&
- BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
-}
+};
} // namespace Internal
} // namespace BamTools
// SamFormatParser.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for parsing SAM header text into SamHeader object
// ***************************************************************************
#include <api/SamConstants.h>
#include <api/SamHeader.h>
+#include <api/internal/BamException_p.h>
#include <api/internal/SamFormatParser_p.h>
using namespace BamTools;
using namespace BamTools::Internal;
void SamFormatParser::ParseSamLine(const string& line) {
// skip if line is not long enough to contain true values
- if (line.length() < 5 ) return;
+ if ( line.length() < 5 ) return;
// determine token at beginning of line
const string firstToken = line.substr(0,3);
else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
- else
- cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;
+ else {
+ const string message = string("unknown token: ") + firstToken;
+ throw BamException("SamFormatParser::ParseSamLine", message);
+ }
}
void SamFormatParser::ParseHDLine(const string& line) {
if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;
else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;
else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;
+ else {
+ const string message = string("unknown HD tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseHDLine", message);
+ }
}
- // if @HD line exists, VN must be provided
+ // check for required tags
if ( !m_header.HasVersion() )
- cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;
+ throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
}
void SamFormatParser::ParseSQLine(const string& line) {
else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG ) seq.Checksum = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG ) seq.Species = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown SQ tag: " << tokenTag << endl;
+ else {
+ const string message = string("unknown SQ tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseSQLine", message);
+ }
}
- bool isMissingRequiredFields = false;
-
- // if @SQ line exists, SN must be provided
- if ( !seq.HasName() ) {
- isMissingRequiredFields = true;
- cerr << "SamFormatParser ERROR: @SQ line is missing SN tag" << endl;
- }
-
- // if @SQ line exists, LN must be provided
- if ( !seq.HasLength() ) {
- isMissingRequiredFields = true;
- cerr << "SamFormatParser ERROR: @SQ line is missing LN tag" << endl;
- }
+ // check for required tags
+ if ( !seq.HasName() )
+ throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+ if ( !seq.HasLength() )
+ throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
// store SAM sequence entry
- if ( !isMissingRequiredFields )
- m_header.Sequences.Add(seq);
+ m_header.Sequences.Add(seq);
}
void SamFormatParser::ParseRGLine(const string& line) {
else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;
+ else {
+ const string message = string("unknown RG tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseRGLine", message);
+ }
}
- bool isMissingRequiredFields = false;
-
- // if @RG line exists, ID must be provided
- if ( !rg.HasID() ) {
- isMissingRequiredFields = true;
- cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;
- }
+ // check for required tags
+ if ( !rg.HasID() )
+ throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
// store SAM read group entry
- if ( !isMissingRequiredFields )
- m_header.ReadGroups.Add(rg);
+ m_header.ReadGroups.Add(rg);
}
void SamFormatParser::ParsePGLine(const string& line) {
else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;
else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;
+ else {
+ const string message = string("unknown PG tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParsePGLine", message);
+ }
}
- bool isMissingRequiredFields = false;
-
- // if @PG line exists, ID must be provided
- if ( !pg.HasID() ) {
- isMissingRequiredFields = true;
- cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;
- }
+ // check for required tags
+ if ( !pg.HasID() )
+ throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
- // store SAM program record
- if ( !isMissingRequiredFields )
- m_header.Programs.Add(pg);
+ // store SAM program entry
+ m_header.Programs.Add(pg);
}
void SamFormatParser::ParseCOLine(const string& line) {
// SamFormatPrinter.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 19 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for printing formatted SAM header to string
// ***************************************************************************
#include <vector>
using namespace std;
+// ------------------------
+// static utility methods
+// ------------------------
+
+static inline
+const string FormatTag(const string& tag, const string& value) {
+ return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
+}
+
+// ---------------------------------
+// SamFormatPrinter implementation
+// ---------------------------------
+
SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
: m_header(header)
{ }
SamFormatPrinter::~SamFormatPrinter(void) { }
-const string SamFormatPrinter::FormatTag(const string &tag, const string &value) const {
- return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value);
-}
-
const string SamFormatPrinter::ToString(void) const {
// clear out stream
// SamFormatPrinter.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for printing formatted SAM header to string
// ***************************************************************************
// internal methods
private:
- const std::string FormatTag(const std::string& tag, const std::string& value) const;
void PrintHD(std::stringstream& out) const;
void PrintSQ(std::stringstream& out) const;
void PrintRG(std::stringstream& out) const;
// SamHeaderValidator.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 18 April 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for validating SamHeader data
// ***************************************************************************
using namespace BamTools::Internal;
#include <cctype>
-#include <iostream>
#include <set>
#include <sstream>
using namespace std;
-namespace BamTools {
-namespace Internal {
+// ------------------------
+// static utility methods
+// -------------------------
+static
bool caseInsensitiveCompare(const string& lhs, const string& rhs) {
// can omit checking chars if lengths not equal
return true;
}
-} // namespace Internal
-} // namespace BamTools
-
// ------------------------------------------------------------------------
// Allow validation rules to vary, as needed, between SAM header versions
//
SamHeaderValidator::~SamHeaderValidator(void) { }
-bool SamHeaderValidator::Validate(bool verbose) {
+void SamHeaderValidator::AddError(const string& message) {
+ m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
+}
+
+void SamHeaderValidator::AddWarning(const string& message) {
+ m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
+}
+
+void SamHeaderValidator::PrintErrorMessages(ostream& stream) {
+
+ // skip if no error messages
+ if ( m_errorMessages.empty() )
+ return;
+
+ // print error header line
+ stream << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
+
+ // print each error message
+ vector<string>::const_iterator errorIter = m_errorMessages.begin();
+ vector<string>::const_iterator errorEnd = m_errorMessages.end();
+ for ( ; errorIter != errorEnd; ++errorIter )
+ stream << (*errorIter);
+}
+
+void SamHeaderValidator::PrintMessages(ostream& stream) {
+ PrintErrorMessages(stream);
+ PrintWarningMessages(stream);
+}
+
+void SamHeaderValidator::PrintWarningMessages(ostream& stream) {
+
+ // skip if no warning messages
+ if ( m_warningMessages.empty() )
+ return;
+
+ // print warning header line
+ stream << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
+
+ // print each warning message
+ vector<string>::const_iterator warnIter = m_warningMessages.begin();
+ vector<string>::const_iterator warnEnd = m_warningMessages.end();
+ for ( ; warnIter != warnEnd; ++warnIter )
+ stream << (*warnIter);
+}
- // validate header components
+// entry point for validation
+bool SamHeaderValidator::Validate(void) {
bool isValid = true;
isValid &= ValidateMetadata();
isValid &= ValidateSequenceDictionary();
isValid &= ValidateReadGroupDictionary();
isValid &= ValidateProgramChain();
-
- // report errors if desired
- if ( verbose ) {
- PrintErrorMessages();
- PrintWarningMessages();
- }
-
- // return validation status
return isValid;
}
+// check all SAM header 'metadata'
bool SamHeaderValidator::ValidateMetadata(void) {
bool isValid = true;
isValid &= ValidateVersion();
return isValid;
}
+// check SAM header version tag
bool SamHeaderValidator::ValidateVersion(void) {
const string& version = m_header.Version;
return ( nonDigitPosition == string::npos ) ;
}
+// validate SAM header sort order tag
bool SamHeaderValidator::ValidateSortOrder(void) {
const string& sortOrder = m_header.SortOrder;
return false;
}
+// validate SAM header group order tag
bool SamHeaderValidator::ValidateGroupOrder(void) {
const string& groupOrder = m_header.GroupOrder;
return false;
}
+// validate SAM header sequence dictionary
bool SamHeaderValidator::ValidateSequenceDictionary(void) {
bool isValid = true;
return isValid;
}
+// make sure all SQ names are unique
bool SamHeaderValidator::ContainsUniqueSequenceNames(void) {
bool isValid = true;
return isValid;
}
+// validate SAM header sequence entry
bool SamHeaderValidator::ValidateSequence(const SamSequence& seq) {
bool isValid = true;
isValid &= CheckNameFormat(seq.Name);
return isValid;
}
+// check sequence name is valid format
bool SamHeaderValidator::CheckNameFormat(const string& name) {
// invalid if name is empty
return true;
}
+// check that sequence length is within accepted range
bool SamHeaderValidator::CheckLengthInRange(const string& length) {
// invalid if empty
return true;
}
+// validate SAM header read group dictionary
bool SamHeaderValidator::ValidateReadGroupDictionary(void) {
bool isValid = true;
return isValid;
}
+// make sure RG IDs and platform units are unique
bool SamHeaderValidator::ContainsUniqueIDsAndPlatformUnits(void) {
bool isValid = true;
return isValid;
}
+// validate SAM header read group entry
bool SamHeaderValidator::ValidateReadGroup(const SamReadGroup& rg) {
bool isValid = true;
isValid &= CheckReadGroupID(rg.ID);
return isValid;
}
+// make sure RG ID exists
bool SamHeaderValidator::CheckReadGroupID(const string& id) {
// invalid if empty
return true;
}
+// make sure RG sequencing tech is one of the accepted keywords
bool SamHeaderValidator::CheckSequencingTechnology(const string& technology) {
// if no technology provided, no problem, just return OK
return false;
}
+// validate the SAM header "program chain"
bool SamHeaderValidator::ValidateProgramChain(void) {
bool isValid = true;
isValid &= ContainsUniqueProgramIds();
return isValid;
}
+// make sure all PG IDs are unique
bool SamHeaderValidator::ContainsUniqueProgramIds(void) {
bool isValid = true;
return isValid;
}
+// make sure that any PP tags present point to existing @PG IDs
bool SamHeaderValidator::ValidatePreviousProgramIds(void) {
bool isValid = true;
// return validation state
return isValid;
}
-void SamHeaderValidator::AddError(const string& message) {
- m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::AddWarning(const string& message) {
- m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);
-}
-
-void SamHeaderValidator::PrintErrorMessages(void) {
-
- // skip if no error messages
- if ( m_errorMessages.empty() ) return;
-
- // print error header line
- cerr << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;
-
- // print each error message
- vector<string>::const_iterator errorIter = m_errorMessages.begin();
- vector<string>::const_iterator errorEnd = m_errorMessages.end();
- for ( ; errorIter != errorEnd; ++errorIter )
- cerr << (*errorIter);
-}
-
-void SamHeaderValidator::PrintWarningMessages(void) {
-
- // skip if no warning messages
- if ( m_warningMessages.empty() ) return;
-
- // print warning header line
- cerr << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;
-
- // print each warning message
- vector<string>::const_iterator warnIter = m_warningMessages.begin();
- vector<string>::const_iterator warnEnd = m_warningMessages.end();
- for ( ; warnIter != warnEnd; ++warnIter )
- cerr << (*warnIter);
-}
// SamHeaderValidator.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 13 January 2011 (DB)
+// Last modified: 6 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for validating SamHeader data
// ***************************************************************************
//
// We mean it.
+#include <iostream>
#include <string>
#include <vector>
// SamHeaderValidator interface
public:
+
+ // prints error & warning messages
+ void PrintMessages(std::ostream& stream);
+
// validates SamHeader data, returns true/false accordingly
- // prints error & warning messages to stderr when @verbose is true
- bool Validate(bool verbose = false);
+ bool Validate(void);
// internal methods
private:
// error reporting
void AddError(const std::string& message);
void AddWarning(const std::string& message);
- void PrintErrorMessages(void);
- void PrintWarningMessages(void);
+ void PrintErrorMessages(std::ostream& stream);
+ void PrintWarningMessages(std::ostream& stream);
// data members
private: