// ***************************************************************************\r
// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
// Marth Lab, Department of Biology, Boston College\r
-// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 9 October 2010 (DB)\r
+// Last modified: 10 October 2011 (DB)\r
// ---------------------------------------------------------------------------\r
-// Provides the basic constants, data structures, utilities etc. \r
-// used throughout the API for handling BAM files\r
+// Provides data structures & utility methods that are used throughout the API.\r
// ***************************************************************************\r
\r
#ifndef BAMAUX_H\r
#define BAMAUX_H\r
\r
+#include "api/api_global.h"\r
#include <fstream> \r
#include <iostream>\r
#include <string>\r
#include <vector>\r
\r
-// Platform-specific large-file support\r
-#ifndef BAMTOOLS_LFS\r
-#define BAMTOOLS_LFS\r
- #ifdef WIN32\r
- #define ftell64(a) _ftelli64(a)\r
- #define fseek64(a,b,c) _fseeki64(a,b,c)\r
- #else\r
- #define ftell64(a) ftello(a)\r
- #define fseek64(a,b,c) fseeko(a,b,c)\r
- #endif\r
-#endif // BAMTOOLS_LFS\r
-\r
-// Platform-specific type definitions\r
-#ifndef BAMTOOLS_TYPES\r
-#define BAMTOOLS_TYPES\r
- #ifdef _MSC_VER\r
- typedef char int8_t;\r
- typedef unsigned char uint8_t;\r
- typedef short int16_t;\r
- typedef unsigned short uint16_t;\r
- typedef int int32_t;\r
- typedef unsigned int uint32_t;\r
- typedef long long int64_t;\r
- typedef unsigned long long uint64_t;\r
- #else\r
- #include <stdint.h>\r
- #endif\r
-#endif // BAMTOOLS_TYPES\r
+/*! \file BamAux.h\r
\r
+ Provides data structures & utility methods that are used throughout the API.\r
+*/\r
+\r
+/*! \namespace BamTools\r
+ \brief Contains all BamTools classes & methods.\r
+\r
+ The BamTools API in this namespace provides classes and methods\r
+ for reading, writing, and manipulating BAM alignment files.\r
+*/\r
namespace BamTools {\r
\r
// ----------------------------------------------------------------\r
-// ----------------------------------------------------------------\r
-// BAM constants\r
-\r
-const int BAM_CMATCH = 0;\r
-const int BAM_CINS = 1;\r
-const int BAM_CDEL = 2;\r
-const int BAM_CREF_SKIP = 3;\r
-const int BAM_CSOFT_CLIP = 4;\r
-const int BAM_CHARD_CLIP = 5;\r
-const int BAM_CPAD = 6;\r
-const int BAM_CIGAR_SHIFT = 4;\r
-const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);\r
-const int BAM_CORE_SIZE = 32;\r
-const int BT_SIZEOF_INT = 4;\r
+// CigarOp\r
\r
-// ----------------------------------------------------------------\r
-// ----------------------------------------------------------------\r
-// Data structs & typedefs\r
+/*! \struct BamTools::CigarOp\r
+ \brief Represents a CIGAR alignment operation.\r
\r
-// CIGAR operation data structure\r
-struct CigarOp {\r
+ \sa \samSpecURL for more details on using CIGAR operations.\r
+*/\r
+struct API_EXPORT CigarOp {\r
\r
- // data members\r
- char Type; // Operation type (MIDNSHP)\r
- uint32_t Length; // Operation length (number of bases)\r
+ char Type; //!< CIGAR operation type (MIDNSHPX=)\r
+ uint32_t Length; //!< CIGAR operation length (number of bases)\r
\r
- // constructor\r
+ //! constructor\r
CigarOp(const char type = '\0', \r
- const uint32_t length = 0) \r
+ const uint32_t& length = 0)\r
: Type(type)\r
, Length(length) \r
{ }\r
};\r
\r
-// Reference data entry\r
-struct RefData {\r
+// ----------------------------------------------------------------\r
+// RefData\r
+\r
+/*! \struct BamTools::RefData\r
+ \brief Represents a reference sequence entry\r
+*/\r
+struct API_EXPORT RefData {\r
\r
- // data members\r
- std::string RefName; // Name of reference sequence\r
- int32_t RefLength; // Length of reference sequence\r
- bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence\r
+ std::string RefName; //!< name of reference sequence\r
+ int32_t RefLength; //!< length of reference sequence\r
\r
- // constructor\r
- RefData(const int32_t& length = 0, \r
- bool ok = false)\r
- : RefLength(length)\r
- , RefHasAlignments(ok)\r
+ //! constructor\r
+ RefData(const std::string& name = "",\r
+ const int32_t& length = 0)\r
+ : RefName(name)\r
+ , RefLength(length)\r
{ }\r
};\r
+\r
+//! convenience typedef for vector of RefData entries\r
typedef std::vector<RefData> RefVector;\r
\r
-// General (sequential) genome region\r
-struct BamRegion {\r
+// ----------------------------------------------------------------\r
+// BamRegion\r
+\r
+/*! \struct BamTools::BamRegion\r
+ \brief Represents a sequential genomic region\r
+\r
+ Allowed to span multiple (sequential) references.\r
+\r
+ \warning BamRegion now represents a zero-based, HALF-OPEN interval.\r
+ In previous versions of BamTools (0.x & 1.x) all intervals were treated\r
+ as zero-based, CLOSED.\r
+*/\r
+struct API_EXPORT BamRegion {\r
\r
- // data members\r
- int LeftRefID;\r
- int LeftPosition;\r
- int RightRefID;\r
- int RightPosition;\r
+ int LeftRefID; //!< reference ID for region's left boundary\r
+ int LeftPosition; //!< position for region's left boundary\r
+ int RightRefID; //!< reference ID for region's right boundary\r
+ int RightPosition; //!< position for region's right boundary\r
\r
- // constructor\r
+ //! constructor\r
BamRegion(const int& leftID = -1, \r
const int& leftPos = -1,\r
const int& rightID = -1,\r
, RightPosition(rightPos)\r
{ }\r
\r
- // copy constructor\r
+ //! copy constructor\r
BamRegion(const BamRegion& other)\r
- : LeftRefID(other.LeftRefID)\r
- , LeftPosition(other.LeftPosition)\r
- , RightRefID(other.RightRefID)\r
- , RightPosition(other.RightPosition)\r
+ : LeftRefID(other.LeftRefID)\r
+ , LeftPosition(other.LeftPosition)\r
+ , RightRefID(other.RightRefID)\r
+ , RightPosition(other.RightPosition)\r
{ }\r
\r
- // member functions\r
- void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }\r
- bool isLeftBoundSpecified(void) const { return ( LeftRefID != -1 && LeftPosition != -1 ); }\r
- bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); }\r
- bool isRightBoundSpecified(void) const { return ( RightRefID != -1 && RightPosition != -1 ); }\r
+ //! Clears region boundaries\r
+ void clear(void) {\r
+ LeftRefID = -1; LeftPosition = -1;\r
+ RightRefID = -1; RightPosition = -1;\r
+ }\r
+\r
+ //! Returns true if region has a left boundary (LeftRefID >= 0 and LeftPosition >= 0)\r
+ bool isLeftBoundSpecified(void) const {\r
+ return ( LeftRefID >= 0 && LeftPosition >= 0 );\r
+ }\r
+\r
+ //! Returns true if region boundaries are not defined\r
+ bool isNull(void) const {\r
+ return ( !isLeftBoundSpecified() && !isRightBoundSpecified() );\r
+ }\r
+\r
+ //! Returns true if region has a right boundary (RightRefID >= 0 and RightPosition >= 1)\r
+ bool isRightBoundSpecified(void) const {\r
+ return ( RightRefID >= 0 && RightPosition >= 1 );\r
+ }\r
};\r
\r
// ----------------------------------------------------------------\r
-// ----------------------------------------------------------------\r
-// General utilities \r
+// General utility methods\r
\r
-// returns true if system is big endian\r
-inline bool SystemIsBigEndian(void) {\r
- const uint16_t one = 0x0001;\r
- return ((*(char*) &one) == 0 );\r
+/*! \fn bool FileExists(const std::string& filename)\r
+ \brief returns true if the file exists\r
+*/\r
+API_EXPORT inline bool FileExists(const std::string& filename) {\r
+ std::ifstream f(filename.c_str(), std::ifstream::in);\r
+ return !f.fail();\r
}\r
\r
-// swaps endianness of 16-bit value 'in place'\r
-inline void SwapEndian_16(int16_t& x) {\r
+/*! \fn void SwapEndian_16(int16_t& x)\r
+ \brief swaps endianness of signed 16-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_16(int16_t& x) {\r
x = ((x >> 8) | (x << 8));\r
}\r
\r
-inline void SwapEndian_16(uint16_t& x) {\r
+/*! \fn void SwapEndian_16(uint16_t& x)\r
+ \brief swaps endianness of unsigned 16-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_16(uint16_t& x) {\r
x = ((x >> 8) | (x << 8));\r
}\r
\r
-// swaps endianness of 32-bit value 'in-place'\r
-inline void SwapEndian_32(int32_t& x) {\r
+/*! \fn void SwapEndian_32(int32_t& x)\r
+ \brief swaps endianness of signed 32-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_32(int32_t& x) {\r
x = ( (x >> 24) | \r
((x << 8) & 0x00FF0000) | \r
((x >> 8) & 0x0000FF00) | \r
);\r
}\r
\r
-inline void SwapEndian_32(uint32_t& x) {\r
+/*! \fn void SwapEndian_32(uint32_t& x)\r
+ \brief swaps endianness of unsigned 32-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_32(uint32_t& x) {\r
x = ( (x >> 24) | \r
((x << 8) & 0x00FF0000) | \r
((x >> 8) & 0x0000FF00) | \r
);\r
}\r
\r
-// swaps endianness of 64-bit value 'in-place'\r
-inline void SwapEndian_64(int64_t& x) {\r
+/*! \fn void SwapEndian_64(int64_t& x)\r
+ \brief swaps endianness of signed 64-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_64(int64_t& x) {\r
x = ( (x >> 56) | \r
((x << 40) & 0x00FF000000000000ll) |\r
((x << 24) & 0x0000FF0000000000ll) |\r
);\r
}\r
\r
-inline void SwapEndian_64(uint64_t& x) {\r
+/*! \fn void SwapEndian_64(uint64_t& x)\r
+ \brief swaps endianness of unsigned 64-bit integer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_64(uint64_t& x) {\r
x = ( (x >> 56) | \r
((x << 40) & 0x00FF000000000000ll) |\r
((x << 24) & 0x0000FF0000000000ll) |\r
);\r
}\r
\r
-// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_16p(char* data) {\r
+/*! \fn void SwapEndian_16p(char* data)\r
+ \brief swaps endianness of the next 2 bytes in a buffer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_16p(char* data) {\r
uint16_t& value = (uint16_t&)*data; \r
SwapEndian_16(value);\r
}\r
\r
-// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_32p(char* data) {\r
+/*! \fn void SwapEndian_32p(char* data)\r
+ \brief swaps endianness of the next 4 bytes in a buffer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_32p(char* data) {\r
uint32_t& value = (uint32_t&)*data; \r
SwapEndian_32(value);\r
}\r
\r
-// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
-inline void SwapEndian_64p(char* data) {\r
+/*! \fn void SwapEndian_64p(char* data)\r
+ \brief swaps endianness of the next 8 bytes in a buffer, in place\r
+*/\r
+API_EXPORT inline void SwapEndian_64p(char* data) {\r
uint64_t& value = (uint64_t&)*data; \r
SwapEndian_64(value);\r
}\r
\r
-// returns whether file exists (can be opened OK)\r
-inline bool FileExists(const std::string& filename) {\r
- std::ifstream f(filename.c_str(), std::ifstream::in);\r
- return !f.fail();\r
+/*! \fn bool SystemIsBigEndian(void)\r
+ \brief checks host architecture's byte order\r
+ \return \c true if system uses big-endian ordering\r
+*/\r
+API_EXPORT inline bool SystemIsBigEndian(void) {\r
+ const uint16_t one = 0x0001;\r
+ return ((*(char*) &one) == 0 );\r
+}\r
+\r
+/*! \fn void PackUnsignedInt(char* buffer, unsigned int value)\r
+ \brief stores unsigned integer value in a byte buffer\r
+\r
+ \param[out] buffer destination buffer\r
+ \param[in] value value to 'pack' in buffer\r
+*/\r
+API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) {\r
+ buffer[0] = (char)value;\r
+ buffer[1] = (char)(value >> 8);\r
+ buffer[2] = (char)(value >> 16);\r
+ buffer[3] = (char)(value >> 24);\r
+}\r
+\r
+/*! \fn void PackUnsignedShort(char* buffer, unsigned short value)\r
+ \brief stores unsigned short integer value in a byte buffer\r
+\r
+ \param[out] buffer destination buffer\r
+ \param[in] value value to 'pack' in buffer\r
+*/\r
+API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) {\r
+ buffer[0] = (char)value;\r
+ buffer[1] = (char)(value >> 8);\r
}\r
\r
+/*! \fn double UnpackDouble(const char* buffer)\r
+ \brief reads a double value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (double) value read from the buffer\r
+*/\r
+API_EXPORT inline double UnpackDouble(const char* buffer) {\r
+ union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
+ un.valueBuffer[4] = buffer[4];\r
+ un.valueBuffer[5] = buffer[5];\r
+ un.valueBuffer[6] = buffer[6];\r
+ un.valueBuffer[7] = buffer[7];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn double UnpackDouble(char* buffer)\r
+ \brief reads a double value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (double) value read from the buffer\r
+*/\r
+API_EXPORT inline double UnpackDouble(char* buffer) {\r
+ return UnpackDouble( (const char*)buffer );\r
+}\r
+\r
+/*! \fn float UnpackFloat(const char* buffer)\r
+ \brief reads a float value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (float) value read from the buffer\r
+*/\r
+API_EXPORT inline float UnpackFloat(const char* buffer) {\r
+ union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn float UnpackFloat(char* buffer)\r
+ \brief reads a float value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (float) value read from the buffer\r
+*/\r
+API_EXPORT inline float UnpackFloat(char* buffer) {\r
+ return UnpackFloat( (const char*)buffer );\r
+}\r
+\r
+/*! \fn signed int UnpackSignedInt(const char* buffer)\r
+ \brief reads a signed integer value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (signed int) value read from the buffer\r
+*/\r
+API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r
+ union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn signed int UnpackSignedInt(char* buffer)\r
+ \brief reads a signed integer value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (signed int) value read from the buffer\r
+*/\r
+API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r
+ return UnpackSignedInt( (const char*) buffer );\r
+}\r
+\r
+/*! \fn signed short UnpackSignedShort(const char* buffer)\r
+ \brief reads a signed short integer value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (signed short) value read from the buffer\r
+*/\r
+API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r
+ union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn signed short UnpackSignedShort(char* buffer)\r
+ \brief reads a signed short integer value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (signed short) value read from the buffer\r
+*/\r
+API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r
+ return UnpackSignedShort( (const char*)buffer );\r
+}\r
+\r
+/*! \fn unsigned int UnpackUnsignedInt(const char* buffer)\r
+ \brief reads an unsigned integer value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (unsigned int) value read from the buffer\r
+*/\r
+API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r
+ union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ un.valueBuffer[2] = buffer[2];\r
+ un.valueBuffer[3] = buffer[3];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn unsigned int UnpackUnsignedInt(char* buffer)\r
+ \brief reads an unsigned integer value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (unsigned int) value read from the buffer\r
+*/\r
+API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r
+ return UnpackUnsignedInt( (const char*)buffer );\r
+}\r
+\r
+/*! \fn unsigned short UnpackUnsignedShort(const char* buffer)\r
+ \brief reads an unsigned short integer value from byte buffer\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (unsigned short) value read from the buffer\r
+*/\r
+API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r
+ union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
+ un.value = 0;\r
+ un.valueBuffer[0] = buffer[0];\r
+ un.valueBuffer[1] = buffer[1];\r
+ return un.value;\r
+}\r
+\r
+/*! \fn unsigned short UnpackUnsignedShort(char* buffer)\r
+ \brief reads an unsigned short integer value from byte buffer\r
+\r
+ This is an overloaded function.\r
+\r
+ \param[in] buffer source byte buffer\r
+ \return the (unsigned short) value read from the buffer\r
+*/\r
+API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r
+ return UnpackUnsignedShort( (const char*)buffer );\r
+}\r
+\r
+// ----------------------------------------------------------------\r
+// 'internal' helper structs\r
+\r
+/*! \struct RaiiBuffer\r
+ \internal\r
+*/\r
+struct RaiiBuffer {\r
+ RaiiBuffer(const size_t n)\r
+ : Buffer( new char[n]() )\r
+ { }\r
+ ~RaiiBuffer(void) {\r
+ delete[] Buffer;\r
+ }\r
+ char* Buffer;\r
+};\r
+\r
} // namespace BamTools\r
\r
#endif // BAMAUX_H\r