src/api/BamAux.h

   1 // ***************************************************************************\r
// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
   3 // Marth Lab, Department of Biology, Boston College\r
   4 // ---------------------------------------------------------------------------\r
   5 // Last modified: 8 October 2011 (DB)\r
   6 // ---------------------------------------------------------------------------\r
   7 // Provides data structures & utility methods that are used throughout the API.\r
   8 // ***************************************************************************\r
   9 \r
  10 #ifndef BAMAUX_H\r
  11 #define BAMAUX_H\r
  12 \r
#include <api/api_global.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
  18 \r
  19 /*! \file BamAux.h\r
  20 \r
  21     Provides data structures & utility methods that are used throughout the API.\r
  22 */\r
  23 /*! \namespace BamTools\r
  24     \brief Contains all BamTools classes & methods.\r
  25 \r
  26     The BamTools API contained in this namespace contains classes and methods\r
  27     for reading, writing, and manipulating BAM alignment files.\r
  28 */\r
  29 namespace BamTools {\r
  30 \r
  31 // ----------------------------------------------------------------\r
  32 // CigarOp\r
  33 \r
  34 /*! \struct BamTools::CigarOp\r
  35     \brief Represents a CIGAR alignment operation.\r
  36 \r
  37     \sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.\r
  38 */\r
  39 struct API_EXPORT CigarOp {\r
  40   \r
  41     char     Type;   //!< CIGAR operation type (MIDNSHP)\r
  42     uint32_t Length; //!< CIGAR operation length (number of bases)\r
  43     \r
  44     //! constructor\r
  45     CigarOp(const char type = '\0', \r
  46             const uint32_t& length = 0)\r
  47         : Type(type)\r
  48         , Length(length) \r
  49     { }\r
  50 };\r
  51 \r
  52 // ----------------------------------------------------------------\r
  53 // RefData\r
  54 \r
  55 /*! \struct BamTools::RefData\r
  56     \brief Represents a reference sequence entry\r
  57 */\r
  58 struct API_EXPORT RefData {\r
  59    \r
  60     std::string RefName;    //!< name of reference sequence\r
  61     int32_t     RefLength;  //!< length of reference sequence\r
  62     \r
  63     //! constructor\r
  64     RefData(const std::string& name = "",\r
  65             const int32_t& length = 0)\r
  66         : RefName(name)\r
  67         , RefLength(length)\r
  68     { }\r
  69 };\r
  70 \r
  71 //! convenience typedef for vector of RefData entries\r
  72 typedef std::vector<RefData> RefVector;\r
  73 \r
  74 // ----------------------------------------------------------------\r
  75 // BamRegion\r
  76 \r
  77 /*! \struct BamTools::BamRegion\r
  78     \brief Represents a sequential genomic region\r
  79 \r
  80     Allowed to span multiple (sequential) references.\r
  81 \r
  82     \warning BamRegion now represents a zero-based, HALF-OPEN interval.\r
  83     In previous versions of BamTools (0.x & 1.x) all intervals were treated\r
  84     as zero-based, CLOSED. I whole-heartedly apologize for any inconsistencies this\r
  85     may have caused if you assumed that BT was always half-open; full aplogies also\r
  86     to those who recognized that BamTools originally used a closed interval, but may\r
  87     need to update their code to reflect this new change.\r
  88 */\r
  89 struct API_EXPORT BamRegion {\r
  90   \r
  91     int LeftRefID;      //!< reference ID for region's left boundary\r
  92     int LeftPosition;   //!< position for region's left boundary\r
  93     int RightRefID;     //!< reference ID for region's right boundary\r
  94     int RightPosition;  //!< position for region's right boundary\r
  95     \r
  96     //! constructor\r
  97     BamRegion(const int& leftID   = -1, \r
  98               const int& leftPos  = -1,\r
  99               const int& rightID  = -1,\r
 100               const int& rightPos = -1)\r
 101         : LeftRefID(leftID)\r
 102         , LeftPosition(leftPos)\r
 103         , RightRefID(rightID)\r
 104         , RightPosition(rightPos)\r
 105     { }\r
 106     \r
 107     //! copy constructor\r
 108     BamRegion(const BamRegion& other)\r
 109         : LeftRefID(other.LeftRefID)\r
 110         , LeftPosition(other.LeftPosition)\r
 111         , RightRefID(other.RightRefID)\r
 112         , RightPosition(other.RightPosition)\r
 113     { }\r
 114     \r
 115     //! Clears region boundaries\r
 116     void clear(void) {\r
 117         LeftRefID  = -1; LeftPosition  = -1;\r
 118         RightRefID = -1; RightPosition = -1;\r
 119     }\r
 120 \r
 121     //! Returns true if region has a left boundary\r
 122     bool isLeftBoundSpecified(void) const {\r
 123         return ( LeftRefID >= 0 && LeftPosition >= 0 );\r
 124     }\r
 125 \r
 126     //! Returns true if region boundaries are not defined\r
 127     bool isNull(void) const {\r
 128         return ( !isLeftBoundSpecified() && !isRightBoundSpecified() );\r
 129     }\r
 130 \r
 131     //! Returns true if region has a right boundary\r
 132     bool isRightBoundSpecified(void) const {\r
 133         return ( RightRefID >= 0 && RightPosition >= 1 );\r
 134     }\r
 135 };\r
 136 \r
 137 // ----------------------------------------------------------------\r
 138 // General utility methods\r
 139 \r
 140 /*! \fn bool FileExists(const std::string& filename)\r
 141     \brief checks if file exists\r
 142 \r
 143     Attempts to open file in a read-only mode.\r
 144 \r
 145     \return \c true if file can be opened successfully\r
 146 */\r
 147 API_EXPORT inline bool FileExists(const std::string& filename) {\r
 148     std::ifstream f(filename.c_str(), std::ifstream::in);\r
 149     return !f.fail();\r
 150 }\r
 151 \r
 152 /*! \fn void SwapEndian_16(int16_t& x)\r
 153     \brief swaps endianness of signed 16-bit integer, in place\r
 154 \r
 155     Swaps endian representation of value in \a x.\r
 156 */\r
 157 API_EXPORT inline void SwapEndian_16(int16_t& x) {\r
 158     x = ((x >> 8) | (x << 8));\r
 159 }\r
 160 \r
 161 /*! \fn void SwapEndian_16(uint16_t& x)\r
 162     \brief swaps endianness of unsigned 16-bit integer, in place\r
 163 \r
 164     Swaps endian representation of value in \a x.\r
 165 */\r
 166 API_EXPORT inline void SwapEndian_16(uint16_t& x) {\r
 167     x = ((x >> 8) | (x << 8));\r
 168 }\r
 169 \r
 170 /*! \fn void SwapEndian_32(int32_t& x)\r
 171     \brief swaps endianness of signed 32-bit integer, in place\r
 172 \r
 173     Swaps endian representation of value in \a x.\r
 174 */\r
 175 API_EXPORT inline void SwapEndian_32(int32_t& x) {\r
 176     x = ( (x >> 24) | \r
 177          ((x << 8) & 0x00FF0000) | \r
 178          ((x >> 8) & 0x0000FF00) | \r
 179           (x << 24)\r
 180         );\r
 181 }\r
 182 \r
 183 /*! \fn void SwapEndian_32(uint32_t& x)\r
 184     \brief swaps endianness of unsigned 32-bit integer, in place\r
 185 \r
 186     Swaps endian representation of value in \a x.\r
 187 */\r
 188 API_EXPORT inline void SwapEndian_32(uint32_t& x) {\r
 189     x = ( (x >> 24) | \r
 190          ((x << 8) & 0x00FF0000) | \r
 191          ((x >> 8) & 0x0000FF00) | \r
 192           (x << 24)\r
 193         );\r
 194 }\r
 195 \r
 196 /*! \fn void SwapEndian_64(int64_t& x)\r
 197     \brief swaps endianness of signed 64-bit integer, in place\r
 198 \r
 199     Swaps endian representation of value in \a x.\r
 200 */\r
 201 API_EXPORT inline void SwapEndian_64(int64_t& x) {\r
 202     x = ( (x >> 56) | \r
 203          ((x << 40) & 0x00FF000000000000ll) |\r
 204          ((x << 24) & 0x0000FF0000000000ll) |\r
 205          ((x << 8)  & 0x000000FF00000000ll) |\r
 206          ((x >> 8)  & 0x00000000FF000000ll) |\r
 207          ((x >> 24) & 0x0000000000FF0000ll) |\r
 208          ((x >> 40) & 0x000000000000FF00ll) |\r
 209           (x << 56)\r
 210         );\r
 211 }\r
 212 \r
 213 /*! \fn void SwapEndian_64(uint64_t& x)\r
 214     \brief swaps endianness of unsigned 64-bit integer, in place\r
 215 \r
 216     Swaps endian representation of value in \a x.\r
 217 */\r
 218 API_EXPORT inline void SwapEndian_64(uint64_t& x) {\r
 219     x = ( (x >> 56) | \r
 220          ((x << 40) & 0x00FF000000000000ll) |\r
 221          ((x << 24) & 0x0000FF0000000000ll) |\r
 222          ((x << 8)  & 0x000000FF00000000ll) |\r
 223          ((x >> 8)  & 0x00000000FF000000ll) |\r
 224          ((x >> 24) & 0x0000000000FF0000ll) |\r
 225          ((x >> 40) & 0x000000000000FF00ll) |\r
 226           (x << 56)\r
 227         );\r
 228 }\r
 229 \r
 230 /*! \fn void SwapEndian_16p(char* data)\r
 231     \brief swaps endianness of the next 2 bytes in a buffer, in place\r
 232 \r
 233     Swaps endian representation the next 2 bytes in \a data.\r
 234 */\r
 235 API_EXPORT inline void SwapEndian_16p(char* data) {\r
 236     uint16_t& value = (uint16_t&)*data; \r
 237     SwapEndian_16(value);\r
 238 }\r
 239 \r
 240 /*! \fn void SwapEndian_32p(char* data)\r
 241     \brief swaps endianness of the next 4 bytes in a buffer, in place\r
 242 \r
 243     Swaps endian representation the next 4 bytes in \a data.\r
 244 */\r
 245 API_EXPORT inline void SwapEndian_32p(char* data) {\r
 246     uint32_t& value = (uint32_t&)*data; \r
 247     SwapEndian_32(value);\r
 248 }\r
 249 \r
 250 /*! \fn void SwapEndian_64p(char* data)\r
 251     \brief swaps endianness of the next 8 bytes in a buffer, in place\r
 252 \r
 253     Swaps endian representation the next 8 bytes in \a data.\r
 254 */\r
 255 API_EXPORT inline void SwapEndian_64p(char* data) {\r
 256     uint64_t& value = (uint64_t&)*data; \r
 257     SwapEndian_64(value);\r
 258 }\r
 259 \r
 260 /*! \fn bool SystemIsBigEndian(void)\r
 261     \brief checks host architecture's byte order\r
 262     \return \c true if system uses big-endian ordering\r
 263 */\r
 264 API_EXPORT inline bool SystemIsBigEndian(void) {\r
 265    const uint16_t one = 0x0001;\r
 266    return ((*(char*) &one) == 0 );\r
 267 }\r
 268 \r
 269 /*! \fn void PackUnsignedInt(char* buffer, unsigned int value)\r
 270     \brief stores unsigned integer value in a byte buffer\r
 271 \r
 272     \param buffer destination buffer\r
 273     \param value  unsigned integer to 'pack' in buffer\r
 274 */\r
 275 API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) {\r
 276     buffer[0] = (char)value;\r
 277     buffer[1] = (char)(value >> 8);\r
 278     buffer[2] = (char)(value >> 16);\r
 279     buffer[3] = (char)(value >> 24);\r
 280 }\r
 281 \r
 282 /*! \fn void PackUnsignedShort(char* buffer, unsigned short value)\r
 283     \brief stores unsigned short integer value in a byte buffer\r
 284 \r
 285     \param buffer destination buffer\r
 286     \param value  unsigned short integer to 'pack' in buffer\r
 287 */\r
 288 API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) {\r
 289     buffer[0] = (char)value;\r
 290     buffer[1] = (char)(value >> 8);\r
 291 }\r
 292 \r
 293 /*! \fn double UnpackDouble(const char* buffer)\r
 294     \brief reads a double value from byte buffer\r
 295 \r
 296     \param buffer source byte buffer\r
 297     \return the (double) value read from the buffer\r
 298 */\r
 299 API_EXPORT inline double UnpackDouble(const char* buffer) {\r
 300     union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
 301     un.value = 0;\r
 302     un.valueBuffer[0] = buffer[0];\r
 303     un.valueBuffer[1] = buffer[1];\r
 304     un.valueBuffer[2] = buffer[2];\r
 305     un.valueBuffer[3] = buffer[3];\r
 306     un.valueBuffer[4] = buffer[4];\r
 307     un.valueBuffer[5] = buffer[5];\r
 308     un.valueBuffer[6] = buffer[6];\r
 309     un.valueBuffer[7] = buffer[7];\r
 310     return un.value;\r
 311 }\r
 312 \r
 313 /*! \fn double UnpackDouble(char* buffer)\r
 314     \brief reads a double value from byte buffer\r
 315 \r
 316     This is an overloaded function.\r
 317 \r
 318     \param buffer source byte buffer\r
 319     \return the (double) value read from the buffer\r
 320 */\r
 321 API_EXPORT inline double UnpackDouble(char* buffer) {\r
 322     return UnpackDouble( (const char*)buffer );\r
 323 }\r
 324 \r
 325 /*! \fn double UnpackFloat(const char* buffer)\r
 326     \brief reads a float value from byte buffer\r
 327 \r
 328     \param buffer source byte buffer\r
 329     \return the (float) value read from the buffer\r
 330 */\r
 331 API_EXPORT inline float UnpackFloat(const char* buffer) {\r
 332     union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
 333     un.value = 0;\r
 334     un.valueBuffer[0] = buffer[0];\r
 335     un.valueBuffer[1] = buffer[1];\r
 336     un.valueBuffer[2] = buffer[2];\r
 337     un.valueBuffer[3] = buffer[3];\r
 338     return un.value;\r
 339 }\r
 340 \r
 341 /*! \fn double UnpackFloat(char* buffer)\r
 342     \brief reads a float value from byte buffer\r
 343 \r
 344     This is an overloaded function.\r
 345 \r
 346     \param buffer source byte buffer\r
 347     \return the (float) value read from the buffer\r
 348 */\r
 349 API_EXPORT inline float UnpackFloat(char* buffer) {\r
 350     return UnpackFloat( (const char*)buffer );\r
 351 }\r
 352 \r
 353 /*! \fn signed int UnpackSignedInt(const char* buffer)\r
 354     \brief reads a signed integer value from byte buffer\r
 355 \r
 356     \param buffer source byte buffer\r
 357     \return the (signed int) value read from the buffer\r
 358 */\r
 359 API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r
 360     union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
 361     un.value = 0;\r
 362     un.valueBuffer[0] = buffer[0];\r
 363     un.valueBuffer[1] = buffer[1];\r
 364     un.valueBuffer[2] = buffer[2];\r
 365     un.valueBuffer[3] = buffer[3];\r
 366     return un.value;\r
 367 }\r
 368 \r
 369 /*! \fn signed int UnpackSignedInt(char* buffer)\r
 370     \brief reads a signed integer value from byte buffer\r
 371 \r
 372     This is an overloaded function.\r
 373 \r
 374     \param buffer source byte buffer\r
 375     \return the (signed int) value read from the buffer\r
 376 */\r
 377 API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r
 378     return UnpackSignedInt( (const char*) buffer );\r
 379 }\r
 380 \r
 381 /*! \fn signed short UnpackSignedShort(const char* buffer)\r
 382     \brief reads a signed short integer value from byte buffer\r
 383 \r
 384     \param buffer source byte buffer\r
 385     \return the (signed short) value read from the buffer\r
 386 */\r
 387 API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r
 388     union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
 389     un.value = 0;\r
 390     un.valueBuffer[0] = buffer[0];\r
 391     un.valueBuffer[1] = buffer[1];\r
 392     return un.value;\r
 393 }\r
 394 \r
 395 /*! \fn signed short UnpackSignedShort(char* buffer)\r
 396     \brief reads a signed short integer value from byte buffer\r
 397 \r
 398     This is an overloaded function.\r
 399 \r
 400     \param buffer source byte buffer\r
 401     \return the (signed short) value read from the buffer\r
 402 */\r
 403 API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r
 404     return UnpackSignedShort( (const char*)buffer );\r
 405 }\r
 406 \r
 407 /*! \fn unsigned int UnpackUnsignedInt(const char* buffer)\r
 408     \brief reads an unsigned integer value from byte buffer\r
 409 \r
 410     \param buffer source byte buffer\r
 411     \return the (unsigned int) value read from the buffer\r
 412 */\r
 413 API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r
 414     union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
 415     un.value = 0;\r
 416     un.valueBuffer[0] = buffer[0];\r
 417     un.valueBuffer[1] = buffer[1];\r
 418     un.valueBuffer[2] = buffer[2];\r
 419     un.valueBuffer[3] = buffer[3];\r
 420     return un.value;\r
 421 }\r
 422 \r
 423 /*! \fn unsigned int UnpackUnsignedInt(char* buffer)\r
 424     \brief reads an unsigned integer value from byte buffer\r
 425 \r
 426     This is an overloaded function.\r
 427 \r
 428     \param buffer source byte buffer\r
 429     \return the (unsigned int) value read from the buffer\r
 430 */\r
 431 API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r
 432     return UnpackUnsignedInt( (const char*)buffer );\r
 433 }\r
 434 \r
 435 /*! \fn unsigned short UnpackUnsignedShort(const char* buffer)\r
 436     \brief reads an unsigned short integer value from byte buffer\r
 437 \r
 438     \param buffer source byte buffer\r
 439     \return the (unsigned short) value read from the buffer\r
 440 */\r
 441 API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r
 442     union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
 443     un.value = 0;\r
 444     un.valueBuffer[0] = buffer[0];\r
 445     un.valueBuffer[1] = buffer[1];\r
 446     return un.value;\r
 447 }\r
 448 \r
 449 /*! \fn unsigned short UnpackUnsignedShort(char* buffer)\r
 450     \brief reads an unsigned short integer value from byte buffer\r
 451 \r
 452     This is an overloaded function.\r
 453 \r
 454     \param buffer source byte buffer\r
 455     \return the (unsigned short) value read from the buffer\r
 456 */\r
 457 API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r
 458     return UnpackUnsignedShort( (const char*)buffer );\r
 459 }\r
 460 \r
 461 // ----------------------------------------------------------------\r
 462 // 'internal' helper structs\r
 463 \r
 464 struct RaiiBuffer {\r
 465     RaiiBuffer(const unsigned int n)\r
 466         : Buffer( new char[n]() )\r
 467     { }\r
 468     ~RaiiBuffer(void) {\r
 469         delete[] Buffer;\r
 470     }\r
 471     char* Buffer;\r
 472 };\r
 473 \r
 474 } // namespace BamTools\r
 475 \r
 476 #endif // BAMAUX_H\r