src/api/BamAux.h

   1 // ***************************************************************************\r
   2 // BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
   3 // Marth Lab, Department of Biology, Boston College\r
   4 // ---------------------------------------------------------------------------\r
   5 // Last modified: 25 October 2011 (DB)\r
   6 // ---------------------------------------------------------------------------\r
   7 // Provides data structures & utility methods that are used throughout the API.\r
   8 // ***************************************************************************\r
   9 \r
  10 #ifndef BAMAUX_H\r
  11 #define BAMAUX_H\r
  12 \r
  13 #include "api/api_global.h"\r
  14 #include <cstring>\r
  15 #include <fstream> \r
  16 #include <iostream>\r
  17 #include <string>\r
  18 #include <vector>\r
  19 \r
  20 /*! \file BamAux.h\r
  21 \r
  22     Provides data structures & utility methods that are used throughout the API.\r
  23 */\r
  24 \r
  25 /*! \namespace BamTools\r
  26     \brief Contains all BamTools classes & methods.\r
  27 \r
  28     The BamTools API contained in this namespace contains classes and methods\r
  29     for reading, writing, and manipulating BAM alignment files.\r
  30 */\r
  31 namespace BamTools {\r
  32 \r
  33 // ----------------------------------------------------------------\r
  34 // CigarOp\r
  35 \r
  36 /*! \struct BamTools::CigarOp\r
  37     \brief Represents a CIGAR alignment operation.\r
  38 \r
  39     \sa \samSpecURL for more details on using CIGAR operations.\r
  40 */\r
  41 struct API_EXPORT CigarOp {\r
  42   \r
  43     char     Type;   //!< CIGAR operation type (MIDNSHPX=)\r
  44     uint32_t Length; //!< CIGAR operation length (number of bases)\r
  45     \r
  46     //! constructor\r
  47     CigarOp(const char type = '\0', \r
  48             const uint32_t& length = 0)\r
  49         : Type(type)\r
  50         , Length(length) \r
  51     { }\r
  52 };\r
  53 \r
  54 // ----------------------------------------------------------------\r
  55 // RefData\r
  56 \r
  57 /*! \struct BamTools::RefData\r
  58     \brief Represents a reference sequence entry\r
  59 */\r
  60 struct API_EXPORT RefData {\r
  61    \r
  62     std::string RefName;    //!< name of reference sequence\r
  63     int32_t     RefLength;  //!< length of reference sequence\r
  64     \r
  65     //! constructor\r
  66     RefData(const std::string& name = "",\r
  67             const int32_t& length = 0)\r
  68         : RefName(name)\r
  69         , RefLength(length)\r
  70     { }\r
  71 };\r
  72 \r
  73 //! convenience typedef for vector of RefData entries\r
  74 typedef std::vector<RefData> RefVector;\r
  75 \r
  76 // ----------------------------------------------------------------\r
  77 // BamRegion\r
  78 \r
  79 /*! \struct BamTools::BamRegion\r
  80     \brief Represents a sequential genomic region\r
  81 \r
  82     Allowed to span multiple (sequential) references.\r
  83 \r
  84     \warning BamRegion now represents a zero-based, HALF-OPEN interval.\r
  85     In previous versions of BamTools (0.x & 1.x) all intervals were treated\r
  86     as zero-based, CLOSED.\r
  87 */\r
  88 struct API_EXPORT BamRegion {\r
  89   \r
  90     int LeftRefID;      //!< reference ID for region's left boundary\r
  91     int LeftPosition;   //!< position for region's left boundary\r
  92     int RightRefID;     //!< reference ID for region's right boundary\r
  93     int RightPosition;  //!< position for region's right boundary\r
  94     \r
  95     //! constructor\r
  96     BamRegion(const int& leftID   = -1, \r
  97               const int& leftPos  = -1,\r
  98               const int& rightID  = -1,\r
  99               const int& rightPos = -1)\r
 100         : LeftRefID(leftID)\r
 101         , LeftPosition(leftPos)\r
 102         , RightRefID(rightID)\r
 103         , RightPosition(rightPos)\r
 104     { }\r
 105     \r
 106     //! copy constructor\r
 107     BamRegion(const BamRegion& other)\r
 108         : LeftRefID(other.LeftRefID)\r
 109         , LeftPosition(other.LeftPosition)\r
 110         , RightRefID(other.RightRefID)\r
 111         , RightPosition(other.RightPosition)\r
 112     { }\r
 113     \r
 114     //! Clears region boundaries\r
 115     void clear(void) {\r
 116         LeftRefID  = -1; LeftPosition  = -1;\r
 117         RightRefID = -1; RightPosition = -1;\r
 118     }\r
 119 \r
 120     //! Returns true if region has a left boundary\r
 121     bool isLeftBoundSpecified(void) const {\r
 122         return ( LeftRefID >= 0 && LeftPosition >= 0 );\r
 123     }\r
 124 \r
 125     //! Returns true if region boundaries are not defined\r
 126     bool isNull(void) const {\r
 127         return ( !isLeftBoundSpecified() && !isRightBoundSpecified() );\r
 128     }\r
 129 \r
 130     //! Returns true if region has a right boundary\r
 131     bool isRightBoundSpecified(void) const {\r
 132         return ( RightRefID >= 0 && RightPosition >= 1 );\r
 133     }\r
 134 };\r
 135 \r
 136 // ----------------------------------------------------------------\r
 137 // General utility methods\r
 138 \r
 139 /*! \fn bool FileExists(const std::string& filename)\r
 140     \brief returns true if the file exists\r
 141 */\r
 142 API_EXPORT inline bool FileExists(const std::string& filename) {\r
 143     std::ifstream f(filename.c_str(), std::ifstream::in);\r
 144     return !f.fail();\r
 145 }\r
 146 \r
 147 /*! \fn void SwapEndian_16(int16_t& x)\r
 148     \brief swaps endianness of signed 16-bit integer, in place\r
 149 */\r
 150 API_EXPORT inline void SwapEndian_16(int16_t& x) {\r
 151     x = ((x >> 8) | (x << 8));\r
 152 }\r
 153 \r
 154 /*! \fn void SwapEndian_16(uint16_t& x)\r
 155     \brief swaps endianness of unsigned 16-bit integer, in place\r
 156 */\r
 157 API_EXPORT inline void SwapEndian_16(uint16_t& x) {\r
 158     x = ((x >> 8) | (x << 8));\r
 159 }\r
 160 \r
 161 /*! \fn void SwapEndian_32(int32_t& x)\r
 162     \brief swaps endianness of signed 32-bit integer, in place\r
 163 */\r
 164 API_EXPORT inline void SwapEndian_32(int32_t& x) {\r
 165     x = ( (x >> 24) | \r
 166          ((x << 8) & 0x00FF0000) | \r
 167          ((x >> 8) & 0x0000FF00) | \r
 168           (x << 24)\r
 169         );\r
 170 }\r
 171 \r
 172 /*! \fn void SwapEndian_32(uint32_t& x)\r
 173     \brief swaps endianness of unsigned 32-bit integer, in place\r
 174 */\r
 175 API_EXPORT inline void SwapEndian_32(uint32_t& x) {\r
 176     x = ( (x >> 24) | \r
 177          ((x << 8) & 0x00FF0000) | \r
 178          ((x >> 8) & 0x0000FF00) | \r
 179           (x << 24)\r
 180         );\r
 181 }\r
 182 \r
 183 /*! \fn void SwapEndian_64(int64_t& x)\r
 184     \brief swaps endianness of signed 64-bit integer, in place\r
 185 */\r
 186 API_EXPORT inline void SwapEndian_64(int64_t& x) {\r
 187     x = ( (x >> 56) | \r
 188          ((x << 40) & 0x00FF000000000000ll) |\r
 189          ((x << 24) & 0x0000FF0000000000ll) |\r
 190          ((x << 8)  & 0x000000FF00000000ll) |\r
 191          ((x >> 8)  & 0x00000000FF000000ll) |\r
 192          ((x >> 24) & 0x0000000000FF0000ll) |\r
 193          ((x >> 40) & 0x000000000000FF00ll) |\r
 194           (x << 56)\r
 195         );\r
 196 }\r
 197 \r
 198 /*! \fn void SwapEndian_64(uint64_t& x)\r
 199     \brief swaps endianness of unsigned 64-bit integer, in place\r
 200 */\r
 201 API_EXPORT inline void SwapEndian_64(uint64_t& x) {\r
 202     x = ( (x >> 56) | \r
 203          ((x << 40) & 0x00FF000000000000ll) |\r
 204          ((x << 24) & 0x0000FF0000000000ll) |\r
 205          ((x << 8)  & 0x000000FF00000000ll) |\r
 206          ((x >> 8)  & 0x00000000FF000000ll) |\r
 207          ((x >> 24) & 0x0000000000FF0000ll) |\r
 208          ((x >> 40) & 0x000000000000FF00ll) |\r
 209           (x << 56)\r
 210         );\r
 211 }\r
 212 \r
 213 /*! \fn void SwapEndian_16p(char* data)\r
 214     \brief swaps endianness of the next 2 bytes in a buffer, in place\r
 215 */\r
 216 API_EXPORT inline void SwapEndian_16p(char* data) {\r
 217     uint16_t& value = (uint16_t&)*data; \r
 218     SwapEndian_16(value);\r
 219 }\r
 220 \r
 221 /*! \fn void SwapEndian_32p(char* data)\r
 222     \brief swaps endianness of the next 4 bytes in a buffer, in place\r
 223 */\r
 224 API_EXPORT inline void SwapEndian_32p(char* data) {\r
 225     uint32_t& value = (uint32_t&)*data; \r
 226     SwapEndian_32(value);\r
 227 }\r
 228 \r
 229 /*! \fn void SwapEndian_64p(char* data)\r
 230     \brief swaps endianness of the next 8 bytes in a buffer, in place\r
 231 */\r
 232 API_EXPORT inline void SwapEndian_64p(char* data) {\r
 233     uint64_t& value = (uint64_t&)*data; \r
 234     SwapEndian_64(value);\r
 235 }\r
 236 \r
 237 /*! \fn bool SystemIsBigEndian(void)\r
 238     \brief checks host architecture's byte order\r
 239     \return \c true if system uses big-endian ordering\r
 240 */\r
 241 API_EXPORT inline bool SystemIsBigEndian(void) {\r
 242    const uint16_t one = 0x0001;\r
 243    return ((*(char*) &one) == 0 );\r
 244 }\r
 245 \r
 246 /*! \fn void PackUnsignedInt(char* buffer, unsigned int value)\r
 247     \brief stores unsigned integer value in a byte buffer\r
 248 \r
 249     \param[out] buffer destination buffer\r
 250     \param[in]  value  value to 'pack' in buffer\r
 251 */\r
 252 API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) {\r
 253     buffer[0] = (char)value;\r
 254     buffer[1] = (char)(value >> 8);\r
 255     buffer[2] = (char)(value >> 16);\r
 256     buffer[3] = (char)(value >> 24);\r
 257 }\r
 258 \r
 259 /*! \fn void PackUnsignedShort(char* buffer, unsigned short value)\r
 260     \brief stores unsigned short integer value in a byte buffer\r
 261 \r
 262     \param[out] buffer destination buffer\r
 263     \param[in]  value  value to 'pack' in buffer\r
 264 */\r
 265 API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) {\r
 266     buffer[0] = (char)value;\r
 267     buffer[1] = (char)(value >> 8);\r
 268 }\r
 269 \r
 270 /*! \fn double UnpackDouble(const char* buffer)\r
 271     \brief reads a double value from byte buffer\r
 272 \r
 273     \param[in] buffer source byte buffer\r
 274     \return the (double) value read from the buffer\r
 275 */\r
 276 API_EXPORT inline double UnpackDouble(const char* buffer) {\r
 277     union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
 278     un.value = 0;\r
 279     un.valueBuffer[0] = buffer[0];\r
 280     un.valueBuffer[1] = buffer[1];\r
 281     un.valueBuffer[2] = buffer[2];\r
 282     un.valueBuffer[3] = buffer[3];\r
 283     un.valueBuffer[4] = buffer[4];\r
 284     un.valueBuffer[5] = buffer[5];\r
 285     un.valueBuffer[6] = buffer[6];\r
 286     un.valueBuffer[7] = buffer[7];\r
 287     return un.value;\r
 288 }\r
 289 \r
 290 /*! \fn double UnpackDouble(char* buffer)\r
 291     \brief reads a double value from byte buffer\r
 292 \r
 293     This is an overloaded function.\r
 294 \r
 295     \param[in] buffer source byte buffer\r
 296     \return the (double) value read from the buffer\r
 297 */\r
 298 API_EXPORT inline double UnpackDouble(char* buffer) {\r
 299     return UnpackDouble( (const char*)buffer );\r
 300 }\r
 301 \r
 302 /*! \fn double UnpackFloat(const char* buffer)\r
 303     \brief reads a float value from byte buffer\r
 304 \r
 305     \param[in] buffer source byte buffer\r
 306     \return the (float) value read from the buffer\r
 307 */\r
 308 API_EXPORT inline float UnpackFloat(const char* buffer) {\r
 309     union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
 310     un.value = 0;\r
 311     un.valueBuffer[0] = buffer[0];\r
 312     un.valueBuffer[1] = buffer[1];\r
 313     un.valueBuffer[2] = buffer[2];\r
 314     un.valueBuffer[3] = buffer[3];\r
 315     return un.value;\r
 316 }\r
 317 \r
 318 /*! \fn double UnpackFloat(char* buffer)\r
 319     \brief reads a float value from byte buffer\r
 320 \r
 321     This is an overloaded function.\r
 322 \r
 323     \param[in] buffer source byte buffer\r
 324     \return the (float) value read from the buffer\r
 325 */\r
 326 API_EXPORT inline float UnpackFloat(char* buffer) {\r
 327     return UnpackFloat( (const char*)buffer );\r
 328 }\r
 329 \r
 330 /*! \fn signed int UnpackSignedInt(const char* buffer)\r
 331     \brief reads a signed integer value from byte buffer\r
 332 \r
 333     \param[in] buffer source byte buffer\r
 334     \return the (signed int) value read from the buffer\r
 335 */\r
 336 API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r
 337     union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
 338     un.value = 0;\r
 339     un.valueBuffer[0] = buffer[0];\r
 340     un.valueBuffer[1] = buffer[1];\r
 341     un.valueBuffer[2] = buffer[2];\r
 342     un.valueBuffer[3] = buffer[3];\r
 343     return un.value;\r
 344 }\r
 345 \r
 346 /*! \fn signed int UnpackSignedInt(char* buffer)\r
 347     \brief reads a signed integer value from byte buffer\r
 348 \r
 349     This is an overloaded function.\r
 350 \r
 351     \param[in] buffer source byte buffer\r
 352     \return the (signed int) value read from the buffer\r
 353 */\r
 354 API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r
 355     return UnpackSignedInt( (const char*) buffer );\r
 356 }\r
 357 \r
 358 /*! \fn signed short UnpackSignedShort(const char* buffer)\r
 359     \brief reads a signed short integer value from byte buffer\r
 360 \r
 361     \param[in] buffer source byte buffer\r
 362     \return the (signed short) value read from the buffer\r
 363 */\r
 364 API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r
 365     union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
 366     un.value = 0;\r
 367     un.valueBuffer[0] = buffer[0];\r
 368     un.valueBuffer[1] = buffer[1];\r
 369     return un.value;\r
 370 }\r
 371 \r
 372 /*! \fn signed short UnpackSignedShort(char* buffer)\r
 373     \brief reads a signed short integer value from byte buffer\r
 374 \r
 375     This is an overloaded function.\r
 376 \r
 377     \param[in] buffer source byte buffer\r
 378     \return the (signed short) value read from the buffer\r
 379 */\r
 380 API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r
 381     return UnpackSignedShort( (const char*)buffer );\r
 382 }\r
 383 \r
 384 /*! \fn unsigned int UnpackUnsignedInt(const char* buffer)\r
 385     \brief reads an unsigned integer value from byte buffer\r
 386 \r
 387     \param[in] buffer source byte buffer\r
 388     \return the (unsigned int) value read from the buffer\r
 389 */\r
 390 API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r
 391     union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
 392     un.value = 0;\r
 393     un.valueBuffer[0] = buffer[0];\r
 394     un.valueBuffer[1] = buffer[1];\r
 395     un.valueBuffer[2] = buffer[2];\r
 396     un.valueBuffer[3] = buffer[3];\r
 397     return un.value;\r
 398 }\r
 399 \r
 400 /*! \fn unsigned int UnpackUnsignedInt(char* buffer)\r
 401     \brief reads an unsigned integer value from byte buffer\r
 402 \r
 403     This is an overloaded function.\r
 404 \r
 405     \param[in] buffer source byte buffer\r
 406     \return the (unsigned int) value read from the buffer\r
 407 */\r
 408 API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r
 409     return UnpackUnsignedInt( (const char*)buffer );\r
 410 }\r
 411 \r
 412 /*! \fn unsigned short UnpackUnsignedShort(const char* buffer)\r
 413     \brief reads an unsigned short integer value from byte buffer\r
 414 \r
 415     \param[in] buffer source byte buffer\r
 416     \return the (unsigned short) value read from the buffer\r
 417 */\r
 418 API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r
 419     union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
 420     un.value = 0;\r
 421     un.valueBuffer[0] = buffer[0];\r
 422     un.valueBuffer[1] = buffer[1];\r
 423     return un.value;\r
 424 }\r
 425 \r
 426 /*! \fn unsigned short UnpackUnsignedShort(char* buffer)\r
 427     \brief reads an unsigned short integer value from byte buffer\r
 428 \r
 429     This is an overloaded function.\r
 430 \r
 431     \param[in] buffer source byte buffer\r
 432     \return the (unsigned short) value read from the buffer\r
 433 */\r
 434 API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r
 435     return UnpackUnsignedShort( (const char*)buffer );\r
 436 }\r
 437 \r
 438 // ----------------------------------------------------------------\r
 439 // 'internal' helper structs\r
 440 \r
 441 /*! \struct RaiiBuffer\r
 442     \internal\r
 443 */\r
 444 struct RaiiBuffer {\r
 445 \r
 446     // data members\r
 447     char* Buffer;\r
 448     const size_t NumBytes;\r
 449 \r
 450     // ctor & dtor\r
 451     RaiiBuffer(const size_t n)\r
 452         : Buffer( new char[n]() )\r
 453         , NumBytes(n)\r
 454     { }\r
 455 \r
 456     ~RaiiBuffer(void) {\r
 457         delete[] Buffer;\r
 458     }\r
 459 \r
 460     // add'l methods\r
 461     void Clear(void) {\r
 462         memset(Buffer, 0, NumBytes);\r
 463     }\r
 464 };\r
 465 \r
 466 } // namespace BamTools\r
 467 \r
 468 #endif // BAMAUX_H\r