src/api/BamAux.h

   1 // ***************************************************************************\r
// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
   3 // Marth Lab, Department of Biology, Boston College\r
   4 // All rights reserved.\r
   5 // ---------------------------------------------------------------------------\r
   6 // Last modified: 4 March 2011 (DB)\r
   7 // ---------------------------------------------------------------------------\r
   8 // Provides data structures & utility methods that are used throughout the API.\r
   9 // ***************************************************************************\r
  10 \r
  11 #ifndef BAMAUX_H\r
  12 #define BAMAUX_H\r
  13 \r
#include <api/api_global.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
  19 \r
  20 /*! \file BamAux.h\r
  21 \r
  22     Provides data structures & utility methods that are used throughout the API.\r
  23 */\r
  24 /*! \namespace BamTools\r
  25     \brief Contains all BamTools classes & methods.\r
  26 \r
  27     The BamTools API contained in this namespace contains classes and methods\r
  28     for reading, writing, and manipulating BAM alignment files.\r
  29 */\r
  30 namespace BamTools {\r
  31 \r
  32 // ----------------------------------------------------------------\r
  33 // CigarOp\r
  34 \r
  35 /*! \struct BamTools::CigarOp\r
  36     \brief Represents a CIGAR alignment operation.\r
  37 \r
  38     \sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.\r
  39 */\r
  40 struct API_EXPORT CigarOp {\r
  41   \r
  42     char     Type;   //!< CIGAR operation type (MIDNSHP)\r
  43     uint32_t Length; //!< CIGAR operation length (number of bases)\r
  44     \r
  45     //! constructor\r
  46     CigarOp(const char type = '\0', \r
  47             const uint32_t& length = 0)\r
  48         : Type(type)\r
  49         , Length(length) \r
  50     { }\r
  51 };\r
  52 \r
  53 // ----------------------------------------------------------------\r
  54 // RefData\r
  55 \r
  56 /*! \struct BamTools::RefData\r
  57     \brief Represents a reference sequence entry\r
  58 */\r
  59 struct API_EXPORT RefData {\r
  60    \r
  61     std::string RefName;    //!< name of reference sequence\r
  62     int32_t     RefLength;  //!< length of reference sequence\r
  63     \r
  64     //! constructor\r
  65     RefData(const std::string& name = "",\r
  66             const int32_t& length = 0)\r
  67         : RefName(name)\r
  68         , RefLength(length)\r
  69     { }\r
  70 };\r
  71 \r
//! Convenience typedef: a std::vector holding RefData (reference sequence) entries.
typedef std::vector<RefData> RefVector;
  74 \r
  75 // ----------------------------------------------------------------\r
  76 // BamRegion\r
  77 \r
  78 /*! \struct BamTools::BamRegion\r
  79     \brief Represents a sequential genomic region\r
  80 \r
  81     Allowed to span multiple (sequential) references.\r
  82 */\r
  83 struct API_EXPORT BamRegion {\r
  84   \r
  85     int LeftRefID;      //!< reference ID for region's left boundary\r
  86     int LeftPosition;   //!< position for region's left boundary\r
  87     int RightRefID;     //!< reference ID for region's right boundary\r
  88     int RightPosition;  //!< position for region's right boundary\r
  89     \r
  90     //! constructor\r
  91     BamRegion(const int& leftID   = -1, \r
  92               const int& leftPos  = -1,\r
  93               const int& rightID  = -1,\r
  94               const int& rightPos = -1)\r
  95         : LeftRefID(leftID)\r
  96         , LeftPosition(leftPos)\r
  97         , RightRefID(rightID)\r
  98         , RightPosition(rightPos)\r
  99     { }\r
 100     \r
 101     //! copy constructor\r
 102     BamRegion(const BamRegion& other)\r
 103         : LeftRefID(other.LeftRefID)\r
 104         , LeftPosition(other.LeftPosition)\r
 105         , RightRefID(other.RightRefID)\r
 106         , RightPosition(other.RightPosition)\r
 107     { }\r
 108     \r
 109     //! Clears region boundaries\r
 110     void clear(void) {\r
 111         LeftRefID  = -1; LeftPosition  = -1;\r
 112         RightRefID = -1; RightPosition = -1;\r
 113     }\r
 114 \r
 115     //! Returns true if region has a left boundary\r
 116     bool isLeftBoundSpecified(void) const {\r
 117         return ( LeftRefID >= 0 && LeftPosition >= 0 );\r
 118     }\r
 119 \r
 120     //! Returns true if region boundaries are not defined\r
 121     bool isNull(void) const {\r
 122         return ( !isLeftBoundSpecified() && !isRightBoundSpecified() );\r
 123     }\r
 124 \r
 125     //! Returns true if region has a right boundary\r
 126     bool isRightBoundSpecified(void) const {\r
 127         return ( RightRefID >= 0 && RightPosition >= 0 );\r
 128     }\r
 129 };\r
 130 \r
 131 // ----------------------------------------------------------------\r
 132 // General utility methods\r
 133 \r
 134 /*! \fn bool FileExists(const std::string& filename)\r
 135     \brief checks if file exists\r
 136 \r
 137     Attempts to open file in a read-only mode.\r
 138 \r
 139     \return \c true if file can be opened successfully\r
 140 */\r
 141 API_EXPORT inline bool FileExists(const std::string& filename) {\r
 142     std::ifstream f(filename.c_str(), std::ifstream::in);\r
 143     return !f.fail();\r
 144 }\r
 145 \r
 146 /*! \fn void SwapEndian_16(int16_t& x)\r
 147     \brief swaps endianness of signed 16-bit integer, in place\r
 148 \r
 149     Swaps endian representation of value in \a x.\r
 150 */\r
 151 API_EXPORT inline void SwapEndian_16(int16_t& x) {\r
 152     x = ((x >> 8) | (x << 8));\r
 153 }\r
 154 \r
 155 /*! \fn void SwapEndian_16(uint16_t& x)\r
 156     \brief swaps endianness of unsigned 16-bit integer, in place\r
 157 \r
 158     Swaps endian representation of value in \a x.\r
 159 */\r
 160 API_EXPORT inline void SwapEndian_16(uint16_t& x) {\r
 161     x = ((x >> 8) | (x << 8));\r
 162 }\r
 163 \r
 164 /*! \fn void SwapEndian_32(int32_t& x)\r
 165     \brief swaps endianness of signed 32-bit integer, in place\r
 166 \r
 167     Swaps endian representation of value in \a x.\r
 168 */\r
 169 API_EXPORT inline void SwapEndian_32(int32_t& x) {\r
 170     x = ( (x >> 24) | \r
 171          ((x << 8) & 0x00FF0000) | \r
 172          ((x >> 8) & 0x0000FF00) | \r
 173           (x << 24)\r
 174         );\r
 175 }\r
 176 \r
 177 /*! \fn void SwapEndian_32(uint32_t& x)\r
 178     \brief swaps endianness of unsigned 32-bit integer, in place\r
 179 \r
 180     Swaps endian representation of value in \a x.\r
 181 */\r
 182 API_EXPORT inline void SwapEndian_32(uint32_t& x) {\r
 183     x = ( (x >> 24) | \r
 184          ((x << 8) & 0x00FF0000) | \r
 185          ((x >> 8) & 0x0000FF00) | \r
 186           (x << 24)\r
 187         );\r
 188 }\r
 189 \r
 190 /*! \fn void SwapEndian_64(int64_t& x)\r
 191     \brief swaps endianness of signed 64-bit integer, in place\r
 192 \r
 193     Swaps endian representation of value in \a x.\r
 194 */\r
 195 API_EXPORT inline void SwapEndian_64(int64_t& x) {\r
 196     x = ( (x >> 56) | \r
 197          ((x << 40) & 0x00FF000000000000ll) |\r
 198          ((x << 24) & 0x0000FF0000000000ll) |\r
 199          ((x << 8)  & 0x000000FF00000000ll) |\r
 200          ((x >> 8)  & 0x00000000FF000000ll) |\r
 201          ((x >> 24) & 0x0000000000FF0000ll) |\r
 202          ((x >> 40) & 0x000000000000FF00ll) |\r
 203           (x << 56)\r
 204         );\r
 205 }\r
 206 \r
 207 /*! \fn void SwapEndian_64(uint64_t& x)\r
 208     \brief swaps endianness of unsigned 64-bit integer, in place\r
 209 \r
 210     Swaps endian representation of value in \a x.\r
 211 */\r
 212 API_EXPORT inline void SwapEndian_64(uint64_t& x) {\r
 213     x = ( (x >> 56) | \r
 214          ((x << 40) & 0x00FF000000000000ll) |\r
 215          ((x << 24) & 0x0000FF0000000000ll) |\r
 216          ((x << 8)  & 0x000000FF00000000ll) |\r
 217          ((x >> 8)  & 0x00000000FF000000ll) |\r
 218          ((x >> 24) & 0x0000000000FF0000ll) |\r
 219          ((x >> 40) & 0x000000000000FF00ll) |\r
 220           (x << 56)\r
 221         );\r
 222 }\r
 223 \r
 224 /*! \fn void SwapEndian_16p(char* data)\r
 225     \brief swaps endianness of the next 2 bytes in a buffer, in place\r
 226 \r
 227     Swaps endian representation the next 2 bytes in \a data.\r
 228 */\r
 229 API_EXPORT inline void SwapEndian_16p(char* data) {\r
 230     uint16_t& value = (uint16_t&)*data; \r
 231     SwapEndian_16(value);\r
 232 }\r
 233 \r
 234 /*! \fn void SwapEndian_32p(char* data)\r
 235     \brief swaps endianness of the next 4 bytes in a buffer, in place\r
 236 \r
 237     Swaps endian representation the next 4 bytes in \a data.\r
 238 */\r
 239 API_EXPORT inline void SwapEndian_32p(char* data) {\r
 240     uint32_t& value = (uint32_t&)*data; \r
 241     SwapEndian_32(value);\r
 242 }\r
 243 \r
 244 /*! \fn void SwapEndian_64p(char* data)\r
 245     \brief swaps endianness of the next 8 bytes in a buffer, in place\r
 246 \r
 247     Swaps endian representation the next 8 bytes in \a data.\r
 248 */\r
 249 API_EXPORT inline void SwapEndian_64p(char* data) {\r
 250     uint64_t& value = (uint64_t&)*data; \r
 251     SwapEndian_64(value);\r
 252 }\r
 253 \r
 254 /*! \fn bool SystemIsBigEndian(void)\r
 255     \brief checks host architecture's byte order\r
 256     \return \c true if system uses big-endian ordering\r
 257 */\r
 258 API_EXPORT inline bool SystemIsBigEndian(void) {\r
 259    const uint16_t one = 0x0001;\r
 260    return ((*(char*) &one) == 0 );\r
 261 }\r
 262 \r
 263 /*! \fn void PackUnsignedInt(char* buffer, unsigned int value)\r
 264     \brief stores unsigned integer value in a byte buffer\r
 265 \r
 266     \param buffer destination buffer\r
 267     \param value  unsigned integer to 'pack' in buffer\r
 268 */\r
 269 API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) {\r
 270     buffer[0] = (char)value;\r
 271     buffer[1] = (char)(value >> 8);\r
 272     buffer[2] = (char)(value >> 16);\r
 273     buffer[3] = (char)(value >> 24);\r
 274 }\r
 275 \r
 276 /*! \fn void PackUnsignedShort(char* buffer, unsigned short value)\r
 277     \brief stores unsigned short integer value in a byte buffer\r
 278 \r
 279     \param buffer destination buffer\r
 280     \param value  unsigned short integer to 'pack' in buffer\r
 281 */\r
 282 API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) {\r
 283     buffer[0] = (char)value;\r
 284     buffer[1] = (char)(value >> 8);\r
 285 }\r
 286 \r
 287 /*! \fn double UnpackDouble(const char* buffer)\r
 288     \brief reads a double value from byte buffer\r
 289 \r
 290     \param buffer source byte buffer\r
 291     \return the (double) value read from the buffer\r
 292 */\r
 293 API_EXPORT inline double UnpackDouble(const char* buffer) {\r
 294     union { double value; unsigned char valueBuffer[sizeof(double)]; } un;\r
 295     un.value = 0;\r
 296     un.valueBuffer[0] = buffer[0];\r
 297     un.valueBuffer[1] = buffer[1];\r
 298     un.valueBuffer[2] = buffer[2];\r
 299     un.valueBuffer[3] = buffer[3];\r
 300     un.valueBuffer[4] = buffer[4];\r
 301     un.valueBuffer[5] = buffer[5];\r
 302     un.valueBuffer[6] = buffer[6];\r
 303     un.valueBuffer[7] = buffer[7];\r
 304     return un.value;\r
 305 }\r
 306 \r
 307 /*! \fn double UnpackDouble(char* buffer)\r
 308     \brief reads a double value from byte buffer\r
 309 \r
 310     This is an overloaded function.\r
 311 \r
 312     \param buffer source byte buffer\r
 313     \return the (double) value read from the buffer\r
 314 */\r
 315 API_EXPORT inline double UnpackDouble(char* buffer) {\r
 316     return UnpackDouble( (const char*)buffer );\r
 317 }\r
 318 \r
 319 /*! \fn double UnpackFloat(const char* buffer)\r
 320     \brief reads a float value from byte buffer\r
 321 \r
 322     \param buffer source byte buffer\r
 323     \return the (float) value read from the buffer\r
 324 */\r
 325 API_EXPORT inline float UnpackFloat(const char* buffer) {\r
 326     union { float value; unsigned char valueBuffer[sizeof(float)]; } un;\r
 327     un.value = 0;\r
 328     un.valueBuffer[0] = buffer[0];\r
 329     un.valueBuffer[1] = buffer[1];\r
 330     un.valueBuffer[2] = buffer[2];\r
 331     un.valueBuffer[3] = buffer[3];\r
 332     return un.value;\r
 333 }\r
 334 \r
 335 /*! \fn double UnpackFloat(char* buffer)\r
 336     \brief reads a float value from byte buffer\r
 337 \r
 338     This is an overloaded function.\r
 339 \r
 340     \param buffer source byte buffer\r
 341     \return the (float) value read from the buffer\r
 342 */\r
 343 API_EXPORT inline float UnpackFloat(char* buffer) {\r
 344     return UnpackFloat( (const char*)buffer );\r
 345 }\r
 346 \r
 347 /*! \fn signed int UnpackSignedInt(const char* buffer)\r
 348     \brief reads a signed integer value from byte buffer\r
 349 \r
 350     \param buffer source byte buffer\r
 351     \return the (signed int) value read from the buffer\r
 352 */\r
 353 API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r
 354     union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r
 355     un.value = 0;\r
 356     un.valueBuffer[0] = buffer[0];\r
 357     un.valueBuffer[1] = buffer[1];\r
 358     un.valueBuffer[2] = buffer[2];\r
 359     un.valueBuffer[3] = buffer[3];\r
 360     return un.value;\r
 361 }\r
 362 \r
 363 /*! \fn signed int UnpackSignedInt(char* buffer)\r
 364     \brief reads a signed integer value from byte buffer\r
 365 \r
 366     This is an overloaded function.\r
 367 \r
 368     \param buffer source byte buffer\r
 369     \return the (signed int) value read from the buffer\r
 370 */\r
 371 API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r
 372     return UnpackSignedInt( (const char*) buffer );\r
 373 }\r
 374 \r
 375 /*! \fn signed short UnpackSignedShort(const char* buffer)\r
 376     \brief reads a signed short integer value from byte buffer\r
 377 \r
 378     \param buffer source byte buffer\r
 379     \return the (signed short) value read from the buffer\r
 380 */\r
 381 API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r
 382     union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r
 383     un.value = 0;\r
 384     un.valueBuffer[0] = buffer[0];\r
 385     un.valueBuffer[1] = buffer[1];\r
 386     return un.value;\r
 387 }\r
 388 \r
 389 /*! \fn signed short UnpackSignedShort(char* buffer)\r
 390     \brief reads a signed short integer value from byte buffer\r
 391 \r
 392     This is an overloaded function.\r
 393 \r
 394     \param buffer source byte buffer\r
 395     \return the (signed short) value read from the buffer\r
 396 */\r
 397 API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r
 398     return UnpackSignedShort( (const char*)buffer );\r
 399 }\r
 400 \r
 401 /*! \fn unsigned int UnpackUnsignedInt(const char* buffer)\r
 402     \brief reads an unsigned integer value from byte buffer\r
 403 \r
 404     \param buffer source byte buffer\r
 405     \return the (unsigned int) value read from the buffer\r
 406 */\r
 407 API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r
 408     union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r
 409     un.value = 0;\r
 410     un.valueBuffer[0] = buffer[0];\r
 411     un.valueBuffer[1] = buffer[1];\r
 412     un.valueBuffer[2] = buffer[2];\r
 413     un.valueBuffer[3] = buffer[3];\r
 414     return un.value;\r
 415 }\r
 416 \r
 417 /*! \fn unsigned int UnpackUnsignedInt(char* buffer)\r
 418     \brief reads an unsigned integer value from byte buffer\r
 419 \r
 420     This is an overloaded function.\r
 421 \r
 422     \param buffer source byte buffer\r
 423     \return the (unsigned int) value read from the buffer\r
 424 */\r
 425 API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r
 426     return UnpackUnsignedInt( (const char*)buffer );\r
 427 }\r
 428 \r
 429 /*! \fn unsigned short UnpackUnsignedShort(const char* buffer)\r
 430     \brief reads an unsigned short integer value from byte buffer\r
 431 \r
 432     \param buffer source byte buffer\r
 433     \return the (unsigned short) value read from the buffer\r
 434 */\r
 435 API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r
 436     union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r
 437     un.value = 0;\r
 438     un.valueBuffer[0] = buffer[0];\r
 439     un.valueBuffer[1] = buffer[1];\r
 440     return un.value;\r
 441 }\r
 442 \r
 443 /*! \fn unsigned short UnpackUnsignedShort(char* buffer)\r
 444     \brief reads an unsigned short integer value from byte buffer\r
 445 \r
 446     This is an overloaded function.\r
 447 \r
 448     \param buffer source byte buffer\r
 449     \return the (unsigned short) value read from the buffer\r
 450 */\r
 451 API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r
 452     return UnpackUnsignedShort( (const char*)buffer );\r
 453 }\r
 454 \r
 455 } // namespace BamTools\r
 456 \r
 457 #endif // BAMAUX_H\r