1 // ***************************************************************************
\r
2 // BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
\r
3 // Marth Lab, Department of Biology, Boston College
\r
4 // ---------------------------------------------------------------------------
\r
5 // Last modified: 8 October 2011 (DB)
\r
6 // ---------------------------------------------------------------------------
\r
7 // Provides data structures & utility methods that are used throughout the API.
\r
8 // ***************************************************************************
\r
13 #include <api/api_global.h>
\r
21 Provides data structures & utility methods that are used throughout the API.
\r
23 /*! \namespace BamTools
\r
24 \brief Contains all BamTools classes & methods.
\r
26 The BamTools API contained in this namespace contains classes and methods
\r
27 for reading, writing, and manipulating BAM alignment files.
\r
29 namespace BamTools {
\r
31 // ----------------------------------------------------------------
\r
34 /*! \struct BamTools::CigarOp
\r
35 \brief Represents a CIGAR alignment operation.
\r
37 \sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.
\r
39 struct API_EXPORT CigarOp {
\r
41 char Type; //!< CIGAR operation type (MIDNSHP)
\r
42 uint32_t Length; //!< CIGAR operation length (number of bases)
\r
45 CigarOp(const char type = '\0',
\r
46 const uint32_t& length = 0)
\r
52 // ----------------------------------------------------------------
\r
55 /*! \struct BamTools::RefData
\r
56 \brief Represents a reference sequence entry
\r
58 struct API_EXPORT RefData {
\r
60 std::string RefName; //!< name of reference sequence
\r
61 int32_t RefLength; //!< length of reference sequence
\r
64 RefData(const std::string& name = "",
\r
65 const int32_t& length = 0)
\r
71 //! convenience typedef for vector of RefData entries
\r
72 typedef std::vector<RefData> RefVector;
\r
74 // ----------------------------------------------------------------
\r
77 /*! \struct BamTools::BamRegion
\r
78 \brief Represents a sequential genomic region
\r
80 Allowed to span multiple (sequential) references.
\r
82 \warning BamRegion now represents a zero-based, HALF-OPEN interval.
\r
83 In previous versions of BamTools (0.x & 1.x) all intervals were treated
\r
84 as zero-based, CLOSED. I whole-heartedly apologize for any inconsistencies this
\r
85 may have caused if you assumed that BT was always half-open; full apologies also
\r
86 to those who recognized that BamTools originally used a closed interval, but may
\r
87 need to update their code to reflect this new change.
\r
89 struct API_EXPORT BamRegion {
\r
91 int LeftRefID; //!< reference ID for region's left boundary
\r
92 int LeftPosition; //!< position for region's left boundary
\r
93 int RightRefID; //!< reference ID for region's right boundary
\r
94 int RightPosition; //!< position for region's right boundary
\r
97 BamRegion(const int& leftID = -1,
\r
98 const int& leftPos = -1,
\r
99 const int& rightID = -1,
\r
100 const int& rightPos = -1)
\r
101 : LeftRefID(leftID)
\r
102 , LeftPosition(leftPos)
\r
103 , RightRefID(rightID)
\r
104 , RightPosition(rightPos)
\r
107 //! copy constructor
\r
108 BamRegion(const BamRegion& other)
\r
109 : LeftRefID(other.LeftRefID)
\r
110 , LeftPosition(other.LeftPosition)
\r
111 , RightRefID(other.RightRefID)
\r
112 , RightPosition(other.RightPosition)
\r
115 //! Clears region boundaries
\r
117 LeftRefID = -1; LeftPosition = -1;
\r
118 RightRefID = -1; RightPosition = -1;
\r
121 //! Returns true if region has a left boundary
\r
122 bool isLeftBoundSpecified(void) const {
\r
123 return ( LeftRefID >= 0 && LeftPosition >= 0 );
\r
126 //! Returns true if region boundaries are not defined
\r
127 bool isNull(void) const {
\r
128 return ( !isLeftBoundSpecified() && !isRightBoundSpecified() );
\r
131 //! Returns true if region has a right boundary
\r
132 bool isRightBoundSpecified(void) const {
\r
133 return ( RightRefID >= 0 && RightPosition >= 1 );
\r
137 // ----------------------------------------------------------------
\r
138 // General utility methods
\r
/*! \fn bool FileExists(const std::string& filename)
    \brief checks if file exists

    Attempts to open file in a read-only mode. A file that exists but cannot
    be opened for reading is therefore reported as not existing.

    \param filename path of the file to probe
    \return \c true if file can be opened successfully
*/
API_EXPORT inline bool FileExists(const std::string& filename) {
    std::ifstream f(filename.c_str(), std::ifstream::in);
    // The stream's fail state right after construction tells us whether the open worked.
    return !f.fail();
}
\r
/*! \fn void SwapEndian_16(int16_t& x)
    \brief swaps endianness of signed 16-bit integer, in place

    Swaps endian representation of value in \a x.

    The swap is performed on the unsigned bit pattern: shifting the promoted
    signed value would sign-extend negative inputs into the swapped result.

    \param x value whose two bytes are exchanged
*/
API_EXPORT inline void SwapEndian_16(int16_t& x) {
    const uint16_t bits = static_cast<uint16_t>(x);
    const uint16_t swapped = static_cast<uint16_t>( (bits >> 8) | (bits << 8) );
    x = static_cast<int16_t>(swapped);
}
\r
/*! \fn void SwapEndian_16(uint16_t& x)
    \brief swaps endianness of unsigned 16-bit integer, in place

    Swaps endian representation of value in \a x.

    \param x value whose two bytes are exchanged
*/
API_EXPORT inline void SwapEndian_16(uint16_t& x) {
    // The operands are promoted to int for the shifts; the cast drops the bits above 16.
    x = static_cast<uint16_t>( (x >> 8) | (x << 8) );
}
\r
/*! \fn void SwapEndian_32(int32_t& x)
    \brief swaps endianness of signed 32-bit integer, in place

    Swaps endian representation of value in \a x.

    The swap is performed on the unsigned bit pattern so that negative values
    are neither left-shifted as signed integers nor sign-extended on the
    right shifts.

    \param x value whose four bytes are reversed
*/
API_EXPORT inline void SwapEndian_32(int32_t& x) {
    const uint32_t bits = static_cast<uint32_t>(x);
    const uint32_t swapped = (  (bits >> 24)                |
                               ((bits <<  8) & 0x00FF0000u) |
                               ((bits >>  8) & 0x0000FF00u) |
                                (bits << 24) );
    x = static_cast<int32_t>(swapped);
}
\r
/*! \fn void SwapEndian_32(uint32_t& x)
    \brief swaps endianness of unsigned 32-bit integer, in place

    Swaps endian representation of value in \a x.

    \param x value whose four bytes are reversed
*/
API_EXPORT inline void SwapEndian_32(uint32_t& x) {
    x = (  (x >> 24)                |
          ((x <<  8) & 0x00FF0000u) |
          ((x >>  8) & 0x0000FF00u) |
           (x << 24) );
}
\r
/*! \fn void SwapEndian_64(int64_t& x)
    \brief swaps endianness of signed 64-bit integer, in place

    Swaps endian representation of value in \a x.

    The swap is performed on the unsigned bit pattern so that negative values
    are neither left-shifted as signed integers nor sign-extended on the
    right shifts.

    \param x value whose eight bytes are reversed
*/
API_EXPORT inline void SwapEndian_64(int64_t& x) {
    const uint64_t bits = static_cast<uint64_t>(x);
    const uint64_t swapped = (  (bits >> 56)                           |
                               ((bits << 40) & 0x00FF000000000000ull) |
                               ((bits << 24) & 0x0000FF0000000000ull) |
                               ((bits <<  8) & 0x000000FF00000000ull) |
                               ((bits >>  8) & 0x00000000FF000000ull) |
                               ((bits >> 24) & 0x0000000000FF0000ull) |
                               ((bits >> 40) & 0x000000000000FF00ull) |
                                (bits << 56) );
    x = static_cast<int64_t>(swapped);
}
\r
/*! \fn void SwapEndian_64(uint64_t& x)
    \brief swaps endianness of unsigned 64-bit integer, in place

    Swaps endian representation of value in \a x.

    \param x value whose eight bytes are reversed
*/
API_EXPORT inline void SwapEndian_64(uint64_t& x) {
    // Unsigned masks keep the whole expression in unsigned arithmetic.
    x = (  (x >> 56)                           |
          ((x << 40) & 0x00FF000000000000ull) |
          ((x << 24) & 0x0000FF0000000000ull) |
          ((x <<  8) & 0x000000FF00000000ull) |
          ((x >>  8) & 0x00000000FF000000ull) |
          ((x >> 24) & 0x0000000000FF0000ull) |
          ((x >> 40) & 0x000000000000FF00ull) |
           (x << 56) );
}
\r
/*! \fn void SwapEndian_16p(char* data)
    \brief swaps endianness of the next 2 bytes in a buffer, in place

    Swaps endian representation of the next 2 bytes in \a data.

    The bytes are exchanged directly rather than by reinterpreting the buffer
    as a uint16_t, so \a data needs no particular alignment.

    \param data buffer holding at least 2 bytes
*/
API_EXPORT inline void SwapEndian_16p(char* data) {
    const char first = data[0];
    data[0] = data[1];
    data[1] = first;
}
\r
/*! \fn void SwapEndian_32p(char* data)
    \brief swaps endianness of the next 4 bytes in a buffer, in place

    Swaps endian representation of the next 4 bytes in \a data.

    The bytes are reversed directly rather than by reinterpreting the buffer
    as a uint32_t, so \a data needs no particular alignment.

    \param data buffer holding at least 4 bytes
*/
API_EXPORT inline void SwapEndian_32p(char* data) {
    // reverse the 4-byte run: exchange the outer pair, then the inner pair
    const char outer = data[0];
    data[0] = data[3];
    data[3] = outer;

    const char inner = data[1];
    data[1] = data[2];
    data[2] = inner;
}
\r
/*! \fn void SwapEndian_64p(char* data)
    \brief swaps endianness of the next 8 bytes in a buffer, in place

    Swaps endian representation of the next 8 bytes in \a data.

    The bytes are reversed directly rather than by reinterpreting the buffer
    as a uint64_t, so \a data needs no particular alignment.

    \param data buffer holding at least 8 bytes
*/
API_EXPORT inline void SwapEndian_64p(char* data) {
    // walk inwards from both ends of the 8-byte run, exchanging as we go
    for ( int lo = 0, hi = 7; lo < hi; ++lo, --hi ) {
        const char held = data[lo];
        data[lo] = data[hi];
        data[hi] = held;
    }
}
\r
/*! \fn bool SystemIsBigEndian(void)
    \brief checks host architecture's byte order

    Inspects the first byte in memory of a 16-bit integer holding the value 1.

    \return \c true if system uses big-endian ordering
*/
API_EXPORT inline bool SystemIsBigEndian(void) {
    const uint16_t one = 0x0001;
    // On a big-endian host the first byte in memory is the high-order (zero) byte.
    const char* firstByte = reinterpret_cast<const char*>(&one);
    return ( *firstByte == 0 );
}
\r
/*! \fn void PackUnsignedInt(char* buffer, unsigned int value)
    \brief stores unsigned integer value in a byte buffer

    Writes the four low-order bytes of \a value into \a buffer,
    least-significant byte first.

    \param buffer destination buffer (at least 4 bytes)
    \param value  unsigned integer to 'pack' in buffer
*/
API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) {
    buffer[0] = static_cast<char>(value);
    buffer[1] = static_cast<char>(value >> 8);
    buffer[2] = static_cast<char>(value >> 16);
    buffer[3] = static_cast<char>(value >> 24);
}
\r
/*! \fn void PackUnsignedShort(char* buffer, unsigned short value)
    \brief stores unsigned short integer value in a byte buffer

    Writes the two low-order bytes of \a value into \a buffer,
    least-significant byte first.

    \param buffer destination buffer (at least 2 bytes)
    \param value  unsigned short integer to 'pack' in buffer
*/
API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) {
    buffer[0] = static_cast<char>(value);
    buffer[1] = static_cast<char>(value >> 8);
}
\r
/*! \fn double UnpackDouble(const char* buffer)
    \brief reads a double value from byte buffer

    Copies the first 8 bytes of \a buffer, in buffer order, into the storage
    of a double. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 8 bytes)
    \return the (double) value read from the buffer
*/
API_EXPORT inline double UnpackDouble(const char* buffer) {
    union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 8; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn double UnpackDouble(char* buffer)
    \brief reads a double value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 8 bytes)
    \return the (double) value read from the buffer
*/
API_EXPORT inline double UnpackDouble(char* buffer) {
    return UnpackDouble( static_cast<const char*>(buffer) );
}
\r
/*! \fn float UnpackFloat(const char* buffer)
    \brief reads a float value from byte buffer

    Copies the first 4 bytes of \a buffer, in buffer order, into the storage
    of a float. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (float) value read from the buffer
*/
API_EXPORT inline float UnpackFloat(const char* buffer) {
    union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 4; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn float UnpackFloat(char* buffer)
    \brief reads a float value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (float) value read from the buffer
*/
API_EXPORT inline float UnpackFloat(char* buffer) {
    return UnpackFloat( static_cast<const char*>(buffer) );
}
\r
/*! \fn signed int UnpackSignedInt(const char* buffer)
    \brief reads a signed integer value from byte buffer

    Copies the first 4 bytes of \a buffer, in buffer order, into the storage
    of a signed int. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (signed int) value read from the buffer
*/
API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {
    union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 4; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn signed int UnpackSignedInt(char* buffer)
    \brief reads a signed integer value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (signed int) value read from the buffer
*/
API_EXPORT inline signed int UnpackSignedInt(char* buffer) {
    return UnpackSignedInt( static_cast<const char*>(buffer) );
}
\r
/*! \fn signed short UnpackSignedShort(const char* buffer)
    \brief reads a signed short integer value from byte buffer

    Copies the first 2 bytes of \a buffer, in buffer order, into the storage
    of a signed short. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 2 bytes)
    \return the (signed short) value read from the buffer
*/
API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {
    union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 2; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn signed short UnpackSignedShort(char* buffer)
    \brief reads a signed short integer value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 2 bytes)
    \return the (signed short) value read from the buffer
*/
API_EXPORT inline signed short UnpackSignedShort(char* buffer) {
    return UnpackSignedShort( static_cast<const char*>(buffer) );
}
\r
/*! \fn unsigned int UnpackUnsignedInt(const char* buffer)
    \brief reads an unsigned integer value from byte buffer

    Copies the first 4 bytes of \a buffer, in buffer order, into the storage
    of an unsigned int. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (unsigned int) value read from the buffer
*/
API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {
    union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 4; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn unsigned int UnpackUnsignedInt(char* buffer)
    \brief reads an unsigned integer value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 4 bytes)
    \return the (unsigned int) value read from the buffer
*/
API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {
    return UnpackUnsignedInt( static_cast<const char*>(buffer) );
}
\r
/*! \fn unsigned short UnpackUnsignedShort(const char* buffer)
    \brief reads an unsigned short integer value from byte buffer

    Copies the first 2 bytes of \a buffer, in buffer order, into the storage
    of an unsigned short. No byte-order conversion is applied here.

    \param buffer source byte buffer (at least 2 bytes)
    \return the (unsigned short) value read from the buffer
*/
API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {
    union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
    un.value = 0;
    for ( unsigned int i = 0; i < 2; ++i )
        un.valueBuffer[i] = static_cast<unsigned char>(buffer[i]);
    // NOTE(review): reading 'value' after filling 'valueBuffer' relies on the
    // compiler supporting type punning through a union.
    return un.value;
}

/*! \fn unsigned short UnpackUnsignedShort(char* buffer)
    \brief reads an unsigned short integer value from byte buffer

    This is an overloaded function.

    \param buffer source byte buffer (at least 2 bytes)
    \return the (unsigned short) value read from the buffer
*/
API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {
    return UnpackUnsignedShort( static_cast<const char*>(buffer) );
}
\r
461 // ----------------------------------------------------------------
\r
462 // 'internal' helper structs
\r
464 struct RaiiBuffer {
\r
465 RaiiBuffer(const unsigned int n)
\r
466 : Buffer( new char[n]() )
\r
468 ~RaiiBuffer(void) {
\r
474 } // namespace BamTools
\r