1 // ***************************************************************************
\r
2 // BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
\r
3 // Marth Lab, Department of Biology, Boston College
\r
4 // All rights reserved.
\r
5 // ---------------------------------------------------------------------------
\r
6 // Last modified: 19 November 2010 (DB)
\r
7 // ---------------------------------------------------------------------------
\r
8 // Provides the basic constants, data structures, utilities etc.
\r
9 // used throughout the API for handling BAM files
\r
10 // ***************************************************************************
\r
15 #include <api/api_global.h>
\r
22 // Platform-specific large-file support
\r
23 #ifndef BAMTOOLS_LFS
\r
24 #define BAMTOOLS_LFS
\r
26 #define ftell64(a) _ftelli64(a)
\r
27 #define fseek64(a,b,c) _fseeki64(a,b,c)
\r
29 #define ftell64(a) ftello(a)
\r
30 #define fseek64(a,b,c) fseeko(a,b,c)
\r
32 #endif // BAMTOOLS_LFS
\r
34 // Platform-specific type definitions
\r
35 #ifndef BAMTOOLS_TYPES
\r
36 #define BAMTOOLS_TYPES
\r
// 8-bit signed integer.
// Spelled 'signed char' on purpose: the signedness of plain 'char' is
// implementation-defined (and can be flipped by compiler switches), which
// would silently turn int8_t into an unsigned type.
typedef signed char int8_t;
\r
39 typedef unsigned char uint8_t;
\r
40 typedef short int16_t;
\r
41 typedef unsigned short uint16_t;
\r
42 typedef int int32_t;
\r
43 typedef unsigned int uint32_t;
\r
44 typedef long long int64_t;
\r
45 typedef unsigned long long uint64_t;
\r
49 #endif // BAMTOOLS_TYPES
\r
51 namespace BamTools {
\r
53 // ----------------------------------------------------------------
\r
54 // ----------------------------------------------------------------
\r
// CIGAR operation codes.
// NOTE(review): the seven names appear to line up, in order, with the
// "MIDNSHP" operation letters mentioned on CigarOp::Type - confirm against
// the code that decodes packed CIGAR values.
const int BAM_CMATCH     = 0;
const int BAM_CINS       = 1;
const int BAM_CDEL       = 2;
const int BAM_CREF_SKIP  = 3;
const int BAM_CSOFT_CLIP = 4;
const int BAM_CHARD_CLIP = 5;
const int BAM_CPAD       = 6;

// Bit layout helpers for a packed CIGAR value: the mask selects the low
// BAM_CIGAR_SHIFT bits (it evaluates to 15).
const int BAM_CIGAR_SHIFT = 4;
const int BAM_CIGAR_MASK  = (1 << BAM_CIGAR_SHIFT) - 1;

// Size constants.
// NOTE(review): presumably byte counts used when parsing alignment records -
// confirm against the reader code.
const int BAM_CORE_SIZE = 32;
const int BT_SIZEOF_INT = 4;
\r
69 // ----------------------------------------------------------------
\r
70 // ----------------------------------------------------------------
\r
71 // Data structs & typedefs
\r
73 // CIGAR operation data structure
\r
74 struct API_EXPORT CigarOp {
\r
77 char Type; // Operation type (MIDNSHP)
\r
78 uint32_t Length; // Operation length (number of bases)
\r
81 CigarOp(const char type = '\0',
\r
82 const uint32_t length = 0)
\r
88 // Reference data entry
\r
89 struct API_EXPORT RefData {
\r
92 std::string RefName; // Name of reference sequence
\r
93 int32_t RefLength; // Length of reference sequence
\r
94 bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence
\r
97 RefData(const int32_t& length = 0,
\r
100 , RefHasAlignments(ok)
\r
103 typedef std::vector<RefData> RefVector;
\r
105 // General (sequential) genome region
\r
106 struct API_EXPORT BamRegion {
\r
115 BamRegion(const int& leftID = -1,
\r
116 const int& leftPos = -1,
\r
117 const int& rightID = -1,
\r
118 const int& rightPos = -1)
\r
119 : LeftRefID(leftID)
\r
120 , LeftPosition(leftPos)
\r
121 , RightRefID(rightID)
\r
122 , RightPosition(rightPos)
\r
125 // copy constructor
\r
126 BamRegion(const BamRegion& other)
\r
127 : LeftRefID(other.LeftRefID)
\r
128 , LeftPosition(other.LeftPosition)
\r
129 , RightRefID(other.RightRefID)
\r
130 , RightPosition(other.RightPosition)
\r
133 // member functions
\r
134 void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }
\r
135 bool isLeftBoundSpecified(void) const { return ( LeftRefID >= 0 && LeftPosition >= 0 ); }
\r
136 bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); }
\r
137 bool isRightBoundSpecified(void) const { return ( RightRefID >= 0 && RightPosition >= 0 ); }
\r
140 // ----------------------------------------------------------------
\r
141 // ----------------------------------------------------------------
\r
142 // General utilities
\r
// Returns true if the host stores multi-byte integers most-significant byte
// first (big endian), false otherwise.
//
// Works by inspecting the first byte in memory of a 16-bit value holding 1:
// on a big-endian host that byte is the (zero) high-order byte.
inline bool SystemIsBigEndian(void) {
    const uint16_t one = 0x0001;
    // Named, const-preserving cast: the probe is const and is only read.
    const unsigned char* firstByte = reinterpret_cast<const unsigned char*>(&one);
    return ( *firstByte == 0 );
}
\r
// Swaps the two bytes of a signed 16-bit value in place.
//
// The swap is performed on the unsigned bit pattern. Shifting the signed
// value directly is wrong for negative inputs: integer promotion
// sign-extends 'x', so 'x >> 8' drags 1-bits into what becomes the high byte
// of the result (e.g. 0x8001 would become 0xFF80 instead of 0x0180), and
// left-shifting a negative value is not portable.
inline void SwapEndian_16(int16_t& x) {
    const uint16_t bits    = static_cast<uint16_t>(x);
    const uint16_t swapped = static_cast<uint16_t>( (bits >> 8) | (bits << 8) );
    x = static_cast<int16_t>(swapped);
}
\r
// Swaps the two bytes of an unsigned 16-bit value in place.
inline void SwapEndian_16(uint16_t& x) {
    // Both shifts operate on the int-promoted value; assigning the combined
    // result back to 'x' keeps only the low 16 bits.
    const int highMovedDown = x >> 8;
    const int lowMovedUp    = x << 8;
    x = ( lowMovedUp | highMovedDown );
}
\r
159 // swaps endianness of 32-bit value 'in-place'
\r
160 inline void SwapEndian_32(int32_t& x) {
\r
162 ((x << 8) & 0x00FF0000) |
\r
163 ((x >> 8) & 0x0000FF00) |
\r
168 inline void SwapEndian_32(uint32_t& x) {
\r
170 ((x << 8) & 0x00FF0000) |
\r
171 ((x >> 8) & 0x0000FF00) |
\r
176 // swaps endianness of 64-bit value 'in-place'
\r
177 inline void SwapEndian_64(int64_t& x) {
\r
179 ((x << 40) & 0x00FF000000000000ll) |
\r
180 ((x << 24) & 0x0000FF0000000000ll) |
\r
181 ((x << 8) & 0x000000FF00000000ll) |
\r
182 ((x >> 8) & 0x00000000FF000000ll) |
\r
183 ((x >> 24) & 0x0000000000FF0000ll) |
\r
184 ((x >> 40) & 0x000000000000FF00ll) |
\r
189 inline void SwapEndian_64(uint64_t& x) {
\r
191 ((x << 40) & 0x00FF000000000000ll) |
\r
192 ((x << 24) & 0x0000FF0000000000ll) |
\r
193 ((x << 8) & 0x000000FF00000000ll) |
\r
194 ((x >> 8) & 0x00000000FF000000ll) |
\r
195 ((x >> 24) & 0x0000000000FF0000ll) |
\r
196 ((x >> 40) & 0x000000000000FF00ll) |
\r
// Swaps the endianness of the 'next 2 bytes' in a char buffer (in place).
//
// 'data' must point to at least 2 accessible bytes.
// The bytes are exchanged directly rather than viewing the buffer through a
// uint16_t reference: 'data' may sit at any offset inside a larger buffer,
// so a wider-typed access could be misaligned and would also break the
// language's aliasing rules.
inline void SwapEndian_16p(char* data) {
    const char first = data[0];
    data[0] = data[1];
    data[1] = first;
}
\r
// Swaps the endianness of the 'next 4 bytes' in a char buffer (in place).
//
// 'data' must point to at least 4 accessible bytes.
// The byte order is reversed directly rather than viewing the buffer through
// a uint32_t reference: 'data' may sit at any offset inside a larger buffer,
// so a wider-typed access could be misaligned and would also break the
// language's aliasing rules.
inline void SwapEndian_32p(char* data) {
    // exchange outer pair, then inner pair
    char held = data[0];
    data[0] = data[3];
    data[3] = held;

    held = data[1];
    data[1] = data[2];
    data[2] = held;
}
\r
// Swaps the endianness of the 'next 8 bytes' in a char buffer (in place).
//
// 'data' must point to at least 8 accessible bytes.
// The byte order is reversed directly rather than viewing the buffer through
// a uint64_t reference: 'data' may sit at any offset inside a larger buffer,
// so a wider-typed access could be misaligned and would also break the
// language's aliasing rules.
inline void SwapEndian_64p(char* data) {
    // mirror the 8 bytes around the middle: 0<->7, 1<->6, 2<->5, 3<->4
    for ( int front = 0, back = 7; front < back; ++front, --back ) {
        const char held = data[front];
        data[front] = data[back];
        data[back]  = held;
    }
}
\r
219 // returns whether file exists (can be opened OK)
\r
220 inline bool FileExists(const std::string& filename) {
\r
221 std::ifstream f(filename.c_str(), std::ifstream::in);
\r
225 } // namespace BamTools
\r