]> git.donarmstrong.com Git - bamtools.git/blobdiff - src/api/BamConstants.h
First stab at templated tag access in BamAlignment
[bamtools.git] / src / api / BamConstants.h
index 5bf03f9aa7258285b97983e53af7cd8ab33a43ed..3e674ee5d3afe06fcf2b0d0c20f8a2775a6d78e2 100644 (file)
@@ -1,35 +1,51 @@
+// ***************************************************************************
+// BamConstants.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// ---------------------------------------------------------------------------
+// Last modified: 4 October 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic constants for handling BAM files.
+// ***************************************************************************
+
 #ifndef BAM_CONSTANTS_H
 #define BAM_CONSTANTS_H
 
+#include <api/api_global.h>
+#include <cassert>
 #include <string>
 
+/*! \namespace BamTools::Constants
+    \brief Provides basic constants for handling BAM files.
+*/
+
 namespace BamTools {
 namespace Constants {
 
 const int BAM_SIZEOF_INT = 4;
 
 // header magic number
-const char* const  BAM_HEADER_MAGIC = "BAM\001";
-const unsigned int BAM_HEADER_MAGIC_SIZE = 4;
+const char* const  BAM_HEADER_MAGIC = "BAM\1"; // "\1" is the same octal escape as the previous "\001": the bytes are 'B', 'A', 'M', 0x01
+const unsigned int BAM_HEADER_MAGIC_LENGTH = 4; // number of characters in BAM_HEADER_MAGIC, not counting the terminating NUL
 
 // BAM alignment core size
 const int BAM_CORE_SIZE = 32;
+const int BAM_CORE_BUFFER_SIZE = 8; // NOTE(review): equals BAM_CORE_SIZE / BAM_SIZEOF_INT (32 / 4); presumably a count of int-sized fields - confirm against users
 
 // BAM alignment flags
-const int BAM_ALIGNMENT_PAIRED              = 1;
-const int BAM_ALIGNMENT_PROPER_PAIR         = 2;
-const int BAM_ALIGNMENT_UNMAPPED            = 4;
-const int BAM_ALIGNMENT_MATE_UNMAPPED       = 8;
-const int BAM_ALIGNMENT_REVERSE_STRAND      = 16;
-const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 32;
-const int BAM_ALIGNMENT_READ_1              = 64;
-const int BAM_ALIGNMENT_READ_2              = 128;
-const int BAM_ALIGNMENT_SECONDARY           = 256;
-const int BAM_ALIGNMENT_QC_FAILED           = 512;
-const int BAM_ALIGNMENT_DUPLICATE           = 1024;
+const int BAM_ALIGNMENT_PAIRED              = 0x0001; // bit 0; every flag keeps its previous decimal value, now written in hex
+const int BAM_ALIGNMENT_PROPER_PAIR         = 0x0002; // bit 1
+const int BAM_ALIGNMENT_UNMAPPED            = 0x0004; // bit 2
+const int BAM_ALIGNMENT_MATE_UNMAPPED       = 0x0008; // bit 3
+const int BAM_ALIGNMENT_REVERSE_STRAND      = 0x0010; // bit 4
+const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020; // bit 5
+const int BAM_ALIGNMENT_READ_1              = 0x0040; // bit 6
+const int BAM_ALIGNMENT_READ_2              = 0x0080; // bit 7
+const int BAM_ALIGNMENT_SECONDARY           = 0x0100; // bit 8
+const int BAM_ALIGNMENT_QC_FAILED           = 0x0200; // bit 9
+const int BAM_ALIGNMENT_DUPLICATE           = 0x0400; // bit 10
 
 // CIGAR constants
-const char* const BAM_CIGAR_LOOKUP = "MIDNSHP";
+const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X"; // gains '=' and 'X' at positions 7 and 8, matching BAM_CIGAR_SEQMATCH and BAM_CIGAR_MISMATCH
 const int BAM_CIGAR_MATCH    = 0;
 const int BAM_CIGAR_INS      = 1;
 const int BAM_CIGAR_DEL      = 2;
@@ -37,14 +53,208 @@ const int BAM_CIGAR_REFSKIP  = 3;
 const int BAM_CIGAR_SOFTCLIP = 4;
 const int BAM_CIGAR_HARDCLIP = 5;
 const int BAM_CIGAR_PAD      = 6;
+const int BAM_CIGAR_SEQMATCH = 7; // position of '=' in BAM_CIGAR_LOOKUP
+const int BAM_CIGAR_MISMATCH = 8; // position of 'X' in BAM_CIGAR_LOOKUP
+
+const char BAM_CIGAR_MATCH_CHAR    = 'M'; // each *_CHAR constant equals the BAM_CIGAR_LOOKUP character at the matching numeric code
+const char BAM_CIGAR_INS_CHAR      = 'I';
+const char BAM_CIGAR_DEL_CHAR      = 'D';
+const char BAM_CIGAR_REFSKIP_CHAR  = 'N';
+const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
+const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
+const char BAM_CIGAR_PAD_CHAR      = 'P';
+const char BAM_CIGAR_SEQMATCH_CHAR = '=';
+const char BAM_CIGAR_MISMATCH_CHAR = 'X';
 
 const int BAM_CIGAR_SHIFT    = 4;
 const int BAM_CIGAR_MASK     = ((1 << BAM_CIGAR_SHIFT) - 1);
 
+// BAM tag types (SAM/BAM optional-field codes: lowercase integer codes are signed, uppercase are unsigned)
+const char BAM_TAG_TYPE_ASCII  = 'A'; // single printable character
+const char BAM_TAG_TYPE_UINT8  = 'C'; // was 'c', which the specification assigns to int8_t
+const char BAM_TAG_TYPE_INT8   = 'c'; // was 'C', which the specification assigns to uint8_t
+const char BAM_TAG_TYPE_UINT16 = 'S'; // was 's' (int16_t in the specification)
+const char BAM_TAG_TYPE_INT16  = 's'; // was 'S' (uint16_t in the specification)
+const char BAM_TAG_TYPE_UINT32 = 'I'; // was 'i' (int32_t in the specification)
+const char BAM_TAG_TYPE_INT32  = 'i'; // was 'I' (uint32_t in the specification)
+const char BAM_TAG_TYPE_FLOAT  = 'f';
+const char BAM_TAG_TYPE_STRING = 'Z';
+const char BAM_TAG_TYPE_HEX    = 'H';
+const char BAM_TAG_TYPE_ARRAY  = 'B';
+
+const size_t BAM_TAG_TAGSIZE  = 2; // NOTE(review): presumably the length of a tag name in bytes - confirm
+const size_t BAM_TAG_TYPESIZE = 1; // NOTE(review): presumably the length of the type code in bytes - confirm
+const int BAM_TAG_ARRAYBASE_SIZE = 8; // NOTE(review): presumably tag (2) + type (1) + element type (1) + element count (4) - confirm
+
 // DNA bases
 const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
+const unsigned char BAM_BASECODE_EQUAL = 0; // position of '=' in BAM_DNA_LOOKUP
+const unsigned char BAM_BASECODE_A     = 1; // position of 'A' in BAM_DNA_LOOKUP
+const unsigned char BAM_BASECODE_C     = 2; // position of 'C' in BAM_DNA_LOOKUP
+const unsigned char BAM_BASECODE_G     = 4; // position of 'G' in BAM_DNA_LOOKUP
+const unsigned char BAM_BASECODE_T     = 8; // position of 'T' in BAM_DNA_LOOKUP
+const unsigned char BAM_BASECODE_N     = 15; // position of 'N' in BAM_DNA_LOOKUP (its last character)
+
+const char BAM_DNA_EQUAL   = '=';
+const char BAM_DNA_A       = 'A';
+const char BAM_DNA_C       = 'C';
+const char BAM_DNA_G       = 'G';
+const char BAM_DNA_T       = 'T';
+const char BAM_DNA_N       = 'N';
+const char BAM_DNA_DEL     = '-'; // not a member of BAM_DNA_LOOKUP
+const char BAM_DNA_PAD     = '*'; // not a member of BAM_DNA_LOOKUP
+
+// zlib constants
+const int GZIP_ID1   = 31; // 0x1f
+const int GZIP_ID2   = 139; // 0x8b
+const int CM_DEFLATE = 8;
+const int FLG_FEXTRA = 4;
+const int OS_UNKNOWN = 255;
+const int BGZF_XLEN  = 6;
+const int BGZF_ID1   = 66; // ASCII 'B'
+const int BGZF_ID2   = 67; // ASCII 'C'
+const int BGZF_LEN   = 2;
+const int GZIP_WINDOW_BITS    = -15; // NOTE(review): presumably passed as zlib's windowBits, where a negative value selects raw deflate - confirm at call sites
+const int Z_DEFAULT_MEM_LEVEL = 8;
+
+// BGZF constants
+const int BGZF_BLOCK_HEADER_LENGTH = 18;
+const int BGZF_BLOCK_FOOTER_LENGTH = 8;
+const int BGZF_MAX_BLOCK_SIZE      = 65536; // 64 KiB
+const int BGZF_DEFAULT_BLOCK_SIZE  = 65536; // same value as BGZF_MAX_BLOCK_SIZE
 
 } // namespace Constants
+
+// -------------------------
+// tag-type helper structs
+// -------------------------
+
+// Fallback used for every value type that has no explicit specialization.
+// Each member trips assert(false); when asserts are compiled out (NDEBUG)
+// the conversion queries report false and TypeCode() yields 0.
+template<typename T>
+struct TagTypeHelper {
+
+    // No conversion is known for an unspecialized type.
+    static bool CanConvertFrom(const char) {
+        assert( false );
+        return false;
+    }
+
+    // No conversion is known for an unspecialized type.
+    static bool CanConvertTo(const char) {
+        assert( false );
+        return false;
+    }
+
+    // An unspecialized type has no tag type code.
+    static char TypeCode(void) {
+        assert( false );
+        return 0;
+    }
+};
+
+// Tag-type rules for uint8_t values.
+template<>
+struct TagTypeHelper<uint8_t> {
+
+    // True when 'c' is the ASCII or UINT8 type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII :
+            case Constants::BAM_TAG_TYPE_UINT8 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True when 'c' is the ASCII, UINT8, UINT16 or UINT32 type code.
+    static bool CanConvertTo(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII  :
+            case Constants::BAM_TAG_TYPE_UINT8  :
+            case Constants::BAM_TAG_TYPE_UINT16 :
+            case Constants::BAM_TAG_TYPE_UINT32 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // Type code associated with uint8_t.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_UINT8;
+    }
+};
+
+// Tag-type rules for int8_t values.
+template<>
+struct TagTypeHelper<int8_t> {
+
+    // True when 'c' is the ASCII or INT8 type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII :
+            case Constants::BAM_TAG_TYPE_INT8  :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True when 'c' is the ASCII, INT8, INT16 or INT32 type code.
+    static bool CanConvertTo(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII :
+            case Constants::BAM_TAG_TYPE_INT8  :
+            case Constants::BAM_TAG_TYPE_INT16 :
+            case Constants::BAM_TAG_TYPE_INT32 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // Type code associated with int8_t.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_INT8;
+    }
+};
+
+// Tag-type rules for uint16_t values.
+template<>
+struct TagTypeHelper<uint16_t> {
+
+    // True when 'c' is the ASCII, UINT8 or UINT16 type code.
+    // This member was originally misspelled "CanCovnertFrom", which left the
+    // specialization without the CanConvertFrom member that every other
+    // TagTypeHelper provides, so generic code using it could not compile.
+    static bool CanConvertFrom(const char c) {
+        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
+                 c == Constants::BAM_TAG_TYPE_UINT8 ||
+                 c == Constants::BAM_TAG_TYPE_UINT16 );
+    }
+
+    // Misspelled legacy name, kept only so existing callers keep compiling.
+    // Forwards to CanConvertFrom; new code should call that instead.
+    static bool CanCovnertFrom(const char c) {
+        return CanConvertFrom(c);
+    }
+
+    // True when 'c' is the UINT16 or UINT32 type code.
+    static bool CanConvertTo(const char c) {
+        return ( c == Constants::BAM_TAG_TYPE_UINT16 ||
+                 c == Constants::BAM_TAG_TYPE_UINT32);
+    }
+
+    // Type code associated with uint16_t.
+    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT16; }
+};
+
+// Tag-type rules for int16_t values.
+template<>
+struct TagTypeHelper<int16_t> {
+
+    // True when 'c' is the ASCII, INT8 or INT16 type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII :
+            case Constants::BAM_TAG_TYPE_INT8  :
+            case Constants::BAM_TAG_TYPE_INT16 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True when 'c' is the INT16 or INT32 type code.
+    static bool CanConvertTo(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_INT16 :
+            case Constants::BAM_TAG_TYPE_INT32 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // Type code associated with int16_t.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_INT16;
+    }
+};
+
+// Tag-type rules for uint32_t values.
+template<>
+struct TagTypeHelper<uint32_t> {
+
+    // True when 'c' is the ASCII, UINT8, UINT16 or UINT32 type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII  :
+            case Constants::BAM_TAG_TYPE_UINT8  :
+            case Constants::BAM_TAG_TYPE_UINT16 :
+            case Constants::BAM_TAG_TYPE_UINT32 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True only when 'c' is the UINT32 type code.
+    static bool CanConvertTo(const char c) {
+        const bool isUint32 = ( c == Constants::BAM_TAG_TYPE_UINT32 );
+        return isUint32;
+    }
+
+    // Type code associated with uint32_t.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_UINT32;
+    }
+};
+
+// Tag-type rules for int32_t values.
+template<>
+struct TagTypeHelper<int32_t> {
+
+    // True when 'c' is the ASCII, INT8, INT16 or INT32 type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII :
+            case Constants::BAM_TAG_TYPE_INT8  :
+            case Constants::BAM_TAG_TYPE_INT16 :
+            case Constants::BAM_TAG_TYPE_INT32 :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True only when 'c' is the INT32 type code.
+    static bool CanConvertTo(const char c) {
+        const bool isInt32 = ( c == Constants::BAM_TAG_TYPE_INT32 );
+        return isInt32;
+    }
+
+    // Type code associated with int32_t.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_INT32;
+    }
+};
+
+// Tag-type rules for float values.
+template<>
+struct TagTypeHelper<float> {
+
+    // True when 'c' is the ASCII code, any of the six integer codes
+    // (UINT8, INT8, UINT16, INT16, UINT32, INT32) or the FLOAT code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_ASCII  :
+            case Constants::BAM_TAG_TYPE_UINT8  :
+            case Constants::BAM_TAG_TYPE_INT8   :
+            case Constants::BAM_TAG_TYPE_UINT16 :
+            case Constants::BAM_TAG_TYPE_INT16  :
+            case Constants::BAM_TAG_TYPE_UINT32 :
+            case Constants::BAM_TAG_TYPE_INT32  :
+            case Constants::BAM_TAG_TYPE_FLOAT  :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True only when 'c' is the FLOAT type code.
+    static bool CanConvertTo(const char c) {
+        const bool isFloat = ( c == Constants::BAM_TAG_TYPE_FLOAT );
+        return isFloat;
+    }
+
+    // Type code associated with float.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_FLOAT;
+    }
+};
+
+// Tag-type rules for std::string values.
+template<>
+struct TagTypeHelper<std::string> {
+
+    // True when 'c' is the HEX or STRING type code.
+    static bool CanConvertFrom(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_HEX    :
+            case Constants::BAM_TAG_TYPE_STRING :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // True when 'c' is the HEX or STRING type code.
+    static bool CanConvertTo(const char c) {
+        switch ( c ) {
+            case Constants::BAM_TAG_TYPE_HEX    :
+            case Constants::BAM_TAG_TYPE_STRING :
+                return true;
+            default :
+                return false;
+        }
+    }
+
+    // Type code associated with std::string.
+    static char TypeCode(void) {
+        return Constants::BAM_TAG_TYPE_STRING;
+    }
+};
+
 } // namespace BamTools
 
 #endif // BAM_CONSTANTS_H