// ***************************************************************************
// BamConstants.h (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
// Last modified: 4 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides basic constants for handling BAM files.
// ***************************************************************************

#ifndef BAM_CONSTANTS_H
#define BAM_CONSTANTS_H

// NOTE(review): include list reconstructed from what this header uses
// (assert, size_t, fixed-width integer types, std::string) -- confirm against
// the repository, which may pull some of these in through a project header.
#include <cassert>
#include <stddef.h>
#include <stdint.h>
#include <string>

/*! \namespace BamTools::Constants
    \brief Provides basic constants for handling BAM files.
*/

namespace BamTools {
namespace Constants {

const int BAM_SIZEOF_INT = 4;

// header magic number ("BAM" followed by the byte 0x01, 4 bytes in total)
const char* const BAM_HEADER_MAGIC = "BAM\1";
const unsigned int BAM_HEADER_MAGIC_LENGTH = 4;

// BAM alignment core size
const int BAM_CORE_SIZE        = 32;
const int BAM_CORE_BUFFER_SIZE = 8;

// BAM alignment flags (each value is a distinct single-bit mask)
const int BAM_ALIGNMENT_PAIRED              = 0x0001;
const int BAM_ALIGNMENT_PROPER_PAIR         = 0x0002;
const int BAM_ALIGNMENT_UNMAPPED            = 0x0004;
const int BAM_ALIGNMENT_MATE_UNMAPPED       = 0x0008;
const int BAM_ALIGNMENT_REVERSE_STRAND      = 0x0010;
const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020;
const int BAM_ALIGNMENT_READ_1              = 0x0040;
const int BAM_ALIGNMENT_READ_2              = 0x0080;
const int BAM_ALIGNMENT_SECONDARY           = 0x0100;
const int BAM_ALIGNMENT_QC_FAILED           = 0x0200;
const int BAM_ALIGNMENT_DUPLICATE           = 0x0400;

// CIGAR constants
// Each BAM_CIGAR_<op> value is the index of that operation's character in
// BAM_CIGAR_LOOKUP (e.g. BAM_CIGAR_LOOKUP[BAM_CIGAR_MISMATCH] == 'X').
const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
const int BAM_CIGAR_MATCH    = 0;
const int BAM_CIGAR_INS      = 1;
const int BAM_CIGAR_DEL      = 2;
const int BAM_CIGAR_REFSKIP  = 3;
const int BAM_CIGAR_SOFTCLIP = 4;
const int BAM_CIGAR_HARDCLIP = 5;
const int BAM_CIGAR_PAD      = 6;
const int BAM_CIGAR_SEQMATCH = 7;
const int BAM_CIGAR_MISMATCH = 8;

const char BAM_CIGAR_MATCH_CHAR    = 'M';
const char BAM_CIGAR_INS_CHAR      = 'I';
const char BAM_CIGAR_DEL_CHAR      = 'D';
const char BAM_CIGAR_REFSKIP_CHAR  = 'N';
const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
const char BAM_CIGAR_PAD_CHAR      = 'P';
const char BAM_CIGAR_SEQMATCH_CHAR = '=';
const char BAM_CIGAR_MISMATCH_CHAR = 'X';

// BAM_CIGAR_MASK selects the low BAM_CIGAR_SHIFT bits of a value (0xF).
// Presumably these are the operation bits of a packed CIGAR element, with the
// length stored above them -- confirm against the code that decodes CIGAR data.
const int BAM_CIGAR_SHIFT = 4;
const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);

// BAM tag types
// Lower-case integer codes are the signed types, upper-case codes are the
// unsigned types, as defined by the SAM/BAM specification.
const char BAM_TAG_TYPE_ASCII  = 'A';
const char BAM_TAG_TYPE_INT8   = 'c';
const char BAM_TAG_TYPE_UINT8  = 'C';
const char BAM_TAG_TYPE_INT16  = 's';
const char BAM_TAG_TYPE_UINT16 = 'S';
const char BAM_TAG_TYPE_INT32  = 'i';
const char BAM_TAG_TYPE_UINT32 = 'I';
const char BAM_TAG_TYPE_FLOAT  = 'f';
const char BAM_TAG_TYPE_STRING = 'Z';
const char BAM_TAG_TYPE_HEX    = 'H';
const char BAM_TAG_TYPE_ARRAY  = 'B';

// BAM tag sizes
const size_t BAM_TAG_TAGSIZE        = 2;
const size_t BAM_TAG_TYPESIZE       = 1;
const int    BAM_TAG_ARRAYBASE_SIZE = 8;

// DNA bases
// Each BAM_BASECODE_<base> value is the index of that base's character in
// BAM_DNA_LOOKUP (e.g. BAM_DNA_LOOKUP[BAM_BASECODE_T] == 'T').
const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
const unsigned char BAM_BASECODE_EQUAL = 0;
const unsigned char BAM_BASECODE_A     = 1;
const unsigned char BAM_BASECODE_C     = 2;
const unsigned char BAM_BASECODE_G     = 4;
const unsigned char BAM_BASECODE_T     = 8;
const unsigned char BAM_BASECODE_N     = 15;

const char BAM_DNA_EQUAL = '=';
const char BAM_DNA_A     = 'A';
const char BAM_DNA_C     = 'C';
const char BAM_DNA_G     = 'G';
const char BAM_DNA_T     = 'T';
const char BAM_DNA_N     = 'N';
const char BAM_DNA_DEL   = '-';
const char BAM_DNA_PAD   = '*';

// zlib constants
// GZIP_ID1/GZIP_ID2 are the two gzip magic bytes (0x1f, 0x8b);
// BGZF_ID1/BGZF_ID2 are the characters 'B' and 'C'.
const int GZIP_ID1   = 31;
const int GZIP_ID2   = 139;
const int CM_DEFLATE = 8;
const int FLG_FEXTRA = 4;
const int OS_UNKNOWN = 255;
const int BGZF_XLEN  = 6;
const int BGZF_ID1   = 66;
const int BGZF_ID2   = 67;
const int BGZF_LEN   = 2;
const int GZIP_WINDOW_BITS    = -15;
const int Z_DEFAULT_MEM_LEVEL = 8;

// BGZF constants
const int BGZF_BLOCK_HEADER_LENGTH = 18;
const int BGZF_BLOCK_FOOTER_LENGTH = 8;
const int BGZF_MAX_BLOCK_SIZE      = 65536;
const int BGZF_DEFAULT_BLOCK_SIZE  = 65536;

} // namespace Constants

// -------------------------
// tag-type helper structs
// -------------------------
//
// TagTypeHelper<T> associates a C++ value type T with a BAM tag type code:
//   TypeCode()        - the tag type code that corresponds to T
//   CanConvertFrom(c) - true if tag type code 'c' is accepted as a source for T
//   CanConvertTo(c)   - true if tag type code 'c' is accepted as a destination for T
// NOTE(review): presumably "from" is used when reading a stored tag into a T
// and "to" when writing a T into a stored tag -- confirm against the callers.

// fail on any types not specified below
// (asserts in debug builds; with NDEBUG defined it reports "not convertible"
//  and a type code of 0)
template<typename T>
struct TagTypeHelper {
    static bool CanConvertFrom(const char) { assert(false); return false; }
    static bool CanConvertTo(const char)   { assert(false); return false; }
    static char TypeCode(void)             { assert(false); return 0; }
};

// unsigned 8-bit integer
template<>
struct TagTypeHelper<uint8_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
                 c == Constants::BAM_TAG_TYPE_UINT8 );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII  ||
                 c == Constants::BAM_TAG_TYPE_UINT8  ||
                 c == Constants::BAM_TAG_TYPE_UINT16 ||
                 c == Constants::BAM_TAG_TYPE_UINT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT8; }
};

// signed 8-bit integer
template<>
struct TagTypeHelper<int8_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
                 c == Constants::BAM_TAG_TYPE_INT8 );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
                 c == Constants::BAM_TAG_TYPE_INT8  ||
                 c == Constants::BAM_TAG_TYPE_INT16 ||
                 c == Constants::BAM_TAG_TYPE_INT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT8; }
};

// unsigned 16-bit integer
template<>
struct TagTypeHelper<uint16_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII  ||
                 c == Constants::BAM_TAG_TYPE_UINT8  ||
                 c == Constants::BAM_TAG_TYPE_UINT16 );
    }
    // Misspelled name retained only so that any existing caller keeps
    // compiling; it forwards to CanConvertFrom(). Prefer CanConvertFrom().
    static bool CanCovnertFrom(const char c) { return CanConvertFrom(c); }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_UINT16 ||
                 c == Constants::BAM_TAG_TYPE_UINT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT16; }
};

// signed 16-bit integer
template<>
struct TagTypeHelper<int16_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
                 c == Constants::BAM_TAG_TYPE_INT8  ||
                 c == Constants::BAM_TAG_TYPE_INT16 );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_INT16 ||
                 c == Constants::BAM_TAG_TYPE_INT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT16; }
};

// unsigned 32-bit integer
template<>
struct TagTypeHelper<uint32_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII  ||
                 c == Constants::BAM_TAG_TYPE_UINT8  ||
                 c == Constants::BAM_TAG_TYPE_UINT16 ||
                 c == Constants::BAM_TAG_TYPE_UINT32 );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_UINT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT32; }
};

// signed 32-bit integer
template<>
struct TagTypeHelper<int32_t> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII ||
                 c == Constants::BAM_TAG_TYPE_INT8  ||
                 c == Constants::BAM_TAG_TYPE_INT16 ||
                 c == Constants::BAM_TAG_TYPE_INT32 );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_INT32 );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT32; }
};

// floating point: accepts every integer tag type as a source
template<>
struct TagTypeHelper<float> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_ASCII  ||
                 c == Constants::BAM_TAG_TYPE_UINT8  ||
                 c == Constants::BAM_TAG_TYPE_INT8   ||
                 c == Constants::BAM_TAG_TYPE_UINT16 ||
                 c == Constants::BAM_TAG_TYPE_INT16  ||
                 c == Constants::BAM_TAG_TYPE_UINT32 ||
                 c == Constants::BAM_TAG_TYPE_INT32  ||
                 c == Constants::BAM_TAG_TYPE_FLOAT );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_FLOAT );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_FLOAT; }
};

// string: interchangeable with the hex ('H') and string ('Z') tag types
template<>
struct TagTypeHelper<std::string> {
    static bool CanConvertFrom(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_HEX ||
                 c == Constants::BAM_TAG_TYPE_STRING );
    }
    static bool CanConvertTo(const char c) {
        return ( c == Constants::BAM_TAG_TYPE_HEX ||
                 c == Constants::BAM_TAG_TYPE_STRING );
    }
    static char TypeCode(void) { return Constants::BAM_TAG_TYPE_STRING; }
};

} // namespace BamTools

#endif // BAM_CONSTANTS_H