]> git.donarmstrong.com Git - bamtools.git/blobdiff - BamAux.h
added warning for duplicate @RG tag in header
[bamtools.git] / BamAux.h
index 9c088dda1f8927306b71a358cad87b4ea62ed3f4..46592497888843f7e14295f62290ecad73fa620f 100644 (file)
--- a/BamAux.h
+++ b/BamAux.h
 // ***************************************************************************\r
-// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg\r
+// BamAux.h (c) 2009 Derek Barnett, Michael Strmberg\r
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 24 June 2009 (DB)\r
+// Last modified: 8 June 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad\r
-// Institute.\r
-// ---------------------------------------------------------------------------\r
-// Defines common constants, typedefs, & data structures for BamTools.\r
+// Provides the basic constants, data structures, etc. for using BAM files\r
 // ***************************************************************************\r
 \r
-/*! \r
-       \file BamAux.h\r
-       \brief BamTools constants, typedefs, & data structures\r
-*/\r
+#ifndef BAMAUX_H\r
+#define BAMAUX_H\r
 \r
-#pragma once\r
+// C inclues\r
+#include <cstdio>\r
+#include <cstdlib>\r
+#include <cstring>\r
 \r
 // C++ includes\r
 #include <exception>\r
+#include <map>\r
 #include <string>\r
 #include <utility>\r
 #include <vector>\r
 \r
-// C includes\r
-#include <cstdio>\r
-#include <cstdlib>\r
-#include <cstring>\r
-\r
 // Platform-specific type definitions\r
-#ifdef WIN32\r
-       typedef char                 int8_t;\r
-       typedef unsigned char       uint8_t;\r
-       typedef short               int16_t;\r
-       typedef unsigned short     uint16_t;\r
-       typedef int                 int32_t;\r
-       typedef unsigned int       uint32_t;\r
-       typedef long long           int64_t;\r
-       typedef unsigned long long uint64_t;\r
-#else\r
-       #include <stdint.h>\r
-#endif\r
-\r
-//! \namespace BamTools\r
+#ifndef BAMTOOLS_TYPES\r
+#define BAMTOOLS_TYPES\r
+    #ifdef _MSC_VER\r
+        typedef char                 int8_t;\r
+        typedef unsigned char       uint8_t;\r
+        typedef short               int16_t;\r
+        typedef unsigned short     uint16_t;\r
+        typedef int                 int32_t;\r
+        typedef unsigned int       uint32_t;\r
+        typedef long long           int64_t;\r
+        typedef unsigned long long uint64_t;\r
+    #else\r
+        #include <stdint.h>\r
+    #endif\r
+#endif // BAMTOOLS_TYPES\r
+\r
 namespace BamTools {\r
 \r
-       //! \cond\r
-       // --------------------------------------------------------------------------------------\r
-       // This section is purely internal and can be excluded from main generated documentation.\r
-       \r
-       // zlib constants\r
-       const int GZIP_ID1   = 31;\r
-       const int GZIP_ID2   = 139;\r
-       const int CM_DEFLATE = 8;\r
-       const int FLG_FEXTRA = 4;\r
-       const int OS_UNKNOWN = 255;\r
-       const int BGZF_XLEN  = 6;\r
-       const int BGZF_ID1   = 66;\r
-       const int BGZF_ID2   = 67;\r
-       const int BGZF_LEN   = 2;\r
-       const int GZIP_WINDOW_BITS = -15;\r
-       const int Z_DEFAULT_MEM_LEVEL = 8;\r
-\r
-       // BZGF constants\r
-       const int BLOCK_HEADER_LENGTH = 18;\r
-       const int BLOCK_FOOTER_LENGTH = 8;\r
-       const int MAX_BLOCK_SIZE      = 65536;\r
-       const int DEFAULT_BLOCK_SIZE  = 65536;\r
-\r
-       // BAM constants\r
-       const int BAM_CORE_SIZE   = 32;\r
-       const int BAM_CMATCH      = 0;\r
-       const int BAM_CINS        = 1;\r
-       const int BAM_CDEL        = 2;\r
-       const int BAM_CREF_SKIP   = 3;\r
-       const int BAM_CSOFT_CLIP  = 4;\r
-       const int BAM_CHARD_CLIP  = 5;\r
-       const int BAM_CPAD        = 6;\r
-       const int BAM_CIGAR_SHIFT = 4;\r
-       const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);\r
-\r
-       // BAM index constants\r
-       const int MAX_BIN           = 37450;    // =(8^6-1)/7+1\r
-       const int BAM_MIN_CHUNK_GAP = 32768;\r
-       const int BAM_LIDX_SHIFT    = 14;\r
-\r
-       // Explicit variable sizes\r
-       const int SIZEOF_INT = 4;\r
-       \r
-       struct BgzfData {\r
-               unsigned int UncompressedBlockSize;\r
-               unsigned int CompressedBlockSize;\r
-               unsigned int BlockLength;\r
-               unsigned int BlockOffset;\r
-               uint64_t BlockAddress;\r
-               bool     IsOpen;\r
-               FILE*    Stream;\r
-               char*    UncompressedBlock;\r
-               char*    CompressedBlock;\r
-               \r
-               // constructor\r
-               BgzfData(void)\r
-                       : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)\r
-                       , CompressedBlockSize(MAX_BLOCK_SIZE)\r
-                       , BlockLength(0)\r
-                       , BlockOffset(0)\r
-                       , BlockAddress(0)\r
-                       , IsOpen(false)\r
-                       , Stream(NULL)\r
-                       , UncompressedBlock(NULL)\r
-                       , CompressedBlock(NULL)\r
-               {\r
-                       try {\r
-                               CompressedBlock   = new char[CompressedBlockSize];\r
-                               UncompressedBlock = new char[UncompressedBlockSize];\r
-                       } catch( std::bad_alloc& ba ) {\r
-                               printf("ERROR: Unable to allocate memory for our BGZF object.\n");\r
-                               exit(1);\r
-                       }\r
-               }\r
-               \r
-               // destructor\r
-               ~BgzfData(void) {\r
-                       if(CompressedBlock)   delete [] CompressedBlock;\r
-                       if(UncompressedBlock) delete [] UncompressedBlock;\r
-               }\r
-       };\r
-       //! \endcond\r
-       \r
-       // --------------------------------------------------------------------------------------\r
-       // Data structures\r
-       \r
-       //! \brief Cigar operation data structure\r
-       struct CigarOp {\r
-               char     Type;   //!< Operation type (MIDNSHP)\r
-               uint32_t Length; //!< Operation length (number of bases)\r
-                  \r
-       };\r
-\r
-       //! Reference sequence data structure\r
-       struct RefData {\r
-               std::string  RefName;          //!< Name of reference sequence\r
-               unsigned int RefLength;        //!< Length of reference sequence\r
-               bool         RefHasAlignments; //!< True if BAM file contains alignments mapped to reference sequence\r
-               \r
-               // constructor\r
-               RefData(void)\r
-                       : RefLength(0)\r
-                       , RefHasAlignments(false)\r
-               { }\r
-       };\r
-\r
-       //! BAM alignment data structure\r
-       struct BamAlignment {\r
-               \r
-               // Queries against alignment flag\r
-               public:\r
-                       //! Returns true if this read is a PCR duplicate (determined by external app)\r
-                       bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
-                       //! Returns true if this read failed quality control (determined by external app)\r
-                       bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }    \r
-                       //! Returns true if alignment is first mate on read\r
-                       bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }\r
-                       //! Returns true if alignment is mapped                 \r
-                       bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }\r
-                       //! Returns true if alignment's mate is mapped\r
-                       bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }      \r
-                       //! Returns true if alignment's mate mapped to reverse strand\r
-                       bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }\r
-                       //! Returns true if alignment part of paired-end read\r
-                       bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); } \r
-                       //! Returns true if this position is primary alignment (determined by external app)\r
-                       bool IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY) == 0 ); }   \r
-                       //! Returns true if alignment is part of read that satisfied paired-end resolution (determined by external app)\r
-                       bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }        \r
-                       //! Returns true if alignment mapped to reverse strand\r
-                       bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); } \r
-                       //! Returns true if alignment is second mate on read\r
-                       bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }     \r
-               \r
-               public:\r
-                       /*! \r
-                               \brief Get alignment's read group text.\r
-                               \r
-                               Assigns read group text to readGroup.\r
-                               \r
-                               \return True if read group data successfully retrieved.\r
-                       */\r
-                       bool GetReadGroup(std::string& readGroup) const {\r
-                               \r
-                               if ( TagData.empty() ) { return false; }\r
-                               \r
-                               // localize the tag data\r
-                               char* pTagData = (char*)TagData.data();\r
-                               const unsigned int tagDataLen = TagData.size();\r
-                               unsigned int numBytesParsed = 0;\r
-                               \r
-                               bool foundReadGroupTag = false;\r
-                               while( numBytesParsed < tagDataLen ) {\r
-                                       \r
-                                       const char* pTagType = pTagData;\r
-                                       const char* pTagStorageType = pTagData + 2;\r
-                                       pTagData       += 3;\r
-                                       numBytesParsed += 3;\r
-                                       \r
-                                       // check the current tag\r
-                                       if ( strncmp(pTagType, "RG", 2) == 0 ) {\r
-                                               foundReadGroupTag = true;\r
-                                               break;\r
-                                       }\r
-                                       \r
-                                       // get the storage class and find the next tag\r
-                                       SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
-                               }\r
-                               \r
-                               // return if the read group tag was not present\r
-                               if ( !foundReadGroupTag ) { return false; }\r
-                               \r
-                               // assign the read group\r
-                               const unsigned int readGroupLen = strlen(pTagData);\r
-                               readGroup.resize(readGroupLen);\r
-                               memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
-                               return true;\r
-                       }\r
-               \r
-               private:\r
-                       // skips to the next tag\r
-                       static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
-                               switch(storageType) {\r
-                                       \r
-                                       case 'A':\r
-                                       case 'c':\r
-                                       case 'C':\r
-                                               ++numBytesParsed;\r
-                                               ++pTagData;\r
-                                               break;\r
-                                               \r
-                                       case 's':\r
-                                       case 'S':\r
-                                       case 'f':\r
-                                               numBytesParsed += 2;\r
-                                               pTagData       += 2;\r
-                                               break;\r
-                                               \r
-                                       case 'i':\r
-                                       case 'I':\r
-                                               numBytesParsed += 4;\r
-                                               pTagData       += 4;\r
-                                               break;\r
-                                               \r
-                                       case 'Z':\r
-                                       case 'H':\r
-                                               while(*pTagData) {\r
-                                                       ++numBytesParsed;\r
-                                                       ++pTagData;\r
-                                               }\r
-                                               break;\r
-                                               \r
-                                       default:\r
-                                               printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
-                                               exit(1);\r
-                               }\r
-                       }\r
-               \r
-               // Data members\r
-               public:\r
-                       std::string  Name;              //!< Read name\r
-                       unsigned int Length;            //!< Query length\r
-                       std::string  QueryBases;        //!< 'Original' sequence (as reported from sequencing machine)\r
-                       std::string  AlignedBases;      //!< 'Aligned' sequence (includes any indels, padding, clipping) \r
-                       std::string  Qualities;         //!< FASTQ qualities (ASCII characters, not numeric values)\r
-                       std::string  TagData;           //!< Tag data (accessor methods will pull the requested information out)\r
-                       unsigned int RefID;             //!< ID number for reference sequence\r
-                       unsigned int Position;          //!< Position (0-based) where alignment starts \r
-                       unsigned int Bin;               //!< Bin in BAM file where this alignment resides\r
-                       unsigned int MapQuality;        //!< Mapping quality score \r
-                       unsigned int AlignmentFlag;     //!< Alignment bit-flag - see Is<something>() methods for available queries\r
-                       std::vector<CigarOp> CigarData; //!< CIGAR operations for this alignment\r
-                       unsigned int MateRefID;         //!< ID number for reference sequence where alignment's mate was aligned\r
-                       unsigned int MatePosition;      //!< Position (0-based) where alignment's mate starts\r
-                       unsigned int InsertSize;        //!< Mate-pair insert size\r
-               \r
-               // Alignment flag query constants\r
-               private:\r
-                       enum { PAIRED        = 1,\r
-                                  PROPER_PAIR   = 2,\r
-                                  UNMAPPED      = 4,\r
-                                  MATE_UNMAPPED = 8,\r
-                                  REVERSE       = 16,\r
-                                  MATE_REVERSE  = 32,\r
-                                  READ_1        = 64,\r
-                                  READ_2        = 128,\r
-                                  SECONDARY     = 256,\r
-                                  QC_FAILED     = 512,\r
-                                  DUPLICATE     = 1024\r
-                            };\r
-       };\r
-\r
-       // ----------------------------------------------------------------\r
-       // Typedefs\r
-       \r
-       /*!\r
-               \typedef RefVector\r
-               \brief Vector of RefData objects\r
-       */\r
-       typedef std::vector<RefData> RefVector;\r
-       \r
-       /*! \r
-               \typedef BamAlignmentVector\r
-               \brief Vector of BamAlignments\r
-       */\r
-       typedef std::vector< BamAlignment > BamAlignmentVector;\r
-       \r
-       //! \cond\r
-       // ----------------------------------------------------------------\r
-       // Typedefs (internal - can exclude from main documentation)\r
-       \r
-       //Offsets for linear indexing\r
-       typedef std::vector<uint64_t> LinearOffsetVector;\r
-\r
-       // Alignment 'chunk' boundaries\r
-       typedef std::pair<uint64_t, uint64_t> ChunkPair;\r
-       // Vector of alignment 'chunks'\r
-       typedef std::vector<ChunkPair> ChunkVector;\r
-\r
-       // BAM bin contains a bin ID & a vector of alignment 'chunks'\r
-       typedef std::pair<uint32_t, ChunkVector*> BamBin;\r
-       // Vector of BAM bins\r
-       typedef std::vector<BamBin> BinVector;\r
-\r
-       // Reference sequence index data\r
-       typedef std::pair<BinVector*, LinearOffsetVector*> RefIndex;\r
-\r
-       // Full BAM file index data structure \r
-       typedef std::vector<RefIndex*> BamIndex;\r
-       // ----------------------------------------------------------------\r
-       //! \endcond\r
+// BAM constants\r
+const int BAM_CORE_SIZE   = 32;\r
+const int BAM_CMATCH      = 0;\r
+const int BAM_CINS        = 1;\r
+const int BAM_CDEL        = 2;\r
+const int BAM_CREF_SKIP   = 3;\r
+const int BAM_CSOFT_CLIP  = 4;\r
+const int BAM_CHARD_CLIP  = 5;\r
+const int BAM_CPAD        = 6;\r
+const int BAM_CIGAR_SHIFT = 4;\r
+const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);\r
+\r
+// BAM index constants\r
+const int MAX_BIN           = 37450;   // =(8^6-1)/7+1\r
+const int BAM_MIN_CHUNK_GAP = 32768;\r
+const int BAM_LIDX_SHIFT    = 14;\r
+\r
+// Explicit variable sizes\r
+const int BT_SIZEOF_INT = 4;\r
+\r
+struct CigarOp;\r
+\r
+struct BamAlignment {\r
+\r
+    // constructors & destructor\r
+    public:\r
+        BamAlignment(void);\r
+        BamAlignment(const BamAlignment& other);\r
+        ~BamAlignment(void);\r
+\r
+    // Queries against alignment flags\r
+    public:        \r
+        bool IsDuplicate(void) const;          // Returns true if this read is a PCR duplicate       \r
+        bool IsFailedQC(void) const;           // Returns true if this read failed quality control      \r
+        bool IsFirstMate(void) const;          // Returns true if alignment is first mate on read        \r
+        bool IsMapped(void) const;             // Returns true if alignment is mapped        \r
+        bool IsMateMapped(void) const;         // Returns true if alignment's mate is mapped        \r
+        bool IsMateReverseStrand(void) const;  // Returns true if alignment's mate mapped to reverse strand        \r
+        bool IsPaired(void) const;             // Returns true if alignment part of paired-end read        \r
+        bool IsPrimaryAlignment(void) const;   // Returns true if reported position is primary alignment       \r
+        bool IsProperPair(void) const;         // Returns true if alignment is part of read that satisfied paired-end resolution     \r
+        bool IsReverseStrand(void) const;      // Returns true if alignment mapped to reverse strand\r
+        bool IsSecondMate(void) const;         // Returns true if alignment is second mate on read\r
+\r
+    // Manipulate alignment flags\r
+    public:        \r
+        void SetIsDuplicate(bool ok);          // Sets "PCR duplicate" flag        \r
+        void SetIsFailedQC(bool ok);           // Sets "failed quality control" flag        \r
+        void SetIsFirstMate(bool ok);          // Sets "alignment is first mate" flag        \r
+        void SetIsMateUnmapped(bool ok);       // Sets "alignment's mate is mapped" flag        \r
+        void SetIsMateReverseStrand(bool ok);  // Sets "alignment's mate mapped to reverse strand" flag        \r
+        void SetIsPaired(bool ok);             // Sets "alignment part of paired-end read" flag        \r
+       void SetIsProperPair(bool ok);          // Sets "alignment is part of read that satisfied paired-end resolution" flag        \r
+        void SetIsReverseStrand(bool ok);      // Sets "alignment mapped to reverse strand" flag        \r
+        void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag        \r
+        void SetIsSecondMate(bool ok);         // Sets "alignment is second mate on read" flag        \r
+        void SetIsUnmapped(bool ok);           // Sets "alignment is mapped" flag\r
+\r
+    // Tag data access methods\r
+    public:\r
+        bool GetEditDistance(uint8_t& editDistance) const;     // get "NM" tag data - contributed by Aaron Quinlan\r
+        bool GetReadGroup(std::string& readGroup) const;       // get "RG" tag data\r
+        \r
+        bool GetTag(const std::string& tag, std::string& destination);\r
+        template<typename T> bool GetTag(const std::string& tag, T& destination);\r
+\r
+    // Additional data access methods\r
+    public:\r
+       int GetEndPosition(bool usePadded = false) const;       // calculates alignment end position, based on starting position and CIGAR operations\r
+\r
+    // 'internal' utility methods \r
+    private:\r
+        static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
+\r
+    // Data members\r
+    public:\r
+        std::string  Name;              // Read name\r
+        int32_t      Length;            // Query length\r
+        std::string  QueryBases;        // 'Original' sequence (as reported from sequencing machine)\r
+        std::string  AlignedBases;      // 'Aligned' sequence (includes any indels, padding, clipping)\r
+        std::string  Qualities;         // FASTQ qualities (ASCII characters, not numeric values)\r
+        std::string  TagData;           // Tag data (accessor methods will pull the requested information out)\r
+        int32_t      RefID;             // ID number for reference sequence\r
+        int32_t      Position;          // Position (0-based) where alignment starts\r
+        uint16_t     Bin;               // Bin in BAM file where this alignment resides\r
+        uint16_t     MapQuality;        // Mapping quality score\r
+        uint32_t     AlignmentFlag;     // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
+        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
+        int32_t      MateRefID;         // ID number for reference sequence where alignment's mate was aligned\r
+        int32_t      MatePosition;      // Position (0-based) where alignment's mate starts\r
+        int32_t      InsertSize;        // Mate-pair insert size\r
+\r
+    // Alignment flag query constants\r
+    // Use the get/set methods above instead\r
+    private:\r
+        enum { PAIRED        = 1\r
+             , PROPER_PAIR   = 2\r
+             , UNMAPPED      = 4\r
+             , MATE_UNMAPPED = 8\r
+             , REVERSE       = 16\r
+             , MATE_REVERSE  = 32\r
+             , READ_1        = 64\r
+             , READ_2        = 128\r
+             , SECONDARY     = 256\r
+             , QC_FAILED     = 512\r
+             , DUPLICATE     = 1024 \r
+             };\r
+};\r
+\r
+// ----------------------------------------------------------------\r
+// Auxiliary data structs & typedefs\r
+\r
+struct BamAlignmentSupportData {\r
+      \r
+    // data members\r
+    std::string AllCharData;\r
+    uint32_t    BlockLength;\r
+    uint32_t    NumCigarOperations;\r
+    uint32_t    QueryNameLength;\r
+    uint32_t    QuerySequenceLength;\r
+    \r
+    // constructor\r
+    BamAlignmentSupportData(void)\r
+        : BlockLength(0)\r
+        , NumCigarOperations(0)\r
+        , QueryNameLength(0)\r
+        , QuerySequenceLength(0)\r
+    { }\r
+};\r
+\r
+struct CigarOp {\r
+  \r
+    // data members\r
+    char     Type;   // Operation type (MIDNSHP)\r
+    uint32_t Length; // Operation length (number of bases)\r
+    \r
+    // constructor\r
+    CigarOp(const char type = '\0', \r
+            const uint32_t length = 0) \r
+        : Type(type)\r
+        , Length(length) \r
+    { }\r
+};\r
+\r
+struct RefData {\r
+   \r
+    // data members\r
+    std::string RefName;          // Name of reference sequence\r
+    int32_t     RefLength;        // Length of reference sequence\r
+    bool        RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence\r
+    \r
+    // constructor\r
+    RefData(const int32_t& length = 0, \r
+            bool ok = false)\r
+        : RefLength(length)\r
+        , RefHasAlignments(ok)\r
+    { }\r
+};\r
+\r
+typedef std::vector<RefData>      RefVector;\r
+typedef std::vector<BamAlignment> BamAlignmentVector;\r
+\r
+// ----------------------------------------------------------------\r
+// Indexing structs & typedefs\r
+\r
+struct Chunk {\r
+\r
+    // data members\r
+    uint64_t Start;\r
+    uint64_t Stop;\r
+\r
+    // constructor\r
+    Chunk(const uint64_t& start = 0, \r
+          const uint64_t& stop = 0)\r
+        : Start(start)\r
+        , Stop(stop)\r
+    { }\r
+};\r
+\r
+inline\r
+bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {\r
+    return lhs.Start < rhs.Start;\r
+}\r
+\r
+typedef std::vector<Chunk> ChunkVector;\r
+typedef std::map<uint32_t, ChunkVector> BamBinMap;\r
+typedef std::vector<uint64_t> LinearOffsetVector;\r
+\r
+struct ReferenceIndex {\r
+    // data members\r
+    BamBinMap Bins;\r
+    LinearOffsetVector Offsets;\r
+    // constructor\r
+    ReferenceIndex(const BamBinMap& binMap = BamBinMap(),\r
+                   const LinearOffsetVector& offsets = LinearOffsetVector())\r
+        : Bins(binMap)\r
+        , Offsets(offsets)\r
+    { }\r
+};\r
+\r
+typedef std::vector<ReferenceIndex> BamIndex;\r
+\r
+// ----------------------------------------------------------------\r
+// BamAlignment member methods\r
+\r
+// constructors & destructor\r
+inline \r
+BamAlignment::BamAlignment(void) { }\r
+\r
+inline \r
+BamAlignment::BamAlignment(const BamAlignment& other)\r
+    : Name(other.Name)\r
+    , Length(other.Length)\r
+    , QueryBases(other.QueryBases)\r
+    , AlignedBases(other.AlignedBases)\r
+    , Qualities(other.Qualities)\r
+    , TagData(other.TagData)\r
+    , RefID(other.RefID)\r
+    , Position(other.Position)\r
+    , Bin(other.Bin)\r
+    , MapQuality(other.MapQuality)\r
+    , AlignmentFlag(other.AlignmentFlag)\r
+    , CigarData(other.CigarData)\r
+    , MateRefID(other.MateRefID)\r
+    , MatePosition(other.MatePosition)\r
+    , InsertSize(other.InsertSize)\r
+{ }\r
+\r
+inline \r
+BamAlignment::~BamAlignment(void) { }\r
+\r
+// Queries against alignment flags\r
+inline bool BamAlignment::IsDuplicate(void) const         { return ( (AlignmentFlag & DUPLICATE)     != 0 ); }\r
+inline bool BamAlignment::IsFailedQC(void) const          { return ( (AlignmentFlag & QC_FAILED)     != 0 ); }\r
+inline bool BamAlignment::IsFirstMate(void) const         { return ( (AlignmentFlag & READ_1)        != 0 ); }\r
+inline bool BamAlignment::IsMapped(void) const            { return ( (AlignmentFlag & UNMAPPED)      == 0 ); }\r
+inline bool BamAlignment::IsMateMapped(void) const        { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }\r
+inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }\r
+inline bool BamAlignment::IsPaired(void) const            { return ( (AlignmentFlag & PAIRED)        != 0 ); }\r
+inline bool BamAlignment::IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY)     == 0 ); }\r
+inline bool BamAlignment::IsProperPair(void) const        { return ( (AlignmentFlag & PROPER_PAIR)   != 0 ); }\r
+inline bool BamAlignment::IsReverseStrand(void) const     { return ( (AlignmentFlag & REVERSE)       != 0 ); }\r
+inline bool BamAlignment::IsSecondMate(void) const        { return ( (AlignmentFlag & READ_2)        != 0 ); }\r
+\r
+// Manipulate alignment flags \r
+inline void BamAlignment::SetIsDuplicate(bool ok)          { if (ok) AlignmentFlag |= DUPLICATE;     else AlignmentFlag &= ~DUPLICATE; }\r
+inline void BamAlignment::SetIsFailedQC(bool ok)           { if (ok) AlignmentFlag |= QC_FAILED;     else AlignmentFlag &= ~QC_FAILED; }\r
+inline void BamAlignment::SetIsFirstMate(bool ok)          { if (ok) AlignmentFlag |= READ_1;        else AlignmentFlag &= ~READ_1; }\r
+inline void BamAlignment::SetIsMateUnmapped(bool ok)       { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }\r
+inline void BamAlignment::SetIsMateReverseStrand(bool ok)  { if (ok) AlignmentFlag |= MATE_REVERSE;  else AlignmentFlag &= ~MATE_REVERSE; }\r
+inline void BamAlignment::SetIsPaired(bool ok)             { if (ok) AlignmentFlag |= PAIRED;        else AlignmentFlag &= ~PAIRED; }\r
+inline void BamAlignment::SetIsProperPair(bool ok)         { if (ok) AlignmentFlag |= PROPER_PAIR;   else AlignmentFlag &= ~PROPER_PAIR; }\r
+inline void BamAlignment::SetIsReverseStrand(bool ok)      { if (ok) AlignmentFlag |= REVERSE;       else AlignmentFlag &= ~REVERSE; }\r
+inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY;     else AlignmentFlag &= ~SECONDARY; }\r
+inline void BamAlignment::SetIsSecondMate(bool ok)         { if (ok) AlignmentFlag |= READ_2;        else AlignmentFlag &= ~READ_2; }\r
+inline void BamAlignment::SetIsUnmapped(bool ok)           { if (ok) AlignmentFlag |= UNMAPPED;      else AlignmentFlag &= ~UNMAPPED; }\r
+\r
+// calculates alignment end position, based on starting position and CIGAR operations\r
+inline \r
+int BamAlignment::GetEndPosition(bool usePadded) const {\r
+\r
+    // initialize alignment end to starting position\r
+    int alignEnd = Position;\r
+\r
+    // iterate over cigar operations\r
+    std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();\r
+    std::vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();\r
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {\r
+       const char cigarType = (*cigarIter).Type;\r
+       if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
+           alignEnd += (*cigarIter).Length;\r
+       } \r
+        else if ( usePadded && cigarType == 'I' ) {\r
+            alignEnd += (*cigarIter).Length;\r
+        }\r
+    }\r
+    return alignEnd;\r
+}\r
+\r
+// get "NM" tag data - contributed by Aaron Quinlan\r
+// stores data in 'editDistance', returns success/fail\r
+inline \r
+bool BamAlignment::GetEditDistance(uint8_t& editDistance) const {\r
+\r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    bool foundEditDistanceTag = false;\r
+    while( numBytesParsed < tagDataLen ) {\r
+\r
+        const char* pTagType = pTagData;\r
+        const char* pTagStorageType = pTagData + 2;\r
+        pTagData       += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( strncmp(pTagType, "NM", 2) == 0 ) {\r
+            foundEditDistanceTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if (*pTagStorageType == '\0') { return false; }\r
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+        if (*pTagData == '\0') { return false; }\r
+    }\r
+    // return if the edit distance tag was not present\r
+    if ( !foundEditDistanceTag ) { return false; }\r
+\r
+    // assign the editDistance value\r
+    std::memcpy(&editDistance, pTagData, 1);\r
+    return true;\r
+}\r
+\r
+// get "RG" tag data\r
+// stores data in 'readGroup', returns success/fail\r
+inline \r
+bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
+\r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    bool foundReadGroupTag = false;\r
+    while( numBytesParsed < tagDataLen ) {\r
+\r
+        const char* pTagType = pTagData;\r
+        const char* pTagStorageType = pTagData + 2;\r
+        pTagData       += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( std::strncmp(pTagType, "RG", 2) == 0 ) {\r
+            foundReadGroupTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if (*pTagStorageType == '\0') { return false; }\r
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+        if (*pTagData == '\0') { return false; }\r
+    }\r
+\r
+    // return if the read group tag was not present\r
+    if ( !foundReadGroupTag ) { return false; }\r
+\r
+    // assign the read group\r
+    const unsigned int readGroupLen = std::strlen(pTagData);\r
+    readGroup.resize(readGroupLen);\r
+    std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );\r
+    return true;\r
+}\r
+\r
+inline\r
+bool BamAlignment::GetTag(const std::string& tag, std::string& destination) {\r
+  \r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    bool foundReadGroupTag = false;\r
+    while( numBytesParsed < tagDataLen ) {\r
+\r
+        const char* pTagType = pTagData;\r
+        const char* pTagStorageType = pTagData + 2;\r
+        pTagData       += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+            foundReadGroupTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if (*pTagStorageType == '\0') { return false; }\r
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+        if (*pTagData == '\0') { return false; }\r
+    }\r
+\r
+    // return if the read group tag was not present\r
+    if ( !foundReadGroupTag ) { return false; }\r
+\r
+    // assign the read group\r
+    const unsigned int dataLen = std::strlen(pTagData);\r
+    destination.resize(dataLen);\r
+    std::memcpy( (char*)destination.data(), pTagData, dataLen );\r
+    return true;\r
+}\r
+\r
+template<typename T> \r
+bool BamAlignment::GetTag(const std::string& tag, T& destination) {\r
+  \r
+    if ( TagData.empty() ) { return false; }\r
+\r
+    // localize the tag data\r
+    char* pTagData = (char*)TagData.data();\r
+    const unsigned int tagDataLen = TagData.size();\r
+    unsigned int numBytesParsed = 0;\r
+\r
+    bool foundDesiredTag = false;\r
+    while( numBytesParsed < tagDataLen ) {\r
+\r
+        const char* pTagType = pTagData;\r
+        const char* pTagStorageType = pTagData + 2;\r
+        pTagData       += 3;\r
+        numBytesParsed += 3;\r
+\r
+        // check the current tag\r
+        if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) {\r
+            foundDesiredTag = true;\r
+            break;\r
+        }\r
+\r
+        // get the storage class and find the next tag\r
+        if (*pTagStorageType == '\0') { return false; }\r
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );\r
+        if (*pTagData == '\0') { return false; }\r
+    }\r
+    // return if the edit distance tag was not present\r
+    if ( !foundDesiredTag ) { return false; }\r
+\r
+    // assign the editDistance value\r
+    std::memcpy(&destination, pTagData, sizeof(T));\r
+    return true;\r
 }\r
+\r
+inline\r
+void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
+    \r
+    switch(storageType) {\r
+\r
+        case 'A':\r
+        case 'c':\r
+        case 'C':\r
+            ++numBytesParsed;\r
+            ++pTagData;\r
+            break;\r
+\r
+        case 's':\r
+        case 'S':\r
+            numBytesParsed += 2;\r
+            pTagData       += 2;\r
+            break;\r
+\r
+        case 'f':\r
+        case 'i':\r
+        case 'I':\r
+            numBytesParsed += 4;\r
+            pTagData       += 4;\r
+            break;\r
+\r
+        case 'Z':\r
+        case 'H':\r
+            while(*pTagData) {\r
+                ++numBytesParsed;\r
+                ++pTagData;\r
+            }\r
+        // ---------------------------\r
+        // Added: 3-25-2010 DWB\r
+        // Contributed: ARQ\r
+        // Fixed: error parsing variable length tag data\r
+            ++pTagData;\r
+        // ---------------------------\r
+            break;\r
+\r
+        default:\r
+            printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);\r
+            exit(1);\r
+    }\r
+}\r
+\r
+// ----------------------------------------------------------------\r
+// Added: 3-35-2010 DWB\r
+// Fixed: Routines to provide endian-correctness\r
+// ----------------------------------------------------------------\r
+\r
+// returns true if system is big endian\r
+inline bool SystemIsBigEndian(void) {\r
+   const uint16_t one = 0x0001;\r
+   return ((*(char*) &one) == 0 );\r
+}\r
+\r
+// swaps endianness of 16-bit value 'in place'\r
+inline void SwapEndian_16(int16_t& x) {\r
+    x = ((x >> 8) | (x << 8));\r
+}\r
+\r
+inline void SwapEndian_16(uint16_t& x) {\r
+    x = ((x >> 8) | (x << 8));\r
+}\r
+\r
+// swaps endianness of 32-bit value 'in-place'\r
+inline void SwapEndian_32(int32_t& x) {\r
+    x = ( (x >> 24) | \r
+         ((x << 8) & 0x00FF0000) | \r
+         ((x >> 8) & 0x0000FF00) | \r
+          (x << 24)\r
+        );\r
+}\r
+\r
+inline void SwapEndian_32(uint32_t& x) {\r
+    x = ( (x >> 24) | \r
+         ((x << 8) & 0x00FF0000) | \r
+         ((x >> 8) & 0x0000FF00) | \r
+          (x << 24)\r
+        );\r
+}\r
+\r
+// swaps endianness of 64-bit value 'in-place'\r
+inline void SwapEndian_64(int64_t& x) {\r
+    x = ( (x >> 56) | \r
+         ((x << 40) & 0x00FF000000000000ll) |\r
+         ((x << 24) & 0x0000FF0000000000ll) |\r
+         ((x << 8)  & 0x000000FF00000000ll) |\r
+         ((x >> 8)  & 0x00000000FF000000ll) |\r
+         ((x >> 24) & 0x0000000000FF0000ll) |\r
+         ((x >> 40) & 0x000000000000FF00ll) |\r
+          (x << 56)\r
+        );\r
+}\r
+\r
+inline void SwapEndian_64(uint64_t& x) {\r
+    x = ( (x >> 56) | \r
+         ((x << 40) & 0x00FF000000000000ll) |\r
+         ((x << 24) & 0x0000FF0000000000ll) |\r
+         ((x << 8)  & 0x000000FF00000000ll) |\r
+         ((x >> 8)  & 0x00000000FF000000ll) |\r
+         ((x >> 24) & 0x0000000000FF0000ll) |\r
+         ((x >> 40) & 0x000000000000FF00ll) |\r
+          (x << 56)\r
+        );\r
+}\r
+\r
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_16p(char* data) {\r
+    uint16_t& value = (uint16_t&)*data; \r
+    SwapEndian_16(value);\r
+}\r
+\r
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_32p(char* data) {\r
+    uint32_t& value = (uint32_t&)*data; \r
+    SwapEndian_32(value);\r
+}\r
+\r
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)\r
+inline void SwapEndian_64p(char* data) {\r
+    uint64_t& value = (uint64_t&)*data; \r
+    SwapEndian_64(value);\r
+}\r
+\r
+} // namespace BamTools\r
+\r
+#endif // BAMAUX_H\r