X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamAux.h;h=14e65476b4c1f69d02bf02e417ededdcc5e02985;hb=2e049ed7f28881bce09653e60f5aea54bfd7afbf;hp=a122b24496e74c4ffa581b646983818989545791;hpb=02a90ba6b35ce74cbb5e9b49403c6f1cb046afae;p=bamtools.git diff --git a/src/api/BamAux.h b/src/api/BamAux.h index a122b24..14e6547 100644 --- a/src/api/BamAux.h +++ b/src/api/BamAux.h @@ -1,268 +1,92 @@ // *************************************************************************** // BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 15 September 2010 (DB) +// Last modified: 7 October 2011 (DB) // --------------------------------------------------------------------------- -// Provides the basic constants, data structures, etc. for using BAM files +// Provides data structures & utility methods that are used throughout the API. // *************************************************************************** #ifndef BAMAUX_H #define BAMAUX_H -// C inclues -#include -#include -#include -#include - -// C++ includes -#include -#include +#include +#include #include -#include #include -#include #include -// Platform-specific type definitions -#ifndef BAMTOOLS_TYPES -#define BAMTOOLS_TYPES - #ifdef _MSC_VER - typedef char int8_t; - typedef unsigned char uint8_t; - typedef short int16_t; - typedef unsigned short uint16_t; - typedef int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; - #else - #include - #endif -#endif // BAMTOOLS_TYPES +/*! \file BamAux.h -namespace BamTools { + Provides data structures & utility methods that are used throughout the API. +*/ +/*! \namespace BamTools + \brief Contains all BamTools classes & methods. -// BAM constants -const int BAM_CORE_SIZE = 32; -const int BAM_CMATCH = 0; -const int BAM_CINS = 1; -const int BAM_CDEL = 2; -const int BAM_CREF_SKIP = 3; -const int BAM_CSOFT_CLIP = 4; -const int BAM_CHARD_CLIP = 5; -const int BAM_CPAD = 6; -const int BAM_CIGAR_SHIFT = 4; -const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1); - -// BAM index constants -const int MAX_BIN = 37450; // =(8^6-1)/7+1 -const int BAM_MIN_CHUNK_GAP = 32768; -const int BAM_LIDX_SHIFT = 14; - -// Explicit variable sizes -const int BT_SIZEOF_INT = 4; - -struct CigarOp; - -struct BamAlignment { - - // constructors & destructor - public: - BamAlignment(void); - BamAlignment(const BamAlignment& other); - ~BamAlignment(void); - - // Queries against alignment flags - public: - bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate - bool IsFailedQC(void) const; // Returns true if this read failed quality control - bool IsFirstMate(void) const; // Returns true if alignment is first mate on read - bool IsMapped(void) const; // Returns true if alignment is mapped - bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped - bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand - bool IsPaired(void) const; // Returns true if alignment part of paired-end read - bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment - bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution - bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand - bool IsSecondMate(void) const; // Returns true if alignment is second mate on read - - // Manipulate alignment flags - public: - void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag - void SetIsFailedQC(bool ok); // Sets "failed quality control" flag - void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag - void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is mapped" flag - void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag - void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag - void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag - void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag - void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag - void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag - void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag - - // Tag data access methods - public: - // ------------------------------------------------------------------------------------- - // N.B. - The following tag-modifying methods may not be used on BamAlignments fetched - // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in - // error message (to keep output clean) but will ALWAYS return false. Only user- - // generated BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid. - - // add tag data (create new TAG entry with TYPE and VALUE) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if new data added, false if error or TAG already exists - // N.B. - will NOT modify existing tag. Use EditTag() instead - bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present) - // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details - // returns true if edit was successfaul, false if error - bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H - bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i - bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f - - // specific tag data access methods - these only remain for legacy support - bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (implemented as GetTag("NM", editDistance)) - bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (implemented as GetTag("RG", readGroup)) - - // generic tag data access methods - bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings - bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data - bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data - bool GetTag(const std::string& tag, float& destination) const; // access floating point data - - // remove tag data - // returns true if removal was successful, false if error - // N.B. - returns false if TAG does not exist (no removal can occur) - bool RemoveTag(const std::string& tag); - - // Additional data access methods - public: - // calculates alignment end position, based on starting position and CIGAR operations - // @zeroBased - if true, returns 0-based coordinate; else returns 1-based - int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; - - // 'internal' utility methods - private: - static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed); - static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); - - // Data members - public: - std::string Name; // Read name - int32_t Length; // Query length - std::string QueryBases; // 'Original' sequence (as reported from sequencing machine) - std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping) - std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values) - std::string TagData; // Tag data (accessor methods will pull the requested information out) - int32_t RefID; // ID number for reference sequence - int32_t Position; // Position (0-based) where alignment starts - uint16_t Bin; // Bin in BAM file where this alignment resides - uint16_t MapQuality; // Mapping quality score - uint32_t AlignmentFlag; // Alignment bit-flag - see Is() methods to query this value, SetIs() methods to manipulate - std::vector CigarData; // CIGAR operations for this alignment - int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned - int32_t MatePosition; // Position (0-based) where alignment's mate starts - int32_t InsertSize; // Mate-pair insert size - - // internal data - private: - struct BamAlignmentSupportData { - - // data members - std::string AllCharData; - uint32_t BlockLength; - uint32_t NumCigarOperations; - uint32_t QueryNameLength; - uint32_t QuerySequenceLength; - bool HasCoreOnly; - - // constructor - BamAlignmentSupportData(void) - : BlockLength(0) - , NumCigarOperations(0) - , QueryNameLength(0) - , QuerySequenceLength(0) - , HasCoreOnly(false) - { } - }; - - // contains raw character data & lengths - BamAlignmentSupportData SupportData; - - // allow these classes access to BamAlignment private members (SupportData) - // but client code should not need to touch this data - friend class BamReader; - friend class BamWriter; - - // Alignment flag query constants - // Use the get/set methods above instead - private: - enum { PAIRED = 1 - , PROPER_PAIR = 2 - , UNMAPPED = 4 - , MATE_UNMAPPED = 8 - , REVERSE = 16 - , MATE_REVERSE = 32 - , READ_1 = 64 - , READ_2 = 128 - , SECONDARY = 256 - , QC_FAILED = 512 - , DUPLICATE = 1024 - }; -}; + The BamTools API contained in this namespace contains classes and methods + for reading, writing, and manipulating BAM alignment files. +*/ +namespace BamTools { // ---------------------------------------------------------------- -// Auxiliary data structs & typedefs +// CigarOp -struct CigarOp { +/*! \struct BamTools::CigarOp + \brief Represents a CIGAR alignment operation. + + \sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations. +*/ +struct API_EXPORT CigarOp { - // data members - char Type; // Operation type (MIDNSHP) - uint32_t Length; // Operation length (number of bases) + char Type; //!< CIGAR operation type (MIDNSHP) + uint32_t Length; //!< CIGAR operation length (number of bases) - // constructor + //! constructor CigarOp(const char type = '\0', - const uint32_t length = 0) + const uint32_t& length = 0) : Type(type) , Length(length) { } }; -struct RefData { +// ---------------------------------------------------------------- +// RefData + +/*! \struct BamTools::RefData + \brief Represents a reference sequence entry +*/ +struct API_EXPORT RefData { - // data members - std::string RefName; // Name of reference sequence - int32_t RefLength; // Length of reference sequence - bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence - - // constructor - RefData(const int32_t& length = 0, - bool ok = false) - : RefLength(length) - , RefHasAlignments(ok) + std::string RefName; //!< name of reference sequence + int32_t RefLength; //!< length of reference sequence + + //! constructor + RefData(const std::string& name = "", + const int32_t& length = 0) + : RefName(name) + , RefLength(length) { } }; -typedef std::vector RefVector; -typedef std::vector BamAlignmentVector; +//! convenience typedef for vector of RefData entries +typedef std::vector RefVector; -struct BamRegion { +// ---------------------------------------------------------------- +// BamRegion + +/*! \struct BamTools::BamRegion + \brief Represents a sequential genomic region + + Allowed to span multiple (sequential) references. +*/ +struct API_EXPORT BamRegion { - // data members - int LeftRefID; - int LeftPosition; - int RightRefID; - int RightPosition; + int LeftRefID; //!< reference ID for region's left boundary + int LeftPosition; //!< position for region's left boundary + int RightRefID; //!< reference ID for region's right boundary + int RightPosition; //!< position for region's right boundary - // constructor + //! constructor BamRegion(const int& leftID = -1, const int& leftPos = -1, const int& rightID = -1, @@ -273,35 +97,75 @@ struct BamRegion { , RightPosition(rightPos) { } - // member functions - void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; } - bool isLeftBoundSpecified(void) const { return ( LeftRefID != -1 && LeftPosition != -1 ); } - bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); } - bool isRightBoundSpecified(void) const { return ( RightRefID != -1 && RightPosition != -1 ); } + //! copy constructor + BamRegion(const BamRegion& other) + : LeftRefID(other.LeftRefID) + , LeftPosition(other.LeftPosition) + , RightRefID(other.RightRefID) + , RightPosition(other.RightPosition) + { } + + //! Clears region boundaries + void clear(void) { + LeftRefID = -1; LeftPosition = -1; + RightRefID = -1; RightPosition = -1; + } + + //! Returns true if region has a left boundary + bool isLeftBoundSpecified(void) const { + return ( LeftRefID >= 0 && LeftPosition >= 0 ); + } + + //! Returns true if region boundaries are not defined + bool isNull(void) const { + return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); + } + + //! Returns true if region has a right boundary + bool isRightBoundSpecified(void) const { + return ( RightRefID >= 0 && RightPosition >= 0 ); + } }; // ---------------------------------------------------------------- -// Added: 3-35-2010 DWB -// Fixed: Routines to provide endian-correctness -// ---------------------------------------------------------------- +// General utility methods -// returns true if system is big endian -inline bool SystemIsBigEndian(void) { - const uint16_t one = 0x0001; - return ((*(char*) &one) == 0 ); +/*! \fn bool FileExists(const std::string& filename) + \brief checks if file exists + + Attempts to open file in a read-only mode. + + \return \c true if file can be opened successfully +*/ +API_EXPORT inline bool FileExists(const std::string& filename) { + std::ifstream f(filename.c_str(), std::ifstream::in); + return !f.fail(); } -// swaps endianness of 16-bit value 'in place' -inline void SwapEndian_16(int16_t& x) { +/*! \fn void SwapEndian_16(int16_t& x) + \brief swaps endianness of signed 16-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_16(int16_t& x) { x = ((x >> 8) | (x << 8)); } -inline void SwapEndian_16(uint16_t& x) { +/*! \fn void SwapEndian_16(uint16_t& x) + \brief swaps endianness of unsigned 16-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_16(uint16_t& x) { x = ((x >> 8) | (x << 8)); } -// swaps endianness of 32-bit value 'in-place' -inline void SwapEndian_32(int32_t& x) { +/*! \fn void SwapEndian_32(int32_t& x) + \brief swaps endianness of signed 32-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_32(int32_t& x) { x = ( (x >> 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | @@ -309,7 +173,12 @@ inline void SwapEndian_32(int32_t& x) { ); } -inline void SwapEndian_32(uint32_t& x) { +/*! \fn void SwapEndian_32(uint32_t& x) + \brief swaps endianness of unsigned 32-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_32(uint32_t& x) { x = ( (x >> 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | @@ -317,8 +186,12 @@ inline void SwapEndian_32(uint32_t& x) { ); } -// swaps endianness of 64-bit value 'in-place' -inline void SwapEndian_64(int64_t& x) { +/*! \fn void SwapEndian_64(int64_t& x) + \brief swaps endianness of signed 64-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_64(int64_t& x) { x = ( (x >> 56) | ((x << 40) & 0x00FF000000000000ll) | ((x << 24) & 0x0000FF0000000000ll) | @@ -330,7 +203,12 @@ inline void SwapEndian_64(int64_t& x) { ); } -inline void SwapEndian_64(uint64_t& x) { +/*! \fn void SwapEndian_64(uint64_t& x) + \brief swaps endianness of unsigned 64-bit integer, in place + + Swaps endian representation of value in \a x. +*/ +API_EXPORT inline void SwapEndian_64(uint64_t& x) { x = ( (x >> 56) | ((x << 40) & 0x00FF000000000000ll) | ((x << 24) & 0x0000FF0000000000ll) | @@ -342,669 +220,249 @@ inline void SwapEndian_64(uint64_t& x) { ); } -// swaps endianness of 'next 2 bytes' in a char buffer (in-place) -inline void SwapEndian_16p(char* data) { +/*! \fn void SwapEndian_16p(char* data) + \brief swaps endianness of the next 2 bytes in a buffer, in place + + Swaps endian representation the next 2 bytes in \a data. +*/ +API_EXPORT inline void SwapEndian_16p(char* data) { uint16_t& value = (uint16_t&)*data; SwapEndian_16(value); } -// swaps endianness of 'next 4 bytes' in a char buffer (in-place) -inline void SwapEndian_32p(char* data) { +/*! \fn void SwapEndian_32p(char* data) + \brief swaps endianness of the next 4 bytes in a buffer, in place + + Swaps endian representation the next 4 bytes in \a data. +*/ +API_EXPORT inline void SwapEndian_32p(char* data) { uint32_t& value = (uint32_t&)*data; SwapEndian_32(value); } -// swaps endianness of 'next 8 bytes' in a char buffer (in-place) -inline void SwapEndian_64p(char* data) { +/*! \fn void SwapEndian_64p(char* data) + \brief swaps endianness of the next 8 bytes in a buffer, in place + + Swaps endian representation the next 8 bytes in \a data. +*/ +API_EXPORT inline void SwapEndian_64p(char* data) { uint64_t& value = (uint64_t&)*data; SwapEndian_64(value); } -inline bool FileExists(const std::string& filename) { - std::ifstream f(filename.c_str(), std::ifstream::in); - return !f.fail(); +/*! \fn bool SystemIsBigEndian(void) + \brief checks host architecture's byte order + \return \c true if system uses big-endian ordering +*/ +API_EXPORT inline bool SystemIsBigEndian(void) { + const uint16_t one = 0x0001; + return ((*(char*) &one) == 0 ); } -// ---------------------------------------------------------------- -// BamAlignment member methods - -// constructors & destructor -inline BamAlignment::BamAlignment(void) { } - -inline BamAlignment::BamAlignment(const BamAlignment& other) - : Name(other.Name) - , Length(other.Length) - , QueryBases(other.QueryBases) - , AlignedBases(other.AlignedBases) - , Qualities(other.Qualities) - , TagData(other.TagData) - , RefID(other.RefID) - , Position(other.Position) - , Bin(other.Bin) - , MapQuality(other.MapQuality) - , AlignmentFlag(other.AlignmentFlag) - , CigarData(other.CigarData) - , MateRefID(other.MateRefID) - , MatePosition(other.MatePosition) - , InsertSize(other.InsertSize) - , SupportData(other.SupportData) -{ } - -inline BamAlignment::~BamAlignment(void) { } - -// Queries against alignment flags -inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); } -inline bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); } -inline bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); } -inline bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); } -inline bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); } -inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); } -inline bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); } -inline bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); } -inline bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); } -inline bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); } -inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } - -// Manipulate alignment flags -inline void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; } -inline void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; } -inline void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; } -inline void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; } -inline void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; } -inline void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; } -inline void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; } -inline void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; } -inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; } -inline void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; } -inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; } - -// calculates alignment end position, based on starting position and CIGAR operations -inline -int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const { - - // initialize alignment end to starting position - int alignEnd = Position; - - // iterate over cigar operations - std::vector::const_iterator cigarIter = CigarData.begin(); - std::vector::const_iterator cigarEnd = CigarData.end(); - for ( ; cigarIter != cigarEnd; ++cigarIter) { - const char cigarType = (*cigarIter).Type; - if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) { - alignEnd += (*cigarIter).Length; - } - else if ( usePadded && cigarType == 'I' ) { - alignEnd += (*cigarIter).Length; - } - } - - // adjust for zeroBased, if necessary - if (zeroBased) - return alignEnd - 1; - else - return alignEnd; +/*! \fn void PackUnsignedInt(char* buffer, unsigned int value) + \brief stores unsigned integer value in a byte buffer + + \param buffer destination buffer + \param value unsigned integer to 'pack' in buffer +*/ +API_EXPORT inline void PackUnsignedInt(char* buffer, unsigned int value) { + buffer[0] = (char)value; + buffer[1] = (char)(value >> 8); + buffer[2] = (char)(value >> 16); + buffer[3] = (char)(value >> 24); } -inline -bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type != "Z" && type != "H" ) return false; - - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag already exists, return false - // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; - - // otherwise, copy tag data to temp buffer - std::string newTag = tag + type + value; - const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term - char originalTagData[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term - - // append newTag - strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term - - // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; - TagData.assign(newTagData, newTagDataLength); - - // return success - return true; -} +/*! \fn void PackUnsignedShort(char* buffer, unsigned short value) + \brief stores unsigned short integer value in a byte buffer -inline -bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type == "f" || type == "Z" || type == "H" ) return false; - - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag already exists, return false - // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; - - // otherwise, convert value to string - union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un; - un.value = value; - - // copy original tag data to temp buffer - std::string newTag = tag + type; - const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer - char originalTagData[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term - - // append newTag - strcat(originalTagData + tagDataLength, newTag.data()); - memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int)); - - // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; - TagData.assign(newTagData, newTagDataLength); - - // return success - return true; + \param buffer destination buffer + \param value unsigned short integer to 'pack' in buffer +*/ +API_EXPORT inline void PackUnsignedShort(char* buffer, unsigned short value) { + buffer[0] = (char)value; + buffer[1] = (char)(value >> 8); } -inline -bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) { - return AddTag(tag, type, (const uint32_t&)value); +/*! \fn double UnpackDouble(const char* buffer) + \brief reads a double value from byte buffer + + \param buffer source byte buffer + \return the (double) value read from the buffer +*/ +API_EXPORT inline double UnpackDouble(const char* buffer) { + union { double value; unsigned char valueBuffer[sizeof(double)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + un.valueBuffer[4] = buffer[4]; + un.valueBuffer[5] = buffer[5]; + un.valueBuffer[6] = buffer[6]; + un.valueBuffer[7] = buffer[7]; + return un.value; } -inline -bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type == "Z" || type == "H" ) return false; - - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag already exists, return false - // use EditTag explicitly instead - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; - - // otherwise, convert value to string - union { float value; char valueBuffer[sizeof(float)]; } un; - un.value = value; - - // copy original tag data to temp buffer - std::string newTag = tag + type; - const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float - char originalTagData[newTagDataLength]; - memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term - - // append newTag - strcat(originalTagData + tagDataLength, newTag.data()); - memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float)); - - // store temp buffer back in TagData - const char* newTagData = (const char*)originalTagData; - TagData.assign(newTagData, newTagDataLength); - - // return success - return true; -} +/*! \fn double UnpackDouble(char* buffer) + \brief reads a double value from byte buffer -inline -bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type != "Z" && type != "H" ) return false; - - // localize the tag data - char* pOriginalTagData = (char*)TagData.data(); - char* pTagData = pOriginalTagData; - const unsigned int originalTagDataLength = TagData.size(); - - unsigned int newTagDataLength = 0; - unsigned int numBytesParsed = 0; - - // if tag found, store data in readGroup, return success - if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { - - // make sure array is more than big enough - char newTagData[originalTagDataLength + value.size()]; - - // copy original tag data up til desired tag - const unsigned int beginningTagDataLength = numBytesParsed; - newTagDataLength += beginningTagDataLength; - memcpy(newTagData, pOriginalTagData, numBytesParsed); - - // copy new VALUE in place of current tag data - const unsigned int dataLength = strlen(value.c_str()); - memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 ); - - // skip to next tag (if tag for removal is last, return true) - const char* pTagStorageType = pTagData - 1; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; - - // copy everything from current tag (the next one after tag for removal) to end - const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); - const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1; - const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; - memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); - - // ensure null-terminator - newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; - - // save new tag data - TagData.assign(newTagData, endTagOffset + endTagDataLength); - return true; - } - - // tag not found, attempt AddTag - else return AddTag(tag, type, value); -} + This is an overloaded function. -inline -bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type == "f" || type == "Z" || type == "H" ) return false; - - // localize the tag data - char* pOriginalTagData = (char*)TagData.data(); - char* pTagData = pOriginalTagData; - const unsigned int originalTagDataLength = TagData.size(); - - unsigned int newTagDataLength = 0; - unsigned int numBytesParsed = 0; - - // if tag found, store data in readGroup, return success - if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { - - // make sure array is more than big enough - char newTagData[originalTagDataLength + sizeof(value)]; - - // copy original tag data up til desired tag - const unsigned int beginningTagDataLength = numBytesParsed; - newTagDataLength += beginningTagDataLength; - memcpy(newTagData, pOriginalTagData, numBytesParsed); - - // copy new VALUE in place of current tag data - union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un; - un.value = value; - memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int)); - - // skip to next tag (if tag for removal is last, return true) - const char* pTagStorageType = pTagData - 1; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; - - // copy everything from current tag (the next one after tag for removal) to end - const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); - const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int); - const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; - memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); - - // ensure null-terminator - newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; - - // save new tag data - TagData.assign(newTagData, endTagOffset + endTagDataLength); - return true; - } - - // tag not found, attempt AddTag - else return AddTag(tag, type, value); + \param buffer source byte buffer + \return the (double) value read from the buffer +*/ +API_EXPORT inline double UnpackDouble(char* buffer) { + return UnpackDouble( (const char*)buffer ); } -inline -bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) { - return EditTag(tag, type, (const uint32_t&)value); +/*! \fn double UnpackFloat(const char* buffer) + \brief reads a float value from byte buffer + + \param buffer source byte buffer + \return the (float) value read from the buffer +*/ +API_EXPORT inline float UnpackFloat(const char* buffer) { + union { float value; unsigned char valueBuffer[sizeof(float)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; } -inline -bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) { - - if ( SupportData.HasCoreOnly ) return false; - if ( tag.size() != 2 || type.size() != 1 ) return false; - if ( type == "Z" || type == "H" ) return false; - - // localize the tag data - char* pOriginalTagData = (char*)TagData.data(); - char* pTagData = pOriginalTagData; - const unsigned int originalTagDataLength = TagData.size(); - - unsigned int newTagDataLength = 0; - unsigned int numBytesParsed = 0; - - // if tag found, store data in readGroup, return success - if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { - - // make sure array is more than big enough - char newTagData[originalTagDataLength + sizeof(value)]; - - // copy original tag data up til desired tag - const unsigned int beginningTagDataLength = numBytesParsed; - newTagDataLength += beginningTagDataLength; - memcpy(newTagData, pOriginalTagData, numBytesParsed); - - // copy new VALUE in place of current tag data - union { float value; char valueBuffer[sizeof(float)]; } un; - un.value = value; - memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float)); - - // skip to next tag (if tag for removal is last, return true) - const char* pTagStorageType = pTagData - 1; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; - - // copy everything from current tag (the next one after tag for removal) to end - const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); - const unsigned int endTagOffset = beginningTagDataLength + sizeof(float); - const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; - memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); - - // ensure null-terminator - newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; - - // save new tag data - TagData.assign(newTagData, endTagOffset + endTagDataLength); - return true; - } - - // tag not found, attempt AddTag - else return AddTag(tag, type, value); -} +/*! \fn double UnpackFloat(char* buffer) + \brief reads a float value from byte buffer -// get "NM" tag data - originally contributed by Aaron Quinlan -// stores data in 'editDistance', returns success/fail -inline -bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { - return GetTag("NM", (uint32_t&)editDistance); + This is an overloaded function. + + \param buffer source byte buffer + \return the (float) value read from the buffer +*/ +API_EXPORT inline float UnpackFloat(char* buffer) { + return UnpackFloat( (const char*)buffer ); } -// get "RG" tag data -// stores data in 'readGroup', returns success/fail -inline -bool BamAlignment::GetReadGroup(std::string& readGroup) const { - return GetTag("RG", readGroup); +/*! \fn signed int UnpackSignedInt(const char* buffer) + \brief reads a signed integer value from byte buffer + + \param buffer source byte buffer + \return the (signed int) value read from the buffer +*/ +API_EXPORT inline signed int UnpackSignedInt(const char* buffer) { + union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; } -inline -bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const { +/*! \fn signed int UnpackSignedInt(char* buffer) + \brief reads a signed integer value from byte buffer - // make sure tag data exists - if ( SupportData.HasCoreOnly || TagData.empty() ) - return false; + This is an overloaded function. - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag found, store data in readGroup, return success - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { - const unsigned int dataLength = strlen(pTagData); - destination.clear(); - destination.resize(dataLength); - memcpy( (char*)destination.data(), pTagData, dataLength ); - return true; - } - - // tag not found, return failure - return false; + \param buffer source byte buffer + \return the (signed int) value read from the buffer +*/ +API_EXPORT inline signed int UnpackSignedInt(char* buffer) { + return UnpackSignedInt( (const char*) buffer ); } -inline -bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const { - - // make sure tag data exists - if ( SupportData.HasCoreOnly || TagData.empty() ) - return false; - - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag found, determine data byte-length, store data in readGroup, return success - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { - - // determine data byte-length - const char type = *(pTagData - 1); - int destinationLength = 0; - switch (type) { - // 1 byte data - case 'A': - case 'c': - case 'C': - destinationLength = 1; - break; - - // 2 byte data - case 's': - case 'S': - destinationLength = 2; - break; - - // 4 byte data - case 'i': - case 'I': - destinationLength = 4; - break; - - // unsupported type for integer destination (float or var-length strings) - case 'f': - case 'Z': - case 'H': - fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type); - return false; - - // unknown tag type - default: - fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type); - return false; - } - - // store in destination - destination = 0; - memcpy(&destination, pTagData, destinationLength); - return true; - } - - // tag not found, return failure - return false; +/*! \fn signed short UnpackSignedShort(const char* buffer) + \brief reads a signed short integer value from byte buffer + + \param buffer source byte buffer + \return the (signed short) value read from the buffer +*/ +API_EXPORT inline signed short UnpackSignedShort(const char* buffer) { + union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; } -inline -bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const { - return GetTag(tag, (uint32_t&)destination); -} +/*! \fn signed short UnpackSignedShort(char* buffer) + \brief reads a signed short integer value from byte buffer -inline -bool BamAlignment::GetTag(const std::string& tag, float& destination) const { - - // make sure tag data exists - if ( SupportData.HasCoreOnly || TagData.empty() ) - return false; - - // localize the tag data - char* pTagData = (char*)TagData.data(); - const unsigned int tagDataLength = TagData.size(); - unsigned int numBytesParsed = 0; - - // if tag found, determine data byte-length, store data in readGroup, return success - if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { - //pTagData += numBytesParsed; - - // determine data byte-length - const char type = *(pTagData - 1); - int destinationLength = 0; - switch(type) { - - // 1 byte data - case 'A': - case 'c': - case 'C': - destinationLength = 1; - break; - - // 2 byte data - case 's': - case 'S': - destinationLength = 2; - break; - - // 4 byte data - case 'f': - case 'i': - case 'I': - destinationLength = 4; - break; - - // unsupported type (var-length strings) - case 'Z': - case 'H': - fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type); - return false; - - // unknown tag type - default: - fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type); - return false; - } - - // store in destination - destination = 0.0; - memcpy(&destination, pTagData, destinationLength); - return true; - } - - // tag not found, return failure - return false; + This is an overloaded function. + + \param buffer source byte buffer + \return the (signed short) value read from the buffer +*/ +API_EXPORT inline signed short UnpackSignedShort(char* buffer) { + return UnpackSignedShort( (const char*)buffer ); } -inline -bool BamAlignment::RemoveTag(const std::string& tag) { - - // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed - // also, return false if no data present to remove - if ( SupportData.HasCoreOnly || TagData.empty() ) return false; - - // localize the tag data - char* pOriginalTagData = (char*)TagData.data(); - char* pTagData = pOriginalTagData; - const unsigned int originalTagDataLength = TagData.size(); - unsigned int newTagDataLength = 0; - unsigned int numBytesParsed = 0; - - // if tag found, store data in readGroup, return success - if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { - - char newTagData[originalTagDataLength]; - - // copy original tag data up til desired tag - pTagData -= 3; - numBytesParsed -= 3; - const unsigned int beginningTagDataLength = numBytesParsed; - newTagDataLength += beginningTagDataLength; - memcpy(newTagData, pOriginalTagData, numBytesParsed); - - // skip to next tag (if tag for removal is last, return true) - const char* pTagStorageType = pTagData + 2; - pTagData += 3; - numBytesParsed += 3; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; - - // copy everything from current tag (the next one after tag for removal) to end - const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); - const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; - memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength ); - - // save new tag data - TagData.assign(newTagData, beginningTagDataLength + endTagDataLength); - return true; - } - - // tag not found, no removal - return failure - return false; +/*! \fn unsigned int UnpackUnsignedInt(const char* buffer) + \brief reads an unsigned integer value from byte buffer + + \param buffer source byte buffer + \return the (unsigned int) value read from the buffer +*/ +API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) { + union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; } -inline -bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) { +/*! \fn unsigned int UnpackUnsignedInt(char* buffer) + \brief reads an unsigned integer value from byte buffer - while ( numBytesParsed < tagDataLength ) { + This is an overloaded function. - const char* pTagType = pTagData; - const char* pTagStorageType = pTagData + 2; - pTagData += 3; - numBytesParsed += 3; + \param buffer source byte buffer + \return the (unsigned int) value read from the buffer +*/ +API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) { + return UnpackUnsignedInt( (const char*)buffer ); +} - // check the current tag, return true on match - if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) - return true; +/*! \fn unsigned short UnpackUnsignedShort(const char* buffer) + \brief reads an unsigned short integer value from byte buffer + + \param buffer source byte buffer + \return the (unsigned short) value read from the buffer +*/ +API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) { + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} - // get the storage class and find the next tag - if ( *pTagStorageType == '\0' ) return false; - if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false; - if ( *pTagData == '\0' ) return false; - } - - // checked all tags, none match - return false; +/*! \fn unsigned short UnpackUnsignedShort(char* buffer) + \brief reads an unsigned short integer value from byte buffer + + This is an overloaded function. + + \param buffer source byte buffer + \return the (unsigned short) value read from the buffer +*/ +API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) { + return UnpackUnsignedShort( (const char*)buffer ); } -inline -bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) { - - switch(storageType) { - - case 'A': - case 'c': - case 'C': - ++numBytesParsed; - ++pTagData; - break; - - case 's': - case 'S': - numBytesParsed += 2; - pTagData += 2; - break; - - case 'f': - case 'i': - case 'I': - numBytesParsed += 4; - pTagData += 4; - break; - - case 'Z': - case 'H': - while(*pTagData) { - ++numBytesParsed; - ++pTagData; - } - // increment for null-terminator - ++numBytesParsed; - ++pTagData; - break; - - default: - // error case - fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType); - return false; +// ---------------------------------------------------------------- +// 'internal' helper structs + +struct RaiiBuffer { + RaiiBuffer(const unsigned int n) + : Buffer( new char[n]() ) + { } + ~RaiiBuffer(void) { + delete[] Buffer; } - - // return success - return true; -} + char* Buffer; +}; } // namespace BamTools