// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 27 July 2010 (DB)\r
+// Last modified: 19 November 2010 (DB)\r
// ---------------------------------------------------------------------------\r
-// Provides the basic constants, data structures, etc. for using BAM files\r
+// Provides the basic constants, data structures, utilities, etc.\r
+// used throughout the API for handling BAM files.\r
// ***************************************************************************\r
\r
#ifndef BAMAUX_H\r
#define BAMAUX_H\r
\r
-// C inclues\r
-#include <cctype>\r
-#include <cstdio>\r
-#include <cstdlib>\r
-#include <cstring>\r
+#include <api/api_global.h>\r
\r
-// C++ includes\r
-#include <exception>\r
-#include <map>\r
+#include <fstream> \r
+#include <iostream>\r
#include <string>\r
-#include <utility>\r
#include <vector>\r
\r
+// Platform-specific large-file support\r
+// Defines ftell64()/fseek64() as thin macro aliases for the platform's\r
+// file-offset calls: _ftelli64/_fseeki64 when WIN32 is defined, and\r
+// ftello/fseeko otherwise. The BAMTOOLS_LFS guard keeps the macros from\r
+// being defined more than once.\r
+// NOTE(review): WIN32 is not predefined by every Windows toolchain (MSVC\r
+// itself predefines _WIN32) - confirm the build defines WIN32.\r
+// NOTE(review): ftello/fseeko use off_t, which is only 64-bit where the\r
+// platform/build flags make it so - confirm for 32-bit targets.\r
+#ifndef BAMTOOLS_LFS\r
+#define BAMTOOLS_LFS\r
+ #ifdef WIN32\r
+ #define ftell64(a) _ftelli64(a)\r
+ #define fseek64(a,b,c) _fseeki64(a,b,c)\r
+ #else\r
+ #define ftell64(a) ftello(a)\r
+ #define fseek64(a,b,c) fseeko(a,b,c)\r
+ #endif\r
+#endif // BAMTOOLS_LFS\r
+\r
// Platform-specific type definitions\r
#ifndef BAMTOOLS_TYPES\r
#define BAMTOOLS_TYPES\r
\r
namespace BamTools {\r
\r
+// ----------------------------------------------------------------\r
+// ----------------------------------------------------------------\r
// BAM constants\r
-const int BAM_CORE_SIZE = 32;\r
+\r
const int BAM_CMATCH = 0;\r
const int BAM_CINS = 1;\r
const int BAM_CDEL = 2;\r
const int BAM_CPAD = 6;\r
const int BAM_CIGAR_SHIFT = 4;\r
const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1);\r
+const int BAM_CORE_SIZE = 32;\r
+const int BT_SIZEOF_INT = 4;\r
\r
-// BAM index constants\r
-const int MAX_BIN = 37450; // =(8^6-1)/7+1\r
-const int BAM_MIN_CHUNK_GAP = 32768;\r
-const int BAM_LIDX_SHIFT = 14;\r
-\r
-// Explicit variable sizes\r
-const int BT_SIZEOF_INT = 4;\r
-\r
-struct CigarOp;\r
-\r
-struct BamAlignment {\r
-\r
- // constructors & destructor\r
- public:\r
- BamAlignment(void);\r
- BamAlignment(const BamAlignment& other);\r
- ~BamAlignment(void);\r
-\r
- // Queries against alignment flags\r
- public: \r
- bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate \r
- bool IsFailedQC(void) const; // Returns true if this read failed quality control \r
- bool IsFirstMate(void) const; // Returns true if alignment is first mate on read \r
- bool IsMapped(void) const; // Returns true if alignment is mapped \r
- bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped \r
- bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand \r
- bool IsPaired(void) const; // Returns true if alignment part of paired-end read \r
- bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment \r
- bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution \r
- bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand\r
- bool IsSecondMate(void) const; // Returns true if alignment is second mate on read\r
-\r
- // Manipulate alignment flags\r
- public: \r
- void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag \r
- void SetIsFailedQC(bool ok); // Sets "failed quality control" flag \r
- void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag \r
- void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is mapped" flag \r
- void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag \r
- void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag \r
- void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag \r
- void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag \r
- void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag \r
- void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag \r
- void SetIsUnmapped(bool ok); // Sets "alignment is mapped" flag\r
-\r
- // Tag data access methods\r
- public:\r
- // -------------------------------------------------------------------------------------\r
- // N.B. - The following tag-modifying methods may not be used on BamAlignments fetched\r
- // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in \r
- // error message (to keep output clean) but will ALWAYS return false. Only user-\r
- // generated BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid.\r
-\r
- // add tag data (create new TAG entry with TYPE and VALUE)\r
- // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
- // returns true if new data added, false if error or TAG already exists\r
- // N.B. - will NOT modify existing tag. Use EditTag() instead\r
- bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
- bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
- bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
- bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
- \r
- // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)\r
- // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details\r
- // returns true if edit was successfaul, false if error\r
- bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H\r
- bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i\r
- bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i\r
- bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f\r
-\r
- // specific tag data access methods - these only remain for legacy support\r
- bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (implemented as GetTag("NM", editDistance))\r
- bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (implemented as GetTag("RG", readGroup)) \r
- \r
- // generic tag data access methods \r
- bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings \r
- bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data\r
- bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data\r
- bool GetTag(const std::string& tag, float& destination) const; // access floating point data\r
- \r
- // remove tag data\r
- // returns true if removal was successful, false if error\r
- // N.B. - returns false if TAG does not exist (no removal can occur)\r
- bool RemoveTag(const std::string& tag);\r
-\r
- // Additional data access methods\r
- public:\r
- int GetEndPosition(bool usePadded = false) const; // calculates alignment end position, based on starting position and CIGAR operations\r
-\r
- // 'internal' utility methods \r
- private:\r
- static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);\r
- static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);\r
-\r
- // Data members\r
- public:\r
- std::string Name; // Read name\r
- int32_t Length; // Query length\r
- std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)\r
- std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)\r
- std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\r
- std::string TagData; // Tag data (accessor methods will pull the requested information out)\r
- int32_t RefID; // ID number for reference sequence\r
- int32_t Position; // Position (0-based) where alignment starts\r
- uint16_t Bin; // Bin in BAM file where this alignment resides\r
- uint16_t MapQuality; // Mapping quality score\r
- uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate \r
- std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\r
- int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned\r
- int32_t MatePosition; // Position (0-based) where alignment's mate starts\r
- int32_t InsertSize; // Mate-pair insert size\r
- \r
- // internal data\r
- private:\r
- struct BamAlignmentSupportData {\r
- \r
- // data members\r
- std::string AllCharData;\r
- uint32_t BlockLength;\r
- uint32_t NumCigarOperations;\r
- uint32_t QueryNameLength;\r
- uint32_t QuerySequenceLength;\r
- bool HasCoreOnly;\r
- \r
- // constructor\r
- BamAlignmentSupportData(void)\r
- : BlockLength(0)\r
- , NumCigarOperations(0)\r
- , QueryNameLength(0)\r
- , QuerySequenceLength(0)\r
- , HasCoreOnly(false)\r
- { }\r
- };\r
- \r
- // contains raw character data & lengths\r
- BamAlignmentSupportData SupportData; \r
- \r
- // allow these classes access to BamAlignment private members (SupportData)\r
- // but client code should not need to touch this data\r
- friend class BamReader;\r
- friend class BamWriter;\r
-\r
- // Alignment flag query constants\r
- // Use the get/set methods above instead\r
- private:\r
- enum { PAIRED = 1\r
- , PROPER_PAIR = 2\r
- , UNMAPPED = 4\r
- , MATE_UNMAPPED = 8\r
- , REVERSE = 16\r
- , MATE_REVERSE = 32\r
- , READ_1 = 64\r
- , READ_2 = 128\r
- , SECONDARY = 256\r
- , QC_FAILED = 512\r
- , DUPLICATE = 1024 \r
- };\r
-};\r
-\r
// ----------------------------------------------------------------\r
-// Auxiliary data structs & typedefs\r
+// ----------------------------------------------------------------\r
+// Data structs & typedefs\r
\r
-struct CigarOp {\r
+// CIGAR operation data structure\r
+struct API_EXPORT CigarOp {\r
\r
// data members\r
char Type; // Operation type (MIDNSHP)\r
{ }\r
};\r
\r
-struct RefData {\r
+// Reference data entry\r
+struct API_EXPORT RefData {\r
\r
// data members\r
std::string RefName; // Name of reference sequence\r
, RefHasAlignments(ok)\r
{ }\r
};\r
+typedef std::vector<RefData> RefVector;\r
\r
-typedef std::vector<RefData> RefVector;\r
-typedef std::vector<BamAlignment> BamAlignmentVector;\r
-\r
-struct BamRegion {\r
+// General (sequential) genome region\r
+struct API_EXPORT BamRegion {\r
\r
// data members\r
int LeftRefID;\r
, RightRefID(rightID)\r
, RightPosition(rightPos)\r
{ }\r
+ \r
+ // copy constructor\r
+ // copies the left/right reference IDs and positions from 'other'\r
+ BamRegion(const BamRegion& other)\r
+ : LeftRefID(other.LeftRefID)\r
+ , LeftPosition(other.LeftPosition)\r
+ , RightRefID(other.RightRefID)\r
+ , RightPosition(other.RightPosition)\r
+ { }\r
+ \r
+ // member functions\r
+ // resets both bounds: all four ID/position fields are set to -1\r
+ void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }\r
+ // returns true if both LeftRefID and LeftPosition are non-negative\r
+ bool isLeftBoundSpecified(void) const { return ( LeftRefID >= 0 && LeftPosition >= 0 ); }\r
+ // returns true if neither the left nor the right bound is specified\r
+ bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); }\r
+ // returns true if both RightRefID and RightPosition are non-negative\r
+ bool isRightBoundSpecified(void) const { return ( RightRefID >= 0 && RightPosition >= 0 ); }\r
};\r
\r
// ----------------------------------------------------------------\r
-// Added: 3-35-2010 DWB\r
-// Fixed: Routines to provide endian-correctness\r
// ----------------------------------------------------------------\r
+// General utilities \r
\r
// returns true if system is big endian\r
inline bool SystemIsBigEndian(void) {\r
SwapEndian_64(value);\r
}\r
\r
-// ----------------------------------------------------------------\r
-// BamAlignment member methods\r
-\r
-// constructors & destructor\r
-inline BamAlignment::BamAlignment(void) { }\r
-\r
-inline BamAlignment::BamAlignment(const BamAlignment& other)\r
- : Name(other.Name)\r
- , Length(other.Length)\r
- , QueryBases(other.QueryBases)\r
- , AlignedBases(other.AlignedBases)\r
- , Qualities(other.Qualities)\r
- , TagData(other.TagData)\r
- , RefID(other.RefID)\r
- , Position(other.Position)\r
- , Bin(other.Bin)\r
- , MapQuality(other.MapQuality)\r
- , AlignmentFlag(other.AlignmentFlag)\r
- , CigarData(other.CigarData)\r
- , MateRefID(other.MateRefID)\r
- , MatePosition(other.MatePosition)\r
- , InsertSize(other.InsertSize)\r
- , SupportData(other.SupportData)\r
-{ }\r
-\r
-inline BamAlignment::~BamAlignment(void) { }\r
-\r
-// Queries against alignment flags\r
-inline bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }\r
-inline bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }\r
-inline bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }\r
-inline bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }\r
-inline bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }\r
-inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }\r
-inline bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }\r
-inline bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }\r
-inline bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }\r
-inline bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }\r
-inline bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }\r
-\r
-// Manipulate alignment flags \r
-inline void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }\r
-inline void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }\r
-inline void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }\r
-inline void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }\r
-inline void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }\r
-inline void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }\r
-inline void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }\r
-inline void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }\r
-inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }\r
-inline void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }\r
-inline void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }\r
-\r
-// calculates alignment end position, based on starting position and CIGAR operations\r
-inline \r
-int BamAlignment::GetEndPosition(bool usePadded) const {\r
-\r
- // initialize alignment end to starting position\r
- int alignEnd = Position;\r
-\r
- // iterate over cigar operations\r
- std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();\r
- std::vector<CigarOp>::const_iterator cigarEnd = CigarData.end();\r
- for ( ; cigarIter != cigarEnd; ++cigarIter) {\r
- const char cigarType = (*cigarIter).Type;\r
- if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {\r
- alignEnd += (*cigarIter).Length;\r
- } \r
- else if ( usePadded && cigarType == 'I' ) {\r
- alignEnd += (*cigarIter).Length;\r
- }\r
- }\r
- return alignEnd;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type != "Z" && type != "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag already exists, return false\r
- // use EditTag explicitly instead\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
- \r
- // otherwise, copy tag data to temp buffer\r
- std::string newTag = tag + type + value;\r
- const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term\r
- char originalTagData[newTagDataLength];\r
- memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term\r
- \r
- // append newTag\r
- strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term\r
- \r
- // store temp buffer back in TagData\r
- const char* newTagData = (const char*)originalTagData;\r
- TagData.assign(newTagData, newTagDataLength);\r
- \r
- // return success\r
- return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type == "f" || type == "Z" || type == "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag already exists, return false\r
- // use EditTag explicitly instead\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
- \r
- // otherwise, convert value to string\r
- union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;\r
- un.value = value;\r
-\r
- // copy original tag data to temp buffer\r
- std::string newTag = tag + type;\r
- const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer\r
- char originalTagData[newTagDataLength];\r
- memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term\r
- \r
- // append newTag\r
- strcat(originalTagData + tagDataLength, newTag.data());\r
- memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));\r
- \r
- // store temp buffer back in TagData\r
- const char* newTagData = (const char*)originalTagData;\r
- TagData.assign(newTagData, newTagDataLength);\r
- \r
- // return success\r
- return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
- return AddTag(tag, type, (const uint32_t&)value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type == "Z" || type == "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag already exists, return false\r
- // use EditTag explicitly instead\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;\r
- \r
- // otherwise, convert value to string\r
- union { float value; char valueBuffer[sizeof(float)]; } un;\r
- un.value = value;\r
-\r
- // copy original tag data to temp buffer\r
- std::string newTag = tag + type;\r
- const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float\r
- char originalTagData[newTagDataLength];\r
- memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term\r
- \r
- // append newTag\r
- strcat(originalTagData + tagDataLength, newTag.data());\r
- memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));\r
- \r
- // store temp buffer back in TagData\r
- const char* newTagData = (const char*)originalTagData;\r
- TagData.assign(newTagData, newTagDataLength);\r
- \r
- // return success\r
- return true;\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type != "Z" && type != "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pOriginalTagData = (char*)TagData.data();\r
- char* pTagData = pOriginalTagData;\r
- const unsigned int originalTagDataLength = TagData.size();\r
- \r
- unsigned int newTagDataLength = 0;\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
- \r
- // make sure array is more than big enough\r
- char newTagData[originalTagDataLength + value.size()]; \r
-\r
- // copy original tag data up til desired tag\r
- const unsigned int beginningTagDataLength = numBytesParsed;\r
- newTagDataLength += beginningTagDataLength;\r
- memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
- \r
- // copy new VALUE in place of current tag data\r
- const unsigned int dataLength = strlen(value.c_str());\r
- memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );\r
- \r
- // skip to next tag (if tag for removal is last, return true) \r
- const char* pTagStorageType = pTagData - 1;\r
- if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
- \r
- // copy everything from current tag (the next one after tag for removal) to end\r
- const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
- const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;\r
- const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
- memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
- \r
- // ensure null-terminator\r
- newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
- \r
- // save new tag data\r
- TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
- return true;\r
- }\r
- \r
- // tag not found, attempt AddTag\r
- else return AddTag(tag, type, value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type == "f" || type == "Z" || type == "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pOriginalTagData = (char*)TagData.data();\r
- char* pTagData = pOriginalTagData;\r
- const unsigned int originalTagDataLength = TagData.size();\r
- \r
- unsigned int newTagDataLength = 0;\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
- \r
- // make sure array is more than big enough\r
- char newTagData[originalTagDataLength + sizeof(value)]; \r
-\r
- // copy original tag data up til desired tag\r
- const unsigned int beginningTagDataLength = numBytesParsed;\r
- newTagDataLength += beginningTagDataLength;\r
- memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
- \r
- // copy new VALUE in place of current tag data\r
- union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;\r
- un.value = value;\r
- memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));\r
- \r
- // skip to next tag (if tag for removal is last, return true) \r
- const char* pTagStorageType = pTagData - 1;\r
- if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
- \r
- // copy everything from current tag (the next one after tag for removal) to end\r
- const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
- const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);\r
- const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
- memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
- \r
- // ensure null-terminator\r
- newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
- \r
- // save new tag data\r
- TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
- return true;\r
- }\r
- \r
- // tag not found, attempt AddTag\r
- else return AddTag(tag, type, value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {\r
- return EditTag(tag, type, (const uint32_t&)value);\r
-}\r
-\r
-inline\r
-bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {\r
- \r
- if ( SupportData.HasCoreOnly ) return false;\r
- if ( tag.size() != 2 || type.size() != 1 ) return false;\r
- if ( type == "Z" || type == "H" ) return false;\r
- \r
- // localize the tag data\r
- char* pOriginalTagData = (char*)TagData.data();\r
- char* pTagData = pOriginalTagData;\r
- const unsigned int originalTagDataLength = TagData.size();\r
- \r
- unsigned int newTagDataLength = 0;\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
- \r
- // make sure array is more than big enough\r
- char newTagData[originalTagDataLength + sizeof(value)]; \r
-\r
- // copy original tag data up til desired tag\r
- const unsigned int beginningTagDataLength = numBytesParsed;\r
- newTagDataLength += beginningTagDataLength;\r
- memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
- \r
- // copy new VALUE in place of current tag data\r
- union { float value; char valueBuffer[sizeof(float)]; } un;\r
- un.value = value;\r
- memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));\r
- \r
- // skip to next tag (if tag for removal is last, return true) \r
- const char* pTagStorageType = pTagData - 1;\r
- if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
- \r
- // copy everything from current tag (the next one after tag for removal) to end\r
- const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
- const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);\r
- const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
- memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);\r
- \r
- // ensure null-terminator\r
- newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;\r
- \r
- // save new tag data\r
- TagData.assign(newTagData, endTagOffset + endTagDataLength);\r
- return true;\r
- }\r
- \r
- // tag not found, attempt AddTag\r
- else return AddTag(tag, type, value);\r
-}\r
-\r
-// get "NM" tag data - originally contributed by Aaron Quinlan\r
-// stores data in 'editDistance', returns success/fail\r
-inline \r
-bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { \r
- return GetTag("NM", (uint32_t&)editDistance);\r
-}\r
-\r
-// get "RG" tag data\r
-// stores data in 'readGroup', returns success/fail\r
-inline \r
-bool BamAlignment::GetReadGroup(std::string& readGroup) const {\r
- return GetTag("RG", readGroup);\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {\r
-\r
- // make sure tag data exists\r
- if ( SupportData.HasCoreOnly || TagData.empty() ) \r
- return false;\r
-\r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
- const unsigned int dataLength = strlen(pTagData);\r
- destination.clear();\r
- destination.resize(dataLength);\r
- memcpy( (char*)destination.data(), pTagData, dataLength );\r
- return true;\r
- }\r
- \r
- // tag not found, return failure\r
- return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {\r
- \r
- // make sure tag data exists\r
- if ( SupportData.HasCoreOnly || TagData.empty() ) \r
- return false;\r
-\r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, determine data byte-length, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
- \r
- // determine data byte-length\r
- const char type = *(pTagData - 1);\r
- int destinationLength = 0;\r
- switch (type) {\r
- // 1 byte data\r
- case 'A':\r
- case 'c':\r
- case 'C':\r
- destinationLength = 1;\r
- break;\r
-\r
- // 2 byte data\r
- case 's':\r
- case 'S':\r
- destinationLength = 2;\r
- break;\r
-\r
- // 4 byte data\r
- case 'i':\r
- case 'I':\r
- destinationLength = 4;\r
- break;\r
-\r
- // unsupported type for integer destination (float or var-length strings)\r
- case 'f':\r
- case 'Z':\r
- case 'H':\r
- fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);\r
- return false;\r
-\r
- // unknown tag type\r
- default:\r
- fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
- return false;\r
- }\r
- \r
- // store in destination\r
- destination = 0;\r
- memcpy(&destination, pTagData, destinationLength);\r
- return true;\r
- }\r
- \r
- // tag not found, return failure\r
- return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {\r
- return GetTag(tag, (uint32_t&)destination);\r
-}\r
-\r
-inline\r
-bool BamAlignment::GetTag(const std::string& tag, float& destination) const {\r
- \r
- // make sure tag data exists\r
- if ( SupportData.HasCoreOnly || TagData.empty() ) \r
- return false;\r
-\r
- // localize the tag data\r
- char* pTagData = (char*)TagData.data();\r
- const unsigned int tagDataLength = TagData.size();\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, determine data byte-length, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {\r
- //pTagData += numBytesParsed;\r
- \r
- // determine data byte-length\r
- const char type = *(pTagData - 1);\r
- int destinationLength = 0;\r
- switch(type) {\r
-\r
- // 1 byte data\r
- case 'A':\r
- case 'c':\r
- case 'C':\r
- destinationLength = 1;\r
- break;\r
-\r
- // 2 byte data\r
- case 's':\r
- case 'S':\r
- destinationLength = 2;\r
- break;\r
-\r
- // 4 byte data\r
- case 'f':\r
- case 'i':\r
- case 'I':\r
- destinationLength = 4;\r
- break;\r
- \r
- // unsupported type (var-length strings)\r
- case 'Z':\r
- case 'H':\r
- fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);\r
- return false;\r
-\r
- // unknown tag type\r
- default:\r
- fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);\r
- return false;\r
- }\r
- \r
- // store in destination\r
- destination = 0.0;\r
- memcpy(&destination, pTagData, destinationLength);\r
- return true;\r
- }\r
- \r
- // tag not found, return failure\r
- return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::RemoveTag(const std::string& tag) {\r
- \r
- // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed\r
- // also, return false if no data present to remove\r
- if ( SupportData.HasCoreOnly || TagData.empty() ) return false;\r
- \r
- // localize the tag data\r
- char* pOriginalTagData = (char*)TagData.data();\r
- char* pTagData = pOriginalTagData;\r
- const unsigned int originalTagDataLength = TagData.size();\r
- unsigned int newTagDataLength = 0;\r
- unsigned int numBytesParsed = 0;\r
- \r
- // if tag found, store data in readGroup, return success\r
- if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {\r
- \r
- char newTagData[originalTagDataLength];\r
-\r
- // copy original tag data up til desired tag\r
- pTagData -= 3;\r
- numBytesParsed -= 3;\r
- const unsigned int beginningTagDataLength = numBytesParsed;\r
- newTagDataLength += beginningTagDataLength;\r
- memcpy(newTagData, pOriginalTagData, numBytesParsed);\r
- \r
- // skip to next tag (if tag for removal is last, return true) \r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
- if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;\r
- \r
- // copy everything from current tag (the next one after tag for removal) to end\r
- const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);\r
- const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;\r
- memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );\r
- \r
- // save new tag data\r
- TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);\r
- return true;\r
- }\r
- \r
- // tag not found, no removal - return failure\r
- return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {\r
-\r
- while ( numBytesParsed < tagDataLength ) {\r
-\r
- const char* pTagType = pTagData;\r
- const char* pTagStorageType = pTagData + 2;\r
- pTagData += 3;\r
- numBytesParsed += 3;\r
-\r
- // check the current tag, return true on match\r
- if ( std::strncmp(pTagType, tag.c_str(), 2) == 0 ) \r
- return true;\r
-\r
- // get the storage class and find the next tag\r
- if ( *pTagStorageType == '\0' ) return false; \r
- if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;\r
- if ( *pTagData == '\0' ) return false;\r
- }\r
- \r
- // checked all tags, none match\r
- return false;\r
-}\r
-\r
-inline\r
-bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {\r
- \r
- switch(storageType) {\r
-\r
- case 'A':\r
- case 'c':\r
- case 'C':\r
- ++numBytesParsed;\r
- ++pTagData;\r
- break;\r
-\r
- case 's':\r
- case 'S':\r
- numBytesParsed += 2;\r
- pTagData += 2;\r
- break;\r
-\r
- case 'f':\r
- case 'i':\r
- case 'I':\r
- numBytesParsed += 4;\r
- pTagData += 4;\r
- break;\r
-\r
- case 'Z':\r
- case 'H':\r
- while(*pTagData) {\r
- ++numBytesParsed;\r
- ++pTagData;\r
- }\r
- // increment for null-terminator\r
- ++numBytesParsed;\r
- ++pTagData;\r
- break;\r
-\r
- default: \r
- // error case\r
- fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);\r
- return false;\r
- }\r
- \r
- // return success\r
- return true;\r
+// returns whether file exists (can be opened OK)\r
+// Attempts to open 'filename' for reading with a local std::ifstream and\r
+// returns true if the stream is not in a failed state afterwards.\r
+// N.B. - this checks that the file can be opened for input, so a file that\r
+// exists but cannot be opened for reading is reported as not existing.\r
+inline bool FileExists(const std::string& filename) {\r
+ std::ifstream f(filename.c_str(), std::ifstream::in);\r
+ return !f.fail();\r
}\r
\r
} // namespace BamTools\r