4 // Marth Lab, Boston College
5 // Last modified: 20 March 2009
// Fixed-width integer aliases built from the compiler's native integer types.
// NOTE(review): the bit widths implied by the names depend on the target's sizes
// for char / short / int / long long - confirm for each supported platform.
// NOTE(review): these names are also declared by <stdint.h>; presumably this
// section is only compiled where that header is unavailable - confirm.
15 typedef unsigned char uint8_t;
16 typedef short int16_t;
17 typedef unsigned short uint16_t;
19 typedef unsigned int uint32_t;
20 typedef long long int64_t;
21 typedef unsigned long long uint64_t;
40 unsigned int RefLength;
41 bool RefHasAlignments;
46 , RefHasAlignments(false)
// sequence container of RefData records
50 typedef vector<RefData> RefVector;
54 // queries against alignment flag - see below for further detail
56 bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
57 bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
58 bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
59 bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
60 bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
61 bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
62 bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
63 bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
64 bool IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
65 bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
66 bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
68 // returns true and assigns the read group if present in the tag data
// Scans TagData for a tag whose two-character type is "RG" and copies the
// value found there into readGroup.
//   readGroup - output string; resized and overwritten with the tag value
//   returns   - false when TagData is empty or no "RG" tag is found
69 bool GetReadGroup(string& readGroup) const {
// nothing to search when the alignment carries no tag data
71 if(TagData.empty()) return false;
73 // localize the tag data
// the const on data() is cast away; presumably so the cursor can be handed to
// SkipToNextTag, which takes a char*& - confirm
74 char* pTagData = (char*)TagData.data();
75 const unsigned int tagDataLen = TagData.size();
76 unsigned int numBytesParsed = 0;
78 bool foundReadGroupTag = false;
// keep going while fewer bytes have been parsed than TagData holds
79 while(numBytesParsed < tagDataLen) {
// the two-character tag type sits at the cursor, the storage-type character at offset 2
81 const char* pTagType = pTagData;
82 const char* pTagStorageType = pTagData + 2;
86 // check the current tag
87 if(strncmp(pTagType, "RG", 2) == 0) {
88 foundReadGroupTag = true;
92 // get the storage class and find the next tag
93 SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed);
96 // return if the read group tag was not present
97 if(!foundReadGroupTag) return false;
99 // assign the read group
// NOTE(review): strlen() assumes the bytes at pTagData are NUL-terminated - confirm
100 const unsigned int readGroupLen = strlen(pTagData);
101 readGroup.resize(readGroupLen);
// the result of data() is cast to char* and readGroupLen bytes are copied into the resized string
102 memcpy((char*)readGroup.data(), pTagData, readGroupLen);
106 // skips to the next tag
// Dispatches on storageType. pTagData and numBytesParsed are taken by reference,
// so the caller's cursor and parsed-byte count can be updated here.
107 static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
108 switch(storageType) {
// NOTE(review): the message talks about the storage class, but the character printed
// is *pTagData rather than storageType - confirm which byte was intended
134 printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);
// Data members describing a single alignment record.
// NOTE(review): RefID, Position, MateRefID, MatePosition and InsertSize are all
// unsigned here; the SAM/BAM format uses -1 sentinels for missing references and
// positions and a signed template length - confirm how those values are represented.
141 string Name; // read name
142 unsigned int Length; // query length
143 string QueryBases; // original sequence ( produced from machine )
144 string AlignedBases; // aligned sequence ( with indels )
145 string Qualities; // FASTQ qualities ( still in ASCII characters )
146 string TagData; // contains the tag data (accessor methods will pull the requested information out)
147 unsigned int RefID; // ID for reference sequence
148 unsigned int Position; // position on reference sequence where alignment starts
149 unsigned int Bin; // bin in BAM file where this alignment resides
150 unsigned int MapQuality; // mapping quality
151 unsigned int AlignmentFlag; // see above for available queries
152 vector<CigarOp> CigarData; // vector of CIGAR operations (length & type)
153 unsigned int MateRefID; // ID for reference sequence that mate was aligned to
154 unsigned int MatePosition; // position that mate was aligned to
155 unsigned int InsertSize; // mate pair insert size
157 // alignment flag query constants
// Each constant is a single-bit mask; the Is*() query methods AND it with AlignmentFlag.
159 enum { PAIRED = 1, // Alignment comes from paired-end data
160 PROPER_PAIR = 2, // Alignment passed paired-end resolution
161 UNMAPPED = 4, // Read is unmapped
162 MATE_UNMAPPED = 8, // Mate is unmapped
163 REVERSE = 16, // Read is on reverse strand
164 MATE_REVERSE = 32, // Mate is on reverse strand
165 READ_1 = 64, // This alignment is mate 1 of pair
166 READ_2 = 128, // This alignment is mate 2 of pair
167 SECONDARY = 256, // This alignment is not the primary (best) alignment for read
168 QC_FAILED = 512, // Read did not pass prior quality control steps
169 DUPLICATE = 1024 // Read is PCR duplicate
173 // commonly used vector in this library
174 typedef vector< BamAlignment > BamAlignmentVector;
176 #endif /* BAMALIGNMENT_H */