// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 19 September 2010 (DB)
+// Last modified: 22 December 2010 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
+#include <api/BamAlignment.h>
+using namespace BamTools;
+
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <map>
#include <utility>
-#include "BamAlignment.h"
-using namespace BamTools;
using namespace std;
+const char* DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
+
// default ctor
BamAlignment::BamAlignment(void)
: RefID(-1)
void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
+void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); }
+void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); }
void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
+void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); }
void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
+// fills out character data
+bool BamAlignment::BuildCharData(void) {
+
+ // skip if char data already parsed
+ if ( !SupportData.HasCoreOnly ) return true;
+
+ // check system endianness
+ bool IsBigEndian = BamTools::SystemIsBigEndian();
+
+ // calculate character lengths/offsets
+ const unsigned int dataLength = SupportData.BlockLength - BAM_CORE_SIZE;
+ const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
+ const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
+ const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
+ const unsigned int tagDataLength = dataLength - tagDataOffset;
+
+ // check offsets to see what char data exists
+ const bool hasSeqData = ( seqDataOffset < dataLength );
+ const bool hasQualData = ( qualDataOffset < dataLength );
+ const bool hasTagData = ( tagDataOffset < dataLength );
+
+ // set up char buffers
+ const char* allCharData = SupportData.AllCharData.data();
+ const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
+ const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
+ char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
+
+ // store alignment name (relies on null char in name as terminator)
+ Name.assign((const char*)(allCharData));
+
+ // save query sequence
+ QueryBases.clear();
+ if ( hasSeqData ) {
+ QueryBases.reserve(SupportData.QuerySequenceLength);
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
+ char singleBase = DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+ QueryBases.append(1, singleBase);
+ }
+ }
+
+ // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
+ Qualities.clear();
+ if ( hasQualData ) {
+ Qualities.reserve(SupportData.QuerySequenceLength);
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
+ char singleQuality = (char)(qualData[i]+33);
+ Qualities.append(1, singleQuality);
+ }
+ }
+
+ // clear previous AlignedBases
+ AlignedBases.clear();
+
+ // if QueryBases has data, build AlignedBases using CIGAR data
+ // otherwise, AlignedBases will remain empty (this case IS allowed)
+ if ( !QueryBases.empty() ) {
+
+ // resize AlignedBases
+ AlignedBases.reserve(SupportData.QuerySequenceLength);
+
+ // iterate over CigarOps
+ int k = 0;
+ vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+ vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
+ for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+
+ const CigarOp& op = (*cigarIter);
+ switch(op.Type) {
+
+ // for 'M', 'I' - write bases
+ case ('M') :
+ case ('I') :
+ AlignedBases.append(QueryBases.substr(k, op.Length));
+ // fall through
+
+ // for 'S' - soft clip, do not write bases
+ // but increment placeholder 'k'
+ case ('S') :
+ k += op.Length;
+ break;
+
+ // for 'D' - write gap character
+ case ('D') :
+ AlignedBases.append(op.Length, '-');
+ break;
+
+ // for 'P' - write padding character
+ case ('P') :
+ AlignedBases.append( op.Length, '*' );
+ break;
+
+ // for 'N' - write N's, skip bases in original query sequence
+ case ('N') :
+ AlignedBases.append( op.Length, 'N' );
+ break;
+
+ // for 'H' - hard clip, do nothing to AlignedBases, move to next op
+ case ('H') :
+ break;
+
+ // shouldn't get here
+ default:
+ fprintf(stderr, "ERROR: Invalid Cigar op type\n");
+ exit(1);
+ }
+ }
+ }
+
+ // save tag data
+ TagData.clear();
+ if ( hasTagData ) {
+ if ( IsBigEndian ) {
+ int i = 0;
+ while ( (unsigned int)i < tagDataLength ) {
+
+ i += 2; // skip tagType chars (e.g. "RG", "NM", etc.)
+ uint8_t type = toupper(tagData[i]); // lower & upper case letters have same meaning
+ ++i; // skip valueType char (e.g. 'A', 'I', 'Z', etc.)
+
+ switch (type) {
+
+ case('A') :
+ case('C') :
+ ++i;
+ break;
+
+ case('S') :
+ SwapEndian_16p(&tagData[i]);
+ i += sizeof(uint16_t);
+ break;
+
+ case('F') :
+ case('I') :
+ SwapEndian_32p(&tagData[i]);
+ i += sizeof(uint32_t);
+ break;
+
+ case('D') :
+ SwapEndian_64p(&tagData[i]);
+ i += sizeof(uint64_t);
+ break;
+
+ case('H') :
+ case('Z') :
+ while (tagData[i]) { ++i; }
+ ++i; // increment one more for null terminator
+ break;
+
+ // shouldn't get here
+ default :
+ fprintf(stderr, "ERROR: Invalid tag value type\n");
+ exit(1);
+ }
+ }
+ }
+
+ // store tagData in alignment
+ TagData.resize(tagDataLength);
+ memcpy((char*)TagData.data(), tagData, tagDataLength);
+ }
+
+ // clear the core-only flag
+ SupportData.HasCoreOnly = false;
+
+ // return success
+ return true;
+}
+
// calculates alignment end position, based on starting position and CIGAR operations
int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
const char type = *(pTagData - 1);
int destinationLength = 0;
switch (type) {
+
// 1 byte data
case 'A':
case 'c':
return false;
}
-bool BamAlignment::FindTag(const string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed) {
+bool BamAlignment::FindTag(const string& tag,
+ char* &pTagData,
+ const unsigned int& tagDataLength,
+ unsigned int& numBytesParsed)
+{
while ( numBytesParsed < tagDataLength ) {
// return success
return true;
-}
\ No newline at end of file
+}