-// fills out character data for BamAlignment data
-bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {
-
- // calculate character lengths/offsets
- const unsigned int dataLength = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;
- const unsigned int seqDataOffset = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);
- const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2;
- const unsigned int tagDataOffset = qualDataOffset + bAlignment.SupportData.QuerySequenceLength;
- const unsigned int tagDataLength = dataLength - tagDataOffset;
-
- // set up char buffers
- const char* allCharData = bAlignment.SupportData.AllCharData.data();
- const char* seqData = ((const char*)allCharData) + seqDataOffset;
- const char* qualData = ((const char*)allCharData) + qualDataOffset;
- char* tagData = ((char*)allCharData) + tagDataOffset;
-
- // store alignment name (relies on null char in name as terminator)
- bAlignment.Name.assign((const char*)(allCharData));
-
- // save query sequence
- bAlignment.QueryBases.clear();
- bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength);
- for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {
- char singleBase = DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
- bAlignment.QueryBases.append(1, singleBase);
- }
-
- // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
- bAlignment.Qualities.clear();
- bAlignment.Qualities.reserve(bAlignment.SupportData.QuerySequenceLength);
- for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {
- char singleQuality = (char)(qualData[i]+33);
- bAlignment.Qualities.append(1, singleQuality);
- }
-
- // if QueryBases is empty (and this is a allowed case)
- if ( bAlignment.QueryBases.empty() )
- bAlignment.AlignedBases = bAlignment.QueryBases;
-
- // if QueryBases contains data, then build AlignedBases using CIGAR data
- else {
-
- // resize AlignedBases
- bAlignment.AlignedBases.clear();
- bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);
-
- // iterate over CigarOps
- int k = 0;
- vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
- vector<CigarOp>::const_iterator cigarEnd = bAlignment.CigarData.end();
- for ( ; cigarIter != cigarEnd; ++cigarIter ) {
-
- const CigarOp& op = (*cigarIter);
- switch(op.Type) {
-
- case ('M') :
- case ('I') :
- bAlignment.AlignedBases.append(bAlignment.QueryBases.substr(k, op.Length)); // for 'M', 'I' - write bases
- // fall through
-
- case ('S') :
- k += op.Length; // for 'S' - soft clip, skip over query bases
- break;
-
- case ('D') :
- bAlignment.AlignedBases.append(op.Length, '-'); // for 'D' - write gap character
- break;
-
- case ('P') :
- bAlignment.AlignedBases.append( op.Length, '*' ); // for 'P' - write padding character
- break;
-
- case ('N') :
- bAlignment.AlignedBases.append( op.Length, 'N' ); // for 'N' - write N's, skip bases in original query sequence
- break;
-
- case ('H') :
- break; // for 'H' - hard clip, do nothing to AlignedBases, move to next op
-
- default:
- fprintf(stderr, "ERROR: Invalid Cigar op type\n"); // shouldn't get here
- exit(1);
- }
- }
- }
-
- // -----------------------
- // Added: 3-25-2010 DB
- // Fixed: endian-correctness for tag data
- // -----------------------
- if ( IsBigEndian ) {
- int i = 0;
- while ( (unsigned int)i < tagDataLength ) {
-
- i += 2; // skip tag type (e.g. "RG", "NM", etc)
- uint8_t type = toupper(tagData[i]); // lower & upper case letters have same meaning
- ++i; // skip value type
-
- switch (type) {
-
- case('A') :
- case('C') :
- ++i;
- break;
-
- case('S') :
- SwapEndian_16p(&tagData[i]);
- i += sizeof(uint16_t);
- break;
-
- case('F') :
- case('I') :
- SwapEndian_32p(&tagData[i]);
- i += sizeof(uint32_t);
- break;
-
- case('D') :
- SwapEndian_64p(&tagData[i]);
- i += sizeof(uint64_t);
- break;
-
- case('H') :
- case('Z') :
- while (tagData[i]) { ++i; }
- ++i; // increment one more for null terminator
- break;
-
- default :
- fprintf(stderr, "ERROR: Invalid tag value type\n"); // shouldn't get here
- exit(1);
- }
- }
- }
-
- // store TagData
- bAlignment.TagData.clear();
- bAlignment.TagData.resize(tagDataLength);
- memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLength);
-
- // clear the core-only flag
- bAlignment.SupportData.HasCoreOnly = false;
-
- // return success
- return true;