- // otherwise, BamAlignment should contain character in the standard fields: Name, QueryBases, etc
- // ( resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code )
- else {
-
- // calculate char lengths
- const unsigned int nameLength = al.Name.size() + 1;
- const unsigned int numCigarOperations = al.CigarData.size();
- const unsigned int queryLength = al.QueryBases.size();
- const unsigned int tagDataLength = al.TagData.size();
-
- // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
- // force calculation of Bin before storing
- const int endPosition = al.GetEndPosition();
- const unsigned int alignmentBin = CalculateMinimumBin(al.Position, endPosition);
-
- // create our packed cigar string
- string packedCigar;
- CreatePackedCigar(al.CigarData, packedCigar);
- const unsigned int packedCigarLength = packedCigar.size();
-
- // encode the query
- string encodedQuery;
- EncodeQuerySequence(al.QueryBases, encodedQuery);
- const unsigned int encodedQueryLength = encodedQuery.size();
-
- // write the block size
- const unsigned int dataBlockSize = nameLength + packedCigarLength + encodedQueryLength + queryLength + tagDataLength;
- unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
- if (IsBigEndian) SwapEndian_32(blockSize);
- mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
-
- // assign the BAM core data
- uint32_t buffer[8];
- buffer[0] = al.RefID;
- buffer[1] = al.Position;
- buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
- buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
- buffer[4] = queryLength;
- buffer[5] = al.MateRefID;
- buffer[6] = al.MatePosition;
- buffer[7] = al.InsertSize;
-
- // swap BAM core endian-ness, if necessary
- if ( IsBigEndian ) {
- for ( int i = 0; i < 8; ++i )
- SwapEndian_32(buffer[i]);
- }
-
- // write the BAM core
- mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
-
- // write the query name
- mBGZF.Write(al.Name.c_str(), nameLength);
-
- // write the packed cigar
- if ( IsBigEndian ) {
-
- char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
- memcpy(cigarData, packedCigar.data(), packedCigarLength);
-
- for (unsigned int i = 0; i < packedCigarLength; ++i) {
- if ( IsBigEndian )
- SwapEndian_32p(&cigarData[i]);
- }
-
- mBGZF.Write(cigarData, packedCigarLength);
- free(cigarData);
- }
- else
- mBGZF.Write(packedCigar.data(), packedCigarLength);
-
- // write the encoded query sequence
- mBGZF.Write(encodedQuery.data(), encodedQueryLength);
-
- // write the base qualities
- string baseQualities(al.Qualities);
- char* pBaseQualities = (char*)al.Qualities.data();
- for(unsigned int i = 0; i < queryLength; i++) {
- pBaseQualities[i] -= 33;
- }
- mBGZF.Write(pBaseQualities, queryLength);
-
- // write the read group tag
- if ( IsBigEndian ) {
-
- char* tagData = (char*)calloc(sizeof(char), tagDataLength);
- memcpy(tagData, al.TagData.data(), tagDataLength);
-
- int i = 0;
- while ( (unsigned int)i < tagDataLength ) {
-
- i += 2; // skip tag type (e.g. "RG", "NM", etc)
- uint8_t type = toupper(tagData[i]); // lower & upper case letters have same meaning
- ++i; // skip value type
-
- switch (type) {
-
- case('A') :
- case('C') :
- ++i;
- break;
-
- case('S') :
- SwapEndian_16p(&tagData[i]);
- i+=2; // sizeof(uint16_t)
- break;
-
- case('F') :
- case('I') :
- SwapEndian_32p(&tagData[i]);
- i+=4; // sizeof(uint32_t)
- break;
-
- case('D') :
- SwapEndian_64p(&tagData[i]);
- i+=8; // sizeof(uint64_t)
- break;
-
- case('H') :
- case('Z') :
- while (tagData[i]) { ++i; }
- ++i; // increment one more for null terminator
- break;
-
- default :
- fprintf(stderr, "ERROR: Invalid tag value type\n"); // shouldn't get here
- free(tagData);
- exit(1);
- }
- }
-
- mBGZF.Write(tagData, tagDataLength);
- free(tagData);
- }
- else
- mBGZF.Write(al.TagData.data(), tagDataLength);
- }