]> git.donarmstrong.com Git - bamtools.git/commitdiff
Moved BamAlignmentSupportData into BamAlignment data type. This continues the read...
authorDerek <derekwbarnett@gmail.com>
Thu, 10 Jun 2010 04:50:37 +0000 (00:50 -0400)
committerDerek <derekwbarnett@gmail.com>
Thu, 10 Jun 2010 04:50:37 +0000 (00:50 -0400)
BamAux.h
BamReader.cpp
BamReader.h
BamWriter.cpp
BamWriter.h
bamtools_merge.cpp

index 46592497888843f7e14295f62290ecad73fa620f..5ef34fe626af240769ca2f43dcf657662a67c675 100644 (file)
--- a/BamAux.h
+++ b/BamAux.h
@@ -133,6 +133,28 @@ struct BamAlignment {
         int32_t      MateRefID;         // ID number for reference sequence where alignment's mate was aligned\r
         int32_t      MatePosition;      // Position (0-based) where alignment's mate starts\r
         int32_t      InsertSize;        // Mate-pair insert size\r
+        \r
+        struct BamAlignmentSupportData {\r
+      \r
+            // data members\r
+            std::string AllCharData;\r
+            uint32_t    BlockLength;\r
+            uint32_t    NumCigarOperations;\r
+            uint32_t    QueryNameLength;\r
+            uint32_t    QuerySequenceLength;\r
+            bool        IsParsed;\r
+            \r
+            // constructor\r
+            BamAlignmentSupportData(void)\r
+                : BlockLength(0)\r
+                , NumCigarOperations(0)\r
+                , QueryNameLength(0)\r
+                , QuerySequenceLength(0)\r
+                , IsParsed(false)\r
+            { }\r
+        };\r
+        \r
+        BamAlignmentSupportData SupportData;  // Contains raw character data & lengths \r
 \r
     // Alignment flag query constants\r
     // Use the get/set methods above instead\r
@@ -154,24 +176,6 @@ struct BamAlignment {
 // ----------------------------------------------------------------\r
 // Auxiliary data structs & typedefs\r
 \r
-struct BamAlignmentSupportData {\r
-      \r
-    // data members\r
-    std::string AllCharData;\r
-    uint32_t    BlockLength;\r
-    uint32_t    NumCigarOperations;\r
-    uint32_t    QueryNameLength;\r
-    uint32_t    QuerySequenceLength;\r
-    \r
-    // constructor\r
-    BamAlignmentSupportData(void)\r
-        : BlockLength(0)\r
-        , NumCigarOperations(0)\r
-        , QueryNameLength(0)\r
-        , QuerySequenceLength(0)\r
-    { }\r
-};\r
-\r
 struct CigarOp {\r
   \r
     // data members\r
index 53c32e9e1db6bf144a6c8ba6edec0a7ab2cc9d40..e5265e0ca0446318ee9e3955a4db2defbe3aefa4 100644 (file)
@@ -69,7 +69,7 @@ struct BamReader::BamReaderPrivate {
 \r
     // access alignment data\r
     bool GetNextAlignment(BamAlignment& bAlignment);\r
-    bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
+    bool GetNextAlignmentCore(BamAlignment& bAlignment);\r
 \r
     // access auxiliary data\r
     int GetReferenceID(const string& refName) const;\r
@@ -86,7 +86,7 @@ struct BamReader::BamReaderPrivate {
     // calculate bins that overlap region ( left to reference end for now )\r
     int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);\r
     // fills out character data for BamAlignment data\r
-    bool BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData);\r
+    bool BuildCharData(BamAlignment& bAlignment);\r
     // calculate file offset for first alignment chunk overlapping 'left'\r
     int64_t GetOffset(int refID, int left);\r
     // checks to see if alignment overlaps current region\r
@@ -94,7 +94,7 @@ struct BamReader::BamReaderPrivate {
     // retrieves header text from BAM file\r
     void LoadHeaderData(void);\r
     // retrieves BAM alignment under file pointer\r
-    bool LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
+    bool LoadNextAlignment(BamAlignment& bAlignment);\r
     // builds reference data structure from BAM file\r
     void LoadReferenceData(void);\r
 \r
@@ -139,7 +139,7 @@ bool BamReader::Rewind(void) { return d->Rewind(); }
 \r
 // access alignment data\r
 bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
-bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); }\r
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment) { return d->GetNextAlignmentCore(bAlignment); }\r
 \r
 // access auxiliary data\r
 const string BamReader::GetHeaderText(void) const { return d->HeaderText; }\r
@@ -196,40 +196,40 @@ int BamReader::BamReaderPrivate::BinsFromRegion(int refID, int left, uint16_t li
     return i;\r
 }\r
 \r
-bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {\r
   \r
     // calculate character lengths/offsets\r
-    const unsigned int dataLength     = supportData.BlockLength - BAM_CORE_SIZE;\r
-    const unsigned int seqDataOffset  = supportData.QueryNameLength + (supportData.NumCigarOperations * 4);\r
-    const unsigned int qualDataOffset = seqDataOffset + (supportData.QuerySequenceLength+1)/2;\r
-    const unsigned int tagDataOffset  = qualDataOffset + supportData.QuerySequenceLength;\r
+    const unsigned int dataLength     = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;\r
+    const unsigned int seqDataOffset  = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);\r
+    const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2;\r
+    const unsigned int tagDataOffset  = qualDataOffset + bAlignment.SupportData.QuerySequenceLength;\r
     const unsigned int tagDataLength  = dataLength - tagDataOffset;\r
       \r
     // set up char buffers\r
-    const char* allCharData = supportData.AllCharData.data();\r
+    const char* allCharData = bAlignment.SupportData.AllCharData.data();\r
     const char* seqData     = ((const char*)allCharData) + seqDataOffset;\r
     const char* qualData    = ((const char*)allCharData) + qualDataOffset;\r
     char* tagData     = ((char*)allCharData) + tagDataOffset;\r
   \r
     // save query sequence\r
     bAlignment.QueryBases.clear();\r
-    bAlignment.QueryBases.reserve(supportData.QuerySequenceLength);\r
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {\r
+    bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {\r
         char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];\r
         bAlignment.QueryBases.append(1, singleBase);\r
     }\r
   \r
     // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character\r
     bAlignment.Qualities.clear();\r
-    bAlignment.Qualities.reserve(supportData.QuerySequenceLength);\r
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {\r
+    bAlignment.Qualities.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {\r
         char singleQuality = (char)(qualData[i]+33);\r
         bAlignment.Qualities.append(1, singleQuality);\r
     }\r
     \r
     // parse CIGAR to build 'AlignedBases'\r
     bAlignment.AlignedBases.clear();\r
-    bAlignment.AlignedBases.reserve(supportData.QuerySequenceLength);\r
+    bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
     \r
     int k = 0;\r
     vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
@@ -323,6 +323,9 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const
     bAlignment.TagData.resize(tagDataLength);\r
     memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLength);\r
     \r
+    // set support data parsed flag\r
+    bAlignment.SupportData.IsParsed = true;\r
+    \r
     // return success\r
     return true;\r
 }\r
@@ -504,25 +507,23 @@ int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
 // get next alignment (from specified region, if given)\r
 bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {\r
 \r
-    BamAlignmentSupportData supportData;\r
-  \r
     // if valid alignment available\r
-    if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+    if ( LoadNextAlignment(bAlignment) ) {\r
 \r
         // if region not specified, return success\r
         if ( !IsRegionSpecified ) { \r
-          bool ok = BuildCharData(bAlignment, supportData);\r
+          bool ok = BuildCharData(bAlignment);\r
           return ok; \r
         }\r
 \r
         // load next alignment until region overlap is found\r
         while ( !IsOverlap(bAlignment) ) {\r
             // if no valid alignment available (likely EOF) return failure\r
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+            if ( !LoadNextAlignment(bAlignment) ) return false;\r
         }\r
 \r
         // return success (alignment found that overlaps region)\r
-        bool ok = BuildCharData(bAlignment, supportData);\r
+        bool ok = BuildCharData(bAlignment);\r
         return ok;\r
     }\r
 \r
@@ -535,10 +536,10 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 // ** DOES NOT parse any character data (bases, qualities, tag data)\r
 //    these can be accessed, if necessary, from the supportData \r
 // useful for operations requiring ONLY positional or other alignment-related information\r
-bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {\r
 \r
     // if valid alignment available\r
-    if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+    if ( LoadNextAlignment(bAlignment) ) {\r
 \r
         // if region not specified, return success\r
         if ( !IsRegionSpecified ) return true;\r
@@ -546,7 +547,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment,
         // load next alignment until region overlap is found\r
         while ( !IsOverlap(bAlignment) ) {\r
             // if no valid alignment available (likely EOF) return failure\r
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+            if ( !LoadNextAlignment(bAlignment) ) return false;\r
         }\r
 \r
         // return success (alignment found that overlaps region)\r
@@ -847,14 +848,14 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
 }\r
 \r
 // populates BamAlignment with alignment data under file pointer, returns success/fail\r
-bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {\r
 \r
     // read in the 'block length' value, make sure it's not zero\r
     char buffer[4];\r
     mBGZF.Read(buffer, 4);\r
-    supportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);\r
-    if ( IsBigEndian ) { SwapEndian_32(supportData.BlockLength); }\r
-    if ( supportData.BlockLength == 0 ) { return false; }\r
+    bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);\r
+    if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); }\r
+    if ( bAlignment.SupportData.BlockLength == 0 ) { return false; }\r
 \r
     // read in core alignment data, make sure the right size of data was read\r
     char x[BAM_CORE_SIZE];\r
@@ -873,20 +874,20 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
     unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]);\r
     bAlignment.Bin        = tempValue >> 16;\r
     bAlignment.MapQuality = tempValue >> 8 & 0xff;\r
-    supportData.QueryNameLength = tempValue & 0xff;\r
+    bAlignment.SupportData.QueryNameLength = tempValue & 0xff;\r
 \r
     tempValue = BgzfData::UnpackUnsignedInt(&x[12]);\r
     bAlignment.AlignmentFlag = tempValue >> 16;\r
-    supportData.NumCigarOperations = tempValue & 0xffff;\r
+    bAlignment.SupportData.NumCigarOperations = tempValue & 0xffff;\r
 \r
-    supportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);\r
+    bAlignment.SupportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);\r
     bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);\r
     bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);\r
     bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);\r
     \r
     // store 'all char data' and cigar ops\r
-    const unsigned int dataLength      = supportData.BlockLength - BAM_CORE_SIZE;\r
-    const unsigned int cigarDataOffset = supportData.QueryNameLength;\r
+    const unsigned int dataLength      = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;\r
+    const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength;\r
     \r
     char*     allCharData = (char*)calloc(sizeof(char), dataLength);\r
     uint32_t* cigarData   = (uint32_t*)(allCharData + cigarDataOffset);\r
@@ -897,14 +898,14 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
      \r
         // store alignment name and length\r
         bAlignment.Name.assign((const char*)(allCharData));\r
-        bAlignment.Length = supportData.QuerySequenceLength;\r
+        bAlignment.Length = bAlignment.SupportData.QuerySequenceLength;\r
       \r
         // store remaining 'allCharData' in supportData structure\r
-        supportData.AllCharData.assign((const char*)allCharData, dataLength);\r
+        bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);\r
         \r
         // save CigarOps for BamAlignment\r
         bAlignment.CigarData.clear();\r
-        for (unsigned int i = 0; i < supportData.NumCigarOperations; ++i) {\r
+        for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) {\r
 \r
             // swap if necessary\r
             if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }\r
index 88cc74a25d3cfd04ab90e7957dbfe808b313121e..0b13786b1b112a48cccae944ac73f14c4f7e6e53 100644 (file)
@@ -51,11 +51,12 @@ class BamReader {
 \r
         // retrieves next available alignment (returns success/fail)\r
         bool GetNextAlignment(BamAlignment& bAlignment);\r
+        \r
         // retrieves next available alignment core data (returns success/fail)\r
         // ** DOES NOT parse any character data (bases, qualities, tag data)\r
         //    these can be accessed, if necessary, from the supportData \r
         // useful for operations requiring ONLY positional or other alignment-related information\r
-        bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
+        bool GetNextAlignmentCore(BamAlignment& bAlignment);\r
 \r
         // ----------------------\r
         // access auxiliary data\r
index 9d18fae6ba616b1d243913e7e07c4a55c70a88b4..660be5d740240ab648163f1781b29e80e5727e2e 100644 (file)
@@ -37,7 +37,6 @@ struct BamWriter::BamWriterPrivate {
     void Close(void);\r
     void Open(const string& filename, const string& samHeader, const RefVector& referenceSequences);\r
     void SaveAlignment(const BamAlignment& al);\r
-    void SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData);\r
 \r
     // internal methods\r
     void CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar);\r
@@ -74,10 +73,6 @@ void BamWriter::SaveAlignment(const BamAlignment& al) {
     d->SaveAlignment(al);\r
 }\r
 \r
-void BamWriter::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {\r
-    d->SaveAlignment(al, supportData);\r
-}\r
-\r
 // -----------------------------------------------------\r
 // BamWriterPrivate implementation\r
 // -----------------------------------------------------\r
@@ -249,170 +244,139 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam
 // saves the alignment to the alignment archive\r
 void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {\r
 \r
-    // initialize\r
-    const unsigned int nameLen            = al.Name.size() + 1;\r
-    const unsigned int queryLen           = al.QueryBases.size();\r
-    const unsigned int numCigarOperations = al.CigarData.size();\r
-\r
-    // create our packed cigar string\r
-    string packedCigar;\r
-    CreatePackedCigar(al.CigarData, packedCigar);\r
-    const unsigned int packedCigarLen = packedCigar.size();\r
-\r
-    // encode the query\r
-    string encodedQuery;\r
-    EncodeQuerySequence(al.QueryBases, encodedQuery);\r
-    const unsigned int encodedQueryLen = encodedQuery.size();\r
-\r
-    // store the tag data length\r
-    // -------------------------------------------\r
-    // Modified: 3-25-2010 DWB\r
-    // Contributed: ARQ\r
-    // Fixed: "off by one" error when parsing tags in files produced by BamWriter\r
-    const unsigned int tagDataLength = al.TagData.size();\r
-    // original line: \r
-    // const unsigned int tagDataLength = al.TagData.size() + 1;     \r
-    // -------------------------------------------\r
-    \r
-    // assign the BAM core data\r
+     // assign the BAM core data\r
     uint32_t buffer[8];\r
     buffer[0] = al.RefID;\r
     buffer[1] = al.Position;\r
-    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | nameLen;\r
-    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;\r
-    buffer[4] = queryLen;\r
+    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;\r
+    buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;\r
+    buffer[4] = al.SupportData.QuerySequenceLength;\r
     buffer[5] = al.MateRefID;\r
     buffer[6] = al.MatePosition;\r
     buffer[7] = al.InsertSize;\r
 \r
     // write the block size\r
-    unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;\r
-    unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;\r
+    unsigned int blockSize = al.SupportData.BlockLength;\r
     if ( IsBigEndian ) { SwapEndian_32(blockSize); }\r
     mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);\r
 \r
-    // write the BAM core\r
+    // swap BAM core endian-ness, if necessary\r
     if ( IsBigEndian ) { \r
         for ( int i = 0; i < 8; ++i ) { \r
             SwapEndian_32(buffer[i]); \r
         } \r
     }\r
+    \r
+    // write the BAM core\r
     mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);\r
 \r
-    // write the query name\r
-    mBGZF.Write(al.Name.c_str(), nameLen);\r
-\r
-    // write the packed cigar\r
-    if ( IsBigEndian ) {\r
+    // if support data, not parsed out (resulted from BamReader::GetNextAlignmentCore()\r
+    // write the raw char data\r
+    if ( !al.SupportData.IsParsed )\r
+        mBGZF.Write((char*)al.SupportData.AllCharData.data(), al.SupportData.BlockLength-BAM_CORE_SIZE);  \r
+    \r
+    // re-pack (as needed) & write the parsed char data\r
+    else {\r
       \r
-        char* cigarData = (char*)calloc(sizeof(char), packedCigarLen);\r
-        memcpy(cigarData, packedCigar.data(), packedCigarLen);\r
-        \r
-        for (unsigned int i = 0; i < packedCigarLen; ++i) {\r
-            if ( IsBigEndian ) { \r
-              SwapEndian_32p(&cigarData[i]); \r
-            }\r
-        }\r
+        // initialize\r
+        const unsigned int nameLen            = al.Name.size() + 1;\r
+        const unsigned int queryLen           = al.QueryBases.size();\r
+        const unsigned int tagDataLength      = al.TagData.size();\r
         \r
-        mBGZF.Write(cigarData, packedCigarLen);\r
-        free(cigarData);\r
-        \r
-    } else { \r
-        mBGZF.Write(packedCigar.data(), packedCigarLen);\r
-    }\r
-\r
-    // write the encoded query sequence\r
-    mBGZF.Write(encodedQuery.data(), encodedQueryLen);\r
-\r
-    // write the base qualities\r
-    string baseQualities = al.Qualities;\r
-    char* pBaseQualities = (char*)al.Qualities.data();\r
-    for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }\r
-    mBGZF.Write(pBaseQualities, queryLen);\r
-\r
-    // write the read group tag\r
-    if ( IsBigEndian ) {\r
-      \r
-        char* tagData = (char*)calloc(sizeof(char), tagDataLength);\r
-        memcpy(tagData, al.TagData.data(), tagDataLength);\r
+        // create our packed cigar string\r
+        string packedCigar;\r
+        CreatePackedCigar(al.CigarData, packedCigar);\r
+        const unsigned int packedCigarLen = packedCigar.size();\r
+\r
+        // encode the query\r
+        string encodedQuery;\r
+        EncodeQuerySequence(al.QueryBases, encodedQuery);\r
+        const unsigned int encodedQueryLen = encodedQuery.size(); \r
       \r
-        int i = 0;\r
-        while ( (unsigned int)i < tagDataLength ) {\r
+        // write the query name\r
+        mBGZF.Write(al.Name.c_str(), nameLen);\r
+\r
+        // write the packed cigar\r
+        if ( IsBigEndian ) {\r
+          \r
+            char* cigarData = (char*)calloc(sizeof(char), packedCigarLen);\r
+            memcpy(cigarData, packedCigar.data(), packedCigarLen);\r
             \r
-            i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)\r
-            uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning \r
-            ++i;                                    // skip value type\r
-            \r
-            switch (type) {\r
-              \r
-                case('A') :\r
-                case('C') : \r
-                    ++i;\r
-                    break;\r
-                            \r
-                case('S') : \r
-                    SwapEndian_16p(&tagData[i]); \r
-                    i+=2; // sizeof(uint16_t)\r
-                    break;\r
-                            \r
-                case('F') :\r
-                case('I') : \r
-                    SwapEndian_32p(&tagData[i]);\r
-                    i+=4; // sizeof(uint32_t)\r
-                    break;\r
-                            \r
-                case('D') : \r
-                    SwapEndian_64p(&tagData[i]);\r
-                    i+=8; // sizeof(uint64_t)\r
-                    break;\r
-                            \r
-                case('H') :\r
-                case('Z') : \r
-                    while (tagData[i]) { ++i; }\r
-                    ++i; // increment one more for null terminator\r
-                    break;\r
-                            \r
-                default : \r
-                    printf("ERROR: Invalid tag value type\n"); // shouldn't get here\r
-                    free(tagData);\r
-                    exit(1); \r
+            for (unsigned int i = 0; i < packedCigarLen; ++i) {\r
+                if ( IsBigEndian ) { \r
+                  SwapEndian_32p(&cigarData[i]); \r
+                }\r
             }\r
+            \r
+            mBGZF.Write(cigarData, packedCigarLen);\r
+            free(cigarData);\r
+            \r
+        } else { \r
+            mBGZF.Write(packedCigar.data(), packedCigarLen);\r
         }\r
-        \r
-        mBGZF.Write(tagData, tagDataLength);\r
-        free(tagData);\r
-    } else {\r
-        mBGZF.Write(al.TagData.data(), tagDataLength);\r
-    }\r
-}\r
 \r
-void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al, const BamAlignmentSupportData& supportData) {\r
-  \r
-    // assign the BAM core data\r
-    uint32_t buffer[8];\r
-    buffer[0] = al.RefID;\r
-    buffer[1] = al.Position;\r
-    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | supportData.QueryNameLength;\r
-    buffer[3] = (al.AlignmentFlag << 16) | supportData.NumCigarOperations;\r
-    buffer[4] = supportData.QuerySequenceLength;\r
-    buffer[5] = al.MateRefID;\r
-    buffer[6] = al.MatePosition;\r
-    buffer[7] = al.InsertSize;\r
-\r
-    // write the block size\r
-    unsigned int blockSize = supportData.BlockLength;\r
-    if ( IsBigEndian ) { SwapEndian_32(blockSize); }\r
-    mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);\r
-\r
-    // write the BAM core\r
-    if ( IsBigEndian ) { \r
-        for ( int i = 0; i < 8; ++i ) { \r
-            SwapEndian_32(buffer[i]); \r
-        } \r
+        // write the encoded query sequence\r
+        mBGZF.Write(encodedQuery.data(), encodedQueryLen);\r
+\r
+        // write the base qualities\r
+        string baseQualities = al.Qualities;\r
+        char* pBaseQualities = (char*)al.Qualities.data();\r
+        for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }\r
+        mBGZF.Write(pBaseQualities, queryLen);\r
+\r
+        // write the read group tag\r
+        if ( IsBigEndian ) {\r
+          \r
+            char* tagData = (char*)calloc(sizeof(char), tagDataLength);\r
+            memcpy(tagData, al.TagData.data(), tagDataLength);\r
+          \r
+            int i = 0;\r
+            while ( (unsigned int)i < tagDataLength ) {\r
+                \r
+                i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)\r
+                uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning \r
+                ++i;                                    // skip value type\r
+                \r
+                switch (type) {\r
+                  \r
+                    case('A') :\r
+                    case('C') : \r
+                        ++i;\r
+                        break;\r
+                                \r
+                    case('S') : \r
+                        SwapEndian_16p(&tagData[i]); \r
+                        i+=2; // sizeof(uint16_t)\r
+                        break;\r
+                                \r
+                    case('F') :\r
+                    case('I') : \r
+                        SwapEndian_32p(&tagData[i]);\r
+                        i+=4; // sizeof(uint32_t)\r
+                        break;\r
+                                \r
+                    case('D') : \r
+                        SwapEndian_64p(&tagData[i]);\r
+                        i+=8; // sizeof(uint64_t)\r
+                        break;\r
+                                \r
+                    case('H') :\r
+                    case('Z') : \r
+                        while (tagData[i]) { ++i; }\r
+                        ++i; // increment one more for null terminator\r
+                        break;\r
+                                \r
+                    default : \r
+                        printf("ERROR: Invalid tag value type\n"); // shouldn't get here\r
+                        free(tagData);\r
+                        exit(1); \r
+                }\r
+            }\r
+            \r
+            mBGZF.Write(tagData, tagDataLength);\r
+            free(tagData);\r
+        } else {\r
+            mBGZF.Write(al.TagData.data(), tagDataLength);\r
+        }      \r
     }\r
-    mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);\r
-\r
-    // write the raw char data\r
-    mBGZF.Write((char*)supportData.AllCharData.data(), supportData.BlockLength-BAM_CORE_SIZE);\r
 }\r
-\r
index 31c7d61b8a3fe259ed353d54014cc3dc068a9e1e..2608ddbe793f972ab0ac1b4da78f5fdc7812b0de 100644 (file)
@@ -37,8 +37,6 @@ class BamWriter {
         void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);\r
         // saves the alignment to the alignment archive\r
         void SaveAlignment(const BamTools::BamAlignment& al);\r
-        // saves the (partial) alignment, using support data, to the alignment archive\r
-        void SaveAlignment(const BamTools::BamAlignment& al, const BamTools::BamAlignmentSupportData& supportData);\r
 \r
     // private implementation\r
     private:\r
index cae61344eb9c8d30b2aa72c620788bf1a06f9270..bfb5de7fef462ee5d3dc3c84466f70fe49814051 100644 (file)
@@ -88,8 +88,11 @@ int MergeTool::Run(int argc, char* argv[]) {
     if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn());
     
     // opens the BAM files without checking for indexes
-    BamMultiReader reader;
-    reader.Open(m_settings->InputFiles, false); 
+//     BamMultiReader reader;
+//     reader.Open(m_settings->InputFiles, false); 
+
+    BamReader reader;
+    reader.Open(m_settings->InputFiles.at(0));
 
     // retrieve header & reference dictionary info
     std::string mergedHeader = reader.GetHeaderText();
@@ -100,6 +103,11 @@ int MergeTool::Run(int argc, char* argv[]) {
     writer.Open(m_settings->OutputFilename, mergedHeader, references);
 
     // store alignments to output file
+//     BamAlignment bAlignment;
+//     while (reader.GetNextAlignment(bAlignment)) {
+//         writer.SaveAlignment(bAlignment);
+//     }
+    
     BamAlignment bAlignment;
     while (reader.GetNextAlignment(bAlignment)) {
         writer.SaveAlignment(bAlignment);