git.donarmstrong.com Git - bamtools.git/commitdiff
Fixed: proper handling of BamAlignment::AlignedBases when QueryBases is empty. Thank...
author Derek <derekwbarnett@gmail.com>
Fri, 9 Jul 2010 16:20:39 +0000 (12:20 -0400)
committer Derek <derekwbarnett@gmail.com>
Fri, 9 Jul 2010 16:20:39 +0000 (12:20 -0400)
BamReader.cpp
BamReader.h

index c618b41c2628225237e92e6fd6bda6794ce8362b..578cccfc23f91ccb18fe698ad280d6c593da9490 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 30 June 2010 (DB)\r
+// Last modified: 9 July 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Uses BGZF routines that were adapted from the bgzf.c code developed at the Broad\r
 // Institute.\r
@@ -246,51 +246,59 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {
         bAlignment.Qualities.append(1, singleQuality);\r
     }\r
     \r
-    // parse CIGAR to build 'AlignedBases'\r
-    bAlignment.AlignedBases.clear();\r
-    bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+    // if QueryBases is empty (and this is an allowed case)\r
+    if ( bAlignment.QueryBases.empty() ) \r
+        bAlignment.AlignedBases = bAlignment.QueryBases;\r
     \r
-    int k = 0;\r
-    vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
-    vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();\r
-    for ( ; cigarIter != cigarEnd; ++cigarIter ) {\r
-        \r
-        const CigarOp& op = (*cigarIter);\r
-        switch(op.Type) {\r
-          \r
-            case ('M') :\r
-            case ('I') :\r
-                bAlignment.AlignedBases.append(bAlignment.QueryBases.substr(k, op.Length)); // for 'M', 'I' - write bases\r
-                // fall through\r
+    // if QueryBases contains data, then build AlignedBases using CIGAR data\r
+    else {\r
+    \r
+        // resize AlignedBases\r
+        bAlignment.AlignedBases.clear();\r
+        bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+      \r
+        // iterate over CigarOps\r
+        int k = 0;\r
+        vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
+        vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();\r
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {\r
             \r
-            case ('S') :\r
-                k += op.Length;                                     // for 'S' - soft clip, skip over query bases\r
-                break;\r
+            const CigarOp& op = (*cigarIter);\r
+            switch(op.Type) {\r
+              \r
+                case ('M') :\r
+                case ('I') :\r
+                    bAlignment.AlignedBases.append(bAlignment.QueryBases.substr(k, op.Length)); // for 'M', 'I' - write bases\r
+                    // fall through\r
                 \r
-            case ('D') :\r
-                bAlignment.AlignedBases.append(op.Length, '-');     // for 'D' - write gap character\r
-                break;\r
-                \r
-            case ('P') :\r
-                bAlignment.AlignedBases.append( op.Length, '*' );   // for 'P' - write padding character\r
-                break;\r
-                \r
-            case ('N') :\r
-                bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in original query sequence\r
-                // k+=op.Length; \r
-                break;\r
-                \r
-            case ('H') :\r
-                break;  // for 'H' - hard clip, do nothing to AlignedBases, move to next op\r
-                \r
-            default:\r
-                printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here\r
-                exit(1);\r
+                case ('S') :\r
+                    k += op.Length;                                     // for 'S' - soft clip, skip over query bases\r
+                    break;\r
+                    \r
+                case ('D') :\r
+                    bAlignment.AlignedBases.append(op.Length, '-');     // for 'D' - write gap character\r
+                    break;\r
+                    \r
+                case ('P') :\r
+                    bAlignment.AlignedBases.append( op.Length, '*' );   // for 'P' - write padding character\r
+                    break;\r
+                    \r
+                case ('N') :\r
+                    bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's; the query index (k) is not advanced\r
+                    break;\r
+                    \r
+                case ('H') :\r
+                    break;  // for 'H' - hard clip, do nothing to AlignedBases, move to next op\r
+                    \r
+                default:\r
+                    printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here\r
+                    exit(1);\r
+            }\r
         }\r
     }\r
  \r
     // -----------------------\r
-    // Added: 3-25-2010 DWB\r
+    // Added: 3-25-2010 DB\r
     // Fixed: endian-correctness for tag data\r
     // -----------------------\r
     if ( IsBigEndian ) {\r
@@ -310,18 +318,18 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {
 \r
                 case('S') : \r
                     SwapEndian_16p(&tagData[i]); \r
-                    i+=2; // sizeof(uint16_t)\r
+                    i += sizeof(uint16_t);\r
                     break;\r
                     \r
                 case('F') :\r
                 case('I') : \r
                     SwapEndian_32p(&tagData[i]);\r
-                    i+=4; // sizeof(uint32_t)\r
+                    i += sizeof(uint32_t);\r
                     break;\r
                 \r
                 case('D') : \r
                     SwapEndian_64p(&tagData[i]);\r
-                    i+=8; // sizeof(uint64_t) \r
+                    i += sizeof(uint64_t);\r
                     break;\r
                 \r
                 case('H') :\r
@@ -407,7 +415,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 }\r
 \r
 // retrieves next available alignment core data (returns success/fail)\r
-// ** DOES NOT parse any character data (bases, qualities, tag data)\r
+// ** DOES NOT parse any character data (read name, bases, qualities, tag data)\r
 //    these can be accessed, if necessary, from the supportData \r
 // useful for operations requiring ONLY positional or other alignment-related information\r
 bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {\r
index 33ee9b4d342b466017f47745361e4cd9630b64a5..c93987b1a41bfbe41b7f0a10bd9d826a3514e681 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College\r
 // All rights reserved.\r
 // ---------------------------------------------------------------------------\r
-// Last modified: 22 June 2010 (DB)\r
+// Last modified: 9 July 2010 (DB)\r
 // ---------------------------------------------------------------------------\r
 // Uses BGZF routines that were adapted from the bgzf.c code developed at the Broad\r
 // Institute.\r
@@ -60,7 +60,7 @@ class BamReader {
         bool GetNextAlignment(BamAlignment& bAlignment);\r
         \r
         // retrieves next available alignment core data (returns success/fail)\r
-        // ** DOES NOT parse any character data (bases, qualities, tag data)\r
+        // ** DOES NOT parse any character data (read name, bases, qualities, tag data)\r
         //    these can be accessed, if necessary, from the supportData \r
         // useful for operations requiring ONLY positional or other alignment-related information\r
         bool GetNextAlignmentCore(BamAlignment& bAlignment);\r