]> git.donarmstrong.com Git - bamtools.git/blobdiff - BamReader.cpp
change merger to use GetNextAlignmentCore
[bamtools.git] / BamReader.cpp
index 53c32e9e1db6bf144a6c8ba6edec0a7ab2cc9d40..e5265e0ca0446318ee9e3955a4db2defbe3aefa4 100644 (file)
@@ -69,7 +69,7 @@ struct BamReader::BamReaderPrivate {
 \r
     // access alignment data\r
     bool GetNextAlignment(BamAlignment& bAlignment);\r
-    bool GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
+    bool GetNextAlignmentCore(BamAlignment& bAlignment);\r
 \r
     // access auxiliary data\r
     int GetReferenceID(const string& refName) const;\r
@@ -86,7 +86,7 @@ struct BamReader::BamReaderPrivate {
     // calculate bins that overlap region ( left to reference end for now )\r
     int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);\r
     // fills out character data for BamAlignment data\r
-    bool BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData);\r
+    bool BuildCharData(BamAlignment& bAlignment);\r
     // calculate file offset for first alignment chunk overlapping 'left'\r
     int64_t GetOffset(int refID, int left);\r
     // checks to see if alignment overlaps current region\r
@@ -94,7 +94,7 @@ struct BamReader::BamReaderPrivate {
     // retrieves header text from BAM file\r
     void LoadHeaderData(void);\r
     // retrieves BAM alignment under file pointer\r
-    bool LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);\r
+    bool LoadNextAlignment(BamAlignment& bAlignment);\r
     // builds reference data structure from BAM file\r
     void LoadReferenceData(void);\r
 \r
@@ -139,7 +139,7 @@ bool BamReader::Rewind(void) { return d->Rewind(); }
 \r
 // access alignment data\r
 bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }\r
-bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) { return d->GetNextAlignmentCore(bAlignment, supportData); }\r
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment) { return d->GetNextAlignmentCore(bAlignment); }\r
 \r
 // access auxiliary data\r
 const string BamReader::GetHeaderText(void) const { return d->HeaderText; }\r
@@ -196,40 +196,40 @@ int BamReader::BamReaderPrivate::BinsFromRegion(int refID, int left, uint16_t li
     return i;\r
 }\r
 \r
-bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {\r
   \r
     // calculate character lengths/offsets\r
-    const unsigned int dataLength     = supportData.BlockLength - BAM_CORE_SIZE;\r
-    const unsigned int seqDataOffset  = supportData.QueryNameLength + (supportData.NumCigarOperations * 4);\r
-    const unsigned int qualDataOffset = seqDataOffset + (supportData.QuerySequenceLength+1)/2;\r
-    const unsigned int tagDataOffset  = qualDataOffset + supportData.QuerySequenceLength;\r
+    const unsigned int dataLength     = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;\r
+    const unsigned int seqDataOffset  = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);\r
+    const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2;\r
+    const unsigned int tagDataOffset  = qualDataOffset + bAlignment.SupportData.QuerySequenceLength;\r
     const unsigned int tagDataLength  = dataLength - tagDataOffset;\r
       \r
     // set up char buffers\r
-    const char* allCharData = supportData.AllCharData.data();\r
+    const char* allCharData = bAlignment.SupportData.AllCharData.data();\r
     const char* seqData     = ((const char*)allCharData) + seqDataOffset;\r
     const char* qualData    = ((const char*)allCharData) + qualDataOffset;\r
     char* tagData     = ((char*)allCharData) + tagDataOffset;\r
   \r
     // save query sequence\r
     bAlignment.QueryBases.clear();\r
-    bAlignment.QueryBases.reserve(supportData.QuerySequenceLength);\r
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {\r
+    bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {\r
         char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];\r
         bAlignment.QueryBases.append(1, singleBase);\r
     }\r
   \r
     // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character\r
     bAlignment.Qualities.clear();\r
-    bAlignment.Qualities.reserve(supportData.QuerySequenceLength);\r
-    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {\r
+    bAlignment.Qualities.reserve(bAlignment.SupportData.QuerySequenceLength);\r
+    for (unsigned int i = 0; i < bAlignment.SupportData.QuerySequenceLength; ++i) {\r
         char singleQuality = (char)(qualData[i]+33);\r
         bAlignment.Qualities.append(1, singleQuality);\r
     }\r
     \r
     // parse CIGAR to build 'AlignedBases'\r
     bAlignment.AlignedBases.clear();\r
-    bAlignment.AlignedBases.reserve(supportData.QuerySequenceLength);\r
+    bAlignment.AlignedBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
     \r
     int k = 0;\r
     vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
@@ -323,6 +323,9 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const
     bAlignment.TagData.resize(tagDataLength);\r
     memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLength);\r
     \r
+    // set support data parsed flag\r
+    bAlignment.SupportData.IsParsed = true;\r
+    \r
     // return success\r
     return true;\r
 }\r
@@ -504,25 +507,23 @@ int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
 // get next alignment (from specified region, if given)\r
 bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {\r
 \r
-    BamAlignmentSupportData supportData;\r
-  \r
     // if valid alignment available\r
-    if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+    if ( LoadNextAlignment(bAlignment) ) {\r
 \r
         // if region not specified, return success\r
         if ( !IsRegionSpecified ) { \r
-          bool ok = BuildCharData(bAlignment, supportData);\r
+          bool ok = BuildCharData(bAlignment);\r
           return ok; \r
         }\r
 \r
         // load next alignment until region overlap is found\r
         while ( !IsOverlap(bAlignment) ) {\r
             // if no valid alignment available (likely EOF) return failure\r
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+            if ( !LoadNextAlignment(bAlignment) ) return false;\r
         }\r
 \r
         // return success (alignment found that overlaps region)\r
-        bool ok = BuildCharData(bAlignment, supportData);\r
+        bool ok = BuildCharData(bAlignment);\r
         return ok;\r
     }\r
 \r
@@ -535,10 +536,10 @@ bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 // ** DOES NOT parse any character data (bases, qualities, tag data)\r
 //    these can be accessed, if necessary, from the supportData \r
 // useful for operations requiring ONLY positional or other alignment-related information\r
-bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {\r
 \r
     // if valid alignment available\r
-    if ( LoadNextAlignment(bAlignment, supportData) ) {\r
+    if ( LoadNextAlignment(bAlignment) ) {\r
 \r
         // if region not specified, return success\r
         if ( !IsRegionSpecified ) return true;\r
@@ -546,7 +547,7 @@ bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment,
         // load next alignment until region overlap is found\r
         while ( !IsOverlap(bAlignment) ) {\r
             // if no valid alignment available (likely EOF) return failure\r
-            if ( !LoadNextAlignment(bAlignment, supportData) ) return false;\r
+            if ( !LoadNextAlignment(bAlignment) ) return false;\r
         }\r
 \r
         // return success (alignment found that overlaps region)\r
@@ -847,14 +848,14 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
 }\r
 \r
 // populates BamAlignment with alignment data under file pointer, returns success/fail\r
-bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {\r
+bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {\r
 \r
     // read in the 'block length' value, make sure it's not zero\r
     char buffer[4];\r
     mBGZF.Read(buffer, 4);\r
-    supportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);\r
-    if ( IsBigEndian ) { SwapEndian_32(supportData.BlockLength); }\r
-    if ( supportData.BlockLength == 0 ) { return false; }\r
+    bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);\r
+    if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); }\r
+    if ( bAlignment.SupportData.BlockLength == 0 ) { return false; }\r
 \r
     // read in core alignment data, make sure the right size of data was read\r
     char x[BAM_CORE_SIZE];\r
@@ -873,20 +874,20 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
     unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]);\r
     bAlignment.Bin        = tempValue >> 16;\r
     bAlignment.MapQuality = tempValue >> 8 & 0xff;\r
-    supportData.QueryNameLength = tempValue & 0xff;\r
+    bAlignment.SupportData.QueryNameLength = tempValue & 0xff;\r
 \r
     tempValue = BgzfData::UnpackUnsignedInt(&x[12]);\r
     bAlignment.AlignmentFlag = tempValue >> 16;\r
-    supportData.NumCigarOperations = tempValue & 0xffff;\r
+    bAlignment.SupportData.NumCigarOperations = tempValue & 0xffff;\r
 \r
-    supportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);\r
+    bAlignment.SupportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);\r
     bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);\r
     bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);\r
     bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);\r
     \r
     // store 'all char data' and cigar ops\r
-    const unsigned int dataLength      = supportData.BlockLength - BAM_CORE_SIZE;\r
-    const unsigned int cigarDataOffset = supportData.QueryNameLength;\r
+    const unsigned int dataLength      = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;\r
+    const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength;\r
     \r
     char*     allCharData = (char*)calloc(sizeof(char), dataLength);\r
     uint32_t* cigarData   = (uint32_t*)(allCharData + cigarDataOffset);\r
@@ -897,14 +898,14 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, Ba
      \r
         // store alignment name and length\r
         bAlignment.Name.assign((const char*)(allCharData));\r
-        bAlignment.Length = supportData.QuerySequenceLength;\r
+        bAlignment.Length = bAlignment.SupportData.QuerySequenceLength;\r
       \r
         // store remaining 'allCharData' in supportData structure\r
-        supportData.AllCharData.assign((const char*)allCharData, dataLength);\r
+        bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);\r
         \r
         // save CigarOps for BamAlignment\r
         bAlignment.CigarData.clear();\r
-        for (unsigned int i = 0; i < supportData.NumCigarOperations; ++i) {\r
+        for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) {\r
 \r
             // swap if necessary\r
             if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }\r