git.donarmstrong.com Git - bamtools.git/commitdiff
change merger to use GetNextAlignmentCore
author Erik Garrison <erik.garrison@bc.edu>
Thu, 10 Jun 2010 17:33:28 +0000 (13:33 -0400)
committer Erik Garrison <erik.garrison@bc.edu>
Thu, 10 Jun 2010 17:33:28 +0000 (13:33 -0400)
This provides a modest performance boost to the merger.  A small change
to the BamAlignment copy constructor was required (to copy
BamAlignmentSupportData).

BamAux.h
BamMultiReader.cpp
BamMultiReader.h
bamtools_merge.cpp

index 5ef34fe626af240769ca2f43dcf657662a67c675..6777dbc07809c64b47ff0f9af9f47806a71fffda 100644 (file)
--- a/BamAux.h
+++ b/BamAux.h
@@ -272,6 +272,7 @@ BamAlignment::BamAlignment(const BamAlignment& other)
     , MateRefID(other.MateRefID)\r
     , MatePosition(other.MatePosition)\r
     , InsertSize(other.InsertSize)\r
+    , SupportData(other.SupportData)\r
 { }\r
 \r
 inline \r
index 51372c3feef1d3bf8a73d4c93ade22b227411a3e..f0a8ceda8b2bc1bf8b43803e794be9e229357415 100644 (file)
--- a/BamMultiReader.cpp
+++ b/BamMultiReader.cpp
@@ -80,7 +80,7 @@ bool BamMultiReader::HasOpenReaders() {
     return alignments.size() > 0;
 }
 
-// get next alignment among all files (from specified region, if given)
+// get next alignment among all files
 bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
 
     // bail out if we are at EOF in all files, means no more alignments to process
@@ -92,24 +92,59 @@ bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
     UpdateReferenceID();
 
     // our lowest alignment and reader will be at the front of our alignment index
-    BamAlignment* lowestAlignment = alignments.begin()->second.second;
-    BamReader* lowestReader = alignments.begin()->second.first;
+    BamAlignment* alignment = alignments.begin()->second.second;
+    BamReader* reader = alignments.begin()->second.first;
 
     // now that we have the lowest alignment in the set, save it by copy to our argument
-    nextAlignment = BamAlignment(*lowestAlignment);
+    nextAlignment = BamAlignment(*alignment);
 
     // remove this alignment index entry from our alignment index
     alignments.erase(alignments.begin());
 
     // and add another entry if we can get another alignment from the reader
-    if (lowestReader->GetNextAlignment(*lowestAlignment)) {
-        alignments.insert(make_pair(make_pair(lowestAlignment->RefID, lowestAlignment->Position), 
-                                    make_pair(lowestReader, lowestAlignment)));
+    if (reader->GetNextAlignment(*alignment)) {
+        alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
+                                    make_pair(reader, alignment)));
     } else { // do nothing
         //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
     }
 
     return true;
+
+}
+
+// get next alignment among all files without parsing character data from alignments
+bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
+
+    // bail out if we are at EOF in all files, means no more alignments to process
+    if (!HasOpenReaders())
+        return false;
+
+    // when all alignments have stepped into a new target sequence, update our
+    // current reference sequence id
+    UpdateReferenceID();
+
+    // our lowest alignment and reader will be at the front of our alignment index
+    BamAlignment* alignment = alignments.begin()->second.second;
+    BamReader* reader = alignments.begin()->second.first;
+
+    // now that we have the lowest alignment in the set, save it by copy to our argument
+    nextAlignment = BamAlignment(*alignment);
+    //memcpy(&nextAlignment, alignment, sizeof(BamAlignment));
+
+    // remove this alignment index entry from our alignment index
+    alignments.erase(alignments.begin());
+
+    // and add another entry if we can get another alignment from the reader
+    if (reader->GetNextAlignmentCore(*alignment)) {
+        alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position), 
+                                    make_pair(reader, alignment)));
+    } else { // do nothing
+        //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
+    }
+
+    return true;
+
 }
 
 // jumps to specified region(refID, leftBound) in BAM files, returns success/fail
@@ -146,7 +181,7 @@ bool BamMultiReader::Jump(int refID, int position) {
 }
 
 // opens BAM files
-void BamMultiReader::Open(const vector<string> filenames, bool openIndexes) {
+void BamMultiReader::Open(const vector<string> filenames, bool openIndexes, bool coreMode) {
     // for filename in filenames
     fileNames = filenames; // save filenames in our multireader
     for (vector<string>::const_iterator it = filenames.begin(); it != filenames.end(); ++it) {
@@ -158,7 +193,11 @@ void BamMultiReader::Open(const vector<string> filenames, bool openIndexes) {
             reader->Open(filename); // for merging, jumping is disallowed
         }
         BamAlignment* alignment = new BamAlignment;
-        reader->GetNextAlignment(*alignment);
+        if (coreMode) {
+            reader->GetNextAlignmentCore(*alignment);
+        } else {
+            reader->GetNextAlignment(*alignment);
+        }
         readers.push_back(make_pair(reader, alignment)); // store pointers to our readers for cleanup
         alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
                                     make_pair(reader, alignment)));
index 6d5a805f2739c9306ac6c9d2f23bc37b63ab388e..3d9024c40ce9927e13e3588acb82d4d09b7711a7 100644 (file)
--- a/BamMultiReader.h
+++ b/BamMultiReader.h
@@ -49,11 +49,18 @@ class BamMultiReader {
 \r
         // close BAM files\r
         void Close(void);\r
+\r
+        // opens BAM files (and optional BAM index files, if provided)\r
+        // @openIndexes - triggers index opening, useful for suppressing\r
+        // error messages during merging of files in which we may not have\r
+        // indexes.\r
+        // @coreMode - setup our first alignments using GetNextAlignmentCore();\r
+        // also useful for merging\r
+        void Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false);\r
+\r
         // performs random-access jump to reference, position\r
         bool Jump(int refID, int position = 0);\r
-        // opens BAM files (and optional BAM index files, if provided)\r
-        //void Open(const vector<std::string&> filenames, const vector<std::string&> indexFilenames);\r
-        void Open(const vector<string> filenames, bool openIndexes = true);\r
+\r
         // returns file pointers to beginning of alignments\r
         bool Rewind(void);\r
 \r
@@ -65,6 +72,10 @@ class BamMultiReader {
 \r
         // retrieves next available alignment (returns success/fail) from all files\r
         bool GetNextAlignment(BamAlignment&);\r
+        // retrieves next available alignment (returns success/fail) from all files\r
+        // and populates the support data with information about the alignment\r
+        // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT\r
+        bool GetNextAlignmentCore(BamAlignment&);\r
         // ... should this be private?\r
         bool HasOpenReaders(void);\r
 \r
index bfb5de7fef462ee5d3dc3c84466f70fe49814051..dcea1725b229a4c95200318ebbe0103cc55ec809 100644 (file)
--- a/bamtools_merge.cpp
+++ b/bamtools_merge.cpp
@@ -88,11 +88,8 @@ int MergeTool::Run(int argc, char* argv[]) {
     if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn());
     
     // opens the BAM files without checking for indexes
-//     BamMultiReader reader;
-//     reader.Open(m_settings->InputFiles, false); 
-
-    BamReader reader;
-    reader.Open(m_settings->InputFiles.at(0));
+    BamMultiReader reader;
+    reader.Open(m_settings->InputFiles, false, true); 
 
     // retrieve header & reference dictionary info
     std::string mergedHeader = reader.GetHeaderText();
@@ -103,16 +100,11 @@ int MergeTool::Run(int argc, char* argv[]) {
     writer.Open(m_settings->OutputFilename, mergedHeader, references);
 
     // store alignments to output file
-//     BamAlignment bAlignment;
-//     while (reader.GetNextAlignment(bAlignment)) {
-//         writer.SaveAlignment(bAlignment);
-//     }
-    
     BamAlignment bAlignment;
-    while (reader.GetNextAlignment(bAlignment)) {
+    while (reader.GetNextAlignmentCore(bAlignment)) {
         writer.SaveAlignment(bAlignment);
     }
-
+    
     // clean & exit
     reader.Close();
     writer.Close();