X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=BamMultiReader.h;h=bd36d7160e0d18aee8d63c8699262c4a296695bc;hb=c50fd68f6f1eec78d0c8b2558093ad6305f862b3;hp=319d3270819c8d0b0d5ed5c288b97193c6f1107a;hpb=da8310a92e91d3682857b8f9affbe19621ea7bae;p=bamtools.git diff --git a/BamMultiReader.h b/BamMultiReader.h index 319d327..bd36d71 100644 --- a/BamMultiReader.h +++ b/BamMultiReader.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 February 2010 (EG) +// Last modified: 20 July 2010 (DB) // --------------------------------------------------------------------------- // Functionality for simultaneously reading multiple BAM files // *************************************************************************** @@ -13,6 +13,9 @@ // C++ includes #include +#include +#include // for pair +#include using namespace std; @@ -22,7 +25,9 @@ using namespace std; namespace BamTools { -enum BamReaderState { START, END, CLOSED }; +// index mapping reference/position pairings to bamreaders and their alignments +typedef multimap, pair > AlignmentIndex; + class BamMultiReader { @@ -38,17 +43,31 @@ class BamMultiReader { int CurrentRefID; int CurrentLeft; + // region under analysis, specified using SetRegion + BamRegion Region; + // ---------------------- // BAM file operations // ---------------------- // close BAM files void Close(void); + + // opens BAM files (and optional BAM index files, if provided) + // @openIndexes - triggers index opening, useful for suppressing + // error messages during merging of files in which we may not have + // indexes. + // @coreMode - setup our first alignments using GetNextAlignmentCore(); + // also useful for merging + bool Open(const vector filenames, bool openIndexes = true, bool coreMode = false, bool useDefaultIndex = true); + // performs random-access jump to reference, position bool Jump(int refID, int position = 0); - // opens BAM files (and optional BAM index files, if provided) - //void Open(const vector filenames, const vector indexFilenames); - void Open(const vector filenames); + + // sets the target region + bool SetRegion(const BamRegion& region); + bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above + // returns file pointers to beginning of alignments bool Rewind(void); @@ -60,6 +79,10 @@ class BamMultiReader { // retrieves next available alignment (returns success/fail) from all files bool GetNextAlignment(BamAlignment&); + // retrieves next available alignment (returns success/fail) from all files + // and populates the support data with information about the alignment + // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT + bool GetNextAlignmentCore(BamAlignment&); // ... should this be private? bool HasOpenReaders(void); @@ -68,40 +91,40 @@ class BamMultiReader { // ---------------------- // returns unified SAM header text for all files - const string GetUnifiedHeaderText(void) const; + const string GetHeaderText(void) const; // returns number of reference sequences const int GetReferenceCount(void) const; // returns vector of reference objects const BamTools::RefVector GetReferenceData(void) const; // returns reference id (used for BamMultiReader::Jump()) for the given reference name - //const int GetReferenceID(const std::string& refName) const; + const int GetReferenceID(const std::string& refName) const; + // validates that we have a congruent set of BAM files that are aligned against the same reference sequences + void ValidateReaders() const; // ---------------------- // BAM index operations // ---------------------- // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai") - bool CreateIndexes(void); + bool CreateIndexes(bool useDefaultIndex = true); //const int GetReferenceID(const string& refName) const; // utility void PrintFilenames(void); - void UpdateAlignments(void); - + void DumpAlignmentIndex(void); + void UpdateAlignments(void); // updates our alignment cache // private implementation private: - // TODO perhaps, for legibility, I should use a struct to wrap them all up - // But this may actually make things more confusing, as I'm only - // operating on them all simultaneously during GetNextAlignment - // calls. - // all these vectors are ordered the same - // readers.at(N) refers to the same reader as alignments.at(N) and readerStates.at(N) - vector readers; // the set of readers which we operate on - vector alignments; // the equivalent set of alignments we use to step through the files - vector readerStates; // states of the various readers - // alignment position? + + // the set of readers and alignments which we operate on, maintained throughout the life of this class + vector > readers; + + // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment + // when a reader reaches EOF, its entry is removed from this index + AlignmentIndex alignments; + vector fileNames; };