// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 22 February 2010 (EG)\r
+// Last modified: 20 July 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Functionality for simultaneously reading multiple BAM files\r
// ***************************************************************************\r
\r
// C++ includes\r
#include <string>\r
+#include <map>\r
+#include <utility> // for pair\r
+#include <sstream>\r
\r
using namespace std;\r
\r
\r
namespace BamTools {\r
\r
-enum BamReaderState { START, END, CLOSED };\r
+// ordered index keyed by (reference ID, position) pairs; each entry pairs a BamReader* with a BamAlignment* (a multimap, so several entries may share one key)\r
+typedef multimap<pair<int, int>, pair<BamReader*, BamAlignment*> > AlignmentIndex;\r
+\r
\r
// Presents several BAM files through one reader-style interface: open/close, jump or set a region, then fetch alignments one at a time.\r
class BamMultiReader {\r
\r
// NOTE(review): these two members are undocumented in this header; presumably the reference ID and\r
// left position of the current alignment -- confirm against the implementation.\r
int CurrentRefID;\r
int CurrentLeft;\r
\r
+ // region under analysis, specified using SetRegion\r
+ BamRegion Region;\r
+\r
// ----------------------\r
// BAM file operations\r
// ----------------------\r
\r
// closes the BAM files\r
void Close(void);\r
+\r
+ // opens the BAM files named in filenames (and optional BAM index files, if provided)\r
+ // @openIndexes - triggers index opening; pass false to suppress\r
+ // error messages during merging of files in which we may not have\r
+ // indexes.\r
+ // @coreMode - set up our first alignments using GetNextAlignmentCore();\r
+ // also useful for merging\r
// @useDefaultIndex - not described in this header; presumably the same option CreateIndexes() takes -- TODO confirm\r
// NOTE(review): filenames is taken by value rather than by const reference.\r
// The bool result is presumably success/fail -- confirm against the implementation.\r
+ bool Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false, bool useDefaultIndex = true);\r
+\r
// performs random-access jump to the given reference ID and position (position defaults to 0)\r
bool Jump(int refID, int position = 0);\r
- // opens BAM files (and optional BAM index files, if provided)\r
- //void Open(const vector<std::string&> filenames, const vector<std::string&> indexFilenames);\r
- void Open(const vector<string> filenames, bool openIndexes = true);\r
+\r
+ // sets the target region (the region under analysis) from a BamRegion\r
+ bool SetRegion(const BamRegion& region);\r
+ bool SetRegion(const int&, const int&, const int&, const int&); // convenience overload of the above; parameters are unnamed here -- presumably left refID/position then right refID/position, TODO confirm\r
+\r
// resets the file pointers to the beginning of the alignments (the bool result is presumably success/fail -- confirm)\r
bool Rewind(void);\r
\r
\r
// retrieves next available alignment (returns success/fail) from all files\r
bool GetNextAlignment(BamAlignment&);\r
+ // retrieves next available alignment (returns success/fail) from all files\r
+ // and populates the support data with information about the alignment,\r
+ // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT\r
+ bool GetNextAlignmentCore(BamAlignment&);\r
// presumably reports whether any underlying reader is still open -- confirm; TODO: should this be private?\r
bool HasOpenReaders(void);\r
\r
// ----------------------\r
\r
// creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")\r
// @useDefaultIndex - undocumented here; presumably selects the default index described above -- TODO confirm\r
- bool CreateIndexes(void);\r
+ bool CreateIndexes(bool useDefaultIndex = true);\r
\r
//const int GetReferenceID(const string& refName) const;\r
\r
// utility\r
void PrintFilenames(void);\r
- void UpdateAlignments(void);\r
-\r
// NOTE(review): presumably a debugging aid that prints the contents of the alignments index -- confirm\r
+ void DumpAlignmentIndex(void);\r
+ void UpdateAlignments(void); // updates our alignment cache\r
\r
// private implementation\r
private:\r
- // TODO perhaps, for legibility, I should use a struct to wrap them all up\r
- // But this may actually make things more confusing, as I'm only\r
- // operating on them all simultaneously during GetNextAlignment\r
- // calls.\r
- // all these vectors are ordered the same\r
- // readers.at(N) refers to the same reader as alignments.at(N) and readerStates.at(N)\r
- vector<BamReader*> readers; // the set of readers which we operate on\r
- vector<BamAlignment*> alignments; // the equivalent set of alignments we use to step through the files\r
- vector<BamReaderState> readerStates; // states of the various readers\r
- // alignment position?\r
+\r
+ // the full set of (reader, alignment) pairs we operate on, maintained throughout the life of this class\r
+ vector<pair<BamReader*, BamAlignment*> > readers;\r
+\r
+ // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment;\r
+ // when a reader reaches EOF, its entry is removed from this index\r
+ AlignmentIndex alignments;\r
+\r
// NOTE(review): presumably the filenames handed to Open() -- confirm against the implementation\r
vector<string> fileNames;\r
};\r
\r