// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 19 November 2010 (DB)
+// Last modified: 23 December 2010 (DB)
// ---------------------------------------------------------------------------
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
// Institute.
#include <api/BamMultiReader.h>
#include <api/BGZF.h>
+#include <api/internal/BamMultiReader_p.h>
using namespace BamTools;
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <sstream>
#include <string>
#include <vector>
using namespace std;
// BamMultiReader implementation
// -----------------------------------------------------
-// constructor
BamMultiReader::BamMultiReader(void)
- : CurrentRefID(0)
- , CurrentLeft(0)
+ : d(new Internal::BamMultiReaderPrivate)
{ }
-// destructor
BamMultiReader::~BamMultiReader(void) {
- Close();
+ delete d;
+ d = 0;
}
-// close the BAM files
void BamMultiReader::Close(void) {
-
- // close all BAM readers and clean up pointers
- vector<pair<BamReader*, BamAlignment*> >::iterator readerIter = readers.begin();
- vector<pair<BamReader*, BamAlignment*> >::iterator readerEnd = readers.end();
- for ( ; readerIter != readerEnd; ++readerIter) {
-
- BamReader* reader = (*readerIter).first;
- BamAlignment* alignment = (*readerIter).second;
-
- // close the reader
- if ( reader) reader->Close();
-
- // delete reader pointer
- delete reader;
- reader = 0;
-
- // delete alignment pointer
- delete alignment;
- alignment = 0;
- }
-
- // clear out the container
- readers.clear();
+ d->Close();
}
-// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail
bool BamMultiReader::CreateIndexes(bool useStandardIndex) {
- bool result = true;
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- result &= reader->CreateIndex(useStandardIndex);
- }
- return result;
+ return d->CreateIndexes(useStandardIndex);
}
-// sets the index caching mode on the readers
void BamMultiReader::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) {
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- reader->SetIndexCacheMode(mode);
- }
-}
-
-// for debugging
-void BamMultiReader::DumpAlignmentIndex(void) {
- for (AlignmentIndex::const_iterator it = alignments.begin(); it != alignments.end(); ++it) {
- cerr << it->first.first << ":" << it->first.second << " " << it->second.first->GetFilename() << endl;
- }
+ d->SetIndexCacheMode(mode);
}
-// makes a virtual, unified header for all the bam files in the multireader
const string BamMultiReader::GetHeaderText(void) const {
-
- string mergedHeader = "";
- map<string, bool> readGroups;
-
- // foreach extraction entry (each BAM file)
- for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
-
- BamReader* reader = rs->first;
- string headerText = reader->GetHeaderText();
- if ( headerText.empty() ) continue;
-
- map<string, bool> currentFileReadGroups;
- stringstream header(headerText);
- vector<string> lines;
- string item;
- while (getline(header, item))
- lines.push_back(item);
-
- for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it) {
-
- // get next line from header, skip if empty
- string headerLine = *it;
- if ( headerLine.empty() ) { continue; }
-
- // if first file, save HD & SQ entries
- if ( rs == readers.begin() ) {
- if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
- mergedHeader.append(headerLine.c_str());
- mergedHeader.append(1, '\n');
- }
- }
-
- // (for all files) append RG entries if they are unique
- if ( headerLine.find("@RG") == 0 ) {
- stringstream headerLineSs(headerLine);
- string part, readGroupPart, readGroup;
- while(std::getline(headerLineSs, part, '\t')) {
- stringstream partSs(part);
- string subtag;
- std::getline(partSs, subtag, ':');
- if (subtag == "ID") {
- std::getline(partSs, readGroup, ':');
- break;
- }
- }
- if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
- mergedHeader.append(headerLine.c_str() );
- mergedHeader.append(1, '\n');
- readGroups[readGroup] = true;
- currentFileReadGroups[readGroup] = true;
- } else {
- // warn iff we are reading one file and discover duplicated @RG tags in the header
- // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
- if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
- cerr << "WARNING: duplicate @RG tag " << readGroup
- << " entry in header of " << reader->GetFilename() << endl;
- }
- }
- }
- }
- }
-
- // return merged header text
- return mergedHeader;
+ return d->GetHeaderText();
}
-// get next alignment among all files
bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
-
- // bail out if we are at EOF in all files, means no more alignments to process
- if (!HasOpenReaders())
- return false;
-
- // when all alignments have stepped into a new target sequence, update our
- // current reference sequence id
- UpdateReferenceID();
-
- // our lowest alignment and reader will be at the front of our alignment index
- BamAlignment* alignment = alignments.begin()->second.second;
- BamReader* reader = alignments.begin()->second.first;
-
- // now that we have the lowest alignment in the set, save it by copy to our argument
- nextAlignment = BamAlignment(*alignment);
-
- // remove this alignment index entry from our alignment index
- alignments.erase(alignments.begin());
-
- // and add another entry if we can get another alignment from the reader
- if (reader->GetNextAlignment(*alignment)) {
- alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
- make_pair(reader, alignment)));
- } else { // do nothing
- //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
- }
-
- return true;
-
+ return d->GetNextAlignment(nextAlignment);
}
-// get next alignment among all files without parsing character data from alignments
bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
-
- // bail out if we are at EOF in all files, means no more alignments to process
- if (!HasOpenReaders())
- return false;
-
- // when all alignments have stepped into a new target sequence, update our
- // current reference sequence id
- UpdateReferenceID();
-
- // our lowest alignment and reader will be at the front of our alignment index
- BamAlignment* alignment = alignments.begin()->second.second;
- BamReader* reader = alignments.begin()->second.first;
-
- // now that we have the lowest alignment in the set, save it by copy to our argument
- nextAlignment = BamAlignment(*alignment);
- //memcpy(&nextAlignment, alignment, sizeof(BamAlignment));
-
- // remove this alignment index entry from our alignment index
- alignments.erase(alignments.begin());
-
- // and add another entry if we can get another alignment from the reader
- if (reader->GetNextAlignmentCore(*alignment)) {
- alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
- make_pair(reader, alignment)));
- } else { // do nothing
- //cerr << "reached end of file " << lowestReader->GetFilename() << endl;
- }
-
- return true;
-
+ return d->GetNextAlignmentCore(nextAlignment);
}
-// ---------------------------------------------------------------------------------------
-//
-// NB: The following GetReferenceX() functions assume that we have identical
-// references for all BAM files. We enforce this by invoking the above
-// validation function (ValidateReaders) to verify that our reference data
-// is the same across all files on Open, so we will not encounter a situation
-// in which there is a mismatch and we are still live.
-//
-// ---------------------------------------------------------------------------------------
-
-// returns the number of reference sequences
const int BamMultiReader::GetReferenceCount(void) const {
- return readers.front().first->GetReferenceCount();
+ return d->GetReferenceCount();
}
-// returns vector of reference objects
const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
- return readers.front().first->GetReferenceData();
+ return d->GetReferenceData();
}
-// returns refID from reference name
const int BamMultiReader::GetReferenceID(const string& refName) const {
- return readers.front().first->GetReferenceID(refName);
+ return d->GetReferenceID(refName);
}
-// ---------------------------------------------------------------------------------------
-
-// checks if any readers still have alignments
bool BamMultiReader::HasOpenReaders() {
- return alignments.size() > 0;
+ return d->HasOpenReaders();
}
-// returns whether underlying BAM readers ALL have an index loaded
-// this is useful to indicate whether Jump() or SetRegion() are possible
bool BamMultiReader::IsIndexLoaded(void) const {
- bool ok = true;
- vector<pair<BamReader*, BamAlignment*> >::const_iterator readerIter = readers.begin();
- vector<pair<BamReader*, BamAlignment*> >::const_iterator readerEnd = readers.end();
- for ( ; readerIter != readerEnd; ++readerIter ) {
- const BamReader* reader = (*readerIter).first;
- if ( reader ) ok &= reader->IsIndexLoaded();
- }
- return ok;
+ return d->IsIndexLoaded();
}
-// jumps to specified region(refID, leftBound) in BAM files, returns success/fail
bool BamMultiReader::Jump(int refID, int position) {
-
- //if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {
- CurrentRefID = refID;
- CurrentLeft = position;
-
- bool result = true;
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- result &= reader->Jump(refID, position);
- if (!result) {
- cerr << "ERROR: could not jump " << reader->GetFilename() << " to " << refID << ":" << position << endl;
- exit(1);
- }
- }
- if (result) UpdateAlignments();
- return result;
+ return d->Jump(refID, position);
}
-// opens BAM files
-bool BamMultiReader::Open(const vector<string>& filenames, bool openIndexes, bool coreMode, bool preferStandardIndex) {
-
- // for filename in filenames
- fileNames = filenames; // save filenames in our multireader
- for (vector<string>::const_iterator it = filenames.begin(); it != filenames.end(); ++it) {
-
- const string filename = *it;
- BamReader* reader = new BamReader;
-
- bool openedOK = true;
- openedOK = reader->Open(filename, "", openIndexes, preferStandardIndex);
-
- // if file opened ok, check that it can be read
- if ( openedOK ) {
-
- bool fileOK = true;
- BamAlignment* alignment = new BamAlignment;
- fileOK &= ( coreMode ? reader->GetNextAlignmentCore(*alignment) : reader->GetNextAlignment(*alignment) );
-
- if (fileOK) {
- readers.push_back(make_pair(reader, alignment)); // store pointers to our readers for cleanup
- alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
- make_pair(reader, alignment)));
- } else {
- cerr << "WARNING: could not read first alignment in " << filename << ", ignoring file" << endl;
- // if only file available & could not be read, return failure
- if ( filenames.size() == 1 ) return false;
- }
- }
-
- // TODO; any further error handling when openedOK is false ??
- else
- return false;
- }
-
- // files opened ok, at least one alignment could be read,
- // now need to check that all files use same reference data
- ValidateReaders();
- return true;
+bool BamMultiReader::Open(const vector<string>& filenames,
+ bool openIndexes,
+ bool coreMode,
+ bool preferStandardIndex)
+{
+ return d->Open(filenames, openIndexes, coreMode, preferStandardIndex);
}
-void BamMultiReader::PrintFilenames(void) {
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- cout << reader->GetFilename() << endl;
- }
+void BamMultiReader::PrintFilenames(void) const {
+ d->PrintFilenames();
}
-// returns BAM file pointers to beginning of alignment data
-bool BamMultiReader::Rewind(void) {
- bool result = true;
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- result &= reader->Rewind();
- }
- return result;
+bool BamMultiReader::Rewind(void) {
+ return d->Rewind();
}
-bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) {
+bool BamMultiReader::SetRegion(const int& leftRefID,
+ const int& leftPosition,
+ const int& rightRefID,
+ const int& rightPosition)
+{
BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);
- return SetRegion(region);
+ return d->SetRegion(region);
}
bool BamMultiReader::SetRegion(const BamRegion& region) {
-
- Region = region;
-
- // NB: While it may make sense to track readers in which we can
- // successfully SetRegion, In practice a failure of SetRegion means "no
- // alignments here." It makes sense to simply accept the failure,
- // UpdateAlignments(), and continue.
-
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- if (!it->first->SetRegion(region)) {
- cerr << "ERROR: could not jump " << it->first->GetFilename() << " to "
- << region.LeftRefID << ":" << region.LeftPosition
- << ".." << region.RightRefID << ":" << region.RightPosition << endl;
- }
- }
-
- UpdateAlignments();
- return true;
-}
-
-void BamMultiReader::UpdateAlignments(void) {
- // Update Alignments
- alignments.clear();
- for (vector<pair<BamReader*, BamAlignment*> >::iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* br = it->first;
- BamAlignment* ba = it->second;
- if (br->GetNextAlignment(*ba)) {
- alignments.insert(make_pair(make_pair(ba->RefID, ba->Position),
- make_pair(br, ba)));
- } else {
- // assume BamReader end of region / EOF
- }
- }
-}
-
-// updates the reference id stored in the BamMultiReader
-// to reflect the current state of the readers
-void BamMultiReader::UpdateReferenceID(void) {
- // the alignments are sorted by position, so the first alignment will always have the lowest reference ID
- if (alignments.begin()->second.second->RefID != CurrentRefID) {
- // get the next reference id
- // while there aren't any readers at the next ref id
- // increment the ref id
- int nextRefID = CurrentRefID;
- while (alignments.begin()->second.second->RefID != nextRefID) {
- ++nextRefID;
- }
- //cerr << "updating reference id from " << CurrentRefID << " to " << nextRefID << endl;
- CurrentRefID = nextRefID;
- }
+ return d->SetRegion(region);
}
-// ValidateReaders checks that all the readers point to BAM files representing
-// alignments against the same set of reference sequences, and that the
-// sequences are identically ordered. If these checks fail the operation of
-// the multireader is undefined, so we force program exit.
-void BamMultiReader::ValidateReaders(void) const {
- int firstRefCount = readers.front().first->GetReferenceCount();
- BamTools::RefVector firstRefData = readers.front().first->GetReferenceData();
- for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
- BamReader* reader = it->first;
- BamTools::RefVector currentRefData = reader->GetReferenceData();
- BamTools::RefVector::const_iterator f = firstRefData.begin();
- BamTools::RefVector::const_iterator c = currentRefData.begin();
- if (reader->GetReferenceCount() != firstRefCount || firstRefData.size() != currentRefData.size()) {
- cerr << "ERROR: mismatched number of references in " << reader->GetFilename()
- << " expected " << firstRefCount
- << " reference sequences but only found " << reader->GetReferenceCount() << endl;
- exit(1);
- }
- // this will be ok; we just checked above that we have identically-sized sets of references
- // here we simply check if they are all, in fact, equal in content
- while (f != firstRefData.end()) {
- if (f->RefName != c->RefName || f->RefLength != c->RefLength) {
- cerr << "ERROR: mismatched references found in " << reader->GetFilename()
- << " expected: " << endl;
- for (BamTools::RefVector::const_iterator a = firstRefData.begin(); a != firstRefData.end(); ++a)
- cerr << a->RefName << " " << a->RefLength << endl;
- cerr << "but found: " << endl;
- for (BamTools::RefVector::const_iterator a = currentRefData.begin(); a != currentRefData.end(); ++a)
- cerr << a->RefName << " " << a->RefLength << endl;
- exit(1);
- }
- ++f; ++c;
- }
- }
+void BamMultiReader::SetSortOrder(const SortOrder& order) {
+ d->SetSortOrder(order);
}
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 19 November 2010 (DB)\r
+// Last modified: 23 December 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Functionality for simultaneously reading multiple BAM files\r
// ***************************************************************************\r
\r
namespace BamTools {\r
\r
-// index mapping reference/position pairings to bamreaders and their alignments\r
-typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex;\r
+namespace Internal {\r
+ class BamMultiReaderPrivate;\r
+} // namespace Internal\r
\r
class API_EXPORT BamMultiReader {\r
\r
// public interface\r
public:\r
\r
- // positioning\r
- int CurrentRefID;\r
- int CurrentLeft;\r
-\r
- // region under analysis, specified using SetRegion\r
- BamRegion Region;\r
-\r
// ----------------------\r
// BAM file operations\r
// ----------------------\r
\r
// close BAM files\r
void Close(void);\r
-\r
// opens BAM files (and optional BAM index files, if provided)\r
// @openIndexes - triggers index opening, useful for suppressing\r
// error messages during merging of files in which we may not have\r
// indexes.\r
// @coreMode - setup our first alignments using GetNextAlignmentCore();\r
// also useful for merging\r
- // @preferStandardIndex - look for standard BAM index ".bai" first. If false, \r
- // will look for BamTools index ".bti". \r
- bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);\r
-\r
+ // @preferStandardIndex - look for standard BAM index ".bai" first. If false,\r
+ // will look for BamTools index ".bti".\r
+ bool Open(const std::vector<std::string>& filenames,\r
+ bool openIndexes = true,\r
+ bool coreMode = false,\r
+ bool preferStandardIndex = false);\r
// returns whether underlying BAM readers ALL have an index loaded\r
// this is useful to indicate whether Jump() or SetRegion() are possible\r
bool IsIndexLoaded(void) const;\r
- \r
// performs random-access jump to reference, position\r
bool Jump(int refID, int position = 0);\r
-\r
+ // list files associated with this multireader\r
+ void PrintFilenames(void) const;\r
// sets the target region\r
bool SetRegion(const BamRegion& region);\r
- bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above\r
-\r
+ bool SetRegion(const int& leftRefID,\r
+ const int& leftBound,\r
+ const int& rightRefID,\r
+ const int& rightBound);\r
// returns file pointers to beginning of alignments\r
bool Rewind(void);\r
\r
// ----------------------\r
// access alignment data\r
// ----------------------\r
- // updates the reference id marker to match the lower limit of our readers\r
- void UpdateReferenceID(void);\r
\r
// retrieves next available alignment (returns success/fail) from all files\r
- bool GetNextAlignment(BamAlignment&);\r
+ bool GetNextAlignment(BamAlignment& alignment);\r
// retrieves next available alignment (returns success/fail) from all files\r
// and populates the support data with information about the alignment\r
// *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT\r
- bool GetNextAlignmentCore(BamAlignment&);\r
+ bool GetNextAlignmentCore(BamAlignment& alignment);\r
// ... should this be private?\r
bool HasOpenReaders(void);\r
+ // set sort order for merging BAM files (i.e. which alignment from the files is 'next'?)\r
+ // default behavior is to sort by position, use this method to handle BAMs sorted by read name\r
+ enum SortOrder { SortedByPosition = 0, SortedByReadName};\r
+ void SetSortOrder(const SortOrder& order);\r
\r
// ----------------------\r
// access auxiliary data\r
const BamTools::RefVector GetReferenceData(void) const;\r
// returns reference id (used for BamMultiReader::Jump()) for the given reference name\r
const int GetReferenceID(const std::string& refName) const;\r
- // validates that we have a congruent set of BAM files that are aligned against the same reference sequences\r
- void ValidateReaders() const;\r
\r
// ----------------------\r
// BAM index operations\r
\r
// creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")\r
bool CreateIndexes(bool useStandardIndex = true);\r
-\r
// sets the index caching mode for the readers\r
void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);\r
\r
- //const int GetReferenceID(const string& refName) const;\r
-\r
- // utility\r
- void PrintFilenames(void);\r
- void DumpAlignmentIndex(void);\r
- void UpdateAlignments(void); // updates our alignment cache\r
-\r
// private implementation\r
private:\r
-\r
- // the set of readers and alignments which we operate on, maintained throughout the life of this class\r
- std::vector<std::pair<BamReader*, BamAlignment*> > readers;\r
-\r
- // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment\r
- // when a reader reaches EOF, its entry is removed from this index\r
- AlignmentIndex alignments;\r
-\r
- std::vector<std::string> fileNames;\r
+ Internal::BamMultiReaderPrivate* d;\r
};\r
\r
} // namespace BamTools\r
BamReader.cpp
BamWriter.cpp
BGZF.cpp
+ internal/BamMultiReader_p.cpp
internal/BamReader_p.cpp
internal/BamStandardIndex_p.cpp
internal/BamToolsIndex_p.cpp
BamReader.cpp
BamWriter.cpp
BGZF.cpp
+ internal/BamMultiReader_p.cpp
internal/BamReader_p.cpp
internal/BamStandardIndex_p.cpp
internal/BamToolsIndex_p.cpp
--- /dev/null
+// ***************************************************************************
+// BamMultiMerger_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides merging functionality for BamMultiReader. At this point, supports
+// sorting results by (refId, position) or by read name.
+// ***************************************************************************
+
+#ifndef BAMMULTIMERGER_P_H
+#define BAMMULTIMERGER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAlignment.h>
+#include <api/BamReader.h>
+#include <map>
+#include <string>
+#include <utility>
+
+namespace BamTools {
+namespace Internal {
+
+typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+
+// generic MultiMerger interface
+class IBamMultiMerger {
+
+ public:
+ IBamMultiMerger(void) { }
+ virtual ~IBamMultiMerger(void) { }
+
+ public:
+ virtual void Add(const ReaderAlignment& value) =0;
+ virtual void Clear(void) =0;
+ virtual const ReaderAlignment& First(void) const =0;
+ virtual const int Size(void) const =0;
+ virtual ReaderAlignment TakeFirst(void) =0;
+};
+
+// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position)
+class PositionMultiMerger : public IBamMultiMerger {
+
+ public:
+ PositionMultiMerger(void) : IBamMultiMerger() { }
+ ~PositionMultiMerger(void) { }
+
+ public:
+ void Add(const ReaderAlignment& value);
+ void Clear(void);
+ const ReaderAlignment& First(void) const;
+ const int Size(void) const;
+ ReaderAlignment TakeFirst(void);
+
+ private:
+ typedef std::pair<int, int> KeyType;
+ typedef std::multimap<KeyType, ReaderAlignment> IndexType;
+ typedef std::pair<KeyType, ReaderAlignment> KeyValueType;
+ typedef IndexType::iterator IndexIterator;
+ typedef IndexType::const_iterator IndexConstIterator;
+
+ IndexType m_data;
+};
+
+// IBamMultiMerger implementation - sorted on BamAlignment: Name
+class ReadNameMultiMerger : public IBamMultiMerger {
+
+ public:
+ ReadNameMultiMerger(void) : IBamMultiMerger() { }
+ ~ReadNameMultiMerger(void) { }
+
+ public:
+ void Add(const ReaderAlignment& value);
+ void Clear(void);
+ const ReaderAlignment& First(void) const;
+ const int Size(void) const;
+ ReaderAlignment TakeFirst(void);
+
+ private:
+ typedef std::string KeyType;
+ typedef std::multimap<KeyType, ReaderAlignment> IndexType;
+ typedef std::pair<KeyType, ReaderAlignment> KeyValueType;
+ typedef IndexType::iterator IndexIterator;
+ typedef IndexType::const_iterator IndexConstIterator;
+
+ IndexType m_data;
+};
+
+// ------------------------------------------
+// PositionMultiMerger implementation
+
+inline void PositionMultiMerger::Add(const ReaderAlignment& value) {
+ const KeyType key = std::make_pair<int, int>( value.second->RefID, value.second->Position );
+ m_data.insert( KeyValueType(key, value) );
+}
+
+inline void PositionMultiMerger::Clear(void) {
+ m_data.clear();
+}
+
+inline const ReaderAlignment& PositionMultiMerger::First(void) const {
+ const KeyValueType& entry = (*m_data.begin());
+ return entry.second;
+}
+
+inline const int PositionMultiMerger::Size(void) const {
+ return m_data.size();
+}
+
+inline ReaderAlignment PositionMultiMerger::TakeFirst(void) {
+ IndexIterator first = m_data.begin();
+ ReaderAlignment next = (*first).second;
+ m_data.erase(first);
+ return next;
+}
+
+// ------------------------------------------
+// ReadNameMultiMerger implementation
+
+inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {
+ const KeyType key = value.second->Name;
+ m_data.insert( KeyValueType(key, value) );
+}
+
+inline void ReadNameMultiMerger::Clear(void) {
+ m_data.clear();
+}
+
+inline const ReaderAlignment& ReadNameMultiMerger::First(void) const {
+ const KeyValueType& entry = (*m_data.begin());
+ return entry.second;
+}
+
+inline const int ReadNameMultiMerger::Size(void) const {
+ return m_data.size();
+}
+
+inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) {
+ IndexIterator first = m_data.begin();
+ ReaderAlignment next = (*first).second;
+ m_data.erase(first);
+ return next;
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIMERGER_P_H
--- /dev/null
+// ***************************************************************************
+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#include <api/BamAlignment.h>
+#include <api/BamMultiReader.h>
+#include <api/internal/BamMultiMerger_p.h>
+#include <api/internal/BamMultiReader_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+using namespace std;
+
+// ctor
+BamMultiReaderPrivate::BamMultiReaderPrivate(void)
+ : m_alignments(0)
+ , m_isCoreMode(false)
+ , m_isSortedByPosition(true)
+{ }
+
+// dtor
+BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {
+
+ // close all open BAM readers
+ Close();
+
+ // clean up alignment cache
+ delete m_alignments;
+ m_alignments = 0;
+}
+
+// close the BAM files
+void BamMultiReaderPrivate::Close(void) {
+
+ // clear out alignment cache
+ m_alignments->Clear();
+
+ // iterate over readers
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+
+ // close reader
+ BamReader* reader = (*readerIter).first;
+ BamAlignment* alignment = (*readerIter).second;
+ if ( reader ) reader->Close();
+
+ // delete pointers
+ delete reader;
+ reader = 0;
+ delete alignment;
+ alignment = 0;
+ }
+
+ // clear out readers
+ m_readers.clear();
+
+ // reset flags
+ m_isCoreMode = false;
+ m_isSortedByPosition = true;
+}
+
+// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail
+bool BamMultiReaderPrivate::CreateIndexes(bool useStandardIndex) {
+
+ bool result = true;
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ result &= reader->CreateIndex(useStandardIndex);
+ }
+ return result;
+}
+
+const string BamMultiReaderPrivate::ExtractReadGroup(const string& headerLine) const {
+
+ string readGroup("");
+ stringstream headerLineSs(headerLine);
+ string part;
+
+ // parse @RG header line, looking for the ID: tag
+ while( getline(headerLineSs, part, '\t') ) {
+ stringstream partSs(part);
+ string subtag;
+ getline(partSs, subtag, ':');
+ if ( subtag == "ID" ) {
+ getline(partSs, readGroup, ':');
+ break;
+ }
+ }
+ return readGroup;
+}
+
+// makes a virtual, unified header for all the bam files in the multireader
+const string BamMultiReaderPrivate::GetHeaderText(void) const {
+
+ // just spit single header out if only have one reader open
+ if ( m_readers.size() == 1 ) {
+
+
+ vector<ReaderAlignment>::const_iterator readerBegin = m_readers.begin();
+ const ReaderAlignment& entry = (*readerBegin);
+ const BamReader* reader = entry.first;
+ if ( reader == 0 ) return "";
+ return reader->GetHeaderText();
+ }
+
+ string mergedHeader = "";
+ map<string, bool> readGroups;
+
+ // foreach extraction entry (each BAM file)
+ vector<ReaderAlignment>::const_iterator readerBegin = m_readers.begin();
+ vector<ReaderAlignment>::const_iterator readerIter = readerBegin;
+ vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+
+ // get header from reader
+ const BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+ string headerText = reader->GetHeaderText();
+ if ( headerText.empty() ) continue;
+
+ // store header text in lines
+ map<string, bool> currentFileReadGroups;
+ const vector<string> lines = SplitHeaderText(headerText);
+
+ // iterate over header lines
+ vector<string>::const_iterator linesIter = lines.begin();
+ vector<string>::const_iterator linesEnd = lines.end();
+ for ( ; linesIter != linesEnd; ++linesIter ) {
+
+ // get next line from header, skip if empty
+ const string headerLine = (*linesIter);
+ if ( headerLine.empty() ) { continue; }
+
+ // if first file, save HD & SQ entries
+ if ( readerIter == readerBegin ) {
+ if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
+ mergedHeader.append(headerLine.c_str());
+ mergedHeader.append(1, '\n');
+ }
+ }
+
+ // (for all files) append RG entries if they are unique
+ if ( headerLine.find("@RG") == 0 ) {
+
+ // extract read group name from line
+ const string readGroup = ExtractReadGroup(headerLine);
+
+ // make sure not to duplicate @RG entries
+ if ( readGroups.find(readGroup) == readGroups.end() ) {
+ mergedHeader.append(headerLine.c_str() );
+ mergedHeader.append(1, '\n');
+ readGroups[readGroup] = true;
+ currentFileReadGroups[readGroup] = true;
+ } else {
+ // warn iff we are reading one file and discover duplicated @RG tags in the header
+ // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
+ if ( currentFileReadGroups.find(readGroup) != currentFileReadGroups.end() ) {
+ cerr << "WARNING: duplicate @RG tag " << readGroup
+ << " entry in header of " << reader->GetFilename() << endl;
+ }
+ }
+ }
+ }
+ }
+
+ // return merged header text
+ return mergedHeader;
+}
+
+// get next alignment among all files
+bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
+ return LoadNextAlignment(al, false);
+}
+
+// get next alignment among all files without parsing character data from alignments
+bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
+ return LoadNextAlignment(al, true);
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical
+// references for all BAM files. We enforce this by invoking the above
+// validation function (ValidateReaders) to verify that our reference data
+// is the same across all files on Open, so we will not encounter a situation
+// in which there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// returns the number of reference sequences
+const int BamMultiReaderPrivate::GetReferenceCount(void) const {
+ const ReaderAlignment& firstReader = m_readers.front();
+ const BamReader* reader = firstReader.first;
+ if ( reader ) return reader->GetReferenceCount();
+ else return 0;
+}
+
+// returns vector of reference objects
+const RefVector BamMultiReaderPrivate::GetReferenceData(void) const {
+ const ReaderAlignment& firstReader = m_readers.front();
+ const BamReader* reader = firstReader.first;
+ if ( reader ) return reader->GetReferenceData();
+ else return RefVector();
+}
+
+// returns refID from reference name
+const int BamMultiReaderPrivate::GetReferenceID(const string& refName) const {
+ const ReaderAlignment& firstReader = m_readers.front();
+ const BamReader* reader = firstReader.first;
+ if ( reader ) return reader->GetReferenceID(refName);
+ else return -1; // ERROR case - how to report
+}
+
+// ---------------------------------------------------------------------------------------
+
+// checks if any readers still have alignments
+bool BamMultiReaderPrivate::HasOpenReaders(void) {
+ return ( m_alignments->Size() > 0 );
+}
+
+// returns whether underlying BAM readers ALL have an index loaded
+// this is useful to indicate whether Jump() or SetRegion() are possible
+bool BamMultiReaderPrivate::IsIndexLoaded(void) const {
+ bool ok = true;
+ vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ const BamReader* reader = (*readerIter).first;
+ if ( reader ) ok &= reader->IsIndexLoaded();
+ }
+ return ok;
+}
+
+// jumps to specified region(refID, leftBound) in BAM files, returns success/fail
+bool BamMultiReaderPrivate::Jump(int refID, int position) {
+
+ bool ok = true;
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+
+ ok &= reader->Jump(refID, position);
+ if ( !ok ) {
+ cerr << "ERROR: could not jump " << reader->GetFilename()
+ << " to " << refID << ":" << position << endl;
+ exit(1);
+ }
+ }
+
+ if (ok) UpdateAlignments();
+ return ok;
+}
+
+bool BamMultiReaderPrivate::LoadNextAlignment(BamAlignment& al, bool coreMode) {
+
+ // bail out if no more data to process
+ if ( !HasOpenReaders() ) return false;
+
+ // "pop" next alignment and reader
+ ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
+ BamReader* reader = nextReaderAlignment.first;
+ BamAlignment* alignment = nextReaderAlignment.second;
+
+ // save it by copy to our argument
+ al = BamAlignment(*alignment);
+
+ // peek to next alignment & store in cache
+ m_isCoreMode = coreMode;
+ SaveNextAlignment(reader,alignment);
+
+ // return success
+ return true;
+}
+
+// opens BAM files
+bool BamMultiReaderPrivate::Open(const vector<string>& filenames,
+ bool openIndexes,
+ bool coreMode,
+ bool preferStandardIndex)
+{
+ // store core mode flag
+ m_isCoreMode = coreMode;
+
+ // first clear out any prior alignment cache prior data
+ if ( m_alignments ) {
+ m_alignments->Clear();
+ delete m_alignments;
+ m_alignments = 0;
+ }
+
+ // create alignment cache based on sorting mode
+ if ( m_isSortedByPosition )
+ m_alignments = new PositionMultiMerger;
+ else
+ m_alignments = new ReadNameMultiMerger;
+
+ // iterate over filenames
+ vector<string>::const_iterator filenameIter = filenames.begin();
+ vector<string>::const_iterator filenameEnd = filenames.end();
+ for ( ; filenameIter != filenameEnd; ++filenameIter ) {
+ const string filename = (*filenameIter);
+
+ bool openedOk = true;
+ BamReader* reader = new BamReader;
+ openedOk = reader->Open(filename, "", openIndexes, preferStandardIndex);
+
+ // if file opened ok
+ if ( openedOk ) {
+
+ // try to read first alignment
+ bool fileOk = true;
+ BamAlignment* alignment = new BamAlignment;
+ fileOk &= ( coreMode ? reader->GetNextAlignmentCore(*alignment)
+ : reader->GetNextAlignment(*alignment) );
+
+ if ( fileOk ) {
+
+ m_readers.push_back( make_pair(reader, alignment) );
+ m_alignments->Add( make_pair(reader, alignment) );
+
+ } else {
+ cerr << "WARNING: could not read first alignment in "
+ << filename << ", ignoring file" << endl;
+
+ // if only file available & could not be read, return failure
+ if ( filenames.size() == 1 )
+ return false;
+ }
+ }
+
+ // TODO; any further error handling when openedOK is false ??
+ else return false;
+ }
+
+ // files opened ok, at least one alignment could be read,
+ // now need to check that all files use same reference data
+ ValidateReaders();
+ return true;
+}
+
+// print associated filenames to stdout
+void BamMultiReaderPrivate::PrintFilenames(void) const {
+
+ vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ const BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+ cout << reader->GetFilename() << endl;
+ }
+}
+
+// returns BAM file pointers to beginning of alignment data
+bool BamMultiReaderPrivate::Rewind(void) {
+
+ bool result = true;
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+ result &= reader->Rewind();
+ }
+ return result;
+}
+
+void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
+
+ // must be in core mode && sorting by position to call GNACore()
+ if ( m_isCoreMode && m_isSortedByPosition ) {
+ if ( reader->GetNextAlignmentCore(*alignment) )
+ m_alignments->Add( make_pair(reader, alignment) );
+ }
+
+ // not in core mode and/or sorting by readname, must call GNA()
+ else {
+ if ( reader->GetNextAlignment(*alignment) )
+ m_alignments->Add( make_pair(reader, alignment) );
+ }
+}
+
+// sets the index caching mode on the readers
+void BamMultiReaderPrivate::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) {
+
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+ reader->SetIndexCacheMode(mode);
+ }
+}
+
+bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) {
+
+ // NB: While it may make sense to track readers in which we can
+ // successfully SetRegion, In practice a failure of SetRegion means "no
+ // alignments here." It makes sense to simply accept the failure,
+ // UpdateAlignments(), and continue.
+
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue;
+ if ( !reader->SetRegion(region) ) {
+ cerr << "ERROR: could not jump " << reader->GetFilename() << " to "
+ << region.LeftRefID << ":" << region.LeftPosition << ".."
+ << region.RightRefID << ":" << region.RightPosition << endl;
+ }
+ }
+
+ UpdateAlignments();
+ return true;
+}
+
+void BamMultiReaderPrivate::SetSortOrder(const BamMultiReader::SortOrder& order) {
+
+ const BamMultiReader::SortOrder oldSortOrder = ( m_isSortedByPosition ? BamMultiReader::SortedByPosition
+ : BamMultiReader::SortedByReadName ) ;
+ // skip if no change needed
+ if ( oldSortOrder == order ) return;
+
+ // create new alignment cache
+ IBamMultiMerger* newAlignmentCache(0);
+ if ( order == BamMultiReader::SortedByPosition ) {
+ newAlignmentCache = new PositionMultiMerger;
+ m_isSortedByPosition = true;
+ }
+ else {
+ newAlignmentCache = new ReadNameMultiMerger;
+ m_isSortedByPosition = false;
+ }
+
+ // copy old cache contents to new cache
+ while ( m_alignments->Size() > 0 ) {
+ ReaderAlignment value = m_alignments->TakeFirst();
+ newAlignmentCache->Add(value);
+ }
+
+ // remove old cache structure & point to new cache
+ delete m_alignments;
+ m_alignments = newAlignmentCache;
+}
+
+// updates our alignment cache
+void BamMultiReaderPrivate::UpdateAlignments(void) {
+
+ // clear the cache
+ m_alignments->Clear();
+
+ // iterate over readers
+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+ BamReader* reader = (*readerIter).first;
+ BamAlignment* alignment = (*readerIter).second;
+ if ( reader == 0 ) continue;
+ SaveNextAlignment(reader, alignment);
+ }
+}
+
+// splits the entire header into a list of strings
+const vector<string> BamMultiReaderPrivate::SplitHeaderText(const string& headerText) const {
+ stringstream header(headerText);
+ vector<string> lines;
+ string item;
+ while ( getline(header, item) )
+ lines.push_back(item);
+ return lines;
+}
+
+// ValidateReaders checks that all the readers point to BAM files representing
+// alignments against the same set of reference sequences, and that the
+// sequences are identically ordered. If these checks fail the operation of
+// the multireader is undefined, so we force program exit.
+void BamMultiReaderPrivate::ValidateReaders(void) const {
+
+ // retrieve first reader data
+ const BamReader* firstReader = m_readers.front().first;
+ if ( firstReader == 0 ) return; // signal error?
+ const RefVector firstReaderRefData = firstReader->GetReferenceData();
+ const int firstReaderRefCount = firstReader->GetReferenceCount();
+ const int firstReaderRefSize = firstReaderRefData.size();
+
+ // iterate over all readers
+ vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();
+ vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();
+ for ( ; readerIter != readerEnd; ++readerIter ) {
+
+ // get current reader data
+ BamReader* reader = (*readerIter).first;
+ if ( reader == 0 ) continue; // error?
+ const RefVector currentReaderRefData = reader->GetReferenceData();
+ const int currentReaderRefCount = reader->GetReferenceCount();
+ const int currentReaderRefSize = currentReaderRefData.size();
+
+ // init container iterators
+ RefVector::const_iterator firstRefIter = firstReaderRefData.begin();
+ RefVector::const_iterator firstRefEnd = firstReaderRefData.end();
+ RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
+
+ // compare reference counts from BamReader ( & container size, in case of BR error)
+ if ( (currentReaderRefCount != firstReaderRefCount) ||
+ (firstReaderRefSize != currentReaderRefSize) )
+ {
+ cerr << "ERROR: mismatched number of references in " << reader->GetFilename()
+ << " expected " << firstReaderRefCount
+ << " reference sequences but only found " << currentReaderRefCount << endl;
+ exit(1);
+ }
+
+ // this will be ok; we just checked above that we have identically-sized sets of references
+ // here we simply check if they are all, in fact, equal in content
+ while ( firstRefIter != firstRefEnd ) {
+
+ const RefData& firstRef = (*firstRefIter);
+ const RefData& currentRef = (*currentRefIter);
+
+ // compare reference name & length
+ if ( (firstRef.RefName != currentRef.RefName) ||
+ (firstRef.RefLength != currentRef.RefLength) )
+ {
+ cerr << "ERROR: mismatched references found in " << reader->GetFilename()
+ << " expected: " << endl;
+ RefVector::const_iterator refIter = firstReaderRefData.begin();
+ RefVector::const_iterator refEnd = firstReaderRefData.end();
+ for ( ; refIter != refEnd; ++refIter ) {
+ const RefData& entry = (*refIter);
+ cerr << entry.RefName << " " << entry.RefLength << endl;
+ }
+
+ cerr << "but found: " << endl;
+ refIter = currentReaderRefData.begin();
+ refEnd = currentReaderRefData.end();
+ for ( ; refIter != refEnd; ++refIter ) {
+ const RefData& entry = (*refIter);
+ cerr << entry.RefName << " " << entry.RefLength << endl;
+ }
+
+ exit(1);
+ }
+
+ // update iterators
+ ++firstRefIter;
+ ++currentRefIter;
+ }
+ }
+}
--- /dev/null
+// ***************************************************************************
+// BamMultiReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#ifndef BAMMULTIREADER_P_H
+#define BAMMULTIREADER_P_H
+
+// -------------
+// W A R N I N G
+// -------------
+//
+// This file is not part of the BamTools API. It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamMultiReader.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class IBamMultiMerger;
+
+class BamMultiReaderPrivate {
+
+ // constructor / destructor
+ public:
+ BamMultiReaderPrivate(void);
+ ~BamMultiReaderPrivate(void);
+
+ // public interface
+ public:
+
+ // file operations
+ void Close(void);
+ bool Open(const std::vector<std::string>& filenames,
+ bool openIndexes = true,
+ bool coreMode = false,
+ bool preferStandardIndex = false);
+ bool IsIndexLoaded(void) const;
+ bool Jump(int refID, int position = 0);
+ void PrintFilenames(void) const;
+ bool SetRegion(const BamRegion& region);
+ bool Rewind(void);
+
+ // access alignment data
+ bool GetNextAlignment(BamAlignment& al);
+ bool GetNextAlignmentCore(BamAlignment& al);
+ bool HasOpenReaders(void);
+ void SetSortOrder(const BamMultiReader::SortOrder& order);
+
+ // access auxiliary data
+ const std::string GetHeaderText(void) const;
+ const int GetReferenceCount(void) const;
+ const BamTools::RefVector GetReferenceData(void) const;
+ const int GetReferenceID(const std::string& refName) const;
+
+ // BAM index operations
+ bool CreateIndexes(bool useStandardIndex = true);
+ void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
+
+ // internal methods
+ private:
+ const std::string ExtractReadGroup(const std::string& headerLine) const;
+ bool LoadNextAlignment(BamAlignment& al, bool coreMode);
+ void SaveNextAlignment(BamTools::BamReader* reader, BamTools::BamAlignment* alignment);
+ const std::vector<std::string> SplitHeaderText(const std::string& headerText) const;
+ // updates our alignment cache
+ void UpdateAlignments(void);
+ // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
+ void ValidateReaders(void) const;
+
+ // data members
+ public:
+ typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+ std::vector<ReaderAlignment> m_readers;
+
+ IBamMultiMerger* m_alignments;
+ bool m_isCoreMode;
+ bool m_isSortedByPosition;
+};
+
+} // namesapce Internal
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_P_H