// ***************************************************************************
// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 9 September 2011 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// ctor: starts with no alignment cache (m_alignments == 0)
// and with the sort order set to BamMultiReader::SortedByPosition
BamMultiReaderPrivate::BamMultiReaderPrivate(void)
: m_alignments(0)
- , m_isCoreMode(false)
, m_sortOrder(BamMultiReader::SortedByPosition)
{ }
// makes a virtual, unified header for all the bam files in the multireader
string BamMultiReaderPrivate::GetHeaderText(void) const {
+ // TODO: merge SamHeader objects instead of parsing string data (again)
+
// if only one reader is open
if ( m_readers.size() == 1 ) {
// get next alignment among all files, with its character data built
// (pops from the alignment cache, passing needCharData = true)
bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
- m_isCoreMode = false;
- return LoadNextAlignment(al);
+ return PopNextCachedAlignment(al, true);
}
// get next alignment among all files without parsing character data from alignments
// (pops from the alignment cache, passing needCharData = false)
bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
- m_isCoreMode = true;
- return LoadNextAlignment(al);
+ return PopNextCachedAlignment(al, false);
}
// ---------------------------------------------------------------------------------------
return true;
}
-bool BamMultiReaderPrivate::LoadNextAlignment(BamAlignment& al) {
-
- // bail out if no more data to process
- if ( !HasAlignmentData() )
- return false;
-
- // "pop" next alignment and reader
- ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
- BamReader* reader = nextReaderAlignment.first;
- BamAlignment* alignment = nextReaderAlignment.second;
-
- // store cached alignment into destination parameter (by copy)
- al = *alignment;
-
- // peek to next alignment & store in cache
- SaveNextAlignment(reader, alignment);
-
- // return success
- return true;
+bool BamMultiReaderPrivate::LoadNextAlignment(BamReader* reader, BamAlignment* alignment) {
+ // reads the reader's next alignment via GetNextAlignmentCore(); char data is built lazily,
+ // only populated on demand by sorting merger or client call to GetNextAlignment()
+ return reader->GetNextAlignmentCore(*alignment);
}
// locate (& load) index files for BAM readers that don't already have one loaded
if ( m_alignments == 0 ) return false;
}
+ // put all current readers back at beginning (refreshes alignment cache)
+ Rewind();
+
// iterate over filenames
vector<string>::const_iterator filenameIter = filenames.begin();
vector<string>::const_iterator filenameEnd = filenames.end();
if ( filename.empty() ) continue;
// attempt to open BamReader on filename
- BamReader* reader = OpenReader(filename);
- if ( reader == 0 ) continue;
-
- // store reader with new alignment
- m_readers.push_back( make_pair(reader, new BamAlignment) );
+ bool openedOk = false;
+ ReaderAlignment ra = OpenReader(filename, &openedOk);
+ if ( openedOk ) {
+ m_readers.push_back(ra); // store reader/alignment in local list
+ m_alignments->Add(ra); // add reader/alignment to sorting cache
+ }
}
- // validate & rewind any opened readers, also refreshes alignment cache
- if ( !m_readers.empty() ) {
+ // if more than one reader open, check for reference consistency
+ if ( m_readers.size() > 1 )
ValidateReaders();
- Rewind();
- }
// return success
return true;
return result;
}
-BamReader* BamMultiReaderPrivate::OpenReader(const std::string& filename) {
+ReaderAlignment BamMultiReaderPrivate::OpenReader(const string& filename, bool* ok) {
- // create new BamReader
+ // clear status flag
+ *ok = false;
+
+ // create new BamReader & BamAlignment
BamReader* reader = new BamReader;
+ BamAlignment* alignment = new BamAlignment;
// if reader opens OK
if ( reader->Open(filename) ) {
- // attempt to read first alignment (sanity check)
- // if ok, then return BamReader pointer
- BamAlignment al;
- if ( reader->GetNextAlignmentCore(al) )
- return reader;
+ // if first alignment reads OK
+ if ( LoadNextAlignment(reader, alignment) ) {
+ *ok = true;
+ return make_pair(reader, alignment);
+ }
// could not read alignment
else {
// reader could not open
else {
- cerr << "BamMultiReader WARNING: Could not open: "
+ cerr << "BamMultiReader WARNING: Could not open "
<< filename << ", ignoring file" << endl;
}
// if we get here, there was a problem with this BAM file (opening or reading)
// clean up memory allocations & return an empty reader/alignment pair (*ok remains false)
delete reader;
- return 0;
+ delete alignment;
+ return ReaderAlignment();
}
// print associated filenames to stdout
cout << (*filenameIter) << endl;
}
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+ // bail out (returning false) if no more data to process
+ if ( !HasAlignmentData() )
+ return false;
+
+ // pop the first reader/alignment pair off the alignment cache
+ ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
+ BamReader* reader = nextReaderAlignment.first;
+ BamAlignment* alignment = nextReaderAlignment.second;
+
+ // copy cached alignment into destination parameter; the cached object itself is reused below
+ al = *alignment;
+
+ // build char data & record the source filename on the copy, only if requested
+ if ( needCharData ) {
+ al.BuildCharData();
+ al.Filename = reader->GetFilename();
+ }
+
+ // load next alignment from the same reader & put it back in the cache (if one was read)
+ SaveNextAlignment(reader, alignment);
+
+ // return success
+ return true;
+}
+
// returns BAM file pointers to beginning of alignment data & resets alignment cache
bool BamMultiReaderPrivate::Rewind(void) {
void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
- // must be in core mode && NOT sorting by read name to call GNACore()
- if ( m_isCoreMode && m_sortOrder != BamMultiReader::SortedByReadName ) {
- if ( reader->GetNextAlignmentCore(*alignment) )
- m_alignments->Add( make_pair(reader, alignment) );
- }
-
- // not in core mode and/or sorting by readname, must call GNA()
- else {
- if ( reader->GetNextAlignment(*alignment) )
- m_alignments->Add( make_pair(reader, alignment) );
- }
+ // load the reader's next alignment into 'alignment'; if that succeeds, add the pair to the cache
+ if ( LoadNextAlignment(reader, alignment) )
+ m_alignments->Add( make_pair(reader, alignment) );
}
// sets the index caching mode on the readers
// attempt to set BamReader's region of interest
if ( !reader->SetRegion(region) ) {
- cerr << "BamMultiReader ERROR: could not jump " << reader->GetFilename() << " to "
+ cerr << "BamMultiReader WARNING: could not jump " << reader->GetFilename() << " to "
<< region.LeftRefID << ":" << region.LeftPosition << ".."
<< region.RightRefID << ":" << region.RightPosition << endl;
}
// clear the cache
m_alignments->Clear();
- // seed cache with fully-populated alignments
- // further updates will fill with full/core-only as requested
- m_isCoreMode = false;
-
// iterate over readers
vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
vector<ReaderAlignment>::iterator readerEnd = m_readers.end();