X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Finternal%2FBamMultiReader_p.cpp;h=583085c7cd11782b68337f483113cd8ef702110a;hb=8c80d760637f8df39262683cd2570f0589423d36;hp=01cdf8f9d2c38a8a5d40946c3897424bfc045e63;hpb=83255b02911cc3e267386e20bcb8747478b4c239;p=bamtools.git diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp index 01cdf8f..583085c 100644 --- a/src/api/internal/BamMultiReader_p.cpp +++ b/src/api/internal/BamMultiReader_p.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 23 December 2010 (DB) +// Last modified: 21 March 2011 (DB) // --------------------------------------------------------------------------- // Functionality for simultaneously reading multiple BAM files // ************************************************************************* @@ -26,7 +26,7 @@ using namespace std; BamMultiReaderPrivate::BamMultiReaderPrivate(void) : m_alignments(0) , m_isCoreMode(false) - , m_isSortedByPosition(true) + , m_sortOrder(BamMultiReader::SortedByPosition) { } // dtor @@ -40,50 +40,96 @@ BamMultiReaderPrivate::~BamMultiReaderPrivate(void) { m_alignments = 0; } -// close the BAM files +// close all BAM files void BamMultiReaderPrivate::Close(void) { + CloseFiles( Filenames() ); +} - // clear out alignment cache - m_alignments->Clear(); - - // iterate over readers - vector::iterator readerIter = m_readers.begin(); - vector::iterator readerEnd = m_readers.end(); - for ( ; readerIter != readerEnd; ++readerIter ) { +// close requested BAM file +void BamMultiReaderPrivate::CloseFile(const string& filename) { + vector filenames(1, filename); + CloseFiles(filenames); +} - // close reader - BamReader* reader = (*readerIter).first; - BamAlignment* alignment = (*readerIter).second; - if ( reader ) reader->Close(); +// close requested BAM files +void BamMultiReaderPrivate::CloseFiles(const vector& filenames) { - // delete pointers - delete reader; - reader = 0; - delete alignment; - alignment = 0; + // iterate over filenames + vector::const_iterator filesIter = filenames.begin(); + vector::const_iterator filesEnd = filenames.end(); + for ( ; filesIter != filesEnd; ++filesIter ) { + const string& filename = (*filesIter); + if ( filename.empty() ) continue; + + // iterate over readers + vector::iterator readerIter = m_readers.begin(); + vector::iterator readerEnd = m_readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + BamReader* reader = (*readerIter).first; + if ( reader == 0 ) continue; + + // if reader matches requested filename + if ( reader->GetFilename() == filename ) { + + // remove reader/alignment from alignment cache + m_alignments->Remove(reader); + + // close & delete reader + reader->Close(); + delete reader; + reader = 0; + + // delete reader's alignment entry + BamAlignment* alignment = (*readerIter).second; + delete alignment; + alignment = 0; + + // remove reader from container + m_readers.erase(readerIter); + + // on match, just go on to next filename + // (no need to keep looking and iterator is invalid now anyway) + break; + } + } } - // clear out readers - m_readers.clear(); - - // reset flags - m_isCoreMode = false; - m_isSortedByPosition = true; + // make sure alignment cache is cleared if all readers are now closed + if ( m_readers.empty() && m_alignments != 0 ) + m_alignments->Clear(); } -// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail -bool BamMultiReaderPrivate::CreateIndexes(bool useStandardIndex) { +// creates index files for BAM files that don't have them +bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { bool result = true; + + // iterate over readers vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; - result &= reader->CreateIndex(useStandardIndex); + if ( reader == 0 ) continue; + + // if reader doesn't have an index, create one + if ( !reader->HasIndex() ) + result &= reader->CreateIndex(type); } + return result; } +IBamMultiMerger* BamMultiReaderPrivate::CreateMergerForCurrentSortOrder(void) const { + switch ( m_sortOrder ) { + case ( BamMultiReader::SortedByPosition ) : return new PositionMultiMerger; + case ( BamMultiReader::SortedByReadName ) : return new ReadNameMultiMerger; + case ( BamMultiReader::Unsorted ) : return new UnsortedMultiMerger; + default : + cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; + return 0; + } +} + const string BamMultiReaderPrivate::ExtractReadGroup(const string& headerLine) const { string readGroup(""); @@ -103,21 +149,50 @@ const string BamMultiReaderPrivate::ExtractReadGroup(const string& headerLine) c return readGroup; } +const vector BamMultiReaderPrivate::Filenames(void) const { + + // init filename container + vector filenames; + filenames.reserve( m_readers.size() ); + + // iterate over readers + vector::const_iterator readerIter = m_readers.begin(); + vector::const_iterator readerEnd = m_readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + const BamReader* reader = (*readerIter).first; + if ( reader == 0 ) continue; + + // store filename if not empty + const string filename = reader->GetFilename(); + if ( !filename.empty() ) + filenames.push_back( reader->GetFilename() ); + } + + // return result + return filenames; +} + +SamHeader BamMultiReaderPrivate::GetHeader(void) const { + string text = GetHeaderText(); + return SamHeader(text); +} + // makes a virtual, unified header for all the bam files in the multireader -const string BamMultiReaderPrivate::GetHeaderText(void) const { +string BamMultiReaderPrivate::GetHeaderText(void) const { - // just spit single header out if only have one reader open + // if only one reader is open if ( m_readers.size() == 1 ) { + // just return reader's header text + const ReaderAlignment& ra = m_readers.front(); + const BamReader* reader = ra.first; + if ( reader ) return reader->GetHeaderText(); - vector::const_iterator readerBegin = m_readers.begin(); - const ReaderAlignment& entry = (*readerBegin); - const BamReader* reader = entry.first; - if ( reader == 0 ) return ""; - return reader->GetHeaderText(); + // invalid reader + return string(); } - string mergedHeader = ""; + string mergedHeader(""); map readGroups; // foreach extraction entry (each BAM file) @@ -125,10 +200,10 @@ const string BamMultiReaderPrivate::GetHeaderText(void) const { vector::const_iterator readerIter = readerBegin; vector::const_iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { - - // get header from reader const BamReader* reader = (*readerIter).first; if ( reader == 0 ) continue; + + // get header from reader string headerText = reader->GetHeaderText(); if ( headerText.empty() ) continue; @@ -143,9 +218,10 @@ const string BamMultiReaderPrivate::GetHeaderText(void) const { // get next line from header, skip if empty const string headerLine = (*linesIter); - if ( headerLine.empty() ) { continue; } + if ( headerLine.empty() ) continue; // if first file, save HD & SQ entries + // TODO: what if first file has empty header, should just check for empty 'mergedHeader' instead ? if ( readerIter == readerBegin ) { if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) { mergedHeader.append(headerLine.c_str()); @@ -169,8 +245,8 @@ const string BamMultiReaderPrivate::GetHeaderText(void) const { // warn iff we are reading one file and discover duplicated @RG tags in the header // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags if ( currentFileReadGroups.find(readGroup) != currentFileReadGroups.end() ) { - cerr << "WARNING: duplicate @RG tag " << readGroup - << " entry in header of " << reader->GetFilename() << endl; + cerr << "BamMultiReader WARNING: duplicate @RG tag " << readGroup + << " entry in header of " << reader->GetFilename() << endl; } } } @@ -183,207 +259,351 @@ const string BamMultiReaderPrivate::GetHeaderText(void) const { // get next alignment among all files bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) { - return LoadNextAlignment(al, false); + m_isCoreMode = false; + return LoadNextAlignment(al); } // get next alignment among all files without parsing character data from alignments bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) { - return LoadNextAlignment(al, true); + m_isCoreMode = true; + return LoadNextAlignment(al); } // --------------------------------------------------------------------------------------- // // NB: The following GetReferenceX() functions assume that we have identical -// references for all BAM files. We enforce this by invoking the above -// validation function (ValidateReaders) to verify that our reference data -// is the same across all files on Open, so we will not encounter a situation -// in which there is a mismatch and we are still live. +// references for all BAM files. We enforce this by invoking the +// ValidateReaders() method to verify that our reference data is the same +// across all files on Open - so we will not encounter a situation in which +// there is a mismatch and we are still live. // // --------------------------------------------------------------------------------------- // returns the number of reference sequences -const int BamMultiReaderPrivate::GetReferenceCount(void) const { - const ReaderAlignment& firstReader = m_readers.front(); - const BamReader* reader = firstReader.first; +int BamMultiReaderPrivate::GetReferenceCount(void) const { + + // handle empty multireader + if ( m_readers.empty() ) + return 0; + + // return reference count from first reader + const ReaderAlignment& ra = m_readers.front(); + const BamReader* reader = ra.first; if ( reader ) return reader->GetReferenceCount(); - else return 0; + + // invalid reader + return 0; } // returns vector of reference objects const RefVector BamMultiReaderPrivate::GetReferenceData(void) const { - const ReaderAlignment& firstReader = m_readers.front(); - const BamReader* reader = firstReader.first; + + // handle empty multireader + if ( m_readers.empty() ) + return RefVector(); + + // return reference data from first BamReader + const ReaderAlignment& ra = m_readers.front(); + const BamReader* reader = ra.first; if ( reader ) return reader->GetReferenceData(); - else return RefVector(); + + // invalid reader + return RefVector(); } // returns refID from reference name -const int BamMultiReaderPrivate::GetReferenceID(const string& refName) const { - const ReaderAlignment& firstReader = m_readers.front(); - const BamReader* reader = firstReader.first; +int BamMultiReaderPrivate::GetReferenceID(const string& refName) const { + + // handle empty multireader + if ( m_readers.empty() ) + return -1; + + // return reference ID from first BamReader + const ReaderAlignment& ra = m_readers.front(); + const BamReader* reader = ra.first; if ( reader ) return reader->GetReferenceID(refName); - else return -1; // ERROR case - how to report -} + // invalid reader + return -1; +} // --------------------------------------------------------------------------------------- // checks if any readers still have alignments -bool BamMultiReaderPrivate::HasOpenReaders(void) { - return ( m_alignments->Size() > 0 ); +bool BamMultiReaderPrivate::HasAlignmentData(void) const { + if ( m_alignments == 0 ) + return false; + return !m_alignments->IsEmpty(); } -// returns whether underlying BAM readers ALL have an index loaded +// returns true if all readers have index data available // this is useful to indicate whether Jump() or SetRegion() are possible -bool BamMultiReaderPrivate::IsIndexLoaded(void) const { - bool ok = true; +bool BamMultiReaderPrivate::HasIndexes(void) const { + + // handle empty multireader + if ( m_readers.empty() ) + return false; + + bool result = true; + + // iterate over readers + vector::const_iterator readerIter = m_readers.begin(); + vector::const_iterator readerEnd = m_readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + const BamReader* reader = (*readerIter).first; + if ( reader == 0 ) continue; + + // see if current reader has index data + result &= reader->HasIndex(); + } + + return result; +} + +// returns true if multireader has open readers +bool BamMultiReaderPrivate::HasOpenReaders(void) { + + // iterate over readers vector::const_iterator readerIter = m_readers.begin(); vector::const_iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { const BamReader* reader = (*readerIter).first; - if ( reader ) ok &= reader->IsIndexLoaded(); + if ( reader == 0 ) continue; + + // return true whenever an open reader is found + if ( reader->IsOpen() ) return true; } - return ok; + + // no readers open + return false; } -// jumps to specified region(refID, leftBound) in BAM files, returns success/fail +// performs random-access jump using (refID, position) as a left-bound bool BamMultiReaderPrivate::Jump(int refID, int position) { - bool ok = true; + // NB: While it may make sense to track readers in which we can + // successfully Jump, in practice a failure of Jump means "no + // alignments here." It makes sense to simply accept the failure, + // UpdateAlignments(), and continue. + + // iterate over readers vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; if ( reader == 0 ) continue; - ok &= reader->Jump(refID, position); - if ( !ok ) { - cerr << "ERROR: could not jump " << reader->GetFilename() + // attempt jump() on each + if ( !reader->Jump(refID, position) ) { + cerr << "BamMultiReader ERROR: could not jump " << reader->GetFilename() << " to " << refID << ":" << position << endl; - exit(1); } } - if (ok) UpdateAlignments(); - return ok; + // update alignment cache & return success + UpdateAlignmentCache(); + return true; } -bool BamMultiReaderPrivate::LoadNextAlignment(BamAlignment& al, bool coreMode) { +bool BamMultiReaderPrivate::LoadNextAlignment(BamAlignment& al) { // bail out if no more data to process - if ( !HasOpenReaders() ) return false; + if ( !HasAlignmentData() ) + return false; // "pop" next alignment and reader ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst(); - BamReader* reader = nextReaderAlignment.first; + BamReader* reader = nextReaderAlignment.first; BamAlignment* alignment = nextReaderAlignment.second; - // save it by copy to our argument - al = BamAlignment(*alignment); + // store cached alignment into destination parameter (by copy) + al = *alignment; // peek to next alignment & store in cache - m_isCoreMode = coreMode; - SaveNextAlignment(reader,alignment); + SaveNextAlignment(reader, alignment); // return success return true; } -// opens BAM files -bool BamMultiReaderPrivate::Open(const vector& filenames, - bool openIndexes, - bool coreMode, - bool preferStandardIndex) -{ - // store core mode flag - m_isCoreMode = coreMode; - - // first clear out any prior alignment cache prior data - if ( m_alignments ) { - m_alignments->Clear(); - delete m_alignments; - m_alignments = 0; +// locate (& load) index files for BAM readers that don't already have one loaded +bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType) { + + bool result = true; + + // iterate over readers + vector::iterator readerIter = m_readers.begin(); + vector::iterator readerEnd = m_readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + BamReader* reader = (*readerIter).first; + if ( reader == 0 ) continue; + + // if reader has no index, try to locate one + if ( !reader->HasIndex() ) + result &= reader->LocateIndex(preferredType); } - // create alignment cache based on sorting mode - if ( m_isSortedByPosition ) - m_alignments = new PositionMultiMerger; - else - m_alignments = new ReadNameMultiMerger; + return result; +} + +// opens BAM files +bool BamMultiReaderPrivate::Open(const vector& filenames) { + + // create alignment cache if neccessary + if ( m_alignments == 0 ) { + m_alignments = CreateMergerForCurrentSortOrder(); + if ( m_alignments == 0 ) return false; + } // iterate over filenames vector::const_iterator filenameIter = filenames.begin(); vector::const_iterator filenameEnd = filenames.end(); for ( ; filenameIter != filenameEnd; ++filenameIter ) { - const string filename = (*filenameIter); + const string& filename = (*filenameIter); + if ( filename.empty() ) continue; + + // attempt to open BamReader on filename + BamReader* reader = OpenReader(filename); + if ( reader == 0 ) continue; + + // store reader with new alignment + m_readers.push_back( make_pair(reader, new BamAlignment) ); + } + + // validate & rewind any opened readers, also refreshes alignment cache + if ( !m_readers.empty() ) { + ValidateReaders(); + Rewind(); + } + + // return success + return true; +} - bool openedOk = true; - BamReader* reader = new BamReader; - openedOk = reader->Open(filename, "", openIndexes, preferStandardIndex); +bool BamMultiReaderPrivate::OpenFile(const std::string& filename) { + vector filenames(1, filename); + return Open(filenames); +} - // if file opened ok - if ( openedOk ) { +bool BamMultiReaderPrivate::OpenIndexes(const vector& indexFilenames) { - // try to read first alignment - bool fileOk = true; - BamAlignment* alignment = new BamAlignment; - fileOk &= ( coreMode ? reader->GetNextAlignmentCore(*alignment) - : reader->GetNextAlignment(*alignment) ); + // TODO: This needs to be cleaner - should not assume same order. + // And either way, shouldn't start at first reader. Should start at + // first reader without an index? - if ( fileOk ) { + // make sure same number of index filenames as readers + if ( m_readers.size() != indexFilenames.size() || !indexFilenames.empty() ) + return false; - m_readers.push_back( make_pair(reader, alignment) ); - m_alignments->Add( make_pair(reader, alignment) ); + // init result flag + bool result = true; - } else { - cerr << "WARNING: could not read first alignment in " - << filename << ", ignoring file" << endl; + // iterate over BamReaders + vector::const_iterator indexFilenameIter = indexFilenames.begin(); + vector::const_iterator indexFilenameEnd = indexFilenames.end(); + vector::iterator readerIter = m_readers.begin(); + vector::iterator readerEnd = m_readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + BamReader* reader = (*readerIter).first; - // if only file available & could not be read, return failure - if ( filenames.size() == 1 ) - return false; - } + // open index filename on reader + if ( reader ) { + const string& indexFilename = (*indexFilenameIter); + result &= reader->OpenIndex(indexFilename); } - // TODO; any further error handling when openedOK is false ?? - else return false; + // increment filename iterator, skip if no more index files to open + if ( ++indexFilenameIter == indexFilenameEnd ) + break; } - // files opened ok, at least one alignment could be read, - // now need to check that all files use same reference data - ValidateReaders(); - return true; + // TODO: validation ?? + + // return success/fail + return result; +} + +BamReader* BamMultiReaderPrivate::OpenReader(const std::string& filename) { + + // create new BamReader + BamReader* reader = new BamReader; + + // if reader opens OK + if ( reader->Open(filename) ) { + + // attempt to read first alignment (sanity check) + // if ok, then return BamReader pointer + BamAlignment al; + if ( reader->GetNextAlignmentCore(al) ) + return reader; + + // could not read alignment + else { + cerr << "BamMultiReader WARNING: Could not read first alignment from " + << filename << ", ignoring file" << endl; + } + } + + // reader could not open + else { + cerr << "BamMultiReader WARNING: Could not open: " + << filename << ", ignoring file" << endl; + } + + // if we get here, there was a problem with this BAM file (opening or reading) + // clean up memory allocation & return null pointer + delete reader; + return 0; } // print associated filenames to stdout void BamMultiReaderPrivate::PrintFilenames(void) const { + const vector& filenames = Filenames(); + vector::const_iterator filenameIter = filenames.begin(); + vector::const_iterator filenameEnd = filenames.end(); + for ( ; filenameIter != filenameEnd; ++filenameIter ) + cout << (*filenameIter) << endl; +} - vector::const_iterator readerIter = m_readers.begin(); - vector::const_iterator readerEnd = m_readers.end(); - for ( ; readerIter != readerEnd; ++readerIter ) { - const BamReader* reader = (*readerIter).first; - if ( reader == 0 ) continue; - cout << reader->GetFilename() << endl; +// returns BAM file pointers to beginning of alignment data & resets alignment cache +bool BamMultiReaderPrivate::Rewind(void) { + + // clear out alignment cache + m_alignments->Clear(); + + // attempt to rewind files + if ( !RewindReaders() ) { + cerr << "BamMultiReader ERROR: could not rewind file(s) successfully"; + return false; } + + // reset cache & return success + UpdateAlignmentCache(); + return true; } // returns BAM file pointers to beginning of alignment data -bool BamMultiReaderPrivate::Rewind(void) { +bool BamMultiReaderPrivate::RewindReaders(void) { bool result = true; + + // iterate over readers vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; if ( reader == 0 ) continue; + + // attempt rewind on BamReader result &= reader->Rewind(); } + return result; } void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) { - // must be in core mode && sorting by position to call GNACore() - if ( m_isCoreMode && m_isSortedByPosition ) { + // must be in core mode && NOT sorting by read name to call GNACore() + if ( m_isCoreMode && m_sortOrder != BamMultiReader::SortedByReadName ) { if ( reader->GetNextAlignmentCore(*alignment) ) m_alignments->Add( make_pair(reader, alignment) ); } @@ -396,13 +616,16 @@ void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* a } // sets the index caching mode on the readers -void BamMultiReaderPrivate::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) { +void BamMultiReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode mode) { + // iterate over readers vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; if ( reader == 0 ) continue; + + // set reader's index cache mode reader->SetIndexCacheMode(mode); } } @@ -414,43 +637,41 @@ bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) { // alignments here." It makes sense to simply accept the failure, // UpdateAlignments(), and continue. + // iterate over alignments vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; if ( reader == 0 ) continue; + + // attempt to set BamReader's region of interest if ( !reader->SetRegion(region) ) { - cerr << "ERROR: could not jump " << reader->GetFilename() << " to " + cerr << "BamMultiReader ERROR: could not jump " << reader->GetFilename() << " to " << region.LeftRefID << ":" << region.LeftPosition << ".." << region.RightRefID << ":" << region.RightPosition << endl; } } - UpdateAlignments(); + // update alignment cache & return success + UpdateAlignmentCache(); return true; } void BamMultiReaderPrivate::SetSortOrder(const BamMultiReader::SortOrder& order) { - const BamMultiReader::SortOrder oldSortOrder = ( m_isSortedByPosition ? BamMultiReader::SortedByPosition - : BamMultiReader::SortedByReadName ) ; // skip if no change needed - if ( oldSortOrder == order ) return; + if ( m_sortOrder == order ) return; - // create new alignment cache - IBamMultiMerger* newAlignmentCache(0); - if ( order == BamMultiReader::SortedByPosition ) { - newAlignmentCache = new PositionMultiMerger; - m_isSortedByPosition = true; - } - else { - newAlignmentCache = new ReadNameMultiMerger; - m_isSortedByPosition = false; - } + // set new sort order + m_sortOrder = order; + + // create new alignment cache based on sort order + IBamMultiMerger* newAlignmentCache = CreateMergerForCurrentSortOrder(); + if ( newAlignmentCache == 0 ) return; // print error? // copy old cache contents to new cache while ( m_alignments->Size() > 0 ) { - ReaderAlignment value = m_alignments->TakeFirst(); + ReaderAlignment value = m_alignments->TakeFirst(); // retrieves & 'pops' newAlignmentCache->Add(value); } @@ -459,33 +680,44 @@ void BamMultiReaderPrivate::SetSortOrder(const BamMultiReader::SortOrder& order) m_alignments = newAlignmentCache; } +// splits the entire header into a list of strings +const vector BamMultiReaderPrivate::SplitHeaderText(const string& headerText) const { + + stringstream header(headerText); + string item; + + vector lines; + while ( getline(header, item) ) + lines.push_back(item); + return lines; +} + // updates our alignment cache -void BamMultiReaderPrivate::UpdateAlignments(void) { +void BamMultiReaderPrivate::UpdateAlignmentCache(void) { + + // skip if invalid alignment cache + if ( m_alignments == 0 ) return; // clear the cache m_alignments->Clear(); + // seed cache with fully-populated alignments + // further updates will fill with full/core-only as requested + m_isCoreMode = false; + // iterate over readers vector::iterator readerIter = m_readers.begin(); vector::iterator readerEnd = m_readers.end(); for ( ; readerIter != readerEnd; ++readerIter ) { BamReader* reader = (*readerIter).first; BamAlignment* alignment = (*readerIter).second; - if ( reader == 0 ) continue; + if ( reader == 0 || alignment == 0 ) continue; + + // save next alignment from each reader in cache SaveNextAlignment(reader, alignment); } } -// splits the entire header into a list of strings -const vector BamMultiReaderPrivate::SplitHeaderText(const string& headerText) const { - stringstream header(headerText); - vector lines; - string item; - while ( getline(header, item) ) - lines.push_back(item); - return lines; -} - // ValidateReaders checks that all the readers point to BAM files representing // alignments against the same set of reference sequences, and that the // sequences are identically ordered. If these checks fail the operation of @@ -494,7 +726,7 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { // retrieve first reader data const BamReader* firstReader = m_readers.front().first; - if ( firstReader == 0 ) return; // signal error? + if ( firstReader == 0 ) return; const RefVector firstReaderRefData = firstReader->GetReferenceData(); const int firstReaderRefCount = firstReader->GetReferenceCount(); const int firstReaderRefSize = firstReaderRefData.size(); @@ -506,7 +738,7 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { // get current reader data BamReader* reader = (*readerIter).first; - if ( reader == 0 ) continue; // error? + if ( reader == 0 ) continue; const RefVector currentReaderRefData = reader->GetReferenceData(); const int currentReaderRefCount = reader->GetReferenceCount(); const int currentReaderRefSize = currentReaderRefData.size(); @@ -520,7 +752,7 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { if ( (currentReaderRefCount != firstReaderRefCount) || (firstReaderRefSize != currentReaderRefSize) ) { - cerr << "ERROR: mismatched number of references in " << reader->GetFilename() + cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename() << " expected " << firstReaderRefCount << " reference sequences but only found " << currentReaderRefCount << endl; exit(1); @@ -529,7 +761,6 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { // this will be ok; we just checked above that we have identically-sized sets of references // here we simply check if they are all, in fact, equal in content while ( firstRefIter != firstRefEnd ) { - const RefData& firstRef = (*firstRefIter); const RefData& currentRef = (*currentRefIter); @@ -537,8 +768,10 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { if ( (firstRef.RefName != currentRef.RefName) || (firstRef.RefLength != currentRef.RefLength) ) { - cerr << "ERROR: mismatched references found in " << reader->GetFilename() + cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename() << " expected: " << endl; + + // print first reader's reference data RefVector::const_iterator refIter = firstReaderRefData.begin(); RefVector::const_iterator refEnd = firstReaderRefData.end(); for ( ; refIter != refEnd; ++refIter ) { @@ -547,6 +780,8 @@ void BamMultiReaderPrivate::ValidateReaders(void) const { } cerr << "but found: " << endl; + + // print current reader's reference data refIter = currentReaderRefData.begin(); refEnd = currentReaderRefData.end(); for ( ; refIter != refEnd; ++refIter ) {