// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 18 March 2011 (DB)
+// Last modified: 9 September 2011 (DB)
// ---------------------------------------------------------------------------
// Provides merging functionality for BamMultiReader. At this point, supports
// sorting results by (refId, position) or by read name.
// Hunk context from the IBamMultiMerger interface (the class header lies outside this hunk).
// The only change shown: the pure-virtual Add() now receives its ReaderAlignment
// by value instead of by const reference.
virtual ~IBamMultiMerger(void) { }
public:
- virtual void Add(const ReaderAlignment& value) =0;
+ virtual void Add(ReaderAlignment value) =0;
virtual void Clear(void) =0;
virtual const ReaderAlignment& First(void) const =0;
virtual bool IsEmpty(void) const =0;
// Hunk context from the PositionMultiMerger declaration (class header not visible here).
// Add() is changed to take ReaderAlignment by value; the other declarations are untouched context.
~PositionMultiMerger(void) { }
public:
- void Add(const ReaderAlignment& value);
+ void Add(ReaderAlignment value);
void Clear(void);
const ReaderAlignment& First(void) const;
bool IsEmpty(void) const;
// Hunk context from the ReadNameMultiMerger declaration (class header not visible here).
// Add() is changed to take ReaderAlignment by value; the other declarations are untouched context.
~ReadNameMultiMerger(void) { }
public:
- void Add(const ReaderAlignment& value);
+ void Add(ReaderAlignment value);
void Clear(void);
const ReaderAlignment& First(void) const;
bool IsEmpty(void) const;
// Hunk context from the UnsortedMultiMerger declaration (class header not visible here).
// Add() is changed to take ReaderAlignment by value; the other declarations are untouched context.
~UnsortedMultiMerger(void) { }
public:
- void Add(const ReaderAlignment& value);
+ void Add(ReaderAlignment value);
void Clear(void);
const ReaderAlignment& First(void) const;
bool IsEmpty(void) const;
// ------------------------------------------
// PositionMultiMerger implementation
// Inserts a (reader, alignment) entry into m_data.
// The key is built from the alignment's RefID and Position (read through value.second).
// This change only switches the parameter from const reference to by-value.
-inline void PositionMultiMerger::Add(const ReaderAlignment& value) {
+inline void PositionMultiMerger::Add(ReaderAlignment value) {
const KeyType key( value.second->RefID, value.second->Position );
m_data.insert( ElementType(key, value) );
}
// ------------------------------------------
// ReadNameMultiMerger implementation
// Inserts a (reader, alignment) entry into m_data, keyed by the alignment's Name.
// Old version (removed lines): read value.second->Name directly.
// New version (added lines): calls BuildCharData() on the alignment first and
// inserts the entry only when that call returns true.
// NOTE(review): when BuildCharData() returns false the entry is dropped and
// nothing is reported to the caller (the function returns void) - confirm that
// silently skipping the alignment is the intended behavior.
-inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {
- const KeyType key(value.second->Name);
- m_data.insert( ElementType(key, value) );
+inline void ReadNameMultiMerger::Add(ReaderAlignment value) {
+ BamAlignment* al = value.second;
+ if ( al->BuildCharData() ) {
+ const KeyType key(al->Name);
+ m_data.insert( ElementType(key, value) );
+ }
}
inline void ReadNameMultiMerger::Clear(void) {
// ------------------------------------------
// UnsortedMultiMerger implementation
// Appends the (reader, alignment) entry to the end of m_data; no key is computed.
// This change only switches the parameter from const reference to by-value.
-inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) {
+inline void UnsortedMultiMerger::Add(ReaderAlignment value) {
m_data.push_back(value);
}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 5 April 2011 (DB)
+// Last modified: 9 September 2011 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// ctor
// Constructor: starts with a null merger pointer (m_alignments = 0) and the
// sort order set to BamMultiReader::SortedByPosition.
// The m_isCoreMode initializer is removed by this change.
BamMultiReaderPrivate::BamMultiReaderPrivate(void)
: m_alignments(0)
- , m_isCoreMode(false)
, m_sortOrder(BamMultiReader::SortedByPosition)
{ }
// get next alignment among all files
// Copies the next cached alignment into 'al' and returns the result of
// PopNextCachedAlignment(). Passing 'true' requests that character data be
// built on the returned alignment.
// The old implementation set m_isCoreMode = false and called the one-argument
// LoadNextAlignment(); both are removed by this change.
bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
- m_isCoreMode = false;
- return LoadNextAlignment(al);
+ return PopNextCachedAlignment(al, true);
}
// get next alignment among all files without parsing character data from alignments
// Copies the next cached alignment into 'al' and returns the result of
// PopNextCachedAlignment(). Passing 'false' skips building character data.
// The old implementation set m_isCoreMode = true and called the one-argument
// LoadNextAlignment(); both are removed by this change.
bool BamMultiReaderPrivate::GetNextAlignmentCore(BamAlignment& al) {
- m_isCoreMode = true;
- return LoadNextAlignment(al);
+ return PopNextCachedAlignment(al, false);
}
// ---------------------------------------------------------------------------------------
return true;
}
// LoadNextAlignment changes both signature and responsibility in this patch.
// Removed version (one argument): popped the first cached entry, copied it into
// 'al', and refilled the cache via SaveNextAlignment(). The same sequence of
// steps appears in the added PopNextCachedAlignment().
// Added version (reader, alignment): reads the next record from 'reader' into
// '*alignment' using GetNextAlignmentCore() and returns that call's result.
-bool BamMultiReaderPrivate::LoadNextAlignment(BamAlignment& al) {
-
- // bail out if no more data to process
- if ( !HasAlignmentData() )
- return false;
-
- // "pop" next alignment and reader
- ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
- BamReader* reader = nextReaderAlignment.first;
- BamAlignment* alignment = nextReaderAlignment.second;
-
- // store cached alignment into destination parameter (by copy)
- al = *alignment;
-
- // peek to next alignment & store in cache
- SaveNextAlignment(reader, alignment);
-
- // return success
- return true;
+bool BamMultiReaderPrivate::LoadNextAlignment(BamReader* reader, BamAlignment* alignment) {
+ // lazy building of alignment's char data,
+ // only populated on demand by sorting merger or client call to GetNextAlignment()
+ return reader->GetNextAlignmentCore(*alignment);
}
// locate (& load) index files for BAM readers that don't already have one loaded
cout << (*filenameIter) << endl;
}
// Removes the first entry from the alignment cache, copies its alignment into
// 'al', then refills the cache from the same reader.
//   al           - destination; receives a copy of the cached alignment
//   needCharData - when true, BuildCharData() is called on 'al' and al.Filename
//                  is set from the reader's GetFilename()
// Returns false when HasAlignmentData() reports no data, true otherwise.
// NOTE(review): the return value of al.BuildCharData() is ignored, so the
// function still returns true if that call fails - confirm this is intended.
// NOTE(review): al.Filename is only assigned on the needCharData path - confirm
// that callers requesting core-only data do not rely on it.
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+ // bail out if no more data to process
+ if ( !HasAlignmentData() )
+ return false;
+
+ // "pop" next alignment and reader
+ ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
+ BamReader* reader = nextReaderAlignment.first;
+ BamAlignment* alignment = nextReaderAlignment.second;
+
+ // store cached alignment into destination parameter (by copy)
+ al = *alignment;
+
+ // set char data if requested
+ if ( needCharData ) {
+ al.BuildCharData();
+ al.Filename = reader->GetFilename();
+ }
+
+ // peek to next alignment & store in cache
// the copy into 'al' above happens before this call, which passes the same
// 'alignment' pointer on to be read into again
+ SaveNextAlignment(reader, alignment);
+
+ // return success
+ return true;
+}
+
// returns BAM file pointers to beginning of alignment data & resets alignment cache
bool BamMultiReaderPrivate::Rewind(void) {
// Reads the next alignment from 'reader' into '*alignment' and, on success,
// adds the (reader, alignment) pair to the merger cache.
// Removed version: chose between GetNextAlignmentCore() and GetNextAlignment()
// based on m_isCoreMode and the sort order.
// Added version: always goes through LoadNextAlignment(reader, alignment).
// Nothing is added to the cache when the read fails.
void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) {
- // must be in core mode && NOT sorting by read name to call GNACore()
- if ( m_isCoreMode && m_sortOrder != BamMultiReader::SortedByReadName ) {
- if ( reader->GetNextAlignmentCore(*alignment) )
- m_alignments->Add( make_pair(reader, alignment) );
- }
-
- // not in core mode and/or sorting by readname, must call GNA()
- else {
- if ( reader->GetNextAlignment(*alignment) )
- m_alignments->Add( make_pair(reader, alignment) );
- }
+ // if can read alignment from reader, store in cache
+ if ( LoadNextAlignment(reader, alignment) )
+ m_alignments->Add( make_pair(reader, alignment) );
}
// sets the index caching mode on the readers
// clear the cache
m_alignments->Clear();
- // seed cache with fully-populated alignments
- // further updates will fill with full/core-only as requested
- m_isCoreMode = false;
-
// iterate over readers
vector<ReaderAlignment>::iterator readerIter = m_readers.begin();
vector<ReaderAlignment>::iterator readerEnd = m_readers.end();
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 13 March 2011 (DB)
+// Last modified: 9 September 2011 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// Hunk from a class declaration whose header is not visible here; the members
// match the BamMultiReaderPrivate methods defined elsewhere in this patch.
// Changes shown: LoadNextAlignment() takes (reader, alignment) instead of a
// destination alignment, PopNextCachedAlignment() is added, the BamTools::
// qualifiers are dropped from OpenReader()/SaveNextAlignment(), and the
// m_isCoreMode member is removed.
IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const;
const std::string ExtractReadGroup(const std::string& headerLine) const;
bool HasAlignmentData(void) const;
- bool LoadNextAlignment(BamAlignment& al);
- BamTools::BamReader* OpenReader(const std::string& filename);
+ bool LoadNextAlignment(BamReader* reader, BamAlignment* alignment);
+ BamReader* OpenReader(const std::string& filename);
+ bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
bool RewindReaders(void);
- void SaveNextAlignment(BamTools::BamReader* reader, BamTools::BamAlignment* alignment);
+ void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
const std::vector<std::string> SplitHeaderText(const std::string& headerText) const;
void UpdateAlignmentCache(void);
void ValidateReaders(void) const;
std::vector<ReaderAlignment> m_readers;
// merger holding the cached (reader, alignment) entries; initialized to 0 by the constructor
IBamMultiMerger* m_alignments;
- bool m_isCoreMode;
BamMultiReader::SortOrder m_sortOrder;
};