// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 17 January 2011 (DB)
// ---------------------------------------------------------------------------
// Provides merging functionality for BamMultiReader. At this point, supports
// sorting results by (refId, position) or by read name.
#include <api/BamAlignment.h>
#include <api/BamReader.h>
-#include <map>
-#include <queue>
+#include <api/algorithms/Sort.h>
+#include <deque>
+#include <functional>
+#include <set>
#include <string>
-#include <utility>
namespace BamTools {
namespace Internal {
-typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+struct MergeItem {
-// generic MultiMerger interface
-class IBamMultiMerger {
+ // data members
+ BamReader* Reader;
+ BamAlignment* Alignment;
- public:
- IBamMultiMerger(void) { }
- virtual ~IBamMultiMerger(void) { }
+ // ctors & dtor
+ MergeItem(BamReader* reader = 0,
+ BamAlignment* alignment = 0)
+ : Reader(reader)
+ , Alignment(alignment)
+ { }
- public:
- virtual void Add(const ReaderAlignment& value) =0;
- virtual void Clear(void) =0;
- virtual const ReaderAlignment& First(void) const =0;
- virtual const int Size(void) const =0;
- virtual ReaderAlignment TakeFirst(void) =0;
+ MergeItem(const MergeItem& other)
+ : Reader(other.Reader)
+ , Alignment(other.Alignment)
+ { }
+
+ ~MergeItem(void) { }
};
-// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position)
-class PositionMultiMerger : public IBamMultiMerger {
+template<typename Compare>
+struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool> {
public:
- PositionMultiMerger(void) : IBamMultiMerger() { }
- ~PositionMultiMerger(void) { }
+ MergeItemSorter(const Compare& comp = Compare())
+ : m_comp(comp)
+ { }
- public:
- void Add(const ReaderAlignment& value);
- void Clear(void);
- const ReaderAlignment& First(void) const;
- const int Size(void) const;
- ReaderAlignment TakeFirst(void);
+ bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
+ const BamAlignment& l = *lhs.Alignment;
+ const BamAlignment& r = *rhs.Alignment;
+ return m_comp(l,r);
+ }
private:
- typedef std::pair<int, int> KeyType;
- typedef std::multimap<KeyType, ReaderAlignment> IndexType;
- typedef std::pair<KeyType, ReaderAlignment> KeyValueType;
- typedef IndexType::iterator IndexIterator;
- typedef IndexType::const_iterator IndexConstIterator;
-
- IndexType m_data;
+ Compare m_comp;
};
-// IBamMultiMerger implementation - sorted on BamAlignment: Name
-class ReadNameMultiMerger : public IBamMultiMerger {
+// pure ABC so we can just work polymorphically with any specific merger implementation
+class IMultiMerger {
public:
- ReadNameMultiMerger(void) : IBamMultiMerger() { }
- ~ReadNameMultiMerger(void) { }
-
+ IMultiMerger(void) { }
+ virtual ~IMultiMerger(void) { }
public:
- void Add(const ReaderAlignment& value);
- void Clear(void);
- const ReaderAlignment& First(void) const;
- const int Size(void) const;
- ReaderAlignment TakeFirst(void);
-
- private:
- typedef std::string KeyType;
- typedef std::multimap<KeyType, ReaderAlignment> IndexType;
- typedef std::pair<KeyType, ReaderAlignment> KeyValueType;
- typedef IndexType::iterator IndexIterator;
- typedef IndexType::const_iterator IndexConstIterator;
-
- IndexType m_data;
+ virtual void Add(MergeItem item) =0;
+ virtual void Clear(void) =0;
+ virtual const MergeItem& First(void) const =0;
+ virtual bool IsEmpty(void) const =0;
+ virtual void Remove(BamReader* reader) =0;
+ virtual int Size(void) const =0;
+ virtual MergeItem TakeFirst(void) =0;
};
-// IBamMultiMerger implementation - unsorted BAM file(s)
-class UnsortedMultiMerger : public IBamMultiMerger {
+// general merger
+template<typename Compare>
+class MultiMerger : public IMultiMerger {
public:
- UnsortedMultiMerger(void) : IBamMultiMerger() { }
- ~UnsortedMultiMerger(void) { }
+ typedef Compare CompareType;
+ typedef MergeItemSorter<CompareType> MergeType;
public:
- void Add(const ReaderAlignment& value);
+ explicit MultiMerger(const Compare& comp = Compare())
+ : IMultiMerger()
+ , m_data( MergeType(comp) )
+ { }
+ ~MultiMerger(void) { }
+
+ public:
+ void Add(MergeItem item);
void Clear(void);
- const ReaderAlignment& First(void) const;
- const int Size(void) const;
- ReaderAlignment TakeFirst(void);
+ const MergeItem& First(void) const;
+ bool IsEmpty(void) const;
+ void Remove(BamReader* reader);
+ int Size(void) const;
+ MergeItem TakeFirst(void);
private:
- typedef std::queue<ReaderAlignment> IndexType;
- IndexType m_data;
+ typedef MergeItem ValueType;
+ typedef std::multiset<ValueType, MergeType> ContainerType;
+ typedef typename ContainerType::iterator DataIterator;
+ typedef typename ContainerType::const_iterator DataConstIterator;
+ ContainerType m_data;
};
-// ------------------------------------------
-// PositionMultiMerger implementation
-
-inline void PositionMultiMerger::Add(const ReaderAlignment& value) {
- const KeyType key = std::make_pair<int, int>( value.second->RefID, value.second->Position );
- m_data.insert( KeyValueType(key, value) );
+template <typename Compare>
+inline void MultiMerger<Compare>::Add(MergeItem item) {
+ if ( CompareType::UsesCharData() )
+ item.Alignment->BuildCharData();
+ m_data.insert(item);
}
-inline void PositionMultiMerger::Clear(void) {
+template <typename Compare>
+inline void MultiMerger<Compare>::Clear(void) {
m_data.clear();
}
-inline const ReaderAlignment& PositionMultiMerger::First(void) const {
- const KeyValueType& entry = (*m_data.begin());
- return entry.second;
+template <typename Compare>
+inline const MergeItem& MultiMerger<Compare>::First(void) const {
+ const ValueType& entry = (*m_data.begin());
+ return entry;
}
-inline const int PositionMultiMerger::Size(void) const {
+template <typename Compare>
+inline bool MultiMerger<Compare>::IsEmpty(void) const {
+ return m_data.empty();
+}
+template <typename Compare>
+inline void MultiMerger<Compare>::Remove(BamReader* reader) {
+
+ if ( reader == 0 ) return;
+ const std::string& filenameToRemove = reader->GetFilename();
+
+ // iterate over readers in cache
+ DataIterator dataIter = m_data.begin();
+ DataIterator dataEnd = m_data.end();
+ for ( ; dataIter != dataEnd; ++dataIter ) {
+ const MergeItem& item = (*dataIter);
+ const BamReader* itemReader = item.Reader;
+ if ( itemReader == 0 ) continue;
+
+ // remove iterator on match
+ if ( itemReader->GetFilename() == filenameToRemove ) {
+ m_data.erase(dataIter);
+ return;
+ }
+ }
+}
+template <typename Compare>
+inline int MultiMerger<Compare>::Size(void) const {
return m_data.size();
}
-inline ReaderAlignment PositionMultiMerger::TakeFirst(void) {
- IndexIterator first = m_data.begin();
- ReaderAlignment next = (*first).second;
- m_data.erase(first);
- return next;
+template <typename Compare>
+inline MergeItem MultiMerger<Compare>::TakeFirst(void) {
+ DataIterator firstIter = m_data.begin();
+ MergeItem firstItem = (*firstIter);
+ m_data.erase(firstIter);
+ return firstItem;
}
-// ------------------------------------------
-// ReadNameMultiMerger implementation
+// unsorted "merger"
+template<>
+class MultiMerger<Algorithms::Sort::Unsorted> : public IMultiMerger {
-inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {
- const KeyType key = value.second->Name;
- m_data.insert( KeyValueType(key, value) );
-}
+ public:
+ explicit MultiMerger(const Algorithms::Sort::Unsorted& comp = Algorithms::Sort::Unsorted())
+ : IMultiMerger()
+ { }
+ ~MultiMerger(void) { }
-inline void ReadNameMultiMerger::Clear(void) {
- m_data.clear();
-}
+ public:
+ void Add(MergeItem item);
+ void Clear(void);
+ const MergeItem& First(void) const;
+ bool IsEmpty(void) const;
+ void Remove(BamReader* reader);
+ int Size(void) const;
+ MergeItem TakeFirst(void);
-inline const ReaderAlignment& ReadNameMultiMerger::First(void) const {
- const KeyValueType& entry = (*m_data.begin());
- return entry.second;
-}
+ private:
+ typedef MergeItem ValueType;
+ typedef std::deque<ValueType> ContainerType;
+ typedef ContainerType::iterator DataIterator;
+ typedef ContainerType::const_iterator DataConstIterator;
+ ContainerType m_data;
+};
-inline const int ReadNameMultiMerger::Size(void) const {
- return m_data.size();
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Add(MergeItem item) {
+ m_data.push_back(item);
}
-inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) {
- IndexIterator first = m_data.begin();
- ReaderAlignment next = (*first).second;
- m_data.erase(first);
- return next;
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Clear(void) {
+ m_data.clear();
}
-// ------------------------------------------
-// UnsortedMultiMerger implementation
-
-inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) {
- m_data.push(value);
+inline
+const MergeItem& MultiMerger<Algorithms::Sort::Unsorted>::First(void) const {
+ return m_data.front();
}
-inline void UnsortedMultiMerger::Clear(void) {
- m_data.clear();
+inline
+bool MultiMerger<Algorithms::Sort::Unsorted>::IsEmpty(void) const {
+ return m_data.empty();
}
-inline const ReaderAlignment& UnsortedMultiMerger::First(void) const {
- return m_data.front();
+inline
+void MultiMerger<Algorithms::Sort::Unsorted>::Remove(BamReader* reader) {
+
+ if ( reader == 0 ) return;
+ const std::string filenameToRemove = reader->GetFilename();
+
+ // iterate over readers in cache
+ DataIterator dataIter = m_data.begin();
+ DataIterator dataEnd = m_data.end();
+ for ( ; dataIter != dataEnd; ++dataIter ) {
+ const MergeItem& item = (*dataIter);
+ const BamReader* itemReader = item.Reader;
+ if ( itemReader == 0 ) continue;
+
+ // remove iterator on match
+ if ( itemReader->GetFilename() == filenameToRemove ) {
+ m_data.erase(dataIter);
+ return;
+ }
+ }
}
-inline const int UnsortedMultiMerger::Size(void) const {
+inline
+int MultiMerger<Algorithms::Sort::Unsorted>::Size(void) const {
return m_data.size();
}
-inline ReaderAlignment UnsortedMultiMerger::TakeFirst(void) {
- ReaderAlignment first = m_data.front();
- m_data.pop();
- return first;
+inline
+MergeItem MultiMerger<Algorithms::Sort::Unsorted>::TakeFirst(void) {
+ MergeItem firstItem = m_data.front();
+ m_data.pop_front();
+ return firstItem;
}
} // namespace Internal