1 // ***************************************************************************
2 // Sort.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 10 October 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides sorting functionality.
9 // ***************************************************************************
11 #ifndef ALGORITHMS_SORT_H
12 #define ALGORITHMS_SORT_H
14 #include "api/api_global.h"
15 #include "api/BamAlignment.h"
16 #include "api/BamReader.h"
17 #include "api/BamMultiReader.h"
25 namespace Algorithms {
27 /*! \struct BamTools::Algorithms::Sort
28 \brief Provides classes & methods related to sorting BamAlignments
30 struct API_EXPORT Sort {
32 //! Provides explicit values for specifying desired sort ordering
33 enum Order { AscendingOrder = 0
37 /*! \fn template<typename ElemType> static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs)
40 Determines necessary STL function object depending on requested Sort::Order
42 template<typename ElemType>
43 static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs) {
45 case ( Sort::AscendingOrder ) : { std::less<ElemType> comp; return comp(lhs, rhs); }
46 case ( Sort::DescendingOrder ) : { std::greater<ElemType> comp; return comp(lhs, rhs); }
47 default : BT_ASSERT_UNREACHABLE;
49 return false; // <-- unreachable
52 //! Base class for our sorting function objects
53 typedef std::binary_function<BamAlignment, BamAlignment, bool> AlignmentSortBase;
55 /*! \struct BamTools::Algorithms::Sort::ByName
56 \brief Function object for comparing alignments by name
58 Default sort order is Sort::AscendingOrder.
61 std::vector<BamAlignment> a;
63 // sort by name, in ascending order (the following two lines are equivalent):
64 std::sort( a.begin(), a.end(), Sort::ByName() );
65 std::sort( a.begin(), a.end(), Sort::ByName(Sort::AscendingOrder) );
67 // OR sort in descending order
68 std::sort( a.begin(), a.end(), Sort::ByName(Sort::DescendingOrder) );
71 struct ByName : public AlignmentSortBase {
74 ByName(const Sort::Order& order = Sort::AscendingOrder)
78 // comparison function
79 bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
80 return sort_helper(m_order, lhs.Name, rhs.Name);
83 // used by BamMultiReader internals
84 static inline bool UsesCharData(void) { return true; }
88 const Sort::Order& m_order;
91 /*! \struct BamTools::Algorithms::Sort::ByPosition
92 \brief Function object for comparing alignments by position
94 Default sort order is Sort::AscendingOrder.
97 std::vector<BamAlignment> a;
99 // sort by position, in ascending order (the following two lines are equivalent):
100 std::sort( a.begin(), a.end(), Sort::ByPosition() );
101 std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::AscendingOrder) );
103 // OR sort in descending order
104 std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::DescendingOrder) );
107 struct ByPosition : public AlignmentSortBase {
110 ByPosition(const Sort::Order& order = Sort::AscendingOrder)
114 // comparison function
115 bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
117 // force unmapped aligmnents to end
118 if ( lhs.RefID == -1 ) return false;
119 if ( rhs.RefID == -1 ) return true;
121 // if on same reference, sort on position
122 if ( lhs.RefID == rhs.RefID )
123 return sort_helper(m_order, lhs.Position, rhs.Position);
125 // otherwise sort on reference ID
126 return sort_helper(m_order, lhs.RefID, rhs.RefID);
129 // used by BamMultiReader internals
130 static inline bool UsesCharData(void) { return false; }
137 /*! \struct BamTools::Algorithms::Sort::ByTag
138 \brief Function object for comparing alignments by tag value
140 Default sort order is Sort::AscendingOrder.
143 std::vector<BamAlignment> a;
145 // sort by edit distance, in ascending order (the following two lines are equivalent):
146 std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM") );
147 std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM", Sort::AscendingOrder) );
149 // OR sort in descending order
150 std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM", Sort::DescendingOrder) );
154 struct ByTag : public AlignmentSortBase {
157 ByTag(const std::string& tag,
158 const Sort::Order& order = Sort::AscendingOrder)
163 // comparison function
164 bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
166 // force alignments without tag to end
169 if ( !lhs.GetTag(m_tag, lhsTagValue) ) return false;
170 if ( !rhs.GetTag(m_tag, rhsTagValue) ) return true;
172 // otherwise compare on tag values
173 return sort_helper(m_order, lhsTagValue, rhsTagValue);
176 // used by BamMultiReader internals
177 static inline bool UsesCharData(void) { return true; }
185 /*! \struct BamTools::Algorithms::Sort::Unsorted
186 \brief Placeholder function object
188 This function object exists purely to allow for dropping a "do not care" ordering
189 into methods, containers, etc that are designed to work with the other sorting objects.
192 std::set<BamAlignment, Sort::ByName>; // STL set, ordered on alignment name
193 std::set<BamAlignment, Sort::Unsorted>; // STL set, unsorted (but probably insertion order)
196 struct Unsorted : public AlignmentSortBase {
198 // comparison function
199 inline bool operator()(const BamTools::BamAlignment&, const BamTools::BamAlignment&) {
200 return false; // returning false tends to retain insertion order
203 // used by BamMultiReader internals
204 static inline bool UsesCharData(void) { return false; }
207 /*! Sorts a std::vector of alignments (in-place), using the provided compare function.
210 std::vector<BamAlignemnt> a;
213 // sort our alignment list by edit distance
214 Sort::SortAlignments(a, Sort::ByTag<int>("NM"));
217 \param[in,out] data vector of alignments to be sorted
218 \param[in] comp comparison function object
220 template<typename Compare>
221 static inline void SortAlignments(std::vector<BamAlignment>& data,
222 const Compare& comp = Compare())
224 std::sort(data.begin(), data.end(), comp);
227 /*! Returns a sorted copy of the input alignments, using the provided compare function.
230 std::vector<BamAlignemnt> a;
233 // get a copy of our original data, sorted by edit distance (descending order)
234 std::vector<BamAligment> sortedData;
235 sortedData = Sort::SortAlignments(a, Sort::ByTag<int>("NM", Sort::DescendingOrder));
238 \param[in] input vector of alignments to be sorted
239 \param[in] comp comparison function object
240 \return sorted copy of the input data
242 template<typename Compare>
243 static inline std::vector<BamAlignment> SortAlignments(const std::vector<BamAlignment>& input,
244 const Compare& comp = Compare())
246 std::vector<BamAlignment> output(input);
247 SortAlignments(output, comp);
251 /*! Reads a region of alignments from a position-sorted BAM file,
252 then sorts by the provided compare function
256 // open BAM file & index file
259 // define a region of interest (i.e. a exon or some other feature)
261 // get all alignments covering that region, sorted by read group name
262 std::vector<BamAlignments> a;
263 a = Sort::GetSortedRegion(reader, region, Sort::ByTag<std::string>("RG"));
266 \param[in] reader BamReader opened on desired BAM file
267 \param[in] region desired region-of-interest
268 \param[in] comp comparison function object
269 \return sorted vector of the region's alignments
271 template<typename Compare>
272 static std::vector<BamAlignment> GetSortedRegion(BamReader& reader,
273 const BamRegion& region,
274 const Compare& comp = Compare())
276 // return empty container if unable to find region
277 if ( !reader.IsOpen() ) return std::vector<BamAlignment>();
278 if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
280 // iterate through region, grabbing alignments
282 std::vector<BamAlignment> results;
283 while ( reader.GetNextAlignmentCore(al) )
284 results.push_back(al);
286 // sort & return alignments
287 SortAlignments(results, comp);
291 /*! Reads a region of alignments from position-sorted BAM files,
292 then sorts by the provided compare function
295 BamMultiReader reader;
296 // open BAM files & index files
299 // define a region of interest (i.e. a exon or some other feature)
301 // get all alignments covering that region, sorted by read group name
302 std::vector<BamAlignments> a;
303 a = Sort::GetSortedRegion(reader, region, Sort::ByTag<std::string>("RG"));
306 \param[in] reader BamMultiReader opened on desired BAM files
307 \param[in] region desired region-of-interest
308 \param[in] comp comparison function object
309 \return sorted vector of the region's alignments
311 template<typename Compare>
312 static std::vector<BamAlignment> GetSortedRegion(BamMultiReader& reader,
313 const BamRegion& region,
314 const Compare& comp = Compare())
316 // return empty container if unable to find region
317 if ( !reader.HasOpenReaders() ) return std::vector<BamAlignment>();
318 if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
320 // iterate through region, grabbing alignments
322 std::vector<BamAlignment> results;
323 while ( reader.GetNextAlignmentCore(al) )
324 results.push_back(al);
326 // sort & return alignments
327 SortAlignments(results, comp);
332 } // namespace Algorithms
333 } // namespace BamTools
335 #endif // ALGORITHMS_SORT_H