]> git.donarmstrong.com Git - bamtools.git/blob - src/api/BamAlgorithms.h
Initial test run of new BamAlgorithms
[bamtools.git] / src / api / BamAlgorithms.h
1 // ***************************************************************************
2 // BamAlgorithms.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 29 September 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides generic algorithms that are intended to work with BamTools data
9 // structures. Where possible, these are intended to be STL-compatible.
10 // ***************************************************************************
11
12 #ifndef BAMALGORITHMS_H
13 #define BAMALGORITHMS_H
14
15 #include <api/api_global.h>
16 #include <api/BamAlignment.h>
17 #include <api/BamReader.h>
18 #include <api/BamMultiReader.h>
19 #include <algorithm>
20 #include <functional>
21 #include <string>
22 #include <vector>
23
24 namespace BamTools {
25 namespace Algorithms {
26
27 // -------------------------------------------------------
28 // Built-in function objects for comparing BamAlignments
29
30 typedef std::binary_function<BamAlignment, BamAlignment, bool> BamAlignmentComparer;
31
32 // Algorithms::SortByName<Compare>
33 // compare alignments by name (default comparison is std::less<std::string> )
34 template<template <typename> class Compare = std::less>
35 struct API_EXPORT SortByName : public BamAlignmentComparer {
36     bool operator()(const BamTools::BamAlignment& lhs,
37                     const BamTools::BamAlignment& rhs)
38     {
39         Compare<std::string> comp;
40         return comp(lhs.Name, rhs.Name);
41     }
42 };
43
44 // Algorithms::SortByPosition<Compare>
45 // compare alignments by position (default comparison is std::less<int>)
46 template<template <typename> class Compare = std::less>
47 struct API_EXPORT SortByPosition : public BamAlignmentComparer {
48     bool operator()(const BamTools::BamAlignment& lhs,
49                     const BamTools::BamAlignment& rhs)
50     {
51         // force unmapped aligmnents to end
52         if ( lhs.RefID == -1 ) return false;
53         if ( rhs.RefID == -1 ) return true;
54
55         // otherwise compare first on RefID, then position
56         Compare<int32_t> comp;
57         if ( lhs.RefID == rhs.RefID )
58             return comp(lhs.Position, rhs.Position);
59         return comp(lhs.RefID, rhs.RefID);
60     }
61 };
62
63 // Algorithms::SortByTag<Compare>("XY")
64 // compare alignments by tag value (default comparison is std::less<T>)
65 // where T is the expected tag type (e.g. RG -> string, NM -> int, etc.)
66 template<typename T, template <typename> class Compare = std::less>
67 struct API_EXPORT SortByTag : public BamAlignmentComparer {
68
69     // ctor - needed to provide the tag name ("RG", "NM", "Aq", etc)
70     explicit SortByTag(const std::string& tag) : m_tag(tag) { }
71
72     bool operator()(const BamTools::BamAlignment& lhs,
73                     const BamTools::BamAlignment& rhs)
74     {
75         // force alignments without tag to end
76         T lhsTagValue;
77         T rhsTagValue;
78         if ( !lhs.GetTag(m_tag, lhsTagValue) ) return false;
79         if ( !rhs.GetTag(m_tag, rhsTagValue) ) return true;
80
81         // otherwise compare tag values
82         Compare<T> comp;
83         return comp(lhsTagValue, rhsTagValue);
84     }
85
86     private:
87         std::string m_tag;
88 };
89
90 // Algorithms::Unsorted
91 // placeholder comparison object, ignores the alignments' data
92 // N.B. - returning false typically retains initial insertion order
93 struct API_EXPORT Unsorted : public BamAlignmentComparer {
94     bool operator()(const BamTools::BamAlignment& /*lhs*/,
95                     const BamTools::BamAlignment& /*rhs*/)
96     {
97         return false;
98     }
99 };
100
101 API_EXPORT template<typename Compare>
102 std::vector<BamAlignment> SortReaderRegion(BamReader& reader,
103                                            const BamRegion& region,
104                                            const Compare& comp = Compare())
105 {
106     // return empty container if unable to find region
107     if ( !reader.IsOpen() )          return std::vector<BamAlignment>();
108     if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
109
110     // iterate through region, grabbing alignments
111     BamAlignment al;
112     std::vector<BamAlignment> results;
113     while ( reader.GetNextAlignment(al) )
114         results.push_back(al);
115
116     // sort & return alignments
117     std::sort(results.begin(), results.end(), comp);
118     return results;
119 }
120
121 API_EXPORT template<typename Compare>
122 std::vector<BamAlignment> SortReaderRegion(BamMultiReader& reader,
123                                            const BamRegion& region,
124                                            const Compare& comp = Compare())
125 {
126     // return empty container if unable to find region
127     if ( !reader.HasOpenReaders() )  return std::vector<BamAlignment>();
128     if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
129
130     // iterate through region, grabbing alignments
131     BamAlignment al;
132     std::vector<BamAlignment> results;
133     while ( reader.GetNextAlignment(al) )
134         results.push_back(al);
135
136     // sort & return alignments
137     std::sort(results.begin(), results.end(), comp);
138     return results;
139 }
140
141 } // namespace Algorithms
142 } // namespace BamTools
143
144 #endif // BAMALGORITHMS_H