]> git.donarmstrong.com Git - bamtools.git/blobdiff - src/api/algorithms/Sort.h
Fixed: invalidated by-name sort order in some cases
[bamtools.git] / src / api / algorithms / Sort.h
index f9d2dafdd1900afb25bff39c85c22bf41632f5d7..32902e11904aa6187e3c25b90ea5b4e1350e43f0 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 29 September 2011 (DB)
+// Last modified: 4 April 2012 (DB)
 // ---------------------------------------------------------------------------
 // Provides sorting functionality.
 // ***************************************************************************
 #ifndef ALGORITHMS_SORT_H
 #define ALGORITHMS_SORT_H
 
-#include <api/api_global.h>
-#include <api/BamAlignment.h>
-#include <api/BamReader.h>
-#include <api/BamMultiReader.h>
-#include <algorithm>
+#include "api/api_global.h"
+#include "api/BamAlignment.h"
+#include "api/BamReader.h"
+#include "api/BamMultiReader.h"
 #include <cassert>
+#include <algorithm>
 #include <functional>
 #include <string>
 #include <vector>
 namespace BamTools {
 namespace Algorithms {
 
+/*! \struct BamTools::Algorithms::Sort
+    \brief Provides classes & methods related to sorting BamAlignments
+*/
 struct API_EXPORT Sort {
 
+    //! Provides explicit values for specifying desired sort ordering
     enum Order { AscendingOrder = 0
                , DescendingOrder
                };
 
+    /*! \fn template<typename ElemType> static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs)
+        \internal
+
+        Determines necessary STL function object depending on requested Sort::Order
+    */
     template<typename ElemType>
-    static inline bool sort_helper(const Sort::Order& order,
-                                   const ElemType& lhs,
-                                   const ElemType& rhs)
-    {
+    static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs) {
         switch ( order ) {
             case ( Sort::AscendingOrder  ) : { std::less<ElemType> comp;    return comp(lhs, rhs); }
             case ( Sort::DescendingOrder ) : { std::greater<ElemType> comp; return comp(lhs, rhs); }
-            default : assert(false);
+            default : BT_ASSERT_UNREACHABLE;
         }
         return false; // <-- unreachable
     }
 
+    //! Base class for our sorting function objects
     typedef std::binary_function<BamAlignment, BamAlignment, bool> AlignmentSortBase;
 
-    // compare alignments by name
+    /*! \struct BamTools::Algorithms::Sort::ByName
+        \brief Function object for comparing alignments by name
+
+        Default sort order is Sort::AscendingOrder.
+
+        \code
+            std::vector<BamAlignment> a;
+
+            // sort by name, in ascending order (the following two lines are equivalent):
+            std::sort( a.begin(), a.end(), Sort::ByName() );
+            std::sort( a.begin(), a.end(), Sort::ByName(Sort::AscendingOrder) );
+
+            // OR sort in descending order
+            std::sort( a.begin(), a.end(), Sort::ByName(Sort::DescendingOrder) );
+        \endcode
+    */
     struct ByName : public AlignmentSortBase {
 
+        // ctor
         ByName(const Sort::Order& order = Sort::AscendingOrder)
             : m_order(order)
         { }
 
-        bool operator()(const BamTools::BamAlignment& lhs,
-                        const BamTools::BamAlignment& rhs)
-        {
+        // comparison function
+        bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
             return sort_helper(m_order, lhs.Name, rhs.Name);
         }
 
+        // used by BamMultiReader internals
         static inline bool UsesCharData(void) { return true; }
 
+        // data members
         private:
-            const Sort::Order& m_order;
+            const Sort::Order m_order;
     };
 
-    // compare alignments by position
+    /*! \struct BamTools::Algorithms::Sort::ByPosition
+        \brief Function object for comparing alignments by position
+
+        Default sort order is Sort::AscendingOrder.
+
+        \code
+            std::vector<BamAlignment> a;
+
+            // sort by position, in ascending order (the following two lines are equivalent):
+            std::sort( a.begin(), a.end(), Sort::ByPosition() );
+            std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::AscendingOrder) );
+
+            // OR sort in descending order
+            std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::DescendingOrder) );
+        \endcode
+    */
     struct ByPosition : public AlignmentSortBase {
 
+        // ctor
         ByPosition(const Sort::Order& order = Sort::AscendingOrder)
             : m_order(order)
         { }
 
-        bool operator()(const BamTools::BamAlignment& lhs,
-                        const BamTools::BamAlignment& rhs)
-        {
+        // comparison function
+        bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
+
             // force unmapped aligmnents to end
             if ( lhs.RefID == -1 ) return false;
             if ( rhs.RefID == -1 ) return true;
@@ -86,25 +126,43 @@ struct API_EXPORT Sort {
             return sort_helper(m_order, lhs.RefID, rhs.RefID);
         }
 
+        // used by BamMultiReader internals
         static inline bool UsesCharData(void) { return false; }
 
+        // data members
         private:
-            Sort::Order m_order;
+            const Sort::Order m_order;
     };
 
-    // compare alignments by tag value
+    /*! \struct BamTools::Algorithms::Sort::ByTag
+        \brief Function object for comparing alignments by tag value
+
+        Default sort order is Sort::AscendingOrder.
+
+        \code
+            std::vector<BamAlignment> a;
+
+            // sort by edit distance, in ascending order (the following two lines are equivalent):
+            std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM") );
+            std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM", Sort::AscendingOrder) );
+
+            // OR sort in descending order
+            std::sort( a.begin(), a.end(), Sort::ByTag<int>("NM", Sort::DescendingOrder) );
+        \endcode
+    */
     template<typename T>
     struct ByTag : public AlignmentSortBase {
 
+        // ctor
         ByTag(const std::string& tag,
               const Sort::Order& order = Sort::AscendingOrder)
             : m_tag(tag)
             , m_order(order)
         { }
 
-        bool operator()(const BamTools::BamAlignment& lhs,
-                        const BamTools::BamAlignment& rhs)
-        {
+        // comparison function
+        bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) {
+
             // force alignments without tag to end
             T lhsTagValue;
             T rhsTagValue;
@@ -115,87 +173,161 @@ struct API_EXPORT Sort {
             return sort_helper(m_order, lhsTagValue, rhsTagValue);
         }
 
+        // used by BamMultiReader internals
         static inline bool UsesCharData(void) { return true; }
 
+        // data members
         private:
-            std::string m_tag;
-            Sort::Order m_order;
+            const std::string m_tag;
+            const Sort::Order m_order;
     };
 
-    // essentially a placeholder comparison object, ignores the alignments' data
-    // N.B. - returning false tends to retain insertion order
+    /*! \struct BamTools::Algorithms::Sort::Unsorted
+        \brief Placeholder function object
+
+        This function object exists purely to allow for dropping a "do not care" ordering
+        into methods, containers, etc that are designed to work with the other sorting objects.
+
+        \code
+            std::set<BamAlignment, Sort::ByName>;   // STL set, ordered on alignment name
+            std::set<BamAlignment, Sort::Unsorted>; // STL set, unsorted (but probably insertion order)
+        \endcode
+    */
     struct Unsorted : public AlignmentSortBase {
-        inline bool operator()(const BamTools::BamAlignment& /*lhs*/,
-                               const BamTools::BamAlignment& /*rhs*/)
-        {
-            return false;
+
+        // comparison function
+        inline bool operator()(const BamTools::BamAlignment&, const BamTools::BamAlignment&) {
+            return false;   // returning false tends to retain insertion order
         }
 
+        // used by BamMultiReader internals
         static inline bool UsesCharData(void) { return false; }
     };
-};
 
-// in-place sorting of alignment vector
-template<typename Compare>
-inline
-void SortAlignments(std::vector<BamAlignment>& data,
-                    const Compare& comp = Compare())
-{
-    std::sort(data.begin(), data.end(), comp);
-}
-
-// returns sorted copy of input alignment vector
-template<typename Compare>
-inline
-std::vector<BamAlignment> SortAlignmentsCopy(const std::vector<BamAlignment>& input,
-                                             const Compare& comp = Compare())
-{
-    std::vector<BamAlignment> output(input);
-    SortAlignments(output, comp);
-    return output;
-}
-
-// pulls a region from a position-sorted BAM file
-// returns the alignments sorted by user-defined Compare type
-template<typename Compare>
-std::vector<BamAlignment> GetSortedRegion(BamReader& reader,
-                                          const BamRegion& region,
-                                          const Compare& comp = Compare())
-{
-    // return empty container if unable to find region
-    if ( !reader.IsOpen() )          return std::vector<BamAlignment>();
-    if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
-
-    // iterate through region, grabbing alignments
-    BamAlignment al;
-    std::vector<BamAlignment> results;
-    while ( reader.GetNextAlignmentCore(al) )
-        results.push_back(al);
-
-    // sort & return alignments
-    SortAlignments(results, comp);
-    return results;
-}
-
-template<typename Compare>
-std::vector<BamAlignment> GetSortedRegion(BamMultiReader& reader,
-                                          const BamRegion& region,
-                                          const Compare& comp = Compare())
-{
-    // return empty container if unable to find region
-    if ( !reader.HasOpenReaders() )  return std::vector<BamAlignment>();
-    if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
-
-    // iterate through region, grabbing alignments
-    BamAlignment al;
-    std::vector<BamAlignment> results;
-    while ( reader.GetNextAlignmentCore(al) )
-        results.push_back(al);
-
-    // sort & return alignments
-    SortAlignments(results, comp);
-    return results;
-}
+    /*! Sorts a std::vector of alignments (in-place), using the provided compare function.
+
+        \code
+            std::vector<BamAlignemnt> a;
+            // populate data
+
+            // sort our alignment list by edit distance
+            Sort::SortAlignments(a, Sort::ByTag<int>("NM"));
+        \endcode
+
+        \param[in,out] data vector of alignments to be sorted
+        \param[in]     comp comparison function object
+    */
+    template<typename Compare>
+    static inline void SortAlignments(std::vector<BamAlignment>& data,
+                                      const Compare& comp = Compare())
+    {
+        std::sort(data.begin(), data.end(), comp);
+    }
+
+    /*! Returns a sorted copy of the input alignments, using the provided compare function.
+
+        \code
+            std::vector<BamAlignemnt> a;
+            // populate data
+
+            // get a copy of our original data, sorted by edit distance (descending order)
+            std::vector<BamAligment> sortedData;
+            sortedData = Sort::SortAlignments(a, Sort::ByTag<int>("NM", Sort::DescendingOrder));
+        \endcode
+
+        \param[in] input vector of alignments to be sorted
+        \param[in] comp  comparison function object
+        \return sorted copy of the input data
+    */
+    template<typename Compare>
+    static inline std::vector<BamAlignment> SortAlignments(const std::vector<BamAlignment>& input,
+                                                           const Compare& comp = Compare())
+    {
+        std::vector<BamAlignment> output(input);
+        SortAlignments(output, comp);
+        return output;
+    }
+
+    /*! Reads a region of alignments from a position-sorted BAM file,
+        then sorts by the provided compare function
+
+        \code
+            BamReader reader;
+            // open BAM file & index file
+
+            BamRegion region;
+            // define a region of interest (i.e. a exon or some other feature)
+
+            // get all alignments covering that region, sorted by read group name
+            std::vector<BamAlignments> a;
+            a = Sort::GetSortedRegion(reader, region, Sort::ByTag<std::string>("RG"));
+        \endcode
+
+        \param[in] reader BamReader opened on desired BAM file
+        \param[in] region desired region-of-interest
+        \param[in] comp   comparison function object
+        \return sorted vector of the region's alignments
+    */
+    template<typename Compare>
+    static std::vector<BamAlignment> GetSortedRegion(BamReader& reader,
+                                                     const BamRegion& region,
+                                                     const Compare& comp = Compare())
+    {
+        // return empty container if unable to find region
+        if ( !reader.IsOpen() )          return std::vector<BamAlignment>();
+        if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
+
+        // iterate through region, grabbing alignments
+        BamAlignment al;
+        std::vector<BamAlignment> results;
+        while ( reader.GetNextAlignmentCore(al) )
+            results.push_back(al);
+
+        // sort & return alignments
+        SortAlignments(results, comp);
+        return results;
+    }
+
+    /*! Reads a region of alignments from position-sorted BAM files,
+        then sorts by the provided compare function
+
+        \code
+            BamMultiReader reader;
+            // open BAM files & index files
+
+            BamRegion region;
+            // define a region of interest (i.e. a exon or some other feature)
+
+            // get all alignments covering that region, sorted by read group name
+            std::vector<BamAlignments> a;
+            a = Sort::GetSortedRegion(reader, region, Sort::ByTag<std::string>("RG"));
+        \endcode
+
+        \param[in] reader BamMultiReader opened on desired BAM files
+        \param[in] region desired region-of-interest
+        \param[in] comp   comparison function object
+        \return sorted vector of the region's alignments
+    */
+    template<typename Compare>
+    static std::vector<BamAlignment> GetSortedRegion(BamMultiReader& reader,
+                                                     const BamRegion& region,
+                                                     const Compare& comp = Compare())
+    {
+        // return empty container if unable to find region
+        if ( !reader.HasOpenReaders() )  return std::vector<BamAlignment>();
+        if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
+
+        // iterate through region, grabbing alignments
+        BamAlignment al;
+        std::vector<BamAlignment> results;
+        while ( reader.GetNextAlignmentCore(al) )
+            results.push_back(al);
+
+        // sort & return alignments
+        SortAlignments(results, comp);
+        return results;
+    }
+};
 
 } // namespace Algorithms
 } // namespace BamTools