X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Falgorithms%2FSort.h;h=92cb165a2c44ac031c73c8142eae40fdd1f075d9;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=f9d2dafdd1900afb25bff39c85c22bf41632f5d7;hpb=8a90b7aefffaf186053ef4da96c8663bf528274a;p=bamtools.git diff --git a/src/api/algorithms/Sort.h b/src/api/algorithms/Sort.h index f9d2daf..92cb165 100644 --- a/src/api/algorithms/Sort.h +++ b/src/api/algorithms/Sort.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 29 September 2011 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides sorting functionality. // *************************************************************************** @@ -11,12 +11,12 @@ #ifndef ALGORITHMS_SORT_H #define ALGORITHMS_SORT_H -#include -#include -#include -#include -#include +#include "api/api_global.h" +#include "api/BamAlignment.h" +#include "api/BamReader.h" +#include "api/BamMultiReader.h" #include +#include #include #include #include @@ -24,56 +24,96 @@ namespace BamTools { namespace Algorithms { +/*! \struct BamTools::Algorithms::Sort + \brief Provides classes & methods related to sorting BamAlignments +*/ struct API_EXPORT Sort { + //! Provides explicit values for specifying desired sort ordering enum Order { AscendingOrder = 0 , DescendingOrder }; + /*! \fn template static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs) + \internal + + Determines necessary STL function object depending on requested Sort::Order + */ template - static inline bool sort_helper(const Sort::Order& order, - const ElemType& lhs, - const ElemType& rhs) - { + static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs) { switch ( order ) { case ( Sort::AscendingOrder ) : { std::less comp; return comp(lhs, rhs); } case ( Sort::DescendingOrder ) : { std::greater comp; return comp(lhs, rhs); } - default : assert(false); + default : BT_ASSERT_UNREACHABLE; } return false; // <-- unreachable } + //! Base class for our sorting function objects typedef std::binary_function AlignmentSortBase; - // compare alignments by name + /*! \struct BamTools::Algorithms::Sort::ByName + \brief Function object for comparing alignments by name + + Default sort order is Sort::AscendingOrder. + + \code + std::vector a; + + // sort by name, in ascending order (the following two lines are equivalent): + std::sort( a.begin(), a.end(), Sort::ByName() ); + std::sort( a.begin(), a.end(), Sort::ByName(Sort::AscendingOrder) ); + + // OR sort in descending order + std::sort( a.begin(), a.end(), Sort::ByName(Sort::DescendingOrder) ); + \endcode + */ struct ByName : public AlignmentSortBase { + // ctor ByName(const Sort::Order& order = Sort::AscendingOrder) : m_order(order) { } - bool operator()(const BamTools::BamAlignment& lhs, - const BamTools::BamAlignment& rhs) - { + // comparison function + bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) { return sort_helper(m_order, lhs.Name, rhs.Name); } + // used by BamMultiReader internals static inline bool UsesCharData(void) { return true; } + // data members private: const Sort::Order& m_order; }; - // compare alignments by position + /*! \struct BamTools::Algorithms::Sort::ByPosition + \brief Function object for comparing alignments by position + + Default sort order is Sort::AscendingOrder. + + \code + std::vector a; + + // sort by position, in ascending order (the following two lines are equivalent): + std::sort( a.begin(), a.end(), Sort::ByPosition() ); + std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::AscendingOrder) ); + + // OR sort in descending order + std::sort( a.begin(), a.end(), Sort::ByPosition(Sort::DescendingOrder) ); + \endcode + */ struct ByPosition : public AlignmentSortBase { + // ctor ByPosition(const Sort::Order& order = Sort::AscendingOrder) : m_order(order) { } - bool operator()(const BamTools::BamAlignment& lhs, - const BamTools::BamAlignment& rhs) - { + // comparison function + bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) { + // force unmapped aligmnents to end if ( lhs.RefID == -1 ) return false; if ( rhs.RefID == -1 ) return true; @@ -86,25 +126,43 @@ struct API_EXPORT Sort { return sort_helper(m_order, lhs.RefID, rhs.RefID); } + // used by BamMultiReader internals static inline bool UsesCharData(void) { return false; } + // data members private: Sort::Order m_order; }; - // compare alignments by tag value + /*! \struct BamTools::Algorithms::Sort::ByTag + \brief Function object for comparing alignments by tag value + + Default sort order is Sort::AscendingOrder. + + \code + std::vector a; + + // sort by edit distance, in ascending order (the following two lines are equivalent): + std::sort( a.begin(), a.end(), Sort::ByTag("NM") ); + std::sort( a.begin(), a.end(), Sort::ByTag("NM", Sort::AscendingOrder) ); + + // OR sort in descending order + std::sort( a.begin(), a.end(), Sort::ByTag("NM", Sort::DescendingOrder) ); + \endcode + */ template struct ByTag : public AlignmentSortBase { + // ctor ByTag(const std::string& tag, const Sort::Order& order = Sort::AscendingOrder) : m_tag(tag) , m_order(order) { } - bool operator()(const BamTools::BamAlignment& lhs, - const BamTools::BamAlignment& rhs) - { + // comparison function + bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) { + // force alignments without tag to end T lhsTagValue; T rhsTagValue; @@ -115,87 +173,161 @@ struct API_EXPORT Sort { return sort_helper(m_order, lhsTagValue, rhsTagValue); } + // used by BamMultiReader internals static inline bool UsesCharData(void) { return true; } + // data members private: std::string m_tag; Sort::Order m_order; }; - // essentially a placeholder comparison object, ignores the alignments' data - // N.B. - returning false tends to retain insertion order + /*! \struct BamTools::Algorithms::Sort::Unsorted + \brief Placeholder function object + + This function object exists purely to allow for dropping a "do not care" ordering + into methods, containers, etc that are designed to work with the other sorting objects. + + \code + std::set; // STL set, ordered on alignment name + std::set; // STL set, unsorted (but probably insertion order) + \endcode + */ struct Unsorted : public AlignmentSortBase { - inline bool operator()(const BamTools::BamAlignment& /*lhs*/, - const BamTools::BamAlignment& /*rhs*/) - { - return false; + + // comparison function + inline bool operator()(const BamTools::BamAlignment&, const BamTools::BamAlignment&) { + return false; // returning false tends to retain insertion order } + // used by BamMultiReader internals static inline bool UsesCharData(void) { return false; } }; -}; -// in-place sorting of alignment vector -template -inline -void SortAlignments(std::vector& data, - const Compare& comp = Compare()) -{ - std::sort(data.begin(), data.end(), comp); -} - -// returns sorted copy of input alignment vector -template -inline -std::vector SortAlignmentsCopy(const std::vector& input, - const Compare& comp = Compare()) -{ - std::vector output(input); - SortAlignments(output, comp); - return output; -} - -// pulls a region from a position-sorted BAM file -// returns the alignments sorted by user-defined Compare type -template -std::vector GetSortedRegion(BamReader& reader, - const BamRegion& region, - const Compare& comp = Compare()) -{ - // return empty container if unable to find region - if ( !reader.IsOpen() ) return std::vector(); - if ( !reader.SetRegion(region) ) return std::vector(); - - // iterate through region, grabbing alignments - BamAlignment al; - std::vector results; - while ( reader.GetNextAlignmentCore(al) ) - results.push_back(al); - - // sort & return alignments - SortAlignments(results, comp); - return results; -} - -template -std::vector GetSortedRegion(BamMultiReader& reader, - const BamRegion& region, - const Compare& comp = Compare()) -{ - // return empty container if unable to find region - if ( !reader.HasOpenReaders() ) return std::vector(); - if ( !reader.SetRegion(region) ) return std::vector(); - - // iterate through region, grabbing alignments - BamAlignment al; - std::vector results; - while ( reader.GetNextAlignmentCore(al) ) - results.push_back(al); - - // sort & return alignments - SortAlignments(results, comp); - return results; -} + /*! Sorts a std::vector of alignments (in-place), using the provided compare function. + + \code + std::vector a; + // populate data + + // sort our alignment list by edit distance + Sort::SortAlignments(a, Sort::ByTag("NM")); + \endcode + + \param[in,out] data vector of alignments to be sorted + \param[in] comp comparison function object + */ + template + static inline void SortAlignments(std::vector& data, + const Compare& comp = Compare()) + { + std::sort(data.begin(), data.end(), comp); + } + + /*! Returns a sorted copy of the input alignments, using the provided compare function. + + \code + std::vector a; + // populate data + + // get a copy of our original data, sorted by edit distance (descending order) + std::vector sortedData; + sortedData = Sort::SortAlignments(a, Sort::ByTag("NM", Sort::DescendingOrder)); + \endcode + + \param[in] input vector of alignments to be sorted + \param[in] comp comparison function object + \return sorted copy of the input data + */ + template + static inline std::vector SortAlignments(const std::vector& input, + const Compare& comp = Compare()) + { + std::vector output(input); + SortAlignments(output, comp); + return output; + } + + /*! Reads a region of alignments from a position-sorted BAM file, + then sorts by the provided compare function + + \code + BamReader reader; + // open BAM file & index file + + BamRegion region; + // define a region of interest (i.e. a exon or some other feature) + + // get all alignments covering that region, sorted by read group name + std::vector a; + a = Sort::GetSortedRegion(reader, region, Sort::ByTag("RG")); + \endcode + + \param[in] reader BamReader opened on desired BAM file + \param[in] region desired region-of-interest + \param[in] comp comparison function object + \return sorted vector of the region's alignments + */ + template + static std::vector GetSortedRegion(BamReader& reader, + const BamRegion& region, + const Compare& comp = Compare()) + { + // return empty container if unable to find region + if ( !reader.IsOpen() ) return std::vector(); + if ( !reader.SetRegion(region) ) return std::vector(); + + // iterate through region, grabbing alignments + BamAlignment al; + std::vector results; + while ( reader.GetNextAlignmentCore(al) ) + results.push_back(al); + + // sort & return alignments + SortAlignments(results, comp); + return results; + } + + /*! Reads a region of alignments from position-sorted BAM files, + then sorts by the provided compare function + + \code + BamMultiReader reader; + // open BAM files & index files + + BamRegion region; + // define a region of interest (i.e. a exon or some other feature) + + // get all alignments covering that region, sorted by read group name + std::vector a; + a = Sort::GetSortedRegion(reader, region, Sort::ByTag("RG")); + \endcode + + \param[in] reader BamMultiReader opened on desired BAM files + \param[in] region desired region-of-interest + \param[in] comp comparison function object + \return sorted vector of the region's alignments + */ + template + static std::vector GetSortedRegion(BamMultiReader& reader, + const BamRegion& region, + const Compare& comp = Compare()) + { + // return empty container if unable to find region + if ( !reader.HasOpenReaders() ) return std::vector(); + if ( !reader.SetRegion(region) ) return std::vector(); + + // iterate through region, grabbing alignments + BamAlignment al; + std::vector results; + while ( reader.GetNextAlignmentCore(al) ) + results.push_back(al); + + // sort & return alignments + SortAlignments(results, comp); + return results; + } +}; } // namespace Algorithms } // namespace BamTools