git.donarmstrong.com Git - bamtools.git/commitdiff
Fix: unmapped reads now pushed to end of coordinate-sorted BAM
author derek <derekwbarnett@gmail.com>
Wed, 28 Sep 2011 16:26:25 +0000 (12:26 -0400)
committer derek <derekwbarnett@gmail.com>
Wed, 28 Sep 2011 16:26:25 +0000 (12:26 -0400)
src/api/internal/BamMultiMerger_p.h
src/toolkit/bamtools_sort.cpp

index 15ae437dc44ba1f55d5dd54b37ee3c39e6f9f23e..b279611f5a4f11390865d19cb83730f60d4bd3f0 100644 (file)
@@ -2,7 +2,7 @@
 // BamMultiMerger_p.h (c) 2010 Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 9 September 2011 (DB)
+// Last modified: 28 September 2011 (DB)
 // ---------------------------------------------------------------------------
 // Provides merging functionality for BamMultiReader.  At this point, supports
 // sorting results by (refId, position) or by read name.
@@ -71,7 +71,24 @@ class PositionMultiMerger : public IBamMultiMerger {
         typedef ReaderAlignment               ValueType;
         typedef std::pair<KeyType, ValueType> ElementType;
 
-        typedef std::multimap<KeyType, ValueType> ContainerType;
+        // Orders (RefID, Position) keys ascending; RefID == -1 (unmapped) sorts last.
+        struct SortLessThanPosition {
+            // const-qualified: std::multimap invokes its comparator through a const object
+            bool operator() (const KeyType& lhs, const KeyType& rhs) const {
+                // force unmapped alignments to end (two unmapped keys compare equivalent)
+                if ( lhs.first == -1 ) return false;
+                if ( rhs.first == -1 ) return true;
+
+                // sort first on RefID, then by Position
+                if ( lhs.first != rhs.first )
+                    return lhs.first < rhs.first;
+                return lhs.second < rhs.second;
+            }
+        };
+
+        typedef SortLessThanPosition Compare;
+
+        typedef std::multimap<KeyType, ValueType, Compare> ContainerType;
         typedef ContainerType::iterator           DataIterator;
         typedef ContainerType::const_iterator     DataConstIterator;
 
index fb181a8b7566abd8b47b0574c78a0b332a84319c..ad0e4f0a69b9fa9a38ce92831ae5728853a37259 100644 (file)
@@ -2,7 +2,7 @@
 // bamtools_sort.cpp (c) 2010 Derek Barnett, Erik Garrison
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 14 June 2011 (DB)
+// Last modified: 28 September 2011 (DB)
 // ---------------------------------------------------------------------------
 // Sorts an input BAM file
 // ***************************************************************************
@@ -40,6 +40,12 @@ const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024;    // Mb
 
 struct SortLessThanPosition {
     bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
+
+        // force unmapped alignments to end
+        if ( lhs.RefID == -1 ) return false;
+        if ( rhs.RefID == -1 ) return true;
+
+        // sort first on RefID, then by Position
         if ( lhs.RefID != rhs.RefID )
             return lhs.RefID < rhs.RefID;
         else