// bamtools_sort.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 3 October 2011 (DB)
+// Last modified: 27 March 2012 (DB)
// ---------------------------------------------------------------------------
// Sorts an input BAM file
// ***************************************************************************
#include <api/SamConstants.h>
#include <api/BamMultiReader.h>
#include <api/BamWriter.h>
+#include <api/algorithms/Sort.h>
#include <utils/bamtools_options.h>
using namespace BamTools;
+using namespace BamTools::Algorithms;
#include <cstdio>
#include <algorithm>
// compromise that should perform well on average.
const unsigned int SORT_DEFAULT_MAX_BUFFER_COUNT = 500000; // max numberOfAlignments for buffer
const unsigned int SORT_DEFAULT_MAX_BUFFER_MEMORY = 1024; // Mb
-
-// -----------------------------------
-// comparison objects (for sorting)
-
-struct SortLessThanPosition {
- bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
-
- // force unmapped alignments to end
- if ( lhs.RefID == -1 ) return false;
- if ( rhs.RefID == -1 ) return true;
-
- // sort first on RefID, then by Position
- if ( lhs.RefID != rhs.RefID )
- return lhs.RefID < rhs.RefID;
- else
- return lhs.Position < rhs.Position;
- }
-};
-
-struct SortLessThanName {
- bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) {
- return lhs.Name < rhs.Name;
- }
-};
} // namespace BamTools
// that way multiple sort runs don't trip on each other's temp files
if ( m_settings) {
size_t extensionFound = m_settings->InputBamFilename.find(".bam");
- if (extensionFound != string::npos )
+ if ( extensionFound != string::npos )
m_tempFilenameStub = m_settings->InputBamFilename.substr(0,extensionFound);
m_tempFilenameStub.append(".sort.temp.");
}
// get basic data that will be shared by all temp/output files
SamHeader header = reader.GetHeader();
+ if ( !header.HasVersion() )
+ header.Version = Constants::SAM_CURRENT_VERSION;
header.SortOrder = ( m_settings->IsSortingByName
? Constants::SAM_HD_SORTORDER_QUERYNAME
: Constants::SAM_HD_SORTORDER_COORDINATE );
// if buffer is "full"
else {
-
- // push any unmapped reads into buffer,
- // don't want to split these into a separate temp file
- if ( !al.IsMapped() )
- buffer.push_back(al);
-
- // "al" is mapped, so create a sorted temp file with current buffer contents
+ // so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
- else {
- CreateSortedTempFile(buffer);
- buffer.push_back(al);
- }
+ CreateSortedTempFile(buffer);
+ buffer.push_back(al);
}
}
}
// if buffer is "full"
else {
-
- // push any unmapped reads into buffer,
- // don't want to split these into a separate temp file
- if ( !al.IsMapped() )
- buffer.push_back(al);
-
- // "al" is mapped, so create a sorted temp file with current buffer contents
+ // create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
- else {
- CreateSortedTempFile(buffer);
- buffer.push_back(al);
- }
+ CreateSortedTempFile(buffer);
+ buffer.push_back(al);
}
}
}
// sort buffer by desired method
if ( m_settings->IsSortingByName )
- std::stable_sort ( buffer.begin(), buffer.end(), SortLessThanName() );
- else
- std::stable_sort ( buffer.begin(), buffer.end(), SortLessThanPosition() );
+ std::stable_sort( buffer.begin(), buffer.end(), Sort::ByName() );
+ else
+ std::stable_sort( buffer.begin(), buffer.end(), Sort::ByPosition() );
}
bool SortTool::SortToolPrivate::WriteTempFile(const vector<BamAlignment>& buffer,