X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Finternal%2Fbam%2FBamMultiReader_p.cpp;h=310d8371241f4650fcb95c66e1977bcc2625ef25;hb=2126ee0d204be8293df9492b48bce076a41a2a25;hp=d3f2b156fdf0152989d9779f2d9b9701a85e22e6;hpb=75ebabf8071379eaec8349f6708dfb2567d289c6;p=bamtools.git diff --git a/src/api/internal/bam/BamMultiReader_p.cpp b/src/api/internal/bam/BamMultiReader_p.cpp index d3f2b15..310d837 100644 --- a/src/api/internal/bam/BamMultiReader_p.cpp +++ b/src/api/internal/bam/BamMultiReader_p.cpp @@ -2,7 +2,7 @@ // BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 25 October 2011 (DB) +// Last modified: 24 July 2013 (DB) // --------------------------------------------------------------------------- // Functionality for simultaneously reading multiple BAM files // ************************************************************************* @@ -25,6 +25,8 @@ using namespace std; // ctor BamMultiReaderPrivate::BamMultiReaderPrivate(void) : m_alignmentCache(0) + , m_hasUserMergeOrder(false) + , m_mergeOrder(BamMultiReader::RoundRobinMerge) { } // dtor @@ -115,11 +117,19 @@ bool BamMultiReaderPrivate::CloseFiles(const vector& filenames) { } } - // make sure alignment cache is cleaned up if all readers closed - if ( m_readers.empty() && m_alignmentCache ) { - m_alignmentCache->Clear(); - delete m_alignmentCache; - m_alignmentCache = 0; + // make sure we clean up properly if all readers were closed + if ( m_readers.empty() ) { + + // clean up merger + if ( m_alignmentCache ) { + m_alignmentCache->Clear(); + delete m_alignmentCache; + m_alignmentCache = 0; + } + + // reset merge flags + m_hasUserMergeOrder = false; + m_mergeOrder = BamMultiReader::RoundRobinMerge; } // return whether all readers closed OK @@ -161,21 +171,46 @@ bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { return true; } -IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const { +IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) { + + // if no merge order set explicitly, use SAM header to lookup proper order + if ( !m_hasUserMergeOrder ) { + + // fetch SamHeader from BAM files + SamHeader header = GetHeader(); + + // if BAM files are sorted by position + if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ) + m_mergeOrder = BamMultiReader::MergeByCoordinate; - // fetch SamHeader - SamHeader header = GetHeader(); + // if BAM files are sorted by read name + else if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME ) + m_mergeOrder = BamMultiReader::MergeByName; - // if BAM files are sorted by position - if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE ) - return new MultiMerger(); + // otherwise, sorting is either "unknown" or marked as "unsorted" + else + m_mergeOrder = BamMultiReader::RoundRobinMerge; + } + + // use current merge order to create proper 'multi-merger' + switch ( m_mergeOrder ) { + + // merge BAM files by position + case BamMultiReader::MergeByCoordinate : + return new MultiMerger(); + + // merge BAM files by read name + case BamMultiReader::MergeByName : + return new MultiMerger(); - // if BAM files are sorted by read name - if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME ) - return new MultiMerger(); + // sorting is "unknown", "unsorted" or "ignored"... so use unsorted merger + case BamMultiReader::RoundRobinMerge : + return new MultiMerger(); - // otherwise "unknown" or "unsorted", use unsorted merger and just read in - return new MultiMerger(); + // unknown merge order, can't create merger + default: + return 0; + } } const vector BamMultiReaderPrivate::Filenames(void) const { @@ -248,6 +283,10 @@ string BamMultiReaderPrivate::GetHeaderText(void) const { return mergedHeader.ToString(); } +BamMultiReader::MergeOrder BamMultiReaderPrivate::GetMergeOrder(void) const { + return m_mergeOrder; +} + // get next alignment among all files bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) { return PopNextCachedAlignment(al, true); @@ -622,6 +661,40 @@ void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* a m_alignmentCache->Add( MergeItem(reader, alignment) ); } +bool BamMultiReaderPrivate::SetExplicitMergeOrder(BamMultiReader::MergeOrder order) { + + // set new merge flags + m_hasUserMergeOrder = true; + m_mergeOrder = order; + + // remove any existing merger (storing any existing data sitting in the cache) + vector currentCacheData; + if ( m_alignmentCache ) { + while ( !m_alignmentCache->IsEmpty() ) + currentCacheData.push_back( m_alignmentCache->TakeFirst() ); + delete m_alignmentCache; + m_alignmentCache = 0; + } + + // create new cache using the new merge flags + m_alignmentCache = CreateAlignmentCache(); + if ( m_alignmentCache == 0 ) { + SetErrorString("BamMultiReader::SetExplicitMergeOrder", "requested order is unrecognized"); + return false; + } + + // push current data onto new cache + vector::const_iterator readerIter = currentCacheData.begin(); + vector::const_iterator readerEnd = currentCacheData.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + const MergeItem& item = (*readerIter); + m_alignmentCache->Add(item); + } + + // return success + return true; +} + void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const { static const string SEPARATOR = ": "; m_errorString = where + SEPARATOR + what;