# set BamTools version information
set( BamTools_VERSION_MAJOR 2 )
set( BamTools_VERSION_MINOR 2 )
-set( BamTools_VERSION_BUILD 2 )
+set( BamTools_VERSION_BUILD 3 )
# set our library and executable destination dirs
set( EXECUTABLE_OUTPUT_PATH "${CMAKE_SOURCE_DIR}/bin" )
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 2.2.2
+PROJECT_NUMBER = 2.2.3
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
//
\brief Convenience class for reading multiple BAM files.
*/
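+/*! \enum BamMultiReader::MergeOrder
+ \brief Strategies for choosing which BAM file supplies the next alignment when merging.
+*/
+/*! \var BamMultiReader::MergeOrder BamMultiReader::RoundRobinMerge
+ \brief BAM files are read in round-robin fashion; used when the sort order is unknown, unsorted, or ignored.
+*/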
+/*! \var BamMultiReader::MergeOrder BamMultiReader::MergeByCoordinate
+ \brief BAM files are merged by alignment position (coordinate sort order).
+*/
+/*! \var BamMultiReader::MergeOrder BamMultiReader::MergeByName
+ \brief BAM files are merged by read name (queryname sort order).
+*/
+
+
+
/*! \fn BamMultiReader::BamMultiReader(void)
\brief constructor
*/
return d->GetHeaderText();
}
+/*! \fn BamMultiReader::MergeOrder BamMultiReader::GetMergeOrder(void) const
+ \brief Returns current merge order strategy.
+
+ \returns current merge order enum value
+ \sa BamMultiReader::MergeOrder, SetExplicitMergeOrder()
+*/
+BamMultiReader::MergeOrder BamMultiReader::GetMergeOrder(void) const {
+ return d->GetMergeOrder();
+}
+
/*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
\brief Retrieves next available alignment.
\param[out] alignment destination for alignment record data
\returns \c true if a valid alignment was found
- \sa GetNextAlignmentCore(), SetRegion(), BamReader::GetNextAlignment()
+ \sa GetNextAlignmentCore(), SetExplicitMergeOrder(), SetRegion(), BamReader::GetNextAlignment()
*/
bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
return d->GetNextAlignment(nextAlignment);
\param[out] alignment destination for alignment record data
\returns \c true if a valid alignment was found
- \sa GetNextAlignment(), SetRegion(), BamReader::GetNextAlignmentCore()
+ \sa GetNextAlignment(), SetExplicitMergeOrder(), SetRegion(), BamReader::GetNextAlignmentCore()
*/
bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
return d->GetNextAlignmentCore(nextAlignment);
return d->Rewind();
}
+/*! \fn void BamMultiReader::SetExplicitMergeOrder(BamMultiReader::MergeOrder order)
+ \brief Sets an explicit merge order, regardless of the BAM files' SO header tag.
+
+ The default behavior of the BamMultiReader is to check the SO tag in the BAM files'
+ SAM header text to determine the merge strategy. The merge strategy is used to
+ determine from which BAM file the next alignment should come when either
+ GetNextAlignment() or GetNextAlignmentCore() is called. If files share a
+ 'coordinate' or 'queryname' value for this tag, then the merge strategy is
+ selected accordingly. If any of them do not match, or if any file is marked as
+ 'unsorted', then the merge strategy is simply a round-robin.
+
+ This method allows client code to explicitly override the lookup behavior. This
+ method can be useful when you know, for example, that your BAM files are sorted
+ by coordinate but upstream processes did not set the header tag properly.
+
+ \note This method should \b not be called while reading alignments via
+ GetNextAlignment() or GetNextAlignmentCore(). For proper results, you should
+ call this method before (or immediately after) opening files, rewinding,
+ jumping, etc. but \b not once alignment fetching has started. There is
+ nothing in the API to prevent you from doing so, but the results may be
+ unexpected.
+
+ \param[in] order merge order strategy to use from now on
+ \sa BamMultiReader::MergeOrder, GetMergeOrder(), GetNextAlignment(), GetNextAlignmentCore()
+*/
+void BamMultiReader::SetExplicitMergeOrder(BamMultiReader::MergeOrder order) {
+ d->SetExplicitMergeOrder(order);
+}
+
/*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
\brief Sets a target region of interest
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
// ***************************************************************************
class API_EXPORT BamMultiReader {
+ // enums
+ public:
+ // possible merge order strategies
+ // (selects which merger implementation is used when reading from multiple files)
+ enum MergeOrder { RoundRobinMerge = 0 // sort order unknown, unsorted, or ignored
+ , MergeByCoordinate // merge alignments by position
+ , MergeByName // merge alignments by read name
+ };
+
// constructor / destructor
public:
BamMultiReader(void);
bool CloseFile(const std::string& filename);
// returns list of filenames for all open BAM files
const std::vector<std::string> Filenames(void) const;
+ // returns current merge order strategy
+ BamMultiReader::MergeOrder GetMergeOrder(void) const;
// returns true if multireader has any open BAM files
bool HasOpenReaders(void) const;
// performs random-access jump within current BAM files
bool OpenFile(const std::string& filename);
// returns file pointers to beginning of alignments
bool Rewind(void);
+ // sets an explicit merge order, regardless of the BAM files' SO header tag
+ void SetExplicitMergeOrder(BamMultiReader::MergeOrder order);
// sets the target region of interest
bool SetRegion(const BamRegion& region);
// sets the target region of interest
// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// ctor
+// initial state: no merger allocated, no user-specified merge order,
+// and round-robin as the starting merge order value
BamMultiReaderPrivate::BamMultiReaderPrivate(void)
: m_alignmentCache(0)
+ , m_hasUserMergeOrder(false)
+ , m_mergeOrder(BamMultiReader::RoundRobinMerge)
{ }
// dtor
}
}
- // make sure alignment cache is cleaned up if all readers closed
- if ( m_readers.empty() && m_alignmentCache ) {
- m_alignmentCache->Clear();
- delete m_alignmentCache;
- m_alignmentCache = 0;
+ // make sure we clean up properly if all readers were closed
+ if ( m_readers.empty() ) {
+
+ // clean up merger
+ if ( m_alignmentCache ) {
+ m_alignmentCache->Clear();
+ delete m_alignmentCache;
+ m_alignmentCache = 0;
+ }
+
+ // reset merge flags
+ m_hasUserMergeOrder = false;
+ m_mergeOrder = BamMultiReader::RoundRobinMerge;
}
// return whether all readers closed OK
return true;
}
-IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
+// Creates the 'multi-merger' that matches the current merge order.
+//
+// If the client has not set a merge order explicitly, the order is first
+// derived from the merged SAM header's sort order and stored in m_mergeOrder
+// (which is why this method is no longer const).
+//
+// Returns a newly allocated merger, or 0 if m_mergeOrder holds an
+// unrecognized value.
+IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) {
+
+ // if no merge order set explicitly, use SAM header to lookup proper order
+ if ( !m_hasUserMergeOrder ) {
+
+ // fetch SamHeader from BAM files
+ SamHeader header = GetHeader();
+
+ // if BAM files are sorted by position
+ if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
+ m_mergeOrder = BamMultiReader::MergeByCoordinate;
+
+ // if BAM files are sorted by read name
+ // (must be 'else if': a plain 'if' here lets the trailing 'else' overwrite
+ // a coordinate match with RoundRobinMerge)
+ else if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
+ m_mergeOrder = BamMultiReader::MergeByName;
+
+ // otherwise, sorting is either "unknown" or marked as "unsorted"
+ else
+ m_mergeOrder = BamMultiReader::RoundRobinMerge;
+ }
+
+ // use current merge order to create proper 'multi-merger'
+ switch ( m_mergeOrder ) {
- // fetch SamHeader
- SamHeader header = GetHeader();
+ // merge BAM files by position
+ case BamMultiReader::MergeByCoordinate :
+ return new MultiMerger<Algorithms::Sort::ByPosition>();
- // if BAM files are sorted by position
- if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
- return new MultiMerger<Algorithms::Sort::ByPosition>();
+ // merge BAM files by read name
+ case BamMultiReader::MergeByName :
+ return new MultiMerger<Algorithms::Sort::ByName>();
- // if BAM files are sorted by read name
- if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
- return new MultiMerger<Algorithms::Sort::ByName>();
+ // sorting is "unknown", "unsorted" or "ignored"... so use unsorted merger
+ case BamMultiReader::RoundRobinMerge :
+ return new MultiMerger<Algorithms::Sort::Unsorted>();
- // otherwise "unknown" or "unsorted", use unsorted merger and just read in
- return new MultiMerger<Algorithms::Sort::Unsorted>();
+ // unknown merge order, can't create merger
+ default:
+ return 0;
+ }
}
const vector<string> BamMultiReaderPrivate::Filenames(void) const {
return mergedHeader.ToString();
}
+// returns the merge order currently stored in m_mergeOrder
+BamMultiReader::MergeOrder BamMultiReaderPrivate::GetMergeOrder(void) const {
+ return m_mergeOrder;
+}
+
// get next alignment among all files
bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
return PopNextCachedAlignment(al, true);
m_alignmentCache->Add( MergeItem(reader, alignment) );
}
+// records a client-specified merge order, discards any existing merger,
+// and then asks for the alignment cache to be updated
+void BamMultiReaderPrivate::SetExplicitMergeOrder(BamMultiReader::MergeOrder order) {
+
+ // set new merge flags
+ // (m_hasUserMergeOrder makes CreateAlignmentCache() skip the SAM header lookup)
+ m_hasUserMergeOrder = true;
+ m_mergeOrder = order;
+
+ // remove any existing merger
+ if ( m_alignmentCache ) {
+ m_alignmentCache->Clear();
+ delete m_alignmentCache;
+ m_alignmentCache = 0;
+ }
+
+ // update cache with new strategy
+ // NOTE(review): presumably this rebuilds the merger via CreateAlignmentCache() - confirm
+ UpdateAlignmentCache();
+}
+
void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
static const string SEPARATOR = ": ";
m_errorString = where + SEPARATOR + what;
// BamMultiReader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
bool SetRegion(const BamRegion& region);
// access alignment data
+ BamMultiReader::MergeOrder GetMergeOrder(void) const;
bool GetNextAlignment(BamAlignment& al);
bool GetNextAlignmentCore(BamAlignment& al);
bool HasOpenReaders(void);
+ void SetExplicitMergeOrder(BamMultiReader::MergeOrder order);
// access auxiliary data
SamHeader GetHeader(void) const;
public:
bool CloseFiles(const std::vector<std::string>& filenames);
- IMultiMerger* CreateAlignmentCache(void) const;
+ IMultiMerger* CreateAlignmentCache(void);
bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
bool RewindReaders(void);
void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
public:
std::vector<MergeItem> m_readers;
IMultiMerger* m_alignmentCache;
+
+ bool m_hasUserMergeOrder;
+ BamMultiReader::MergeOrder m_mergeOrder;
+
mutable std::string m_errorString;
};
# set BamTools application properties
set_target_properties( bamtools_cmd PROPERTIES
- VERSION 2.2.2
+ VERSION 2.2.3
OUTPUT_NAME "bamtools"
)
# make version info available in application