# set BamTools version information
set( BamTools_VERSION_MAJOR 2 )
-set( BamTools_VERSION_MINOR 2 )
+set( BamTools_VERSION_MINOR 3 )
set( BamTools_VERSION_BUILD 0 )
# set our library and executable destination dirs
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 2.1.1
+PROJECT_NUMBER = 2.3.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
// BamAlignment.cpp (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 April 2012 (DB)
+// Last modified: 4 December 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
return false;
}
+/*! \fn bool BamAlignment::GetArrayTagType(const std::string& tag, char& type) const
+    \brief Reports the BAM type-code of the elements stored in an array tag.
+
+    \param[in]  tag   2-character tag name
+    \param[out] type  (1-character) element type-code on success; if the tag is found but
+                      rejected, it holds the tag's own type-code instead
+
+    \return \c true if found. False if not found, if tag is not an array type, or if the
+            array's element type is not one of the supported numeric codes.
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+bool BamAlignment::GetArrayTagType(const std::string& tag, char& type) const {
+
+    // nothing to look up: core-only alignment, or no tag data at all
+    // TODO: set error string?
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // scan the raw tag buffer for the requested tag
+    char* cursor = const_cast<char*>( TagData.data() );
+    const unsigned int bufferLength = TagData.size();
+    unsigned int bytesConsumed = 0;
+    const bool tagFound = FindTag(tag, cursor, bufferLength, bytesConsumed);
+    if ( !tagFound ) {
+        // TODO: set error string?
+        return false;
+    }
+
+    // the tag's own type code sits one byte behind the cursor; it must be 'array'
+    type = *(cursor - 1);
+    if ( type != Constants::BAM_TAG_TYPE_ARRAY ) {
+        // TODO: set error string
+        return false;
+    }
+
+    // the cursor itself points at the element type code; only these are allowed
+    const char elementCode = *cursor;
+    const bool isSupported = ( elementCode == Constants::BAM_TAG_TYPE_INT8   ||
+                               elementCode == Constants::BAM_TAG_TYPE_UINT8  ||
+                               elementCode == Constants::BAM_TAG_TYPE_INT16  ||
+                               elementCode == Constants::BAM_TAG_TYPE_UINT16 ||
+                               elementCode == Constants::BAM_TAG_TYPE_INT32  ||
+                               elementCode == Constants::BAM_TAG_TYPE_UINT32 ||
+                               elementCode == Constants::BAM_TAG_TYPE_FLOAT );
+    if ( !isSupported ) {
+        //TODO: set error string
+        return false;
+    }
+
+    // success: report the element type
+    type = elementCode;
+    return true;
+}
+
+
/*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool closedInterval = false) const
\brief Calculates alignment end position, based on its starting position and CIGAR data.
return softClipFound;
}
+/*! \fn std::vector<std::string> BamAlignment::GetTagNames(void) const
+    \brief Retrieves the BAM tag names.
+
+    When paired with GetTagType() and GetTag(), this method allows you
+    to iterate over an alignment's tag data without knowing the names (or types)
+    beforehand.
+
+    Parsing stops quietly at the first truncated or unparseable tag entry; any names
+    collected up to that point are still returned.
+
+    \return \c vector containing all tag names found (empty if none available)
+    \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
+*/
+std::vector<std::string> BamAlignment::GetTagNames(void) const {
+
+    std::vector<std::string> result;
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return result;
+
+    // every tag entry starts with a 3-byte header: 2-char name + 1-char type code
+    const unsigned int TAG_HEADER_LENGTH = 3;
+
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+    while ( numBytesParsed < tagDataLength ) {
+
+        // stop if the remaining bytes cannot hold a complete tag header
+        // (prevents reading past the end of truncated tag data)
+        if ( tagDataLength - numBytesParsed < TAG_HEADER_LENGTH )
+            break;
+
+        // get current tag name & type
+        const char* pTagName = pTagData;
+        const char* pTagType = pTagData + 2;
+        pTagData += TAG_HEADER_LENGTH;
+        numBytesParsed += TAG_HEADER_LENGTH;
+
+        // store tag name
+        result.push_back( std::string(pTagName, 2) );
+
+        // find the next tag
+        if ( *pTagType == '\0' ) break;
+        if ( !SkipToNextTag(*pTagType, pTagData, numBytesParsed) ) break;
+
+        // do not dereference the cursor once the whole buffer has been consumed
+        if ( numBytesParsed >= tagDataLength ) break;
+        if ( *pTagData == '\0' ) break;
+    }
+
+    return result;
+}
+
/*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
\brief Retrieves the BAM tag type-code associated with requested tag name.
// BamAlignment.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 16 October 2011 (DB)
+// Last modified: 25 July 2013 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
template<typename T> bool GetTag(const std::string& tag, T& destination) const;
template<typename T> bool GetTag(const std::string& tag, std::vector<T>& destination) const;
+ // retrieves all current tag names
+ std::vector<std::string> GetTagNames(void) const;
+
// retrieves the SAM/BAM type-code for requested tag name
bool GetTagType(const std::string& tag, char& type) const;
+ // retrieves the SAM/BAM type-code for the data elements in an array tag
+ bool GetArrayTagType(const std::string& tag, char& type) const;
+
// returns true if alignment has a record for this tag name
bool HasTag(const std::string& tag) const;
public:
std::string Name; // read name
int32_t Length; // length of query sequence
- std::string QueryBases; // 'original' sequence (as reported from sequencing machine)
- std::string AlignedBases; // 'aligned' sequence (includes any indels, padding, clipping)
+ std::string QueryBases; // 'original' sequence (contained in BAM file)
+ std::string AlignedBases; // 'aligned' sequence (QueryBases plus deletion, padding, clipping chars)
std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
std::string TagData; // tag data (use provided methods to query/modify)
int32_t RefID; // ID number for reference sequence
// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
//
/*! \class BamTools::BamMultiReader
\brief Convenience class for reading multiple BAM files.
*/
+/*! \enum BamMultiReader::MergeOrder
+ \brief Used to describe the merge strategy of the BamMultiReader.
+
+ The merge strategy determines which alignment is 'next' from across
+ all opened BAM files.
+*/
+/*! \var BamMultiReader::MergeOrder BamMultiReader::RoundRobinMerge
+ \brief Merge strategy when BAM files are unsorted, or their sorted status is either unknown or ignored
+*/
+/*! \var BamMultiReader::MergeOrder BamMultiReader::MergeByCoordinate
+ \brief Merge strategy when BAM files are sorted by position ('coordinate')
+*/
+/*! \var BamMultiReader::MergeOrder BamMultiReader::MergeByName
+ \brief Merge strategy when BAM files are sorted by read name ('queryname')
+*/
/*! \fn BamMultiReader::BamMultiReader(void)
\brief constructor
return d->GetHeaderText();
}
+/*! \fn BamMultiReader::MergeOrder BamMultiReader::GetMergeOrder(void) const
+ \brief Returns current merge order strategy.
+
+ \returns current merge order enum value
+ \sa BamMultiReader::MergeOrder, SetExplicitMergeOrder()
+*/
+BamMultiReader::MergeOrder BamMultiReader::GetMergeOrder(void) const {
+ return d->GetMergeOrder();
+}
+
/*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
\brief Retrieves next available alignment.
\param[out] alignment destination for alignment record data
\returns \c true if a valid alignment was found
- \sa GetNextAlignmentCore(), SetRegion(), BamReader::GetNextAlignment()
+ \sa GetNextAlignmentCore(), SetExplicitMergeOrder(), SetRegion(), BamReader::GetNextAlignment()
*/
bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
return d->GetNextAlignment(nextAlignment);
\param[out] alignment destination for alignment record data
\returns \c true if a valid alignment was found
- \sa GetNextAlignment(), SetRegion(), BamReader::GetNextAlignmentCore()
+ \sa GetNextAlignment(), SetExplicitMergeOrder(), SetRegion(), BamReader::GetNextAlignmentCore()
*/
bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
return d->GetNextAlignmentCore(nextAlignment);
return d->Rewind();
}
+/*! \fn bool BamMultiReader::SetExplicitMergeOrder(BamMultiReader::MergeOrder order)
+ \brief Sets an explicit merge order, regardless of the BAM files' SO header tag.
+
+ The default behavior of the BamMultiReader is to check the SO tag in the BAM files'
+ SAM header text to determine the merge strategy. The merge strategy is used to
+ determine from which BAM file the next alignment should come when either
+ GetNextAlignment() or GetNextAlignmentCore() is called. If files share a
+ 'coordinate' or 'queryname' value for this tag, then the merge strategy is
+ selected accordingly. If any of them do not match, or if any file is marked as
+ 'unsorted', then the merge strategy is simply a round-robin.
+
+ This method allows client code to explicitly override the lookup behavior. This
+ method can be useful when you know, for example, that your BAM files are sorted
+ by coordinate but upstream processes did not set the header tag properly.
+
+ \note This method should \b not be called while reading alignments via
+ GetNextAlignment() or GetNextAlignmentCore(). For proper results, you should
+ call this method before (or immediately after) opening files, rewinding,
+ jumping, etc. but \b not once alignment fetching has started. There is
+ nothing in the API to prevent you from doing so, but the results may be
+ unexpected.
+
+ \param[in] order merge strategy to use from now on
+ \returns \c true if merge order could be successfully applied
+ \sa BamMultiReader::MergeOrder, GetMergeOrder(), GetNextAlignment(), GetNextAlignmentCore()
+*/
+bool BamMultiReader::SetExplicitMergeOrder(BamMultiReader::MergeOrder order) {
+ return d->SetExplicitMergeOrder(order);
+}
+
/*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
\brief Sets a target region of interest
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
// ***************************************************************************
class API_EXPORT BamMultiReader {
+ // enums
+ public:
+ // possible merge order strategies (decide which open BAM file supplies the next alignment)
+ enum MergeOrder { RoundRobinMerge = 0 // files unsorted, or sorted status unknown/ignored
+ , MergeByCoordinate // files sorted by position ('coordinate')
+ , MergeByName // files sorted by read name ('queryname')
+ };
+
// constructor / destructor
public:
BamMultiReader(void);
bool CloseFile(const std::string& filename);
// returns list of filenames for all open BAM files
const std::vector<std::string> Filenames(void) const;
+ // returns current merge order strategy
+ BamMultiReader::MergeOrder GetMergeOrder(void) const;
// returns true if multireader has any open BAM files
bool HasOpenReaders(void) const;
// performs random-access jump within current BAM files
bool OpenFile(const std::string& filename);
// returns file pointers to beginning of alignments
bool Rewind(void);
+ // sets an explicit merge order, regardless of the BAM files' SO header tag
+ bool SetExplicitMergeOrder(BamMultiReader::MergeOrder order);
// sets the target region of interest
bool SetRegion(const BamRegion& region);
// sets the target region of interest
// BamReader.cpp (c) 2009 Derek Barnett, Michael Str�mberg
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 29 July 2013 (DB)
// ---------------------------------------------------------------------------
// Provides read access to BAM files.
// ***************************************************************************
return d->CreateIndex(type);
}
+/*! \fn const SamHeader& BamReader::GetConstSamHeader(void) const
+ \brief Returns const reference to SAM header data.
+
+ Allows for read-only queries of SAM header data.
+
+ If you do not need to modify the SAM header, use this method to avoid the
+ potentially expensive copy used by GetHeader().
+
+ \returns const reference to header data object
+ \sa GetHeader(), GetHeaderText()
+*/
+const SamHeader& BamReader::GetConstSamHeader(void) const {
+ return d->GetConstSamHeader();
+}
+
/*! \fn std::string BamReader::GetErrorString(void) const
\brief Returns a human-readable description of the last error that occurred
/*! \fn SamHeader BamReader::GetHeader(void) const
\brief Returns SAM header data.
- Header data is wrapped in a SamHeader object that can be conveniently queried & modified.
+ Header data is wrapped in a SamHeader object that can be conveniently queried and/or modified.
+ If you only need read access, consider using GetConstSamHeader() instead.
\note Modifying the retrieved SamHeader object does NOT affect the
current BAM file. This file has been opened in a read-only mode.
BamWriter to generate a new BAM file with the appropriate header information.
\returns header data object
- \sa GetHeaderText()
+ \sa GetConstSamHeader(), GetHeaderText()
*/
SamHeader BamReader::GetHeader(void) const {
return d->GetSamHeader();
However, this method does NOT populate the alignment's string data fields
(read name, bases, qualities, tags, filename). This provides a boost in speed
- when these fields are not required for every alignment. These fields can be
- populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
+ when these fields are not required for every alignment. These fields, excluding filename,
+ can be populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
\param[out] alignment destination for alignment record data
\returns \c true if a valid alignment was found
// BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg\r
// Marth Lab, Department of Biology, Boston College\r
// ---------------------------------------------------------------------------\r
-// Last modified: 10 October 2011 (DB)\r
+// Last modified: 18 November 2012 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides read access to BAM files.\r
// ***************************************************************************\r
// access header data\r
// ----------------------\r
\r
- // returns SAM header data\r
+ // returns a read-only reference to SAM header data\r
+ const SamHeader& GetConstSamHeader(void) const;\r
+ // returns an editable copy of SAM header data\r
SamHeader GetHeader(void) const;\r
// returns SAM header data, as SAM-formatted text\r
std::string GetHeaderText(void) const;\r
add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake)
# fetch all internal source files
-add_subdirectory ( internal )
+add_subdirectory( internal )
# make list of all API source files
set( BamToolsAPISources
# create main BamTools API shared library
add_library( BamTools SHARED ${BamToolsAPISources} )
set_target_properties( BamTools PROPERTIES
- SOVERSION "2.2.0"
+ SOVERSION "2.3.0"
OUTPUT_NAME "bamtools" )
# create main BamTools API static library
PREFIX "lib" )
# link libraries automatically with zlib (and Winsock2, if applicable)
-if( _WIN32 )
+if( WIN32 )
set( APILibs z ws2_32 )
-else( _WIN32 )
+else()
set( APILibs z )
-endif( _WIN32 )
+endif()
-target_link_libraries( BamTools ${APILibs} )
+target_link_libraries( BamTools ${APILibs} )
target_link_libraries( BamTools-static ${APILibs} )
# set library install destinations
-install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
+install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
# export API headers
, ReadGroups(other.ReadGroups)
, Programs(other.Programs)
, Comments(other.Comments)
+ , m_errorString(other.GetErrorString())
{ }
/*! \fn SamHeader::~SamHeader(void)
// BamHeader_p.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for handling BAM headers.
// ***************************************************************************
free(headerText);
}
+// returns const-reference to SamHeader data object (m_header itself, not a copy)
+const SamHeader& BamHeader::ToConstSamHeader(void) const {
+ return m_header;
+}
+
// returns *copy* of SamHeader data object
SamHeader BamHeader::ToSamHeader(void) const {
return m_header;
// BamHeader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for handling BAM headers.
// ***************************************************************************
// load BAM header ('magic number' and SAM header text) from BGZF stream
// returns true if all OK
void Load(BgzfStream* stream);
+ // returns (read-only) reference to SamHeader data object
+ const SamHeader& ToConstSamHeader(void) const;
// returns (editable) copy of SamHeader data object
SamHeader ToSamHeader(void) const;
// returns SAM-formatted string of header data
// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
// ctor
BamMultiReaderPrivate::BamMultiReaderPrivate(void)
: m_alignmentCache(0)
+ , m_hasUserMergeOrder(false)
+ , m_mergeOrder(BamMultiReader::RoundRobinMerge)
{ }
// dtor
}
}
- // make sure alignment cache is cleaned up if all readers closed
- if ( m_readers.empty() && m_alignmentCache ) {
- m_alignmentCache->Clear();
- delete m_alignmentCache;
- m_alignmentCache = 0;
+ // make sure we clean up properly if all readers were closed
+ if ( m_readers.empty() ) {
+
+ // clean up merger
+ if ( m_alignmentCache ) {
+ m_alignmentCache->Clear();
+ delete m_alignmentCache;
+ m_alignmentCache = 0;
+ }
+
+ // reset merge flags
+ m_hasUserMergeOrder = false;
+ m_mergeOrder = BamMultiReader::RoundRobinMerge;
}
// return whether all readers closed OK
return true;
}
-IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) const {
+IMultiMerger* BamMultiReaderPrivate::CreateAlignmentCache(void) {
+ // Picks the merge order (from the SAM header, unless one was set explicitly) and returns a newly allocated merger for it; returns 0 if the order is not recognized.
+ // if no merge order set explicitly, use SAM header to lookup proper order (the result is stored in m_mergeOrder)
+ if ( !m_hasUserMergeOrder ) {
+
+ // fetch SamHeader from BAM files
+ SamHeader header = GetHeader();
+
+ // if BAM files are sorted by position
+ if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
+ m_mergeOrder = BamMultiReader::MergeByCoordinate;
- // fetch SamHeader
- SamHeader header = GetHeader();
+ // if BAM files are sorted by read name
+ else if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
+ m_mergeOrder = BamMultiReader::MergeByName;
- // if BAM files are sorted by position
- if ( header.SortOrder == Constants::SAM_HD_SORTORDER_COORDINATE )
- return new MultiMerger<Algorithms::Sort::ByPosition>();
+ // otherwise, sorting is either "unknown" or marked as "unsorted"
+ else
+ m_mergeOrder = BamMultiReader::RoundRobinMerge;
+ }
+
+ // use current merge order to create proper 'multi-merger' (allocated with new)
+ switch ( m_mergeOrder ) {
+
+ // merge BAM files by position
+ case BamMultiReader::MergeByCoordinate :
+ return new MultiMerger<Algorithms::Sort::ByPosition>();
+
+ // merge BAM files by read name
+ case BamMultiReader::MergeByName :
+ return new MultiMerger<Algorithms::Sort::ByName>();
- // if BAM files are sorted by read name
- if ( header.SortOrder == Constants::SAM_HD_SORTORDER_QUERYNAME )
- return new MultiMerger<Algorithms::Sort::ByName>();
+ // sorting is "unknown", "unsorted" or "ignored"... so use unsorted merger
+ case BamMultiReader::RoundRobinMerge :
+ return new MultiMerger<Algorithms::Sort::Unsorted>();
- // otherwise "unknown" or "unsorted", use unsorted merger and just read in
- return new MultiMerger<Algorithms::Sort::Unsorted>();
+ // unknown merge order, can't create merger (a null pointer is returned instead)
+ default:
+ return 0;
+ }
}
const vector<string> BamMultiReaderPrivate::Filenames(void) const {
return mergedHeader.ToString();
}
+BamMultiReader::MergeOrder BamMultiReaderPrivate::GetMergeOrder(void) const {
+ return m_mergeOrder; // merge order currently stored in this object
+}
+
// get next alignment among all files
bool BamMultiReaderPrivate::GetNextAlignment(BamAlignment& al) {
return PopNextCachedAlignment(al, true);
m_alignmentCache->Add( MergeItem(reader, alignment) );
}
+// overrides the header-derived merge strategy with @order
+//
+// Builds a merger for the requested order and carries over any alignments already
+// queued in the previous cache. Returns false (with the error string set) if @order
+// is not a recognized BamMultiReader::MergeOrder value; in that case the previous
+// merge settings and the existing cache (with its queued alignments) are left untouched.
+bool BamMultiReaderPrivate::SetExplicitMergeOrder(BamMultiReader::MergeOrder order) {
+
+    // remember current settings, so we can roll back if @order is rejected
+    const bool previousHasUserMergeOrder = m_hasUserMergeOrder;
+    const BamMultiReader::MergeOrder previousMergeOrder = m_mergeOrder;
+
+    // set new merge flags (CreateAlignmentCache() reads these)
+    m_hasUserMergeOrder = true;
+    m_mergeOrder = order;
+
+    // create new cache *before* touching the existing one, so a failure loses nothing
+    IMultiMerger* newCache = CreateAlignmentCache();
+    if ( newCache == 0 ) {
+        m_hasUserMergeOrder = previousHasUserMergeOrder;
+        m_mergeOrder = previousMergeOrder;
+        SetErrorString("BamMultiReader::SetExplicitMergeOrder", "requested order is unrecognized");
+        return false;
+    }
+
+    // move any data sitting in the old cache onto the new one, then remove the old merger
+    if ( m_alignmentCache ) {
+        while ( !m_alignmentCache->IsEmpty() )
+            newCache->Add( m_alignmentCache->TakeFirst() );
+        delete m_alignmentCache;
+    }
+    m_alignmentCache = newCache;
+
+    // return success
+    return true;
+}
+
void BamMultiReaderPrivate::SetErrorString(const string& where, const string& what) const {
static const string SEPARATOR = ": ";
m_errorString = where + SEPARATOR + what;
// BamMultiReader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 14 January 2013 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// *************************************************************************
bool SetRegion(const BamRegion& region);
// access alignment data
+ BamMultiReader::MergeOrder GetMergeOrder(void) const;
bool GetNextAlignment(BamAlignment& al);
bool GetNextAlignmentCore(BamAlignment& al);
bool HasOpenReaders(void);
+ bool SetExplicitMergeOrder(BamMultiReader::MergeOrder order);
// access auxiliary data
SamHeader GetHeader(void) const;
public:
bool CloseFiles(const std::vector<std::string>& filenames);
- IMultiMerger* CreateAlignmentCache(void) const;
+ IMultiMerger* CreateAlignmentCache(void);
bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
bool RewindReaders(void);
void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
public:
std::vector<MergeItem> m_readers;
IMultiMerger* m_alignmentCache;
+
+ bool m_hasUserMergeOrder;
+ BamMultiReader::MergeOrder m_mergeOrder;
+
mutable std::string m_errorString;
};
// BamReader_p.cpp (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 28 November 2011 (DB)
+// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading BAM files
// ***************************************************************************
return m_filename;
}
+const SamHeader& BamReaderPrivate::GetConstSamHeader(void) const {
+ return m_header.ToConstSamHeader(); // passes through the reference obtained from m_header
+}
+
string BamReaderPrivate::GetErrorString(void) const {
return m_errorString;
}
// BamReader_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 25 October 2011 (DB)
+// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for reading BAM files
// ***************************************************************************
// access auxiliary data
std::string GetHeaderText(void) const;
+ const SamHeader& GetConstSamHeader(void) const;
SamHeader GetSamHeader(void) const;
int GetReferenceCount(void) const;
const RefVector& GetReferenceData(void) const;
// BamWriter_p.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 4 April 2012 (DB)
+// Last modified: 18 November 2012 (DB)
// ---------------------------------------------------------------------------
// Provides the basic functionality for producing BAM files
// ***************************************************************************
// write the base qualities
char* pBaseQualities = new char[queryLength]();
- if ( al.Qualities.empty() || al.Qualities == "*" )
+ if ( al.Qualities.empty() || ( al.Qualities.size() == 1 && al.Qualities[0] == '*' ) || al.Qualities[0] == (char)0xFF )
memset(pBaseQualities, 0xFF, queryLength); // if missing or '*', fill with invalid qual
else {
for ( size_t i = 0; i < queryLength; ++i )
# src/api/internal/index
# ==========================
-set ( InternalIndexDir "${InternalDir}/index" )
+set( InternalIndexDir "${InternalDir}/index" )
-set ( InternalIndexSources
+set( InternalIndexSources
${InternalIndexDir}/BamIndexFactory_p.cpp
${InternalIndexDir}/BamStandardIndex_p.cpp
${InternalIndexDir}/BamToolsIndex_p.cpp
// BamHttp_p.cpp (c) 2011 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 8 December 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Provides reading/writing of BAM files on HTTP server
// ***************************************************************************
#include <cassert>
#include <cctype>
+#include <cstdlib>
#include <algorithm>
#include <sstream>
using namespace std;
static const string DOUBLE_NEWLINE = "\n\n";
static const string GET_METHOD = "GET";
+static const string HEAD_METHOD = "HEAD";
static const string HOST_HEADER = "Host";
static const string RANGE_HEADER = "Range";
static const string BYTES_PREFIX = "bytes=";
+static const string CONTENT_LENGTH_HEADER = "Content-Length";
static const char HOST_SEPARATOR = '/';
static const char PROXY_SEPARATOR = ':';
, m_response(0)
, m_isUrlParsed(false)
, m_filePosition(-1)
- , m_endRangeFilePosition(-1)
+ , m_fileEndPosition(-1)
+ , m_rangeEndPosition(-1)
{
ParseUrl(url);
}
delete m_socket;
}
-void BamHttp::Close(void) {
-
- // disconnect socket
- m_socket->DisconnectFromHost();
-
- // clean up request & response
- if ( m_request ) {
- delete m_request;
- m_request = 0;
- }
+void BamHttp::ClearResponse(void) {
if ( m_response ) {
delete m_response;
m_response = 0;
}
+}
- // reset state - necessary??
+void BamHttp::Close(void) {
+
+ // disconnect socket & clear related resources
+ DisconnectSocket();
+
+ // reset state
m_isUrlParsed = false;
- m_filePosition = -1;
- m_endRangeFilePosition = -1;
+ m_filePosition = -1;
+ m_fileEndPosition = -1;
+ m_rangeEndPosition = -1;
+ m_mode = IBamIODevice::NotOpen;
}
bool BamHttp::ConnectSocket(void) {
// any state checks, etc?
if ( !m_socket->ConnectToHost(m_hostname, m_port, m_mode) ) {
- // TODO: set error string
- return false;
- }
-
- // attempt initial request
- m_filePosition = 0;
- m_endRangeFilePosition = -1;
- if ( !SendRequest() ) {
- // TODO: set error string
- Close();
- return false;
- }
-
- // wait for response from server
- if ( !ReceiveResponse() ) {
- // TODO: set error string
- Close();
+ SetErrorString("BamHttp::ConnectSocket", m_socket->GetErrorString());
return false;
}
return true;
}
+// disconnects from the host and frees the request/response objects
+void BamHttp::DisconnectSocket(void) {
+
+    // drop the connection first
+    m_socket->DisconnectFromHost();
+
+    // release the response (via helper), then the request
+    ClearResponse();
+    delete m_request;   // deleting a null pointer is a no-op, so no guard is needed
+    m_request = 0;
+}
+
bool BamHttp::EnsureSocketConnection(void) {
if ( m_socket->IsConnected() )
return true;
- else return ConnectSocket();
+ return ConnectSocket();
}
bool BamHttp::IsOpen(void) const {
return false;
}
+ // initialize our file positions
+ m_filePosition = 0;
+ m_fileEndPosition = 0;
+ m_rangeEndPosition = 0;
+
+ // attempt to send initial request (just 'HEAD' to check connection)
+ if ( !SendHeadRequest() ) {
+ SetErrorString("BamHttp::Open", m_socket->GetErrorString());
+ return false;
+ }
+
+ // clear response from HEAD request, not needed
+ ClearResponse();
+
// return success
return true;
}
if ( !IsOpen() )
return -1;
- // read until hit desired @numBytes
- int64_t bytesReadSoFar = 0;
- while ( bytesReadSoFar < numBytes ) {
-
- // calculate number of bytes we're going to try to read this iteration
- const size_t remainingBytes = ( numBytes - bytesReadSoFar );
+ int64_t numBytesReadSoFar = 0;
+ while ( numBytesReadSoFar < numBytes ) {
- // if socket has access to entire file contents
- // i.e. we received response with full data (status code == 200)
- if ( m_endRangeFilePosition < 0 ) {
+ const size_t remaining = static_cast<size_t>( numBytes - numBytesReadSoFar );
- // try to read 'remainingBytes' from socket
- const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, remainingBytes);
- if ( socketBytesRead < 0 ) // error
+ // if we're not holding a valid GET response, get one
+ if ( m_response == 0 ) {
+ if ( !SendGetRequest(remaining) )
return -1;
- else if ( socketBytesRead == 0 ) // EOF
- return bytesReadSoFar;
- bytesReadSoFar += socketBytesRead;
- m_filePosition += socketBytesRead;
}
+ BT_ASSERT_X(m_response, "null HTTP response");
- // socket has access to a range of data (might already be in buffer)
- // i.e. we received response with partial data (status code == 206)
- else {
+ // check response status code
+ const int statusCode = m_response->GetStatusCode();
+
+ // if we received full file contents in response
+ if ( statusCode == 200 ) {
+
+ // try to read 'remaining' bytes from socket
+ const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::Read", m_socket->GetErrorString());
+ return -1;
+ }
+
+ // EOF
+ else if ( socketBytesRead == 0 )
+ return numBytesReadSoFar;
- // there is data left from last request
- if ( m_endRangeFilePosition > m_filePosition ) {
+ // update counters
+ numBytesReadSoFar += socketBytesRead;
+ m_filePosition += socketBytesRead;
- // try to read either the total 'remainingBytes' or
- // whatever we have remaining from last request range
- const size_t rangeRemainingBytes = m_endRangeFilePosition - m_filePosition;
- const size_t bytesToRead = std::min(remainingBytes, rangeRemainingBytes);
- const int64_t socketBytesRead = ReadFromSocket(data+bytesReadSoFar, bytesToRead);
- if ( socketBytesRead < 0 ) // error
+ }
+
+ // else if we received a range of bytes in response
+ else if ( statusCode == 206 ) {
+
+ // if we've exhausted the last request
+ if ( m_filePosition == m_rangeEndPosition ) {
+ if ( !SendGetRequest(remaining) )
return -1;
- else if ( socketBytesRead == 0 ) // EOF
- return bytesReadSoFar;
- bytesReadSoFar += socketBytesRead;
- m_filePosition += socketBytesRead;
}
- // otherwise, this is a 1st-time read or
- // we already read everything from the last GET request
else {
- // request for next range
- if ( !SendRequest(remainingBytes) || !ReceiveResponse() ) {
- Close();
+ // try to read 'remaining' bytes from socket
+ const int64_t socketBytesRead = ReadFromSocket(data+numBytesReadSoFar, remaining);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::Read", m_socket->GetErrorString());
return -1;
}
+
+ // maybe EOF
+ else if ( socketBytesRead == 0 ) {
+
+ // if we know we're not at end position, fire off a new request
+ if ( m_fileEndPosition > 0 && m_filePosition < m_fileEndPosition ) {
+ if ( !SendGetRequest() )
+ return -1;
+ } else
+ return numBytesReadSoFar;
+ }
+
+ // update counters
+ numBytesReadSoFar += socketBytesRead;
+ m_filePosition += socketBytesRead;
}
}
+
+
+ // else some other HTTP status
+ else {
+ SetErrorString("BamHttp::Read", "unsupported status code in response");
+ return -1;
+ }
}
- // return actual number bytes successfully read
- return bytesReadSoFar;
+ // return actual number of bytes read
+ return numBytesReadSoFar;
}
int64_t BamHttp::ReadFromSocket(char* data, const unsigned int maxNumBytes) {
bool BamHttp::ReceiveResponse(void) {
- // clear any prior response
- if ( m_response )
- delete m_response;
-
- // make sure we're connected
- if ( !EnsureSocketConnection() )
- return false;
-
// fetch header, up until double new line
string responseHeader;
do {
+
+ // make sure we can read a line
+ if ( !m_socket->WaitForReadLine() )
+ return false;
+
// read line & append to full header
const string headerLine = m_socket->ReadLine();
responseHeader += headerLine;
// sanity check
if ( responseHeader.empty() ) {
- // TODO: set error string
+ SetErrorString("BamHttp::ReceiveResponse", "empty HTTP response");
Close();
return false;
}
// create response from header text
m_response = new HttpResponseHeader(responseHeader);
if ( !m_response->IsValid() ) {
- // TODO: set error string
+ SetErrorString("BamHttp::ReceiveResponse", "could not parse HTTP response");
Close();
return false;
}
- // if we got range response as requested
- if ( m_response->GetStatusCode() == 206 )
- return true;
-
- // if we got the full file contents instead of range
- else if ( m_response->GetStatusCode() == 200 ) {
+ // if we get here, success
+ return true;
+}
- // skip up to current file position
- RaiiBuffer tmp(0x8000);
- int64_t numBytesRead = 0;
- while ( numBytesRead < m_filePosition ) {
+bool BamHttp::Seek(const int64_t& position, const int origin) {
- const int64_t remaining = m_filePosition - numBytesRead;
- const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
- const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
- if ( socketBytesRead < 0 ) { // error
- Close();
- return false;
- }
- else if ( socketBytesRead == 0 ) // EOF
- break;
+ // if HTTP device not in a valid state
+ if ( !IsOpen() ) {
+ SetErrorString("BamHttp::Seek", "cannot seek on unopen connection");
+ return false;
+ }
- numBytesRead += socketBytesRead;
- }
+ // reset the connection
+ DisconnectSocket();
+ if ( !ConnectSocket() ) {
+ SetErrorString("BamHttp::Seek", m_socket->GetErrorString());
+ return false;
+ }
- // return success
- return ( numBytesRead == m_filePosition);
+ // update file position
+ switch ( origin ) {
+ case SEEK_CUR : m_filePosition += position; break;
+ case SEEK_SET : m_filePosition = position; break;
+ default :
+ SetErrorString("BamHttp::Seek", "unsupported seek origin");
+ return false;
}
- // on any other reponse status
- // TODO: set error string
- Close();
- return false;
+ // return success
+ return true;
}
-bool BamHttp::Seek(const int64_t& position, const int origin) {
+// Issues an HTTP GET for a byte range starting at m_filePosition and waits
+// for the response header.
+//
+// numBytes : number of bytes the caller wants; the requested range always
+// spans at least 0x10000 bytes.
+//
+// Returns true when a 206 response arrives, or when a 200 response arrives
+// and its body could be skipped forward to m_filePosition. Returns false on
+// connection, write, or response failures and on any other status code.
+bool BamHttp::SendGetRequest(const size_t numBytes) {
- // if HTTP device not in a valid state
- if ( !IsOpen() ) {
- // TODO: set error string
+ // clear previous data
+ ClearResponse();
+ delete m_request; // deleting a null pointer is a no-op
+ m_request = 0; // must not dangle: an early return below is followed by Close(), which deletes m_request again
+ m_socket->ClearBuffer();
+
+ // make sure we're connected
+ if ( !EnsureSocketConnection() )
+ return false;
+
+ // create range string
+ const int64_t endPosition = m_filePosition + std::max(static_cast<size_t>(0x10000), numBytes);
+ stringstream range("");
+ range << BYTES_PREFIX << m_filePosition << '-' << endPosition;
+
+ // create request
+ m_request = new HttpRequestHeader(GET_METHOD, m_filename);
+ m_request->SetField(HOST_HEADER, m_hostname);
+ m_request->SetField(RANGE_HEADER, range.str());
+
+ // send request
+ const string requestHeader = m_request->ToString();
+ const size_t headerSize = requestHeader.size();
+ if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
return false;
}
- // discard socket's buffer contents, update positions, & return success
+ // ensure clean buffer
m_socket->ClearBuffer();
- if ( origin == SEEK_CUR )
- m_filePosition += position;
- else if ( origin == SEEK_SET )
- m_filePosition = position;
- else {
- // TODO: set error string
+ // wait for response
+ if ( !ReceiveResponse() ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
+ Close();
return false;
}
- m_endRangeFilePosition = m_filePosition;
- return true;
+ BT_ASSERT_X(m_response, "BamHttp::SendGetRequest : null HttpResponse");
+ BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendGetRequest : invalid HttpResponse");
+
+ // check response status code
+ const int statusCode = m_response->GetStatusCode();
+ switch ( statusCode ) {
+
+ // ranged response, as requested
+ case 206 :
+ // get content length if available
+ if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
+ const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
+ m_rangeEndPosition = m_filePosition + atoi( contentLengthString.c_str() );
+ }
+ return true;
+
+ // full contents, not range
+ case 200 :
+ {
+ // skip up to current file position
+ RaiiBuffer tmp(0x8000);
+ int64_t numBytesRead = 0;
+ while ( numBytesRead < m_filePosition ) {
+
+ // read data from response
+ const int64_t remaining = m_filePosition - numBytesRead;
+ const size_t bytesToRead = static_cast<size_t>( (remaining > 0x8000) ? 0x8000 : remaining );
+ const int64_t socketBytesRead = ReadFromSocket(tmp.Buffer, bytesToRead);
+
+ // if error
+ if ( socketBytesRead < 0 ) {
+ SetErrorString("BamHttp::SendGetRequest", m_socket->GetErrorString());
+ Close();
+ return false;
+ }
+
+ // else if EOF
+ else if ( socketBytesRead == 0 && m_socket->BufferBytesAvailable() == 0 )
+ break;
+
+ // update byte counter
+ numBytesRead += socketBytesRead;
+ }
+
+ // return success
+ return ( numBytesRead == m_filePosition);
+ }
+
+ // any other status codes
+ default:
+ break;
+ }
+
+ // fail on unexpected status code
+ SetErrorString("BamHttp::SendGetRequest", "unsupported status code in response");
+ Close();
+ return false;
}
-bool BamHttp::SendRequest(const size_t numBytes) {
+// Sends an HTTP HEAD request for m_filename and waits for the response header.
+// When the response carries a Content-Length field, m_fileEndPosition is set
+// to (length - 1). Returns false if connecting, writing the request, or
+// receiving the response fails; otherwise returns whether the socket reports
+// TcpSocket::NoError.
+bool BamHttp::SendHeadRequest(void) {
- // remove any currently active request
+ // ensure clean slate
+ ClearResponse();
if ( m_request )
delete m_request;
-
- // create range string
- m_endRangeFilePosition = m_filePosition + numBytes;
- stringstream range("");
- range << BYTES_PREFIX << m_filePosition << '-' << m_endRangeFilePosition;
+ m_request = 0; // must not dangle: an early return below is followed by Close(), which deletes m_request again
+ m_socket->ClearBuffer();
// make sure we're connected
if ( !EnsureSocketConnection() )
return false;
// create request
- m_request = new HttpRequestHeader(GET_METHOD, m_filename);
- m_request->SetField(HOST_HEADER, m_hostname);
- m_request->SetField(RANGE_HEADER, range.str());
+ m_request = new HttpRequestHeader(HEAD_METHOD, m_filename);
+ m_request->SetField(HOST_HEADER, m_hostname);
- // write request to socket
+ // send request
const string requestHeader = m_request->ToString();
const size_t headerSize = requestHeader.size();
- return ( WriteToSocket(requestHeader.c_str(), headerSize) == headerSize );
+ if ( WriteToSocket(requestHeader.c_str(), headerSize) != headerSize ) {
+ SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
+ return false;
+ }
+
+ m_socket->ClearBuffer();
+
+ // wait for response from server
+ if ( !ReceiveResponse() ) {
+ SetErrorString("BamHttp::SendHeadRequest", m_socket->GetErrorString());
+ Close();
+ return false;
+ }
+ BT_ASSERT_X(m_response, "BamHttp::SendHeadRequest : null HttpResponse");
+ BT_ASSERT_X(m_response->IsValid(), "BamHttp::SendHeadRequest : invalid HttpResponse");
+
+ // get content length if available
+ if ( m_response->ContainsKey(CONTENT_LENGTH_HEADER) ) {
+ const string contentLengthString = m_response->GetValue(CONTENT_LENGTH_HEADER);
+
+ // parse as a 64-bit value: atoi() returns an int, which (where int is 32 bits)
+ // cannot represent the size of a file of 2 GB or more
+ int64_t contentLength = 0;
+ stringstream contentLengthStream(contentLengthString);
+ contentLengthStream >> contentLength;
+ m_fileEndPosition = contentLength - 1;
+ }
+
+ // return whether we found any errors
+ return m_socket->GetError() == TcpSocket::NoError;
}
int64_t BamHttp::Tell(void) const {
// internal methods
private:
+ void ClearResponse(void);
bool ConnectSocket(void);
+ void DisconnectSocket(void);
bool EnsureSocketConnection(void);
void ParseUrl(const std::string& url);
int64_t ReadFromSocket(char* data, const unsigned int numBytes);
bool ReceiveResponse(void);
- bool SendRequest(const size_t numBytes = 0);
+ bool SendGetRequest(const size_t numBytes = 0x10000);
+ bool SendHeadRequest(void);
int64_t WriteToSocket(const char* data, const unsigned int numBytes);
// data members
// file position
int64_t m_filePosition;
- int64_t m_endRangeFilePosition;
+ int64_t m_fileEndPosition;
+ int64_t m_rangeEndPosition;
};
} // namespace Internal
# src/api/internal/io
# ==========================
-set ( InternalIODir "${InternalDir}/io" )
+set( InternalIODir "${InternalDir}/io" )
#--------------------------
# platform-independent IO
#--------------------------
-set ( CommonIOSources
+set( CommonIOSources
${InternalIODir}/BamDeviceFactory_p.cpp
${InternalIODir}/BamFile_p.cpp
${InternalIODir}/BamFtp_p.cpp
#------------------------
# platform-dependent IO
#------------------------
-if ( _WIN32 )
- set ( PlatformIOSources
- ${InternalIODir}/TcpSocketEngine_win_p.cpp
- )
-else ( _WIN32 )
- set ( PlatformIOSources
- ${InternalIODir}/TcpSocketEngine_unix_p.cpp
- )
-endif ( _WIN32 )
+if( WIN32 )
+ set( PlatformIOSources ${InternalIODir}/TcpSocketEngine_win_p.cpp )
+else()
+ set( PlatformIOSources ${InternalIODir}/TcpSocketEngine_unix_p.cpp )
+endif()
#---------------------------
# make build-specific list
#---------------------------
-set ( InternalIOSources
- ${CommonIOSources}
+set( InternalIOSources
+ ${CommonIOSources}
${PlatformIOSources}
PARENT_SCOPE # <-- leave this last
// ctor & dtor
public:
- HttpRequestHeader(const std::string& method, // "GET", "PUT", etc
+ HttpRequestHeader(const std::string& method, // "GET", "HEAD", ...
const std::string& resource, // filename
int majorVersion = 1, // version info
int minorVersion = 1);
bytesReadSoFar += bytesToRead;
Free(bytesToRead);
- if ( !((bytesReadSoFar < index+1)&&(bytesReadSoFar < max-1)) )
+ if ( !((bytesReadSoFar < index+1) && (bytesReadSoFar < max-1)) )
finished = true;
}
if ( (m_tail + n) <= m_data.at(m_tailBufferIndex).Size() ) {
// fetch write pointer at current 'tail', increment tail by @n & return
- char* ptr = m_data[m_tailBufferIndex].Data() + m_tail;
+ char* ptr = m_data[m_tailBufferIndex].Data(); //+ m_tail;
m_tail += n;
return ptr;
}
m_data[m_tailBufferIndex].Resize(m_tail + n);
// fetch write pointer at current 'tail', increment tail by @n & return
- char* ptr = m_data[m_tailBufferIndex].Data() + m_tail;
+ char* ptr = m_data[m_tailBufferIndex].Data(); //+ m_tail;
m_tail += n;
return ptr;
}
namespace Internal {
// constants
-static const size_t DEFAULT_BUFFER_SIZE = 0x4000;
+static const size_t DEFAULT_BUFFER_SIZE = 0x10000;
} // namespace Internal
} // namespace BamTools
, m_engine(0)
, m_cachedSocketDescriptor(-1)
, m_readBuffer(DEFAULT_BUFFER_SIZE)
- , m_error(TcpSocket::UnknownSocketError)
+ , m_error(TcpSocket::NoError)
, m_state(TcpSocket::UnconnectedState)
{ }
m_hostName = hostInfo.HostName();
m_mode = mode;
m_state = TcpSocket::UnconnectedState;
- m_error = TcpSocket::UnknownSocketError;
+ m_error = TcpSocket::NoError;
// m_localPort = 0;
m_remotePort = 0;
// m_localAddress.Clear();
namespace BamTools {
namespace Internal {
+class BamHttp;
class TcpSocketEngine;
class TcpSocket {
// enums
public:
- enum SocketError { UnknownSocketError = -1
+ enum SocketError { NoError = -2
+ , UnknownSocketError = -1
, ConnectionRefusedError = 0
, RemoteHostClosedError
, HostNotFoundError
TcpSocket::SocketError m_error;
TcpSocket::SocketState m_state;
std::string m_errorString;
+
+ friend class BamHttp;
};
} // namespace Internal
# src/api/internal/sam
# ==========================
-set ( InternalSamDir "${InternalDir}/sam" )
+set( InternalSamDir "${InternalDir}/sam" )
-set ( InternalSamSources
+set( InternalSamSources
${InternalSamDir}/SamFormatParser_p.cpp
${InternalSamDir}/SamFormatPrinter_p.cpp
${InternalSamDir}/SamHeaderValidator_p.cpp
# src/api/internal/utils
# ==========================
-set ( InternalUtilsDir "${InternalDir}/utils" )
+set( InternalUtilsDir "${InternalDir}/utils" )
-set ( InternalUtilsSources
+set( InternalUtilsSources
${InternalUtilsDir}/BamException_p.cpp
PARENT_SCOPE # <-- leave this last
add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake)
# create jsoncpp library
-add_library( jsoncpp SHARED
+add_library( jsoncpp STATIC
json_reader.cpp
json_value.cpp
json_writer.cpp
# set jsoncpp library properties
set_target_properties( jsoncpp PROPERTIES
- SOVERSION 1.0.0
OUTPUT_NAME jsoncpp
+ PREFIX "lib"
)
# set BamTools application properties
set_target_properties( bamtools_cmd PROPERTIES
- VERSION 2.2.0
+ VERSION 2.3.0
OUTPUT_NAME "bamtools"
)
# make version info available in application
// bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 11 November 2012
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
// flag
bool HasInput;
+ bool HasInputFilelist;
bool HasOutput;
bool HasFormat;
bool HasRegion;
// options
vector<string> InputFiles;
+ string InputFilelist;
string OutputFilename;
string Format;
string Region;
// constructor
ConvertSettings(void)
: HasInput(false)
+ , HasInputFilelist(false)
, HasOutput(false)
, HasFormat(false)
, HasRegion(false)
// initialize conversion input/output
// set to default input if none provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+ ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+ if ( !filelist.is_open() ) {
+ cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl;
+ return false;
+ }
+
+ string line;
+ while ( getline(filelist, line) )
+ m_settings->InputFiles.push_back(line);
+ }
+
// open input files
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
}
// write alignment's source BAM file
- m_out << "\"filename\":" << a.Filename << ",";
+ m_out << "\"filename\":\"" << a.Filename << "\",";
// write tag data
const char* tagData = a.TagData.c_str();
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [-region <REGION>] [format-specific options]");
+ Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats",
+ "-format <FORMAT> [-in <filename> -in <filename> ... | -list <filelist>] [-out <filename>] [-region <REGION>] [format-specific options]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
// bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Prints alignment count for BAM file(s)
// ***************************************************************************
#include <utils/bamtools_utilities.h>
using namespace BamTools;
+#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// flags
bool HasInput;
+ bool HasInputFilelist;
bool HasRegion;
// filenames
vector<string> InputFiles;
+ string InputFilelist;
string Region;
// constructor
CountSettings(void)
: HasInput(false)
+ , HasInputFilelist(false)
, HasRegion(false)
{ }
};
bool CountTool::CountToolPrivate::Run(void) {
- // if no '-in' args supplied, default to stdin
- if ( !m_settings->HasInput )
+ // set to default input if none provided
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+ ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+ if ( !filelist.is_open() ) {
+ cerr << "bamtools count ERROR: could not open input BAM file list... Aborting." << endl;
+ return false;
+ }
+
+ string line;
+ while ( getline(filelist, line) )
+ m_settings->InputFiles.push_back(line);
+ }
+
// open reader without index
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
+ Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)",
+ "[-in <filename> -in <filename> ... | -list <filelist>] [-region <REGION>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
+ Options::AddValueOption("-region", "REGION",
+ "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one",
+ "", m_settings->HasRegion, m_settings->Region, IO_Opts);
}
CountTool::~CountTool(void) {
// bamtools_coverage.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 24 July 2013
// ---------------------------------------------------------------------------
// Prints coverage data for a single BAM file
// ***************************************************************************
BamAlignment al;
while ( reader.GetNextAlignment(al) )
pileup.AddAlignment(al);
+ pileup.Flush();
// clean up
reader.Close();
// bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011
+// Last modified: 3 May 2013
// ---------------------------------------------------------------------------
// Filters BAM file(s) according to some user-specified criteria
// ***************************************************************************
using namespace Json;
#include <cstdio>
+#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
const string ISREVERSESTRAND_PROPERTY = "isReverseStrand";
const string ISSECONDMATE_PROPERTY = "isSecondMate";
const string ISSINGLETON_PROPERTY = "isSingleton";
+const string LENGTH_PROPERTY = "length";
const string MAPQUALITY_PROPERTY = "mapQuality";
const string MATEPOSITION_PROPERTY = "matePosition";
const string MATEREFERENCE_PROPERTY = "mateReference";
const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
keepAlignment &= valueFilter.check(isSingleton);
}
+ else if ( propertyName == LENGTH_PROPERTY ) keepAlignment &= valueFilter.check(al.Length);
else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
else if ( propertyName == MATEREFERENCE_PROPERTY ) {
string tagFilterString = entireTagFilterString.substr(3);
// switch on tag type to set tag query value & parse filter token
+ int8_t asciiFilterValue, asciiQueryValue;
int32_t intFilterValue, intQueryValue;
uint32_t uintFilterValue, uintQueryValue;
float realFilterValue, realQueryValue;
bool keepAlignment = false;
switch (tagType) {
+ // ASCII tag type
+ case 'A':
+ if ( al.GetTag(tagName, asciiQueryValue) ) {
+ if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, asciiFilterValue, compareType) ) {
+ tagFilter.Value = asciiFilterValue;
+ tagFilter.Type = compareType;
+ keepAlignment = tagFilter.check(asciiQueryValue);
+ }
+ }
+ break;
+
// signed int tag type
case 'c' :
case 's' :
break;
// string tag type
- case 'A':
+
case 'Z':
case 'H':
if ( al.GetTag(tagName, stringQueryValue) ) {
// IO opts
// flags
- bool HasInputBamFilename;
- bool HasOutputBamFilename;
+ bool HasInput;
+ bool HasInputFilelist;
+ bool HasOutput;
bool HasRegion;
- bool HasScriptFilename;
+ bool HasScript;
bool IsForceCompression;
// filenames
vector<string> InputFiles;
+ string InputFilelist;
string OutputFilename;
string Region;
string ScriptFilename;
// flags
bool HasAlignmentFlagFilter;
bool HasInsertSizeFilter;
+ bool HasLengthFilter;
bool HasMapQualityFilter;
bool HasNameFilter;
bool HasQueryBasesFilter;
// filters
string AlignmentFlagFilter;
string InsertSizeFilter;
- string NameFilter;
+ string LengthFilter;
string MapQualityFilter;
+ string NameFilter;
string QueryBasesFilter;
string TagFilter; // support multiple ?
// constructor
FilterSettings(void)
- : HasInputBamFilename(false)
- , HasOutputBamFilename(false)
+ : HasInput(false)
+ , HasInputFilelist(false)
+ , HasOutput(false)
, HasRegion(false)
- , HasScriptFilename(false)
+ , HasScript(false)
, IsForceCompression(false)
, OutputFilename(Options::StandardOut())
, HasAlignmentFlagFilter(false)
, HasInsertSizeFilter(false)
+ , HasLengthFilter(false)
, HasMapQualityFilter(false)
, HasNameFilter(false)
, HasQueryBasesFilter(false)
// int32_t conversion
else if ( propertyName == INSERTSIZE_PROPERTY ||
+ propertyName == LENGTH_PROPERTY ||
propertyName == MATEPOSITION_PROPERTY ||
propertyName == POSITION_PROPERTY
)
m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
}
- else if ( propertyName == TAG_PROPERTY ) {
- // this will be stored directly as the TAG:VALUE token
- // (VALUE may contain compare ops, will be parsed out later)
- m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
- }
+ else if ( propertyName == TAG_PROPERTY ) {
+ // this will be stored directly as the TAG:VALUE token
+ // (VALUE may contain compare ops, will be parsed out later)
+ m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
+ }
// else unknown property
else {
// peek ahead, make sure there is data available
char ch = fgetc(inFile);
ungetc(ch, inFile);
- if( feof(inFile) ) break;
+ if( feof(inFile) )
+ break;
// read next block of data
if ( fgets(buffer, 1024, inFile) == 0 ) {
m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
m_propertyNames.push_back(ISSINGLETON_PROPERTY);
+ m_propertyNames.push_back(LENGTH_PROPERTY);
m_propertyNames.push_back(MAPQUALITY_PROPERTY);
m_propertyNames.push_back(MATEPOSITION_PROPERTY);
m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) );
if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) );
if ( m_settings->HasIsSingletonFilter ) propertyTokens.insert( make_pair(ISSINGLETON_PROPERTY, m_settings->IsSingletonFilter) );
+ if ( m_settings->HasLengthFilter ) propertyTokens.insert( make_pair(LENGTH_PROPERTY, m_settings->LengthFilter) );
if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) );
if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) );
if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) );
bool FilterTool::FilterToolPrivate::Run(void) {
// set to default input if none provided
- if ( !m_settings->HasInputBamFilename )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+ ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+ if ( !filelist.is_open() ) {
+ cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl;
+ return false;
+ }
+
+ string line;
+ while ( getline(filelist, line) )
+ m_settings->InputFiles.push_back(line);
+ }
+
// initialize defined properties & user-specified filters
// quit if failed
- if ( !SetupFilters() ) return false;
+ if ( !SetupFilters() )
+ return false;
// open reader without index
BamMultiReader reader;
InitProperties();
// parse script for filter rules, if given
- if ( m_settings->HasScriptFilename )
+ if ( m_settings->HasScript )
return ParseScript();
// otherwise check command line for filters
// ----------------------------------
// set program details
- const string usage = "[-in <filename> -in <filename> ...] "
+ const string usage = "[-in <filename> -in <filename> ... | -list <filelist>] "
"[-out <filename> | [-forceCompression]] [-region <REGION>] "
"[ [-script <filename] | [filterOptions] ]";
+
Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", usage );
// ----------------------------------
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
const string inDesc = "the input BAM file(s)";
+ const string listDesc = "the input BAM file list, one line per file";
const string outDesc = "the output BAM file";
const string regionDesc = "only read data from this genomic region (see documentation for more details)";
const string scriptDesc = "the filter script file (see documentation for more details)";
"default behavior is to leave output uncompressed. Use this flag to "
"override and force compression";
- Options::AddValueOption("-in", "BAM filename", inDesc, "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", outDesc, "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
- Options::AddValueOption("-region", "REGION", regionDesc, "", m_settings->HasRegion, m_settings->Region, IO_Opts);
- Options::AddValueOption("-script", "filename", scriptDesc, "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
+ Options::AddValueOption("-in", "BAM filename", inDesc, "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", listDesc, "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
+ Options::AddValueOption("-out", "BAM filename", outDesc, "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-region", "REGION", regionDesc, "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+ Options::AddValueOption("-script", "filename", scriptDesc, "", m_settings->HasScript, m_settings->ScriptFilename, IO_Opts);
Options::AddOption("-forceCompression",forceDesc, m_settings->IsForceCompression, IO_Opts);
// ----------------------------------
const string flagDesc = "keep reads with this *exact* alignment flag (for more detailed queries, see below)";
const string insertDesc = "keep reads with insert size that matches pattern";
+ const string lengthDesc = "keep reads with length that matches pattern";
const string mapQualDesc = "keep reads with map quality that matches pattern";
const string nameDesc = "keep reads with name that matches pattern";
const string queryDesc = "keep reads with motif that matches pattern";
Options::AddValueOption("-alignmentFlag", "int", flagDesc, "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
Options::AddValueOption("-insertSize", "int", insertDesc, "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
+ Options::AddValueOption("-length", "int", lengthDesc, "", m_settings->HasLengthFilter, m_settings->LengthFilter, FilterOpts);
Options::AddValueOption("-mapQuality", "[0-255]", mapQualDesc, "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
Options::AddValueOption("-name", "string", nameDesc, "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
Options::AddValueOption("-queryBases", "string", queryDesc, "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
// bamtools_header.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Prints the SAM-style header from a single BAM file ( or merged header from
// multiple BAM files) to stdout
#include <utils/bamtools_options.h>
using namespace BamTools;
+#include <fstream>
#include <iostream>
#include <string>
#include <vector>
struct HeaderTool::HeaderSettings {
// flags
- bool HasInputBamFilename;
+ bool HasInput; // flag bound to the -in option
+ bool HasInputFilelist; // flag bound to the -list option
// filenames
vector<string> InputFiles;
+ string InputFilelist; // value of -list: path of a text file naming one input BAM file per line
// constructor
HeaderSettings(void)
- : HasInputBamFilename(false)
+ : HasInput(false)
+ , HasInputFilelist(false)
{ }
};
bool HeaderTool::HeaderToolPrivate::Run(void) {
// set to default input if none provided
- if ( !m_settings->HasInputBamFilename )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+     ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+     if ( !filelist.is_open() ) {
+         cerr << "bamtools header ERROR: could not open input BAM file list... Aborting." << endl;
+         return false;
+     }
+
+     // one filename per line; tolerate CRLF line endings and blank lines so
+     // that an empty or '\r'-terminated name is never handed to the reader
+     string line;
+     while ( getline(filelist, line) ) {
+         if ( !line.empty() && line[line.size()-1] == '\r' )
+             line.erase(line.size()-1);
+         if ( line.empty() )
+             continue;
+         m_settings->InputFiles.push_back(line);
+     }
+ }
+
// attemp to open BAM files
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in <filename> -in <filename> ...] ");
+ Options::SetProgramInfo("bamtools header", "prints header from BAM file(s)", "[-in <filename> -in <filename> ... | -list <filelist>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
}
HeaderTool::~HeaderTool(void) {
// bamtools_merge.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Merges multiple BAM files into one
// ***************************************************************************
#include <utils/bamtools_utilities.h>
using namespace BamTools;
+#include <fstream>
#include <iostream>
#include <string>
#include <vector>
struct MergeTool::MergeSettings {
// flags
- bool HasInputBamFilename;
- bool HasOutputBamFilename;
+ bool HasInput;
+ bool HasInputFilelist;
+ bool HasOutput;
bool IsForceCompression;
bool HasRegion;
// filenames
vector<string> InputFiles;
+ string InputFilelist;
// other parameters
string OutputFilename;
// constructor
MergeSettings(void)
- : HasInputBamFilename(false)
- , HasOutputBamFilename(false)
+ : HasInput(false)
+ , HasInputFilelist(false)
+ , HasOutput(false)
, IsForceCompression(false)
, HasRegion(false)
, OutputFilename(Options::StandardOut())
bool MergeTool::MergeToolPrivate::Run(void) {
// set to default input if none provided
- if ( !m_settings->HasInputBamFilename )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+     ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+     if ( !filelist.is_open() ) {
+         cerr << "bamtools merge ERROR: could not open input BAM file list... Aborting." << endl;
+         return false;
+     }
+
+     // one filename per line; tolerate CRLF line endings and blank lines so
+     // that an empty or '\r'-terminated name is never handed to the reader
+     string line;
+     while ( getline(filelist, line) ) {
+         if ( !line.empty() && line[line.size()-1] == '\r' )
+             line.erase(line.size()-1);
+         if ( line.empty() )
+             continue;
+         m_settings->InputFiles.push_back(line);
+     }
+ }
+
// opens the BAM files (by default without checking for indexes)
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>]");
+ Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one",
+ "[-in <filename> -in <filename> ... | -list <filelist>] [-out <filename> | [-forceCompression]] [-region <REGION>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts);
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts);
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts);
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts);
Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
}
// bamtools_random.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Grab a random subset of alignments (testing tool)
// ***************************************************************************
#include <ctime>
#include <cstdlib>
+#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// flags
bool HasAlignmentCount;
bool HasInput;
+ bool HasInputFilelist;
bool HasOutput;
+ bool HasRandomNumberSeed;
bool HasRegion;
bool IsForceCompression;
// parameters
unsigned int AlignmentCount;
vector<string> InputFiles;
+ string InputFilelist;
string OutputFilename;
+ unsigned int RandomNumberSeed;
string Region;
// constructor
RandomSettings(void)
: HasAlignmentCount(false)
, HasInput(false)
+ , HasInputFilelist(false)
, HasOutput(false)
+ , HasRandomNumberSeed(false)
, HasRegion(false)
, IsForceCompression(false)
, AlignmentCount(RANDOM_MAX_ALIGNMENT_COUNT)
, OutputFilename(Options::StandardOut())
+ , RandomNumberSeed(0)
{ }
};
bool RandomTool::RandomToolPrivate::Run(void) {
// set to default stdin if no input files provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+     ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+     if ( !filelist.is_open() ) {
+         cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl;
+         return false;
+     }
+
+     // one filename per line; tolerate CRLF line endings and blank lines so
+     // that an empty or '\r'-terminated name is never handed to the reader
+     string line;
+     while ( getline(filelist, line) ) {
+         if ( !line.empty() && line[line.size()-1] == '\r' )
+             line.erase(line.size()-1);
+         if ( line.empty() )
+             continue;
+         m_settings->InputFiles.push_back(line);
+     }
+ }
+
// open our reader
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
}
// seed our random number generator
- srand( time(NULL) );
+ if ( m_settings->HasRandomNumberSeed )
+ srand( m_settings->RandomNumberSeed ); // -seed supplied: seed with the user's value for repeatable results
+ else
+ srand( time(NULL) ); // no -seed supplied: seed from the current time
// grab random alignments
BamAlignment al;
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in <filename> -in <filename> ...] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
+ Options::SetProgramInfo("bamtools random", "grab a random subset of alignments",
+ "[-in <filename> -in <filename> ... | -list <filelist>] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
- Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings");
- Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
+ Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "",
+ m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
+ Options::AddValueOption("-seed", "unsigned integer", "random number generator seed (for repeatable results). Current time is used if no seed value is provided.", "",
+ m_settings->HasRandomNumberSeed, m_settings->RandomNumberSeed, SettingsOpts);
}
RandomTool::~RandomTool(void) {
// bamtools_resolve.cpp (c) 2011
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 14 October 2011
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Resolves paired-end reads (marking the IsProperPair flag as needed).
// ***************************************************************************
static const string RG_FIELD_DESCRIPTION =
"#<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>";
+static const string MODEL_DESCRIPTION = // '#'-prefixed legend of model IDs 1-8 (mate order + strand orientation); streamed to m_stream after the version line
+ "# ------------- Model Types Description ---------------\n"
+ "#\n"
+ "# ID Position Orientation \n"
+ "# 1 mate1 < mate2 mate1:forward, mate2:forward \n"
+ "# 2 mate1 < mate2 mate1:forward, mate2:reverse \n"
+ "# 3 mate1 < mate2 mate1:reverse, mate2:forward \n"
+ "# 4 mate1 < mate2 mate1:reverse, mate2:reverse \n"
+ "# 5 mate2 < mate1 mate2:forward, mate1:forward \n"
+ "# 6 mate2 < mate1 mate2:forward, mate1:reverse \n"
+ "# 7 mate2 < mate1 mate2:reverse, mate1:forward \n"
+ "# 8 mate2 < mate1 mate2:reverse, mate1:reverse \n"
+ "# -----------------------------------------------------\n";
+
// --------------------------------------------------------------------------
// unique readname file constants
// --------------------------------------------------------------------------
<< BAMTOOLS_VERSION_BUILD;
// # bamtools resolve (vX.Y.Z)
+ // #
+ // # MODEL DESCRIPTION - see above for actual text
// \n
m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl
+ << COMMENT_CHAR << endl
+ << MODEL_DESCRIPTION
<< endl;
}
// bamtools_split.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 8 December 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Splits a BAM file on user-specified property, creating a new BAM output
// file for each value found
static const string SPLIT_PAIRED_TOKEN = ".PAIRED_END";
static const string SPLIT_SINGLE_TOKEN = ".SINGLE_END";
static const string SPLIT_REFERENCE_TOKEN = ".REF_";
+static const string SPLIT_TAG_TOKEN = ".TAG_";
string GetTimestampString(void) {
bool HasInputFilename;
bool HasCustomOutputStub;
bool HasCustomRefPrefix;
+ bool HasCustomTagPrefix;
bool IsSplittingMapped;
bool IsSplittingPaired;
bool IsSplittingReference;
// string args
string CustomOutputStub;
string CustomRefPrefix;
+ string CustomTagPrefix;
string InputFilename;
string TagToSplit;
: HasInputFilename(false)
, HasCustomOutputStub(false)
, HasCustomRefPrefix(false)
+ , HasCustomTagPrefix(false)
, IsSplittingMapped(false)
, IsSplittingPaired(false)
, IsSplittingReference(false)
, IsSplittingTag(false)
, CustomOutputStub("")
, CustomRefPrefix("")
+ , CustomTagPrefix("")
, InputFilename(Options::StandardIn())
, TagToSplit("")
{ }
WriterMap outputFiles;
WriterMapIterator writerIter;
+ // pick the filename token that sits between the output stub and the tag name:
+ // the user's custom prefix when one was supplied, otherwise the built-in default
+ string tagPrefix = ( m_settings->HasCustomTagPrefix ? m_settings->CustomTagPrefix
+                                                      : SPLIT_TAG_TOKEN );
+
+ // the token must begin with '.'; prepend one when it does not
+ if ( tagPrefix.empty() || tagPrefix[0] != '.' )
+     tagPrefix.insert(0, ".");
+
// local variables
const string tag = m_settings->TagToSplit;
BamWriter* writer;
if ( al.GetTag(tag, currentValue) ) {
// open new BamWriter, save first alignment
- outputFilenameStream << m_outputFilenameStub << ".TAG_" << tag << "_" << currentValue << ".bam";
+ outputFilenameStream << m_outputFilenameStub << tagPrefix << tag << "_" << currentValue << ".bam";
writer = new BamWriter;
if ( !writer->Open(outputFilenameStream.str(), m_header, m_references) ) {
cerr << "bamtools split ERROR: could not open " << outputFilenameStream.str()
if ( writerIter == outputFiles.end() ) {
// open new BamWriter
- outputFilenameStream << m_outputFilenameStub << ".TAG_" << tag << "_" << currentValue << ".bam";
+ outputFilenameStream << m_outputFilenameStub << tagPrefix << tag << "_" << currentValue << ".bam";
writer = new BamWriter;
if ( !writer->Open(outputFilenameStream.str(), m_header, m_references) ) {
cerr << "bamtool split ERROR: could not open " << outputFilenameStream.str()
Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn());
Options::AddValueOption("-refPrefix", "string", "custom prefix for splitting by references. Currently files end with REF_<refName>.bam. This option allows you to replace \"REF_\" with a prefix of your choosing.", "",
m_settings->HasCustomRefPrefix, m_settings->CustomRefPrefix, IO_Opts);
+ Options::AddValueOption("-tagPrefix", "string", "custom prefix for splitting by tags. Current files end with TAG_<tagname>_<tagvalue>.bam. This option allows you to replace \"TAG_\" with a prefix of your choosing.", "",
+ m_settings->HasCustomTagPrefix, m_settings->CustomTagPrefix, IO_Opts);
Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "",
m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);
// bamtools_cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Prints general alignment statistics for BAM file(s).
// ***************************************************************************
#include <cmath>
#include <algorithm>
+#include <fstream>
#include <functional>
#include <iostream>
#include <numeric>
// flags
bool HasInput;
+ bool HasInputFilelist;
bool IsShowingInsertSizeSummary;
// filenames
vector<string> InputFiles;
+ string InputFilelist;
// constructor
StatsSettings(void)
: HasInput(false)
+ , HasInputFilelist(false)
, IsShowingInsertSizeSummary(false)
{ }
};
bool StatsTool::StatsToolPrivate::CalculateMedian(vector<int>& data, double& median) {
// skip if data empty
- if ( data.empty() ) return false;
+ if ( data.empty() )
+ return false;
// find middle element
size_t middleIndex = data.size() / 2;
}
// check for explicit proper pair flag
- if ( al.IsProperPair() ) ++m_numProperPair;
+ if ( al.IsProperPair() )
+ ++m_numProperPair;
// store insert size for first mate
if ( m_settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) {
bool StatsTool::StatsToolPrivate::Run() {
// set to default input if none provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+     ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+     if ( !filelist.is_open() ) {
+         cerr << "bamtools stats ERROR: could not open input BAM file list... Aborting." << endl;
+         return false;
+     }
+
+     // one filename per line; tolerate CRLF line endings and blank lines so
+     // that an empty or '\r'-terminated name is never handed to the reader
+     string line;
+     while ( getline(filelist, line) ) {
+         if ( !line.empty() && line[line.size()-1] == '\r' )
+             line.erase(line.size()-1);
+         if ( line.empty() )
+             continue;
+         m_settings->InputFiles.push_back(line);
+     }
+ }
+
// open the BAM files
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools stats", "prints general alignment statistics", "[-in <filename> -in <filename> ...] [statsOptions]");
+ Options::SetProgramInfo("bamtools stats", "prints general alignment statistics", "[-in <filename> -in <filename> ... | -list <filelist>] [statsOptions]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
OptionGroup* AdditionalOpts = Options::CreateOptionGroup("Additional Stats");
Options::AddOption("-insert", "summarize insert size data", m_settings->IsShowingInsertSizeSummary, AdditionalOpts);
add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake)
# create BamTools utils library
-add_library( BamTools-utils SHARED
+add_library( BamTools-utils STATIC
bamtools_fasta.cpp
bamtools_options.cpp
bamtools_pileup_engine.cpp
# set BamTools library properties
set_target_properties( BamTools-utils PROPERTIES
- SOVERSION 2.2.0
OUTPUT_NAME bamtools-utils
+ PREFIX "lib"
)
// bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 10 October 2011
+// Last modified: 3 May 2013
// ---------------------------------------------------------------------------
// Provides a generic filter engine based on filter-sets of properties,
// with possible "rules" (compound logical expressions) to create more complex
std::stack<bool> resultStack;
FilterMap::const_iterator filterIter;
- FilterMap::const_iterator filterEnd = m_filters.end();
std::queue<std::string> ruleQueueCopy = m_ruleQueue;
while ( !ruleQueueCopy.empty() ) {
const std::string& token = ruleQueueCopy.front();
else {
// look up PropertyFilter that matches this token
filterIter = m_filters.find(token);
- BAMTOOLS_ASSERT_MESSAGE( (filterIter != filterEnd), "Filter mentioned in rule, not found in FilterEngine" );
+ BAMTOOLS_ASSERT_MESSAGE( (filterIter != m_filters.end() ), "Filter mentioned in rule, not found in FilterEngine" );
const PropertyFilter& filter = (*filterIter).second;
bool result = m_checker.check(filter, query);
resultStack.push( result );