# create main BamTools API shared library
add_library( BamTools SHARED ${BamToolsAPISources} )
-set_target_properties( BamTools PROPERTIES SOVERSION "2.0.5" )
-set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" )
+set_target_properties( BamTools PROPERTIES
+ SOVERSION "2.0.5"
+ OUTPUT_NAME "bamtools" )
- target_link_libraries( BamTools z )
- install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin" )
# create main BamTools API static library
add_library( BamTools-static STATIC ${BamToolsAPISources} )
- set_target_properties( BamTools-static PROPERTIES
- OUTPUT_NAME "bamtools"
-set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" )
-set_target_properties( BamTools-static PROPERTIES PREFIX "lib" )
++set_target_properties( BamTools-static PROPERTIES
++ OUTPUT_NAME "bamtools"
+ PREFIX "lib" )
- target_link_libraries( BamTools-static z )
- install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools" )
+
+ # link libraries with zlib automatically
+ # Windows builds additionally link ws2_32. The platform test must use
+ # CMake's WIN32 variable: _WIN32 is a C/C++ preprocessor macro name that
+ # CMake itself does not define, so testing it always fell into the else branch.
+ if ( WIN32 )
+ set( APILibs z ws2_32 )
+ else()
+ set( APILibs z )
+ endif()
+
+ target_link_libraries( BamTools ${APILibs} )
+ target_link_libraries( BamTools-static ${APILibs} )
+
+ # set library install destinations
+ install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
+ install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
# export API headers
- include( ../ExportHeader.cmake )
- set( ApiIncludeDir "api" )
- ExportHeader( APIHeaders api_global.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAlgorithms.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAlignment.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAux.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamConstants.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamIndex.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamMultiReader.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamReader.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamWriter.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders IBamIODevice.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamConstants.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamHeader.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamProgram.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamProgramChain.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamReadGroup.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamSequence.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamSequenceDictionary.h ${ApiIncludeDir} )
+ include(../ExportHeader.cmake)
+ set(ApiIncludeDir "api")
+ ExportHeader(APIHeaders api_global.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAlgorithms.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAlignment.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAux.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamConstants.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders IBamIODevice.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamProgram.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamProgramChain.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamSequence.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamSequenceDictionary.h ${ApiIncludeDir})
-set(AlgorithmsIncludeDir "api/algorithms")
-ExportHeader(AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir})
+set( AlgorithmsIncludeDir "api/algorithms" )
+ExportHeader( AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir} )
--- /dev/null
- assert(m_stream);
+ // ***************************************************************************
+ // BamReader_p.cpp (c) 2009 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
+ // Last modified: 14 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides the basic functionality for reading BAM files
+ // ***************************************************************************
+
+ #include "api/BamConstants.h"
+ #include "api/BamReader.h"
+ #include "api/IBamIODevice.h"
+ #include "api/internal/bam/BamHeader_p.h"
+ #include "api/internal/bam/BamRandomAccessController_p.h"
+ #include "api/internal/bam/BamReader_p.h"
+ #include "api/internal/index/BamStandardIndex_p.h"
+ #include "api/internal/index/BamToolsIndex_p.h"
+ #include "api/internal/io/BamDeviceFactory_p.h"
+ #include "api/internal/utils/BamException_p.h"
+ using namespace BamTools;
+ using namespace BamTools::Internal;
+
+ #include <algorithm>
+ #include <cassert>
+ #include <iostream>
+ #include <iterator>
+ #include <vector>
+ using namespace std;
+
+ // constructor: starts with a zero alignments offset and remembers the owning reader
+ BamReaderPrivate::BamReaderPrivate(BamReader* parent)
+     : m_alignmentsBeginOffset(0), m_parent(parent)
+ {
+     // cache the host byte order once; read routines consult it before byte-swapping
+     m_isBigEndian = BamTools::SystemIsBigEndian();
+ }
+
+ // destructor: tears down reader state by delegating to Close() (its return value is ignored here)
+ BamReaderPrivate::~BamReaderPrivate(void) {
+ Close();
+ }
+
+ // closes the BAM file; returns false only when closing the stream throws
+ bool BamReaderPrivate::Close(void) {
+
+     // reset BAM metadata, filename & random access controller
+     m_references.clear();
+     m_header.Clear();
+     m_filename.clear();
+     m_randomAccessController.Close();
+
+     // nothing left to do when the stream is not open
+     if ( !IsOpen() )
+         return true;
+
+     // stream is open, attempt close & report any failure
+     try {
+         m_stream.Close();
+     } catch ( BamException& e ) {
+         const string closeError = e.what();
+         const string errorText = string("encountered error closing BAM file: \n\t") + closeError;
+         SetErrorString("BamReader::Close", errorText);
+         return false;
+     }
+
+     return true;
+ }
+
+ // creates an index file of requested type on current BAM file
+ bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
+
+     // an index can only be built from an open BAM file
+     if ( !IsOpen() ) {
+         SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+         return false;
+     }
+
+     // hand the work to the random access controller; done if it succeeds
+     const bool indexCreated = m_randomAccessController.CreateIndex(this, type);
+     if ( indexCreated )
+         return true;
+
+     // otherwise surface the controller's error text
+     const string controllerError = m_randomAccessController.GetErrorString();
+     const string errorText = string("could not create index: \n\t") + controllerError;
+     SetErrorString("BamReader::CreateIndex", errorText);
+     return false;
+ }
+
+ // returns a copy of the stored filename (set by Open() once loading succeeds, cleared by Close())
+ const string BamReaderPrivate::Filename(void) const {
+ return m_filename;
+ }
+
+ string BamReaderPrivate::GetErrorString(void) const { // returns the text last stored by SetErrorString()
+ return m_errorString;
+ }
+
+ // returns header data as std::string (whatever m_header.ToString() produces)
+ string BamReaderPrivate::GetHeaderText(void) const {
+ return m_header.ToString();
+ }
+
+ // returns header data as a SamHeader object (by value, via m_header.ToSamHeader())
+ SamHeader BamReaderPrivate::GetSamHeader(void) const {
+ return m_header.ToSamHeader();
+ }
+
+ // get next alignment (with character data fully parsed)
+ bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
+
+     // stop right away when no further valid alignment is available
+     if ( !GetNextAlignmentCore(alignment) )
+         return false;
+
+     // tag alignment with its "source" filename
+     alignment.Filename = m_filename;
+
+     // parse char data; on failure record the alignment's own error text
+     const bool charDataBuilt = alignment.BuildCharData();
+     if ( !charDataBuilt ) {
+         const string parseError = alignment.GetErrorString();
+         const string errorText = string("could not populate alignment data: \n\t") + parseError;
+         SetErrorString("BamReader::GetNextAlignment", errorText);
+     }
+     return charDataBuilt;
+ }
+
+ // retrieves next available alignment core data (returns success/fail)
+ // ** character data fields (read name, bases, qualities, tag data, filename) are NOT populated;
+ //    the raw bytes remain available through the alignment's SupportData
+ // intended for callers that need ONLY positional or other alignment-related information
+ bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
+
+     // nothing to read from a closed stream
+     if ( !m_stream.IsOpen() )
+         return false;
+
+     try {
+
+         // a region that is set but holds no alignments yields nothing
+         const bool regionIsEmpty = m_randomAccessController.HasRegion() &&
+                                    !m_randomAccessController.RegionHasAlignments();
+         if ( regionIsEmpty )
+             return false;
+
+         // keep loading alignments until one overlaps the region
+         for ( ; ; ) {
+
+             // give up if no further alignment can be read
+             if ( !LoadNextAlignment(alignment) )
+                 return false;
+
+             // classify the alignment against the current region
+             const BamRandomAccessController::RegionState overlapState =
+                 m_randomAccessController.AlignmentState(alignment);
+
+             // once an alignment starts after the region, no need to keep reading
+             if ( overlapState == BamRandomAccessController::AfterRegion )
+                 return false;
+
+             // found the next 'valid' alignment
+             if ( overlapState == BamRandomAccessController::OverlapsRegion )
+                 break;
+         }
+
+         // mark that only the core data has been parsed so far
+         alignment.SupportData.HasCoreOnly = true;
+         return true;
+
+     } catch ( BamException& e ) {
+         const string readError = e.what();
+         const string errorText = string("encountered error reading BAM alignment: \n\t") + readError;
+         SetErrorString("BamReader::GetNextAlignmentCore", errorText);
+         return false;
+     }
+ }
+
+ int BamReaderPrivate::GetReferenceCount(void) const { // number of entries currently held in m_references
+ return m_references.size();
+ }
+
+ const RefVector& BamReaderPrivate::GetReferenceData(void) const { // read-only reference to the stored reference entries (no copy)
+ return m_references;
+ }
+
+ // returns RefID for given RefName (returns -1 if not found)
+ // the RefID is the position of the first entry in m_references whose RefName equals refName
+ int BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+     // scan the reference entries in place; no temporary copy of the names is needed
+     RefVector::const_iterator refIter = m_references.begin();
+     RefVector::const_iterator refEnd  = m_references.end();
+     for ( int index = 0; refIter != refEnd; ++refIter, ++index ) {
+         if ( refIter->RefName == refName )
+             return index;
+     }
+
+     // no entry carries the requested name
+     return -1;
+ }
+
+ bool BamReaderPrivate::HasIndex(void) const { // forwards the random access controller's HasIndex() answer
+ return m_randomAccessController.HasIndex();
+ }
+
+ bool BamReaderPrivate::IsOpen(void) const { // the reader counts as open exactly when its stream reports open
+ return m_stream.IsOpen();
+ }
+
+ // load BAM header data (m_header reads it from the reader's stream)
+ void BamReaderPrivate::LoadHeaderData(void) {
+ m_header.Load(&m_stream);
+ }
+
+ // populates BamAlignment with alignment data under file pointer, returns success/fail
+ // returns false when no further record can be read, or when the record is truncated or malformed
+ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
+
+     // read in the 'block length' value, make sure it's not zero
+     // (buffer is zero-filled up front so that a Read() which delivers no bytes
+     //  yields a block length of 0 rather than indeterminate stack contents)
+     char buffer[sizeof(uint32_t)] = { 0 };
+     m_stream.Read(buffer, sizeof(uint32_t));
+     alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
+     if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
+     if ( alignment.SupportData.BlockLength == 0 )
+         return false;
+
+     // a block smaller than the fixed-size core cannot hold a record; rejecting it
+     // here also keeps the unsigned 'dataLength' subtraction below from wrapping
+     if ( alignment.SupportData.BlockLength < Constants::BAM_CORE_SIZE )
+         return false;
+
+     // read in core alignment data, make sure the right size of data was read
+     char x[Constants::BAM_CORE_SIZE];
+     if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
+         return false;
+
+     // swap core endian-ness if necessary (core is handled as consecutive 32-bit words)
+     if ( m_isBigEndian ) {
+         for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+             BamTools::SwapEndian_32p(&x[i]);
+     }
+
+     // set BamAlignment 'core' and 'support' data
+     alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
+     alignment.Position = BamTools::UnpackSignedInt(&x[4]);
+
+     // word at offset 8 packs: bin (upper 16 bits), map quality (next 8), query name length (low 8)
+     unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
+     alignment.Bin        = tempValue >> 16;
+     alignment.MapQuality = (tempValue >> 8) & 0xff;
+     alignment.SupportData.QueryNameLength = tempValue & 0xff;
+
+     // word at offset 12 packs: alignment flag (upper 16 bits), number of CIGAR ops (low 16)
+     tempValue = BamTools::UnpackUnsignedInt(&x[12]);
+     alignment.AlignmentFlag = tempValue >> 16;
+     alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+
+     alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
+     alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
+     alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
+     alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
+
+     // set BamAlignment length
+     alignment.Length = alignment.SupportData.QuerySequenceLength;
+
+     // read in character data - make sure proper data size was read
+     const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+     RaiiBuffer allCharData(dataLength);
+     if ( m_stream.Read(allCharData.Buffer, dataLength) != dataLength )
+         return false;
+
+     // CIGAR ops start 'QueryNameLength' bytes into the character data;
+     // refuse records whose declared ops would extend past the bytes actually read
+     const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
+     const unsigned int cigarDataLength =
+         static_cast<unsigned int>(alignment.SupportData.NumCigarOperations * sizeof(uint32_t));
+     if ( cigarDataLength > 0 && cigarDataOffset + cigarDataLength > dataLength )
+         return false;
+
+     // store 'allCharData' in supportData structure
+     alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
+
+     // save CIGAR ops
+     // need to calculate this here so that BamAlignment::GetEndPosition() performs correctly,
+     // even when GetNextAlignmentCore() is called
+     uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
+     CigarOp op;
+     alignment.CigarData.clear();
+     alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
+     for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
+
+         // swap endian-ness if necessary
+         if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
+
+         // build CigarOp structure
+         op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
+         op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
+
+         // save CigarOp
+         alignment.CigarData.push_back(op);
+     }
+
+     // record fully loaded
+     return true;
+ }
+
+ // loads reference data from BAM file (always reports success)
+ bool BamReaderPrivate::LoadReferenceData(void) {
+
+     // scratch space for each 32-bit field read below
+     char fieldBytes[sizeof(uint32_t)];
+
+     // how many reference sequences follow
+     m_stream.Read(fieldBytes, sizeof(uint32_t));
+     uint32_t referenceCount = BamTools::UnpackUnsignedInt(fieldBytes);
+     if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceCount);
+     m_references.reserve((int)referenceCount);
+
+     // read one (name, length) entry per reference
+     for ( unsigned int refIndex = 0; refIndex != referenceCount; ++refIndex ) {
+
+         // length of the reference name
+         m_stream.Read(fieldBytes, sizeof(uint32_t));
+         uint32_t nameLength = BamTools::UnpackUnsignedInt(fieldBytes);
+         if ( m_isBigEndian ) BamTools::SwapEndian_32(nameLength);
+         RaiiBuffer nameBytes(nameLength);
+
+         // the name itself, followed by the reference sequence length
+         m_stream.Read(nameBytes.Buffer, nameLength);
+         m_stream.Read(fieldBytes, sizeof(int32_t));
+         int32_t sequenceLength = BamTools::UnpackSignedInt(fieldBytes);
+         if ( m_isBigEndian ) BamTools::SwapEndian_32(sequenceLength);
+
+         // append the entry (name is taken as a C string from the buffer)
+         RefData entry;
+         entry.RefName   = (string)((const char*)nameBytes.Buffer);
+         entry.RefLength = sequenceLength;
+         m_references.push_back(entry);
+     }
+
+     return true;
+ }
+
+ bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
+
+     // let the random access controller search; done if it finds an index
+     const bool indexLocated = m_randomAccessController.LocateIndex(this, preferredType);
+     if ( indexLocated )
+         return true;
+
+     // otherwise surface the controller's error text
+     const string controllerError = m_randomAccessController.GetErrorString();
+     const string errorText = string("could not locate index: \n\t") + controllerError;
+     SetErrorString("BamReader::LocateIndex", errorText);
+     return false;
+ }
+
+ // opens BAM file, then loads its header & reference data
+ bool BamReaderPrivate::Open(const string& filename) {
+
+     try {
+
+         // discard any state left over from a previously opened file
+         Close();
+
+         // open BgzfStream in read-only mode
+         m_stream.Open(filename, IBamIODevice::ReadOnly);
+
+         // BAM metadata: header first, then references
+         LoadHeaderData();
+         LoadReferenceData();
+
+         // remember the filename & where the first alignment begins
+         m_filename = filename;
+         m_alignmentsBeginOffset = m_stream.Tell();
+         return true;
+
+     } catch ( BamException& e ) {
+
+         // assemble "could not open file: <filename>\n\t<reason>"
+         const string openError = e.what();
+         string errorText("could not open file: ");
+         errorText += filename;
+         errorText += "\n\t";
+         errorText += openError;
+         SetErrorString("BamReader::Open", errorText);
+         return false;
+     }
+ }
+
+ bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
+
+     // let the random access controller open the index; done if it succeeds
+     const bool indexOpened = m_randomAccessController.OpenIndex(indexFilename, this);
+     if ( indexOpened )
+         return true;
+
+     // otherwise surface the controller's error text
+     const string controllerError = m_randomAccessController.GetErrorString();
+     const string errorText = string("could not open index: \n\t") + controllerError;
+     SetErrorString("BamReader::OpenIndex", errorText);
+     return false;
+ }
+
+ // clears any region & moves the BAM file pointer back to the first alignment
+ bool BamReaderPrivate::Rewind(void) {
+
+     // reset region
+     m_randomAccessController.ClearRegion();
+
+     // done if seeking back to the first alignment works
+     if ( Seek(m_alignmentsBeginOffset) )
+         return true;
+
+     // copy the current error text first, since SetErrorString() overwrites m_errorString
+     const string seekError = m_errorString;
+     const string errorText = string("could not rewind: \n\t") + seekError;
+     SetErrorString("BamReader::Rewind", errorText);
+     return false;
+ }
+
+ bool BamReaderPrivate::Seek(const int64_t& position) {
+
+     // seeking requires an open BAM file
+     if ( !IsOpen() ) {
+         SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+         return false;
+     }
+
+     // attempt the seek; a thrown BamException becomes a stored error message
+     try {
+         m_stream.Seek(position);
+     }
+     catch ( BamException& e ) {
+         const string seekError = e.what();
+         const string errorText = string("could not seek in BAM file: \n\t") + seekError;
+         SetErrorString("BamReader::Seek", errorText);
+         return false;
+     }
+     return true;
+ }
+
+ void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+     // stored form is "<where>: <what>"
+     m_errorString = where + ": " + what;
+ }
+
+ void BamReaderPrivate::SetIndex(BamIndex* index) { // hands the index pointer to the random access controller
+ m_randomAccessController.SetIndex(index);
+ }
+
+ // sets current region via the random access controller
+ // returns success/failure
+ bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+
+     // the controller also receives the current reference count
+     const bool regionSet = m_randomAccessController.SetRegion(region, m_references.size());
+     if ( regionSet )
+         return true;
+
+     // otherwise surface the controller's error text
+     const string controllerError = m_randomAccessController.GetErrorString();
+     const string errorText = string("could not set region: \n\t") + controllerError;
+     SetErrorString("BamReader::SetRegion", errorText);
+     return false;
+ }
+
+ int64_t BamReaderPrivate::Tell(void) const { // current position as reported by the stream
+ return m_stream.Tell();
+ }
--- /dev/null
+ // ***************************************************************************
+ // BamStandardIndex.h (c) 2010 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
+ // Last modified: 10 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides index operations for the standardized BAM index format (".bai")
+ // ***************************************************************************
+
+ #ifndef BAM_STANDARD_INDEX_FORMAT_H
+ #define BAM_STANDARD_INDEX_FORMAT_H
+
+ // -------------
+ // W A R N I N G
+ // -------------
+ //
+ // This file is not part of the BamTools API. It exists purely as an
+ // implementation detail. This header file may change from version to
+ // version without notice, or even be removed.
+ //
+ // We mean it.
+
+ #include "api/BamAux.h"
+ #include "api/BamIndex.h"
+ #include "api/IBamIODevice.h"
+ #include <map>
+ #include <set>
+ #include <string>
+ #include <vector>
+
+ namespace BamTools {
+ namespace Internal {
+
+ // -----------------------------------------------------------------------------
+ // BamStandardIndex data structures
+
+ // a contiguous run of alignments, described by its start and stop values
+ struct BaiAlignmentChunk {
+
+     uint64_t Start;
+     uint64_t Stop;
+
+     // both bounds default to 0
+     BaiAlignmentChunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+         : Start(start), Stop(stop)
+     { }
+ };
+
+ // orders chunks by ascending Start only; Stop is not consulted (for sorting)
+ inline
+ bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+     return rhs.Start > lhs.Start;
+ }
+
+ // convenience typedef for a list of all alignment 'chunks' in a BAI bin
+ typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+
+ // convenience typedef for a map of all BAI bins in a reference (ID => chunks)
+ typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+
+ // convenience typedef for a list of all 'linear offsets' in a reference
+ typedef std::vector<uint64_t> BaiLinearOffsetVector;
+
+ // full BAI index data for a single reference:
+ // everything needed for building, loading, & writing it
+ struct BaiReferenceEntry {
+
+     int32_t ID;
+     BaiBinMap Bins;
+     BaiLinearOffsetVector LinearOffsets;
+
+     // ID defaults to -1; both containers start empty
+     BaiReferenceEntry(const int32_t& id = -1) : ID(id) { }
+ };
+
+ // (persistent) summary of a BaiReferenceEntry's index data:
+ // element counts plus the file positions of the first bin & first linear offset
+ struct BaiReferenceSummary {
+
+     int NumBins;
+     int NumLinearOffsets;
+     uint64_t FirstBinFilePosition;
+     uint64_t FirstLinearOffsetFilePosition;
+
+     // every field starts at zero
+     BaiReferenceSummary(void)
+         : NumBins(0), NumLinearOffsets(0)
+         , FirstBinFilePosition(0), FirstLinearOffsetFilePosition(0)
+     { }
+ };
+
+ // convenience typedef for describing a full BAI index file summary
+ typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+
+ // end BamStandardIndex data structures
+ // -----------------------------------------------------------------------------
+
+ class BamStandardIndex : public BamIndex {
+
+ // ctor & dtor
+ public:
+ BamStandardIndex(Internal::BamReaderPrivate* reader);
+ ~BamStandardIndex(void);
+
+ // BamIndex implementation
+ public:
+ // builds index from associated BAM file & writes out to index file
+ bool Create(void);
+ // returns whether reference has alignments or not
+ bool HasAlignments(const int& referenceID) const;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+ // loads existing data from file into memory
+ bool Load(const std::string& filename);
++ BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; } // identifies this index as the STANDARD type
+ public:
+ // returns format's file extension
+ static const std::string Extension(void);
+
+ // internal methods
+ private:
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CloseFile(void);
+ bool IsDeviceOpen(void) const;
+ void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+ void Seek(const int64_t& position, const int origin);
+ int64_t Tell(void) const;
+
+ // BAI index building methods
+ void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+ void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+ const uint32_t& currentBin,
+ const uint64_t& currentOffset,
+ const uint64_t& lastOffset);
+ void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+ const int& alignmentStartPosition,
+ const int& alignmentStopPosition,
+ const uint64_t& lastOffset);
+
+ // random-access methods
+ void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+ void CalculateCandidateBins(const uint32_t& begin,
+ const uint32_t& end,
+ std::set<uint16_t>& candidateBins);
+ void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+ const uint64_t& minOffset,
+ std::set<uint16_t>& candidateBins,
+ std::vector<int64_t>& offsets);
+ uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+
+ // BAI summary (create/load) methods
+ void ReserveForSummary(const int& numReferences);
+ void SaveBinsSummary(const int& refId, const int& numBins);
+ void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+ void SkipBins(const int& numBins);
+ void SkipLinearOffsets(const int& numLinearOffsets);
+ void SummarizeBins(BaiReferenceSummary& refSummary);
+ void SummarizeIndexFile(void);
+ void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+ void SummarizeReference(BaiReferenceSummary& refSummary);
+
+ // BAI full index input methods
+ void ReadBinID(uint32_t& binId);
+ void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+ void ReadIntoBuffer(const unsigned int& bytesRequested);
+ void ReadLinearOffset(uint64_t& linearOffset);
+ void ReadNumAlignmentChunks(int& numAlignmentChunks);
+ void ReadNumBins(int& numBins);
+ void ReadNumLinearOffsets(int& numLinearOffsets);
+ void ReadNumReferences(int& numReferences);
+
+ // BAI full index output methods
+ void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+ void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+ void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+ void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+ void WriteBins(const int& refId, BaiBinMap& bins);
+ void WriteHeader(void);
+ void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+ void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+
+ // data members
+ private:
+ bool m_isBigEndian;
+ BaiFileSummary m_indexFileSummary; // one BaiReferenceSummary per element (see BaiFileSummary typedef)
+
+ // our input buffer (length kept here; the I/O device & buffer pointers are bundled in RaiiWrapper)
+ unsigned int m_bufferLength;
+ struct RaiiWrapper { // NOTE(review): ctor/dtor are defined elsewhere; presumably they manage Device & Buffer - confirm
+ IBamIODevice* Device;
+ char* Buffer;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper m_resources;
+
+ // static methods
+ private:
+ // checks if the buffer is large enough to accommodate the requested size
+ static void CheckBufferSize(char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // checks if the buffer is large enough to accommodate the requested size (unsigned char overload)
+ static void CheckBufferSize(unsigned char*& buffer,
+ unsigned int& bufferLength,
+ const unsigned int& requestedBytes);
+ // static constants
+ private:
+ static const int MAX_BIN;
+ static const int BAM_LIDX_SHIFT;
+ static const std::string BAI_EXTENSION;
+ static const char* const BAI_MAGIC;
+ static const int SIZEOF_ALIGNMENTCHUNK;
+ static const int SIZEOF_BINCORE;
+ static const int SIZEOF_LINEAROFFSET;
+ };
+
+ } // namespace Internal
+ } // namespace BamTools
+
+ #endif // BAM_STANDARD_INDEX_FORMAT_H
--- /dev/null
+ // ***************************************************************************
+ // BamToolsIndex.h (c) 2010 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
+ // Last modified: 10 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides index operations for the BamTools index format (".bti")
+ // ***************************************************************************
+
+ #ifndef BAMTOOLS_INDEX_FORMAT_H
+ #define BAMTOOLS_INDEX_FORMAT_H
+
+ // -------------
+ // W A R N I N G
+ // -------------
+ //
+ // This file is not part of the BamTools API. It exists purely as an
+ // implementation detail. This header file may change from version to
+ // version without notice, or even be removed.
+ //
+ // We mean it.
+
+ #include "api/BamAux.h"
+ #include "api/BamIndex.h"
+ #include "api/IBamIODevice.h"
+ #include <map>
+ #include <string>
+ #include <vector>
+
+ namespace BamTools {
+ namespace Internal {
+
+ // data kept for each 'block' in a BTI index
+ struct BtiBlock {
+
+     int32_t MaxEndPosition;
+     int64_t StartOffset;
+     int32_t StartPosition;
+
+     // all three fields default to 0
+     BtiBlock(const int32_t& maxEndPosition = 0,
+              const int64_t& startOffset = 0,
+              const int32_t& startPosition = 0)
+         : MaxEndPosition(maxEndPosition), StartOffset(startOffset), StartPosition(startPosition)
+     { }
+ };
+
+ // convenience typedef for describing a list of BTI blocks on a reference
+ typedef std::vector<BtiBlock> BtiBlockVector;
+
+ // full BTI index data for a single reference:
+ // everything needed for building, loading, & writing it
+ struct BtiReferenceEntry {
+
+     int32_t ID;
+     BtiBlockVector Blocks;
+
+     // ID defaults to -1; block list starts empty
+     BtiReferenceEntry(const int& id = -1) : ID(id) { }
+ };
+
+ // (persistent) summary of a BtiReferenceEntry's index data:
+ // the block count plus the file position of the first block
+ struct BtiReferenceSummary {
+
+     int NumBlocks;
+     uint64_t FirstBlockFilePosition;
+
+     // both fields start at zero
+     BtiReferenceSummary(void)
+         : NumBlocks(0), FirstBlockFilePosition(0)
+     { }
+ };
+
+ // convenience typedef for describing a full BTI index file summary
+ typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+
+ class BamToolsIndex : public BamIndex {
+
+ // keep a list of any supported versions here
+ // (might be useful later to handle any 'legacy' versions if the format changes)
+ // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+ //
+ // so a change introduced in BTI_1_2 may be handled from then on by:
+ //
+ // if ( indexVersion >= BTI_1_2 )
+ // do something new
+ // else
+ // do the old thing
+ enum Version { BTI_1_0 = 1
+ , BTI_1_1
+ , BTI_1_2
+ , BTI_2_0
+ };
+
+ // ctor & dtor
+ public:
+ BamToolsIndex(Internal::BamReaderPrivate* reader);
+ ~BamToolsIndex(void);
+
+ // BamIndex implementation
+ public:
+ // builds index from associated BAM file & writes out to index file
+ bool Create(void);
+ // returns whether reference has alignments or not
+ bool HasAlignments(const int& referenceID) const;
+ // attempts to use index data to jump to @region, returns success/fail
+ // a "successful" jump indicates no error, but not whether this region has data
+ // * thus, the method sets a flag to indicate whether there are alignments
+ // available after the jump position
+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+ // loads existing data from file into memory
+ bool Load(const std::string& filename);
++ BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; } // identifies this index as the BAMTOOLS type
+ public:
+ // returns format's file extension
+ static const std::string Extension(void);
+
+ // internal methods
+ private:
+
+ // index file ops
+ void CheckMagicNumber(void);
+ void CheckVersion(void);
+ void CloseFile(void);
+ bool IsDeviceOpen(void) const;
+ void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+ void Seek(const int64_t& position, const int origin);
+ int64_t Tell(void) const;
+
+ // index-creation methods
+ void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+ void WriteBlock(const BtiBlock& block);
+ void WriteBlocks(const BtiBlockVector& blocks);
+ void WriteHeader(void);
+ void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+
+ // random-access methods
+ void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+ void ReadBlock(BtiBlock& block);
+ void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+ void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+
+ // BTI summary data methods
+ void InitializeFileSummary(const int& numReferences);
+ void LoadFileSummary(void);
+ void LoadHeader(void);
+ void LoadNumBlocks(int& numBlocks);
+ void LoadNumReferences(int& numReferences);
+ void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+ void SkipBlocks(const int& numBlocks);
+
+ // data members
+ private:
+ bool m_isBigEndian;
+ BtiFileSummary m_indexFileSummary; // one BtiReferenceSummary per element (see BtiFileSummary typedef)
+ uint32_t m_blockSize;
+ int32_t m_inputVersion; // Version is serialized as int
+ Version m_outputVersion;
+
+ struct RaiiWrapper { // NOTE(review): ctor/dtor are defined elsewhere; presumably they manage Device - confirm
+ IBamIODevice* Device;
+ RaiiWrapper(void);
+ ~RaiiWrapper(void);
+ };
+ RaiiWrapper m_resources;
+
+ // static constants
+ private:
+ static const uint32_t DEFAULT_BLOCK_LENGTH;
+ static const std::string BTI_EXTENSION;
+ static const char* const BTI_MAGIC;
+ static const int SIZEOF_BLOCK;
+ };
+
+ } // namespace Internal
+ } // namespace BamTools
+
+ #endif // BAMTOOLS_INDEX_FORMAT_H