]> git.donarmstrong.com Git - bamtools.git/commitdiff
merge with remoteio branch
authorderek <derekwbarnett@gmail.com>
Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
committerderek <derekwbarnett@gmail.com>
Mon, 28 Nov 2011 23:55:31 +0000 (18:55 -0500)
1  2 
src/api/CMakeLists.txt
src/api/internal/bam/BamReader_p.cpp
src/api/internal/index/BamStandardIndex_p.h
src/api/internal/index/BamToolsIndex_p.h

index c8504edba46e3023c35a1f51ac9d66bf7e109e4d,45005645098e133fb1daca630e49920b8dc76f6e..539feca0317a4e801ce14fa34626c0a98849e654
@@@ -49,41 -33,49 +33,51 @@@ set( BamToolsAPISource
  
  # create main BamTools API shared library
  add_library( BamTools SHARED ${BamToolsAPISources} )
 -set_target_properties( BamTools PROPERTIES SOVERSION "2.0.5" )
 -set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" )
 +set_target_properties( BamTools PROPERTIES
 +                       SOVERSION "2.0.5"
 +                       OUTPUT_NAME "bamtools" )
- target_link_libraries( BamTools z )
- install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin" )
  
  # create main BamTools API static library
  add_library( BamTools-static STATIC ${BamToolsAPISources} )
- set_target_properties( BamTools-static PROPERTIES
-                        OUTPUT_NAME "bamtools"
 -set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" )
 -set_target_properties( BamTools-static PROPERTIES PREFIX "lib" )
++set_target_properties( BamTools-static PROPERTIES 
++                       OUTPUT_NAME "bamtools" 
 +                       PREFIX "lib" )
- target_link_libraries( BamTools-static z )
- install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools" )
+ # Libraries every BamTools API target links against.
+ # zlib is always required; Windows builds additionally need Winsock (ws2_32).
+ # NOTE: the CMake platform variable is WIN32. "_WIN32" is the C/C++ preprocessor
+ # macro and is not defined as a CMake variable, so testing it never selects
+ # the Windows branch.
+ if ( WIN32 )
+     set( APILibs z ws2_32 )
+ else()
+     set( APILibs z )
+ endif()
+ target_link_libraries( BamTools ${APILibs} )
+ target_link_libraries( BamTools-static ${APILibs} )
+ # set library install destinations
+ install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools" RUNTIME DESTINATION "bin")
+ install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
  
  # export API headers
- include( ../ExportHeader.cmake )
- set( ApiIncludeDir "api" )
- ExportHeader( APIHeaders api_global.h             ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAlgorithms.h          ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAlignment.h           ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamAux.h                 ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamConstants.h           ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamIndex.h               ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamMultiReader.h         ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamReader.h              ${ApiIncludeDir} )
- ExportHeader( APIHeaders BamWriter.h              ${ApiIncludeDir} )
- ExportHeader( APIHeaders IBamIODevice.h           ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamConstants.h           ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamHeader.h              ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamProgram.h             ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamProgramChain.h        ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamReadGroup.h           ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamSequence.h            ${ApiIncludeDir} )
- ExportHeader( APIHeaders SamSequenceDictionary.h  ${ApiIncludeDir} )
+ include(../ExportHeader.cmake)
+ set(ApiIncludeDir "api")
+ ExportHeader(APIHeaders api_global.h             ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAlgorithms.h          ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAlignment.h           ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamAux.h                 ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamConstants.h           ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamIndex.h               ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamMultiReader.h         ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamReader.h              ${ApiIncludeDir})
+ ExportHeader(APIHeaders BamWriter.h              ${ApiIncludeDir})
+ ExportHeader(APIHeaders IBamIODevice.h           ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamConstants.h           ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamHeader.h              ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamProgram.h             ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamProgramChain.h        ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamReadGroup.h           ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamSequence.h            ${ApiIncludeDir})
+ ExportHeader(APIHeaders SamSequenceDictionary.h  ${ApiIncludeDir})
  
 -set(AlgorithmsIncludeDir "api/algorithms")
 -ExportHeader(AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir})
 +set( AlgorithmsIncludeDir "api/algorithms" )
 +ExportHeader( AlgorithmsHeaders algorithms/Sort.h ${AlgorithmsIncludeDir} )
index 0000000000000000000000000000000000000000,6484a10b2c0f07aed19e31a74b1d5f9ed114c51a..6904da7a6f3f53c917ba6d274521c2aaf6d7208c
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,466 +1,469 @@@
 -        assert(m_stream);
+ // ***************************************************************************
+ // BamReader_p.cpp (c) 2009 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
++// Last modified: 14 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides the basic functionality for reading BAM files
+ // ***************************************************************************
+ #include "api/BamConstants.h"
+ #include "api/BamReader.h"
+ #include "api/IBamIODevice.h"
+ #include "api/internal/bam/BamHeader_p.h"
+ #include "api/internal/bam/BamRandomAccessController_p.h"
+ #include "api/internal/bam/BamReader_p.h"
+ #include "api/internal/index/BamStandardIndex_p.h"
+ #include "api/internal/index/BamToolsIndex_p.h"
+ #include "api/internal/io/BamDeviceFactory_p.h"
+ #include "api/internal/utils/BamException_p.h"
+ using namespace BamTools;
+ using namespace BamTools::Internal;
+ #include <algorithm>
+ #include <cassert>
+ #include <iostream>
+ #include <iterator>
+ #include <vector>
+ using namespace std;
+ // constructor
+ BamReaderPrivate::BamReaderPrivate(BamReader* parent)
+     : m_alignmentsBeginOffset(0)
+     , m_parent(parent)
+ {
+     m_isBigEndian = BamTools::SystemIsBigEndian();
+ }
+ // destructor
+ BamReaderPrivate::~BamReaderPrivate(void) {
+     Close();
+ }
+ // closes the BAM file
+ bool BamReaderPrivate::Close(void) {
+     // clear BAM metadata
+     m_references.clear();
+     m_header.Clear();
+     // clear filename
+     m_filename.clear();
+     // close random access controller
+     m_randomAccessController.Close();
+     // if stream is open, attempt close
+     if ( IsOpen() ) {
+         try {
+             m_stream.Close();
+         } catch ( BamException& e ) {
+             const string streamError = e.what();
+             const string message = string("encountered error closing BAM file: \n\t") + streamError;
+             SetErrorString("BamReader::Close", message);
+             return false;
+         }
+     }
+     // return success
+     return true;
+ }
+ // creates an index file of requested type on current BAM file
+ bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {
+     // skip if BAM file not open
+     if ( !IsOpen() ) {
+         SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file");
+         return false;
+     }
+     // attempt to create index
+     if ( m_randomAccessController.CreateIndex(this, type) )
+         return true;
+     else {
+         const string bracError = m_randomAccessController.GetErrorString();
+         const string message = string("could not create index: \n\t") + bracError;
+         SetErrorString("BamReader::CreateIndex", message);
+         return false;
+     }
+ }
+ // return path & filename of current BAM file
+ const string BamReaderPrivate::Filename(void) const {
+     return m_filename;
+ }
+ string BamReaderPrivate::GetErrorString(void) const {
+     return m_errorString;
+ }
+ // return header data as std::string
+ string BamReaderPrivate::GetHeaderText(void) const {
+     return m_header.ToString();
+ }
+ // return header data as SamHeader object
+ SamHeader BamReaderPrivate::GetSamHeader(void) const {
+     return m_header.ToSamHeader();
+ }
+ // get next alignment (with character data fully parsed)
+ bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {
+     // if valid alignment found
+     if ( GetNextAlignmentCore(alignment) ) {
+         // store alignment's "source" filename
+         alignment.Filename = m_filename;
+         // return success/failure of parsing char data
+         if ( alignment.BuildCharData() )
+             return true;
+         else {
+             const string alError = alignment.GetErrorString();
+             const string message = string("could not populate alignment data: \n\t") + alError;
+             SetErrorString("BamReader::GetNextAlignment", message);
+             return false;
+         }
+     }
+     // no valid alignment found
+     return false;
+ }
+ // retrieves next available alignment core data (returns success/fail)
+ // ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)
+ //    these can be accessed, if necessary, from the supportData
+ // useful for operations requiring ONLY positional or other alignment-related information
+ bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {
+     // skip if stream not opened
+     if ( !m_stream.IsOpen() )
+         return false;
+     try {
+         // skip if region is set but has no alignments
+         if ( m_randomAccessController.HasRegion() &&
+              !m_randomAccessController.RegionHasAlignments() )
+         {
+             return false;
+         }
+         // if can't read next alignment
+         if ( !LoadNextAlignment(alignment) )
+             return false;
+         // check alignment's region-overlap state
+         BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);
+         // if alignment starts after region, no need to keep reading
+         if ( state == BamRandomAccessController::AfterRegion )
+             return false;
+         // read until overlap is found
+         while ( state != BamRandomAccessController::OverlapsRegion ) {
+             // if can't read next alignment
+             if ( !LoadNextAlignment(alignment) )
+                 return false;
+             // check alignment's region-overlap state
+             state = m_randomAccessController.AlignmentState(alignment);
+             // if alignment starts after region, no need to keep reading
+             if ( state == BamRandomAccessController::AfterRegion )
+                 return false;
+         }
+         // if we get here, we found the next 'valid' alignment
+         // (e.g. overlaps current region if one was set, simply the next alignment if not)
+         alignment.SupportData.HasCoreOnly = true;
+         return true;
+     } catch ( BamException& e ) {
+         const string streamError = e.what();
+         const string message = string("encountered error reading BAM alignment: \n\t") + streamError;
+         SetErrorString("BamReader::GetNextAlignmentCore", message);
+         return false;
+     }
+ }
+ int BamReaderPrivate::GetReferenceCount(void) const {
+     return m_references.size();
+ }
+ const RefVector& BamReaderPrivate::GetReferenceData(void) const {
+     return m_references;
+ }
+ // returns RefID for given RefName (returns References.size() if not found)
+ int BamReaderPrivate::GetReferenceID(const string& refName) const {
+     // retrieve names from reference data
+     vector<string> refNames;
+     RefVector::const_iterator refIter = m_references.begin();
+     RefVector::const_iterator refEnd  = m_references.end();
+     for ( ; refIter != refEnd; ++refIter)
+         refNames.push_back( (*refIter).RefName );
+     // return 'index-of' refName (or -1 if not found)
+     int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
+     if ( index == (int)m_references.size() ) return -1;
+     else return index;
+ }
+ bool BamReaderPrivate::HasIndex(void) const {
+     return m_randomAccessController.HasIndex();
+ }
+ bool BamReaderPrivate::IsOpen(void) const {
+     return m_stream.IsOpen();
+ }
+ // load BAM header data
+ void BamReaderPrivate::LoadHeaderData(void) {
+     m_header.Load(&m_stream);
+ }
+ // populates BamAlignment with alignment data under file pointer, returns success/fail
+ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) {
+     // read in the 'block length' value, make sure it's not zero
+     char buffer[sizeof(uint32_t)];
+     m_stream.Read(buffer, sizeof(uint32_t));
+     alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer);
+     if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength);
+     if ( alignment.SupportData.BlockLength == 0 )
+         return false;
+     // read in core alignment data, make sure the right size of data was read
+     char x[Constants::BAM_CORE_SIZE];
+     if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE )
+         return false;
+     // swap core endian-ness if necessary
+     if ( m_isBigEndian ) {
+         for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+             BamTools::SwapEndian_32p(&x[i]);
+     }
+     // set BamAlignment 'core' and 'support' data
+     alignment.RefID    = BamTools::UnpackSignedInt(&x[0]);
+     alignment.Position = BamTools::UnpackSignedInt(&x[4]);
+     unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]);
+     alignment.Bin        = tempValue >> 16;
+     alignment.MapQuality = tempValue >> 8 & 0xff;
+     alignment.SupportData.QueryNameLength = tempValue & 0xff;
+     tempValue = BamTools::UnpackUnsignedInt(&x[12]);
+     alignment.AlignmentFlag = tempValue >> 16;
+     alignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+     alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]);
+     alignment.MateRefID    = BamTools::UnpackSignedInt(&x[20]);
+     alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]);
+     alignment.InsertSize   = BamTools::UnpackSignedInt(&x[28]);
+     // set BamAlignment length
+     alignment.Length = alignment.SupportData.QuerySequenceLength;
+     // read in character data - make sure proper data size was read
+     bool readCharDataOK = false;
+     const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE;
+     RaiiBuffer allCharData(dataLength);
+     if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) {
+         // store 'allCharData' in supportData structure
+         alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength);
+         // set success flag
+         readCharDataOK = true;
+         // save CIGAR ops
+         // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
+         // even when GetNextAlignmentCore() is called
+         const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength;
+         uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset);
+         CigarOp op;
+         alignment.CigarData.clear();
+         alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations);
+         for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) {
+             // swap endian-ness if necessary
+             if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]);
+             // build CigarOp structure
+             op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT);
+             op.Type   = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ];
+             // save CigarOp
+             alignment.CigarData.push_back(op);
+         }
+     }
+     // return success/failure
+     return readCharDataOK;
+ }
+ // loads reference data from BAM file
+ bool BamReaderPrivate::LoadReferenceData(void) {
+     // get number of reference sequences
+     char buffer[sizeof(uint32_t)];
+     m_stream.Read(buffer, sizeof(uint32_t));
+     uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);
+     if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);
+     m_references.reserve((int)numberRefSeqs);
+     // iterate over all references in header
+     for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {
+         // get length of reference name
+         m_stream.Read(buffer, sizeof(uint32_t));
+         uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);
+         if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);
+         RaiiBuffer refName(refNameLength);
+         // get reference name and reference sequence length
+         m_stream.Read(refName.Buffer, refNameLength);
+         m_stream.Read(buffer, sizeof(int32_t));
+         int32_t refLength = BamTools::UnpackSignedInt(buffer);
+         if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);
+         // store data for reference
+         RefData aReference;
+         aReference.RefName   = (string)((const char*)refName.Buffer);
+         aReference.RefLength = refLength;
+         m_references.push_back(aReference);
+     }
+     // return success
+     return true;
+ }
+ bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {
+     if ( m_randomAccessController.LocateIndex(this, preferredType) )
+         return true;
+     else {
+         const string bracError = m_randomAccessController.GetErrorString();
+         const string message = string("could not locate index: \n\t") + bracError;
+         SetErrorString("BamReader::LocateIndex", message);
+         return false;
+     }
+ }
+ // opens BAM file (and index)
+ bool BamReaderPrivate::Open(const string& filename) {
+     try {
+         // make sure we're starting with fresh state
+         Close();
+         // open BgzfStream
+         m_stream.Open(filename, IBamIODevice::ReadOnly);
+         // load BAM metadata
+         LoadHeaderData();
+         LoadReferenceData();
+         // store filename & offset of first alignment
+         m_filename = filename;
+         m_alignmentsBeginOffset = m_stream.Tell();
+         // return success
+         return true;
+     } catch ( BamException& e ) {
+         const string error = e.what();
+         const string message = string("could not open file: ") + filename +
+                                "\n\t" + error;
+         SetErrorString("BamReader::Open", message);
+         return false;
+     }
+ }
+ bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {
+     if ( m_randomAccessController.OpenIndex(indexFilename, this) )
+         return true;
+     else {
+         const string bracError = m_randomAccessController.GetErrorString();
+         const string message = string("could not open index: \n\t") + bracError;
+         SetErrorString("BamReader::OpenIndex", message);
+         return false;
+     }
+ }
+ // returns BAM file pointer to beginning of alignment data
+ bool BamReaderPrivate::Rewind(void) {
+     // reset region
+     m_randomAccessController.ClearRegion();
+     // return status of seeking back to first alignment
+     if ( Seek(m_alignmentsBeginOffset) )
+         return true;
+     else {
+         const string currentError = m_errorString;
+         const string message = string("could not rewind: \n\t") + currentError;
+         SetErrorString("BamReader::Rewind", message);
+         return false;
+     }
+ }
+ bool BamReaderPrivate::Seek(const int64_t& position) {
+     // skip if BAM file not open
+     if ( !IsOpen() ) {
+         SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file");
+         return false;
+     }
+     try {
+         m_stream.Seek(position);
+         return true;
+     }
+     catch ( BamException& e ) {
+         const string streamError = e.what();
+         const string message = string("could not seek in BAM file: \n\t") + streamError;
+         SetErrorString("BamReader::Seek", message);
+         return false;
+     }
+ }
+ void BamReaderPrivate::SetErrorString(const string& where, const string& what) {
+     static const string SEPARATOR = ": ";
+     m_errorString = where + SEPARATOR + what;
+ }
+ void BamReaderPrivate::SetIndex(BamIndex* index) {
+     m_randomAccessController.SetIndex(index);
+ }
+ // sets current region & attempts to jump to it
+ // returns success/failure
+ bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+     if ( m_randomAccessController.SetRegion(region, m_references.size()) )
+         return true;
+     else {
+         const string bracError = m_randomAccessController.GetErrorString();
+         const string message = string("could not set region: \n\t") + bracError;
+         SetErrorString("BamReader::SetRegion", message);
+         return false;
+     }
+ }
+ int64_t BamReaderPrivate::Tell(void) const {
+     return m_stream.Tell();
+ }
index 0000000000000000000000000000000000000000,8322e5f2cda2204a42898393391d26977bde136d..273d56e767f970af328084b22050e26c763fbff0
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,236 +1,237 @@@
+ // ***************************************************************************
+ // BamStandardIndex.h (c) 2010 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
+ // Last modified: 10 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides index operations for the standardized BAM index format (".bai")
+ // ***************************************************************************
+ #ifndef BAM_STANDARD_INDEX_FORMAT_H
+ #define BAM_STANDARD_INDEX_FORMAT_H
+ //  -------------
+ //  W A R N I N G
+ //  -------------
+ //
+ // This file is not part of the BamTools API.  It exists purely as an
+ // implementation detail.  This header file may change from version to
+ // version without notice, or even be removed.
+ //
+ // We mean it.
+ #include "api/BamAux.h"
+ #include "api/BamIndex.h"
+ #include "api/IBamIODevice.h"
+ #include <map>
+ #include <set>
+ #include <string>
+ #include <vector>
+ namespace BamTools {
+ namespace Internal {
+ // -----------------------------------------------------------------------------
+ // BamStandardIndex data structures
+ // defines start and end of a contiguous run of alignments
+ struct BaiAlignmentChunk {
+     // data members
+     uint64_t Start;
+     uint64_t Stop;
+     // constructor
+     BaiAlignmentChunk(const uint64_t& start = 0,
+                       const uint64_t& stop = 0)
+         : Start(start)
+         , Stop(stop)
+     { }
+ };
+ // comparison operator (for sorting)
+ inline
+ bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {
+     return lhs.Start < rhs.Start;
+ }
+ // convenience typedef for a list of all alignment 'chunks' in a BAI bin
+ typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;
+ // convenience typedef for a map of all BAI bins in a reference (ID => chunks)
+ typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;
+ // convenience typedef for a list of all 'linear offsets' in a reference
+ typedef std::vector<uint64_t> BaiLinearOffsetVector;
+ // contains all fields necessary for building, loading, & writing
+ // full BAI index data for a single reference
+ struct BaiReferenceEntry {
+     // data members
+     int32_t ID;
+     BaiBinMap Bins;
+     BaiLinearOffsetVector LinearOffsets;
+     // ctor
+     BaiReferenceEntry(const int32_t& id = -1)
+         : ID(id)
+     { }
+ };
+ // provides (persistent) summary of BaiReferenceEntry's index data
+ struct BaiReferenceSummary {
+     // data members
+     int NumBins;
+     int NumLinearOffsets;
+     uint64_t FirstBinFilePosition;
+     uint64_t FirstLinearOffsetFilePosition;
+     // ctor
+     BaiReferenceSummary(void)
+         : NumBins(0)
+         , NumLinearOffsets(0)
+         , FirstBinFilePosition(0)
+         , FirstLinearOffsetFilePosition(0)
+     { }
+ };
+ // convenience typedef for describing a full BAI index file summary
+ typedef std::vector<BaiReferenceSummary> BaiFileSummary;
+ // end BamStandardIndex data structures
+ // -----------------------------------------------------------------------------
+ class BamStandardIndex : public BamIndex {
+     // ctor & dtor
+     public:
+         BamStandardIndex(Internal::BamReaderPrivate* reader);
+         ~BamStandardIndex(void);
+     // BamIndex implementation
+     public:
+         // builds index from associated BAM file & writes out to index file
+         bool Create(void);
+         // returns whether reference has alignments or not
+         bool HasAlignments(const int& referenceID) const;
+         // attempts to use index data to jump to @region, returns success/fail
+         // a "successful" jump indicates no error, but not whether this region has data
+         //   * thus, the method sets a flag to indicate whether there are alignments
+         //     available after the jump position
+         bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+         // loads existing data from file into memory
+         bool Load(const std::string& filename);
++        BamIndex::IndexType Type(void) const { return BamIndex::STANDARD; }
+     public:
+         // returns format's file extension
+         static const std::string Extension(void);
+     // internal methods
+     private:
+         // index file ops
+         void CheckMagicNumber(void);
+         void CloseFile(void);
+         bool IsDeviceOpen(void) const;
+         void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+         void Seek(const int64_t& position, const int origin);
+         int64_t Tell(void) const;
+         // BAI index building methods
+         void ClearReferenceEntry(BaiReferenceEntry& refEntry);
+         void SaveAlignmentChunkToBin(BaiBinMap& binMap,
+                                      const uint32_t& currentBin,
+                                      const uint64_t& currentOffset,
+                                      const uint64_t& lastOffset);
+         void SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
+                                    const int& alignmentStartPosition,
+                                    const int& alignmentStopPosition,
+                                    const uint64_t& lastOffset);
+         // random-access methods
+         void AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);
+         void CalculateCandidateBins(const uint32_t& begin,
+                                     const uint32_t& end,
+                                     std::set<uint16_t>& candidateBins);
+         void CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
+                                        const uint64_t& minOffset,
+                                        std::set<uint16_t>& candidateBins,
+                                        std::vector<int64_t>& offsets);
+         uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);
+         void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+         uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);
+         // BAI summary (create/load) methods
+         void ReserveForSummary(const int& numReferences);
+         void SaveBinsSummary(const int& refId, const int& numBins);
+         void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);
+         void SkipBins(const int& numBins);
+         void SkipLinearOffsets(const int& numLinearOffsets);
+         void SummarizeBins(BaiReferenceSummary& refSummary);
+         void SummarizeIndexFile(void);
+         void SummarizeLinearOffsets(BaiReferenceSummary& refSummary);
+         void SummarizeReference(BaiReferenceSummary& refSummary);
+         // BAI full index input methods
+         void ReadBinID(uint32_t& binId);
+         void ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);
+         void ReadIntoBuffer(const unsigned int& bytesRequested);
+         void ReadLinearOffset(uint64_t& linearOffset);
+         void ReadNumAlignmentChunks(int& numAlignmentChunks);
+         void ReadNumBins(int& numBins);
+         void ReadNumLinearOffsets(int& numLinearOffsets);
+         void ReadNumReferences(int& numReferences);
+         // BAI full index output methods
+         void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);
+         void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);
+         void WriteAlignmentChunk(const BaiAlignmentChunk& chunk);
+         void WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);
+         void WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);
+         void WriteBins(const int& refId, BaiBinMap& bins);
+         void WriteHeader(void);
+         void WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);
+         void WriteReferenceEntry(BaiReferenceEntry& refEntry);
+     // data members
+     private:
+         bool m_isBigEndian;
+         BaiFileSummary m_indexFileSummary;
+         // our input buffer
+         unsigned int m_bufferLength;
+         struct RaiiWrapper {
+             IBamIODevice* Device;
+             char* Buffer;
+             RaiiWrapper(void);
+             ~RaiiWrapper(void);
+         };
+         RaiiWrapper m_resources;
+     // static methods
+     private:
+         // checks if the buffer is large enough to accommodate the requested size
+         static void CheckBufferSize(char*& buffer,
+                                     unsigned int& bufferLength,
+                                     const unsigned int& requestedBytes);
+         // checks if the buffer is large enough to accommodate the requested size
+         static void CheckBufferSize(unsigned char*& buffer,
+                                     unsigned int& bufferLength,
+                                     const unsigned int& requestedBytes);
+     // static constants
+     private:
+         static const int MAX_BIN;
+         static const int BAM_LIDX_SHIFT;
+         static const std::string BAI_EXTENSION;
+         static const char* const BAI_MAGIC;
+         static const int SIZEOF_ALIGNMENTCHUNK;
+         static const int SIZEOF_BINCORE;
+         static const int SIZEOF_LINEAROFFSET;
+ };
+ } // namespace Internal
+ } // namespace BamTools
+ #endif // BAM_STANDARD_INDEX_FORMAT_H
index 0000000000000000000000000000000000000000,7a66f39d0b5a67c4d66eb3f699b0e9a3c1e4d093..c1e1aa0dc1390ac4e569b4a6adfbece5feb3fa96
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,185 +1,186 @@@
+ // ***************************************************************************
+ // BamToolsIndex.h (c) 2010 Derek Barnett
+ // Marth Lab, Department of Biology, Boston College
+ // ---------------------------------------------------------------------------
+ // Last modified: 10 November 2011 (DB)
+ // ---------------------------------------------------------------------------
+ // Provides index operations for the BamTools index format (".bti")
+ // ***************************************************************************
+ #ifndef BAMTOOLS_INDEX_FORMAT_H
+ #define BAMTOOLS_INDEX_FORMAT_H
+ //  -------------
+ //  W A R N I N G
+ //  -------------
+ //
+ // This file is not part of the BamTools API.  It exists purely as an
+ // implementation detail.  This header file may change from version to
+ // version without notice, or even be removed.
+ //
+ // We mean it.
+ #include "api/BamAux.h"
+ #include "api/BamIndex.h"
+ #include "api/IBamIODevice.h"
+ #include <map>
+ #include <string>
+ #include <vector>
+ namespace BamTools {
+ namespace Internal {
+ // contains data for each 'block' in a BTI index
+ struct BtiBlock {
+     // data members (fixed-width integers)
+     int32_t MaxEndPosition;
+     int64_t StartOffset;
+     int32_t StartPosition;
+     // ctor - arguments follow member declaration order; each defaults to 0
+     BtiBlock(const int32_t& maxEndPosition = 0,
+              const int64_t& startOffset    = 0,
+              const int32_t& startPosition  = 0)
+         : MaxEndPosition(maxEndPosition)
+         , StartOffset(startOffset)
+         , StartPosition(startPosition)
+     { }
+ };
+ // convenience typedef for describing a list of BTI blocks on a reference
+ typedef std::vector<BtiBlock> BtiBlockVector;
+ // contains all fields necessary for building, loading, & writing
+ // full BTI index data for a single reference
+ struct BtiReferenceEntry {
+     // data members
+     int32_t ID;
+     BtiBlockVector Blocks;
+     // ctor - ID defaults to -1; Blocks is default-constructed (an empty vector)
+     BtiReferenceEntry(const int& id = -1)
+         : ID(id)
+     { }
+ };
+ // provides (persistent) summary of BtiReferenceEntry's index data
+ struct BtiReferenceSummary {
+     // data members
+     int NumBlocks;
+     uint64_t FirstBlockFilePosition;
+     // ctor - zero-initializes both fields
+     BtiReferenceSummary(void)
+         : NumBlocks(0)
+         , FirstBlockFilePosition(0)
+     { }
+ };
+ // convenience typedef for describing a full BTI index file summary
+ typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+ class BamToolsIndex : public BamIndex { // BamIndex implementation for the BamTools index format (".bti")
+     // keep a list of any supported versions here
+     // (might be useful later to handle any 'legacy' versions if the format changes)
+     // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+     //
+     // so a change introduced in BTI_1_2 may be handled from then on by:
+     //
+     // if ( indexVersion >= BTI_1_2 )
+     //   do something new
+     // else
+     //   do the old thing
+     enum Version { BTI_1_0 = 1
+                  , BTI_1_1
+                  , BTI_1_2
+                  , BTI_2_0
+                  };
+     // ctor & dtor
+     public:
+         BamToolsIndex(Internal::BamReaderPrivate* reader);
+         ~BamToolsIndex(void);
+     // BamIndex implementation
+     public:
+         // builds index from associated BAM file & writes out to index file
+         bool Create(void);
+         // returns whether reference has alignments or not
+         bool HasAlignments(const int& referenceID) const;
+         // attempts to use index data to jump to @region, returns success/fail
+         // a "successful" jump indicates no error, but not whether this region has data
+         //   * thus, the method sets a flag to indicate whether there are alignments
+         //     available after the jump position
+         bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+         // loads existing data from file into memory
+         bool Load(const std::string& filename);
++        BamIndex::IndexType Type(void) const { return BamIndex::BAMTOOLS; } // reports this index's format type (always BAMTOOLS)
+     public:
+         // returns format's file extension
+         static const std::string Extension(void);
+     // internal methods
+     private:
+         // index file ops
+         void CheckMagicNumber(void);
+         void CheckVersion(void);
+         void CloseFile(void);
+         bool IsDeviceOpen(void) const;
+         void OpenFile(const std::string& filename, IBamIODevice::OpenMode mode);
+         void Seek(const int64_t& position, const int origin);
+         int64_t Tell(void) const;
+         // index-creation methods
+         void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+         void WriteBlock(const BtiBlock& block);
+         void WriteBlocks(const BtiBlockVector& blocks);
+         void WriteHeader(void);
+         void WriteReferenceEntry(const BtiReferenceEntry& refEntry);
+         // random-access methods
+         void GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+         void ReadBlock(BtiBlock& block);
+         void ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);
+         void ReadReferenceEntry(BtiReferenceEntry& refEntry);
+         // BTI summary data methods
+         void InitializeFileSummary(const int& numReferences);
+         void LoadFileSummary(void);
+         void LoadHeader(void);
+         void LoadNumBlocks(int& numBlocks);
+         void LoadNumReferences(int& numReferences);
+         void LoadReferenceSummary(BtiReferenceSummary& refSummary);
+         void SkipBlocks(const int& numBlocks);
+     // data members
+     private:
+         bool  m_isBigEndian;
+         BtiFileSummary m_indexFileSummary;
+         uint32_t m_blockSize;
+         int32_t m_inputVersion; // Version is serialized as int
+         Version m_outputVersion;
+         struct RaiiWrapper { // holds the raw device pointer; NOTE(review): name suggests the dtor releases it - confirm in the .cpp
+             IBamIODevice* Device;
+             RaiiWrapper(void);
+             ~RaiiWrapper(void);
+         };
+         RaiiWrapper m_resources;
+     // static constants (declared here; values are defined outside this header)
+     private:
+         static const uint32_t DEFAULT_BLOCK_LENGTH;
+         static const std::string BTI_EXTENSION;
+         static const char* const BTI_MAGIC;
+         static const int SIZEOF_BLOCK;
+ };
+ } // namespace Internal
+ } // namespace BamTools
+ #endif // BAMTOOLS_INDEX_FORMAT_H