From a434db60a2ba1c77a0084704290c0e2ba6dcf1dc Mon Sep 17 00:00:00 2001
From: barnett <barnett@9efb377e-2e27-44b9-b91a-ec4abb80ed8b>
Date: Tue, 8 Dec 2009 18:37:03 +0000
Subject: [PATCH] Major overhaul to BamTools Separated out Bgzf routines to
 BGZF.h Simplified main BamReader.h/BamWriter.h headers, by adding pimpl

git-svn-id: svn+ssh://gene.bc.edu/home/subversion/Derek/BamTools/trunk@30 9efb377e-2e27-44b9-b91a-ec4abb80ed8b
---
 BGZF.cpp      |  354 ++++++++++
 BGZF.h        |  183 +++++
 BamAux.h      |  592 ++++++++--------
 BamReader.cpp | 1797 ++++++++++++++++++++++++++++---------------------
 BamReader.h   |  369 +++-------
 BamWriter.cpp |  688 ++++++++-----------
 BamWriter.h   |  163 ++---
 7 files changed, 2257 insertions(+), 1889 deletions(-)
 create mode 100644 BGZF.cpp
 create mode 100644 BGZF.h
diff --git a/BGZF.cpp b/BGZF.cpp
new file mode 100644
index 0000000..651fe81
--- /dev/null
+++ b/BGZF.cpp
@@ -0,0 +1,354 @@
+// ***************************************************************************
+// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading & writing BGZF files
+// ***************************************************************************
+
+#include <algorithm>
+#include "BGZF.h"
+using namespace BamTools;
+using std::string;
+using std::min;
+
+BgzfData::BgzfData(void)
+    : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
+    , CompressedBlockSize(MAX_BLOCK_SIZE)
+    , BlockLength(0)
+    , BlockOffset(0)
+    , BlockAddress(0)
+    , IsOpen(false)
+    , IsWriteOnly(false)
+    , Stream(NULL)
+    , UncompressedBlock(NULL)
+    , CompressedBlock(NULL)
+{
+    try {
+        CompressedBlock   = new char[CompressedBlockSize];
+        UncompressedBlock = new char[UncompressedBlockSize];
+    } catch( std::bad_alloc& ba ) {
+        printf("ERROR: Unable to allocate memory for our BGZF object.\n");
+        exit(1);
+    }
+}
+
+// destructor
+BgzfData::~BgzfData(void) {
+    if(CompressedBlock)   delete [] CompressedBlock;
+    if(UncompressedBlock) delete [] UncompressedBlock;
+}
+
+// closes BGZF file
+void BgzfData::Close(void) {
+
+    if (!IsOpen) { return; }
+    IsOpen = false;
+
+    // flush the BGZF block
+    if ( IsWriteOnly ) { FlushBlock(); }
+
+    // flush and close
+    fflush(Stream);
+    fclose(Stream);
+}
+
+// compresses the current block
+int BgzfData::DeflateBlock(void) {
+
+    // initialize the gzip header
+    char* buffer = CompressedBlock;
+    unsigned int bufferSize = CompressedBlockSize;
+
+    memset(buffer, 0, 18);
+    buffer[0]  = GZIP_ID1;
+    buffer[1]  = (char)GZIP_ID2;
+    buffer[2]  = CM_DEFLATE;
+    buffer[3]  = FLG_FEXTRA;
+    buffer[9]  = (char)OS_UNKNOWN;
+    buffer[10] = BGZF_XLEN;
+    buffer[12] = BGZF_ID1;
+    buffer[13] = BGZF_ID2;
+    buffer[14] = BGZF_LEN;
+
+    // loop to retry for blocks that do not compress enough
+    int inputLength = BlockOffset;
+    int compressedLength = 0;
+
+    while(true) {
+
+        z_stream zs;
+        zs.zalloc    = NULL;
+        zs.zfree     = NULL;
+        zs.next_in   = (Bytef*)UncompressedBlock;
+        zs.avail_in  = inputLength;
+        zs.next_out  = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
+        zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
+
+        // initialize the zlib compression algorithm
+        if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {
+            printf("ERROR: zlib deflate initialization failed.\n");
+            exit(1);
+        }
+
+        // compress the data
+        int status = deflate(&zs, Z_FINISH);
+        if(status != Z_STREAM_END) {
+
+            deflateEnd(&zs);
+
+            // reduce the input length and try again
+            if(status == Z_OK) {
+                inputLength -= 1024;
+                if(inputLength < 0) {
+                    printf("ERROR: input reduction failed.\n");
+                    exit(1);
+                }
+                continue;
+            }
+
+            printf("ERROR: zlib deflate failed.\n");
+            exit(1);
+        }
+
+        // finalize the compression routine
+        if(deflateEnd(&zs) != Z_OK) {
+            printf("ERROR: deflate end failed.\n");
+            exit(1);
+        }
+
+        compressedLength = zs.total_out;
+        compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
+
+        if(compressedLength > MAX_BLOCK_SIZE) {
+            printf("ERROR: deflate overflow.\n");
+            exit(1);
+        }
+
+        break;
+    }
+
+    // store the compressed length
+    BgzfData::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
+
+    // store the CRC32 checksum
+    unsigned int crc = crc32(0, NULL, 0);
+    crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
+    BgzfData::PackUnsignedInt(&buffer[compressedLength - 8], crc);
+    BgzfData::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
+
+    // ensure that we have less than a block of data left
+    int remaining = BlockOffset - inputLength;
+    if(remaining > 0) {
+        if(remaining > inputLength) {
+            printf("ERROR: remainder too large.\n");
+            exit(1);
+        }
+        memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
+    }
+
+    BlockOffset = remaining;
+    return compressedLength;
+}
+
+// flushes the data in the BGZF block
+void BgzfData::FlushBlock(void) {
+
+    // flush all of the remaining blocks
+    while(BlockOffset > 0) {
+
+        // compress the data block
+        int blockLength = DeflateBlock();
+
+        // flush the data to our output stream
+        int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);
+
+        if(numBytesWritten != blockLength) {
+            printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);
+            exit(1);
+        }
+
+        BlockAddress += blockLength;
+    }
+}
+
+// de-compresses the current block
+int BgzfData::InflateBlock(const int& blockLength) {
+
+    // Inflate the block in m_BGZF.CompressedBlock into m_BGZF.UncompressedBlock
+    z_stream zs;
+    zs.zalloc    = NULL;
+    zs.zfree     = NULL;
+    zs.next_in   = (Bytef*)CompressedBlock + 18;
+    zs.avail_in  = blockLength - 16;
+    zs.next_out  = (Bytef*)UncompressedBlock;
+    zs.avail_out = UncompressedBlockSize;
+
+    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    if (status != Z_OK) {
+        printf("inflateInit failed\n");
+        exit(1);
+    }
+
+    status = inflate(&zs, Z_FINISH);
+    if (status != Z_STREAM_END) {
+        inflateEnd(&zs);
+        printf("inflate failed\n");
+        exit(1);
+    }
+
+    status = inflateEnd(&zs);
+    if (status != Z_OK) {
+        printf("inflateEnd failed\n");
+        exit(1);
+    }
+
+    return zs.total_out;
+}
+
+void BgzfData::Open(const string& filename, const char* mode) {
+
+    if ( strcmp(mode, "rb") == 0 ) {
+        IsWriteOnly = false;
+    } else if ( strcmp(mode, "wb") == 0) {
+        IsWriteOnly = true;
+    } else {
+        printf("ERROR: Unknown file mode: %s\n", mode);
+        exit(1);
+    }
+
+    Stream = fopen(filename.c_str(), mode);
+    if(!Stream) {
+        printf("ERROR: Unable to open the BAM file %s\n", filename.c_str() );
+        exit(1);
+    }
+    IsOpen = true;
+}
+
+int BgzfData::Read(char* data, const unsigned int dataLength) {
+
+   if (dataLength == 0) { return 0; }
+
+   char* output = data;
+   unsigned int numBytesRead = 0;
+   while (numBytesRead < dataLength) {
+
+       int bytesAvailable = BlockLength - BlockOffset;
+       if (bytesAvailable <= 0) {
+           if ( ReadBlock() != 0 ) { return -1; }
+           bytesAvailable = BlockLength - BlockOffset;
+           if ( bytesAvailable <= 0 ) { break; }
+       }
+
+       char* buffer   = UncompressedBlock;
+       int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
+       memcpy(output, buffer + BlockOffset, copyLength);
+
+       BlockOffset  += copyLength;
+       output       += copyLength;
+       numBytesRead += copyLength;
+   }
+
+   if ( BlockOffset == BlockLength ) {
+       BlockAddress = ftell(Stream);
+       BlockOffset  = 0;
+       BlockLength  = 0;
+   }
+
+   return numBytesRead;
+}
+
+int BgzfData::ReadBlock(void) {
+
+    char    header[BLOCK_HEADER_LENGTH];
+    int64_t blockAddress = ftell(Stream);
+
+    int count = fread(header, 1, sizeof(header), Stream);
+    if (count == 0) {
+        BlockLength = 0;
+        return 0;
+    }
+
+    if (count != sizeof(header)) {
+        printf("read block failed - count != sizeof(header)\n");
+        return -1;
+    }
+
+    if (!BgzfData::CheckBlockHeader(header)) {
+        printf("read block failed - CheckBlockHeader() returned false\n");
+        return -1;
+    }
+
+    int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;
+    char* compressedBlock = CompressedBlock;
+    memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
+    int remaining = blockLength - BLOCK_HEADER_LENGTH;
+
+    count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);
+    if (count != remaining) {
+        printf("read block failed - count != remaining\n");
+        return -1;
+    }
+
+    count = InflateBlock(blockLength);
+    if (count < 0) { return -1; }
+
+    if ( BlockLength != 0 ) {
+        BlockOffset = 0;
+    }
+
+    BlockAddress = blockAddress;
+    BlockLength  = count;
+    return 0;
+}
+
+bool BgzfData::Seek(int64_t position) {
+
+    int     blockOffset  = (position & 0xFFFF);
+    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
+
+    if (fseek(Stream, blockAddress, SEEK_SET) != 0) {
+        printf("ERROR: Unable to seek in BAM file\n");
+        exit(1);
+    }
+
+    BlockLength  = 0;
+    BlockAddress = blockAddress;
+    BlockOffset  = blockOffset;
+    return true;
+}
+
+int64_t BgzfData::Tell(void) {
+    return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
+}
+
+// writes the supplied data into the BGZF buffer
+unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {
+
+    // initialize
+    unsigned int numBytesWritten = 0;
+    const char* input = data;
+    unsigned int blockLength = UncompressedBlockSize;
+
+    // copy the data to the buffer
+    while(numBytesWritten < dataLen) {
+        unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);
+        char* buffer = UncompressedBlock;
+        memcpy(buffer + BlockOffset, input, copyLength);
+
+        BlockOffset     += copyLength;
+        input           += copyLength;
+        numBytesWritten += copyLength;
+
+        if(BlockOffset == blockLength) {
+            FlushBlock();
+        }
+    }
+
+    return numBytesWritten;
+}
diff --git a/BGZF.h b/BGZF.h
new file mode 100644
index 0000000..2896542
--- /dev/null
+++ b/BGZF.h
@@ -0,0 +1,183 @@
+// ***************************************************************************
+// BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading & writing BGZF files
+// ***************************************************************************
+
+#ifndef BGZF_H
+#define BGZF_H
+
+// 'C' includes
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// C++ includes
+#include <string>
+
+// zlib includes
+#include "zlib.h"
+
+// Platform-specific type definitions
+#ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+#else
+        #include <stdint.h>
+#endif
+
+namespace BamTools {
+
+// zlib constants
+const int GZIP_ID1   = 31;
+const int GZIP_ID2   = 139;
+const int CM_DEFLATE = 8;
+const int FLG_FEXTRA = 4;
+const int OS_UNKNOWN = 255;
+const int BGZF_XLEN  = 6;
+const int BGZF_ID1   = 66;
+const int BGZF_ID2   = 67;
+const int BGZF_LEN   = 2;
+const int GZIP_WINDOW_BITS    = -15;
+const int Z_DEFAULT_MEM_LEVEL = 8;
+
+// BZGF constants
+const int BLOCK_HEADER_LENGTH = 18;
+const int BLOCK_FOOTER_LENGTH = 8;
+const int MAX_BLOCK_SIZE      = 65536;
+const int DEFAULT_BLOCK_SIZE  = 65536;
+
+struct BgzfData {
+
+    // data members
+    unsigned int UncompressedBlockSize;
+    unsigned int CompressedBlockSize;
+    unsigned int BlockLength;
+    unsigned int BlockOffset;
+    uint64_t BlockAddress;
+    bool     IsOpen;
+    bool     IsWriteOnly;
+    FILE*    Stream;
+    char*    UncompressedBlock;
+    char*    CompressedBlock;
+
+    // constructor & destructor
+    BgzfData(void);
+    ~BgzfData(void);
+
+    // closes BGZF file
+    void Close(void);
+    // compresses the current block
+    int DeflateBlock(void);
+    // flushes the data in the BGZF block
+    void FlushBlock(void);
+    // de-compresses the current block
+    int InflateBlock(const int& blockLength);
+    // opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing
+    void Open(const std::string& filename, const char* mode);
+    // reads BGZF data into a byte buffer
+    int Read(char* data, const unsigned int dataLength);
+    // reads BGZF block
+    int ReadBlock(void);
+    // seek to position in BAM file
+    bool Seek(int64_t position);
+    // get file position in BAM file
+    int64_t Tell(void);
+    // writes the supplied data into the BGZF buffer
+    unsigned int Write(const char* data, const unsigned int dataLen);
+
+    // checks BGZF block header
+    static inline bool CheckBlockHeader(char* header);
+    // packs an unsigned integer into the specified buffer
+    static inline void PackUnsignedInt(char* buffer, unsigned int value);
+    // packs an unsigned short into the specified buffer
+    static inline void PackUnsignedShort(char* buffer, unsigned short value);
+    // unpacks a buffer into a signed int
+    static inline signed int UnpackSignedInt(char* buffer);
+    // unpacks a buffer into a unsigned int
+    static inline unsigned int UnpackUnsignedInt(char* buffer);
+    // unpacks a buffer into a unsigned short
+    static inline unsigned short UnpackUnsignedShort(char* buffer);
+};
+
+// -------------------------------------------------------------
+
+inline
+bool BgzfData::CheckBlockHeader(char* header) {
+
+    return (header[0] == GZIP_ID1 &&
+            header[1] == (char)GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & FLG_FEXTRA) != 0 &&
+            BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
+            header[12] == BGZF_ID1 &&
+            header[13] == BGZF_ID2 &&
+            BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
+}
+
+// packs an unsigned integer into the specified buffer
+inline
+void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
+    buffer[0] = (char)value;
+    buffer[1] = (char)(value >> 8);
+    buffer[2] = (char)(value >> 16);
+    buffer[3] = (char)(value >> 24);
+}
+
+// packs an unsigned short into the specified buffer
+inline
+void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
+    buffer[0] = (char)value;
+    buffer[1] = (char)(value >> 8);
+}
+
+// unpacks a buffer into a signed int
+inline
+signed int BgzfData::UnpackSignedInt(char* buffer) {
+    union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
+    un.value = 0;
+    un.valueBuffer[0] = buffer[0];
+    un.valueBuffer[1] = buffer[1];
+    un.valueBuffer[2] = buffer[2];
+    un.valueBuffer[3] = buffer[3];
+    return un.value;
+}
+
+// unpacks a buffer into an unsigned int
+inline
+unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
+    union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
+    un.value = 0;
+    un.valueBuffer[0] = buffer[0];
+    un.valueBuffer[1] = buffer[1];
+    un.valueBuffer[2] = buffer[2];
+    un.valueBuffer[3] = buffer[3];
+    return un.value;
+}
+
+// unpacks a buffer into an unsigned short
+inline
+unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
+    union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un;
+    un.value = 0;
+    un.valueBuffer[0] = buffer[0];
+    un.valueBuffer[1] = buffer[1];
+    return un.value;
+}
+
+} // namespace BamTools
+
+#endif // BGZF_H
diff --git a/BamAux.h b/BamAux.h
index 5784198..d5510d1 100644
--- a/BamAux.h
+++ b/BamAux.h
@@ -3,343 +3,283 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 1 October 2009 (DB)
+// Last modified: 8 December 2009 (DB)
 // ---------------------------------------------------------------------------
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Defines common constants, typedefs, & data structures for BamTools.
+// Provides the basic constants, data structures, etc. for using BAM files
 // ***************************************************************************
 
-/*! 
-	\file BamAux.h
-	\brief BamTools constants, typedefs, & data structures
-*/
+#ifndef BAMAUX_H
+#define BAMAUX_H
 
-#pragma once
+// C inclues
+#include <cstdlib>
+#include <cstring>
 
 // C++ includes
 #include <exception>
+#include <map>
 #include <string>
 #include <utility>
 #include <vector>
 
-// C includes
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-// Platform-specific type definitions
-#ifdef WIN32
-	typedef char                 int8_t;
-	typedef unsigned char       uint8_t;
-	typedef short               int16_t;
-	typedef unsigned short     uint16_t;
-	typedef int                 int32_t;
-	typedef unsigned int       uint32_t;
-	typedef long long           int64_t;
-	typedef unsigned long long uint64_t;
-#else
-	#include <stdint.h>
-#endif
-
-//! \namespace BamTools
 namespace BamTools {
 
-	//! \cond
-	// --------------------------------------------------------------------------------------
-	// This section is purely internal and can be excluded from main generated documentation.
-	
-	// zlib constants
-	const int GZIP_ID1   = 31;
-	const int GZIP_ID2   = 139;
-	const int CM_DEFLATE = 8;
-	const int FLG_FEXTRA = 4;
-	const int OS_UNKNOWN = 255;
-	const int BGZF_XLEN  = 6;
-	const int BGZF_ID1   = 66;
-	const int BGZF_ID2   = 67;
-	const int BGZF_LEN   = 2;
-	const int GZIP_WINDOW_BITS = -15;
-	const int Z_DEFAULT_MEM_LEVEL = 8;
-
-	// BZGF constants
-	const int BLOCK_HEADER_LENGTH = 18;
-	const int BLOCK_FOOTER_LENGTH = 8;
-	const int MAX_BLOCK_SIZE      = 65536;
-	const int DEFAULT_BLOCK_SIZE  = 65536;
-
-	// BAM constants
-	const unsigned int BAM_CORE_SIZE = 32;
-	const int BAM_CMATCH      = 0;
-	const int BAM_CINS        = 1;
-	const int BAM_CDEL        = 2;
-	const int BAM_CREF_SKIP   = 3;
-	const int BAM_CSOFT_CLIP  = 4;
-	const int BAM_CHARD_CLIP  = 5;
-	const int BAM_CPAD        = 6;
-	const int BAM_CIGAR_SHIFT = 4;
-	const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);
-
-	// BAM index constants
-	const int MAX_BIN           = 37450;	// =(8^6-1)/7+1
-	const int BAM_MIN_CHUNK_GAP = 32768;
-	const int BAM_LIDX_SHIFT    = 14;
-
-	// Explicit variable sizes
-	const int BT_SIZEOF_INT = 4;
-	
-	struct BgzfData {
-		unsigned int UncompressedBlockSize;
-		unsigned int CompressedBlockSize;
-		unsigned int BlockLength;
-		unsigned int BlockOffset;
-		uint64_t BlockAddress;
-		bool     IsOpen;
-		FILE*    Stream;
-		char*    UncompressedBlock;
-		char*    CompressedBlock;
-		
-		// constructor
-		BgzfData(void)
-			: UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
-			, CompressedBlockSize(MAX_BLOCK_SIZE)
-			, BlockLength(0)
-			, BlockOffset(0)
-			, BlockAddress(0)
-			, IsOpen(false)
-			, Stream(NULL)
-			, UncompressedBlock(NULL)
-			, CompressedBlock(NULL)
-		{
-			try {
-				CompressedBlock   = new char[CompressedBlockSize];
-				UncompressedBlock = new char[UncompressedBlockSize];
-			} catch( std::bad_alloc& ba ) {
-				printf("ERROR: Unable to allocate memory for our BGZF object.\n");
-				exit(1);
-			}
-		}
-		
-		// destructor
-		~BgzfData(void) {
-			if(CompressedBlock)   delete [] CompressedBlock;
-			if(UncompressedBlock) delete [] UncompressedBlock;
-		}
-	};
-	//! \endcond
-	
-	// --------------------------------------------------------------------------------------
-	// Data structures
-	
-	//! \brief Cigar operation data structure
-	struct CigarOp {
-		char     Type;   //!< Operation type (MIDNSHP)
-		uint32_t Length; //!< Operation length (number of bases)
-		   
-	};
-
-	//! Reference sequence data structure
-	struct RefData {
-		std::string  RefName;          //!< Name of reference sequence
-		unsigned int RefLength;        //!< Length of reference sequence
-		bool         RefHasAlignments; //!< True if BAM file contains alignments mapped to reference sequence
-		
-		// constructor
-		RefData(void)
-			: RefLength(0)
-			, RefHasAlignments(false)
-		{ }
-	};
-
-	//! BAM alignment data structure
-	struct BamAlignment {
-		
-		// Queries against alignment flag
-		public:
-			//! Returns true if this read is a PCR duplicate (determined by external app)
-			bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
-			//! Returns true if this read failed quality control (determined by external app)
-			bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }	
-			//! Returns true if alignment is first mate on read
-			bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
-			//! Returns true if alignment is mapped			
-			bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
-			//! Returns true if alignment's mate is mapped
-			bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }	
-			//! Returns true if alignment's mate mapped to reverse strand
-			bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
-			//! Returns true if alignment part of paired-end read
-			bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }	
-			//! Returns true if this position is primary alignment (determined by external app)
-			bool IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY) == 0 ); }	
-			//! Returns true if alignment is part of read that satisfied paired-end resolution (determined by external app)
-			bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }	
-			//! Returns true if alignment mapped to reverse strand
-			bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }	
-			//! Returns true if alignment is second mate on read
-			bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }	
-		
-		public:
-			/*! 
-				\brief Get alignment's read group text.
-				
-				Assigns read group text to readGroup.
-				
-				\return True if read group data successfully retrieved.
-			*/
-			bool GetReadGroup(std::string& readGroup) const {
-				
-				if ( TagData.empty() ) { return false; }
-				
-				// localize the tag data
-				char* pTagData = (char*)TagData.data();
-				const unsigned int tagDataLen = TagData.size();
-				unsigned int numBytesParsed = 0;
-				
-				bool foundReadGroupTag = false;
-				while( numBytesParsed < tagDataLen ) {
-					
-					const char* pTagType = pTagData;
-					const char* pTagStorageType = pTagData + 2;
-					pTagData       += 3;
-					numBytesParsed += 3;
-					
-					// check the current tag
-					if ( strncmp(pTagType, "RG", 2) == 0 ) {
-						foundReadGroupTag = true;
-						break;
-					}
-					
-					// get the storage class and find the next tag
-					SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
-				}
-				
-				// return if the read group tag was not present
-				if ( !foundReadGroupTag ) { return false; }
-				
-				// assign the read group
-				const unsigned int readGroupLen = strlen(pTagData);
-				readGroup.resize(readGroupLen);
-				memcpy( (char*)readGroup.data(), pTagData, readGroupLen );
-				return true;
-			}
-		
-		private:
-			// skips to the next tag
-			static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
-				switch(storageType) {
-					
-					case 'A':
-					case 'c':
-					case 'C':
-						++numBytesParsed;
-						++pTagData;
-						break;
-						
-					case 's':
-					case 'S':
-					case 'f':
-						numBytesParsed += 2;
-						pTagData       += 2;
-						break;
-						
-					case 'i':
-					case 'I':
-						numBytesParsed += 4;
-						pTagData       += 4;
-						break;
-						
-					case 'Z':
-					case 'H':
-						while(*pTagData) {
-							++numBytesParsed;
-							++pTagData;
-						}
-						break;
-						
-					default:
-						printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);
-						exit(1);
-				}
-			}
-		
-		// Data members
-		public:
-			std::string  Name;              //!< Read name
-			unsigned int Length;            //!< Query length
-			std::string  QueryBases;        //!< 'Original' sequence (as reported from sequencing machine)
-			std::string  AlignedBases;      //!< 'Aligned' sequence (includes any indels, padding, clipping) 
-			std::string  Qualities;         //!< FASTQ qualities (ASCII characters, not numeric values)
-			std::string  TagData;           //!< Tag data (accessor methods will pull the requested information out)
-			//unsigned int RefID;             //!< ID number for reference sequence
-			//unsigned int Position;          //!< Position (0-based) where alignment starts 
-			signed int RefID;             //!< ID number for reference sequence (-1)
-			signed int Position;          //!< Position (0-based) where alignment starts (-1) 
-			unsigned int Bin;               //!< Bin in BAM file where this alignment resides
-			unsigned int MapQuality;        //!< Mapping quality score 
-			unsigned int AlignmentFlag;     //!< Alignment bit-flag - see Is<something>() methods for available queries
-			std::vector<CigarOp> CigarData; //!< CIGAR operations for this alignment
-			//unsigned int MateRefID;         //!< ID number for reference sequence where alignment's mate was aligned
-			//unsigned int MatePosition;      //!< Position (0-based) where alignment's mate starts
-			//unsigned int InsertSize;        //!< Mate-pair insert size
-			signed int MateRefID;         //!< ID number for reference sequence where alignment's mate was aligned (-1)
-			signed int MatePosition;      //!< Position (0-based) where alignment's mate starts (-1)
-			signed int InsertSize;		  //!< Mate-pair insert size(0)
-		
-		// Alignment flag query constants
-		private:
-			enum { PAIRED        = 1,
-				   PROPER_PAIR   = 2,
-				   UNMAPPED      = 4,
-				   MATE_UNMAPPED = 8,
-				   REVERSE       = 16,
-				   MATE_REVERSE  = 32,
-				   READ_1        = 64,
-				   READ_2        = 128,
-				   SECONDARY     = 256,
-				   QC_FAILED     = 512,
-				   DUPLICATE     = 1024
-			     };
-	};
-
-	// ----------------------------------------------------------------
-	// Typedefs
-	
-	/*!
-		\typedef RefVector
-		\brief Vector of RefData objects
-	*/
-	typedef std::vector<RefData> RefVector;
-	
-	/*! 
-		\typedef BamAlignmentVector
-		\brief Vector of BamAlignments
-	*/
-	typedef std::vector< BamAlignment > BamAlignmentVector;
-	
-	//! \cond
-	// ----------------------------------------------------------------
-	// Typedefs (internal - can exclude from main documentation)
-	
-	//Offsets for linear indexing
-	typedef std::vector<uint64_t> LinearOffsetVector;
-
-	// Alignment 'chunk' boundaries
-	typedef std::pair<uint64_t, uint64_t> ChunkPair;
-	// Vector of alignment 'chunks'
-	typedef std::vector<ChunkPair> ChunkVector;
-
-	// BAM bin contains a bin ID & a vector of alignment 'chunks'
-	typedef std::pair<uint32_t, ChunkVector*> BamBin;
-	// Vector of BAM bins
-	typedef std::vector<BamBin> BinVector;
-
-	// Reference sequence index data
-	typedef std::pair<BinVector*, LinearOffsetVector*> RefIndex;
-
-	// Full BAM file index data structure 
-	typedef std::vector<RefIndex*> BamIndex;
-	// ----------------------------------------------------------------
-	//! \endcond
+// BAM constants
+const int BAM_CORE_SIZE   = 32;
+const int BAM_CMATCH      = 0;
+const int BAM_CINS        = 1;
+const int BAM_CDEL        = 2;
+const int BAM_CREF_SKIP   = 3;
+const int BAM_CSOFT_CLIP  = 4;
+const int BAM_CHARD_CLIP  = 5;
+const int BAM_CPAD        = 6;
+const int BAM_CIGAR_SHIFT = 4;
+const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);
+
+// BAM index constants
+const int MAX_BIN           = 37450;	// =(8^6-1)/7+1
+const int BAM_MIN_CHUNK_GAP = 32768;
+const int BAM_LIDX_SHIFT    = 14;
+
+// Explicit variable sizes
+const int BT_SIZEOF_INT = 4;
+
+struct CigarOp;
+
+struct BamAlignment {
+
+    // Queries against alignment flag
+    public:
+        // Returns true if this read is a PCR duplicate (determined by external app)
+        bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
+        // Returns true if this read failed quality control (determined by external app)
+        bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
+        // Returns true if alignment is first mate on read
+        bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
+        // Returns true if alignment is mapped
+        bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
+        // Returns true if alignment's mate is mapped
+        bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
+        // Returns true if alignment's mate mapped to reverse strand
+        bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
+        // Returns true if alignment part of paired-end read
+        bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
+        // Returns true if this position is primary alignment (determined by external app)
+        bool IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY) == 0 ); }
+        // Returns true if alignment is part of read that satisfied paired-end resolution (determined by external app)
+        bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
+        // Returns true if alignment mapped to reverse strand
+        bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
+        // Returns true if alignment is second mate on read
+        bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
+
+    public:
+
+        // get "RG" tag data
+        bool GetReadGroup(std::string& readGroup) const {
+
+            if ( TagData.empty() ) { return false; }
+
+            // localize the tag data
+            char* pTagData = (char*)TagData.data();
+            const unsigned int tagDataLen = TagData.size();
+            unsigned int numBytesParsed = 0;
+
+            bool foundReadGroupTag = false;
+            while( numBytesParsed < tagDataLen ) {
+
+                const char* pTagType = pTagData;
+                const char* pTagStorageType = pTagData + 2;
+                pTagData       += 3;
+                numBytesParsed += 3;
+
+                // check the current tag
+                if ( std::strncmp(pTagType, "RG", 2) == 0 ) {
+                    foundReadGroupTag = true;
+                    break;
+                }
+
+                // get the storage class and find the next tag
+                SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
+            }
+
+            // return if the read group tag was not present
+            if ( !foundReadGroupTag ) { return false; }
+
+            // assign the read group
+            const unsigned int readGroupLen = std::strlen(pTagData);
+            readGroup.resize(readGroupLen);
+            std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );
+            return true;
+        }
+
+        // get "NM" tag data - contributed by Aaron Quinlan
+        bool GetEditDistance(uint8_t& editDistance) const {
+
+            if ( TagData.empty() ) { return false; }
+
+            // localize the tag data
+            char* pTagData = (char*)TagData.data();
+            const unsigned int tagDataLen = TagData.size();
+            unsigned int numBytesParsed = 0;
+
+            bool foundEditDistanceTag = false;
+            while( numBytesParsed < tagDataLen ) {
+
+                const char* pTagType = pTagData;
+                const char* pTagStorageType = pTagData + 2;
+                pTagData       += 3;
+                numBytesParsed += 3;
+
+                // check the current tag
+                if ( strncmp(pTagType, "NM", 2) == 0 ) {
+                    foundEditDistanceTag = true;
+                    break;
+                }
+
+                // get the storage class and find the next tag
+                SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
+            }
+            // return if the edit distance tag was not present
+            if ( !foundEditDistanceTag ) { return false; }
+
+            // assign the editDistance value
+            memcpy(&editDistance, pTagData, 1);
+            return true;
+        }
+
+    private:
+        static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
+            switch(storageType) {
+
+                case 'A':
+                case 'c':
+                case 'C':
+                        ++numBytesParsed;
+                        ++pTagData;
+                        break;
+
+                case 's':
+                case 'S':
+                case 'f':
+                        numBytesParsed += 2;
+                        pTagData       += 2;
+                        break;
+
+                case 'i':
+                case 'I':
+                        numBytesParsed += 4;
+                        pTagData       += 4;
+                        break;
+
+                case 'Z':
+                case 'H':
+                        while(*pTagData) {
+                            ++numBytesParsed;
+                            ++pTagData;
+                        }
+                        break;
+
+                default:
+                        printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);
+                        exit(1);
+            }
+        }
+
+    // Data members
+    public:
+        std::string  Name;              // Read name
+        int32_t      Length;            // Query length
+        std::string  QueryBases;        // 'Original' sequence (as reported from sequencing machine)
+        std::string  AlignedBases;      // 'Aligned' sequence (includes any indels, padding, clipping)
+        std::string  Qualities;         // FASTQ qualities (ASCII characters, not numeric values)
+        std::string  TagData;           // Tag data (accessor methods will pull the requested information out)
+        int32_t      RefID;             // ID number for reference sequence
+        int32_t      Position;          // Position (0-based) where alignment starts
+        uint16_t     Bin;               // Bin in BAM file where this alignment resides
+        uint16_t     MapQuality;        // Mapping quality score
+        uint32_t     AlignmentFlag;     // Alignment bit-flag - see Is<something>() methods for available queries
+        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
+        int32_t      MateRefID;         // ID number for reference sequence where alignment's mate was aligned
+        int32_t      MatePosition;      // Position (0-based) where alignment's mate starts
+        int32_t      InsertSize;        // Mate-pair insert size
+
+    // Alignment flag query constants
+    private:
+        enum { PAIRED        = 1,
+               PROPER_PAIR   = 2,
+               UNMAPPED      = 4,
+               MATE_UNMAPPED = 8,
+               REVERSE       = 16,
+               MATE_REVERSE  = 32,
+               READ_1        = 64,
+               READ_2        = 128,
+               SECONDARY     = 256,
+               QC_FAILED     = 512,
+               DUPLICATE     = 1024
+             };
+};
+
+// ----------------------------------------------------------------
+// Auxiliary data structs & typedefs
+
+struct CigarOp {
+    char     Type;   // Operation type (MIDNSHP)
+    uint32_t Length; // Operation length (number of bases)
+};
+
+struct RefData {
+    // data members
+    std::string  RefName;          // Name of reference sequence
+    unsigned int RefLength;        // Length of reference sequence
+    bool         RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence
+    // constructor
+    RefData(void)
+        : RefLength(0)
+        , RefHasAlignments(false)
+    { }
+};
+
+typedef std::vector<RefData> RefVector;
+typedef std::vector<BamAlignment> BamAlignmentVector;
+
+// ----------------------------------------------------------------
+// Indexing structs & typedefs
+
+struct Chunk {
+    // data members
+    uint64_t Start;
+    uint64_t Stop;
+    // constructor
+    Chunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+        : Start(start)
+        , Stop(stop)
+    { }
+};
+
+inline
+bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
+    return lhs.Start < rhs.Start;
 }
+
+typedef std::vector<Chunk> ChunkVector;
+typedef std::map<uint32_t, ChunkVector> BamBinMap;
+typedef std::vector<uint64_t> LinearOffsetVector;
+
+struct ReferenceIndex {
+    // data members
+    BamBinMap Bins;
+    LinearOffsetVector Offsets;
+    // constructor
+    ReferenceIndex(const BamBinMap& binMap = BamBinMap(),
+                   const LinearOffsetVector& offsets = LinearOffsetVector())
+        : Bins(binMap)
+        , Offsets(offsets)
+    { }
+};
+
+typedef std::vector<ReferenceIndex> BamIndex;
+
+} // namespace BamTools
+
+#endif // BAMAUX_H
diff --git a/BamReader.cpp b/BamReader.cpp
index f2a1e11..386a485 100644
--- a/BamReader.cpp
+++ b/BamReader.cpp
@@ -1,759 +1,1038 @@
-// ***************************************************************************
-// BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 15 July 2009 (DB)
-// ---------------------------------------------------------------------------
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-// BamTools includes
-#include "BamReader.h"
-using namespace BamTools;
-using namespace std;
-
-// static character constants
-const char* BamReader::DNA_LOOKUP   = "=ACMGRSVTWYHKDBN";
-const char* BamReader::CIGAR_LOOKUP = "MIDNSHP";
-
-// constructor
-BamReader::BamReader(void)
-	: m_BGZF(NULL)
-	, m_index(NULL)
-	, m_isIndexLoaded(false)
-	, m_alignmentsBeginOffset(0)
-	, m_isRegionSpecified(false)
-	, m_currentRefID(0)
-	, m_currentLeft(0)
-{ }
-
-// destructor
-BamReader::~BamReader(void) {
-	Close();
-}
-
-// checks BGZF block header
-bool BamReader::BgzfCheckBlockHeader(char* header) {
-
-	return (header[0] == GZIP_ID1 &&
-            header[1] == (char)GZIP_ID2 &&
-            header[2] == Z_DEFLATED &&
-            (header[3] & FLG_FEXTRA) != 0 &&
-            BgzfUnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
-            header[12] == BGZF_ID1 &&
-            header[13] == BGZF_ID2 &&
-            BgzfUnpackUnsignedShort(&header[14]) == BGZF_LEN
-		   );
-}
-
-// closes the BAM file
-void BamReader::BgzfClose(void) {
-	fflush(m_BGZF->Stream);
-	fclose(m_BGZF->Stream);
-	m_BGZF->IsOpen = false;
-}
-
-// de-compresses the current block
-int BamReader::BgzfInflateBlock(int blockLength) {
-	
-	// Inflate the block in m_BGZF.CompressedBlock into m_BGZF.UncompressedBlock
-    z_stream zs;
-    zs.zalloc    = NULL;
-    zs.zfree     = NULL;
-    zs.next_in   = (Bytef*)m_BGZF->CompressedBlock + 18;
-    zs.avail_in  = blockLength - 16;
-    zs.next_out  = (Bytef*)m_BGZF->UncompressedBlock;
-    zs.avail_out = m_BGZF->UncompressedBlockSize;
-
-    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
-    if (status != Z_OK) {
-        printf("inflateInit failed\n");
-        exit(1);
-    }
-
-    status = inflate(&zs, Z_FINISH);
-    if (status != Z_STREAM_END) {
-        inflateEnd(&zs);
-        printf("inflate failed\n");
-        exit(1);
-    }
-
-    status = inflateEnd(&zs);
-    if (status != Z_OK) {
-        printf("inflateEnd failed\n");
-        exit(1);
-    }
-
-    return zs.total_out;
-}
-
-// opens the BAM file for reading
-void BamReader::BgzfOpen(const string& filename) {
-
-	m_BGZF->Stream = fopen(filename.c_str(), "rb");
-	if(!m_BGZF->Stream) {
-		printf("ERROR: Unable to open the BAM file %s for reading.\n", filename.c_str() );
-		exit(1);
-	}
-
-	m_BGZF->IsOpen = true;
-}
-
-// reads BGZF data into buffer
-unsigned int BamReader::BgzfRead(char* data, const unsigned int dataLength) {
-
-    if (dataLength == 0) { return 0; }
-
-	char* output = data;
-    	unsigned int numBytesRead = 0;
-    	while (numBytesRead < dataLength) {
-
-        int bytesAvailable = m_BGZF->BlockLength - m_BGZF->BlockOffset;
-        if (bytesAvailable <= 0) {
-            if ( BgzfReadBlock() != 0 ) { return -1; }
-            bytesAvailable = m_BGZF->BlockLength - m_BGZF->BlockOffset;
-            if ( bytesAvailable <= 0 ) { break; }
-        }
-
-	char* buffer   = m_BGZF->UncompressedBlock;
-        int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
-        memcpy(output, buffer + m_BGZF->BlockOffset, copyLength);
-
-        m_BGZF->BlockOffset += copyLength;
-        output             += copyLength;
-        numBytesRead       += copyLength;
-    }
-
-    if ( m_BGZF->BlockOffset == m_BGZF->BlockLength ) {
-	m_BGZF->BlockAddress = ftello(m_BGZF->Stream);						
-        m_BGZF->BlockOffset  = 0;
-        m_BGZF->BlockLength  = 0;
-    }
-
-	return numBytesRead;
-}
-
-int BamReader::BgzfReadBlock(void) {
-
-    char    header[BLOCK_HEADER_LENGTH];
-    int64_t blockAddress = ftello(m_BGZF->Stream);
-
-    int count = fread(header, 1, sizeof(header), m_BGZF->Stream);
-	if (count == 0) {
-        m_BGZF->BlockLength = 0;
-        return 0;
-    }
-
-    if (count != sizeof(header)) {
-        printf("read block failed - count != sizeof(header)\n");
-        return -1;
-    }
-
-    if (!BgzfCheckBlockHeader(header)) {
-        printf("read block failed - CheckBgzfBlockHeader() returned false\n");
-        return -1;
-    }
-
-    int blockLength = BgzfUnpackUnsignedShort(&header[16]) + 1;
-    char* compressedBlock = m_BGZF->CompressedBlock;
-    memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
-    int remaining = blockLength - BLOCK_HEADER_LENGTH;
-
-    count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, m_BGZF->Stream);
-    if (count != remaining) {
-        printf("read block failed - count != remaining\n");
-        return -1;
-    }
-
-    count = BgzfInflateBlock(blockLength);
-    if (count < 0) { return -1; }
-
-    if (m_BGZF->BlockLength != 0) {
-        m_BGZF->BlockOffset = 0;
-    }
-
-    m_BGZF->BlockAddress = blockAddress;
-    m_BGZF->BlockLength  = count;
-    return 0;
-}
-
-// move file pointer to specified offset
-bool BamReader::BgzfSeek(int64_t position) {
-
-	int     blockOffset  = (position & 0xFFFF);
-    	int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
-    	if (fseeko(m_BGZF->Stream, blockAddress, SEEK_SET) != 0) {
-        printf("ERROR: Unable to seek in BAM file\n");
-		exit(1);
-    	}
-
-    	m_BGZF->BlockLength  = 0;
-    	m_BGZF->BlockAddress = blockAddress;
-    	m_BGZF->BlockOffset  = blockOffset;
-	return true;
-}
-
-// get file position in BAM file
-int64_t BamReader::BgzfTell(void) {
-	return ( (m_BGZF->BlockAddress << 16) | (m_BGZF->BlockOffset & 0xFFFF) );
-}
-
-int BamReader::BinsFromRegion(int refID, unsigned int left, uint16_t list[MAX_BIN]) {
-
-	// get region boundaries
-	uint32_t begin = left;
-	uint32_t end   = m_references.at(refID).RefLength - 1;
-
-	// initialize list, bin '0' always a valid bin
-	int i = 0;
-	list[i++] = 0;
-
-	// get rest of bins that contain this region
-	unsigned int k;
-	for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { list[i++] = k; }
-	for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { list[i++] = k; }
-	for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { list[i++] = k; }
-	for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { list[i++] = k; }
-	for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { list[i++] = k; }
-	
-	// return number of bins stored
-	return i;
-}
-
-unsigned int BamReader::CalculateAlignmentEnd(const unsigned int& position, const vector<CigarOp>& cigarData) {
-
-	// initialize alignment end to starting position
-	unsigned int alignEnd = position;
-
-	// iterate over cigar operations
-	vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
-	vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
-	for ( ; cigarIter != cigarEnd; ++cigarIter) {
-		char cigarType = (*cigarIter).Type;
-		if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
-			alignEnd += (*cigarIter).Length;
-		}
-	}
-	return alignEnd;
-}
-
-void BamReader::ClearIndex(void) {
-
-	if ( m_index ) {
-		// iterate over references
-		vector<RefIndex*>::iterator refIter = m_index->begin();
-		vector<RefIndex*>::iterator refEnd  = m_index->end();
-		for ( ; refIter != refEnd; ++refIter) {
-			RefIndex* aRef = (*refIter);
-			if ( aRef ) {
-				// clear out BAM bins
-				if ( aRef->first ) {
-					BinVector::iterator binIter = (aRef->first)->begin();
-					BinVector::iterator binEnd  = (aRef->first)->end();
-					for ( ; binIter != binEnd; ++binIter ) {
-						ChunkVector* chunks = (*binIter).second;
-						if ( chunks ) { delete chunks; chunks = NULL;}
-					}
-					delete aRef->first;
-					aRef->first = NULL;
-				}
-				// clear BAM linear offsets
-				if ( aRef->second ) { delete aRef->second; aRef->second = NULL; }
-				delete aRef;
-				aRef = NULL;
-			}
-		}
-		delete m_index;
-		m_index = NULL;
-	}
-}
-
-// closes the BAM file
-void BamReader::Close(void) {
-	
-	if (m_BGZF!=NULL && m_BGZF->IsOpen) { 
-		BgzfClose();
-		delete m_BGZF;
-		m_BGZF = NULL; 
-	}	
-	ClearIndex();
-	m_headerText.clear();
-	m_isRegionSpecified = false;
-}
-
-const string BamReader::GetHeaderText(void) const {
-	return m_headerText;
-}
-
-const int BamReader::GetReferenceCount(void) const {
-	return m_references.size();
-}
-
-const RefVector BamReader::GetReferenceData(void) const {
-	return m_references;
-}
-
-const int BamReader::GetReferenceID(const string& refName) const {
-
-	// retrieve names from reference data
-	vector<string> refNames;
-	RefVector::const_iterator refIter = m_references.begin();
-    RefVector::const_iterator refEnd  = m_references.end();
-    for ( ; refIter != refEnd; ++refIter) {
-		refNames.push_back( (*refIter).RefName );
-    }
-
-	// return 'index-of' refName ( if not found, returns refNames.size() )
-	return Index( refNames.begin(), refNames.end(), refName );
-}
-
-// get next alignment (from specified region, if given)
-bool BamReader::GetNextAlignment(BamAlignment& bAlignment) {
-
-	// if valid alignment available
-	if ( LoadNextAlignment(bAlignment) ) {
-		
-		// if region not specified, return success
-		if ( !m_isRegionSpecified ) { return true; }
-		
-		// load next alignment until region overlap is found
-		while ( !IsOverlap(bAlignment) ) {
-			// if no valid alignment available (likely EOF) return failure
-			if ( !LoadNextAlignment(bAlignment) ) { return false; }
-		}
-		
-		// return success (alignment found that overlaps region)
-		return true;
-	} 
-	
-	// no valid alignment
-	else { return false; }
-}
-
-int64_t BamReader::GetOffset(int refID, unsigned int left) {
-
-	// calculate which bins overlap this region
-	uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
-	int numBins = BinsFromRegion(refID, left, bins);
-
-	// get bins for this reference
-	RefIndex* refIndex = m_index->at(refID);
-	BinVector* refBins = refIndex->first;
-
-	// get minimum offset to consider
-	LinearOffsetVector* linearOffsets = refIndex->second;
-	uint64_t minOffset = ((left>>BAM_LIDX_SHIFT) >= linearOffsets->size()) ? 0 : linearOffsets->at(left>>BAM_LIDX_SHIFT);
-
-	// store offsets to beginning of alignment 'chunks' 
-	std::vector<int64_t> chunkStarts;
-	
-	// reference bin iterators
-	BinVector::const_iterator binIter;
-	BinVector::const_iterator binBegin = refBins->begin();
-	BinVector::const_iterator binEnd   = refBins->end();
-	
-	// store all alignment 'chunk' starts for bins in this region
-	for (int i = 0; i < numBins; ++i ) {
-		binIter = lower_bound(binBegin, binEnd, bins[i], LookupKeyCompare<uint32_t, ChunkVector*>() );
-		if ( (binIter != binEnd) && ( (*binIter).first == bins[i]) ) {
-			ChunkVector* binChunks = (*binIter).second;
-			ChunkVector::const_iterator chunkIter = binChunks->begin();
-			ChunkVector::const_iterator chunkEnd  = binChunks->end();
-			for ( ; chunkIter != chunkEnd; ++chunkIter) {
-				if ( (*chunkIter).second > minOffset ) {
-					chunkStarts.push_back( (*chunkIter).first );
-				}	
-			}
-		}
-	}
-	
-	// clean up memory
-	free(bins);
-	
-	// if no alignments found
-	if ( chunkStarts.empty() ) { return -1; }
-	
-	// else return smallest offset for alignment starts 
-	else { return *min_element(chunkStarts.begin(), chunkStarts.end()); }
-}
-
-bool BamReader::IsOverlap(BamAlignment& bAlignment) {
-
-	// if on different reference sequence, quit
-	if ( bAlignment.RefID != m_currentRefID ) { return false; }
-
-	// read starts after left boundary
-	if ( bAlignment.Position >= (signed int) m_currentLeft) { return true; }
-
-	// return whether alignment end overlaps left boundary
-	return ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) >= m_currentLeft );
-}
-
-bool BamReader::Jump(int refID, unsigned int position) {
-
-	// if index available, and region is valid
-	if ( (m_index->size() != 0) && m_references.at(refID).RefHasAlignments && (position <= m_references.at(refID).RefLength) ) { 
-		m_currentRefID = refID;
-		m_currentLeft  = position;
-		m_isRegionSpecified = true;
-		
-		int64_t offset = GetOffset(m_currentRefID, m_currentLeft);
-		if ( offset == -1 ) { return false; }
-		else { return BgzfSeek(offset); }
-	}
-	return false;
-}
-
-void BamReader::LoadHeaderData(void) {
-	
-	// check to see if proper BAM header
-	char buffer[4];
-	if (BgzfRead(buffer, 4) != 4) { 
-		printf("Could not read header type\n");
-		exit(1); 
-	}
-
-	if (strncmp(buffer, "BAM\001", 4)) {
-		printf("wrong header type!\n");
-		exit(1);
-	}
-	
-	// get BAM header text length
-	BgzfRead(buffer, 4);
-	const unsigned int headerTextLength = BgzfUnpackUnsignedInt(buffer);
-
-	// get BAM header text
-	char* headerText = (char*)calloc(headerTextLength + 1, 1);
-	BgzfRead(headerText, headerTextLength);
-	m_headerText = (string)((const char*)headerText);
-	
-	// clean up calloc-ed temp variable
-	free(headerText);
-}
-
-void BamReader::LoadIndexData(FILE* indexStream) {
-
-	// see if index is valid BAM index
-	char magic[4];
-	fread(magic, 1, 4, indexStream);
-	if (strncmp(magic, "BAI\1", 4)) {
-		printf("Problem with index file - invalid format.\n");
-		fclose(indexStream);
-		exit(1);
-	}
-
-	// get number of reference sequences
-	uint32_t numRefSeqs;
-	fread(&numRefSeqs, 4, 1, indexStream);
-	
-	// intialize BamIndex data structure
-	m_index = new BamIndex;
-	m_index->reserve(numRefSeqs);
-
-	// iterate over reference sequences
-	for (unsigned int i = 0; i < numRefSeqs; ++i) {
-		
-		// get number of bins for this reference sequence
-		int32_t numBins;
-		fread(&numBins, 4, 1, indexStream);
-		
-		if (numBins > 0) { m_references.at(i).RefHasAlignments = true; }
-
-		// intialize BinVector
-		BinVector* bins = new BinVector;
-		bins->reserve(numBins);
-		
-		// iterate over bins for that reference sequence
-		for (int j = 0; j < numBins; ++j) {
-			
-			// get binID 
-			uint32_t binID;
-			fread(&binID, 4, 1, indexStream);
-			
-			// get number of regionChunks in this bin
-			uint32_t numChunks;
-			fread(&numChunks, 4, 1, indexStream);
-			
-			// intialize ChunkVector
-			ChunkVector* regionChunks = new ChunkVector;
-			regionChunks->reserve(numChunks);
-			
-			// iterate over regionChunks in this bin
-			for (unsigned int k = 0; k < numChunks; ++k) {
-				
-				// get chunk boundaries (left, right) 
-				uint64_t left;
-				uint64_t right;
-				fread(&left, 8, 1, indexStream);
-				fread(&right, 8, 1, indexStream);
-				
-				// save ChunkPair
-				regionChunks->push_back( ChunkPair(left, right) );
-			}
-			
-			// sort chunks for this bin
-			sort( regionChunks->begin(), regionChunks->end(), LookupKeyCompare<uint64_t, uint64_t>() );
-
-			// save binID, chunkVector for this bin
-			bins->push_back( BamBin(binID, regionChunks) );
-		}
-		
-		// sort bins by binID
-		sort(bins->begin(), bins->end(), LookupKeyCompare<uint32_t, ChunkVector*>() );
-
-		// load linear index for this reference sequence
-		
-		// get number of linear offsets
-		int32_t numLinearOffsets;
-		fread(&numLinearOffsets, 4, 1, indexStream);
-		
-		// intialize LinearOffsetVector
-		LinearOffsetVector* linearOffsets = new LinearOffsetVector;
-		linearOffsets->reserve(numLinearOffsets);
-		
-		// iterate over linear offsets for this reference sequeence
-		for (int j = 0; j < numLinearOffsets; ++j) {
-			// get a linear offset
-			uint64_t linearOffset;
-			fread(&linearOffset, 8, 1, indexStream);
-			// store linear offset
-			linearOffsets->push_back(linearOffset);
-		}
-		
-		// sort linear offsets
-		sort( linearOffsets->begin(), linearOffsets->end() );
-
-		// store index data for that reference sequence
-		m_index->push_back( new RefIndex(bins, linearOffsets) );
-	}
-	
-	// close index file (.bai) and return
-	fclose(indexStream);
-}
-
-bool BamReader::LoadNextAlignment(BamAlignment& bAlignment) {
-
-	// read in the 'block length' value, make sure it's not zero
-	char buffer[4];
-	BgzfRead(buffer, 4);
-	const unsigned int blockLength = BgzfUnpackUnsignedInt(buffer);
-	if ( blockLength == 0 ) { return false; }
-
-	// keep track of bytes read as method progresses
-	int bytesRead = 4;
-
-	// read in core alignment data, make sure the right size of data was read 
-	char x[BAM_CORE_SIZE];
-	if ( BgzfRead(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }
-	bytesRead += BAM_CORE_SIZE;
-
-	// set BamAlignment 'core' data and character data lengths
-	unsigned int tempValue;
-	unsigned int queryNameLength;
-	unsigned int numCigarOperations;
-	unsigned int querySequenceLength;
-
-	//bAlignment.RefID    = BgzfUnpackUnsignedInt(&x[0]);
-	//bAlignment.Position = BgzfUnpackUnsignedInt(&x[4]);
-	bAlignment.RefID    = BgzfUnpackSignedInt(&x[0]);
-	bAlignment.Position = BgzfUnpackSignedInt(&x[4]);
-
-	tempValue             = BgzfUnpackUnsignedInt(&x[8]);		
-	bAlignment.Bin        = tempValue >> 16;
-	bAlignment.MapQuality = tempValue >> 8 & 0xff;
-	queryNameLength       = tempValue & 0xff;
-
-	tempValue                = BgzfUnpackUnsignedInt(&x[12]);	
-	bAlignment.AlignmentFlag = tempValue >> 16;
-	numCigarOperations       = tempValue & 0xffff;
-
-	querySequenceLength     = BgzfUnpackUnsignedInt(&x[16]);
-	//bAlignment.MateRefID    = BgzfUnpackUnsignedInt(&x[20]);
-	//bAlignment.MatePosition = BgzfUnpackUnsignedInt(&x[24]);
-	//bAlignment.InsertSize   = BgzfUnpackUnsignedInt(&x[28]);
-	bAlignment.MateRefID    = BgzfUnpackSignedInt(&x[20]);
-	bAlignment.MatePosition = BgzfUnpackSignedInt(&x[24]);
-	bAlignment.InsertSize   = BgzfUnpackSignedInt(&x[28]);
-
-	// calculate lengths/offsets
-	const unsigned int dataLength      = blockLength - BAM_CORE_SIZE;
-	const unsigned int cigarDataOffset = queryNameLength;
-	const unsigned int seqDataOffset   = cigarDataOffset + (numCigarOperations * 4);
-	const unsigned int qualDataOffset  = seqDataOffset + (querySequenceLength+1)/2;
-	const unsigned int tagDataOffset   = qualDataOffset + querySequenceLength;
-	const unsigned int tagDataLen      = dataLength - tagDataOffset;
-	
-	// set up destination buffers for character data
-	char* allCharData   = (char*)calloc(sizeof(char), dataLength);
-	uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
-	char* seqData       = ((char*)allCharData) + seqDataOffset;
-	char* qualData      = ((char*)allCharData) + qualDataOffset;
-	char* tagData       = ((char*)allCharData) + tagDataOffset;
-	
-	// get character data - make sure proper data size was read
-	if ( BgzfRead(allCharData, dataLength) != dataLength) { return false; }
-	else {
-		
-		bytesRead += dataLength;
-		
-		// clear out any previous string data
-		bAlignment.Name.clear();
-		bAlignment.QueryBases.clear();
-		bAlignment.Qualities.clear();
-		bAlignment.AlignedBases.clear();
-		bAlignment.CigarData.clear();
-		bAlignment.TagData.clear();
-		
-		// save name
-		bAlignment.Name = (string)((const char*)(allCharData));
-		
-		// save query sequence
-		for (unsigned int i = 0; i < querySequenceLength; ++i) { 	
-			char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
-			bAlignment.QueryBases.append( 1, singleBase );
-		}
-		
-		// save sequence length
-		bAlignment.Length = bAlignment.QueryBases.length();
-		
-		// save qualities
-		for (unsigned int i = 0; i < querySequenceLength; ++i) { 
-			char singleQuality = (char)(qualData[i]+33);			// conversion from QV to FASTQ character
-			bAlignment.Qualities.append( 1, singleQuality );
-		}
-		
-		// save CIGAR-related data;
-		int k = 0;
-		for (unsigned int i = 0; i < numCigarOperations; ++i) {
-			
-			// build CigarOp struct
-			CigarOp op;
-			op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);
-			op.Type   = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];
-			
-			// save CigarOp
-			bAlignment.CigarData.push_back(op);
-			
-			// build AlignedBases string
-			switch (op.Type) {
-				
-				case ('M') : 
-				case ('I') : bAlignment.AlignedBases.append( bAlignment.QueryBases.substr(k, op.Length) ); 	// for 'M', 'I' - write bases
-				case ('S') : k += op.Length;                                                     			// for 'S' - skip over query bases
-							 break;
-							 
-				case ('D') : bAlignment.AlignedBases.append( op.Length, '-' );	// for 'D' - write gap character
-							 break;
-							
-				case ('P') : bAlignment.AlignedBases.append( op.Length, '*' );	// for 'P' - write padding character;
-							 break;
-							 
-				case ('N') : bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in query sequence
-							 k += op.Length;
-							 break;
-							 
-				case ('H') : break; 											// for 'H' - do nothing, move to next op
-							 
-				default    : printf("ERROR: Invalid Cigar op type\n");			// shouldn't get here
-							 exit(1);
-			}
-		}
-		
-		// read in the tag data
-		bAlignment.TagData.resize(tagDataLen);
-		memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLen);
-	}
-
-	free(allCharData);
-	return true;
-}
-
-void BamReader::LoadReferenceData(void) {
-
-	// get number of reference sequences
-	char buffer[4];
-	BgzfRead(buffer, 4);
-	const unsigned int numberRefSeqs = BgzfUnpackUnsignedInt(buffer);
-	if (numberRefSeqs == 0) { return; }
-	m_references.reserve((int)numberRefSeqs);
-	
-	// iterate over all references in header
-	for (unsigned int i = 0; i != numberRefSeqs; ++i) {
-
-		// get length of reference name
-		BgzfRead(buffer, 4);
-		const unsigned int refNameLength = BgzfUnpackUnsignedInt(buffer);
-		char* refName = (char*)calloc(refNameLength, 1);
-		
-		// get reference name and reference sequence length
-		BgzfRead(refName, refNameLength);
-		BgzfRead(buffer, 4);
-		const unsigned int refLength = BgzfUnpackUnsignedInt(buffer);
-		
-		// store data for reference
-		RefData aReference;
-		aReference.RefName   = (string)((const char*)refName);
-		aReference.RefLength = refLength;
-		m_references.push_back(aReference);
-		
-		// clean up calloc-ed temp variable
-		free(refName);
-	}
-}
-
-// opens BAM file (and index)
-void BamReader::Open(const string& filename, const string& indexFilename) {
-
-	// open the BGZF file for reading, retrieve header text & reference data
-	m_BGZF = new BgzfData;
-	BgzfOpen(filename);
-	LoadHeaderData();	
-	LoadReferenceData();
-
-	// store file offset of first alignment
-	m_alignmentsBeginOffset = BgzfTell();
-
-	// open index file & load index data (if exists)
-	OpenIndex(indexFilename);
-}
-
-void BamReader::OpenIndex(const string& indexFilename) {
-
-	// if index file exists
-	if (!indexFilename.empty()) {
-
-		// open index
-		FILE* indexStream = fopen(indexFilename.c_str(), "rb");
-		
-		// abort on error
-		if(!indexStream) {
-			printf("ERROR: Unable to open the BAM index file %s for reading.\n", indexFilename.c_str() );
-			exit(1);
-		}
-	
-		// build up index data structure
-		LoadIndexData(indexStream);
-	}
-}
-
-bool BamReader::Rewind(void) {
-
-	// find first reference that has alignments in the BAM file
-	int refID = 0;
-	int refCount = m_references.size();
-	for ( ; refID < refCount; ++refID ) {
-		if ( m_references.at(refID).RefHasAlignments ) { break; } 
-	}
-
-	// store default bounds for first alignment
-	m_currentRefID = refID;
-	m_currentLeft = 0;
-	m_isRegionSpecified = false;
-
-	// return success/failure of seek
-	return BgzfSeek(m_alignmentsBeginOffset);
-}	
+// ***************************************************************************
+// BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+// C++ includes
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+
+// BamTools includes
+#include "BGZF.h"
+#include "BamReader.h"
+using namespace BamTools;
+using namespace std;
+
+struct BamReader::BamReaderPrivate {
+
+    // -------------------------------
+    // data members
+    // -------------------------------
+
+    // general data
+    BgzfData  mBGZF;
+    string    HeaderText;
+    BamIndex  Index;
+    RefVector References;
+    bool      IsIndexLoaded;
+    int64_t   AlignmentsBeginOffset;
+    string    Filename;
+    string    IndexFilename;
+
+    // user-specified region values
+    bool         IsRegionSpecified;
+    int          CurrentRefID;
+    unsigned int CurrentLeft;
+
+    // BAM character constants
+    const char* DNA_LOOKUP;
+    const char* CIGAR_LOOKUP;
+
+    // -------------------------------
+    // constructor & destructor
+    // -------------------------------
+    BamReaderPrivate(void);
+    ~BamReaderPrivate(void);
+
+    // -------------------------------
+    // "public" interface
+    // -------------------------------
+
+    // flie operations
+    void Close(void);
+    bool Jump(int refID, unsigned int position = 0);
+    void Open(const string& filename, const string& indexFilename = "");
+    bool Rewind(void);
+
+    // access alignment data
+    bool GetNextAlignment(BamAlignment& bAlignment);
+
+    // access auxiliary data
+    const string GetHeaderText(void) const;
+    const int GetReferenceCount(void) const;
+    const RefVector GetReferenceData(void) const;
+    const int GetReferenceID(const string& refName) const;
+
+    // index operations
+    bool CreateIndex(void);
+
+    // -------------------------------
+    // internal methods
+    // -------------------------------
+
+    // *** reading alignments and auxiliary data *** //
+
+    // calculate bins that overlap region ( left to reference end for now )
+    int BinsFromRegion(int, unsigned int, uint16_t[MAX_BIN]);
+    // calculates alignment end position based on starting position and provided CIGAR operations
+    unsigned int CalculateAlignmentEnd(const unsigned int& position, const std::vector<CigarOp>& cigarData);
+    // calculate file offset for first alignment chunk overlapping 'left'
+    int64_t GetOffset(int refID, unsigned int left);
+    // checks to see if alignment overlaps current region
+    bool IsOverlap(BamAlignment& bAlignment);
+    // retrieves header text from BAM file
+    void LoadHeaderData(void);
+    // retrieves BAM alignment under file pointer
+    bool LoadNextAlignment(BamAlignment& bAlignment);
+    // builds reference data structure from BAM file
+    void LoadReferenceData(void);
+
+    // *** index file handling *** //
+
+    // calculates index for BAM file
+    bool BuildIndex(void);
+    // clear out inernal index data structure
+    void ClearIndex(void);
+    // saves BAM bin entry for index
+    void InsertBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset);
+    // saves linear offset entry for index
+    void InsertLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset);
+    // loads index from BAM index file
+    bool LoadIndex(void);
+    // simplifies index by merging 'chunks'
+    void MergeChunks(void);
+    // round-up 32-bit integer to next power-of-2
+    void Roundup32(int& value);
+    // saves index to BAM index file
+    bool WriteIndex(void);
+};
+
+// -----------------------------------------------------
+// BamReader implementation (wrapper around BRPrivate)
+// -----------------------------------------------------
+
+// constructor
+BamReader::BamReader(void) {
+    d = new BamReaderPrivate;
+}
+
+// destructor
+BamReader::~BamReader(void) {
+    delete d;
+    d = 0;
+}
+
+// file operations
+void BamReader::Close(void) { d->Close(); }
+bool BamReader::Jump(int refID, unsigned int position) { return d->Jump(refID, position); }
+void BamReader::Open(const string& filename, const string& indexFilename) { d->Open(filename, indexFilename); }
+bool BamReader::Rewind(void) { return d->Rewind(); }
+
+// access alignment data
+bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }
+
+// access auxiliary data
+const string    BamReader::GetHeaderText(void) const { return d->HeaderText; }
+const int       BamReader::GetReferenceCount(void) const { return d->References.size(); }
+const RefVector BamReader::GetReferenceData(void) const { return d->References; }
+const int       BamReader::GetReferenceID(const string& refName) const { return d->GetReferenceID(refName); }
+
+// index operations
+bool BamReader::CreateIndex(void) { return d->CreateIndex(); }
+
+// -----------------------------------------------------
+// BamReaderPrivate implementation
+// -----------------------------------------------------
+
+// constructor
+BamReader::BamReaderPrivate::BamReaderPrivate(void)
+    : IsIndexLoaded(false)
+    , AlignmentsBeginOffset(0)
+    , IsRegionSpecified(false)
+    , CurrentRefID(0)
+    , CurrentLeft(0)
+    , DNA_LOOKUP("=ACMGRSVTWYHKDBN")
+    , CIGAR_LOOKUP("MIDNSHP")
+{ }
+
+// destructor
+BamReader::BamReaderPrivate::~BamReaderPrivate(void) {
+    Close();
+}
+
+// calculate bins that overlap region ( left to reference end for now )
+int BamReader::BamReaderPrivate::BinsFromRegion(int refID, unsigned int left, uint16_t list[MAX_BIN]) {
+
+    // get region boundaries
+    uint32_t begin = left;
+    uint32_t end   = References.at(refID).RefLength - 1;
+
+    // initialize list, bin '0' always a valid bin
+    int i = 0;
+    list[i++] = 0;
+
+    // get rest of bins that contain this region
+    unsigned int k;
+    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { list[i++] = k; }
+    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { list[i++] = k; }
+    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { list[i++] = k; }
+    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { list[i++] = k; }
+    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { list[i++] = k; }
+
+    // return number of bins stored
+    return i;
+}
+
+// populates BAM index data structure from BAM file data
+bool BamReader::BamReaderPrivate::BuildIndex(void) {
+
+    // check to be sure file is open
+    if (!mBGZF.IsOpen) { return false; }
+
+    // move file pointer to beginning of alignments
+    Rewind();
+
+    // get reference count, reserve index space
+    int numReferences = References.size();
+    for ( int i = 0; i < numReferences; ++i ) {
+        Index.push_back(ReferenceIndex());
+    }
+
+    // sets default constant for bin, ID, offset, coordinate variables
+    const uint32_t defaultValue = 0xffffffffu;
+
+    // bin data
+    uint32_t saveBin(defaultValue);
+    uint32_t lastBin(defaultValue);
+
+    // reference ID data
+    int32_t saveRefID(defaultValue);
+    int32_t lastRefID(defaultValue);
+
+    // offset data
+    uint64_t saveOffset = mBGZF.Tell();
+    uint64_t lastOffset = saveOffset;
+
+    // coordinate data
+    int32_t lastCoordinate = defaultValue;
+
+    BamAlignment bAlignment;
+    while( GetNextAlignment(bAlignment) ) {
+
+        // change of chromosome, save ID, reset bin
+        if ( lastRefID != bAlignment.RefID ) {
+            lastRefID = bAlignment.RefID;
+            lastBin   = defaultValue;
+        }
+
+        // if lastCoordinate greater than BAM position - file not sorted properly
+        else if ( lastCoordinate > bAlignment.Position ) {
+            printf("BAM file not properly sorted:\n");
+            printf("Alignment %s : %u > %u on reference (id = %d)", bAlignment.Name.c_str(), lastCoordinate, bAlignment.Position, bAlignment.RefID);
+            exit(1);
+        }
+
+        // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
+        if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
+
+            // save linear offset entry (matched to BAM entry refID)
+            ReferenceIndex& refIndex = Index.at(bAlignment.RefID);
+            LinearOffsetVector& offsets = refIndex.Offsets;
+            InsertLinearOffset(offsets, bAlignment, lastOffset);
+        }
+
+        // if current BamAlignment bin != lastBin, "then possibly write the binning index"
+        if ( bAlignment.Bin != lastBin ) {
+
+            // if not first time through
+            if ( saveBin != defaultValue ) {
+
+                // save Bam bin entry
+                ReferenceIndex& refIndex = Index.at(saveRefID);
+                BamBinMap& binMap = refIndex.Bins;
+                InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
+            }
+
+            // update saveOffset
+            saveOffset = lastOffset;
+
+            // update bin values
+            saveBin = bAlignment.Bin;
+            lastBin = bAlignment.Bin;
+
+            // update saveRefID
+            saveRefID = bAlignment.RefID;
+
+            // if invalid RefID, break out (why?)
+            if ( saveRefID < 0 ) { break; }
+        }
+
+        // make sure that current file pointer is beyond lastOffset
+        if ( mBGZF.Tell() <= (int64_t)lastOffset  ) {
+            printf("Error in BGZF offsets.\n");
+            exit(1);
+        }
+
+        // update lastOffset
+        lastOffset = mBGZF.Tell();
+
+        // update lastCoordinate
+        lastCoordinate = bAlignment.Position;
+    }
+
+    // save any leftover BAM data (as long as refID is valid)
+    if ( saveRefID >= 0 ) {
+        // save Bam bin entry
+        ReferenceIndex& refIndex = Index.at(saveRefID);
+        BamBinMap& binMap = refIndex.Bins;
+        InsertBinEntry(binMap, saveBin, saveOffset, lastOffset);
+    }
+
+    // simplify index by merging chunks
+    MergeChunks();
+
+    // iterate over references
+    BamIndex::iterator indexIter = Index.begin();
+    BamIndex::iterator indexEnd  = Index.end();
+    for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
+
+        // get reference index data
+        ReferenceIndex& refIndex = (*indexIter);
+        BamBinMap& binMap = refIndex.Bins;
+        LinearOffsetVector& offsets = refIndex.Offsets;
+
+        // store whether reference has alignments or no
+        References[i].RefHasAlignments = ( binMap.size() > 0 );
+
+        // sort linear offsets
+        sort(offsets.begin(), offsets.end());
+    }
+
+
+    // rewind file pointer to beginning of alignments, return success/fail
+    return Rewind();
+}
+
+// calculates alignment end position based on starting position and provided CIGAR operations
+unsigned int BamReader::BamReaderPrivate::CalculateAlignmentEnd(const unsigned int& position, const vector<CigarOp>& cigarData) {
+
+    // initialize alignment end to starting position
+    unsigned int alignEnd = position;
+
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
+    vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+        char cigarType = (*cigarIter).Type;
+        if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
+            alignEnd += (*cigarIter).Length;
+        }
+    }
+    return alignEnd;
+}
+
+
+// clear index data structure
+void BamReader::BamReaderPrivate::ClearIndex(void) {
+    Index.clear(); // sufficient ??
+}
+
+// closes the BAM file
+void BamReader::BamReaderPrivate::Close(void) {
+    mBGZF.Close();
+    ClearIndex();
+    HeaderText.clear();
+    IsRegionSpecified = false;
+}
+
+// create BAM index from BAM file (keep structure in memory) and write to default index output file
+bool BamReader::BamReaderPrivate::CreateIndex(void) {
+
+    // clear out index
+    ClearIndex();
+
+    bool ok = true;
+    ok &= BuildIndex();
+    ok &= WriteIndex();
+    return ok;
+}
+
+// returns RefID for given RefName (returns References.size() if not found)
+const int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // retrieve names from reference data
+    vector<string> refNames;
+    RefVector::const_iterator refIter = References.begin();
+    RefVector::const_iterator refEnd  = References.end();
+    for ( ; refIter != refEnd; ++refIter) {
+        refNames.push_back( (*refIter).RefName );
+    }
+
+    // return 'index-of' refName ( if not found, returns refNames.size() )
+    return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));
+}
+
+// get next alignment (from specified region, if given)
+bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
+
+    // if valid alignment available
+    if ( LoadNextAlignment(bAlignment) ) {
+
+        // if region not specified, return success
+        if ( !IsRegionSpecified ) { return true; }
+
+        // load next alignment until region overlap is found
+        while ( !IsOverlap(bAlignment) ) {
+            // if no valid alignment available (likely EOF) return failure
+            if ( !LoadNextAlignment(bAlignment) ) { return false; }
+        }
+
+        // return success (alignment found that overlaps region)
+        return true;
+    }
+
+    // no valid alignment
+    else { return false; }
+}
+
+// calculate closest indexed file offset for region specified
+int64_t BamReader::BamReaderPrivate::GetOffset(int refID, unsigned int left) {
+
+    // calculate which bins overlap this region
+    uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
+    int numBins = BinsFromRegion(refID, left, bins);
+
+    // get bins for this reference
+    const ReferenceIndex& refIndex = Index.at(refID);
+    const BamBinMap& binMap        = refIndex.Bins;
+
+    // get minimum offset to consider
+    const LinearOffsetVector& offsets = refIndex.Offsets;
+    uint64_t minOffset = ( (left>>BAM_LIDX_SHIFT) >= offsets.size() ) ? 0 : offsets.at(left>>BAM_LIDX_SHIFT);
+
+    // store offsets to beginning of alignment 'chunks'
+    std::vector<int64_t> chunkStarts;
+
+    // store all alignment 'chunk' starts for bins in this region
+    for (int i = 0; i < numBins; ++i ) {
+        uint16_t binKey = bins[i];
+
+        map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
+        if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
+
+            const ChunkVector& chunks = (*binIter).second;
+            std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
+            std::vector<Chunk>::const_iterator chunksEnd  = chunks.end();
+            for ( ; chunksIter != chunksEnd; ++chunksIter) {
+                const Chunk& chunk = (*chunksIter);
+                if ( chunk.Stop > minOffset ) {
+                    chunkStarts.push_back( chunk.Start );
+                }
+            }
+        }
+    }
+
+    // clean up memory
+    free(bins);
+
+    // if no alignments found, else return smallest offset for alignment starts
+    if ( chunkStarts.size() == 0 ) { return -1; }
+    else { return *min_element(chunkStarts.begin(), chunkStarts.end()); }
+}
+
+// saves BAM bin entry for index
+void BamReader::BamReaderPrivate::InsertBinEntry(BamBinMap&      binMap,
+                                                 const uint32_t& saveBin,
+                                                 const uint64_t& saveOffset,
+                                                 const uint64_t& lastOffset)
+{
+    // look up saveBin
+    BamBinMap::iterator binIter = binMap.find(saveBin);
+
+    // create new chunk
+    Chunk newChunk(saveOffset, lastOffset);
+
+    // if entry doesn't exist
+    if ( binIter == binMap.end() ) {
+        ChunkVector newChunks;
+        newChunks.push_back(newChunk);
+        binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
+    }
+
+    // otherwise
+    else {
+        ChunkVector& binChunks = (*binIter).second;
+        binChunks.push_back( newChunk );
+    }
+}
+
+// saves linear offset entry for index
+void BamReader::BamReaderPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
+                                                     const BamAlignment& bAlignment,
+                                                     const uint64_t&     lastOffset)
+{
+    // get converted offsets
+    int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
+    int endOffset   = ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) - 1) >> BAM_LIDX_SHIFT;
+
+    // resize vector if necessary
+    int oldSize = offsets.size();
+    int newSize = endOffset + 1;
+    if ( oldSize < newSize ) {        
+        Roundup32(newSize);
+        offsets.resize(newSize, 0);
+    }
+
+    // store offset
+    for(int i = beginOffset + 1; i <= endOffset ; ++i) {
+        if ( offsets[i] == 0) {
+            offsets[i] = lastOffset;
+        }
+    }
+}
+
+// returns whether alignment overlaps currently specified region (refID, leftBound)
+bool BamReader::BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {
+
+    // if on different reference sequence, quit
+    if ( bAlignment.RefID != CurrentRefID ) { return false; }
+
+    // read starts after left boundary
+    if ( bAlignment.Position >= (int32_t)CurrentLeft) { return true; }
+
+    // return whether alignment end overlaps left boundary
+    return ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) >= CurrentLeft );
+}
+
+// jumps to specified region(refID, leftBound) in BAM file, returns success/fail
+bool BamReader::BamReaderPrivate::Jump(int refID, unsigned int position) {
+
+    // if data exists for this reference and position is valid    
+    if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {
+
+		// set current region
+        CurrentRefID = refID;
+        CurrentLeft  = position;
+        IsRegionSpecified = true;
+
+		// calculate offset
+        int64_t offset = GetOffset(CurrentRefID, CurrentLeft);
+
+		// if in valid offset, return failure
+        if ( offset == -1 ) { return false; }
+
+		// otherwise return success of seek operation
+        else { return mBGZF.Seek(offset); }
+    }
+
+	// invalid jump request parameters, return failure
+    return false;
+}
+
+// load BAM header data
+void BamReader::BamReaderPrivate::LoadHeaderData(void) {
+
+    // check to see if proper BAM header
+    char buffer[4];
+    if (mBGZF.Read(buffer, 4) != 4) {
+        printf("Could not read header type\n");
+        exit(1);
+    }
+
+    if (strncmp(buffer, "BAM\001", 4)) {
+        printf("wrong header type!\n");
+        exit(1);
+    }
+
+    // get BAM header text length
+    mBGZF.Read(buffer, 4);
+    const unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer);
+
+    // get BAM header text
+    char* headerText = (char*)calloc(headerTextLength + 1, 1);
+    mBGZF.Read(headerText, headerTextLength);
+    HeaderText = (string)((const char*)headerText);
+
+    // clean up calloc-ed temp variable
+    free(headerText);
+}
+
+// load existing index data from BAM index file (".bai"), return success/fail
+bool BamReader::BamReaderPrivate::LoadIndex(void) {
+
+    // clear out index data
+    ClearIndex();
+
+    // skip if index file empty
+    if ( IndexFilename.empty() ) { return false; }
+
+    // open index file, abort on error
+    FILE* indexStream = fopen(IndexFilename.c_str(), "rb");
+    if(!indexStream) {
+        printf("ERROR: Unable to open the BAM index file %s for reading.\n", IndexFilename.c_str() );
+        return false;
+    }
+
+    // see if index is valid BAM index
+    char magic[4];
+    fread(magic, 1, 4, indexStream);
+    if (strncmp(magic, "BAI\1", 4)) {
+        printf("Problem with index file - invalid format.\n");
+        fclose(indexStream);
+        return false;
+    }
+
+    // get number of reference sequences
+    uint32_t numRefSeqs;
+    fread(&numRefSeqs, 4, 1, indexStream);
+
+    // intialize space for BamIndex data structure
+    Index.reserve(numRefSeqs);
+
+    // iterate over reference sequences
+    for (unsigned int i = 0; i < numRefSeqs; ++i) {
+
+        // get number of bins for this reference sequence
+        int32_t numBins;
+        fread(&numBins, 4, 1, indexStream);
+
+        if (numBins > 0) {
+            RefData& refEntry = References[i];
+            refEntry.RefHasAlignments = true;
+        }
+
+        // intialize BinVector
+        BamBinMap binMap;
+
+        // iterate over bins for that reference sequence
+        for (int j = 0; j < numBins; ++j) {
+
+            // get binID
+            uint32_t binID;
+            fread(&binID, 4, 1, indexStream);
+
+            // get number of regionChunks in this bin
+            uint32_t numChunks;
+            fread(&numChunks, 4, 1, indexStream);
+
+            // intialize ChunkVector
+            ChunkVector regionChunks;
+            regionChunks.reserve(numChunks);
+
+            // iterate over regionChunks in this bin
+            for (unsigned int k = 0; k < numChunks; ++k) {
+
+                // get chunk boundaries (left, right)
+                uint64_t left;
+                uint64_t right;
+                fread(&left, 8, 1, indexStream);
+                fread(&right, 8, 1, indexStream);
+
+                // save ChunkPair
+                regionChunks.push_back( Chunk(left, right) );
+            }
+
+            // sort chunks for this bin
+            sort( regionChunks.begin(), regionChunks.end(), ChunkLessThan );
+
+            // save binID, chunkVector for this bin
+            binMap.insert( pair<uint32_t, ChunkVector>(binID, regionChunks) );
+        }
+
+        // load linear index for this reference sequence
+
+        // get number of linear offsets
+        int32_t numLinearOffsets;
+        fread(&numLinearOffsets, 4, 1, indexStream);
+
+        // intialize LinearOffsetVector
+        LinearOffsetVector offsets;
+        offsets.reserve(numLinearOffsets);
+
+        // iterate over linear offsets for this reference sequeence
+        uint64_t linearOffset;
+        for (int j = 0; j < numLinearOffsets; ++j) {
+            // read a linear offset & store
+            fread(&linearOffset, 8, 1, indexStream);
+            offsets.push_back(linearOffset);
+        }
+
+        // sort linear offsets
+        sort( offsets.begin(), offsets.end() );
+
+        // store index data for that reference sequence
+        Index.push_back( ReferenceIndex(binMap, offsets) );
+    }
+
+    // close index file (.bai) and return
+    fclose(indexStream);
+    return true;
+}
+
+// populates BamAlignment with alignment data under file pointer, returns success/fail
+bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {
+
+    // read in the 'block length' value, make sure it's not zero
+    char buffer[4];
+    mBGZF.Read(buffer, 4);
+    const unsigned int blockLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( blockLength == 0 ) { return false; }
+
+    // keep track of bytes read as method progresses
+    int bytesRead = 4;
+
+    // read in core alignment data, make sure the right size of data was read
+    char x[BAM_CORE_SIZE];
+    if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }
+    bytesRead += BAM_CORE_SIZE;
+
+    // set BamAlignment 'core' data and character data lengths
+    unsigned int tempValue;
+    unsigned int queryNameLength;
+    unsigned int numCigarOperations;
+    unsigned int querySequenceLength;
+
+    bAlignment.RefID    = BgzfData::UnpackSignedInt(&x[0]);
+    bAlignment.Position = BgzfData::UnpackSignedInt(&x[4]);
+
+    tempValue             = BgzfData::UnpackUnsignedInt(&x[8]);
+    bAlignment.Bin        = tempValue >> 16;
+    bAlignment.MapQuality = tempValue >> 8 & 0xff;
+    queryNameLength       = tempValue & 0xff;
+
+    tempValue                = BgzfData::UnpackUnsignedInt(&x[12]);
+    bAlignment.AlignmentFlag = tempValue >> 16;
+    numCigarOperations       = tempValue & 0xffff;
+
+    querySequenceLength     = BgzfData::UnpackUnsignedInt(&x[16]);
+    bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);
+    bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);
+    bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);
+
+    // calculate lengths/offsets
+    const unsigned int dataLength      = blockLength - BAM_CORE_SIZE;
+    const unsigned int cigarDataOffset = queryNameLength;
+    const unsigned int seqDataOffset   = cigarDataOffset + (numCigarOperations * 4);
+    const unsigned int qualDataOffset  = seqDataOffset + (querySequenceLength+1)/2;
+    const unsigned int tagDataOffset   = qualDataOffset + querySequenceLength;
+    const unsigned int tagDataLen      = dataLength - tagDataOffset;
+
+    // set up destination buffers for character data
+    char* allCharData   = (char*)calloc(sizeof(char), dataLength);
+    uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
+    char* seqData       = ((char*)allCharData) + seqDataOffset;
+    char* qualData      = ((char*)allCharData) + qualDataOffset;
+    char* tagData       = ((char*)allCharData) + tagDataOffset;
+
+    // get character data - make sure proper data size was read
+    if ( mBGZF.Read(allCharData, dataLength) != (signed int)dataLength) { return false; }
+    else {
+
+        bytesRead += dataLength;
+
+        // clear out any previous string data
+        bAlignment.Name.clear();
+        bAlignment.QueryBases.clear();
+        bAlignment.Qualities.clear();
+        bAlignment.AlignedBases.clear();
+        bAlignment.CigarData.clear();
+        bAlignment.TagData.clear();
+
+        // save name
+        bAlignment.Name = (string)((const char*)(allCharData));
+
+        // save query sequence
+        for (unsigned int i = 0; i < querySequenceLength; ++i) {
+            char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+            bAlignment.QueryBases.append( 1, singleBase );
+        }
+
+        // save sequence length
+        bAlignment.Length = bAlignment.QueryBases.length();
+
+        // save qualities, convert from numeric QV to FASTQ character
+        for (unsigned int i = 0; i < querySequenceLength; ++i) {
+            char singleQuality = (char)(qualData[i]+33);
+            bAlignment.Qualities.append( 1, singleQuality );
+        }
+
+        // save CIGAR-related data;
+        int k = 0;
+        for (unsigned int i = 0; i < numCigarOperations; ++i) {
+
+            // build CigarOp struct
+            CigarOp op;
+            op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);
+            op.Type   = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];
+
+            // save CigarOp
+            bAlignment.CigarData.push_back(op);
+
+            // build AlignedBases string
+            switch (op.Type) {
+
+                case ('M') :
+                case ('I') : bAlignment.AlignedBases.append( bAlignment.QueryBases.substr(k, op.Length) ); // for 'M', 'I' - write bases
+                case ('S') : k += op.Length;                                                               // for 'S' - skip over query bases
+                             break;
+
+                case ('D') : bAlignment.AlignedBases.append( op.Length, '-' );	// for 'D' - write gap character
+                             break;
+
+                case ('P') : bAlignment.AlignedBases.append( op.Length, '*' );	// for 'P' - write padding character;
+                             break;
+
+                case ('N') : bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in query sequence
+                             k += op.Length;
+                             break;
+
+                case ('H') : break; 					        // for 'H' - do nothing, move to next op
+
+                default    : printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here
+                             exit(1);
+            }
+        }
+
+        // read in the tag data
+        bAlignment.TagData.resize(tagDataLen);
+        memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLen);
+    }
+
+    free(allCharData);
+    return true;
+}
+
+// loads reference data from BAM file
+void BamReader::BamReaderPrivate::LoadReferenceData(void) {
+
+    // get number of reference sequences
+    char buffer[4];
+    mBGZF.Read(buffer, 4);
+    const unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer);
+    if (numberRefSeqs == 0) { return; }
+    References.reserve((int)numberRefSeqs);
+
+    // iterate over all references in header
+    for (unsigned int i = 0; i != numberRefSeqs; ++i) {
+
+        // get length of reference name
+        mBGZF.Read(buffer, 4);
+        const unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer);
+        char* refName = (char*)calloc(refNameLength, 1);
+
+        // get reference name and reference sequence length
+        mBGZF.Read(refName, refNameLength);
+        mBGZF.Read(buffer, 4);
+        const unsigned int refLength = BgzfData::UnpackUnsignedInt(buffer);
+
+        // store data for reference
+        RefData aReference;
+        aReference.RefName   = (string)((const char*)refName);
+        aReference.RefLength = refLength;
+        References.push_back(aReference);
+
+        // clean up calloc-ed temp variable
+        free(refName);
+    }
+}
+
+// merges 'alignment chunks' in BAM bin (used for index building)
+void BamReader::BamReaderPrivate::MergeChunks(void) {
+
+    // iterate over reference enties
+    BamIndex::iterator indexIter = Index.begin();
+    BamIndex::iterator indexEnd  = Index.end();
+    for ( ; indexIter != indexEnd; ++indexIter ) {
+
+        // get BAM bin map for this reference
+        ReferenceIndex& refIndex = (*indexIter);
+        BamBinMap& bamBinMap = refIndex.Bins;
+
+        // iterate over BAM bins
+        BamBinMap::iterator binIter = bamBinMap.begin();
+        BamBinMap::iterator binEnd  = bamBinMap.end();
+        for ( ; binIter != binEnd; ++binIter ) {
+
+            // get chunk vector for this bin
+            ChunkVector& binChunks = (*binIter).second;
+            if ( binChunks.size() == 0 ) { continue; }
+
+            ChunkVector mergedChunks;
+            mergedChunks.push_back( binChunks[0] );
+
+            // iterate over chunks
+            int i = 0;
+            ChunkVector::iterator chunkIter = binChunks.begin();
+            ChunkVector::iterator chunkEnd  = binChunks.end();
+            for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
+
+                // get 'currentChunk' based on numeric index
+                Chunk& currentChunk = mergedChunks[i];
+
+                // get iteratorChunk based on vector iterator
+                Chunk& iteratorChunk = (*chunkIter);
+
+                // if currentChunk.Stop(shifted) == iterator Chunk.Start(shifted)
+                if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 ) {
+
+                    // set currentChunk.Stop to iteratorChunk.Stop
+                    currentChunk.Stop = iteratorChunk.Stop;
+                }
+
+                // otherwise
+                else {
+                    // set currentChunk + 1 to iteratorChunk
+                    mergedChunks.push_back(iteratorChunk);
+                    ++i;
+                }
+            }
+
+            // saved merged chunk vector
+            (*binIter).second = mergedChunks;
+        }
+    }
+}
+
+// opens BAM file (and index)
+void BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) {
+
+    Filename = filename;
+    IndexFilename = indexFilename;
+
+    // open the BGZF file for reading, retrieve header text & reference data
+    mBGZF.Open(filename, "rb");
+    LoadHeaderData();
+    LoadReferenceData();
+
+    // store file offset of first alignment
+    AlignmentsBeginOffset = mBGZF.Tell();
+
+    // open index file & load index data (if exists)
+    if ( !IndexFilename.empty() ) {
+        LoadIndex();
+    }
+}
+
+// returns BAM file pointer to beginning of alignment data
+bool BamReader::BamReaderPrivate::Rewind(void) {
+
+    // find first reference that has alignments in the BAM file
+    int refID = 0;
+    int refCount = References.size();
+    for ( ; refID < refCount; ++refID ) {
+        if ( References.at(refID).RefHasAlignments ) { break; }
+    }
+
+    // store default bounds for first alignment
+    CurrentRefID = refID;
+    CurrentLeft = 0;
+    IsRegionSpecified = false;
+
+    // return success/failure of seek
+    return mBGZF.Seek(AlignmentsBeginOffset);
+}
+
+// rounds value up to next power-of-2 (used in index building)
+void BamReader::BamReaderPrivate::Roundup32(int& value) {    
+    --value;
+    value |= value >> 1;
+    value |= value >> 2;
+    value |= value >> 4;
+    value |= value >> 8;
+    value |= value >> 16;
+    ++value;
+}
+
+// saves index data to BAM index file (".bai"), returns success/fail
+bool BamReader::BamReaderPrivate::WriteIndex(void) {
+
+    IndexFilename = Filename + ".bai";
+    FILE* indexStream = fopen(IndexFilename.c_str(), "wb");
+    if ( indexStream == 0 ) {
+        printf("ERROR: Could not open file to save index\n");
+        return false;
+    }
+
+    // write BAM index header
+    fwrite("BAI\1", 1, 4, indexStream);
+
+    // write number of reference sequences
+    int32_t numReferenceSeqs = Index.size();
+    fwrite(&numReferenceSeqs, 4, 1, indexStream);
+
+    // iterate over reference sequences
+    BamIndex::const_iterator indexIter = Index.begin();
+    BamIndex::const_iterator indexEnd  = Index.end();
+    for ( ; indexIter != indexEnd; ++ indexIter ) {
+
+        // get reference index data
+        const ReferenceIndex& refIndex = (*indexIter);
+        const BamBinMap& binMap = refIndex.Bins;
+        const LinearOffsetVector& offsets = refIndex.Offsets;
+
+        // write number of bins
+        int32_t binCount = binMap.size();
+        fwrite(&binCount, 4, 1, indexStream);
+
+        // iterate over bins
+        BamBinMap::const_iterator binIter = binMap.begin();
+        BamBinMap::const_iterator binEnd  = binMap.end();
+        for ( ; binIter != binEnd; ++binIter ) {
+
+            // get bin data (key and chunk vector)
+            const uint32_t& binKey = (*binIter).first;
+            const ChunkVector& binChunks = (*binIter).second;
+
+            // save BAM bin key
+            fwrite(&binKey, 4, 1, indexStream);
+
+            // save chunk count
+            int32_t chunkCount = binChunks.size();
+            fwrite(&chunkCount, 4, 1, indexStream);
+
+            // iterate over chunks
+            ChunkVector::const_iterator chunkIter = binChunks.begin();
+            ChunkVector::const_iterator chunkEnd  = binChunks.end();
+            for ( ; chunkIter != chunkEnd; ++chunkIter ) {
+
+                // get current chunk data
+                const Chunk& chunk    = (*chunkIter);
+                const uint64_t& start = chunk.Start;
+                const uint64_t& stop  = chunk.Stop;
+
+                // save chunk offsets
+                fwrite(&start, 8, 1, indexStream);
+                fwrite(&stop,  8, 1, indexStream);
+            }
+        }
+
+        // write linear offsets size
+        int32_t offsetSize = offsets.size();
+        fwrite(&offsetSize, 4, 1, indexStream);
+
+        // iterate over linear offsets
+        LinearOffsetVector::const_iterator offsetIter = offsets.begin();
+        LinearOffsetVector::const_iterator offsetEnd  = offsets.end();
+        for ( ; offsetIter != offsetEnd; ++offsetIter ) {
+
+            // write linear offset value
+            const uint64_t& linearOffset = (*offsetIter);
+            fwrite(&linearOffset, 8, 1, indexStream);
+        }
+    }
+
+    // flush buffer, close file, and return success
+    fflush(indexStream);
+    fclose(indexStream);
+    return true;
+}
diff --git a/BamReader.h b/BamReader.h
index ed8397d..2587b00 100644
--- a/BamReader.h
+++ b/BamReader.h
@@ -1,286 +1,83 @@
-// ***************************************************************************
-// BamReader.h (c) 2009 Derek Barnett, Michael Strömberg
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 24 June 2009 (DB)
-// ---------------------------------------------------------------------------
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading BAM files
-// ***************************************************************************
-
-/*! 
-	\file BamReader.h
-	\brief API for reading BAM files.
-*/
-
-#pragma once
-
-// C++ includes
-#include <algorithm>
-#include <iterator>
-#include <string>
-#include <utility>
-#include <vector>
-
-// zlib includes
-#include <zlib.h>
-
-// BamTools includes
-#include "BamAux.h"
-
-namespace BamTools { 
-
-	//! API for reading BAM files.
-	class BamReader {
-		
-		public:
-			
-			//! Constructor
-			BamReader(void);
-			
-			//! Destructor
-			~BamReader(void);
-		
-		public:
-			
-			/*! 
-				\brief Closes the BAM file.
-				
-				Also closes index file and clears index data, if provided.
-				
-				\sa Open()
-			*/
-			void Close(void);
-			
-			/*! 
-				\brief Access SAM format header data.
-				
-				See SAM format documentation for detailed header description.
-				
-				\return Full header text (no parsing of tags)
-			*/
-			const std::string GetHeaderText(void) const;
-			
-			/*! 
-				\brief Retrieve next alignment.
-				
-				Stores result in bAlignment.  
-				
-				If reference and position are specified by a prior call to Jump(), this method stores the next aligmment that either: 
-				a) overlaps, or 
-				b) begins on/after that specified position.
-				
-				Otherwise this simply stores next alignment, if one exists.
-				
-				Note that this method does not specifiy a 'right bound' position.  
-				If a range is desired, you should supply some stopping criteria. For example:
-				
-				\code
-				BamReader bReader;
-				bReader.Open(bamFile, bamIndexfile);
-				if ( bReader.Jump( someID, somePosition ) {
-					BamAlignment bAlignment;
-					while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= someUpperBound) ) {
-						// do something
-					}
-				}
-				\endcode
-				
-				\param bAlignment destination for alignment data
-				\return success/failure
-				\sa Jump(), Rewind()
-			*/
-			bool GetNextAlignment(BamAlignment& bAlignment);
-			
-			/*! 
-				\brief Get number of reference sequences in BAM file.
-				\return Number of references
-				\sa GetReferenceData(), GetReferenceID()
-			*/
-			const int GetReferenceCount(void) const;
-			
-			/*! 
-				\brief Access reference data.
-				\return Vector of RefData entry
-				\sa GetReferenceCount(), GetReferenceID()
-			*/
-			const RefVector GetReferenceData(void) const;
-			
-			/*! 
-				\brief Get reference ID from name.
-				\param refName name of reference sequence
-				\return reference ID number
-				\sa GetReferenceCount(), GetReferenceData()
-			*/
-			const int GetReferenceID(const std::string& refName) const;
-			
-			/*! 
-				\brief Random access in BAM file.
-				
-				Jump to a specified position on reference sequence. 
-				Position is optional - defaults to beginning of specified reference.
-				
-				Reference and position are stored for use by subsequent calls to GetNextAlignment().
-				
-				\param refID ID number of desired reference
-				\param position left-bound position  
-				\return success/failure
-				\sa GetNextAlignment(), Rewind()
-			*/
-			bool Jump(int refID, unsigned int position = 0);
-			
-			/*! 
-				\brief Opens a BAM file.
-				
-				Index file is optional - sequential reading through a BAM file does not require an index.
-				
-				However, the index is required to perform random-access alignment retrival (using the Jump() method ).  
-				
-				See SAMtools documentation for BAM index generation.
-				
-				\param filename BAM file
-				\param indexFilename BAM index file
-				\sa Jump(), Close()
-			*/
-			void Open(const std::string& filename, const std::string& indexFilename = "");
-			
-			/*!
-				\brief Moves file pointer to beginning of alignment data.
-				
-				A subsequent call to GetNextAlignment() would retrieve the first alignment in the BAM file.
-				Clears any reference and position set by a prior call to Jump()
-				
-				\return success/failure
-				\sa GetNextAlignment(), Jump()
-			*/
-			bool Rewind(void);
-		
-		// --------------------------------------------------------------------------------------
-		// internal methods
-		private:
-			// checks BGZF block header
-			bool BgzfCheckBlockHeader(char* header);
-			// closes the BAM file
-			void BgzfClose(void);
-			// de-compresses the current block
-			int BgzfInflateBlock(int blockLength);
-			// opens the BAM file for reading
-			void BgzfOpen(const std::string& filename);
-			// reads BGZF data into a byte buffer
-			unsigned int BgzfRead(char* data, const unsigned int dataLen);
-			// reads BGZF block
-			int BgzfReadBlock(void);
-			// seek to position in BAM file
-			bool BgzfSeek(int64_t position);
-			// get file position in BAM file
-			int64_t BgzfTell(void);
-			// unpacks a buffer into an unsigned int
-			static inline unsigned int BgzfUnpackUnsignedInt(char* buffer);
-			// unpacks a buffer into an unsigned short
-			static inline unsigned short BgzfUnpackUnsignedShort(char* buffer);
-			// unpacks a buffer into a signed int
-			static inline signed int BgzfUnpackSignedInt(char* buffer);
-			// calculate bins that overlap region ( left to reference end for now )
-			int BinsFromRegion(int, unsigned int, uint16_t[MAX_BIN]);
-			// calculates alignment end position based on starting position and provided CIGAR operations
-			unsigned int CalculateAlignmentEnd(const unsigned int& position, const std::vector<CigarOp>& cigarData);
-			// clear out (delete pointers in) index data structure
-			void ClearIndex(void);
-			// calculate file offset for first alignment chunk overlapping 'left'
-			int64_t GetOffset(int refID, unsigned int left);
-			// checks to see if alignment overlaps current region
-			bool IsOverlap(BamAlignment& bAlignment);
-			// retrieves header text from BAM file
-			void LoadHeaderData(void);
-			// builds BamIndex data structure from BAM index file
-			void LoadIndexData(FILE* indexStream);
-			// retrieves BAM alignment under file pointer
-			bool LoadNextAlignment(BamAlignment& bAlignment);
-			// builds reference data structure from BAM file
-			void LoadReferenceData(void);
-			// open BAM index file (if successful, loads index)
-			void OpenIndex(const std::string& indexFilename);
-		
-		// aligment file & index file data
-		private:
-			BgzfData*   m_BGZF;
-			std::string m_headerText;
-			BamIndex*   m_index;
-			RefVector   m_references;
-			bool        m_isIndexLoaded;
-			int64_t     m_alignmentsBeginOffset;
-		
-		// user-specified region values
-		private:
-			bool         m_isRegionSpecified;
-			int          m_currentRefID;
-			unsigned int m_currentLeft;
-		
-		// BAM character constants
-		private:
-			static const char* DNA_LOOKUP;
-			static const char* CIGAR_LOOKUP;
-	};
-
-	//! \cond
-	// --------------------------------------------------------------------------------------
-	// static inline methods (internal - can exclude from main documentation)
-	
-	// unpacks a buffer into an unsigned int
-	inline unsigned int BamReader::BgzfUnpackUnsignedInt(char* buffer) {
-		union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
-		un.valueBuffer[0] = buffer[0];
-		un.valueBuffer[1] = buffer[1];
-		un.valueBuffer[2] = buffer[2];
-		un.valueBuffer[3] = buffer[3];
-		return un.value;
-	}
-
-	// unpacks a buffer into an unsigned short
-	inline unsigned short BamReader::BgzfUnpackUnsignedShort(char* buffer) {
-		union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un;
-		un.valueBuffer[0] = buffer[0];
-		un.valueBuffer[1] = buffer[1];
-		return un.value;
-	}
-	
-	// unpacks a buffer into a signed int
-	inline signed int BamReader::BgzfUnpackSignedInt(char* buffer) {
-		union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
-		un.valueBuffer[0] = buffer[0];
-		un.valueBuffer[1] = buffer[1];
-		un.valueBuffer[2] = buffer[2];
-		un.valueBuffer[3] = buffer[3];
-		return un.value;
-	}
-
-	// --------------------------------------------------------------------------------------
-	// template classes/methods (internal - can exclude from main documentation)
-	
-	// allows sorting/searching of a vector of pairs (instead of using maps)
-	template <typename Key, typename Value>
-	class LookupKeyCompare {
-
-		typedef std::pair< Key, Value > LookupData;
-		typedef typename LookupData::first_type Key_t;
-		
-		public:
-			bool operator() (const LookupData& lhs, const LookupData& rhs) const { return keyLess(lhs.first, rhs.first); }
-			bool operator() (const LookupData& lhs, const Key_t& k) const        { return keyLess(lhs.first, k); }
-			bool operator() (const Key_t& k, const LookupData& rhs) const        { return keyLess(k, rhs.first); }
-		private:
-			bool keyLess(const Key_t& k1, const Key_t& k2) const { return k1 < k2; }
-	};
-
-	// return index of item if found, else return container.size()
-	template < typename InputIterator, typename EqualityComparable >
-	typename std::iterator_traits<InputIterator>::difference_type
-	Index(const InputIterator& begin, const InputIterator& end, const EqualityComparable& item) {
-		return std::distance(begin, std::find(begin, end, item));
-	}
-	//! \endcond
-	
-}
+// ***************************************************************************
+// BamReader.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_H
+#define BAMREADER_H
+
+// C++ includes
+#include <string>
+
+// BamTools includes
+#include "BamAux.h"
+
+namespace BamTools {
+
+class BamReader {
+
+    // constructor / destructor
+    public:
+        BamReader(void);
+        ~BamReader(void);
+
+    // public interface
+    public:
+
+        // ----------------------
+        // BAM file operations
+        // ----------------------
+
+        // close BAM file
+        void Close(void);
+        // performs random-access jump to reference, position
+        bool Jump(int refID, unsigned int position = 0);
+        // opens BAM file (and optional BAM index file, if provided)
+        void Open(const std::string& filename, const std::string& indexFilename = "");
+        // returns file pointer to beginning of alignments
+        bool Rewind(void);
+
+        // ----------------------
+        // access alignment data
+        // ----------------------
+
+        // retrieves next available alignment (returns success/fail)
+        bool GetNextAlignment(BamAlignment& bAlignment);
+
+        // ----------------------
+        // access auxiliary data
+        // ----------------------
+
+        // returns SAM header text
+        const std::string GetHeaderText(void) const;
+        // returns number of reference sequences
+        const int GetReferenceCount(void) const;
+        // returns vector of reference objects
+        const BamTools::RefVector GetReferenceData(void) const;
+        // returns reference id (used for BamReader::Jump()) for the given reference name
+        const int GetReferenceID(const std::string& refName) const;
+
+        // ----------------------
+        // BAM index operations
+        // ----------------------
+
+        // creates index for BAM file, saves to file (default = bamFilename + ".bai")
+        bool CreateIndex(void);
+
+    // private implementation
+    private:
+        struct BamReaderPrivate;
+        BamReaderPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMREADER_H
diff --git a/BamWriter.cpp b/BamWriter.cpp
index 597bae6..c834d45 100644
--- a/BamWriter.cpp
+++ b/BamWriter.cpp
@@ -1,404 +1,284 @@
-// ***************************************************************************
-// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 24 June 2009 (DB)
-// ---------------------------------------------------------------------------
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-#include "BamWriter.h"
-using namespace BamTools;
-using namespace std;
-
-// constructor
-BamWriter::BamWriter(void)
-{}
-
-// destructor
-BamWriter::~BamWriter(void) {
-	if(mBGZF.IsOpen) BgzfClose();
-}
-
-// closes the BAM file
-void BamWriter::BgzfClose(void) {
-
-	mBGZF.IsOpen = false;
-
-	// flush the BGZF block
-	BgzfFlushBlock();
-
-	// flush and close
-	fflush(mBGZF.Stream);
-	fclose(mBGZF.Stream);
-}
-
-// compresses the current block
-int BamWriter::BgzfDeflateBlock(void) {
-
-	// initialize the gzip header
-	char* buffer = mBGZF.CompressedBlock;
-	unsigned int bufferSize = mBGZF.CompressedBlockSize;
-
-	memset(buffer, 0, 18);
-	buffer[0]  = GZIP_ID1;
-	buffer[1]  = (char)GZIP_ID2;
-	buffer[2]  = CM_DEFLATE;
-	buffer[3]  = FLG_FEXTRA;
-	buffer[9]  = (char)OS_UNKNOWN;
-	buffer[10] = BGZF_XLEN;
-	buffer[12] = BGZF_ID1;
-	buffer[13] = BGZF_ID2;
-	buffer[14] = BGZF_LEN;
-
-	// loop to retry for blocks that do not compress enough
-	int inputLength = mBGZF.BlockOffset;
-	int compressedLength = 0;
-
-	while(true) {
-
-		z_stream zs;
-		zs.zalloc    = NULL;
-		zs.zfree     = NULL;
-		zs.next_in   = (Bytef*)mBGZF.UncompressedBlock;
-		zs.avail_in  = inputLength;
-		zs.next_out  = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
-		zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
-
-		// initialize the zlib compression algorithm
-		if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) {
-			printf("ERROR: zlib deflate initialization failed.\n");
-			exit(1);
-		}
-
-		// compress the data
-		int status = deflate(&zs, Z_FINISH);
-		if(status != Z_STREAM_END) {
-			deflateEnd(&zs);
-
-			// reduce the input length and try again
-			if(status == Z_OK) {
-				inputLength -= 1024;
-				if(inputLength < 0) {
-					printf("ERROR: input reduction failed.\n");
-					exit(1);
-				}
-				continue;
-			}
-
-			printf("ERROR: zlib deflate failed.\n");
-			exit(1);
-		}
-
-		// finalize the compression routine
-		if(deflateEnd(&zs) != Z_OK) {
-			printf("ERROR: deflate end failed.\n");
-			exit(1);
-		}
-
-		compressedLength = zs.total_out;
-		compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
-
-		if(compressedLength > MAX_BLOCK_SIZE) {
-			printf("ERROR: deflate overflow.\n");
-			exit(1);
-		}
-
-		break;
-	}
-
-	// store the compressed length
-	BgzfPackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
-
-	// store the CRC32 checksum
-	unsigned int crc = crc32(0, NULL, 0);
-	crc = crc32(crc, (Bytef*)mBGZF.UncompressedBlock, inputLength);
-	BgzfPackUnsignedInt(&buffer[compressedLength - 8], crc);
-	BgzfPackUnsignedInt(&buffer[compressedLength - 4], inputLength);
-
-	// ensure that we have less than a block of data left
-	int remaining = mBGZF.BlockOffset - inputLength;
-	if(remaining > 0) {
-		if(remaining > inputLength) {
-			printf("ERROR: remainder too large.\n");
-			exit(1);
-		}
-
-		memcpy(mBGZF.UncompressedBlock, mBGZF.UncompressedBlock + inputLength, remaining);
-	}
-
-	mBGZF.BlockOffset = remaining;
-	return compressedLength;
-}
-
-// flushes the data in the BGZF block
-void BamWriter::BgzfFlushBlock(void) {
-
-	// flush all of the remaining blocks
-	while(mBGZF.BlockOffset > 0) {
-
-		// compress the data block
-		int blockLength = BgzfDeflateBlock();
-
-		// flush the data to our output stream
-		int numBytesWritten = fwrite(mBGZF.CompressedBlock, 1, blockLength, mBGZF.Stream);
-
-		if(numBytesWritten != blockLength) {
-			printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);
-			exit(1);
-		}
-
-		mBGZF.BlockAddress += blockLength;
-	}
-}
-
-// opens the BAM file for writing
-void BamWriter::BgzfOpen(const string& filename) {
-
-	mBGZF.Stream = fopen(filename.c_str(), "wb");
-
-	if(!mBGZF.Stream) {
-		printf("ERROR: Unable to open the BAM file (%s) for writing.\n", filename.c_str());
-		exit(1);
-	}
-
-	mBGZF.IsOpen = true;
-}
-
-// writes the supplied data into the BGZF buffer
-unsigned int BamWriter::BgzfWrite(const char* data, const unsigned int dataLen) {
-
-	// initialize
-	unsigned int numBytesWritten = 0;
-	const char* input = data;
-	unsigned int blockLength = mBGZF.UncompressedBlockSize;
-
-	// copy the data to the buffer
-	while(numBytesWritten < dataLen) {
-		unsigned int copyLength = min(blockLength - mBGZF.BlockOffset, dataLen - numBytesWritten);
-		char* buffer = mBGZF.UncompressedBlock;
-		memcpy(buffer + mBGZF.BlockOffset, input, copyLength);
-
-		mBGZF.BlockOffset += copyLength;
-		input             += copyLength;
-		numBytesWritten   += copyLength;
-
-		if(mBGZF.BlockOffset == blockLength) BgzfFlushBlock();
-	}
-
-	return numBytesWritten;
-}
-
-// closes the alignment archive
-void BamWriter::Close(void) {
-	if(mBGZF.IsOpen) BgzfClose();
-}
-
-// creates a cigar string from the supplied alignment
-void BamWriter::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
-
-	// initialize
-	const unsigned int numCigarOperations = cigarOperations.size();
-	packedCigar.resize(numCigarOperations * BT_SIZEOF_INT);
-
-	// pack the cigar data into the string
-	unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
-
-	unsigned int cigarOp;
-	vector<CigarOp>::const_iterator coIter;
-	for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); coIter++) {
-
-		switch(coIter->Type) {
-	  case 'M':
-		  cigarOp = BAM_CMATCH;
-		  break;
-	  case 'I':
-		  cigarOp = BAM_CINS;
-		  break;
-	  case 'D':
-		  cigarOp = BAM_CDEL;
-		  break;
-	  case 'N':
-		  cigarOp = BAM_CREF_SKIP;
-		  break;
-	  case 'S':
-		  cigarOp = BAM_CSOFT_CLIP;
-		  break;
-	  case 'H':
-		  cigarOp = BAM_CHARD_CLIP;
-		  break;
-	  case 'P':
-		  cigarOp = BAM_CPAD;
-		  break;
-	  default:
-		  printf("ERROR: Unknown cigar operation found: %c\n", coIter->Type);
-		  exit(1);
-		}
-
-		*pPackedCigar = coIter->Length << BAM_CIGAR_SHIFT | cigarOp;
-		pPackedCigar++;
-	}
-}
-
-// encodes the supplied query sequence into 4-bit notation
-void BamWriter::EncodeQuerySequence(const string& query, string& encodedQuery) {
-
-	// prepare the encoded query string
-	const unsigned int queryLen = query.size();
-	const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);
-	encodedQuery.resize(encodedQueryLen);
-	char* pEncodedQuery = (char*)encodedQuery.data();
-	const char* pQuery = (const char*)query.data();
-
-	unsigned char nucleotideCode;
-	bool useHighWord = true;
-
-	while(*pQuery) {
-
-		switch(*pQuery) {
-			case '=':
-				nucleotideCode = 0;
-				break;
-			case 'A':
-				nucleotideCode = 1;
-				break;
-			case 'C':
-				nucleotideCode = 2;
-				break;
-			case 'G':
-				nucleotideCode = 4;
-				break;
-			case 'T':
-				nucleotideCode = 8;
-				break;
-			case 'N':
-				nucleotideCode = 15;
-				break;
-			default:
-				printf("ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
-				exit(1);
-		}
-
-		// pack the nucleotide code
-		if(useHighWord) {
-			*pEncodedQuery = nucleotideCode << 4;
-			useHighWord = false;
-		} else {
-			*pEncodedQuery |= nucleotideCode;
-			pEncodedQuery++;
-			useHighWord = true;
-		}
-
-		// increment the query position
-		pQuery++;
-	}
-}
-
-// opens the alignment archive
-void BamWriter::Open(const string& filename, const string& samHeader, const RefVector& referenceSequences) {
-
-	// open the BGZF file for writing
-	BgzfOpen(filename);
-
-	// ================
-	// write the header
-	// ================
-
-	// write the BAM signature
-	const unsigned char SIGNATURE_LENGTH = 4;
-	const char* BAM_SIGNATURE = "BAM\1";
-	BgzfWrite(BAM_SIGNATURE, SIGNATURE_LENGTH);
-
-	// write the SAM header text length
-	const unsigned int samHeaderLen = samHeader.size();
-	BgzfWrite((char*)&samHeaderLen, BT_SIZEOF_INT);
-
-	// write the SAM header text
-	if(samHeaderLen > 0) BgzfWrite(samHeader.data(), samHeaderLen);
-
-	// write the number of reference sequences
-	const unsigned int numReferenceSequences = referenceSequences.size();
-	BgzfWrite((char*)&numReferenceSequences, BT_SIZEOF_INT);
-
-	// =============================
-	// write the sequence dictionary
-	// =============================
-
-	RefVector::const_iterator rsIter;
-	for(rsIter = referenceSequences.begin(); rsIter != referenceSequences.end(); rsIter++) {
-
-		// write the reference sequence name length
-		const unsigned int referenceSequenceNameLen = rsIter->RefName.size() + 1;
-		BgzfWrite((char*)&referenceSequenceNameLen, BT_SIZEOF_INT);
-
-		// write the reference sequence name
-		BgzfWrite(rsIter->RefName.c_str(), referenceSequenceNameLen);
-
-		// write the reference sequence length
-		BgzfWrite((char*)&rsIter->RefLength, BT_SIZEOF_INT);
-	}
-}
-
-// saves the alignment to the alignment archive
-void BamWriter::SaveAlignment(const BamAlignment& al) {
-
-	// initialize
-	const unsigned int nameLen            = al.Name.size() + 1;
-	const unsigned int queryLen           = al.QueryBases.size();
-	const unsigned int numCigarOperations = al.CigarData.size();
-
-	// create our packed cigar string
-	string packedCigar;
-	CreatePackedCigar(al.CigarData, packedCigar);
-	const unsigned int packedCigarLen = packedCigar.size();
-
-	// encode the query
-	string encodedQuery;
-	EncodeQuerySequence(al.QueryBases, encodedQuery);
-	const unsigned int encodedQueryLen = encodedQuery.size();
-
-	// store the tag data length
-	const unsigned int tagDataLength = al.TagData.size() + 1;
-
-	// assign the BAM core data
-	unsigned int buffer[8];
-	buffer[0] = al.RefID;
-	buffer[1] = al.Position;
-	buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | nameLen;
-	buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
-	buffer[4] = queryLen;
-	buffer[5] = al.MateRefID;
-	buffer[6] = al.MatePosition;
-	buffer[7] = al.InsertSize;
-
-	// write the block size
-	const unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;
-	const unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
-
-	BgzfWrite((char*)&blockSize, BT_SIZEOF_INT);
-
-	// write the BAM core
-	BgzfWrite((char*)&buffer, BAM_CORE_SIZE);
-
-	// write the query name
-	BgzfWrite(al.Name.c_str(), nameLen);
-
-	// write the packed cigar
-	BgzfWrite(packedCigar.data(), packedCigarLen);
-
-	// write the encoded query sequence
-	BgzfWrite(encodedQuery.data(), encodedQueryLen);
-
-	// write the base qualities
-	string baseQualities = al.Qualities;
-	char* pBaseQualities = (char*)al.Qualities.data();
-	for(unsigned int i = 0; i < queryLen; i++) pBaseQualities[i] -= 33;
-	BgzfWrite(pBaseQualities, queryLen);
-
-	// write the read group tag
-	BgzfWrite(al.TagData.data(), tagDataLength);
-}
+// ***************************************************************************
+// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+// BGZF includes
+#include "BGZF.h"
+#include "BamWriter.h"
+using namespace BamTools;
+using namespace std;
+
+struct BamWriter::BamWriterPrivate {
+
+    // data members
+    BgzfData mBGZF;
+
+    // constructor / destructor
+    BamWriterPrivate(void) { }
+    ~BamWriterPrivate(void) {
+        mBGZF.Close();
+    }
+
+    // "public" interface
+    void Close(void);
+    void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);
+    void SaveAlignment(const BamTools::BamAlignment& al);
+
+    // internal methods
+    void CreatePackedCigar(const std::vector<CigarOp>& cigarOperations, std::string& packedCigar);
+    void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+};
+
+// -----------------------------------------------------
+// BamWriter implementation
+// -----------------------------------------------------
+
+// constructor
+BamWriter::BamWriter(void) {
+    d = new BamWriterPrivate;
+}
+
+// destructor
+BamWriter::~BamWriter(void) {
+    delete d;
+    d = 0;
+}
+
+// closes the alignment archive
+void BamWriter::Close(void) {
+    d->Close();
+}
+
+// opens the alignment archive
+void BamWriter::Open(const string& filename, const string& samHeader, const RefVector& referenceSequences) {
+    d->Open(filename, samHeader, referenceSequences);
+}
+
+// saves the alignment to the alignment archive
+void BamWriter::SaveAlignment(const BamAlignment& al) {
+    d->SaveAlignment(al);
+}
+
+// -----------------------------------------------------
+// BamWriterPrivate implementation
+// -----------------------------------------------------
+
+// closes the alignment archive
+void BamWriter::BamWriterPrivate::Close(void) {
+    mBGZF.Close();
+}
+
+// creates a cigar string from the supplied alignment
+void BamWriter::BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
+
+    // initialize
+    const unsigned int numCigarOperations = cigarOperations.size();
+    packedCigar.resize(numCigarOperations * BT_SIZEOF_INT);
+
+    // pack the cigar data into the string
+    unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();
+
+    unsigned int cigarOp;
+    vector<CigarOp>::const_iterator coIter;
+    for(coIter = cigarOperations.begin(); coIter != cigarOperations.end(); coIter++) {
+
+        switch(coIter->Type) {
+            case 'M':
+                  cigarOp = BAM_CMATCH;
+                  break;
+            case 'I':
+                  cigarOp = BAM_CINS;
+                  break;
+            case 'D':
+                  cigarOp = BAM_CDEL;
+                  break;
+            case 'N':
+                  cigarOp = BAM_CREF_SKIP;
+                  break;
+            case 'S':
+                  cigarOp = BAM_CSOFT_CLIP;
+                  break;
+            case 'H':
+                  cigarOp = BAM_CHARD_CLIP;
+                  break;
+            case 'P':
+                  cigarOp = BAM_CPAD;
+                  break;
+            default:
+                  printf("ERROR: Unknown cigar operation found: %c\n", coIter->Type);
+                  exit(1);
+        }
+
+        *pPackedCigar = coIter->Length << BAM_CIGAR_SHIFT | cigarOp;
+        pPackedCigar++;
+    }
+}
+
+// encodes the supplied query sequence into 4-bit notation
+void BamWriter::BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
+
+    // prepare the encoded query string
+    const unsigned int queryLen = query.size();
+    const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);
+    encodedQuery.resize(encodedQueryLen);
+    char* pEncodedQuery = (char*)encodedQuery.data();
+    const char* pQuery = (const char*)query.data();
+
+    unsigned char nucleotideCode;
+    bool useHighWord = true;
+
+    while(*pQuery) {
+
+        switch(*pQuery) {
+            case '=':
+                    nucleotideCode = 0;
+                    break;
+            case 'A':
+                    nucleotideCode = 1;
+                    break;
+            case 'C':
+                    nucleotideCode = 2;
+                    break;
+            case 'G':
+                    nucleotideCode = 4;
+                    break;
+            case 'T':
+                    nucleotideCode = 8;
+                    break;
+            case 'N':
+                    nucleotideCode = 15;
+                    break;
+            default:
+                    printf("ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
+                    exit(1);
+        }
+
+        // pack the nucleotide code
+        if(useHighWord) {
+            *pEncodedQuery = nucleotideCode << 4;
+            useHighWord = false;
+        } else {
+            *pEncodedQuery |= nucleotideCode;
+            pEncodedQuery++;
+            useHighWord = true;
+        }
+
+        // increment the query position
+        pQuery++;
+    }
+}
+
+// opens the alignment archive
+void BamWriter::BamWriterPrivate::Open(const string& filename, const string& samHeader, const RefVector& referenceSequences) {
+
+    // open the BGZF file for writing
+    mBGZF.Open(filename, "wb");
+
+    // ================
+    // write the header
+    // ================
+
+    // write the BAM signature
+    const unsigned char SIGNATURE_LENGTH = 4;
+    const char* BAM_SIGNATURE = "BAM\1";
+    mBGZF.Write(BAM_SIGNATURE, SIGNATURE_LENGTH);
+
+    // write the SAM header text length
+    const unsigned int samHeaderLen = samHeader.size();
+    mBGZF.Write((char*)&samHeaderLen, BT_SIZEOF_INT);
+
+    // write the SAM header text
+    if(samHeaderLen > 0) {
+        mBGZF.Write(samHeader.data(), samHeaderLen);
+    }
+
+    // write the number of reference sequences
+    const unsigned int numReferenceSequences = referenceSequences.size();
+    mBGZF.Write((char*)&numReferenceSequences, BT_SIZEOF_INT);
+
+    // =============================
+    // write the sequence dictionary
+    // =============================
+
+    RefVector::const_iterator rsIter;
+    for(rsIter = referenceSequences.begin(); rsIter != referenceSequences.end(); rsIter++) {
+
+        // write the reference sequence name length
+        const unsigned int referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        mBGZF.Write((char*)&referenceSequenceNameLen, BT_SIZEOF_INT);
+
+        // write the reference sequence name
+        mBGZF.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
+
+        // write the reference sequence length
+        mBGZF.Write((char*)&rsIter->RefLength, BT_SIZEOF_INT);
+    }
+}
+
+// saves the alignment to the alignment archive
+void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
+
+    // initialize
+    const unsigned int nameLen            = al.Name.size() + 1;
+    const unsigned int queryLen           = al.QueryBases.size();
+    const unsigned int numCigarOperations = al.CigarData.size();
+
+    // create our packed cigar string
+    string packedCigar;
+    CreatePackedCigar(al.CigarData, packedCigar);
+    const unsigned int packedCigarLen = packedCigar.size();
+
+    // encode the query
+    string encodedQuery;
+    EncodeQuerySequence(al.QueryBases, encodedQuery);
+    const unsigned int encodedQueryLen = encodedQuery.size();
+
+    // store the tag data length
+    const unsigned int tagDataLength = al.TagData.size() + 1;
+
+    // assign the BAM core data
+    unsigned int buffer[8];
+    buffer[0] = al.RefID;
+    buffer[1] = al.Position;
+    buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | nameLen;
+    buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+    buffer[4] = queryLen;
+    buffer[5] = al.MateRefID;
+    buffer[6] = al.MatePosition;
+    buffer[7] = al.InsertSize;
+
+    // write the block size
+    const unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;
+    const unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
+    mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
+
+    // write the BAM core
+    mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
+
+    // write the query name
+    mBGZF.Write(al.Name.c_str(), nameLen);
+
+    // write the packed cigar
+    mBGZF.Write(packedCigar.data(), packedCigarLen);
+
+    // write the encoded query sequence
+    mBGZF.Write(encodedQuery.data(), encodedQueryLen);
+
+    // write the base qualities
+    string baseQualities = al.Qualities;
+    char* pBaseQualities = (char*)al.Qualities.data();
+    for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; }
+    mBGZF.Write(pBaseQualities, queryLen);
+
+    // write the read group tag
+    mBGZF.Write(al.TagData.data(), tagDataLength);
+}
diff --git a/BamWriter.h b/BamWriter.h
index 5294dc9..14de8b5 100644
--- a/BamWriter.h
+++ b/BamWriter.h
@@ -1,114 +1,49 @@
-// ***************************************************************************
-// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 24 June 2009 (DB)
-// ---------------------------------------------------------------------------
-// The BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for producing BAM files
-// ***************************************************************************
-
-/*! 
-	\file BamWriter.h
-	\brief API for writing BAM files.
-*/
-
-#pragma once
-
-// C++ includes
-#include <string>
-#include <vector>
-
-// zlib includes
-#include <zlib.h>
-
-// BamTools includes
-#include "BamAux.h"
-
-namespace BamTools { 
-
-	//! API for writing BAM files.
-	class BamWriter {
-		
-		public:
-			
-			//! Constructor.
-			BamWriter(void);
-			
-			//! Destructor.
-			~BamWriter(void);
-		
-		public:
-			
-			/*! 
-				\brief Closes the alignment archive
-				\sa Open()
-			*/
-			void Close(void);
-			
-			/*! 
-				\brief Opens the alignment archive
-				\param filename output BAM file
-				\param samHeader SAM-format header text
-				\param referenceSequences Reference sequence data 
-				\sa Close()
-			*/
-			void Open(const std::string& filename, const std::string& samHeader, const RefVector& referenceSequences);
-			
-			/*! 
-				\brief Saves an alignment to the archive
-				\param al The BamAlignment to be saved
-			*/
-			void SaveAlignment(const BamAlignment& al);
-		
-		// --------------------------------------------------------------------------------------
-		// internal methods
-		private:
-			// closes the BAM file
-			void BgzfClose(void);
-			// compresses the current block
-			int BgzfDeflateBlock(void);
-			// flushes the data in the BGZF block
-			void BgzfFlushBlock(void);
-			// opens the BAM file for writing
-			void BgzfOpen(const std::string& filename);
-			// packs an unsigned integer into the specified buffer
-			static inline void BgzfPackUnsignedInt(char* buffer, unsigned int value);
-			// packs an unsigned short into the specified buffer
-			static inline void BgzfPackUnsignedShort(char* buffer, unsigned short value);
-			// writes the supplied data into the BGZF buffer
-			unsigned int BgzfWrite(const char* data, const unsigned int dataLen);
-			// calculates the minimum bin that contains a region [begin, end)
-			static inline unsigned int CalculateMinimumBin(unsigned int begin, unsigned int end);
-			// creates a packed cigar string from the supplied alignment
-			static void CreatePackedCigar(const std::vector<CigarOp>& cigarOperations, std::string& packedCigar);
-			// encodes the supplied query sequence into 4-bit notation
-			static void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
-			// our BGZF output object
-			BgzfData mBGZF;
-	};
-
-	//! \cond
-	// --------------------------------------------------------------------------------------
-	// static inline methods (internal - can exclude from main documentation)
-	
-	// packs an unsigned integer into the specified buffer
-	inline void BamWriter::BgzfPackUnsignedInt(char* buffer, unsigned int value) {
-		buffer[0] = (char)value;
-		buffer[1] = (char)(value >> 8);
-		buffer[2] = (char)(value >> 16);
-		buffer[3] = (char)(value >> 24);
-	}
-
-	// packs an unsigned short into the specified buffer
-	inline void BamWriter::BgzfPackUnsignedShort(char* buffer, unsigned short value) {
-		buffer[0] = (char)value;
-		buffer[1] = (char)(value >> 8);
-	}
-	// --------------------------------------------------------------------------------------
-	//! \endcond
-	
-} // end BamTools namespace
\ No newline at end of file
+// ***************************************************************************
+// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 8 December 2009 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_H
+#define BAMWRITER_H
+
+// C++ includes
+#include <string>
+
+// BamTools includes
+#include "BamAux.h"
+
+namespace BamTools {
+
+class BamWriter {
+
+    // constructor/destructor
+    public:
+        BamWriter(void);
+        ~BamWriter(void);
+
+    // public interface
+    public:
+        // closes the alignment archive
+        void Close(void);
+        // opens the alignment archive
+        void Open(const std::string& filename, const std::string& samHeader, const BamTools::RefVector& referenceSequences);
+        // saves the alignment to the alignment archive
+        void SaveAlignment(const BamTools::BamAlignment& al);
+
+    // private implementation
+    private:
+        struct BamWriterPrivate;
+        BamWriterPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMWRITER_H
-- 
2.39.5