From e38ea1095fe8f77bb1fa75a1e8c13c4b904280ae Mon Sep 17 00:00:00 2001
From: barnett <barnett@9efb377e-2e27-44b9-b91a-ec4abb80ed8b>
Date: Mon, 18 May 2009 16:25:20 +0000
Subject: [PATCH] Major overhaul of BamReader. No longer relying on bgzf.* API.
 Sped up random-access. STLUtilities.h no longer needed

git-svn-id: svn+ssh://gene.bc.edu/home/subversion/Derek/BamTools/trunk@19 9efb377e-2e27-44b9-b91a-ec4abb80ed8b
---
 BamConversionMain.cpp |   9 +-
 BamReader.cpp         | 866 +++++++++++++++++++++++-------------------
 BamReader.h           | 501 ++++++++++++------------
 BamReaderMain.cpp     | 250 +++---------
 BamWriter.h           |   3 +
 Makefile              |  41 +-
 README                |  25 +-
 STLUtilities.h        | 146 -------
 bgzf.c                | 493 ------------------------
 bgzf.h                | 119 ------
 10 files changed, 816 insertions(+), 1637 deletions(-)
 delete mode 100644 STLUtilities.h
 delete mode 100644 bgzf.c
 delete mode 100644 bgzf.h
diff --git a/BamConversionMain.cpp b/BamConversionMain.cpp
index 457e58e..0e6333b 100644
--- a/BamConversionMain.cpp
+++ b/BamConversionMain.cpp
@@ -17,12 +17,7 @@ int main(int argc, char* argv[]) {
 
 	// open our BAM reader
 	BamReader reader;
-	reader.SetFilename(inputFilename);
-
-	if(!reader.Open()) {
-		cout << "ERROR: Unable to open the BAM file (" << inputFilename << ")." << endl;
-		exit(1);
-	}
+	reader.Open(inputFilename);
 
 	// retrieve the SAM header text
 	string samHeader = reader.GetHeaderText();
@@ -43,4 +38,4 @@ int main(int argc, char* argv[]) {
 	writer.Close();
 
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/BamReader.cpp b/BamReader.cpp
index f1c7061..b5f15ef 100644
--- a/BamReader.cpp
+++ b/BamReader.cpp
@@ -1,10 +1,16 @@
-// BamReader.cpp
-
-// Derek Barnett
-// Marth Lab, Boston College
-// Last modified: 23 April 2009
+// ***************************************************************************
+// BamReader (c) 2009 Derek Barnett
+// Marth Lab, Deptartment of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
 
 #include "BamReader.h"
+//#include "STLUtilities.h"
+
+#include <cstring>
+
 #include <iostream>
 using std::cerr;
 using std::endl;
@@ -13,194 +19,185 @@ using std::endl;
 const char* BamReader::DNA_LOOKUP   = "=ACMGRSVTWYHKDBN";
 const char* BamReader::CIGAR_LOOKUP = "MIDNSHP";
 
-BamReader::BamReader(const char* filename, const char* indexFilename) 
-	: m_filename( (char*)filename )
-	, m_indexFilename( (char*)indexFilename )
-	, m_file(0)
-	, m_index(NULL)
-	, m_headerText("")
-	, m_isOpen(false)
+// constructor
+BamReader::BamReader(void)
+	: m_index(NULL)
 	, m_isIndexLoaded(false)
+	, m_alignmentsBeginOffset(0)
 	, m_isRegionSpecified(false)
-	, m_isCalculateAlignedBases(true)
 	, m_currentRefID(0)
 	, m_currentLeft(0)
-	, m_alignmentsBeginOffset(0)
-{
-	Open();
-}
+{ }
 
+// destructor
 BamReader::~BamReader(void) {
-
-	// close file
-	if ( m_isOpen ) { Close(); }
+	m_headerText.clear();
+	ClearIndex();
+	Close();
 }
 
-// open BAM file
-bool BamReader::Open(void) {
-
-	if (!m_isOpen && m_filename != NULL ) {
-
-		// open file
-		m_file = bam_open(m_filename, "r"); 
-		
-		// get header info && index info
-		if ( (m_file != NULL) && LoadHeader() ) {
-
-			// save file offset where alignments start
-			m_alignmentsBeginOffset = bam_tell(m_file);
-			
-			// set open flag
-			m_isOpen = true;
-		}
-
-		// try to open (and load) index data, if index file given
-		if ( m_indexFilename != NULL ) {
-			OpenIndex();
-		}
-	}
-
-	return m_isOpen;
+// checks BGZF block header
+bool BamReader::BgzfCheckBlockHeader(char* header) {
+
+	return (header[0] == GZIP_ID1 &&
+            header[1] == (char)GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & FLG_FEXTRA) != 0 &&
+            BgzfUnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
+            header[12] == BGZF_ID1 &&
+            header[13] == BGZF_ID2 &&
+            BgzfUnpackUnsignedShort(&header[14]) == BGZF_LEN);
 }
 
-bool BamReader::OpenIndex(void) {
-
-	if ( m_indexFilename && !m_isIndexLoaded ) {
-		m_isIndexLoaded = LoadIndex();
-	}
-	return m_isIndexLoaded;
+// closes the BAM file
+void BamReader::BgzfClose(void) {
+	m_BGZF.IsOpen = false;
+	fclose(m_BGZF.Stream);
 }
 
-// close BAM file
-bool BamReader::Close(void) {
+// de-compresses the current block
+int BamReader::BgzfInflateBlock(int blockLength) {
 	
-	if (m_isOpen) {
+	// Inflate the block in m_BGZF.CompressedBlock into m_BGZF.UncompressedBlock
+    z_stream zs;
+    zs.zalloc    = NULL;
+    zs.zfree     = NULL;
+    zs.next_in   = (Bytef*)m_BGZF.CompressedBlock + 18;
+    zs.avail_in  = blockLength - 16;
+    zs.next_out  = (Bytef*)m_BGZF.UncompressedBlock;
+    zs.avail_out = m_BGZF.UncompressedBlockSize;
+
+    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    if (status != Z_OK) {
+        printf("inflateInit failed\n");
+        exit(1);
+    }
 
-		// close file
-		int ret = bam_close(m_file);
-		
-		// delete index info
-		if ( m_index != NULL) { delete m_index; }
+    status = inflate(&zs, Z_FINISH);
+    if (status != Z_STREAM_END) {
+        inflateEnd(&zs);
+        printf("inflate failed\n");
+        exit(1);
+    }
 
-		// clear open flag
-		m_isOpen = false;
+    status = inflateEnd(&zs);
+    if (status != Z_OK) {
+        printf("inflateEnd failed\n");
+        exit(1);
+    }
 
-		// clear index flag
-		m_isIndexLoaded = false;
+    return zs.total_out;
+}
 
-		// clear region flag
-		m_isRegionSpecified = false;
+// opens the BAM file for reading
+void BamReader::BgzfOpen(const string& filename) {
 
-		// return success/fail of bam_close
-		return (ret == 0);
-	} 
+	m_BGZF.Stream = fopen(filename.c_str(), "rb");
+	if(!m_BGZF.Stream) {
+		cerr << "ERROR: Unable to open the BAM file " << filename << "for reading." << endl;
+		exit(1);
+	}
 
-	return true;
+	m_BGZF.IsOpen = true;
 }
 
-// get BAM filename
-const char* BamReader::Filename(void) const { 
-	return (const char*)m_filename; 
-}
+// reads BGZF data into buffer
+unsigned int BamReader::BgzfRead(char* data, const unsigned int dataLength) {
 
-// set BAM filename
-void BamReader::SetFilename(const char* filename) {
-	m_filename = (char*)filename;
-}
+    if (dataLength == 0) { return 0; }
 
-// get BAM Index filename
-const char* BamReader::IndexFilename(void) const { 
-	return (const char*)m_indexFilename; 
-}
+	char* output = data;
+    unsigned int numBytesRead = 0;
+    while (numBytesRead < dataLength) {
 
-// set BAM Index filename
-void BamReader::SetIndexFilename(const char* indexFilename) {
-	m_indexFilename = (char*)indexFilename;
-}
+        int bytesAvailable = m_BGZF.BlockLength - m_BGZF.BlockOffset;
+        if (bytesAvailable <= 0) {
+            if ( BgzfReadBlock() != 0 ) { return -1; }
+            bytesAvailable = m_BGZF.BlockLength - m_BGZF.BlockOffset;
+            if ( bytesAvailable <= 0 ) { break; }
+        }
 
-// return full header text
-const string BamReader::GetHeaderText(void) const { 
-	return m_headerText; 
-}
+		char* buffer   = m_BGZF.UncompressedBlock;
+        int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
+        memcpy(output, buffer + m_BGZF.BlockOffset, copyLength);
 
-// return number of reference sequences in BAM file
-const int BamReader::GetReferenceCount(void) const { 
-	return m_references.size();
-}
+        m_BGZF.BlockOffset += copyLength;
+        output             += copyLength;
+        numBytesRead       += copyLength;
+    }
 
-// get RefID from reference name
-const int BamReader::GetRefID(string refName) const { 
-	
-	vector<string> refNames;
-	RefVector::const_iterator refIter = m_references.begin();
-    RefVector::const_iterator refEnd  = m_references.end();
-    for ( ; refIter != refEnd; ++refIter) {
-		refNames.push_back( (*refIter).RefName );
+    if ( m_BGZF.BlockOffset == m_BGZF.BlockLength ) {
+		m_BGZF.BlockAddress = ftello64(m_BGZF.Stream);						
+        m_BGZF.BlockOffset  = 0;
+        m_BGZF.BlockLength  = 0;
     }
 
-	// return 'index-of' refName (if not found, returns refNames.size())
-	return Index( refNames.begin(), refNames.end(), refName );
+	return numBytesRead;
 }
 
-const RefVector BamReader::GetReferenceData(void) const {
-	return m_references;
-}
+int BamReader::BgzfReadBlock(void) {
 
-bool BamReader::Jump(int refID, unsigned int left) {
+    char    header[BLOCK_HEADER_LENGTH];
+    int64_t blockAddress = ftello(m_BGZF.Stream);
 
-	// if index available, and region is valid
-	if ( m_isIndexLoaded && m_references.at(refID).RefHasAlignments && (left <= m_references.at(refID).RefLength) ) { 
-		m_currentRefID = refID;
-		m_currentLeft  = left;
-		m_isRegionSpecified = true;
-		
-		int64_t offset = GetOffset(m_currentRefID, m_currentLeft);
-		if ( offset == -1 ) { return false; }
-		else { return ( bam_seek(m_file, offset, SEEK_SET) == 0 ); }
-	}
-	return false;
-}
+    int count = fread(header, 1, sizeof(header), m_BGZF.Stream);
+	if (count == 0) {
+        m_BGZF.BlockLength = 0;
+        return 0;
+    }
 
-bool BamReader::Rewind(void) {
+    if (count != sizeof(header)) {
+        printf("read block failed - count != sizeof(header)\n");
+        return -1;
+    }
 
-	int refID = 0;
-	int refCount = m_references.size();
-	for ( ; refID < refCount; ++refID ) {
-		if ( m_references.at(refID).RefHasAlignments ) { break; } 
-	}
+    if (!BgzfCheckBlockHeader(header)) {
+        printf("read block failed - CheckBgzfBlockHeader() returned false\n");
+        return -1;
+    }
 
-	m_currentRefID = refID;
-	m_currentLeft = 0;
-	m_isRegionSpecified = false;
+    int blockLength = BgzfUnpackUnsignedShort(&header[16]) + 1;
+    char* compressedBlock = m_BGZF.CompressedBlock;
+    memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
+    int remaining = blockLength - BLOCK_HEADER_LENGTH;
 
-	return ( bam_seek(m_file, m_alignmentsBeginOffset, SEEK_SET) == 0 );
-}	
+    count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, m_BGZF.Stream);
+    if (count != remaining) {
+        printf("read block failed - count != remaining\n");
+        return -1;
+    }
 
-// get next alignment from specified region
-bool BamReader::GetNextAlignment(BamAlignment& bAlignment) {
+    count = BgzfInflateBlock(blockLength);
+    if (count < 0) { return -1; }
 
-	// try to load 'next' read
-	if ( LoadNextAlignment(bAlignment) ) {
+    if (m_BGZF.BlockLength != 0) {
+        m_BGZF.BlockOffset = 0;
+    }
 
-		// if specified region, check for region overlap
-		if ( m_isRegionSpecified ) {
+    m_BGZF.BlockAddress = blockAddress;
+    m_BGZF.BlockLength  = count;
+    return 0;
+}
 
-			// if overlap, return true
-			if ( IsOverlap(bAlignment) ) { return true; }
-			// if not, try the next alignment
-			else { return GetNextAlignment(bAlignment); }
-		} 
+// move file pointer to specified offset
+bool BamReader::BgzfSeek(int64_t position) {
 
-		// not using region, valid read detected, return success
-		else { return true; }
-	}
+	int     blockOffset  = (position & 0xFFFF);
+    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
+    if (fseeko(m_BGZF.Stream, blockAddress, SEEK_SET) != 0) {
+        printf("ERROR: Unable to seek in BAM file\n");
+		exit(1);
+    }
 
-	// no valid alignment to load
-	return false;
+    m_BGZF.BlockLength  = 0;
+    m_BGZF.BlockAddress = blockAddress;
+    m_BGZF.BlockOffset  = blockOffset;
+	return true;
 }
 
-void BamReader::SetCalculateAlignedBases(bool flag) {
-	m_isCalculateAlignedBases = flag;
+// get file position in BAM file
+int64_t BamReader::BgzfTell(void) {
+	return ( (m_BGZF.BlockAddress << 16) | (m_BGZF.BlockOffset & 0xFFFF) );
 }
 
 int BamReader::BinsFromRegion(int refID, unsigned int left, uint16_t list[MAX_BIN]) {
@@ -225,108 +222,152 @@ int BamReader::BinsFromRegion(int refID, unsigned int left, uint16_t list[MAX_BI
 	return i;
 }
 
-uint32_t BamReader::CalculateAlignmentEnd(const unsigned int& position, const vector<CigarOp>& cigarData) {
+unsigned int BamReader::CalculateAlignmentEnd(const unsigned int& position, const vector<CigarOp>& cigarData) {
 
 	// initialize alignment end to starting position
-	uint32_t alignEnd = position;
+	unsigned int alignEnd = position;
 
 	// iterate over cigar operations
 	vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
 	vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
 	for ( ; cigarIter != cigarEnd; ++cigarIter) {
-		if ( (*cigarIter).Type == 'M' || (*cigarIter).Type == 'D' || (*cigarIter).Type == 'N') {
+		char cigarType = (*cigarIter).Type;
+		if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
 			alignEnd += (*cigarIter).Length;
 		}
 	}
 	return alignEnd;
 }
 
-int64_t BamReader::GetOffset(int refID, unsigned int left) {
+void BamReader::ClearIndex(void) {
+
+	if ( m_index ) {
+		// iterate over references
+		vector<RefIndex*>::iterator refIter = m_index->begin();
+		vector<RefIndex*>::iterator refEnd  = m_index->end();
+		for ( ; refIter != refEnd; ++refIter) {
+			RefIndex* aRef = (*refIter);
+			if ( aRef ) {
+				// clear out BAM bins
+				if ( aRef->first ) {
+					BinVector::iterator binIter = (aRef->first)->begin();
+					BinVector::iterator binEnd  = (aRef->first)->end();
+					for ( ; binIter != binEnd; ++binIter ) {
+						ChunkVector* chunks = (*binIter).second;
+						if ( chunks ) { delete chunks; }
+					}
+					delete aRef->first;
+				}
+				// clear BAM linear offsets
+				if ( aRef->second ) { delete aRef->second; }
+				delete aRef;
+			}
+		}
+		delete m_index;
+	}
+}
 
-	//  make space for bins
-	uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2); 		
-	
-	// returns number of bins overlapping (left, right)
-	// stores indices of those bins in 'bins'
-	int numBins = BinsFromRegion(refID, left, bins);				
+// closes the BAM file
+void BamReader::Close(void) {
+	if(m_BGZF.IsOpen) { BgzfClose(); }
+	if(m_index)       { delete m_index; m_index = NULL; }
+	m_isRegionSpecified = false;
+}
 
-	// access index data for refID
-	RefIndex* refIndex = m_index->at(refID);
+const string BamReader::GetHeaderText(void) const {
+	return m_headerText;
+}
 
-	// get list of BAM bins for this reference sequence
-	BinVector* refBins = refIndex->first;
+const int BamReader::GetReferenceCount(void) const {
+	return m_references.size();
+}
 
-	sort( refBins->begin(), refBins->end(), LookupKeyCompare<uint32_t, ChunkVector*>() );
+const RefVector BamReader::GetReferenceData(void) const {
+	return m_references;
+}
 
-	// declare ChunkVector
-	ChunkVector regionChunks;
+const int BamReader::GetReferenceID(const string& refName) const {
 
-	// declaure LinearOffsetVector
-	LinearOffsetVector* linearOffsets = refIndex->second;
+	// retrieve names from reference data
+	vector<string> refNames;
+	RefVector::const_iterator refIter = m_references.begin();
+    RefVector::const_iterator refEnd  = m_references.end();
+    for ( ; refIter != refEnd; ++refIter) {
+		refNames.push_back( (*refIter).RefName );
+    }
 
-	// some sort of linear offset vs bin index voodoo... not completely sure what's going here
-	uint64_t minOffset = ((left>>BAM_LIDX_SHIFT) >= linearOffsets->size()) ? 0 : linearOffsets->at(left>>BAM_LIDX_SHIFT);
+	// return 'index-of' refName ( if not found, returns refNames.size() )
+	return Index( refNames.begin(), refNames.end(), refName );
+}
 
-	BinVector::iterator binBegin = refBins->begin();
-	BinVector::iterator binEnd   = refBins->end();
+// get next alignment (from specified region, if given)
+bool BamReader::GetNextAlignment(BamAlignment& bAlignment) {
 
-	// iterate over bins overlapping region, count chunks
-	for (int i = 0; i < numBins; ++i) {
+	// if valid alignment available
+	if ( LoadNextAlignment(bAlignment) ) {
 		
-		// look for bin with ID=bin[i]
-		BinVector::iterator binIter = binBegin;
-
-		for ( ; binIter != binEnd; ++binIter ) {
+		// if region not specified, return success
+		if ( !m_isRegionSpecified ) { return true; }
 		
-			// if found, increment n_off by number of chunks for each bin
-			if ( (*binIter).first == (uint32_t)bins[i] ) { 
-				
-				// iterate over chunks in that bin
-				ChunkVector* binChunks = (*binIter).second;
-				
-				ChunkVector::iterator chunkIter = binChunks->begin();
-				ChunkVector::iterator chunkEnd  = binChunks->end();
-				for ( ; chunkIter != chunkEnd; ++chunkIter) {
-				
-					// if right bound of pair is greater than min_off (linear offset value), store pair
-					if ( (*chunkIter).second > minOffset) { 
-						regionChunks.push_back( (*chunkIter) ); 
-					}
-				}
-			}
+		// load next alignment until region overlap is found
+		while ( !IsOverlap(bAlignment) ) {
+			// if no valid alignment available (likely EOF) return failure
+			if ( !LoadNextAlignment(bAlignment) ) { return false; }
 		}
-	}
+		
+		// return success (alignment found that overlaps region)
+		return true;
+	} 
+	
+	// no valid alignment
+	else { return false; }
+}
 
-	// clean up temp array
-	free(bins);
+int64_t BamReader::GetOffset(int refID, unsigned int left) {
 
-	// there should be at least 1
-	if(regionChunks.size() == 0) { return -1; }
+	// calculate which bins overlap this region
+	uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
+	int numBins = BinsFromRegion(refID, left, bins);
 
-	// sort chunks by start position
-	sort ( regionChunks.begin(), regionChunks.end(), LookupKeyCompare<uint64_t, uint64_t>() );
+	// get bins for this reference
+	RefIndex* refIndex = m_index->at(refID);
+	BinVector* refBins = refIndex->first;
 
-	// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
-	int numOffsets = regionChunks.size();	
-	for (int i = 1; i < numOffsets; ++i) {
-		if ( regionChunks.at(i-1).second >= regionChunks.at(i).first ) {
-			regionChunks.at(i-1).second = regionChunks.at(i).first;
-		}
-	}
+	// get minimum offset to consider
+	LinearOffsetVector* linearOffsets = refIndex->second;
+	uint64_t minOffset = ((left>>BAM_LIDX_SHIFT) >= linearOffsets->size()) ? 0 : linearOffsets->at(left>>BAM_LIDX_SHIFT);
+
+	// store offsets to beginning of alignment 'chunks' 
+	std::vector<int64_t> chunkStarts;
 	
-	// merge adjacent chunks
-	int l = 0;
-	for (int i = 1; i < numOffsets; ++i) {
-		// if adjacent, expand boundaries of (merged) chunk
-		if ( (regionChunks.at(l).second>>16) == (regionChunks.at(i).first>>16) ) {
-			regionChunks.at(l).second = regionChunks.at(i).second;
+	// reference bin iterators
+	BinVector::const_iterator binIter;
+	BinVector::const_iterator binBegin = refBins->begin();
+	BinVector::const_iterator binEnd   = refBins->end();
+	
+	// store all alignment 'chunk' starts for bins in this region
+	for (int i = 0; i < numBins; ++i ) {
+		binIter = lower_bound(binBegin, binEnd, bins[i], LookupKeyCompare<uint32_t, ChunkVector*>() );
+		if ( (binIter != binEnd) && ( (*binIter).first == bins[i]) ) {
+			ChunkVector* binChunks = (*binIter).second;
+			ChunkVector::const_iterator chunkIter = binChunks->begin();
+			ChunkVector::const_iterator chunkEnd  = binChunks->end();
+			for ( ; chunkIter != chunkEnd; ++chunkIter) {
+				if ( (*chunkIter).second > minOffset ) {
+					chunkStarts.push_back( (*chunkIter).first );
+				}	
+			}
 		}
-		// else, move on to next (merged) chunk index
-		else { regionChunks.at(++l) = regionChunks.at(i); }
 	}
-
-	// return beginning file offset of first chunk for region
-	return (int64_t)regionChunks.at(0).first;
+	
+	// clean up memory
+	free(bins);
+	
+	// if no alignments found
+	if ( chunkStarts.empty() ) { return -1; }
+	
+	// else return smallest offset for alignment starts 
+	else { return *min_element(chunkStarts.begin(), chunkStarts.end()); }
 }
 
 bool BamReader::IsOverlap(BamAlignment& bAlignment) {
@@ -337,92 +378,65 @@ bool BamReader::IsOverlap(BamAlignment& bAlignment) {
 	// read starts after left boundary
 	if ( bAlignment.Position >= m_currentLeft) { return true; }
 
-	// get alignment end
-	uint32_t alignEnd = CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData);
-
 	// return whether alignment end overlaps left boundary
-	return ( alignEnd >= m_currentLeft );
+	return ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) >= m_currentLeft );
 }
 
-bool BamReader::LoadHeader(void) {
+bool BamReader::Jump(int refID, unsigned int left) {
+
+	// if index available, and region is valid
+	if ( (m_index->size() != 0) && m_references.at(refID).RefHasAlignments && (left <= m_references.at(refID).RefLength) ) { 
+		m_currentRefID = refID;
+		m_currentLeft  = left;
+		m_isRegionSpecified = true;
+		
+		int64_t offset = GetOffset(m_currentRefID, m_currentLeft);
+		if ( offset == -1 ) { return false; }
+		else { return BgzfSeek(offset); }
+	}
+	return false;
+}
 
+void BamReader::LoadHeaderData(void) {
+	
 	// check to see if proper BAM header
-	char buf[4];
-	if (bam_read(m_file, buf, 4) != 4) { return false; }
-	if (strncmp(buf, "BAM\001", 4)) {
-		fprintf(stderr, "wrong header type!\n");
-		return false;
+	char buffer[4];
+	if (BgzfRead(buffer, 4) != 4) { 
+		printf("Could not read header type\n");
+		exit(1); 
+	}
+	if (strncmp(buffer, "BAM\001", 4)) {
+		printf("wrong header type!\n");
+		exit(1);
 	}
 	
 	// get BAM header text length
-	int32_t headerTextLength;
-	bam_read(m_file, &headerTextLength, 4);
+	BgzfRead(buffer, 4);
+	const unsigned int headerTextLength = BgzfUnpackUnsignedInt(buffer);
 
 	// get BAM header text
 	char* headerText = (char*)calloc(headerTextLength + 1, 1);
-	bam_read(m_file, headerText, headerTextLength);
+	BgzfRead(headerText, headerTextLength);
 	m_headerText = (string)((const char*)headerText);
 	
 	// clean up calloc-ed temp variable
 	free(headerText);
-
-	// get number of reference sequences
-	int32_t numberRefSeqs;
-	bam_read(m_file, &numberRefSeqs, 4);
-	if (numberRefSeqs == 0) { return false; }
-
-	m_references.reserve((int)numberRefSeqs);
-	
-	// reference variables
-	int32_t  refNameLength;
-	char*    refName;
-	uint32_t refLength;
-
-	// iterate over all references in header
-	for (int i = 0; i != numberRefSeqs; ++i) {
-
-		// get length of reference name
-		bam_read(m_file, &refNameLength, 4);
-		refName = (char*)calloc(refNameLength, 1);
-
-		// get reference name and reference sequence length
-		bam_read(m_file, refName, refNameLength);
-		bam_read(m_file, &refLength, 4);
-
-		// store data for reference
-		RefData aReference;
-		aReference.RefName   = (string)((const char*)refName);
-		aReference.RefLength = refLength;
-		m_references.push_back(aReference);
-
-		// clean up calloc-ed temp variable
-		free(refName);
-	}
-	
-	return true;
 }
 
-bool BamReader::LoadIndex(void) {
-
-	// check to see if index file exists
-	FILE* indexFile;
-	if ( ( indexFile = fopen(m_indexFilename, "r") ) == 0 ) {
-		fprintf(stderr, "The alignment is not indexed. Please run SAMtools \'index\' command first.\n");
-		return false;
-	}
+void BamReader::LoadIndexData(FILE* indexStream) {
 
 	// see if index is valid BAM index
 	char magic[4];
-	fread(magic, 1, 4, indexFile);
+	fread(magic, 1, 4, indexStream);
 	if (strncmp(magic, "BAI\1", 4)) {
-		fprintf(stderr, "Problem with index - wrong \'magic\' number.\n");
-		fclose(indexFile);
-		return false;
+		printf("Problem with index file - invalid format.\n");
+		fclose(indexStream);
+		exit(1);
 	}
 
 	// get number of reference sequences
 	uint32_t numRefSeqs;
-	fread(&numRefSeqs, 4, 1, indexFile);
+	fread(&numRefSeqs, 4, 1, indexStream);
 	
 	// intialize BamIndex data structure
 	m_index = new BamIndex;
@@ -433,7 +447,7 @@ bool BamReader::LoadIndex(void) {
 		
 		// get number of bins for this reference sequence
 		int32_t numBins;
-		fread(&numBins, 4, 1, indexFile);
+		fread(&numBins, 4, 1, indexStream);
 		
 		if (numBins > 0) { m_references.at(i).RefHasAlignments = true; }
 
@@ -446,11 +460,11 @@ bool BamReader::LoadIndex(void) {
 			
 			// get binID 
 			uint32_t binID;
-			fread(&binID, 4, 1, indexFile);
+			fread(&binID, 4, 1, indexStream);
 			
 			// get number of regionChunks in this bin
 			uint32_t numChunks;
-			fread(&numChunks, 4, 1, indexFile);
+			fread(&numChunks, 4, 1, indexStream);
 			
 			// intialize ChunkVector
 			ChunkVector* regionChunks = new ChunkVector;
@@ -462,22 +476,28 @@ bool BamReader::LoadIndex(void) {
 				// get chunk boundaries (left, right) 
 				uint64_t left;
 				uint64_t right;
-				fread(&left, 8, 1, indexFile);
-				fread(&right, 8, 1, indexFile);
+				fread(&left, 8, 1, indexStream);
+				fread(&right, 8, 1, indexStream);
 				
 				// save ChunkPair
 				regionChunks->push_back( ChunkPair(left, right) );
 			}
 			
+			// sort chunks for this bin
+			sort( regionChunks->begin(), regionChunks->end(), LookupKeyCompare<uint64_t, uint64_t>() );
+
 			// save binID, chunkVector for this bin
 			bins->push_back( BamBin(binID, regionChunks) );
 		}
 		
+		// sort bins by binID
+		sort(bins->begin(), bins->end(), LookupKeyCompare<uint32_t, ChunkVector*>() );
+
 		// load linear index for this reference sequence
 		
 		// get number of linear offsets
 		int32_t numLinearOffsets;
-		fread(&numLinearOffsets, 4, 1, indexFile);
+		fread(&numLinearOffsets, 4, 1, indexStream);
 		
 		// intialize LinearOffsetVector
 		LinearOffsetVector* linearOffsets = new LinearOffsetVector;
@@ -487,179 +507,233 @@ bool BamReader::LoadIndex(void) {
 		for (int j = 0; j < numLinearOffsets; ++j) {
 			// get a linear offset
 			uint64_t linearOffset;
-			fread(&linearOffset, 8, 1, indexFile);
+			fread(&linearOffset, 8, 1, indexStream);
 			// store linear offset
 			linearOffsets->push_back(linearOffset);
 		}
 		
+		// sort linear offsets
+		sort( linearOffsets->begin(), linearOffsets->end() );
+
 		// store index data for that reference sequence
 		m_index->push_back( new RefIndex(bins, linearOffsets) );
 	}
 	
 	// close index file (.bai) and return
-	fclose(indexFile);
-	return true;
+	fclose(indexStream);
 }
 
 bool BamReader::LoadNextAlignment(BamAlignment& bAlignment) {
 
-	// check valid alignment block header data
-	int32_t block_len;
-	int32_t ret;
-	uint32_t x[8];
-
-	int32_t bytesRead = 0;
-
 	// read in the 'block length' value, make sure it's not zero
-	if ( (ret = bam_read(m_file, &block_len, 4)) == 0 )        { return false; }
-	bytesRead += 4;
+	char buffer[4];
+	BgzfRead(buffer, 4);
+	const unsigned int blockLength = BgzfUnpackUnsignedInt(buffer);
+	if ( blockLength == 0 ) { return false; }
+
+	// keep track of bytes read as method progresses
+	int bytesRead = 4;
 
-	// read in core alignment data, make the right size of data was read 
-	if ( bam_read(m_file, x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }
+	// read in core alignment data, make sure the right size of data was read 
+	char x[BAM_CORE_SIZE];
+	if ( BgzfRead(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }
 	bytesRead += BAM_CORE_SIZE;
 
-	// set BamAlignment 'core' data
-	bAlignment.RefID         = x[0]; 
-	bAlignment.Position      = x[1];
-	bAlignment.Bin           = x[2]>>16; 
-	bAlignment.MapQuality    = x[2]>>8&0xff; 
-	bAlignment.AlignmentFlag = x[3]>>16; 
-	bAlignment.MateRefID     = x[5]; 
-	bAlignment.MatePosition  = x[6]; 
-	bAlignment.InsertSize    = x[7];
-
-	// fetch & store often-used lengths for character data parsing
-	unsigned int queryNameLength     = x[2]&0xff;
-	unsigned int numCigarOperations  = x[3]&0xffff;
-	unsigned int querySequenceLength = x[4];
+	// set BamAlignment 'core' data and character data lengths
+	unsigned int tempValue;
+	unsigned int queryNameLength;
+	unsigned int numCigarOperations;
+	unsigned int querySequenceLength;
+
+	bAlignment.RefID    = BgzfUnpackUnsignedInt(&x[0]);
+	bAlignment.Position = BgzfUnpackUnsignedInt(&x[4]);
+
+	tempValue             = BgzfUnpackUnsignedInt(&x[8]);		
+	bAlignment.Bin        = tempValue >> 16;
+	bAlignment.MapQuality = tempValue >> 8 & 0xff;
+	queryNameLength       = tempValue & 0xff;
+
+	tempValue                = BgzfUnpackUnsignedInt(&x[12]);	
+	bAlignment.AlignmentFlag = tempValue >> 16;
+	numCigarOperations       = tempValue & 0xffff;
+
+	querySequenceLength     = BgzfUnpackUnsignedInt(&x[16]);
+	bAlignment.MateRefID    = BgzfUnpackUnsignedInt(&x[20]);
+	bAlignment.MatePosition = BgzfUnpackUnsignedInt(&x[24]);
+	bAlignment.InsertSize   = BgzfUnpackUnsignedInt(&x[28]);
+
+	// calculate lengths/offsets
+	const unsigned int dataLength      = blockLength - BAM_CORE_SIZE;
+	const unsigned int cigarDataOffset = queryNameLength;
+	const unsigned int seqDataOffset   = cigarDataOffset + (numCigarOperations * 4);
+	const unsigned int qualDataOffset  = seqDataOffset + (querySequenceLength+1)/2;
+	const unsigned int tagDataOffset   = qualDataOffset + querySequenceLength;
+	const unsigned int tagDataLen      = dataLength - tagDataOffset;
 	
-	// get length of character data
-	int dataLength = block_len - BAM_CORE_SIZE;
-
 	// set up destination buffers for character data
-	uint8_t*  allCharData = (uint8_t*)calloc(sizeof(uint8_t), dataLength);
-	uint32_t* cigarData   = (uint32_t*)(allCharData+queryNameLength);
-
-	const unsigned int tagDataOffset = (numCigarOperations * 4) + queryNameLength + (querySequenceLength + 1) / 2 + querySequenceLength;
-	const unsigned int tagDataLen = dataLength - tagDataOffset;
-	char* tagData = ((char*)allCharData) + tagDataOffset;
+	char* allCharData   = (char*)calloc(sizeof(char), dataLength);
+	uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
+	char* seqData       = ((char*)allCharData) + seqDataOffset;
+	char* qualData      = ((char*)allCharData) + qualDataOffset;
+	char* tagData       = ((char*)allCharData) + tagDataOffset;
 	
 	// get character data - make sure proper data size was read
-	if (bam_read(m_file, allCharData, dataLength) != dataLength) { return false; }
+	if ( BgzfRead(allCharData, dataLength) != dataLength) { return false; }
 	else {
-
+		
 		bytesRead += dataLength;
-
-		// clear out bases, qualities, aligned bases, CIGAR, and tag data
+		
+		// clear out any previous string data
+		bAlignment.Name.clear();
 		bAlignment.QueryBases.clear();
 		bAlignment.Qualities.clear();
 		bAlignment.AlignedBases.clear();
 		bAlignment.CigarData.clear();
 		bAlignment.TagData.clear();
-
+		
 		// save name
 		bAlignment.Name = (string)((const char*)(allCharData));
 		
-		// save bases
-		char singleBase[2];
-		uint8_t* s = ( allCharData + (numCigarOperations*4) + queryNameLength);
-		for (unsigned int i = 0; i < querySequenceLength; ++i) { 
-			// numeric to char conversion
-			sprintf( singleBase, "%c", DNA_LOOKUP[ ( ( s[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ] );
-			// append character data to Bases
-			bAlignment.QueryBases.append( (const char*)singleBase );
+		// save query sequence
+		for (unsigned int i = 0; i < querySequenceLength; ++i) { 	
+			char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+			bAlignment.QueryBases.append( 1, singleBase );
 		}
 		
 		// save sequence length
 		bAlignment.Length = bAlignment.QueryBases.length();
 		
 		// save qualities
-		char singleQuality[4];
-		uint8_t* t = ( allCharData + (numCigarOperations*4) + queryNameLength + (querySequenceLength + 1)/2);
 		for (unsigned int i = 0; i < querySequenceLength; ++i) { 
-			// numeric to char conversion
-			sprintf( singleQuality, "%c", ( t[i]+33 ) ); 
-			// append character data to Qualities
-			bAlignment.Qualities.append( (const char*)singleQuality );
+			char singleQuality = (char)(qualData[i]+33);			// conversion from QV to FASTQ character
+			bAlignment.Qualities.append( 1, singleQuality );
 		}
 		
 		// save CIGAR-related data;
 		int k = 0;
 		for (unsigned int i = 0; i < numCigarOperations; ++i) {
-
+			
 			// build CigarOp struct
 			CigarOp op;
 			op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);
 			op.Type   = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];
-
+			
 			// save CigarOp
 			bAlignment.CigarData.push_back(op);
-
-			// can skip this step if user wants to ignore
-			if (m_isCalculateAlignedBases) {
-
-				// build AlignedBases string
-				switch (op.Type) {
-					
-					case ('M') : 
-					case ('I') : bAlignment.AlignedBases.append( bAlignment.QueryBases.substr(k, op.Length) ); 	// for 'M', 'I' - write bases
-					case ('S') : k += op.Length;                                                     			// for 'S' - skip over query bases
-								 break;
-					
-					case ('D') : 
-					case ('P') : bAlignment.AlignedBases.append( op.Length, '*' );	// for 'D', 'P' - write padding;
-								 break;
-					
-					case ('N') : bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in query sequence
-								 k += op.Length;
-								 break;
-
-					case ('H') : break; 											// for 'H' - do nothing, move to next op
-					
-					default    : assert(false);	// shouldn't get here
-								 break;
-				}
+			
+			// build AlignedBases string
+			switch (op.Type) {
+				
+				case ('M') : 
+				case ('I') : bAlignment.AlignedBases.append( bAlignment.QueryBases.substr(k, op.Length) ); 	// for 'M', 'I' - write bases
+				case ('S') : k += op.Length;                                                     			// for 'S' - skip over query bases
+							 break;
+							 
+				case ('D') : 
+				case ('P') : bAlignment.AlignedBases.append( op.Length, '*' );	// for 'D', 'P' - write padding;
+							 break;
+							 
+				case ('N') : bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in query sequence
+							 k += op.Length;
+							 break;
+							 
+				case ('H') : break; 											// for 'H' - do nothing, move to next op
+							 
+				default    : printf("ERROR: Invalid Cigar op type\n");			// shouldn't get here
+							 exit(1);
 			}
 		}
-
+		
 		// read in the tag data
 		bAlignment.TagData.resize(tagDataLen);
 		memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLen);
 	}
+
 	free(allCharData);
+	return true;
+}
 
-/*
-	// (optional) read tag parsing data
-	string tag;
-	char data;
-	int i = 0;
+void BamReader::LoadReferenceData(void) {
 
-	// still data to read (auxiliary tags)
-	while ( bytesRead < block_len ) {
+	// get number of reference sequences
+	char buffer[4];
+	BgzfRead(buffer, 4);
+	const unsigned int numberRefSeqs = BgzfUnpackUnsignedInt(buffer);
+	if (numberRefSeqs == 0) { return; }
+	m_references.reserve((int)numberRefSeqs);
+	
+	// iterate over all references in header
+	for (unsigned int i = 0; i != numberRefSeqs; ++i) {
 
-		if ( bam_read(m_file, &data, 1) == 1 ) { 
-			
-			++bytesRead;
+		// get length of reference name
+		BgzfRead(buffer, 4);
+		const unsigned int refNameLength = BgzfUnpackUnsignedInt(buffer);
+		char* refName = (char*)calloc(refNameLength, 1);
+		
+		// get reference name and reference sequence length
+		BgzfRead(refName, refNameLength);
+		BgzfRead(buffer, 4);
+		const unsigned int refLength = BgzfUnpackUnsignedInt(buffer);
+		
+		// store data for reference
+		RefData aReference;
+		aReference.RefName   = (string)((const char*)refName);
+		aReference.RefLength = refLength;
+		m_references.push_back(aReference);
+		
+		// clean up calloc-ed temp variable
+		free(refName);
+	}
+}
 
-			if (bytesRead == block_len && data != '\0') {
-				fprintf(stderr, "ERROR: Invalid termination of tag info at end of alignment block.\n");
-				exit(1);
-			}
+// opens BAM file (and index)
+void BamReader::Open(const string& filename, const string& indexFilename) {
 
-			tag.append(1, data);
-			if ( data == '\0' ) {
-				bAlignment.Tags.push_back(tag);
-				tag = "";
-				i = 0;
-			} else {
-				if ( (i == 1) && (i == 2) ) { tag.append(1, ':'); }
-				++i;
-			}
-		} else {
-			fprintf(stderr, "ERROR: Could not read tag info.\n");
+	// open the BGZF file for reading, retrieve header text & reference data
+	BgzfOpen(filename);
+	LoadHeaderData();	
+	LoadReferenceData();
+
+	// store file offset of first alignment
+	m_alignmentsBeginOffset = BgzfTell();
+
+	// open index file & load index data (if exists)
+	OpenIndex(indexFilename);
+}
+
+void BamReader::OpenIndex(const string& indexFilename) {
+
+	// if index file exists
+	if (!indexFilename.empty()) {
+
+		// open index
+		FILE* indexStream = fopen(indexFilename.c_str(), "rb");
+		
+		// abort on error
+		if(!indexStream) {
+			cerr << "ERROR: Unable to open the BAM index file " << indexFilename << "for reading." << endl;
 			exit(1);
 		}
+	
+		// build up index data structure
+		LoadIndexData(indexStream);
 	}
-*/
-	return true;
 }
+
+bool BamReader::Rewind(void) {
+
+	// find first reference that has alignments in the BAM file
+	int refID = 0;
+	int refCount = m_references.size();
+	for ( ; refID < refCount; ++refID ) {
+		if ( m_references.at(refID).RefHasAlignments ) { break; } 
+	}
+
+	// store default bounds for first alignment
+	m_currentRefID = refID;
+	m_currentLeft = 0;
+	m_isRegionSpecified = false;
+
+	// return success/failure of seek
+	return BgzfSeek(m_alignmentsBeginOffset);
+}	
diff --git a/BamReader.h b/BamReader.h
index e2b4598..17cb86a 100644
--- a/BamReader.h
+++ b/BamReader.h
@@ -1,242 +1,259 @@
-// BamReader.h
-
-/* The MIT License
-
-   Copyright (c) 2008 Genome Research Ltd (GRL).
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/*
-	Implementation of BAM-parsing was translated to C++ directly from Heng Li's SAMtools package 
-        (thus the carryover of above MIT license)
-	Contact: Derek Barnett <barnetde@bc.edu>
-*/
-
-// Derek Barnett
-// Marth Lab, Boston College
-// Last modified: 23 April 2009
-
-#ifndef BAMREADER_H
-#define BAMREADER_H
-
-// custom includes
-#include "BamAlignment.h"
-#include "STLUtilities.h"
-
-// C library includes
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef WIN32
-typedef char                 int8_t;
-typedef unsigned char       uint8_t;
-typedef short               int16_t;
-typedef unsigned short     uint16_t;
-typedef int                 int32_t;
-typedef unsigned int       uint32_t;
-typedef long long           int64_t;
-typedef unsigned long long uint64_t;
-#else
-#include <stdint.h>
-#endif
-
-// BGZF library includes/defines
-#include "bgzf.h"
-typedef BGZF* BamFile;
-#define bam_open(f_name, mode)      bgzf_open(f_name, mode)
-#define bam_close(f_ptr)            bgzf_close(f_ptr)
-#define bam_read(f_ptr, buf, size)  bgzf_read(f_ptr, buf, size)
-#define bam_write(f_ptr, buf, size) bgzf_write(f_ptr, buf, size)
-#define bam_tell(f_ptr)             bgzf_tell(f_ptr)
-#define bam_seek(f_ptr, pos, dir)   bgzf_seek(f_ptr, pos, dir)
-
-// size of alignment data block in BAM file (bytes)
-#define BAM_CORE_SIZE 32
-
-// BAM indexing constants
-#define MAX_BIN           37450 // =(8^6-1)/7+1
-#define BAM_MIN_CHUNK_GAP 32768 
-#define BAM_LIDX_SHIFT    14
-
-// CIGAR-retrieval mask/shift constants
-#define BAM_CIGAR_SHIFT 4
-#define BAM_CIGAR_MASK  ((1 << BAM_CIGAR_SHIFT) - 1)
-
-// CIGAR-operation types
-#define BAM_CMATCH      0
-#define BAM_CINS        1
-#define BAM_CDEL        2
-#define BAM_CREF_SKIP   3
-#define BAM_CSOFT_CLIP  4
-#define BAM_CHARD_CLIP  5
-#define BAM_CPAD        6
-
-// --------------------------- //
-// Bam header info
-// --------------------------- //
-
-// --------------------------- //
-// BamIndex-related typedefs
-// --------------------------- //
-
-// offset for linear indexing
-typedef vector<uint64_t> LinearOffsetVector;
-
-// chunk boundaries
-typedef pair<uint64_t, uint64_t> ChunkPair;
-// list of chunks in a BAM bin
-typedef vector<ChunkPair> ChunkVector;
-
-// BAM bins for a reference sequence
-// replaces khash - uint32_t is key, ChunkVector is value
-typedef pair<uint32_t, ChunkVector*> BamBin;
-typedef vector<BamBin> BinVector;
-
-// each reference sequence has a BinVector and LinearOffsetVector
-typedef pair<BinVector*, LinearOffsetVector*> RefIndex;
-
-// full BamIndex defined as: 
-typedef vector<RefIndex*> BamIndex;
-
-// ---------------------------------------------------------------------------//
-
-class BamReader {
-	
-	public:
-		// constructors
-		BamReader(const char* fileName = NULL, const char* indexFilename = NULL);
-
-	public:
-		// destructor
-		~BamReader(void);
-	
-	// BAM interface methods
-	public:
-
-		// ----------------------- //
-		// File manipulation
-		// ----------------------- //
-		
-		// open BAM file (automatically opens index if provided)
-		bool Open(void);
-		
-		// open BAM index (allows index to be opened separately - i.e. sometime after the BAM file is opened)
-		bool OpenIndex(void);
-		
-		// close BAM file
-		bool Close(void);
-		
-		// get BAM filename
-		const char* Filename(void) const;
-	
-		// set BAM filename
-		void SetFilename(const char*);
-
-		// get BAM Index filename
-		const char* IndexFilename(void) const;
-		
-		// set BAM Index filename
-		void SetIndexFilename(const char*);
-
-		// ----------------------- //
-		// Access BAM header
-		// ----------------------- //
-		
-		// return full header text
-		const string GetHeaderText(void) const;
-		
-		// --------------------------------- //
-		// Access reference sequence info
-		// --------------------------------- //
-		
-		// return number of reference sequences in BAM file
-		const int GetReferenceCount(void) const;
-
-		// return vector of RefData entries
-		const RefVector GetReferenceData(void) const;
-
-		// get refID from reference name
-		const int GetRefID(string refName) const;		
-		
-		// ----------------------------------------- //
-		// File position moving
-		// ----------------------------------------- //
-
-		// jumps to 'left' position on refID
-		// actually jumps before position, so reads that overlap 'left' are included as well
-		// 'left' defaults to reference begin if not specified
-		bool Jump(int refID, unsigned int left = 0);
-
-		// Jump to beginning of BAM file, clears any region previously set by Jump()
-		bool Rewind(void);
-		
-		// ------------------------------ //
-		// Access alignments
-		// ------------------------------ //
-		
-		// get next alignment
-		bool GetNextAlignment(BamAlignment& read);
-
-		// allow user to specifiy whether 'AlignedBases' string is calculated when alignment is loaded
-		void SetCalculateAlignedBases(bool);
-
-	// internal utility methods
-	private:
-		int      BinsFromRegion(int, unsigned int, uint16_t[MAX_BIN]);
-		uint32_t CalculateAlignmentEnd(const unsigned int&, const vector<CigarOp>&);
-		int64_t  GetOffset(int, unsigned int);
-		bool     IsOverlap(BamAlignment&);
-		bool     LoadHeader(void);
-		bool     LoadIndex(void);
-		bool     LoadNextAlignment(BamAlignment&);
-
-	private:	
-		// main BAM reader components
-		char*      m_filename;
-		char*      m_indexFilename;
-		BamFile    m_file;
-		BamIndex*  m_index;
-		RefVector  m_references;
-		string     m_headerText;
-
-		// state flags
-		bool m_isOpen;			         // BAM file is open for processing
-		bool m_isIndexLoaded;	         // BAM Index data is loaded and available for processing
-		bool m_isRegionSpecified;        // a region has been specified - specifically, a user has called Jump()
-		bool m_isCalculateAlignedBases;  // build 'AlignedBases' string when getting an alignment, otherwise skip (default = true)
-
-		// region values
-		int m_currentRefID;
-		unsigned int m_currentLeft;
-
-		// file offset of 1st read in BAM file
-		int64_t m_alignmentsBeginOffset;
-
-	private:
-		// BAM character constants
-		static const char* DNA_LOOKUP;
-		static const char* CIGAR_LOOKUP;
-};
-
-#endif /* BAMREADER_H */
+// ***************************************************************************
+// BamReader (c) 2009 Derek Barnett
+// Marth Lab, Deptartment of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#pragma once
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace std;
+
+#include "BamAlignment.h"
+
+// our zlib constants
+#define GZIP_ID1             31
+#define GZIP_ID2            139
+#define CM_DEFLATE            8
+#define FLG_FEXTRA            4
+#define OS_UNKNOWN          255
+#define BGZF_XLEN             6
+#define BGZF_ID1             66
+#define BGZF_ID2             67
+#define BGZF_LEN              2
+#define GZIP_WINDOW_BITS    -15
+#define Z_DEFAULT_MEM_LEVEL   8
+
+// our BZGF constants
+#define BLOCK_HEADER_LENGTH    18
+#define BLOCK_FOOTER_LENGTH     8
+#define MAX_BLOCK_SIZE      65536
+#define DEFAULT_BLOCK_SIZE  65536
+
+// our BAM constants
+#define BAM_CORE_SIZE  32
+#define BAM_CMATCH      0
+#define BAM_CINS        1
+#define BAM_CDEL        2
+#define BAM_CREF_SKIP   3
+#define BAM_CSOFT_CLIP  4
+#define BAM_CHARD_CLIP  5
+#define BAM_CPAD        6
+#define BAM_CIGAR_SHIFT 4
+#define BAM_CIGAR_MASK  ((1 << BAM_CIGAR_SHIFT) - 1)
+
+// BAM indexing constants
+#define MAX_BIN           37450 // =(8^6-1)/7+1
+#define BAM_MIN_CHUNK_GAP 32768 
+#define BAM_LIDX_SHIFT    14
+
+// our variable sizes
+#define SIZEOF_INT 4
+
+// define our BZGF structure
+#ifndef BGZF_DATA
+#define BGZF_DATA
+struct BgzfData {
+	unsigned int UncompressedBlockSize;
+	unsigned int CompressedBlockSize;
+	unsigned int BlockLength;
+	unsigned int BlockOffset;
+	uint64_t BlockAddress;
+	bool IsOpen;
+	FILE* Stream;
+	char* UncompressedBlock;
+	char* CompressedBlock;
+
+	// constructor
+	BgzfData(void)
+		: UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
+		, CompressedBlockSize(MAX_BLOCK_SIZE)
+		, BlockLength(0)
+		, BlockOffset(0)
+		, BlockAddress(0)
+		, IsOpen(false)
+		, Stream(NULL)
+		, UncompressedBlock(NULL)
+		, CompressedBlock(NULL)
+	{
+		try {
+			CompressedBlock   = new char[CompressedBlockSize];
+			UncompressedBlock = new char[UncompressedBlockSize];
+		} catch(bad_alloc&) {
+			printf("ERROR: Unable to allocate memory for our BGZF object.\n");
+			exit(1);
+		}
+	}
+
+	// destructor
+	~BgzfData(void) {
+		if(CompressedBlock)   delete [] CompressedBlock;
+		if(UncompressedBlock) delete [] UncompressedBlock;
+	}
+};
+#endif // BGZF_DATA
+
+
+// --------------------------- //
+// BamIndex-related typedefs
+// --------------------------- //
+
+// offset for linear indexing
+typedef vector<uint64_t> LinearOffsetVector;
+
+// alignment 'chunk' boundaries
+typedef pair<uint64_t, uint64_t> ChunkPair;
+typedef vector<ChunkPair> ChunkVector;
+
+// BAM bins.. each contains (binID, alignment 'chunks')
+typedef pair<uint32_t, ChunkVector*> BamBin;
+typedef vector<BamBin> BinVector;
+
+// each reference sequence has a BinVector and LinearOffsetVector
+typedef pair<BinVector*, LinearOffsetVector*> RefIndex;
+
+// full BamIndex defined as: 
+typedef vector<RefIndex*> BamIndex;
+
+class BamReader {
+
+	// constructor/destructor
+	public:
+		BamReader(void);
+		~BamReader(void);
+
+	// public interface
+	public:
+		// closes the BAM file
+		void Close(void);
+		// retrieves header text
+		const string GetHeaderText(void) const;
+		// saves the alignment to the alignment archive
+		bool GetNextAlignment(BamAlignment& bAlignment);
+		// return number of reference sequences in BAM file
+		const int GetReferenceCount(void) const;
+		// return vector of RefData entries
+		const RefVector GetReferenceData(void) const;
+		// get refID from reference name
+		const int GetReferenceID(const string& refName) const;
+		// jumps to 'left' position on refID
+		bool Jump(int refID, unsigned int left = 0);
+		// opens the BAM file
+		void Open(const string& filename, const string& indexFilename = "");
+		// move file pointer back to first alignment
+		bool Rewind(void);
+
+	// internal methods
+	private:
+		// checks BGZF block header
+		bool BgzfCheckBlockHeader(char* header);
+		// closes the BAM file
+		void BgzfClose(void);
+		// de-compresses the current block
+		int BgzfInflateBlock(int blockLength);
+		// opens the BAM file for reading
+		void BgzfOpen(const string& filename);
+		// reads BGZF data into a byte buffer
+		unsigned int BgzfRead(char* data, const unsigned int dataLen);
+		// reads BGZF block
+		int BgzfReadBlock(void);
+		// seek to position in BAM file
+		bool BgzfSeek(int64_t position);
+		// get file position in BAM file
+		int64_t BgzfTell(void);
+		// unpacks a buffer into an unsigned int
+		static inline unsigned int BgzfUnpackUnsignedInt(char* buffer);
+		// unpacks a buffer into an unsigned short
+		static inline unsigned short BgzfUnpackUnsignedShort(char* buffer);
+		// 	calculate bins that overlap region ( left to reference end for now )
+		int BinsFromRegion(int, unsigned int, uint16_t[MAX_BIN]);
+		// calculates alignment end position based on starting position and provided CIGAR operations
+		unsigned int CalculateAlignmentEnd(const unsigned int& position, const vector<CigarOp>& cigarData);
+		// clear out (delete pointers in) index data structure
+		void ClearIndex(void);
+		// calculate file offset for first alignment chunk overlapping 'left'
+		int64_t GetOffset(int refID, unsigned int left);
+		// checks to see if alignment overlaps current region
+		bool IsOverlap(BamAlignment& bAlignment);
+		// retrieves header text from BAM file
+		void LoadHeaderData(void);
+		// builds BamIndex data structure from BAM index file
+		void LoadIndexData(FILE* indexStream);
+		// retrieves BAM alignment under file pointer
+		bool LoadNextAlignment(BamAlignment& bAlignment);
+		// builds reference data structure from BAM file
+		void LoadReferenceData(void);
+		// open BAM index file (if successful, loads index)
+		void OpenIndex(const string& indexFilename);
+	
+	// aligment file & index file data
+	private:
+		BgzfData  m_BGZF;
+		string    m_headerText;
+		BamIndex* m_index;
+		RefVector m_references;
+		bool      m_isIndexLoaded;
+		int64_t   m_alignmentsBeginOffset;
+
+	// user-specified region values
+	private:
+		bool         m_isRegionSpecified;
+		int          m_currentRefID;
+		unsigned int m_currentLeft;
+
+	// BAM character constants
+	private:
+		static const char* DNA_LOOKUP;
+		static const char* CIGAR_LOOKUP;
+};
+
+// unpacks a buffer into an unsigned int
+inline unsigned int BamReader::BgzfUnpackUnsignedInt(char* buffer) {
+	union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
+	un.valueBuffer[0] = buffer[0];
+	un.valueBuffer[1] = buffer[1];
+	un.valueBuffer[2] = buffer[2];
+	un.valueBuffer[3] = buffer[3];
+	return un.value;
+}
+
+// unpacks a buffer into an unsigned short
+inline unsigned short BamReader::BgzfUnpackUnsignedShort(char* buffer) {
+	union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un;
+	un.valueBuffer[0] = buffer[0];
+	un.valueBuffer[1] = buffer[1];
+	return un.value;
+}
+
+// allows sorting/searching of a vector of pairs (instead of using maps)
+template <typename Key, typename Value>
+class LookupKeyCompare {
+
+	typedef pair< Key, Value > LookupData;
+	typedef typename LookupData::first_type Key_t;
+	
+	public:
+		bool operator() (const LookupData& lhs, const LookupData& rhs) const { return keyLess(lhs.first, rhs.first); }
+		bool operator() (const LookupData& lhs, const Key_t& k) const        { return keyLess(lhs.first, k); }
+		bool operator() (const Key_t& k, const LookupData& rhs) const        { return keyLess(k, rhs.first); }
+	private:
+		bool keyLess(const Key_t& k1, const Key_t& k2) const { return k1 < k2; }
+};
+
+// return index of item if found, else return container.size()
+template < typename InputIterator, typename EqualityComparable >
+typename iterator_traits<InputIterator>::difference_type
+Index(const InputIterator& begin, const InputIterator& end, const EqualityComparable& item) {
+	return distance(begin, find(begin, end, item));
+}
+
diff --git a/BamReaderMain.cpp b/BamReaderMain.cpp
index 5426763..d568fed 100644
--- a/BamReaderMain.cpp
+++ b/BamReaderMain.cpp
@@ -2,7 +2,7 @@
 
 // Derek Barnett
 // Marth Lab, Boston College
-// Last modified: 6 April 2009
+// Last modified: 12 May 2009
 
 #include "BamReader.h"
 
@@ -32,221 +32,73 @@ int main(int argc, char* argv[]) {
 	const char* bamFilename = argv[1];
 	const char* bamIndexFilename = argv[2];
 
-	string refName;
-	int refID;
-
-	int alignmentCount;
-
-	vector<string> refNames;
-
+	int alignmentsRead = 0;
 	BamAlignment bAlignment;
-	BamAlignmentVector alignments;
-
-	RefVector references;
-
-	// ------------------------------------------------------------------------------------------------------ //
-	// Declare BamReader & open BAM file - automatically loads up header and index file (.bai) information
-	// ------------------------------------------------------------------------------------------------------ //
 	
-	BamReader bReader(bamFilename, bamIndexFilename);
+	BamReader bReader;
+	cerr << endl << "Opening BAM file (and index)" << endl << endl;
+	bReader.Open(bamFilename, bamIndexFilename);
 
-	cerr << endl;
-	cerr << "Opening BamReader for BAM file: " << bamFilename << " ..... ";
-	
-	if ( bReader.Open() ) { 
-		cerr << "ok" << endl; 
-	}
-	else {
-		cerr << "error" << endl;
-		exit(1);
+	string header = bReader.GetHeaderText();
+	cerr << "Printing header text..." << endl;
+	if ( header.empty() ) {
+		cerr << "No header provided." << endl << endl;
+	} else {
+		cerr << "----------------------" << endl;
+		cerr << "Header Text: " << endl;
+		cerr << "----------------------" << endl;
+		cerr << header << endl << endl;
 	}
-	
-	// ------------------------------------------------------------ //
-	// Find out how many reference sequences are in BAM file
-	// ------------------------------------------------------------ //
-	
-	references = bReader.GetReferenceData();
 
-	cerr << endl;
-	cerr << "Total number of ref seqs: " << references.size() << endl;
-	
-	// ---------------------------------------------------------------------------- //
-	// Get the names/lengths of all the reference sequences that have alignments
-	// ---------------------------------------------------------------------------- //
-	
-	cerr << endl;
-	cerr << "All ref sequences with alignments:" << endl;
+	RefVector references = bReader.GetReferenceData();
+	cerr << "Printing references..." << endl;
+	RefVector::const_iterator refIter = references.begin();
+	RefVector::const_iterator refEnd  = references.end();
+	for ( ; refIter != refEnd; ++refIter) {
+		cerr << "Reference entry: " << endl;
+		cerr << (*refIter).RefName << endl;
+		cerr << (*refIter).RefLength << endl;
+		cerr << "Has alignments? : " << ( ((*refIter).RefHasAlignments) ? "yes" : "no" ) << endl;
+	}
 	cerr << endl;
 
+	alignmentsRead = 0;
+	while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {
+		cerr << "Alignment " << alignmentsRead << endl;
+		cerr << bAlignment.Name << endl;
+		cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;
+		++alignmentsRead;
+	}
 
-	if ( !references.empty() ) {
-		
-		RefVector::iterator refIter = references.begin();
-		RefVector::iterator refEnd  = references.end();
-		
-		refID = 0;
-		// iterate over reference names, print to STDERR
-		for( ; refIter != refEnd; ++refIter) {
-
-			if ( (*refIter).RefHasAlignments ) {
-				cerr << "ID:     " << refID << endl;
-				cerr << "Name:   " << (*refIter).RefName << endl;
-				cerr << "Length: " << (*refIter).RefLength << endl;
-				cerr << endl;
+	cerr << "Jumping in BAM file" << endl;
+	if ( bReader.Jump(1, 5000) ) {
+		cerr << "Jumping seems ok - getting first 10 alignments that start at or after chr2:5000" << endl;
+
+		alignmentsRead = 0;
+		while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {
+			if ( bAlignment.Position >= 5000 ) { 
+				cerr << "Alignment " << alignmentsRead << endl;
+				cerr << bAlignment.Name << endl;
+				cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;
+				++alignmentsRead;
 			}
-
-			++refID;
 		}
 	}
 
-	// --------------------------------------------------------------------------------------------- //
-	// Get the SAM-style header text, if available (not available in the example file shown here)
-	// --------------------------------------------------------------------------------------------- //	
-	
-	cerr << "------------------------------------" << endl;
-	cerr << "SAM header text: " << endl;
-	
-	// get (SAM) header text
-	string header = bReader.GetHeaderText();
-	
-	cerr << ( (header.empty()) ? "no header data" : header ) << endl;
-	cerr << "------------------------------------" << endl;
-
-	// --------------------------------------------------------------------------------------------- //
-	// Here we start accessing alignments
-	// The first method shows how to iterate over all reads in a BamFile
-	// This method will work on any BAM file (sorted/non-sorted, with/without an index)
-	// --------------------------------------------------------------------------------------------- //
-
-	// Call Rewind() to make sure you're at the 1st alignment in the file
-	// Please note, however, it's not necessary in this case, since file pointer initially set to 1st alignment
-	// but this is probably a good habit to develop to ensure correctness and make your intent obvious in the code
-
-	cerr << "Getting (up to) first 1000 alignments" << endl;
-
-	// start at 1st alignment
+	cerr << "Rewinding BAM file" << endl;
 	if ( bReader.Rewind() ) {
+		cerr << "Rewind seems to be ok - getting first 10 alignments" << endl;
 		
-		alignmentCount = 0;
-		while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {
-		
-			// disregard unmapped alignments
-			if ( bAlignment.IsMapped() ) {
-
-				++alignmentCount;
-				
-				cout << "----------------------------" << endl;
-				cout << "Alignment " << alignmentCount << endl;
-				cout << bAlignment.Name << endl;
-				cout << bAlignment.AlignedBases << endl;
-				cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
-				
-				cout << "Cigar Data: " << endl;
-
-				vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
-				vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();
-				for ( ; cigarIter != cigarEnd; ++cigarIter ) {
-					cout << "Type: " << (*cigarIter).Type << "\tLength: " << (*cigarIter).Length << endl;
-				}
-
-				if(!bAlignment.TagData.empty()) {
-				  cout << "Tag data is present." << endl;
-				  string readGroup;
-				  if(bAlignment.GetReadGroup(readGroup)) {
-				    cout << "- read group: " << readGroup << endl;
-				  }
-				}
-			}
+		alignmentsRead = 0;
+		while ( bReader.GetNextAlignment(bAlignment) && (alignmentsRead <= 10) ) {
+			cerr << "Alignment " << alignmentsRead << endl;
+			cerr << bAlignment.Name << endl;
+			cerr << "Aligned to ref " << bAlignment.RefID << " at position " << bAlignment.Position << endl;
+			++alignmentsRead;
 		}
-
-		cerr << "Found " << alignmentCount << " alignments." << endl;
-	} else { cerr << "Could not rewind" << endl; }
-
-	// ---------------------------------------------------------------------------------------------------------- //
-	// You can iterate over each individual alignment that overlaps a specified region
-	// Set the refID & left boundary parameters using Jump(refID, left)
-	// Jump() actually positions the file pointer actually before the left boundary
-	// This ensures that reads beginning before, but overlapping 'left' are included as well 
-	// Client is responsible for setting/checking right boundary - see below
-	// ---------------------------------------------------------------------------------------------------------- //
-/*
-	cerr << "Jumping to region" << endl;
-
-	// get refID using a known refName
-	refName = "seq1";
-	refID = bReader.GetRefID(refName);
-	if (refID == (int)references.size()) { 
-		cerr << "Reference " << refName << " not found." << endl;
-		exit(1);
 	}
 
-	// set left boundary
-	unsigned int leftBound = 500;
-
-	// set right boundary - either user-specified number to set a discrete region
-	//                      OR you can query the BamReader for the end of the reference
-	unsigned int rightBound = references.at(refID).RefLength;
-
-	cerr << endl;
-	cerr << "Iterating over alignments on reference: " << refName << " from " << leftBound << " to ref end (" << rightBound << ")" << endl;
-
-	// set region - specific region on reference
-	if ( bReader.Jump(refID, leftBound) ) { 
-
-		alignmentCount = 0;
-		while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= rightBound) ) {
-		
-			if ( bAlignment.IsMapped() ) {
-
-				++alignmentCount;
-			
-				if ( (alignmentCount % 100000) == 0) { cerr << "Retrieved " << alignmentCount << " so far..." << endl; }
-			
-				cout << "----------------------------" << endl;
-				cout << "Alignment " << alignmentCount << endl;
-				cout << bAlignment.Name << endl;
-				cout << bAlignment.AlignedBases << endl;
-				cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
-			}
-		}
-
-		cerr << "Found " << alignmentCount << " alignments." << endl; 
-	} else { cerr << "Could not jump to region specified" << endl; }
-
-	// ----------------------------------------------------------------------------------------------------- //
-	// You can 'rewind' back to beginning of BAM file at any time 
-	// ----------------------------------------------------------------------------------------------------- //
-
-	cerr << endl;
-	cerr << "Rewinding BAM file... then getting first 1000 alignments" << endl;
-
-	alignmentCount = 0;
-	if (bReader.Rewind() ) {
-		while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {
-		
-			// disregard unmapped alignments
-			if ( bAlignment.IsMapped() ) {
-
-				++alignmentCount;
-			
-				cout << "----------------------------" << endl;
-				cout << "Alignment " << alignmentCount << endl;
-				cout << bAlignment.Name << endl;
-				cout << bAlignment.AlignedBases << endl;
-				cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;
-			}
-		}
-
-		cerr << "Found " << alignmentCount << " alignments." << endl;
-	} else { cerr << "Could not rewind" << endl; }
-*/
-	// ---------------------------------------------------------------------- //
-	// Close BamReader object (releases internal header/index data) and exit
-	// ---------------------------------------------------------------------- //
-	
-	cerr << endl;
-	cerr << "Closing BAM file: " << bamFilename << endl;
-	
+	cerr << "Closing BAM file..." << endl << endl;
 	bReader.Close();
 
 	cerr << "Exiting..." << endl << endl;
diff --git a/BamWriter.h b/BamWriter.h
index d375612..18f8e0a 100644
--- a/BamWriter.h
+++ b/BamWriter.h
@@ -57,6 +57,8 @@ using namespace std;
 #define SIZEOF_INT 4
 
 // define our BZGF structure
+#ifndef BGZF_DATA
+#define BGZF_DATA
 struct BgzfData {
 	unsigned int UncompressedBlockSize;
 	unsigned int CompressedBlockSize;
@@ -95,6 +97,7 @@ struct BgzfData {
 		if(UncompressedBlock) delete [] UncompressedBlock;
 	}
 };
+#endif	// BGZF_DATA
 
 class BamWriter {
 public:
diff --git a/Makefile b/Makefile
index 8862226..40e3ecd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,39 +1,18 @@
-CC=		gcc
 CXX=		g++
-CFLAGS=		-Wall -O3
-CXXFLAGS=	$(CFLAGS)
-DFLAGS=		-D_IOLIB=2 #-D_NDEBUG
-OBJS=		BamReader.o bgzf.o
-PROG=		BamReaderTest
-INCLUDES=       
-ARFLAGS=	-crs
+CXXFLAGS=	-Wall -O3
+PROG=		BamReaderTest BamConversion
 LIBS=		-lz
-SUBDIRS=	.
 
-.SUFFIXES:.c .cpp .o
+all: $(PROG)
 
-.c.o:
-		$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
+BamReaderTest: BamReader.o BamReaderMain.o
+	$(CXX) $(CXXFLAGS) -o $@ BamReader.o BamReaderMain.o $(LIBS)
 
-.cpp.o:
-		$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
-
-all: $(PROG) BamConversion
-
-lib:libbambc.a
-
-libbambc.a:$(OBJS)
-		$(AR) $(ARFLAGS) $@ $(OBJS)
-
-BamReaderTest:lib BamReaderMain.o
-		$(CXX) $(CXXFLAGS) -o $@ BamReaderMain.o $(LIBS) -L. -lbambc
-
-BamConversion: lib BamWriter.o BamConversionMain.o
-	$(CXX) $(CXXFLAGS) -o $@ BamWriter.o BamConversionMain.o $(LIBS) -L. -lbambc
-
-BamMerge: lib BamMerge.o
-		$(CXX) $(CXXFLAGS) -o $@ BamWriter.o BamMerge.o $(LIBS) -L. -lbambc
+BamConversion: BamReader.o BamWriter.o BamConversionMain.o
+	$(CXX) $(CXXFLAGS) -o $@ BamReader.o BamWriter.o BamConversionMain.o $(LIBS)
 
+BamMerge: BamReader.o BamWriter.o BamMerge.o
+	$(CXX) $(CXXFLAGS) -o $@ BamReader.o BamWriter.o BamMerge.o $(LIBS)
 
 clean:
-		rm -fr gmon.out *.o *.a a.out $(PROG) BamConversion *~
+		rm -fr gmon.out *.o *.a a.out *~
diff --git a/README b/README
index 639bfd1..9553d1a 100644
--- a/README
+++ b/README
@@ -2,10 +2,27 @@
 README
 ----------------
 
-Copy the 4 header files (*.h) and the library file (libbambc.a) to your directory.
 
-When you compile your program, add this to the g++ command:
+Significant re-write of BamReader API as of 5/18/2009:
+-------------------------------------------------------
+No longer using bgzf.h/.c
+No longer using libbambc.a library
+No longer using STL Utilities
+Minor changes to API syntax in opening BAM files with BamReader - see BamReader.h for details
 
-           -lz -L. -lbambc
 
-and you should be good to go.  (Those are lower case l's, as in 'lion'.)
+Compile instructions
+----------------------
+Short version:
+See Makefile for compilation example.
+
+Longer version:
+Copy BamAlignment.h, BamReader.* and/or BamWriter.* to working directory.
+#include "BamReader.h" and/or "BamWriter.h"
+Use standard compile commands, including the "-lz" parameter to access zlib functionality.
+(Note that "-L. -lbambc" are no longer necessary and WILL CAUSE PROBLEMS for compiler/linker )
+
+
+Feel free to email me or pop by my desk with any questions, comments, suggestions, etc.
+  - Derek 
+
diff --git a/STLUtilities.h b/STLUtilities.h
deleted file mode 100644
index 7a9fbc5..0000000
--- a/STLUtilities.h
+++ /dev/null
@@ -1,146 +0,0 @@
-// STLUtilities.h
-
-// Derek Barnett
-// Marth Lab, Boston College
-// Last modified: 19 March 2009
-
-#ifndef STL_UTILITIES_H
-#define STL_UTILITIES_H
-
-#include <algorithm>
-using std::distance;
-using std::find;
-using std::transform;
-
-#include <functional>
-using std::unary_function;
-
-#include <iterator>
-using std::iterator_traits;
-
-#include <string>
-using std::string;
-
-#include <utility>
-using std::pair;
-
-#include <vector>
-using std::vector;
-
-// -------------------------------------------------------------------------------- //
-// This file contains a sample of STL tricks I've gathered along the way.
-//
-// Many thanks throughout to 'Effective' series by Scott Meyers.
-// Some code is lifted (almost) verbatim from his texts where applicable.
-// ------------------------------------------------------------------------------- //
-
-// --------------------------------------------------------------------------------------------------------------------------------- //
-// STL containers will delete the values they hold when the container is deleted or goes out of scope
-// *** If the container contains pointers, however, they WILL NOT delete the actual pointed-to objects ***
-// This struct allows for easy algorithm processing (instead of pesky loops and iterator-boundary issues) to delete pointed-to objects (for any C++ type) that have been allocated with 'new')
-// Usage example:    for_each( container.begin(), container.end(), DeleteObject() ) 
-//
-// Unless of course, you like quirky, hard-to-spot memory leaks... then feel free to disregard this little STL lesson   =)
-// --------------------------------------------------------------------------------------------------------------------------------- //
-struct DeleteObject {
-	template<typename T>
-	void operator() (const T* p) const { 
-		delete p; 
-	}
-};
-
-template<typename T>
-void ClearPointerVector(vector<T>& v) {
-	if ( !v.empty() ) {
-		for_each(v.begin(), v.end(), DeleteObject());
-		v.clear();
-	}
-}
-
-// --------------------------------------------------------------------------------------------------------------------------------- //
-// Query a vector (other containers? havent tried) for an element
-// Returns the index of that element
-// Returns vector::size() if not found
-// Works with reverse iterators as well... index is counted backward from last element though
-// --------------------------------------------------------------------------------------------------------------------------------- //
-template < typename InputIterator, typename EqualityComparable >
-typename iterator_traits<InputIterator>::difference_type
-Index(const InputIterator& begin, const InputIterator& end, const EqualityComparable& item) {
-	return distance(begin, find(begin, end, item));
-}
-
-// ----------------------------------------------------------------------------------------------------------------------//
-// This next section is a sort of work-around for the bulky associative maps
-// The idea is to use a *SORTED* vector of pair objects as a lookup vector
-// LookupVector = vector< pair<Key, Value> > 
-// ----------------------------------------------------------------------------------------------------------------------//
-
-// The following template classes allow a templatized comparison function for sorting & searching lookup vector
-
-// allows sorting by Key ( less<Key> )
-template <typename Key, typename Value>
-class LookupKeyCompare {
-
-	typedef pair< Key, Value > LookupData;
-	typedef typename LookupData::first_type Key_t;
-	
-	public:
-		bool operator() (const LookupData& lhs, const LookupData& rhs) const { return keyLess(lhs.first, rhs.first); }
-		bool operator() (const LookupData& lhs, const Key_t& k) const { return keyLess(lhs.first, k); }
-		bool operator() (const Key_t& k, const LookupData& rhs) const { return keyLess(k, rhs.first); }
-	private:
-		bool keyLess(const Key_t& k1, const Key_t& k2) const { return k1 < k2; }
-};
-
-// allows sorting by Value ( less<Value> )
-template <typename Key, typename Value>
-class LookupValueCompare {
-
-	typedef pair< Key, Value > LookupData;
-	typedef typename LookupData::second_type Value_t;
-	
-	public:
-		bool operator() (const LookupData& lhs, const LookupData& rhs) const { return valueLess(lhs.second, rhs.second); }
-		bool operator() (const LookupData& lhs, const Value_t& k) const { return valueLess(lhs.second, k); }
-		bool operator() (const Value_t& k, const LookupData& rhs) const { return valueLess(k, rhs.second); }
-	private:
-		bool valueLess(const Value_t& k1, const Value_t& k2) const { return k1 < k2; }
-};
-
-// The following template functions/structs allow you to pull all keys or all values from this lookup structure
-
-// pull Key from a data pair
-template<typename Key, typename Value>
-struct RipKey:
-	public unary_function< pair<Key,Value>, Key> {
-		Key operator() (pair<Key,Value> dataPair) { 
-			return dataPair.first; 
-		}
-};
-
-// pull Value from a data pair
-template<typename Key, typename Value>
-struct RipValue:
-	public unary_function< pair<Key,Value>, Value> {
-		Value operator() (pair<Key,Value> dataPair) { 
-			return dataPair.second; 
-		}
-};
-
-// pull all Keys from lookup, store in dest (overwrite contents of dest)
-template <typename Key, typename Value>
-void RipKeys( vector< pair<Key,Value> >& lookup, vector<Key>& dest) {
-	dest.clear();
-	dest.reserve(lookup.size());
-	transform(lookup.begin(), lookup.end(), back_inserter(dest), RipKey<Key,Value>());
-}
-
-// pull all Values from lookup, store in dest (overwrite contents of dest)
-template <typename Key, typename Value>
-void RipValues( vector< pair<Key,Value> >& lookup, vector<Value>& dest) {
-	dest.clear();
-	dest.reserve(lookup.size());
-	transform(lookup.begin(), lookup.end(), back_inserter(dest), RipValue<Key,Value>());
-}
-
-#endif /* STL_UTILITIES_H */
diff --git a/bgzf.c b/bgzf.c
deleted file mode 100644
index 6481ee3..0000000
--- a/bgzf.c
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2008 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever.
- * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
- * or functionality.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "bgzf.h"
-
-#ifdef WIN32
-
-#else
-#include <unistd.h>
-extern off_t ftello(FILE *stream);
-extern int fseeko(FILE *stream, off_t offset, int whence);
-#endif
-
-typedef int8_t byte;
-
-static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
-static const int MAX_BLOCK_SIZE = 64 * 1024;
-
-static const int BLOCK_HEADER_LENGTH = 18;
-static const int BLOCK_FOOTER_LENGTH = 8;
-
-static const int GZIP_ID1 = 31;
-static const int GZIP_ID2 = 139;
-static const int CM_DEFLATE = 8;
-static const int FLG_FEXTRA = 4;
-static const int OS_UNKNOWN = 255;
-static const int BGZF_ID1 = 66; // 'B'
-static const int BGZF_ID2 = 67; // 'C'
-static const int BGZF_LEN = 2;
-static const int BGZF_XLEN = 6; // BGZF_LEN+4
-
-static const int GZIP_WINDOW_BITS = -15; // no zlib header
-static const int Z_DEFAULT_MEM_LEVEL = 8;
-
-
-inline
-void
-packInt16(uint8_t* buffer, uint16_t value)
-{
-    buffer[0] = value;
-    buffer[1] = value >> 8;
-}
-
-inline
-int
-unpackInt16(const uint8_t* buffer)
-{
-    return (buffer[0] | (buffer[1] << 8));
-}
-
-inline
-void
-packInt32(uint8_t* buffer, uint32_t value)
-{
-    buffer[0] = value;
-    buffer[1] = value >> 8;
-    buffer[2] = value >> 16;
-    buffer[3] = value >> 24;
-}
-
-inline
-int
-min(int x, int y)
-{
-    return (x < y) ? x : y;
-}
-
-static
-void
-report_error(BGZF* fp, const char* message) {
-    fp->error = message;
-}
-
-static
-BGZF*
-open_read(int fd)
-{
-    FILE* file = fdopen(fd, "r");
-    BGZF* fp = (BGZF*)malloc(sizeof(BGZF));
-    fp->file_descriptor = fd;
-    fp->open_mode = 'r';
-    fp->owned_file = 0;
-    fp->file = file;
-    fp->uncompressed_block_size = MAX_BLOCK_SIZE;
-    fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
-    fp->compressed_block_size = MAX_BLOCK_SIZE;
-    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
-    fp->block_address = 0;
-    fp->block_offset = 0;
-    fp->block_length = 0;
-    fp->error = NULL;
-    return fp;
-}
-
-static
-BGZF*
-open_write(int fd)
-{
-    FILE* file = fdopen(fd, "w");
-    BGZF* fp = (BGZF*)malloc(sizeof(BGZF));
-    fp->file_descriptor = fd;
-    fp->open_mode = 'w';
-    fp->owned_file = 0;
-    fp->file = file;
-    fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
-    fp->uncompressed_block = NULL;
-    fp->compressed_block_size = MAX_BLOCK_SIZE;
-    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
-    fp->block_address = 0;
-    fp->block_offset = 0;
-    fp->block_length = 0;
-    fp->error = NULL;
-    return fp;
-}
-
-BGZF*
-bgzf_open(const char* __restrict path, const char* __restrict mode)
-{
-    BGZF* fp = NULL;
-    if (strcasecmp(mode, "r") == 0) {
-	int oflag = O_RDONLY;
-	int fd = open(path, oflag);
-        fp = open_read(fd);
-    } else if (strcasecmp(mode, "w") == 0) {
-	int oflag = O_WRONLY | O_CREAT | O_TRUNC;
-	int fd = open(path, oflag, 0644);
-        fp = open_write(fd);
-    }
-    if (fp != NULL) {
-        fp->owned_file = 1;
-    }
-    return fp;
-}
-
-BGZF*
-bgzf_fdopen(int fd, const char * __restrict mode)
-{
-    if (strcasecmp(mode, "r") == 0) {
-        return open_read(fd);
-    } else if (strcasecmp(mode, "w") == 0) {
-        return open_write(fd);
-    } else {
-        return NULL;
-    }
-}
-
-static
-int
-deflate_block(BGZF* fp, int block_length)
-{
-    // Deflate the block in fp->uncompressed_block into fp->compressed_block.
-    // Also adds an extra field that stores the compressed block length.
-
-    byte* buffer = (byte*)fp->compressed_block;
-    int buffer_size = fp->compressed_block_size;
-
-    // Init gzip header
-    buffer[0] = GZIP_ID1;
-    buffer[1] = GZIP_ID2;
-    buffer[2] = CM_DEFLATE;
-    buffer[3] = FLG_FEXTRA;
-    buffer[4] = 0; // mtime
-    buffer[5] = 0;
-    buffer[6] = 0;
-    buffer[7] = 0;
-    buffer[8] = 0;
-    buffer[9] = OS_UNKNOWN;
-    buffer[10] = BGZF_XLEN;
-    buffer[11] = 0;
-    buffer[12] = BGZF_ID1;
-    buffer[13] = BGZF_ID2;
-    buffer[14] = BGZF_LEN;
-    buffer[15] = 0;
-    buffer[16] = 0; // placeholder for block length
-    buffer[17] = 0;
-
-    // loop to retry for blocks that do not compress enough
-    int input_length = block_length;
-    int compressed_length = 0;
-    while (1) {
-
-        z_stream zs;
-        zs.zalloc = NULL;
-        zs.zfree = NULL;
-        zs.next_in = (Bytef*)fp->uncompressed_block;
-        zs.avail_in = input_length;
-        zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
-        zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
-
-        int status = deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
-                                  GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
-        if (status != Z_OK) {
-            report_error(fp, "deflate init failed");
-            return -1;
-        }
-        status = deflate(&zs, Z_FINISH);
-        if (status != Z_STREAM_END) {
-            deflateEnd(&zs);
-            if (status == Z_OK) {
-                // Not enough space in buffer.
-                // Can happen in the rare case the input doesn't compress enough.
-                // Reduce the amount of input until it fits.
-                input_length -= 1024;
-                if (input_length <= 0) {
-                    // should never happen
-                    report_error(fp, "input reduction failed");
-                    return -1;
-                }
-                continue;
-            }
-            report_error(fp, "deflate failed");
-            return -1;
-        }
-        status = deflateEnd(&zs);
-        if (status != Z_OK) {
-            report_error(fp, "deflate end failed");
-            return -1;
-        }
-        compressed_length = zs.total_out;
-        compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
-        if (compressed_length > MAX_BLOCK_SIZE) {
-            // should never happen
-            report_error(fp, "deflate overflow");
-            return -1;
-        }
-        break;
-    }
-
-    packInt16((uint8_t*)&buffer[16], compressed_length-1);
-    uint32_t crc = crc32(0L, NULL, 0L);
-    crc = crc32(crc, (Bytef*)fp->uncompressed_block, input_length);
-    packInt32((uint8_t*)&buffer[compressed_length-8], crc);
-    packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
-
-    int remaining = block_length - input_length;
-    if (remaining > 0) {
-        if (remaining > input_length) {
-            // should never happen (check so we can use memcpy)
-            report_error(fp, "remainder too large");
-            return -1;
-        }
-        memcpy(fp->uncompressed_block,
-               (char*)fp->uncompressed_block + input_length,
-               remaining);
-    }
-    fp->block_offset = remaining;
-    return compressed_length;
-}
-
-static
-int
-inflate_block(BGZF* fp, int block_length)
-{
-    // Inflate the block in fp->compressed_block into fp->uncompressed_block
-
-    z_stream zs;
-    zs.zalloc = NULL;
-    zs.zfree = NULL;
-    zs.next_in = (Bytef*)fp->compressed_block + 18;
-    zs.avail_in = block_length - 16;
-    zs.next_out = (Bytef*)fp->uncompressed_block;
-    zs.avail_out = fp->uncompressed_block_size;
-
-    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
-    if (status != Z_OK) {
-        report_error(fp, "inflate init failed");
-        return -1;
-    }
-    status = inflate(&zs, Z_FINISH);
-    if (status != Z_STREAM_END) {
-        inflateEnd(&zs);
-        report_error(fp, "inflate failed");
-        return -1;
-    }
-    status = inflateEnd(&zs);
-    if (status != Z_OK) {
-        report_error(fp, "inflate failed");
-        return -1;
-    }
-    return zs.total_out;
-}
-
-static
-int
-check_header(const byte* header)
-{
-    return (header[0] == GZIP_ID1 &&
-            header[1] == (byte) GZIP_ID2 &&
-            header[2] == Z_DEFLATED &&
-            (header[3] & FLG_FEXTRA) != 0 &&
-            unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
-            header[12] == BGZF_ID1 &&
-            header[13] == BGZF_ID2 &&
-            unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
-}
-
-static
-int
-read_block(BGZF* fp)
-{
-    byte header[BLOCK_HEADER_LENGTH];
-    int64_t block_address = ftello(fp->file);
-    int count = fread(header, 1, sizeof(header), fp->file);
-    if (count == 0) {
-        fp->block_length = 0;
-        return 0;
-    }
-    if (count != sizeof(header)) {
-        report_error(fp, "read failed");
-        return -1;
-    }
-    if (!check_header(header)) {
-        report_error(fp, "invalid block header");
-        return -1;
-    }
-    int block_length = unpackInt16((uint8_t*)&header[16]) + 1;
-    byte* compressed_block = (byte*) fp->compressed_block;
-    memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
-    int remaining = block_length - BLOCK_HEADER_LENGTH;
-    count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
-    if (count != remaining) {
-        report_error(fp, "read failed");
-        return -1;
-    }
-    count = inflate_block(fp, block_length);
-    if (count < 0) {
-        return -1;
-    }
-    if (fp->block_length != 0) {
-        // Do not reset offset if this read follows a seek.
-        fp->block_offset = 0;
-    }
-    fp->block_address = block_address;
-    fp->block_length = count;
-    return 0;
-}
-
-int
-bgzf_read(BGZF* fp, void* data, int length)
-{
-    if (length <= 0) {
-        return 0;
-    }
-    if (fp->open_mode != 'r') {
-        report_error(fp, "file not open for reading");
-        return -1;
-    }
-
-    int bytes_read = 0;
-    byte* output = (byte*)data;
-    while (bytes_read < length) {
-        int available = fp->block_length - fp->block_offset;
-        if (available <= 0) {
-            if (read_block(fp) != 0) {
-                return -1;
-            }
-            available = fp->block_length - fp->block_offset;
-            if (available <= 0) {
-                break;
-            }
-        }
-        int copy_length = min(length-bytes_read, available);
-        byte* buffer = (byte*)fp->uncompressed_block;
-        memcpy(output, buffer + fp->block_offset, copy_length);
-        fp->block_offset += copy_length;
-        output += copy_length;
-        bytes_read += copy_length;
-    }
-    if (fp->block_offset == fp->block_length) {
-        fp->block_address = ftello(fp->file);
-        fp->block_offset = 0;
-        fp->block_length = 0;
-    }
-    return bytes_read;
-}
-
-static
-int
-flush_block(BGZF* fp)
-{
-    while (fp->block_offset > 0) {
-        int block_length = deflate_block(fp, fp->block_offset);
-        if (block_length < 0) {
-            return -1;
-        }
-        int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
-        if (count != block_length) {
-            report_error(fp, "write failed");
-            return -1;
-        }
-        fp->block_address += block_length;
-    }
-    return 0;
-}
-
-int
-bgzf_write(BGZF* fp, const void* data, int length)
-{
-    if (fp->open_mode != 'w') {
-        report_error(fp, "file not open for writing");
-        return -1;
-    }
-
-    if (fp->uncompressed_block == NULL) {
-        fp->uncompressed_block = malloc(fp->uncompressed_block_size);
-    }
-
-    const byte* input = (byte*)data;
-    int block_length = fp->uncompressed_block_size;
-    int bytes_written = 0;
-    while (bytes_written < length) {
-        int copy_length = min(block_length - fp->block_offset, length - bytes_written);
-        byte* buffer = (byte*)fp->uncompressed_block;
-        memcpy(buffer + fp->block_offset, input, copy_length);
-        fp->block_offset += copy_length;
-        input += copy_length;
-        bytes_written += copy_length;
-        if (fp->block_offset == block_length) {
-            if (flush_block(fp) != 0) {
-                break;
-            }
-        }
-    }
-    return bytes_written;
-}
-
-int
-bgzf_close(BGZF* fp)
-{
-    if (fp->open_mode == 'w') {
-        if (flush_block(fp) != 0) {
-            return -1;
-        }
-        if (fflush(fp->file) != 0) {
-            report_error(fp, "flush failed");
-            return -1;
-        }
-    }
-    if (fp->owned_file) {
-        if (fclose(fp->file) != 0) {
-            return -1;
-        }
-    }
-    free(fp->uncompressed_block);
-    free(fp->compressed_block);
-    free(fp);
-    return 0;
-}
-
-int64_t
-bgzf_tell(BGZF* fp)
-{
-    return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF));
-}
-
-int64_t
-bgzf_seek(BGZF* fp, int64_t pos, int where)
-{
-    if (fp->open_mode != 'r') {
-        report_error(fp, "file not open for read");
-        return -1;
-    }
-    if (where != SEEK_SET) {
-        report_error(fp, "unimplemented seek option");
-        return -1;
-    }
-    int block_offset = pos & 0xFFFF;
-    int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
-    if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
-        report_error(fp, "seek failed");
-        return -1;
-    }
-    fp->block_length = 0;  // indicates current block is not loaded
-    fp->block_address = block_address;
-    fp->block_offset = block_offset;
-    return 0;
-}
-
diff --git a/bgzf.h b/bgzf.h
deleted file mode 100644
index 080db1d..0000000
--- a/bgzf.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2008 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever.
- * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
- * or functionality.
- */
-
-#ifndef __BGZF_H
-#define __BGZF_H
-
-#include <stdio.h>
-#include "zlib.h"
-
-#ifdef WIN32
-#include <io.h>
-
-typedef char                 int8_t;
-typedef unsigned char       uint8_t;
-typedef short               int16_t;
-typedef unsigned short     uint16_t;
-typedef int                 int32_t;
-typedef unsigned int       uint32_t;
-typedef long long           int64_t;
-typedef unsigned long long uint64_t;
-
-#define ftello(a)     _ftelli64(a)
-#define fseeko(a,b,c) _fseeki64(a,b,c)
-#define strcasecmp    _stricmp
-#define open          _open
-#define fdopen        _fdopen
-#else
-#include <stdint.h>
-#include <stdbool.h>
-#endif
-
-typedef struct {
-    int file_descriptor;
-    char open_mode;  // 'r' or 'w'
-    bool owned_file;
-    FILE* file;
-    int uncompressed_block_size;
-    int compressed_block_size;
-    void* uncompressed_block;
-    void* compressed_block;
-    int64_t block_address;
-    int block_length;
-    int block_offset;
-    const char* error;
-} BGZF;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Open an existing file descriptor for reading or writing.
- * Mode must be either "r" or "w".
- * A subsequent bgzf_close will not close the file descriptor.
- * Returns null on error.
- */
-BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
-
-/*
- * Open the specified file for reading or writing.
- * Mode must be either "r" or "w".
- * Returns null on error.
- */
-BGZF* bgzf_open(const char* path, const char* __restrict mode);
-
-/*
- * Close the BGZ file and free all associated resources.
- * Does not close the underlying file descriptor if created with bgzf_fdopen.
- * Returns zero on success, -1 on error.
- */
-int bgzf_close(BGZF* fp);
-
-/*
- * Read up to length bytes from the file storing into data.
- * Returns the number of bytes actually read.
- * Returns zero on end of file.
- * Returns -1 on error.
- */
-int bgzf_read(BGZF* fp, void* data, int length);
-
-/*
- * Write length bytes from data to the file.
- * Returns the number of bytes written.
- * Returns -1 on error.
- */
-int bgzf_write(BGZF* fp, const void* data, int length);
-
-/*
- * Return a virtual file pointer to the current location in the file.
- * No interpetation of the value should be made, other than a subsequent
- * call to bgzf_seek can be used to position the file at the same point.
- * Return value is non-negative on success.
- * Returns -1 on error.
- */
-int64_t bgzf_tell(BGZF* fp);
-
-/*
- * Set the file to read from the location specified by pos, which must
- * be a value previously returned by bgzf_tell for this file (but not
- * necessarily one returned by this file handle).
- * The where argument must be SEEK_SET.
- * Seeking on a file opened for write is not supported.
- * Returns zero on success, -1 on error.
- */
-int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-- 
2.47.3