X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Finternal%2FBamReader_p.cpp;h=11cba33ce177fabbc382738f99e6dcfb9b309554;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=f14dcee646a28eba84d2ee9e6afcf96601b07387;hpb=94193d06ce788ba7df8b7bd856a983d1e98daac6;p=bamtools.git diff --git a/src/api/internal/BamReader_p.cpp b/src/api/internal/BamReader_p.cpp index f14dcee..11cba33 100644 --- a/src/api/internal/BamReader_p.cpp +++ b/src/api/internal/BamReader_p.cpp @@ -1,22 +1,27 @@ // *************************************************************************** // BamReader_p.cpp (c) 2009 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 November 2010 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides the basic functionality for reading BAM files // *************************************************************************** -#include -#include -#include -#include -#include +#include "api/BamConstants.h" +#include "api/BamReader.h" +#include "api/IBamIODevice.h" +#include "api/internal/BamDeviceFactory_p.h" +#include "api/internal/BamException_p.h" +#include "api/internal/BamHeader_p.h" +#include "api/internal/BamRandomAccessController_p.h" +#include "api/internal/BamReader_p.h" +#include "api/internal/BamStandardIndex_p.h" +#include "api/internal/BamToolsIndex_p.h" using namespace BamTools; using namespace BamTools::Internal; #include +#include #include #include #include @@ -24,18 +29,10 @@ using namespace std; // constructor BamReaderPrivate::BamReaderPrivate(BamReader* parent) - : HeaderText("") - , Index(0) - , HasIndex(false) - , AlignmentsBeginOffset(0) - // , m_header(0) - , IndexCacheMode(BamIndex::LimitedIndexCaching) - , HasAlignmentsInRegion(true) - , Parent(parent) - , DNA_LOOKUP("=ACMGRSVTWYHKDBN") - , CIGAR_LOOKUP("MIDNSHP") + : m_alignmentsBeginOffset(0) + , m_parent(parent) { - IsBigEndian = SystemIsBigEndian(); + m_isBigEndian = BamTools::SystemIsBigEndian(); } // destructor @@ -43,157 +40,162 @@ BamReaderPrivate::~BamReaderPrivate(void) { Close(); } -// adjusts requested region if necessary (depending on where data actually begins) -void BamReaderPrivate::AdjustRegion(BamRegion& region) { +// closes the BAM file +bool BamReaderPrivate::Close(void) { + + // clear BAM metadata + m_references.clear(); + m_header.Clear(); + + // clear filename + m_filename.clear(); + + // close random access controller + m_randomAccessController.Close(); + + // if stream is open, attempt close + if ( IsOpen() ) { + try { + m_stream.Close(); + } catch ( BamException& e ) { + const string streamError = e.what(); + const string message = string("encountered error closing BAM file: \n\t") + streamError; + SetErrorString("BamReader::Close", message); + return false; + } + } - // check for valid index first - if ( Index == 0 ) return; + // return success + return true; +} - // see if any references in region have alignments - HasAlignmentsInRegion = false; - int currentId = region.LeftRefID; +// creates an index file of requested type on current BAM file +bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) { - const int rightBoundRefId = ( region.isRightBoundSpecified() ? region.RightRefID : References.size() - 1 ); - while ( currentId <= rightBoundRefId ) { - HasAlignmentsInRegion = Index->HasAlignments(currentId); - if ( HasAlignmentsInRegion ) break; - ++currentId; + // skip if BAM file not open + if ( !IsOpen() ) { + SetErrorString("BamReader::CreateIndex", "cannot create index on unopened BAM file"); + return false; } - // if no data found on any reference in region - if ( !HasAlignmentsInRegion ) return; - - // if left bound of desired region had no data, use first reference that had data - // otherwise, leave requested region as-is - if ( currentId != region.LeftRefID ) { - region.LeftRefID = currentId; - region.LeftPosition = 0; + // attempt to create index + if ( m_randomAccessController.CreateIndex(this, type) ) + return true; + else { + const string bracError = m_randomAccessController.GetErrorString(); + const string message = string("could not create index: \n\t") + bracError; + SetErrorString("BamReader::CreateIndex", message); + return false; } } -// clear index data structure -void BamReaderPrivate::ClearIndex(void) { - delete Index; - Index = 0; - HasIndex = false; +// return path & filename of current BAM file +const string BamReaderPrivate::Filename(void) const { + return m_filename; } -// closes the BAM file -void BamReaderPrivate::Close(void) { - - // close BGZF file stream - mBGZF.Close(); - - // clear out index data - ClearIndex(); - - // clear out header data - HeaderText.clear(); - - // if ( m_header ) { - // delete m_header; - // m_header = 0; - // } - - // clear out region flags - Region.clear(); +string BamReaderPrivate::GetErrorString(void) const { + return m_errorString; } -// creates index for BAM file, saves to file -// default behavior is to create the BAM standard index (".bai") -// set flag to false to create the BamTools-specific index (".bti") -bool BamReaderPrivate::CreateIndex(bool useStandardIndex) { - - // clear out prior index data - ClearIndex(); - - // create index based on type requested - if ( useStandardIndex ) - Index = new BamStandardIndex(&mBGZF, Parent); - else - Index = new BamToolsIndex(&mBGZF, Parent); - - // set index cache mode to full for writing - Index->SetCacheMode(BamIndex::FullIndexCaching); - - // build new index - bool ok = true; - ok &= Index->Build(); - HasIndex = ok; - - // mark empty references - MarkReferences(); - - // attempt to save index data to file - ok &= Index->Write(Filename); - - // set client's desired index cache mode - Index->SetCacheMode(IndexCacheMode); - - // return success/fail of both building & writing index - return ok; +// return header data as std::string +string BamReaderPrivate::GetHeaderText(void) const { + return m_header.ToString(); } -const string BamReaderPrivate::GetHeaderText(void) const { +// return header data as SamHeader object +SamHeader BamReaderPrivate::GetSamHeader(void) const { + return m_header.ToSamHeader(); +} - return HeaderText; +// get next alignment (with character data fully parsed) +bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) { - // if ( m_header ) - // return m_header->Text(); - // else - // return string(""); -} + // if valid alignment found + if ( GetNextAlignmentCore(alignment) ) { -// get next alignment (from specified region, if given) -bool BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) { + // store alignment's "source" filename + alignment.Filename = m_filename; - // if valid alignment found, attempt to parse char data, and return success/failure - if ( GetNextAlignmentCore(bAlignment) ) - return bAlignment.BuildCharData(); + // return success/failure of parsing char data + if ( alignment.BuildCharData() ) + return true; + else { + const string alError = alignment.GetErrorString(); + const string message = string("could not populate alignment data: \n\t") + alError; + SetErrorString("BamReader::GetNextAlignment", message); + return false; + } + } // no valid alignment found - else return false; + return false; } // retrieves next available alignment core data (returns success/fail) -// ** DOES NOT parse any character data (read name, bases, qualities, tag data) +// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename) // these can be accessed, if necessary, from the supportData // useful for operations requiring ONLY positional or other alignment-related information -bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) { +bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) { - // if region is set but has no alignments - if ( !Region.isNull() && !HasAlignmentsInRegion ) + // skip if stream not opened + if ( !m_stream.IsOpen() ) return false; - // if valid alignment available - if ( LoadNextAlignment(bAlignment) ) { + try { - // set core-only flag - bAlignment.SupportData.HasCoreOnly = true; + // skip if region is set but has no alignments + if ( m_randomAccessController.HasRegion() && + !m_randomAccessController.RegionHasAlignments() ) + { + return false; + } - // if region not specified with at least a left boundary, return success - if ( !Region.isLeftBoundSpecified() ) return true; + // if can't read next alignment + if ( !LoadNextAlignment(alignment) ) + return false; - // determine region state (before, within, after) - BamReaderPrivate::RegionState state = IsOverlap(bAlignment); + // check alignment's region-overlap state + BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment); - // if alignment lies after region, return false - if ( state == AFTER_REGION ) return false; + // if alignment starts after region, no need to keep reading + if ( state == BamRandomAccessController::AfterRegion ) + return false; - while ( state != WITHIN_REGION ) { - // if no valid alignment available (likely EOF) return failure - if ( !LoadNextAlignment(bAlignment) ) return false; - // if alignment lies after region, return false (no available read within region) - state = IsOverlap(bAlignment); - if ( state == AFTER_REGION ) return false; + // read until overlap is found + while ( state != BamRandomAccessController::OverlapsRegion ) { + + // if can't read next alignment + if ( !LoadNextAlignment(alignment) ) + return false; + + // check alignment's region-overlap state + state = m_randomAccessController.AlignmentState(alignment); + + // if alignment starts after region, no need to keep reading + if ( state == BamRandomAccessController::AfterRegion ) + return false; } - // return success (alignment found that overlaps region) + // if we get here, we found the next 'valid' alignment + // (e.g. overlaps current region if one was set, simply the next alignment if not) + alignment.SupportData.HasCoreOnly = true; return true; + + } catch ( BamException& e ) { + const string streamError = e.what(); + const string message = string("encountered error reading BAM alignment: \n\t") + streamError; + SetErrorString("BamReader::GetNextAlignmentCore", message); + return false; } +} + +int BamReaderPrivate::GetReferenceCount(void) const { + return m_references.size(); +} - // no valid alignment - else return false; +const RefVector& BamReaderPrivate::GetReferenceData(void) const { + return m_references; } // returns RefID for given RefName (returns References.size() if not found) @@ -201,207 +203,82 @@ int BamReaderPrivate::GetReferenceID(const string& refName) const { // retrieve names from reference data vector refNames; - RefVector::const_iterator refIter = References.begin(); - RefVector::const_iterator refEnd = References.end(); + RefVector::const_iterator refIter = m_references.begin(); + RefVector::const_iterator refEnd = m_references.end(); for ( ; refIter != refEnd; ++refIter) refNames.push_back( (*refIter).RefName ); - // return 'index-of' refName ( if not found, returns refNames.size() ) - return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName)); + // return 'index-of' refName (or -1 if not found) + int index = distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName)); + if ( index == (int)m_references.size() ) return -1; + else return index; } -// returns region state - whether alignment ends before, overlaps, or starts after currently specified region -// this *internal* method should ONLY called when (at least) IsLeftBoundSpecified == true -BamReaderPrivate::RegionState BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) { - - // if alignment is on any reference sequence before left bound - if ( bAlignment.RefID < Region.LeftRefID ) - return BEFORE_REGION; - - // if alignment starts on left bound reference - else if ( bAlignment.RefID == Region.LeftRefID ) { - - // if alignment starts at or after left boundary - if ( bAlignment.Position >= Region.LeftPosition) { - - // if right boundary is specified AND - // left/right boundaries are on same reference AND - // alignment starts past right boundary - if ( Region.isRightBoundSpecified() && - Region.LeftRefID == Region.RightRefID && - bAlignment.Position > Region.RightPosition ) - return AFTER_REGION; - - // otherwise, alignment is within region - else - return WITHIN_REGION; - } - - // alignment starts before left boundary - else { - // check if alignment overlaps left boundary - if ( bAlignment.GetEndPosition() >= Region.LeftPosition ) - return WITHIN_REGION; - else - return BEFORE_REGION; - } - } - - // alignment starts on a reference after the left bound - else { - - // if region has a right boundary - if ( Region.isRightBoundSpecified() ) { - - // alignment is on reference between boundaries - if ( bAlignment.RefID < Region.RightRefID ) - return WITHIN_REGION; - - // alignment is on reference after right boundary - else if ( bAlignment.RefID > Region.RightRefID ) - return AFTER_REGION; - - // alignment is on right bound reference - else { - // check if alignment starts before or at right boundary - if ( bAlignment.Position <= Region.RightPosition ) - return WITHIN_REGION; - else - return AFTER_REGION; - } - } +bool BamReaderPrivate::HasIndex(void) const { + return m_randomAccessController.HasIndex(); +} - // otherwise, alignment is after left bound reference, but there is no right boundary - else return WITHIN_REGION; - } +bool BamReaderPrivate::IsOpen(void) const { + return m_stream.IsOpen(); } // load BAM header data void BamReaderPrivate::LoadHeaderData(void) { - - // m_header = new BamHeader(&mBGZF); - // bool headerLoadedOk = m_header->Load(); - // if ( !headerLoadedOk ) - // cerr << "BamReader could not load header" << endl; - - // check to see if proper BAM header - char buffer[4]; - if (mBGZF.Read(buffer, 4) != 4) { - fprintf(stderr, "Could not read header type\n"); - exit(1); - } - - if (strncmp(buffer, "BAM\001", 4)) { - fprintf(stderr, "wrong header type!\n"); - exit(1); - } - - // get BAM header text length - mBGZF.Read(buffer, 4); - unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) SwapEndian_32(headerTextLength); - - // get BAM header text - char* headerText = (char*)calloc(headerTextLength + 1, 1); - mBGZF.Read(headerText, headerTextLength); - HeaderText = (string)((const char*)headerText); - - // clean up calloc-ed temp variable - free(headerText); -} - -// load existing index data from BAM index file (".bti" OR ".bai"), return success/fail -bool BamReaderPrivate::LoadIndex(const bool lookForIndex, const bool preferStandardIndex) { - - // clear out any existing index data - ClearIndex(); - - // if no index filename provided, so we need to look for available index files - if ( IndexFilename.empty() ) { - - // attempt to load BamIndex based on current Filename provided & preferStandardIndex flag - const BamIndex::PreferredIndexType type = (preferStandardIndex ? BamIndex::STANDARD : BamIndex::BAMTOOLS); - Index = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, type); - - // if null, return failure - if ( Index == 0 ) return false; - - // generate proper IndexFilename based on type of index created - IndexFilename = Filename + Index->Extension(); - } - - else { - - // attempt to load BamIndex based on IndexFilename provided by client - Index = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent); - - // if null, return failure - if ( Index == 0 ) return false; - } - - // set cache mode for BamIndex - Index->SetCacheMode(IndexCacheMode); - - // loading the index data from file - HasIndex = Index->Load(IndexFilename); - - // mark empty references - MarkReferences(); - - // return index status - return HasIndex; + m_header.Load(&m_stream); } // populates BamAlignment with alignment data under file pointer, returns success/fail -bool BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) { +bool BamReaderPrivate::LoadNextAlignment(BamAlignment& alignment) { // read in the 'block length' value, make sure it's not zero - char buffer[4]; - mBGZF.Read(buffer, 4); - bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); } - if ( bAlignment.SupportData.BlockLength == 0 ) return false; + char buffer[sizeof(uint32_t)]; + m_stream.Read(buffer, sizeof(uint32_t)); + alignment.SupportData.BlockLength = BamTools::UnpackUnsignedInt(buffer); + if ( m_isBigEndian ) BamTools::SwapEndian_32(alignment.SupportData.BlockLength); + if ( alignment.SupportData.BlockLength == 0 ) + return false; // read in core alignment data, make sure the right size of data was read - char x[BAM_CORE_SIZE]; - if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) + char x[Constants::BAM_CORE_SIZE]; + if ( m_stream.Read(x, Constants::BAM_CORE_SIZE) != Constants::BAM_CORE_SIZE ) return false; - if ( IsBigEndian ) { - for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) ) - SwapEndian_32p(&x[i]); + // swap core endian-ness if necessary + if ( m_isBigEndian ) { + for ( unsigned int i = 0; i < Constants::BAM_CORE_SIZE; i+=sizeof(uint32_t) ) + BamTools::SwapEndian_32p(&x[i]); } // set BamAlignment 'core' and 'support' data - bAlignment.RefID = BgzfData::UnpackSignedInt(&x[0]); - bAlignment.Position = BgzfData::UnpackSignedInt(&x[4]); + alignment.RefID = BamTools::UnpackSignedInt(&x[0]); + alignment.Position = BamTools::UnpackSignedInt(&x[4]); - unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]); - bAlignment.Bin = tempValue >> 16; - bAlignment.MapQuality = tempValue >> 8 & 0xff; - bAlignment.SupportData.QueryNameLength = tempValue & 0xff; + unsigned int tempValue = BamTools::UnpackUnsignedInt(&x[8]); + alignment.Bin = tempValue >> 16; + alignment.MapQuality = tempValue >> 8 & 0xff; + alignment.SupportData.QueryNameLength = tempValue & 0xff; - tempValue = BgzfData::UnpackUnsignedInt(&x[12]); - bAlignment.AlignmentFlag = tempValue >> 16; - bAlignment.SupportData.NumCigarOperations = tempValue & 0xffff; + tempValue = BamTools::UnpackUnsignedInt(&x[12]); + alignment.AlignmentFlag = tempValue >> 16; + alignment.SupportData.NumCigarOperations = tempValue & 0xffff; - bAlignment.SupportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]); - bAlignment.MateRefID = BgzfData::UnpackSignedInt(&x[20]); - bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]); - bAlignment.InsertSize = BgzfData::UnpackSignedInt(&x[28]); + alignment.SupportData.QuerySequenceLength = BamTools::UnpackUnsignedInt(&x[16]); + alignment.MateRefID = BamTools::UnpackSignedInt(&x[20]); + alignment.MatePosition = BamTools::UnpackSignedInt(&x[24]); + alignment.InsertSize = BamTools::UnpackSignedInt(&x[28]); // set BamAlignment length - bAlignment.Length = bAlignment.SupportData.QuerySequenceLength; + alignment.Length = alignment.SupportData.QuerySequenceLength; // read in character data - make sure proper data size was read bool readCharDataOK = false; - const unsigned int dataLength = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE; - char* allCharData = (char*)calloc(sizeof(char), dataLength); + const unsigned int dataLength = alignment.SupportData.BlockLength - Constants::BAM_CORE_SIZE; + RaiiBuffer allCharData(dataLength); - if ( mBGZF.Read(allCharData, dataLength) == (signed int)dataLength) { + if ( m_stream.Read(allCharData.Buffer, dataLength) == dataLength ) { // store 'allCharData' in supportData structure - bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength); + alignment.SupportData.AllCharData.assign((const char*)allCharData.Buffer, dataLength); // set success flag readCharDataOK = true; @@ -409,180 +286,186 @@ bool BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) { // save CIGAR ops // need to calculate this here so that BamAlignment::GetEndPosition() performs correctly, // even when GetNextAlignmentCore() is called - const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength; - uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset); + const unsigned int cigarDataOffset = alignment.SupportData.QueryNameLength; + uint32_t* cigarData = (uint32_t*)(allCharData.Buffer + cigarDataOffset); CigarOp op; - bAlignment.CigarData.clear(); - bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations); - for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) { + alignment.CigarData.clear(); + alignment.CigarData.reserve(alignment.SupportData.NumCigarOperations); + for ( unsigned int i = 0; i < alignment.SupportData.NumCigarOperations; ++i ) { - // swap if necessary - if ( IsBigEndian ) SwapEndian_32(cigarData[i]); + // swap endian-ness if necessary + if ( m_isBigEndian ) BamTools::SwapEndian_32(cigarData[i]); // build CigarOp structure - op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT); - op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ]; + op.Length = (cigarData[i] >> Constants::BAM_CIGAR_SHIFT); + op.Type = Constants::BAM_CIGAR_LOOKUP[ (cigarData[i] & Constants::BAM_CIGAR_MASK) ]; // save CigarOp - bAlignment.CigarData.push_back(op); + alignment.CigarData.push_back(op); } } - free(allCharData); + // return success/failure return readCharDataOK; } // loads reference data from BAM file -void BamReaderPrivate::LoadReferenceData(void) { +bool BamReaderPrivate::LoadReferenceData(void) { // get number of reference sequences - char buffer[4]; - mBGZF.Read(buffer, 4); - unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) SwapEndian_32(numberRefSeqs); - if ( numberRefSeqs == 0 ) return; - References.reserve((int)numberRefSeqs); + char buffer[sizeof(uint32_t)]; + m_stream.Read(buffer, sizeof(uint32_t)); + uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer); + if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs); + m_references.reserve((int)numberRefSeqs); // iterate over all references in header - for (unsigned int i = 0; i != numberRefSeqs; ++i) { + for ( unsigned int i = 0; i != numberRefSeqs; ++i ) { // get length of reference name - mBGZF.Read(buffer, 4); - unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) SwapEndian_32(refNameLength); - char* refName = (char*)calloc(refNameLength, 1); + m_stream.Read(buffer, sizeof(uint32_t)); + uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer); + if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength); + RaiiBuffer refName(refNameLength); // get reference name and reference sequence length - mBGZF.Read(refName, refNameLength); - mBGZF.Read(buffer, 4); - int refLength = BgzfData::UnpackSignedInt(buffer); - if ( IsBigEndian ) SwapEndian_32(refLength); + m_stream.Read(refName.Buffer, refNameLength); + m_stream.Read(buffer, sizeof(int32_t)); + int32_t refLength = BamTools::UnpackSignedInt(buffer); + if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength); // store data for reference RefData aReference; - aReference.RefName = (string)((const char*)refName); + aReference.RefName = (string)((const char*)refName.Buffer); aReference.RefLength = refLength; - References.push_back(aReference); - - // clean up calloc-ed temp variable - free(refName); + m_references.push_back(aReference); } -} -// mark references with no alignment data -void BamReaderPrivate::MarkReferences(void) { + // return success + return true; +} - // ensure index is available - if ( !HasIndex ) return; +bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) { - // mark empty references - for ( int i = 0; i < (int)References.size(); ++i ) - References.at(i).RefHasAlignments = Index->HasAlignments(i); + if ( m_randomAccessController.LocateIndex(this, preferredType) ) + return true; + else { + const string bracError = m_randomAccessController.GetErrorString(); + const string message = string("could not locate index: \n\t") + bracError; + SetErrorString("BamReader::LocateIndex", message); + return false; + } } // opens BAM file (and index) -bool BamReaderPrivate::Open(const string& filename, - const string& indexFilename, - const bool lookForIndex, - const bool preferStandardIndex) -{ - // store filenames - Filename = filename; - IndexFilename = indexFilename; +bool BamReaderPrivate::Open(const string& filename) { - // open the BGZF file for reading, return false on failure - if ( !mBGZF.Open(filename, "rb") ) return false; + try { - // retrieve header text & reference data - LoadHeaderData(); - LoadReferenceData(); + // make sure we're starting with fresh state + Close(); - // store file offset of first alignment - AlignmentsBeginOffset = mBGZF.Tell(); + // open BgzfStream + m_stream.Open(filename, IBamIODevice::ReadOnly); + assert(m_stream); - // if no index filename provided - if ( IndexFilename.empty() ) { + // load BAM metadata + LoadHeaderData(); + LoadReferenceData(); - // client did not specify that index SHOULD be found - // useful for cases where sequential access is all that is required - if ( !lookForIndex ) return true; + // store filename & offset of first alignment + m_filename = filename; + m_alignmentsBeginOffset = m_stream.Tell(); - // otherwise, look for index file, return success/fail - else return LoadIndex(lookForIndex, preferStandardIndex) ; + // return success + return true; + + } catch ( BamException& e ) { + const string error = e.what(); + const string message = string("could not open file: ") + filename + + "\n\t" + error; + SetErrorString("BamReader::Open", message); + return false; } +} - // client supplied an index filename - // attempt to load index data, return success/fail - return LoadIndex(lookForIndex, preferStandardIndex); +bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) { + + if ( m_randomAccessController.OpenIndex(indexFilename, this) ) + return true; + else { + const string bracError = m_randomAccessController.GetErrorString(); + const string message = string("could not open index: \n\t") + bracError; + SetErrorString("BamReader::OpenIndex", message); + return false; + } } // returns BAM file pointer to beginning of alignment data bool BamReaderPrivate::Rewind(void) { - // rewind to first alignment, return false if unable to seek - if ( !mBGZF.Seek(AlignmentsBeginOffset) ) return false; + // reset region + m_randomAccessController.ClearRegion(); - // retrieve first alignment data, return false if unable to read - BamAlignment al; - if ( !LoadNextAlignment(al) ) return false; + // return status of seeking back to first alignment + if ( Seek(m_alignmentsBeginOffset) ) + return true; + else { + const string currentError = m_errorString; + const string message = string("could not rewind: \n\t") + currentError; + SetErrorString("BamReader::Rewind", message); + return false; + } +} - // reset default region info using first alignment in file - Region.clear(); - HasAlignmentsInRegion = true; +bool BamReaderPrivate::Seek(const int64_t& position) { - // rewind back to beginning of first alignment - // return success/fail of seek - return mBGZF.Seek(AlignmentsBeginOffset); + // skip if BAM file not open + if ( !IsOpen() ) { + SetErrorString("BamReader::Seek", "cannot seek on unopened BAM file"); + return false; + } + + try { + m_stream.Seek(position); + return true; + } + catch ( BamException& e ) { + const string streamError = e.what(); + const string message = string("could not seek in BAM file: \n\t") + streamError; + SetErrorString("BamReader::Seek", message); + return false; + } +} + +void BamReaderPrivate::SetErrorString(const string& where, const string& what) { + static const string SEPARATOR = ": "; + m_errorString = where + SEPARATOR + what; +} + +void BamReaderPrivate::SetIndex(BamIndex* index) { + m_randomAccessController.SetIndex(index); } // change the index caching behavior -void BamReaderPrivate::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) { - IndexCacheMode = mode; - if ( Index == 0 ) return; - Index->SetCacheMode(mode); +void BamReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) { + m_randomAccessController.SetIndexCacheMode(mode); } -// asks Index to attempt a Jump() to specified region +// sets current region & attempts to jump to it // returns success/failure bool BamReaderPrivate::SetRegion(const BamRegion& region) { - // clear out any prior BamReader region data - // - // N.B. - this is cleared so that BamIndex now has free reign to call - // GetNextAlignmentCore() and do overlap checking without worrying about BamReader - // performing any overlap checking of its own and moving on to the next read... Calls - // to GetNextAlignmentCore() with no Region set, simply return the next alignment. - // This ensures that the Index is able to do just that. (All without exposing - // LoadNextAlignment() to the public API, and potentially confusing clients with the nomenclature) - Region.clear(); - - // check for existing index - if ( !HasIndex ) return false; - - // adjust region if necessary to reflect where data actually begins - BamRegion adjustedRegion(region); - AdjustRegion(adjustedRegion); - - // if no data present, return true - // not an error, but BamReader knows that no data is there for future alignment access - // (this is useful in a MultiBamReader setting where some BAM files may lack data in regions - // that other BAMs have data) - if ( !HasAlignmentsInRegion ) { - Region = adjustedRegion; + if ( m_randomAccessController.SetRegion(region, m_references.size()) ) return true; + else { + const string bracError = m_randomAccessController.GetErrorString(); + const string message = string("could not set region: \n\t") + bracError; + SetErrorString("BamReader::SetRegion", message); + return false; } +} - // attempt jump to user-specified region return false if jump could not be performed at all - // (invalid index, unknown reference, etc) - // - // Index::Jump() is allowed to modify the HasAlignmentsInRegion flag - // * This covers case where a region is requested that lies beyond the last alignment on a reference - // If this occurs, any subsequent calls to GetNexAlignment[Core] simply return false - // BamMultiReader is then able to successfully pull alignments from a region from multiple files - // even if one or more have no data. - if ( !Index->Jump(adjustedRegion, &HasAlignmentsInRegion) ) return false; - - // save region and return success - Region = adjustedRegion; - return true; +int64_t BamReaderPrivate::Tell(void) const { + return m_stream.Tell(); }