From 7f8b301e2f43d4d50a79f6184f1c132c9e33d324 Mon Sep 17 00:00:00 2001 From: derek Date: Fri, 9 Mar 2012 00:34:14 -0500 Subject: [PATCH] FASTA reader - performance improvement for single-base fetching --- src/utils/bamtools_fasta.cpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/utils/bamtools_fasta.cpp b/src/utils/bamtools_fasta.cpp index d3ad080..680f62b 100644 --- a/src/utils/bamtools_fasta.cpp +++ b/src/utils/bamtools_fasta.cpp @@ -2,7 +2,7 @@ // bamtools_fasta.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 10 October 2011 +// Last modified: 9 March 2012 (DB) // --------------------------------------------------------------------------- // Provides FASTA reading/indexing functionality. // *************************************************************************** @@ -19,11 +19,6 @@ using namespace BamTools; #include using namespace std; -#ifdef _MSC_VER - #define ftello _ftelli64 - #define fseeko _fseeki64 -#endif - struct Fasta::FastaPrivate { struct FastaIndexData { @@ -250,22 +245,18 @@ bool Fasta::FastaPrivate::GetBase(const int& refId, const int& position, char& b cerr << "FASTA error: invalid position specified: " << position << endl; return false; } - - // seek to beginning of sequence data - if ( fseeko(Stream, referenceData.Offset, SEEK_SET) != 0 ) { - cerr << "FASTA error : could not sek in file" << endl; - return false; - } - - // retrieve sequence - string sequence = ""; - if ( !GetNextSequence(sequence) ) { - cerr << "FASTA error : could not retrieve base from FASTA file" << endl; + + // calculate seek position & attempt jump + const int64_t lines = position / referenceData.LineLength; + const int64_t lineOffset = position % referenceData.LineLength; + const int64_t seekTo = referenceData.Offset + (lines*referenceData.ByteLength) + lineOffset; + if ( fseek64(Stream, seekTo, SEEK_SET) != 0 ) { + cerr << "FASTA error : could not seek in file" << endl; return false; } // set base & return success - base = sequence.at(position); + base = getc(Stream); return true; } -- 2.39.2