* Moved FileExists() to BamAux.h so that all API classes have access to its functionality.
* Created 2 'factory methods' in BamIndex.h to return a BamIndex subclass, depending on client\'s specified PreferredIndexType & on what files actually exist on disk.
* Renamed BamDefaultIndex as BamStandardIndex. Hopefully this name should be a clearer description going forward than BamDefaultIndex, since the standardized index may not always be the 'default' in every situation.
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 27 July 2010 (DB)\r
+// Last modified: 3 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Provides the basic constants, data structures, etc. for using BAM files\r
// ***************************************************************************\r
\r
// C++ includes\r
#include <exception>\r
+#include <fstream>\r
+#include <iostream>\r
#include <map>\r
#include <string>\r
#include <utility>\r
SwapEndian_64(value);\r
}\r
\r
+inline bool FileExists(const std::string& filename) {\r
+ std::ifstream f(filename.c_str(), std::ifstream::in);\r
+ return !f.fail();\r
+}\r
+\r
// ----------------------------------------------------------------\r
// BamAlignment member methods\r
\r
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 17 August 2010 (DB)
+// Last modified: 3 September 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index functionality - both for the default (standardized) BAM
// index format (.bai) as well as a BamTools-specific (nonstandard) index
#include <cstdio>
#include <cstdlib>
#include <algorithm>
-// #include <iostream>
#include <map>
#include "BamIndex.h"
#include "BamReader.h"
// #########################################################################################
// -------------------------------
-// BamDefaultIndex structs & typedefs
+// BamStandardIndex structs & typedefs
namespace BamTools {
// --------------------------------------------------
-// BamDefaultIndex data structures & typedefs
+// BamStandardIndex data structures & typedefs
struct Chunk {
// data members
{ }
};
-typedef vector<ReferenceIndex> BamDefaultIndexData;
+typedef vector<ReferenceIndex> BamStandardIndexData;
} // namespace BamTools
// -------------------------------
-// BamDefaultIndex implementation
+// BamStandardIndex implementation
-struct BamDefaultIndex::BamDefaultIndexPrivate {
+struct BamStandardIndex::BamStandardIndexPrivate {
// -------------------------
// data members
- BamDefaultIndexData m_indexData;
- BamDefaultIndex* m_parent;
+ BamStandardIndexData m_indexData;
+ BamStandardIndex* m_parent;
// -------------------------
// ctor & dtor
- BamDefaultIndexPrivate(BamDefaultIndex* parent) : m_parent(parent) { }
- ~BamDefaultIndexPrivate(void) { }
+ BamStandardIndexPrivate(BamStandardIndex* parent) : m_parent(parent) { }
+ ~BamStandardIndexPrivate(void) { m_indexData.clear(); }
// -------------------------
// internal methods
};
-BamDefaultIndex::BamDefaultIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
+BamStandardIndex::BamStandardIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian)
: BamIndex(bgzf, reader, isBigEndian)
{
- d = new BamDefaultIndexPrivate(this);
+ d = new BamStandardIndexPrivate(this);
}
-BamDefaultIndex::~BamDefaultIndex(void) {
- d->m_indexData.clear();
+BamStandardIndex::~BamStandardIndex(void) {
delete d;
d = 0;
}
// calculate bins that overlap region
-int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) {
+int BamStandardIndex::BamStandardIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) {
// get region boundaries
uint32_t begin = (unsigned int)region.LeftPosition;
return i;
}
-bool BamDefaultIndex::Build(void) {
+bool BamStandardIndex::Build(void) {
// be sure reader & BGZF file are valid & open for reading
if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
// update saveRefID
saveRefID = bAlignment.RefID;
- // if invalid RefID, break out (why?)
- if ( saveRefID < 0 ) { break; }
+ // if invalid RefID, break out
+ if ( saveRefID < 0 ) break;
}
// make sure that current file pointer is beyond lastOffset
// iterate through references in index
// store whether reference has data &
// sort offsets in linear offset vector
- BamDefaultIndexData::iterator indexIter = d->m_indexData.begin();
- BamDefaultIndexData::iterator indexEnd = d->m_indexData.end();
+ BamStandardIndexData::iterator indexIter = d->m_indexData.begin();
+ BamStandardIndexData::iterator indexEnd = d->m_indexData.end();
for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
// get reference index data
return m_reader->Rewind();
}
-bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
+bool BamStandardIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector<int64_t>& offsets) {
// calculate which bins overlap this region
uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
+ // iterate over chunks
const ChunkVector& chunks = (*binIter).second;
std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
}
// saves BAM bin entry for index
-void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& binMap,
+void BamStandardIndex::BamStandardIndexPrivate::InsertBinEntry(BamBinMap& binMap,
const uint32_t& saveBin,
const uint64_t& saveOffset,
const uint64_t& lastOffset)
}
// saves linear offset entry for index
-void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
+void BamStandardIndex::BamStandardIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets,
const BamAlignment& bAlignment,
const uint64_t& lastOffset)
{
}
}
-bool BamDefaultIndex::Load(const string& filename) {
+bool BamStandardIndex::Load(const string& filename) {
// open index file, abort on error
FILE* indexStream = fopen(filename.c_str(), "rb");
// get number of reference sequences
uint32_t numRefSeqs;
elementsRead = fread(&numRefSeqs, 4, 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_32(numRefSeqs); }
+ if ( m_isBigEndian ) SwapEndian_32(numRefSeqs);
- // intialize space for BamDefaultIndexData data structure
+ // intialize space for BamStandardIndexData data structure
d->m_indexData.reserve(numRefSeqs);
// iterate over reference sequences
// get number of bins for this reference sequence
int32_t numBins;
elementsRead = fread(&numBins, 4, 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_32(numBins); }
+ if ( m_isBigEndian ) SwapEndian_32(numBins);
if ( numBins > 0 ) {
RefData& refEntry = m_references[i];
binMap.insert( pair<uint32_t, ChunkVector>(binID, regionChunks) );
}
+ // -----------------------------------------------------
// load linear index for this reference sequence
// get number of linear offsets
int32_t numLinearOffsets;
elementsRead = fread(&numLinearOffsets, 4, 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_32(numLinearOffsets); }
+ if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
// intialize LinearOffsetVector
LinearOffsetVector offsets;
for ( int j = 0; j < numLinearOffsets; ++j ) {
// read a linear offset & store
elementsRead = fread(&linearOffset, 8, 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
offsets.push_back(linearOffset);
}
}
// merges 'alignment chunks' in BAM bin (used for index building)
-void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) {
+void BamStandardIndex::BamStandardIndexPrivate::MergeChunks(void) {
// iterate over reference enties
- BamDefaultIndexData::iterator indexIter = m_indexData.begin();
- BamDefaultIndexData::iterator indexEnd = m_indexData.end();
+ BamStandardIndexData::iterator indexIter = m_indexData.begin();
+ BamStandardIndexData::iterator indexEnd = m_indexData.end();
for ( ; indexIter != indexEnd; ++indexIter ) {
// get BAM bin map for this reference
// get chunk vector for this bin
ChunkVector& binChunks = (*binIter).second;
- if ( binChunks.size() == 0 ) { continue; }
+ if ( binChunks.size() == 0 ) continue;
ChunkVector mergedChunks;
mergedChunks.push_back( binChunks[0] );
// writes in-memory index data out to file
// N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
-bool BamDefaultIndex::Write(const std::string& bamFilename) {
+bool BamStandardIndex::Write(const std::string& bamFilename) {
string indexFilename = bamFilename + ".bai";
FILE* indexStream = fopen(indexFilename.c_str(), "wb");
// write number of reference sequences
int32_t numReferenceSeqs = d->m_indexData.size();
- if ( m_isBigEndian ) { SwapEndian_32(numReferenceSeqs); }
+ if ( m_isBigEndian ) SwapEndian_32(numReferenceSeqs);
fwrite(&numReferenceSeqs, 4, 1, indexStream);
// iterate over reference sequences
- BamDefaultIndexData::const_iterator indexIter = d->m_indexData.begin();
- BamDefaultIndexData::const_iterator indexEnd = d->m_indexData.end();
+ BamStandardIndexData::const_iterator indexIter = d->m_indexData.begin();
+ BamStandardIndexData::const_iterator indexEnd = d->m_indexData.end();
for ( ; indexIter != indexEnd; ++ indexIter ) {
// get reference index data
// write number of bins
int32_t binCount = binMap.size();
- if ( m_isBigEndian ) { SwapEndian_32(binCount); }
+ if ( m_isBigEndian ) SwapEndian_32(binCount);
fwrite(&binCount, 4, 1, indexStream);
// iterate over bins
const ChunkVector& binChunks = (*binIter).second;
// save BAM bin key
- if ( m_isBigEndian ) { SwapEndian_32(binKey); }
+ if ( m_isBigEndian ) SwapEndian_32(binKey);
fwrite(&binKey, 4, 1, indexStream);
// save chunk count
int32_t chunkCount = binChunks.size();
- if ( m_isBigEndian ) { SwapEndian_32(chunkCount); }
+ if ( m_isBigEndian ) SwapEndian_32(chunkCount);
fwrite(&chunkCount, 4, 1, indexStream);
// iterate over chunks
// write linear offsets size
int32_t offsetSize = offsets.size();
- if ( m_isBigEndian ) { SwapEndian_32(offsetSize); }
+ if ( m_isBigEndian ) SwapEndian_32(offsetSize);
fwrite(&offsetSize, 4, 1, indexStream);
// iterate over linear offsets
// write linear offset value
uint64_t linearOffset = (*offsetIter);
- if ( m_isBigEndian ) { SwapEndian_64(linearOffset); }
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
fwrite(&linearOffset, 8, 1, indexStream);
}
}
// if block is full, get offset for next block, reset currentBlockCount
if ( currentBlockCount == d->m_blockSize ) {
-
d->m_indexData.push_back( BamToolsIndexEntry(blockStartOffset, blockStartId, blockStartPosition) );
blockStartOffset = m_BGZF->Tell();
currentBlockCount = 0;
}
// no index was found
- if ( previousOffset == -1 )
- return false;
+ if ( previousOffset == -1 ) return false;
// store offset & return success
offsets.push_back(previousOffset);
// read in block size
elementsRead = fread(&d->m_blockSize, sizeof(d->m_blockSize), 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_32(d->m_blockSize); }
+ if ( m_isBigEndian ) SwapEndian_32(d->m_blockSize);
// read in number of offsets
uint32_t numOffsets;
elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, indexStream);
- if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
+ if ( m_isBigEndian ) SwapEndian_32(numOffsets);
// reserve space for index data
d->m_indexData.reserve(numOffsets);
// write block size
int32_t blockSize = d->m_blockSize;
- if ( m_isBigEndian ) { SwapEndian_32(blockSize); }
+ if ( m_isBigEndian ) SwapEndian_32(blockSize);
fwrite(&blockSize, sizeof(blockSize), 1, indexStream);
// write number of offset entries
uint32_t numOffsets = d->m_indexData.size();
- if ( m_isBigEndian ) { SwapEndian_32(numOffsets); }
+ if ( m_isBigEndian ) SwapEndian_32(numOffsets);
fwrite(&numOffsets, sizeof(numOffsets), 1, indexStream);
// iterate over offset entries
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 17 August 2010 (DB)
+// Last modified: 3 September 2010 (DB)
// ---------------------------------------------------------------------------
-// Provides index functionality - both for the default (standardized) BAM
-// index format (.bai) as well as a BamTools-specific (nonstandard) index
-// format (.bti).
+// Provides index functionality - both for the standardized BAM index format
+// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti").
// ***************************************************************************
#ifndef BAM_INDEX_H
#define BAM_INDEX_H
+#include <iostream>
#include <string>
#include <vector>
#include "BamAux.h"
// BamIndex base class
class BamIndex {
+ // ctor & dtor
public:
- BamIndex(BamTools::BgzfData* bgzf,
- BamTools::BamReader* reader,
- bool isBigEndian);
+ BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian);
virtual ~BamIndex(void) { }
+ // index interface
public:
// creates index data (in-memory) from current reader data
virtual bool Build(void) =0;
+ // returns supported file extension
+ virtual const std::string Extension(void) const =0;
// calculates offset(s) for a given region
virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets) =0;
- // loads existing data from file into memory
- virtual bool Load(const std::string& filename) =0;
// returns whether reference has alignments or no
virtual bool HasAlignments(const int& referenceID);
+ // loads existing data from file into memory
+ virtual bool Load(const std::string& filename) =0;
// writes in-memory index data out to file
// N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
virtual bool Write(const std::string& bamFilename) =0;
+ // factory methods for returning proper BamIndex-derived type based on available index files
+ public:
+
+ // returns index based on BAM filename 'stub'
+ // checks first for preferred type, returns that type if found
+ // (if not found, attmempts to load other type(s), returns 0 if NONE found)
+ //
+ // ** default preferred type is BamToolsIndex ** use this anytime it exists
+ enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
+ static BamIndex* FromBamFilename(const std::string& bamFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian,
+ const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
+
+ // returns index based on explicitly named index file (or 0 if not found)
+ static BamIndex* FromIndexFilename(const std::string& indexFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian);
+
+ // data members
protected:
BamTools::BgzfData* m_BGZF;
BamTools::BamReader* m_reader;
};
// --------------------------------------------------
-// BamDefaultIndex class
+// BamStandardIndex class
//
-// implements default (per SAM/BAM spec) index file ops
-class BamDefaultIndex : public BamIndex {
+// implements standardized (per SAM/BAM spec) index file ops
+class BamStandardIndex : public BamIndex {
// ctor & dtor
public:
- BamDefaultIndex(BamTools::BgzfData* bgzf,
+ BamStandardIndex(BamTools::BgzfData* bgzf,
BamTools::BamReader* reader,
bool isBigEndian);
- ~BamDefaultIndex(void);
+ ~BamStandardIndex(void);
// interface (implements BamIndex virtual methods)
public:
// creates index data (in-memory) from current reader data
bool Build(void);
+ // returns supported file extension
+ const std::string Extension(void) const { return std::string(".bai"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// loads existing data from file into memory
// internal implementation
private:
- struct BamDefaultIndexPrivate;
- BamDefaultIndexPrivate* d;
+ struct BamStandardIndexPrivate;
+ BamStandardIndexPrivate* d;
};
// --------------------------------------------------
public:
// creates index data (in-memory) from current reader data
bool Build(void);
+ // returns supported file extension
+ const std::string Extension(void) const { return std::string(".bti"); }
// calculates offset(s) for a given region
bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets);
// loads existing data from file into memory
BamToolsIndexPrivate* d;
};
+// --------------------------------------------------
+// BamIndex factory methods
+//
+// return proper BamIndex-derived type based on available index files
+
+inline
+BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian,
+ const BamIndex::PreferredIndexType& type)
+{
+ // ---------------------------------------------------
+ // attempt to load preferred type first
+
+ const std::string bamtoolsIndexFilename = bamFilename + ".bti";
+ const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
+ if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
+ return new BamToolsIndex(bgzf, reader, isBigEndian);
+
+ const std::string standardIndexFilename = bamFilename + ".bai";
+ const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
+ if ( (type == BamIndex::STANDARD) && standardIndexExists )
+ return new BamStandardIndex(bgzf, reader, isBigEndian);
+
+ // ----------------------------------------------------
+ // preferred type could not be found, try other (non-preferred) types
+ // if none found, return 0
+
+ if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian);
+ if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian);
+ return 0;
+}
+
+inline
+BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
+ BamTools::BgzfData* bgzf,
+ BamTools::BamReader* reader,
+ bool isBigEndian)
+{
+ // see if specified file exists
+ const bool indexExists = BamTools::FileExists(indexFilename);
+ if ( !indexExists ) return 0;
+
+ const std::string bamtoolsIndexExtension(".bti");
+ const std::string standardIndexExtension(".bai");
+
+ // if has bamtoolsIndexExtension
+ if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
+ return new BamToolsIndex(bgzf, reader, isBigEndian);
+
+ // if has standardIndexExtension
+ if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
+ return new BamStandardIndex(bgzf, reader, isBigEndian);
+
+ // otherwise, unsupported file type
+ return 0;
+}
+
} // namespace BamTools
-#endif // BAM_INDEX_H
\ No newline at end of file
+#endif // BAM_INDEX_H
using namespace BamTools;
using namespace std;
-namespace BamTools {
-
-bool FileExists(const string& filename) {
- ifstream f(filename.c_str(), ifstream::in);
- return !f.fail();
-}
-
-} // namespace BamTools
-
// -----------------------------------------------------
// BamMultiReader implementation
// -----------------------------------------------------
bool openedOK = true;
if (openIndexes) {
-
- const string defaultIndexFilename = filename + ".bai";
- const string bamToolsIndexFilename = filename + ".bti";
-
- // if default BAM index requested and one exists
- if ( useDefaultIndex && FileExists(defaultIndexFilename) )
- openedOK = reader->Open(filename, defaultIndexFilename);
-
- // else see if BamTools index exists
- else if ( FileExists(bamToolsIndexFilename) )
- openedOK = reader->Open(filename, bamToolsIndexFilename);
- // else none exist... just open without
- else
- openedOK = reader->Open(filename);
+ // leave index filename empty
+ // this allows BamReader & BamIndex to search for any available
+ // useDefaultIndex gives hint to prefer BAI over BTI
+ openedOK = reader->Open(filename, "", true, useDefaultIndex);
}
// ignoring index file(s)
}
}
- // TODO; any more error handling when openedOKvis false ??
+ // TODO; any further error handling when openedOK is false ??
else
return false;
}
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 2 September 2010 (DB)\r
+// Last modified: 3 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Functionality for simultaneously reading multiple BAM files\r
// ***************************************************************************\r
// indexes.\r
// @coreMode - setup our first alignments using GetNextAlignmentCore();\r
// also useful for merging\r
- // @useDefaultIndex - look for default BAM index ".bai" first. If false, \r
- // or if ".bai" does not exist, will look for BamTools index ".bti". If \r
- // neither exist, will open without an index\r
- bool Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false, bool useDefaultIndex = true);\r
+ // @preferStandardIndex - look for standard BAM index ".bai" first. If false, \r
+ // will look for BamTools index ".bti". \r
+ bool Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = true);\r
\r
// returns whether underlying BAM readers ALL have an index loaded\r
// this is useful to indicate whether Jump() or SetRegion() are possible\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 30 August 2010 (DB)\r
+// Last modified: 3 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
\r
// file operations\r
void Close(void);\r
- bool Jump(int refID, int position = 0);\r
- bool Open(const string& filename, const string& indexFilename = "");\r
+ bool Jump(int refID, int position);\r
+ bool Open(const std::string& filename, \r
+ const std::string& indexFilename, \r
+ const bool lookForIndex, \r
+ const bool preferStandardIndex);\r
bool Rewind(void);\r
bool SetRegion(const BamRegion& region);\r
\r
int GetReferenceID(const string& refName) const;\r
\r
// index operations\r
- bool CreateIndex(bool useDefaultIndex);\r
+ bool CreateIndex(bool useStandardIndex);\r
\r
// -------------------------------\r
// internal methods\r
// clear out inernal index data structure\r
void ClearIndex(void);\r
// loads index from BAM index file\r
- bool LoadIndex(void);\r
+ bool LoadIndex(const bool lookForIndex, const bool preferStandardIndex);\r
};\r
\r
// -----------------------------------------------------\r
void BamReader::Close(void) { d->Close(); }\r
bool BamReader::IsIndexLoaded(void) const { return d->IsIndexLoaded; }\r
bool BamReader::IsOpen(void) const { return d->mBGZF.IsOpen; }\r
-bool BamReader::Jump(int refID, int position) { \r
+bool BamReader::Jump(int refID, int position) \r
+{ \r
d->Region.LeftRefID = refID;\r
d->Region.LeftPosition = position;\r
d->IsLeftBoundSpecified = true;\r
d->IsRightBoundSpecified = false;\r
return d->Jump(refID, position); \r
}\r
-bool BamReader::Open(const string& filename, const string& indexFilename) { return d->Open(filename, indexFilename); }\r
+bool BamReader::Open(const std::string& filename, \r
+ const std::string& indexFilename, \r
+ const bool lookForIndex, \r
+ const bool preferStandardIndex) \r
+{ \r
+ return d->Open(filename, indexFilename, lookForIndex, preferStandardIndex); \r
+}\r
bool BamReader::Rewind(void) { return d->Rewind(); }\r
bool BamReader::SetRegion(const BamRegion& region) { return d->SetRegion(region); }\r
bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) {\r
const std::string BamReader::GetFilename(void) const { return d->Filename; }\r
\r
// index operations\r
-bool BamReader::CreateIndex(bool useDefaultIndex) { return d->CreateIndex(useDefaultIndex); }\r
+bool BamReader::CreateIndex(bool useStandardIndex) { return d->CreateIndex(useStandardIndex); }\r
\r
// -----------------------------------------------------\r
// BamReaderPrivate implementation\r
\r
// calculate character lengths/offsets\r
const unsigned int dataLength = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;\r
-// const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength;\r
const unsigned int seqDataOffset = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);\r
const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2;\r
const unsigned int tagDataOffset = qualDataOffset + bAlignment.SupportData.QuerySequenceLength;\r
\r
// set up char buffers\r
const char* allCharData = bAlignment.SupportData.AllCharData.data();\r
-// uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);\r
const char* seqData = ((const char*)allCharData) + seqDataOffset;\r
const char* qualData = ((const char*)allCharData) + qualDataOffset;\r
char* tagData = ((char*)allCharData) + tagDataOffset;\r
\r
// store alignment name (relies on null char in name as terminator)\r
bAlignment.Name.assign((const char*)(allCharData)); \r
- \r
-// // save CigarOps \r
-// CigarOp op;\r
-// bAlignment.CigarData.clear();\r
-// bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations);\r
-// for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) {\r
-// \r
-// // swap if necessary\r
-// if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }\r
-// \r
-// // build CigarOp structure\r
-// op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);\r
-// op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];\r
-// \r
-// // save CigarOp\r
-// bAlignment.CigarData.push_back(op);\r
-// }\r
- \r
+\r
// save query sequence\r
bAlignment.QueryBases.clear();\r
bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength);\r
IsRegionSpecified = false;\r
}\r
\r
-// create BAM index from BAM file (keep structure in memory) and write to default index output file\r
-bool BamReader::BamReaderPrivate::CreateIndex(bool useDefaultIndex) {\r
+// creates index for BAM file, saves to file\r
+// default behavior is to create the BAM standard index (".bai")\r
+// set flag to false to create the BamTools-specific index (".bti")\r
+bool BamReader::BamReaderPrivate::CreateIndex(bool useStandardIndex) {\r
\r
// clear out prior index data\r
ClearIndex();\r
\r
- // create default index\r
- if ( useDefaultIndex )\r
+ // create index based on type requested\r
+ if ( useStandardIndex ) \r
NewIndex = new BamDefaultIndex(&mBGZF, Parent, IsBigEndian);\r
// create BamTools 'custom' index\r
else\r
BamReader::BamReaderPrivate::RegionState state = IsOverlap(bAlignment);\r
\r
// if alignment lies after region, return false\r
- if ( state == AFTER_REGION ) \r
- return false;\r
+ if ( state == AFTER_REGION ) return false;\r
\r
while ( state != WITHIN_REGION ) {\r
// if no valid alignment available (likely EOF) return failure\r
if ( !LoadNextAlignment(bAlignment) ) return false;\r
// if alignment lies after region, return false (no available read within region)\r
state = IsOverlap(bAlignment);\r
- if ( state == AFTER_REGION) return false;\r
- \r
+ if ( state == AFTER_REGION ) return false;\r
}\r
\r
// return success (alignment found that overlaps region)\r
vector<string> refNames;\r
RefVector::const_iterator refIter = References.begin();\r
RefVector::const_iterator refEnd = References.end();\r
- for ( ; refIter != refEnd; ++refIter) {\r
+ for ( ; refIter != refEnd; ++refIter) \r
refNames.push_back( (*refIter).RefName );\r
- }\r
\r
// return 'index-of' refName ( if not found, returns refNames.size() )\r
return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName));\r
// get BAM header text length\r
mBGZF.Read(buffer, 4);\r
unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer);\r
- if ( IsBigEndian ) { SwapEndian_32(headerTextLength); }\r
+ if ( IsBigEndian ) SwapEndian_32(headerTextLength); \r
\r
// get BAM header text\r
char* headerText = (char*)calloc(headerTextLength + 1, 1);\r
free(headerText);\r
}\r
\r
-// load existing index data from BAM index file (".bai"), return success/fail\r
-bool BamReader::BamReaderPrivate::LoadIndex(void) {\r
+// load existing index data from BAM index file (".bti" OR ".bai"), return success/fail\r
+bool BamReader::BamReaderPrivate::LoadIndex(const bool lookForIndex, const bool preferStandardIndex) {\r
\r
// clear out any existing index data\r
ClearIndex();\r
\r
- // skip if index file empty\r
- if ( IndexFilename.empty() )\r
- return false;\r
-\r
- // check supplied filename for index type\r
- size_t defaultExtensionFound = IndexFilename.find(".bai");\r
- size_t customExtensionFound = IndexFilename.find(".bti");\r
- \r
- // if SAM/BAM default (".bai")\r
- if ( defaultExtensionFound != string::npos )\r
- NewIndex = new BamDefaultIndex(&mBGZF, Parent, IsBigEndian);\r
- \r
- // if BamTools custom index (".bti")\r
- else if ( customExtensionFound != string::npos )\r
- NewIndex = new BamToolsIndex(&mBGZF, Parent, IsBigEndian);\r
+ // if no index filename provided, so we need to look for available index files\r
+ if ( IndexFilename.empty() ) {\r
+ \r
+ // attempt to load BamIndex based on current Filename provided & preferStandardIndex flag\r
+ const BamIndex::PreferredIndexType type = (preferStandardIndex ? BamIndex::STANDARD : BamIndex::BAMTOOLS);\r
+ NewIndex = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, IsBigEndian, type);\r
+ \r
+ // if null, return failure\r
+ if ( NewIndex == 0 ) return false;\r
+ \r
+ // generate proper IndexFilename based on type of index created\r
+ IndexFilename = Filename + NewIndex->Extension();\r
+ }\r
\r
- // else unknown\r
else {\r
- printf("ERROR: Unknown index file extension.\n");\r
- return false;\r
+ // attempt to load BamIndex based on IndexFilename provided by client\r
+ NewIndex = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent, IsBigEndian);\r
+ \r
+ // if null, return failure\r
+ if ( NewIndex == 0 ) return false; \r
}\r
\r
- // return success of loading index data from file\r
+ // an index file was found\r
+ // return success of loading the index data from file\r
IsIndexLoaded = NewIndex->Load(IndexFilename);\r
return IsIndexLoaded;\r
}\r
mBGZF.Read(buffer, 4);\r
bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);\r
if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); }\r
- if ( bAlignment.SupportData.BlockLength == 0 ) { return false; }\r
+ if ( bAlignment.SupportData.BlockLength == 0 ) return false;\r
\r
// read in core alignment data, make sure the right size of data was read\r
char x[BAM_CORE_SIZE];\r
- if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }\r
+ if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) return false; \r
\r
if ( IsBigEndian ) {\r
- for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) ) { \r
- SwapEndian_32p(&x[i]); \r
- }\r
+ for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) ) \r
+ SwapEndian_32p(&x[i]); \r
}\r
\r
// set BamAlignment 'core' and 'support' data\r
char buffer[4];\r
mBGZF.Read(buffer, 4);\r
unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer);\r
- if ( IsBigEndian ) { SwapEndian_32(numberRefSeqs); }\r
- if (numberRefSeqs == 0) { return; }\r
+ if ( IsBigEndian ) SwapEndian_32(numberRefSeqs);\r
+ if ( numberRefSeqs == 0 ) return;\r
References.reserve((int)numberRefSeqs);\r
\r
// iterate over all references in header\r
// get length of reference name\r
mBGZF.Read(buffer, 4);\r
unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer);\r
- if ( IsBigEndian ) { SwapEndian_32(refNameLength); }\r
+ if ( IsBigEndian ) SwapEndian_32(refNameLength);\r
char* refName = (char*)calloc(refNameLength, 1);\r
\r
// get reference name and reference sequence length\r
mBGZF.Read(refName, refNameLength);\r
mBGZF.Read(buffer, 4);\r
int refLength = BgzfData::UnpackSignedInt(buffer);\r
- if ( IsBigEndian ) { SwapEndian_32(refLength); }\r
+ if ( IsBigEndian ) SwapEndian_32(refLength); \r
\r
// store data for reference\r
RefData aReference;\r
}\r
\r
// opens BAM file (and index)\r
-bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) {\r
+bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename, const bool lookForIndex, const bool preferStandardIndex) {\r
\r
+ // store filenames\r
Filename = filename;\r
IndexFilename = indexFilename;\r
\r
// open the BGZF file for reading, return false on failure\r
- if ( !mBGZF.Open(filename, "rb") ) \r
- return false;\r
+ if ( !mBGZF.Open(filename, "rb") ) return false; \r
\r
// retrieve header text & reference data\r
LoadHeaderData();\r
// store file offset of first alignment\r
AlignmentsBeginOffset = mBGZF.Tell();\r
\r
- // open index file & load index data (if exists)\r
- if ( !IndexFilename.empty() )\r
- LoadIndex();\r
+ // if no index filename provided \r
+ if ( IndexFilename.empty() ) {\r
+ \r
+ // client did not specify that index SHOULD be found\r
+ // useful for cases where sequential access is all that is required\r
+ if ( !lookForIndex ) return true; \r
+ \r
+ // otherwise, look for index file, return success/fail\r
+ return LoadIndex(lookForIndex, preferStandardIndex) ;\r
+ }\r
\r
- // return success\r
- return true;\r
+ // client supplied an index filename\r
+ // attempt to load index data, return success/fail\r
+ return LoadIndex(lookForIndex, preferStandardIndex); \r
}\r
\r
// returns BAM file pointer to beginning of alignment data\r
Region = region;\r
\r
// set flags\r
- if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) \r
- IsLeftBoundSpecified = true;\r
- if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) \r
- IsRightBoundSpecified = true;\r
+ if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) IsLeftBoundSpecified = true;\r
+ if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) IsRightBoundSpecified = true;\r
\r
// attempt jump to beginning of region, return success/fail of Jump()\r
return Jump( Region.LeftRefID, Region.LeftPosition );\r
// Marth Lab, Department of Biology, Boston College\r
// All rights reserved.\r
// ---------------------------------------------------------------------------\r
-// Last modified: 2 September 2010 (DB)\r
+// Last modified: 3 September 2010 (DB)\r
// ---------------------------------------------------------------------------\r
// Uses BGZF routines were adapted from the bgzf.c code developed at the Broad\r
// Institute.\r
// performs random-access jump to reference, position\r
bool Jump(int refID, int position = 0);\r
// opens BAM file (and optional BAM index file, if provided)\r
- bool Open(const std::string& filename, const std::string& indexFilename = "");\r
+ // @lookForIndex - if no indexFilename provided, look for an existing index file\r
+ // @preferStandardIndex - if true, give priority in index file searching to standard BAM index\r
+ bool Open(const std::string& filename, \r
+ const std::string& indexFilename = "", \r
+ const bool lookForIndex = false, \r
+ const bool preferStandardIndex = false);\r
// returns file pointer to beginning of alignments\r
bool Rewind(void);\r
// sets a region of interest (with left & right bound reference/position)\r
// BAM index operations\r
// ----------------------\r
\r
- // creates index for BAM file, saves to file (default = bamFilename + ".bai")\r
- bool CreateIndex(bool useDefaultIndex = true);\r
+ // creates index for BAM file, saves to file\r
+ // default behavior is to create the BAM standard index (".bai")\r
+ // set flag to false to create the BamTools-specific index (".bti")\r
+ bool CreateIndex(bool useStandardIndex = true);\r
\r
// private implementation\r
private:\r