1 // ***************************************************************************
2 // BamIndex.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 November 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the default (standardized) BAM
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index format (.bti).
11 // ***************************************************************************
13 #include <api/BamIndex.h>
14 #include <api/BamReader.h>
16 #include <api/internal/BamStandardIndex_p.h>
17 #include <api/internal/BamToolsIndex_p.h>
18 using namespace BamTools;
19 using namespace BamTools::Internal;
28 // --------------------------------------------------
29 // BamIndex factory methods
31 // returns index based on BAM filename 'stub'
32 // checks first for preferred type, returns that type if found
33 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
35 // ** default preferred type is BamToolsIndex ** use this anytime it exists
36 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
37 BamTools::BgzfData* bgzf,
38 BamTools::BamReader* reader,
39 const BamIndex::PreferredIndexType& type)
41 // ---------------------------------------------------
42 // attempt to load preferred type first
44 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
45 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
46 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
47 return new BamToolsIndex(bgzf, reader);
49 const std::string standardIndexFilename = bamFilename + ".bai";
50 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
51 if ( (type == BamIndex::STANDARD) && standardIndexExists )
52 return new BamStandardIndex(bgzf, reader);
54 // ----------------------------------------------------
55 // preferred type could not be found, try other (non-preferred) types
56 // if none found, return 0
58 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader);
59 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader);
63 // returns index based on explicitly named index file (or 0 if not found)
64 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
65 BamTools::BgzfData* bgzf,
66 BamTools::BamReader* reader)
68 // see if specified file exists
69 const bool indexExists = BamTools::FileExists(indexFilename);
70 if ( !indexExists ) return 0;
72 const std::string bamtoolsIndexExtension(".bti");
73 const std::string standardIndexExtension(".bai");
75 // if has bamtoolsIndexExtension
76 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
77 return new BamToolsIndex(bgzf, reader);
79 // if has standardIndexExtension
80 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
81 return new BamStandardIndex(bgzf, reader);
83 // otherwise, unsupported file type
87 // -------------------------------
88 // BamIndex implementation
// ctor
// bgzf   : BGZF data handle (its use is not visible in the lines shown here)
// reader : BamReader associated with this index
// NOTE(review): only part of the member-initializer list is visible in this view.
// Confirm that m_indexStream is initialized to 0 - IsOpen() tests it against 0 and
// the destructor passes it to fclose().
91 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader)
// initial cache mode is LimitedIndexCaching
94 , m_cacheMode(BamIndex::LimitedIndexCaching)
// store the reader's reference data in m_references, but only when m_reader is
// non-null and reports itself open
// (m_reader is presumably initialized from 'reader' - initializer not visible here; confirm)
97 if ( m_reader && m_reader->IsOpen() )
98 m_references = m_reader->GetReferenceData();
102 BamIndex::~BamIndex(void) {
104 fclose(m_indexStream);
107 // return true if FILE* is open
108 bool BamIndex::IsOpen(void) const {
109 return ( m_indexStream != 0 );
112 // loads existing data from file into memory
113 bool BamIndex::Load(const string& filename) {
115 // open index file, abort on error
116 if ( !OpenIndexFile(filename, "rb") ) {
117 fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
121 // check magic number
122 if ( !LoadHeader() ) {
123 fclose(m_indexStream);
127 // load reference data (but only keep in memory if full caching requested)
128 bool saveInitialLoad = ( m_cacheMode == BamIndex::FullIndexCaching );
129 if ( !LoadAllReferences(saveInitialLoad) ) {
130 fclose(m_indexStream);
134 // update index cache based on selected mode
141 // opens index file for reading/writing, return true if opened OK
142 bool BamIndex::OpenIndexFile(const string& filename, const string& mode) {
143 m_indexStream = fopen(filename.c_str(), mode.c_str());
144 return ( m_indexStream != 0 );
147 // rewind index file to beginning of index data, return true if rewound OK
148 bool BamIndex::Rewind(void) {
149 return ( fseek64(m_indexStream, DataBeginOffset(), SEEK_SET) == 0 );
152 // change the index caching behavior
// mode : requested BamIndexCacheMode
// The visible guard only enters the branch when 'mode' differs from the current m_cacheMode.
// NOTE(review): the statements inside the guarded branch are not visible in this view;
// presumably they store 'mode' in m_cacheMode and refresh the cache via UpdateCache() - confirm.
153 void BamIndex::SetCacheMode(const BamIndexCacheMode mode) {
154 if ( mode != m_cacheMode ) {
160 // updates in-memory cache of index data, depending on current cache mode
// Returns immediately when no index file is open; otherwise dispatches on m_cacheMode.
// NOTE(review): several statements of this switch are not visible in this view
// (e.g. any 'break's, the branching around the limited-caching calls, and the body of
// the NoIndexCaching case). The comments below describe only the visible calls.
161 void BamIndex::UpdateCache(void) {
163 // skip if file not open
164 if ( !IsOpen() ) return;
166 // reflect requested cache mode behavior
167 switch ( m_cacheMode ) {
// full caching: load all references, passing 'true'
// (Load() passes this same flag as 'true' only when full caching is requested, to keep the data in memory)
169 case (BamIndex::FullIndexCaching) :
171 LoadAllReferences(true);
// limited caching: when a full data cache is currently held, reduce it via
// KeepOnlyFirstReferenceOffsets()
174 case (BamIndex::LimitedIndexCaching) :
175 if ( HasFullDataCache() )
176 KeepOnlyFirstReferenceOffsets();
// NOTE(review): presumably the alternative path taken when no full cache is held -
// the lines that select it are not visible here; confirm
179 SkipToFirstReference();
180 LoadFirstReference(true);
// no caching: case body not visible in this view
183 case(BamIndex::NoIndexCaching) :
192 // writes in-memory index data out to file
// bamFilename : BAM file name; the index file name is bamFilename + Extension()
// Visible steps: open the index file in "wb" mode, write the header (WriteHeader) and
// the index data (WriteAllReferences), flush and close the stream, then re-open the
// same file in "rb" mode. Each failure is reported on stderr.
// NOTE(review): the statements that end each error branch, and the final return, are
// not visible in this view - confirm what is returned (or whether the process exits)
// on failure.
// NOTE(review): in the two write-failure branches fclose() is called but m_indexStream
// is not reset in the visible lines, so IsOpen() would still report true for a closed
// stream - verify.
193 bool BamIndex::Write(const string& bamFilename) {
195 // open index file for writing
196 string indexFilename = bamFilename + Extension();
197 if ( !OpenIndexFile(indexFilename, "wb") ) {
198 fprintf(stderr, "ERROR: Could not open file to save index.\n");
202 // write index header data
203 if ( !WriteHeader() ) {
204 fprintf(stderr, "ERROR: There was a problem writing index metadata to new index file.\n");
205 fflush(m_indexStream);
206 fclose(m_indexStream);
210 // write main index data
211 if ( !WriteAllReferences() ) {
212 fprintf(stderr, "ERROR: There was a problem writing index data to new index file.\n");
213 fflush(m_indexStream);
214 fclose(m_indexStream);
218 // flush any remaining output and close the file that was opened for writing
219 fflush(m_indexStream);
220 fclose(m_indexStream);
222 // re-open index file for later reading
// (OpenIndexFile overwrites m_indexStream, so it is 0 here if the re-open fails)
223 if ( !OpenIndexFile(indexFilename, "rb") ) {
224 fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n");
228 // return success/failure of write