From 9f32927e1a745cf27af58867f7182f6ee2e26ed8 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 9 Jul 2010 12:08:31 -0400 Subject: [PATCH] Updated index and count sub-tools to recognize new (.bti) index file format --- bamtools_count.cpp | 80 +++++++++++++++++++++++++++++----------------- bamtools_index.cpp | 10 ++++-- 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/bamtools_count.cpp b/bamtools_count.cpp index 29b5507..187dc93 100644 --- a/bamtools_count.cpp +++ b/bamtools_count.cpp @@ -62,7 +62,7 @@ CountTool::CountTool(void) //Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts); OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai or .bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } CountTool::~CountTool(void) { @@ -103,22 +103,63 @@ int CountTool::Run(int argc, char* argv[]) { BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // check if there are index files *.bai corresponding to the input files - bool hasIndexes = true; + // check if there are index files *.bai/*.bti corresponding to the input files + bool hasDefaultIndex = false; + bool hasBamtoolsIndex = false; + bool hasNoIndex = false; + int defaultIndexCount = 0; + int bamtoolsIndexCount = 0; for (vector::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) { - if (!Utilities::FileExists(*f + ".bai")) { - errorStream << "could not find index file " << *f + ".bai" - << ", parsing whole file(s) to get alignment counts for target region" << endl; - hasIndexes = false; + + if ( Utilities::FileExists(*f + ".bai") ) { + hasDefaultIndex = true; + ++defaultIndexCount; + } + + if ( Utilities::FileExists(*f + ".bti") ) { + hasBamtoolsIndex = true; + ++bamtoolsIndexCount; + } + + if ( !hasDefaultIndex && !hasBamtoolsIndex ) { + hasNoIndex = true; + cerr << "*WARNING - could not find index file for " << *f + << ", parsing whole file(s) to get alignment counts for target region" + << " (could be slow)" << endl; break; } } - // has index, we can jump directly to - if ( hasIndexes ) { + + // determine if index file types are heterogeneous + bool hasDifferentIndexTypes = false; + if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) { + hasDifferentIndexTypes = true; + cerr << "*WARNING - different index file formats found" + << ", parsing whole file(s) to get alignment counts for target region" + << " (could be slow)" << endl; + } + + // if any input file has no index, or if input files use different index formats + // can't use BamMultiReader to jump directly (**for now**) + if ( hasNoIndex || hasDifferentIndexTypes ) { + + // read through sequentially, counting all overlapping reads + BamAlignment al; + while( reader.GetNextAlignmentCore(al) ) { + if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + { + ++alignmentCount; + } + } + } + + // has index file for each input file (and same format) + else { // this is kind of a hack...? BamMultiReader reader; - reader.Open(m_settings->InputFiles, true, true); + reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex ); if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { foundError = true; @@ -128,27 +169,8 @@ int CountTool::Run(int argc, char* argv[]) { while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } - - } else { - - // read through sequentially, until first overlapping read is found - BamAlignment al; - bool alignmentFound(false); - //reader.Open(m_settings->InputFiles, false, true); - while( reader.GetNextAlignmentCore(al) ) { - if ( (al.RefID == region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) ) { - alignmentFound = true; - break; - } - } - - // if overlapping alignment found (not EOF), increment counter - if ( alignmentFound ) ++ alignmentCount; } - // ----------------------------- - // count alignments until stop hit - } else { foundError = true; errorStream << "Could not parse REGION: " << m_settings->Region << endl; diff --git a/bamtools_index.cpp b/bamtools_index.cpp index 6287908..41dd5c7 100644 --- a/bamtools_index.cpp +++ b/bamtools_index.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 26 May 2010 +// Last modified: 7 July 2010 // --------------------------------------------------------------------------- // Creates a BAM index (".bai") file for the provided BAM file. // *************************************************************************** @@ -25,6 +25,7 @@ struct IndexTool::IndexSettings { // flags bool HasInputBamFilename; + bool IsUsingBamtoolsIndex; // filenames string InputBamFilename; @@ -32,6 +33,7 @@ struct IndexTool::IndexSettings { // constructor IndexSettings(void) : HasInputBamFilename(false) + , IsUsingBamtoolsIndex(false) , InputBamFilename(Options::StandardIn()) { } }; @@ -44,11 +46,12 @@ IndexTool::IndexTool(void) , m_settings(new IndexSettings) { // set program details - Options::SetProgramInfo("bamtools index", "creates index for BAM file", "-in "); + Options::SetProgramInfo("bamtools index", "creates index for BAM file", "[-in ] [-bti]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); + Options::AddOption("-bti", "use (non-standard) BamTools indexing scheme", m_settings->IsUsingBamtoolsIndex, IO_Opts); } IndexTool::~IndexTool(void) { @@ -71,7 +74,8 @@ int IndexTool::Run(int argc, char* argv[]) { reader.Open(m_settings->InputBamFilename); // create index for BAM file - reader.CreateIndex(); + bool useDefaultIndex = !m_settings->IsUsingBamtoolsIndex; + reader.CreateIndex(useDefaultIndex); // clean & exit reader.Close(); -- 2.39.5