]> git.donarmstrong.com Git - bamtools.git/commitdiff
Updated index and count sub-tools to recognize new (.bti) index file format
authorDerek <derekwbarnett@gmail.com>
Fri, 9 Jul 2010 16:08:31 +0000 (12:08 -0400)
committerDerek <derekwbarnett@gmail.com>
Fri, 9 Jul 2010 16:08:31 +0000 (12:08 -0400)
bamtools_count.cpp
bamtools_index.cpp

index 29b5507f41d996e3a48b03ddaa785087112d1231..187dc9377f35e0442906e87c0835ae6bdc8c7db5 100644 (file)
@@ -62,7 +62,7 @@ CountTool::CountTool(void)
     //Options::AddValueOption("-index", "BAM index filename", "the BAM index file",  "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts);
     
     OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
-    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai or <filename>.bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
 }
 
 CountTool::~CountTool(void) { 
@@ -103,22 +103,63 @@ int CountTool::Run(int argc, char* argv[]) {
         BamRegion region;
         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
 
-            // check if there are index files *.bai corresponding to the input files
-            bool hasIndexes = true;
+            // check if there are index files *.bai/*.bti corresponding to the input files
+            bool hasDefaultIndex   = false;
+            bool hasBamtoolsIndex  = false;
+            bool hasNoIndex        = false;
+            int defaultIndexCount   = 0;
+            int bamtoolsIndexCount = 0;
             for (vector<string>::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) {
-                if (!Utilities::FileExists(*f + ".bai")) {
-                    errorStream << "could not find index file " << *f + ".bai" 
-                        << ", parsing whole file(s) to get alignment counts for target region" << endl;
-                    hasIndexes = false;
+              
+                if ( Utilities::FileExists(*f + ".bai") ) {
+                    hasDefaultIndex = true;
+                    ++defaultIndexCount;
+                }       
+                
+                if ( Utilities::FileExists(*f + ".bti") ) {
+                    hasBamtoolsIndex = true;
+                    ++bamtoolsIndexCount;
+                }
+                  
+                if ( !hasDefaultIndex && !hasBamtoolsIndex ) {
+                    hasNoIndex = true;
+                    cerr << "*WARNING - could not find index file for " << *f  
+                         << ", parsing whole file(s) to get alignment counts for target region" 
+                         << " (could be slow)" << endl;
                     break;
                 }
             }
-            // has index, we can jump directly to 
-            if ( hasIndexes ) {
+            
+            // determine if index file types are heterogeneous
+            bool hasDifferentIndexTypes = false;
+            if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) {
+                hasDifferentIndexTypes = true;
+                cerr << "*WARNING - different index file formats found"  
+                         << ", parsing whole file(s) to get alignment counts for target region" 
+                         << " (could be slow)" << endl;
+            }
+            
+            // if any input file has no index, or if input files use different index formats
+            // can't use BamMultiReader to jump directly (**for now**)
+            if ( hasNoIndex || hasDifferentIndexTypes ) {
+                
+                // read through sequentially, counting all overlapping reads
+                BamAlignment al;
+                while( reader.GetNextAlignmentCore(al) ) {
+                    if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
+                         (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
+                    {
+                        ++alignmentCount;
+                    }
+                }
+            }
+            
+            // has index file for each input file (and same format)
+            else {
               
                 // this is kind of a hack...?
                 BamMultiReader reader;
-                reader.Open(m_settings->InputFiles, true, true);
+                reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex );
               
                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
                    foundError = true;
@@ -128,27 +169,8 @@ int CountTool::Run(int argc, char* argv[]) {
                     while ( reader.GetNextAlignmentCore(al) )
                         ++alignmentCount;
                 }
-
-            } else {
-              
-                // read through sequentially, until first overlapping read is found
-                BamAlignment al;
-                bool alignmentFound(false);
-                //reader.Open(m_settings->InputFiles, false, true);
-                while( reader.GetNextAlignmentCore(al) ) {
-                    if ( (al.RefID == region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) ) {
-                        alignmentFound = true;
-                        break;
-                    }
-                }
-                
-                // if overlapping alignment found (not EOF), increment counter
-                if ( alignmentFound ) ++ alignmentCount;
             }
             
-            // -----------------------------
-            // count alignments until stop hit
-            
         } else {
             foundError = true;
             errorStream << "Could not parse REGION: " << m_settings->Region << endl;
index 6287908b433ebd5317072b28b6088d732d34a842..41dd5c71f095004ec2312672d055292d808f0179 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 26 May 2010
+// Last modified: 7 July 2010
 // ---------------------------------------------------------------------------
 // Creates a BAM index (".bai") file for the provided BAM file.
 // ***************************************************************************
@@ -25,6 +25,7 @@ struct IndexTool::IndexSettings {
 
     // flags
     bool HasInputBamFilename;
+    bool IsUsingBamtoolsIndex;
 
     // filenames
     string InputBamFilename;
@@ -32,6 +33,7 @@ struct IndexTool::IndexSettings {
     // constructor
     IndexSettings(void)
         : HasInputBamFilename(false)
+        , IsUsingBamtoolsIndex(false)
         , InputBamFilename(Options::StandardIn())
     { }
 };  
@@ -44,11 +46,12 @@ IndexTool::IndexTool(void)
     , m_settings(new IndexSettings)
 {
     // set program details
-    Options::SetProgramInfo("bamtools index", "creates index for BAM file", "-in <filename>");
+    Options::SetProgramInfo("bamtools index", "creates index for BAM file", "[-in <filename>] [-bti]");
     
     // set up options 
     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
     Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
+    Options::AddOption("-bti", "use (non-standard) BamTools indexing scheme", m_settings->IsUsingBamtoolsIndex, IO_Opts);
 }
 
 IndexTool::~IndexTool(void) {
@@ -71,7 +74,8 @@ int IndexTool::Run(int argc, char* argv[]) {
     reader.Open(m_settings->InputBamFilename);
     
     // create index for BAM file
-    reader.CreateIndex();
+    bool useDefaultIndex = !m_settings->IsUsingBamtoolsIndex;
+    reader.CreateIndex(useDefaultIndex);
     
     // clean & exit
     reader.Close();