]> git.donarmstrong.com Git - bamtools.git/blobdiff - bamtools_count.cpp
Minor formatting cleanup in BamIndex.*
[bamtools.git] / bamtools_count.cpp
index 7a2de0da210b40844e2528f6b64076078c6884fb..4bd7c82bad7d1bcd8b98151e0b6d85b1b9b99951 100644 (file)
@@ -20,6 +20,7 @@
 #include "bamtools_options.h"
 #include "bamtools_utilities.h"
 #include "BamReader.h"
+#include "BamMultiReader.h"
 
 using namespace std;
 using namespace BamTools;
@@ -30,21 +31,17 @@ using namespace BamTools;
 struct CountTool::CountSettings {
 
     // flags
-    bool HasBamIndexFilename;
-    bool HasInputBamFilename;
+    bool HasInput;
     bool HasRegion;
 
     // filenames
-    string BamIndexFilename;
-    string InputBamFilename;
+    vector<string> InputFiles;
     string Region;
     
     // constructor
     CountSettings(void)
-        : HasBamIndexFilename(false)
-        , HasInputBamFilename(false)
+        : HasInput(false)
         , HasRegion(false)
-        , InputBamFilename(Options::StandardIn())
     { }  
 }; 
   
@@ -56,15 +53,15 @@ CountTool::CountTool(void)
     , m_settings(new CountSettings)
 { 
     // set program details
-    Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in <filename> [-region REGION -index <filename>]");
+    Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in <filename> [-region <REGION>]");
     
     // set up options 
     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
-    Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn());
-    Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts);
+    Options::AddValueOption("-in",  "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,  m_settings->InputFiles, IO_Opts);
+    //Options::AddValueOption("-index", "BAM index filename", "the BAM index file",  "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts);
     
     OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
-    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for optimal performance.", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+    Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai or <filename>.bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
 }
 
 CountTool::~CountTool(void) { 
@@ -82,9 +79,11 @@ int CountTool::Run(int argc, char* argv[]) {
     // parse command line arguments
     Options::Parse(argc, argv, 1);
 
-    //open our BAM reader
-    BamReader reader;
-    reader.Open(m_settings->InputBamFilename);
+    if ( !m_settings->HasInput ) 
+        m_settings->InputFiles.push_back(Options::StandardIn());
+    
+    BamMultiReader reader;
+    reader.Open(m_settings->InputFiles, false, true);
 
     // alignment counter
     int alignmentCount(0);
@@ -96,127 +95,88 @@ int CountTool::Run(int argc, char* argv[]) {
     // if no region specified, count entire file 
     if ( !m_settings->HasRegion ) {
         BamAlignment al;
-        while ( reader.GetNextAlignment(al) ) 
+        while ( reader.GetNextAlignmentCore(al) ) 
             ++alignmentCount;
-    } else {
+    }
+    
+    // more complicated - region specified
+    else {
         
-        // parse region string for desired region
-        string startChrom;
-        string stopChrom;
-        int startPos;
-        int stopPos;
-        if ( Utilities::ParseRegionString(m_settings->Region, startChrom, startPos, stopChrom, stopPos) ) {
-
-            const RefVector references = reader.GetReferenceData();
-
-            // -------------------------------
-            // validate start ref & position
-            
-            int startRefID = reader.GetReferenceID(startChrom);
-            
-            // startRefID not found
-            if ( startRefID == (int)references.size() ) {
-                foundError = true;
-                errorStream << "Start chrom: " << startChrom << " not found in BAM file." << endl;
-            } 
-            
-            // valid startRefID, check startPos
-            else {
-                const RefData& reference = references.at(startRefID);
+        BamRegion region;
+        if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
+
+            // check if there are index files *.bai/*.bti corresponding to the input files
+            bool hasDefaultIndex   = false;
+            bool hasBamtoolsIndex  = false;
+            bool hasNoIndex        = false;
+            int defaultIndexCount   = 0;
+            int bamtoolsIndexCount = 0;
+            for (vector<string>::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) {
+              
+                if ( Utilities::FileExists(*f + ".bai") ) {
+                    hasDefaultIndex = true;
+                    ++defaultIndexCount;
+                }       
                 
-                // startPos too large
-                if ( startPos > reference.RefLength ) {
-                    foundError = true;
-                    errorStream << "Start pos: " << startPos << " is larger than expected." << endl;
+                if ( Utilities::FileExists(*f + ".bti") ) {
+                    hasBamtoolsIndex = true;
+                    ++bamtoolsIndexCount;
+                }
+                  
+                if ( !hasDefaultIndex && !hasBamtoolsIndex ) {
+                    hasNoIndex = true;
+                    cerr << "*WARNING - could not find index file for " << *f  
+                         << ", parsing whole file(s) to get alignment counts for target region" 
+                         << " (could be slow)" << endl;
+                    break;
                 }
             }
             
-            // -------------------------------
-            // validate stop ref & position
-            
-            int stopRefID = reader.GetReferenceID(stopChrom);
-
-            // skip if error already found
-            if ( !foundError ) { 
-              
-                // stopRefID not found
-                if ( stopRefID == (int)references.size() ) {
-                    foundError = true;
-                    errorStream << "Stop chrom: " << stopChrom << " not found in BAM file." << endl;
-                } 
-                
-                // valid stopRefID, check stopPos
-                else {
-                    const RefData& reference = references.at(stopRefID);
-                    
-                    // stopPos too large
-                    if ( stopPos > reference.RefLength ) {
-                        foundError = true;
-                        errorStream << "Stop pos: " << stopPos << " is larger than expected." << endl;
-                    } 
-                    
-                    // no stopPos specified, set to reference end
-                    else if ( stopPos == -1 ) {
-                        stopPos = reference.RefLength;
-                    } 
-                }
+            // determine if index file types are heterogeneous
+            bool hasDifferentIndexTypes = false;
+            if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) {
+                hasDifferentIndexTypes = true;
+                cerr << "*WARNING - different index file formats found"  
+                         << ", parsing whole file(s) to get alignment counts for target region" 
+                         << " (could be slow)" << endl;
             }
-
-            // -------------------------------
-            // if refs & positions valid, retrieve data
             
-            if ( !foundError ) {
-
-                // has index, we can jump directly to 
-                if ( m_settings->HasBamIndexFilename ) {
-                  
-                    // this is kind of a hack...?
-                    // re-open BamReader, this time with the index file
-                    reader.Close();
-                    reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename);
-                  
-                    // attempt Jump(), catch error
-                    if ( !reader.Jump(startRefID, startPos) ) {
-                        foundError = true;
-                        errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl;
-                    }
-                }
+            // if any input file has no index, or if input files use different index formats
+            // can't use BamMultiReader to jump directly (**for now**)
+            if ( hasNoIndex || hasDifferentIndexTypes ) {
                 
-                else {
-                  
-                    // read through sequentially, until first overlapping read is found
-                    BamAlignment al;
-                    bool alignmentFound(false);
-                    while( reader.GetNextAlignment(al) ) {
-                        if ( (al.RefID == startRefID ) && ( (al.Position + al.Length) >= startPos) ) {
-                            alignmentFound = true;
-                            break;
-                        }
+                // read through sequentially, counting all overlapping reads
+                BamAlignment al;
+                while( reader.GetNextAlignmentCore(al) ) {
+                    if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
+                         (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
+                    {
+                        ++alignmentCount;
                     }
-                    
-                    // if overlapping alignment found (not EOF), increment counter
-                    if ( alignmentFound) ++ alignmentCount; 
                 }
-                
-                // -----------------------------
-                // count alignments until 
-                
-                if ( !foundError ) {
-                    // while valid alignment AND
-                    // ( either current ref is before stopRefID OR
-                    //   current ref stopRefID but we're before stopPos )
+            }
+            
+            // has index file for each input file (and same format)
+            else {
+              
+                // this is kind of a hack...?
+                BamMultiReader reader;
+                reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex );
+              
+                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
+                   foundError = true;
+                   errorStream << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+                } else {
                     BamAlignment al;
-                    while ( reader.GetNextAlignment(al) && ((al.RefID < stopRefID ) || (al.RefID == stopRefID && al.Position <= stopPos)) )
+                    while ( reader.GetNextAlignmentCore(al) )
                         ++alignmentCount;
                 }
             }
-        } 
-        
-        // could not parse REGION string, set error
-        else {
+            
+        } else {
             foundError = true;
-            errorStream << "Could not parse desired REGION: " << m_settings->Region << endl;
-            errorStream << "Please see README for details on accepted REGION formats" << endl;
+            errorStream << "Could not parse REGION: " << m_settings->Region << endl;
+            errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
         }
     }
      
@@ -229,4 +189,4 @@ int CountTool::Run(int argc, char* argv[]) {
     // clean & exit
     reader.Close();
     return (int)foundError;
-}
\ No newline at end of file
+}