]> git.donarmstrong.com Git - bamtools.git/commitdiff
Cleaned up CountTool, by extending Utilities::ParseRegionString() to also do validati...
authorDerek <derekwbarnett@gmail.com>
Thu, 3 Jun 2010 17:34:13 +0000 (13:34 -0400)
committerDerek <derekwbarnett@gmail.com>
Thu, 3 Jun 2010 17:34:13 +0000 (13:34 -0400)
bamtools_count.cpp
bamtools_utilities.cpp
bamtools_utilities.h

index c84c6ab68d54ef4ecb67c48cc58ab24f28508e77..67d1e9b1149494db1d1d96d9f0fb277e6f2c83c5 100644 (file)
@@ -103,123 +103,56 @@ int CountTool::Run(int argc, char* argv[]) {
     // more complicated - region specified
     else {
         
-        // parse region string for desired region
-        string startChrom;
-        string stopChrom;
-        int startPos;
-        int stopPos;
-        if ( Utilities::ParseRegionString(m_settings->Region, startChrom, startPos, stopChrom, stopPos) ) {
+        Region region;
+        if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
 
-            const RefVector references = reader.GetReferenceData();
-
-            // -------------------------------
-            // validate start ref & position
-            
-            int startRefID = reader.GetReferenceID(startChrom);
-            
-            // startRefID not found
-            if ( startRefID == (int)references.size() ) {
-                foundError = true;
-                errorStream << "Start chrom: " << startChrom << " not found in BAM file." << endl;
-            } 
-            
-            // valid startRefID, check startPos
-            else {
-                const RefData& reference = references.at(startRefID);
-                
-                // startPos too large
-                if ( startPos > reference.RefLength ) {
+            // has index, we can jump directly to 
+            if ( m_settings->HasBamIndexFilename ) {
+              
+                // this is kind of a hack...?
+                // re-open BamReader, this time with the index file
+                reader.Close();
+                reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename);
+              
+                // attempt Jump(), catch error
+                if ( !reader.Jump(region.StartChromID, region.StartPosition) ) {
                     foundError = true;
-                    errorStream << "Start pos: " << startPos << " is larger than expected." << endl;
+                    errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl;
                 }
             }
             
-            // -------------------------------
-            // validate stop ref & position
-            
-            int stopRefID = reader.GetReferenceID(stopChrom);
-
-            // skip if error already found
-            if ( !foundError ) { 
+            else {
               
-                // stopRefID not found
-                if ( stopRefID == (int)references.size() ) {
-                    foundError = true;
-                    errorStream << "Stop chrom: " << stopChrom << " not found in BAM file." << endl;
-                } 
-                
-                // valid stopRefID, check stopPos
-                else {
-                    const RefData& reference = references.at(stopRefID);
-                    
-                    // stopPos too large
-                    if ( stopPos > reference.RefLength ) {
-                        foundError = true;
-                        errorStream << "Stop pos: " << stopPos << " is larger than expected." << endl;
-                    } 
-                    
-                    // no stopPos specified, set to reference end
-                    else if ( stopPos == -1 ) {
-                        stopPos = reference.RefLength;
-                    } 
+                // read through sequentially, until first overlapping read is found
+                BamAlignment al;
+                bool alignmentFound(false);
+                while( reader.GetNextAlignment(al) ) {
+                    if ( (al.RefID == region.StartChromID ) && ( (al.Position + al.Length) >= region.StartPosition) ) {
+                        alignmentFound = true;
+                        break;
+                    }
                 }
+                
+                // if overlapping alignment found (not EOF), increment counter
+                if ( alignmentFound) ++ alignmentCount; 
             }
-
-            // -------------------------------
-            // if refs & positions valid, retrieve data
+            
+            // -----------------------------
+            // count alignments until stop hit
             
             if ( !foundError ) {
-
-                // has index, we can jump directly to 
-                if ( m_settings->HasBamIndexFilename ) {
-                  
-                    // this is kind of a hack...?
-                    // re-open BamReader, this time with the index file
-                    reader.Close();
-                    reader.Open(m_settings->InputBamFilename, m_settings->BamIndexFilename);
-                  
-                    // attempt Jump(), catch error
-                    if ( !reader.Jump(startRefID, startPos) ) {
-                        foundError = true;
-                        errorStream << "Could not jump to desired REGION: " << m_settings->Region << endl;
-                    }
-                }
-                
-                else {
-                  
-                    // read through sequentially, until first overlapping read is found
-                    BamAlignment al;
-                    bool alignmentFound(false);
-                    while( reader.GetNextAlignment(al) ) {
-                        if ( (al.RefID == startRefID ) && ( (al.Position + al.Length) >= startPos) ) {
-                            alignmentFound = true;
-                            break;
-                        }
-                    }
-                    
-                    // if overlapping alignment found (not EOF), increment counter
-                    if ( alignmentFound) ++ alignmentCount; 
-                }
-                
-                // -----------------------------
-                // count alignments until stop hit
-                
-                if ( !foundError ) {
-                    // while valid alignment AND
-                    // ( either current ref is before stopRefID OR
-                    //   current ref stopRefID but we're before stopPos )
-                    BamAlignment al;
-                    while ( reader.GetNextAlignment(al) && ((al.RefID < stopRefID ) || (al.RefID == stopRefID && al.Position <= stopPos)) )
-                        ++alignmentCount;
-                }
+                // while valid alignment AND
+                // ( either current ref is before stopRefID OR
+                //   current ref stopRefID but we're before stopPos )
+                BamAlignment al;
+                while ( reader.GetNextAlignment(al) && ((al.RefID < region.StopChromID ) || (al.RefID == region.StopChromID && al.Position <= region.StopPosition)) )
+                    ++alignmentCount;
             }
-        } 
-        
-        // could not parse REGION string, set error
-        else {
+
+        } else {
             foundError = true;
-            errorStream << "Could not parse desired REGION: " << m_settings->Region << endl;
-            errorStream << "Please see README for details on accepted REGION formats" << endl;
+            errorStream << "Could not parse REGION: " << m_settings->Region << endl;
+            errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
         }
     }
      
index b5440df12aa4164169f6f7b942894ca6f2fe6ac2..e3ab868178bccb3ef8e8e756fafdde860f703714 100644 (file)
@@ -9,26 +9,31 @@
 // ***************************************************************************
 
 #include <cstdlib>
-#include <iostream>
 #include "bamtools_utilities.h"
+#include "BamReader.h"
 
 using namespace std;
 using namespace BamTools;
 
-// Parses a REGION string, stores in (startChrom, startPos, stopChrom, stopPos) variables
-// Returns successful parse (true/false)
-bool Utilities::ParseRegionString(const string& regionString, string& startChrom, int& startPos, string& stopChrom, int& stopPos) {
-    
-    // shouldn't call this function with empty string but worth checking 
-    // checked first for clarity purposes later on, since we can assume at least some content in the string
-    if ( regionString.empty() ) { 
-        cerr << "Empty REGION. Usual format (e.g. chr2:1000..2000). See README for more detailed uses." << endl;
-        return false; 
-    }
+// Parses a region string, does validation (valid ID's, positions), stores in Region struct
+bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region) {
   
+    // -------------------------------
+    // parse region string
+  
+    // check first for empty string
+    if ( regionString.empty() ) 
+        return false;   
+    
     // non-empty string, look for a colom
     size_t foundFirstColon = regionString.find(':');
     
+    // store chrom strings, and numeric positions
+    string startChrom;
+    string stopChrom;
+    int startPos;
+    int stopPos;
+    
     // no colon found
     // going to use entire contents of requested chromosome 
     // just store entire region string as startChrom name
@@ -38,7 +43,6 @@ bool Utilities::ParseRegionString(const string& regionString, string& startChrom
         startPos   = 0;
         stopChrom  = regionString;
         stopPos    = -1;
-        return true;
     }
     
     // colon found, so we at least have some sort of startPos requested
@@ -57,7 +61,6 @@ bool Utilities::ParseRegionString(const string& regionString, string& startChrom
             startPos   = atoi( regionString.substr(foundFirstColon+1).c_str() ); 
             stopChrom  = startChrom;
             stopPos    = -1;
-            return true;
         } 
         
         // ".." found, so we have some sort of range selected
@@ -74,7 +77,6 @@ bool Utilities::ParseRegionString(const string& regionString, string& startChrom
             if ( foundSecondColon == string::npos ) {
                 stopChrom  = startChrom;
                 stopPos    = atoi( regionString.substr(foundRangeDots+2).c_str() );
-                return true;
             }
             
             // second colon found
@@ -82,12 +84,41 @@ bool Utilities::ParseRegionString(const string& regionString, string& startChrom
             else {
                 stopChrom  = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
                 stopPos    = atoi( regionString.substr(foundSecondColon+1).c_str() );
-                return true;
             }
         }
     }
-  
-    // shouldn't get here - any code path that does?
-    // if not, what does true/false really signify?
-    return false;
-}
\ No newline at end of file
+
+    // -------------------------------
+    // validate reference IDs & genomic positions
+    
+    const RefVector references = reader.GetReferenceData();
+    
+    // if startRefID not found, return false
+    int startRefID = reader.GetReferenceID(startChrom);
+    if ( startRefID == (int)references.size() ) return false;  
+    
+    // if startPos is larger than reference, return false
+    const RefData& startReference = references.at(startRefID);
+    if ( startPos > startReference.RefLength ) return false;
+    
+    // if stopRefID not found, return false
+    int stopRefID = reader.GetReferenceID(stopChrom);
+    if ( stopRefID == (int)references.size() ) return false;
+    
+    // if stopPosition larger than reference, return false
+    const RefData& stopReference = references.at(stopRefID);
+    if ( stopPos > stopReference.RefLength ) return false;
+    
+    // if no stopPosition specified, set to reference end
+    if ( stopPos == -1 )stopPos = stopReference.RefLength;  
+    
+    // -------------------------------
+    // set up Region struct & return
+    
+    region.StartChromID  = startRefID;
+    region.StopChromID   = stopRefID;
+    region.StartPosition = startPos;
+    region.StopPosition  = stopPos;
+    return true;
+}
+
index 8a5d36c6c1e1e5b275055939406ed7e8412ea5c9..1e280f127e903a2bc5368b32a8e82a667b285656 100644 (file)
 namespace BamTools {
 
 class BamReader;  
-  
+
+struct Region {
+    int StartChromID;
+    int StopChromID;
+    int StartPosition;
+    int StopPosition;
+};
+
 class Utilities {
   
-    public:
-        // Parses a REGION string, stores in (startChrom, startPos, stopChrom, stopPos) variables
-        // Returns successful parse (true/false)
-        static bool ParseRegionString(const std::string& regionString,
-                                      std::string& startChrom,
-                                      int& startPos,
-                                      std::string& stopChrom,
-                                      int& stopPos); 
+    public:                          
+        // Parses a region string, uses reader to do validation (valid ID's, positions), stores in Region struct
+        static bool ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region);
 };
 
 } // namespace BamTools