]> git.donarmstrong.com Git - bamtools.git/commitdiff
Implemented 'compound rule' logic in the FilterTool's script support.
authorDerek <derekwbarnett@gmail.com>
Wed, 22 Sep 2010 02:00:14 +0000 (22:00 -0400)
committerDerek <derekwbarnett@gmail.com>
Wed, 22 Sep 2010 02:00:14 +0000 (22:00 -0400)
 * Previously the script was limited to doing 'OR' comparisons on various
property sets (what I call here filters). Now, by providing each filter
with an id, you can use these id's to define a compound rule.

 * Documentation is severely lacking on this end at the moment, but I hope
to have a good explanation up soon.  I think this interface could provide
a powerful flexibility in querying BAM files for very specific cases for
further analyses.

src/toolkit/bamtools_filter.cpp
src/utils/Makefile
src/utils/bamtools_filter_engine.cpp [deleted file]
src/utils/bamtools_filter_engine.h
src/utils/bamtools_filter_properties.h [new file with mode: 0644]
src/utils/bamtools_filter_ruleparser.h [new file with mode: 0644]

index 5c8f3389472b09f07484f8f87a3e78a48a4cde75..cb153cf84fdd4c08c4c480eb3f2c68738883883e 100644 (file)
@@ -3,20 +3,16 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 31 August 2010
+// Last modified: 21 September 2010
 // ---------------------------------------------------------------------------
-// Filters a single BAM file (or filters multiple BAM files and merges) 
-// according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria.
 // ***************************************************************************
 
-// std includes
 #include <cstdio>
 #include <iostream>
 #include <sstream>
 #include <string>
 #include <vector>
-
-// BamTools includes
 #include "bamtools_filter.h"
 #include "bamtools_filter_engine.h"
 #include "bamtools_options.h"
 #include "BamReader.h"
 #include "BamMultiReader.h"
 #include "BamWriter.h"
-
-//JsonCPP includes
 #include "jsoncpp/json.h"
-
 using namespace std;
 using namespace BamTools; 
 using namespace Json;
@@ -63,6 +56,62 @@ const string REFERENCE_PROPERTY           = "reference";
 const string TRUE_STR  = "true";
 const string FALSE_STR = "false";
     
+RefVector filterToolReferences;    
+    
+struct BamAlignmentChecker {
+    bool check(const PropertyFilter& filter, const BamAlignment& al) {
+      
+        bool keepAlignment = true;
+        const PropertyMap& properties = filter.Properties;
+        PropertyMap::const_iterator propertyIter = properties.begin();
+        PropertyMap::const_iterator propertyEnd  = properties.end();
+        for ( ; propertyIter != propertyEnd; ++propertyIter ) {
+          
+            // check alignment data field depending on propertyName
+            const string& propertyName = (*propertyIter).first;
+            const PropertyFilterValue& valueFilter = (*propertyIter).second;
+            
+            if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= valueFilter.check(al.AlignmentFlag);
+            else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
+            else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
+            else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
+            else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
+            else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
+            else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
+            else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
+            else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
+            else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
+            else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
+            else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
+            else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
+            else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
+            else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
+            else if ( propertyName == MATEREFERENCE_PROPERTY ) {
+                if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
+                BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
+                const string& refName = filterToolReferences.at(al.MateRefID).RefName;
+                keepAlignment &= valueFilter.check(refName);
+            }
+            else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
+            else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
+            else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
+            else if ( propertyName == REFERENCE_PROPERTY ) {
+                BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
+                const string& refName = filterToolReferences.at(al.RefID).RefName;
+                keepAlignment &= valueFilter.check(refName);
+            }
+            else BAMTOOLS_ASSERT_UNREACHABLE;
+            
+            // if alignment fails at ANY point, just quit and return false
+            if ( !keepAlignment ) 
+                return false;
+        }
+      
+        BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
+        return keepAlignment;
+    }
+};
+    
 } // namespace BamTools
   
 // ---------------------------------------------
@@ -94,7 +143,7 @@ class FilterTool::FilterToolPrivate {
     private:
         vector<string> m_propertyNames;
         FilterTool::FilterSettings* m_settings;
-        RefVector m_references;
+        FilterEngine<BamAlignmentChecker> m_filterEngine;
 };
   
 // ---------------------------------------------
@@ -288,7 +337,6 @@ FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
 
 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
 
-  
     // dummy temp values for token parsing
     bool boolValue;
     int32_t int32Value;
@@ -323,8 +371,8 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
              propertyName == ISSECONDMATE_PROPERTY
            ) 
         {
-            FilterEngine::parseToken(token, boolValue, type);
-            FilterEngine::setProperty(filterName, propertyName, boolValue, type);
+            m_filterEngine.parseToken(token, boolValue, type);
+            m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
         }
         
         // int32_t conversion
@@ -333,22 +381,22 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
                   propertyName == POSITION_PROPERTY 
                 ) 
         {
-            FilterEngine::parseToken(token, int32Value, type);
-            FilterEngine::setProperty(filterName, propertyName, int32Value, type);
+            m_filterEngine.parseToken(token, int32Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
         }
         
         // uint16_t conversion
         else if ( propertyName == MAPQUALITY_PROPERTY )
         {
-            FilterEngine::parseToken(token, uint16Value, type);
-            FilterEngine::setProperty(filterName, propertyName, uint16Value, type);
+            m_filterEngine.parseToken(token, uint16Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
         }
         
         // uint32_t conversion
         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
         {
-            FilterEngine::parseToken(token, uint32Value, type);
-            FilterEngine::setProperty(filterName, propertyName, uint32Value, type);
+            m_filterEngine.parseToken(token, uint32Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
         }
         
         // string conversion
@@ -358,8 +406,8 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
                   propertyName == REFERENCE_PROPERTY
                 ) 
         {
-            FilterEngine::parseToken(token, stringValue, type);
-            FilterEngine::setProperty(filterName, propertyName, stringValue, type);
+            m_filterEngine.parseToken(token, stringValue, type);
+            m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
         }
       
         // else unknown property 
@@ -372,55 +420,7 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
 }
 
 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
-
-    bool keepAlignment = true;
-  
-    // only consider properties that are actually enabled
-    // iterate over these enabled properties
-    const vector<string> enabledProperties = FilterEngine::enabledPropertyNames();
-    vector<string>::const_iterator propIter = enabledProperties.begin();
-    vector<string>::const_iterator propEnd  = enabledProperties.end();
-    for ( ; propIter != propEnd; ++propIter ) {
-     
-        // check alignment data field depending on propertyName
-        const string& propertyName = (*propIter);
-        if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= FilterEngine::check(ALIGNMENTFLAG_PROPERTY,       al.AlignmentFlag);
-        else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= FilterEngine::check(INSERTSIZE_PROPERTY,          al.InsertSize);
-        else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISDUPLICATE_PROPERTY,         al.IsDuplicate());
-        else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= FilterEngine::check(ISFAILEDQC_PROPERTY,          al.IsFailedQC());
-        else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISFIRSTMATE_PROPERTY,         al.IsFirstMate());
-        else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= FilterEngine::check(ISMAPPED_PROPERTY,            al.IsMapped());
-        else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= FilterEngine::check(ISMATEMAPPED_PROPERTY,        al.IsMateMapped());
-        else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= FilterEngine::check(ISMATEREVERSESTRAND_PROPERTY, al.IsMateReverseStrand());
-        else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= FilterEngine::check(ISPAIRED_PROPERTY,            al.IsPaired());
-        else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= FilterEngine::check(ISPRIMARYALIGNMENT_PROPERTY,  al.IsPrimaryAlignment());
-        else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= FilterEngine::check(ISPROPERPAIR_PROPERTY,        al.IsProperPair());
-        else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= FilterEngine::check(ISREVERSESTRAND_PROPERTY,     al.IsReverseStrand());
-        else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= FilterEngine::check(ISSECONDMATE_PROPERTY,        al.IsSecondMate());
-        else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= FilterEngine::check(MAPQUALITY_PROPERTY,          al.MapQuality);
-        else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && FilterEngine::check(MATEPOSITION_PROPERTY, al.MateRefID) );
-        else if ( propertyName == MATEREFERENCE_PROPERTY ) {
-            if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
-            BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)m_references.size())), "Invalid MateRefID");
-            const string& refName = m_references.at(al.MateRefID).RefName;
-            keepAlignment &= FilterEngine::check(MATEREFERENCE_PROPERTY, refName);
-        }
-        else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= FilterEngine::check(NAME_PROPERTY,                al.Name);
-        else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= FilterEngine::check(POSITION_PROPERTY,            al.Position);
-        else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= FilterEngine::check(QUERYBASES_PROPERTY,          al.QueryBases);
-        else if ( propertyName == REFERENCE_PROPERTY ) {
-            BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)m_references.size())), "Invalid RefID");
-            const string& refName = m_references.at(al.RefID).RefName;
-            keepAlignment &= FilterEngine::check(REFERENCE_PROPERTY, refName);
-        }
-        else BAMTOOLS_ASSERT_MESSAGE( false, "Unknown property");
-        
-        // if alignment fails at ANY point, just quit and return false
-        if ( !keepAlignment ) return false;
-    }
-  
-    // return success (should still be true at this point)
-    return keepAlignment;
+    return m_filterEngine.check(al);
 }
 
 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
@@ -483,18 +483,18 @@ void FilterTool::FilterToolPrivate::InitProperties(void) {
     m_propertyNames.push_back(QUERYBASES_PROPERTY);
     m_propertyNames.push_back(REFERENCE_PROPERTY);
     
-    // add vector contents to FilterEngine
+    // add vector contents to FilterEngine<BamAlignmentChecker>
     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
-        FilterEngine::addProperty((*propertyNameIter));
+        m_filterEngine.addProperty((*propertyNameIter));
 }
 
 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
   
     // add a rule set to filter engine
     const string CMD = "COMMAND_LINE";
-    FilterEngine::addFilter(CMD);
+    m_filterEngine.addFilter(CMD);
 
     // map property names to command line args
     map<string, string> propertyTokens;
@@ -541,7 +541,7 @@ bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName,
     }
   
     // add this filter to engin
-    FilterEngine::addFilter(filterName);
+    m_filterEngine.addFilter(filterName);
   
     // add token list to this filter
     return AddPropertyTokensToFilter(filterName, propertyTokens);
@@ -595,14 +595,15 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) {
             success &= ParseFilterObject(filterName, filter);
         }
         
-        // see if user defined "rule" for these filters
+        // see if user defined a "rule" for these filters
+        // otherwise, use filter engine's default rule behavior
+        string ruleString("");
         const Json::Value rule = root["rule"];
-        if ( !rule.isNull() ) {
-            cout << "found rule: " << rule.asString() << endl;
-        } else {
-            cout << "no rule found!" << endl;
-        }
+        if ( rule.isString() )
+            ruleString = rule.asString();
+        m_filterEngine.setRule(ruleString);
           
+        // return success/fail
         return success;
     } 
     
@@ -629,12 +630,12 @@ bool FilterTool::FilterToolPrivate::Run(void) {
     BamMultiReader reader;
     reader.Open(m_settings->InputFiles, false, true);
     const string headerText = reader.GetHeaderText();
-    m_references = reader.GetReferenceData();
+    filterToolReferences = reader.GetReferenceData();
     
     // open writer
     BamWriter writer;
     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
-    writer.Open(m_settings->OutputFilename, headerText, m_references, writeUncompressed);
+    writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed);
     
     BamAlignment al;
     
@@ -710,7 +711,7 @@ bool FilterTool::FilterToolPrivate::Run(void) {
 
 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
   
-    // add known properties to FilterEngine
+    // add known properties to FilterEngine<BamAlignmentChecker>
     InitProperties();
     
     // parse script for filter rules, if given
index d56bfe01e5d783effe72f332d6cc20c89eef2cdc..6e881220e0c6a373545c6133822ad131a741034b 100644 (file)
@@ -15,7 +15,6 @@ INCLUDES = -I$(API_DIR)/
 # define our source and object files
 # ----------------------------------
 SOURCES = bamtools_fasta.cpp \
-          bamtools_filter_engine.cpp \
           bamtools_options.cpp \
          bamtools_pileup_engine.cpp \
           bamtools_utilities.cpp 
diff --git a/src/utils/bamtools_filter_engine.cpp b/src/utils/bamtools_filter_engine.cpp
deleted file mode 100644 (file)
index c9d0715..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-// ***************************************************************************
-// bamtools_filter_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 18 September 2010
-// ---------------------------------------------------------------------------
-// 
-// ***************************************************************************
-
-#include <iostream>
-#include "bamtools_filter_engine.h"
-using namespace std;
-using namespace BamTools;
-
-// ---------------------------------------------------------
-// FilterValue implementation
-
-// checks a string query against filter (value, compare type)
-bool PropertyFilterValue::check(const string& query) const {
-  
-    // ensure filter value & query are same type
-    if ( !Value.is_type<string>() ) {
-        cerr << "Cannot compare different types!" << endl;
-        return false;
-    }
-  
-    // localize string version of our filter value
-    const string& valueString = Value.get<string>();
-    
-    // string matching based on our filter type
-    switch ( Type ) {
-        case ( PropertyFilterValue::CONTAINS)           : return ( query.find(valueString) != string::npos );
-        case ( PropertyFilterValue::ENDS_WITH)          : return ( query.find(valueString) == (query.length() - valueString.length()) ); 
-        case ( PropertyFilterValue::EXACT)              : return ( query == valueString );
-        case ( PropertyFilterValue::GREATER_THAN)       : return ( query >  valueString ); 
-        case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= valueString ); 
-        case ( PropertyFilterValue::LESS_THAN)          : return ( query <  valueString );
-        case ( PropertyFilterValue::LESS_THAN_EQUAL)    : return ( query <= valueString );
-        case ( PropertyFilterValue::NOT)                : return ( query != valueString );
-        case ( PropertyFilterValue::STARTS_WITH)        : return ( query.find(valueString) == 0 );
-        default : BAMTOOLS_ASSERT_UNREACHABLE;
-    }
-    return false;
-}
-
-// ---------------------------------------------------------
-// FilterEngine implementation
-
-// static FilterEngine data members
-FilterMap FilterEngine::m_filters;
-vector<Property> FilterEngine::m_properties;
-
-// creates a new filter set, returns true if created, false if error or already exists
-bool FilterEngine::addFilter(const string& filterName) {
-    return (m_filters.insert(make_pair(filterName, PropertyFilter()))).second;
-}
-
-// return list of current filter names
-const vector<string> FilterEngine::filterNames(void) {
-    vector<string> names;
-    names.reserve(m_filters.size());
-    FilterMap::const_iterator mapIter = m_filters.begin();
-    FilterMap::const_iterator mapEnd  = m_filters.end();
-    for ( ; mapIter != mapEnd; ++mapIter )
-        names.push_back( (*mapIter).first ); 
-    return names;
-}
-
-// add a new known property (& type) to engine
-bool FilterEngine::addProperty(const string& propertyName) {
-    const vector<string> propertyNames = allPropertyNames();
-    bool found = binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
-    if ( found ) return false;
-    m_properties.push_back( Property(propertyName) );
-    sort( m_properties.begin(), m_properties.end() );
-    return true;
-}
-
-
-// returns list of all properties known by FilterEngine  ( any created using addProperty() )
-const vector<string> FilterEngine::allPropertyNames(void) {
-    vector<string> names;
-    names.reserve(m_properties.size());
-    vector<Property>::const_iterator propIter = m_properties.begin();
-    vector<Property>::const_iterator propEnd  = m_properties.end();
-    for ( ; propIter != propEnd; ++propIter )
-        names.push_back( (*propIter).Name );    
-    return names;
-}
-
-// returns list of property names that are 'enabled' ( only those touched by setProperty() )
-const vector<string> FilterEngine::enabledPropertyNames(void) {
-    vector<string> names;
-    names.reserve(m_properties.size());
-    vector<Property>::const_iterator propIter = m_properties.begin();
-    vector<Property>::const_iterator propEnd  = m_properties.end();
-    for ( ; propIter != propEnd; ++propIter )
-        if ( (*propIter).IsEnabled ) names.push_back( (*propIter).Name );    
-    return names;
-}
index 2297aea32736ac3b80630b26a5af553fe77a9308..924b043119b7cd3b96e6826aa18371ca7cb5a28b 100644 (file)
@@ -3,8 +3,39 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 30 August 2010
+// Last modified: 21 September 2010
 // ---------------------------------------------------------------------------
+// Provides a generic filter engine based on filter-sets of properties,
+// with possible "rules" (compound logical expressions) to create more complex
+// queries on a data set.
+//
+// FilterEngine consists, most importantly, of :
+//
+//     a list of possible properties (each tagged whether it has been 'enabled' as a filter)
+//     a map of filterName => propertySet
+//     queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
+//     
+// Each propertySet is a list of properties enabled for this particular filter object
+//
+//     Implemented as a map of propertyNames to propertyFilterValue
+//     ( "property1" => pfv1 
+//       "property2" => pfv2 
+//       "property4" => pfv4
+//       etc. )  
+//
+//     Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled 
+//     via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true 
+//     for any query.  Meaning that if a property is not set on this filter, we don't care 
+//     about it here, so it passes though OK.
+//
+// A propertyFilterValue contains a value and comparison type
+//
+//    ( pfv1: Value = 50,    Type = GREATER_THAN_EQUAL
+//      pfv2: Value = "foo", Type = STARTS_WITH
+//      pfv4: Value = "bar", Type = CONTAINS
+//      etc. )  
+//
+//    This allows for more complex queries (than simple isEqual?) against a variety of data types.
 // 
 // ***************************************************************************
 
 #define BAMTOOLS_FILTER_ENGINE_H
 
 #include <algorithm>
+#include <iostream>
 #include <map>
+#include <queue>
+#include <sstream>
+#include <stack>
 #include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
+#include "bamtools_filter_properties.h"
+#include "bamtools_filter_ruleparser.h"
 #include "bamtools_utilities.h"
-#include "bamtools_variant.h"
 
 namespace BamTools {
 
-struct PropertyFilterValue {
-  
-    // define valid ValueCompareTypes
-    enum ValueCompareType { CONTAINS = 0
-                          , ENDS_WITH
-                          , EXACT
-                          , GREATER_THAN
-                          , GREATER_THAN_EQUAL
-                          , LESS_THAN
-                          , LESS_THAN_EQUAL
-                          , NOT
-                          , STARTS_WITH
-                          };
-                   
-    // ctor
-    PropertyFilterValue(const Variant& value = Variant(),
-                        const ValueCompareType& type = PropertyFilterValue::EXACT)
-        : Value(value)
-        , Type(type)
-    { }
-          
-    // filter check methods      
-    template<typename T>
-    bool check(const T& query) const;
-    bool check(const std::string& query) const;
-             
-    // data members
-    Variant Value;
-    ValueCompareType Type;
-};
-
-inline
-const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
-  
-    switch ( type ) {
-        case ( PropertyFilterValue::CONTAINS )           : return std::string( "CONTAINS");
-        case ( PropertyFilterValue::ENDS_WITH )          : return std::string( "ENDS_WITH");
-        case ( PropertyFilterValue::EXACT )              : return std::string( "EXACT");
-        case ( PropertyFilterValue::GREATER_THAN )       : return std::string( "GREATER_THAN");
-        case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
-        case ( PropertyFilterValue::LESS_THAN )          : return std::string( "LESS_THAN");
-        case ( PropertyFilterValue::LESS_THAN_EQUAL )    : return std::string( "LESS_THAN_EQUAL");
-        case ( PropertyFilterValue::NOT )                : return std::string( "NOT");
-        case ( PropertyFilterValue::STARTS_WITH )        : return std::string( "STARTS_WITH");
-        default : BAMTOOLS_ASSERT_UNREACHABLE;
-    }
-    return std::string();
-}
-
-// property name => property filter value 
-// ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...)
-typedef std::map<std::string, PropertyFilterValue> PropertyMap;
-
-struct PropertyFilter {  
-  
-    // will be used more later
-    // if we implement a compound 'rules' system  - i.e. "(filter1 AND filter2) OR filter 3"
-    enum FilterCompareType { AND = 0
-                           , EXACT
-                           , NOT
-                           , OR
-                           };
-  
-    // data members
-    PropertyMap Properties;
-    FilterCompareType Type; 
-
-    // ctor
-    PropertyFilter(void) : Type( PropertyFilter::EXACT ) { }
-    
-    // filter check methods      
-    template<typename T>
-    bool check(const std::string& propertyName, const T& query) const;
+struct FilterCompareType {    
+    enum Type { AND = 0
+              , NOT
+              , OR
+    };
 };
-
-// filter name => properties  
-// ('filter1' => properties1, 'filter2' => properties2, etc...)
-typedef std::map<std::string, PropertyFilter> FilterMap;
   
-// used to store properties known to engine & keep track of enabled state
-struct Property {
-    std::string Name;
-    bool IsEnabled;
-    Property(const std::string& name, bool isEnabled = false) 
-        : Name(name)
-        , IsEnabled(isEnabled) 
-    { }
-};
-
-inline bool operator<  (const Property& lhs, const Property& rhs) { return lhs.Name <  rhs.Name; }
-inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
+// -----------------------------------------------------------
+// FilterEngine
   
+template <typename FilterChecker>
 class FilterEngine {
   
+    // ctor & dtor
+    public:
+        FilterEngine(void) 
+            : m_ruleString("")
+            , m_isRuleQueueGenerated(false)
+            , m_defaultCompareType(FilterCompareType::OR)
+            , AND_OPERATOR("&")
+            , OR_OPERATOR("|")
+            , NOT_OPERATOR("!")
+        { }
+        
+        ~FilterEngine(void) { }
+  
     // 'filter set' methods
     public:
         // creates a new filter set, returns true if created, false if error or already exists
-        static bool addFilter(const std::string& filterName);       
+        bool addFilter(const std::string& filterName);       
         
         // return list of current filter names
-        static const std::vector<std::string> filterNames(void);    
+        const std::vector<std::string> filterNames(void);    
   
     // 'property' methods
     public:
       
         // add a new known property (& type) to engine
-        static bool addProperty(const std::string& propertyName);
+        bool addProperty(const std::string& propertyName);
   
         // sets property filter (value, type) for propertyName, on a particular filter set 
         // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
         template<typename T>
-        static bool setProperty(const std::string& filterName, 
-                                const std::string& propertyName, 
-                                const T& value,
-                                const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
+        bool setProperty(const std::string& filterName, 
+                         const std::string& propertyName, 
+                         const T& value,
+                         const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
         
         // returns list of all properties known by FilterEngine  ( any created using addProperty() )
-        static const std::vector<std::string> allPropertyNames(void);
+        const std::vector<std::string> allPropertyNames(void);
         
         // returns list of property names that are 'enabled' ( only those touched by setProperty() )
-        static const std::vector<std::string> enabledPropertyNames(void);  
+        const std::vector<std::string> enabledPropertyNames(void);  
   
+     // 'rule' methods
+    public:   
+        
+        // sets comparison operator between filters if no rule string given
+        // default is to do an OR on each filter
+        void setDefaultCompareType(const FilterCompareType::Type& type = FilterCompareType::OR);
+
+        // sets rule string for building expression queue
+        // if empty, creates 
+        void setRule(const std::string& ruleString = "");
+        
     // token parsing (for property filter generation)
     public:
         template<typename T>
-        static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
+        bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
         
     // query evaluation
     public:
-        // returns true if query passes all filters on 'propertyName'
+        // returns true if query passes all filters in FilterEngine
         template<typename T>
-        static bool check(const std::string& propertyName, const T& query);
+        bool check(const T& query);
 
+    // internal rule-handling methods
+    private:
+        void buildDefaultRuleString(void);
+        void buildRuleQueue(void);
+        template<typename T>
+        bool evaluateFilterRules(const T& query);
+        
     // data members
     private:
         // all 'filter sets'
-        static FilterMap m_filters;
+        FilterMap m_filters;
         
         // all known properties
-        static std::vector<Property> m_properties;  
+        std::vector<Property> m_properties; 
+        
+        // infix expression of filter-set comparison rules 
+        std::string m_ruleString;
+        
+        // postfix expression of tokens (filterNames) and operators (as strings)
+        // if this is empty, uses m_compareType to build default expression queue
+        std::queue<std::string> m_ruleQueue;
+        
+        // flag to test if the rule expression queue has been generated
+        bool m_isRuleQueueGenerated;
+        
+        // 'default' comparison operator between filters if no rule string given
+        // if this is changed, m_ruleString is used to build new m_ruleQueue
+        FilterCompareType::Type m_defaultCompareType;
+        
+        // client-specified checking type ( provides method: bool check(PropertyFilter, T object) )
+        FilterChecker m_checker;
         
         // token-parsing constants
         static const int NOT_CHAR          = (int)'!';
@@ -169,65 +172,192 @@ class FilterEngine {
         static const int GREATER_THAN_CHAR = (int)'>';
         static const int LESS_THAN_CHAR    = (int)'<';
         static const int WILDCARD_CHAR     = (int)'*';
+        
+        // filter evaluation constants
+        const std::string AND_OPERATOR;
+        const std::string OR_OPERATOR;
+        const std::string NOT_OPERATOR;
 };
 
-// -------------------------------------------------------------------
-// template methods
+// creates a new filter set, returns true if created, false if error or already exists
+template<typename FilterChecker>
+inline bool FilterEngine<FilterChecker>::addFilter(const std::string& filterName) {
+    return (m_filters.insert(std::make_pair(filterName, PropertyFilter()))).second;
+}
+
+// add a new known property & type to engine
+template<typename FilterChecker>
+inline bool FilterEngine<FilterChecker>::addProperty(const std::string& propertyName) {
+    const std::vector<std::string> propertyNames = allPropertyNames();
+    bool found = std::binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
+    if ( found ) return false;
+    m_properties.push_back( Property(propertyName) );
+    std::sort( m_properties.begin(), m_properties.end() );
+    return true;
+}
+
+// returns list of all properties known by FilterEngine 
+// ( any that were created using addProperty() )
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::allPropertyNames(void) {
+    // set up stringlist
+    std::vector<std::string> names;
+    names.reserve(m_properties.size());
+    // iterate through all properties, appending to stringlist
+    std::vector<Property>::const_iterator propIter = m_properties.begin();
+    std::vector<Property>::const_iterator propEnd  = m_properties.end();
+    for ( ; propIter != propEnd; ++propIter )
+        names.push_back( (*propIter).Name );  
+    // return stringlist
+    return names;
+}
 
-// checks a query against a filter (value, compare type)
-template<typename T>
-bool PropertyFilterValue::check(const T& query) const {
+// builds a default rule string based on m_defaultCompareType
+// used if user supplied an explicit rule string
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::buildDefaultRuleString(void) {
   
-    // ensure filter value & query are same type
-    if ( !Value.is_type<T>() ) { 
-        std::cerr << "Cannot compare different types!" << std::endl;
-        return false;
+    // set up temp string stream 
+    std::stringstream ruleStream("");
+  
+    // get first filterName
+    FilterMap::const_iterator mapIter = m_filters.begin();
+    ruleStream << (*mapIter).first;
+    
+    // if there are more filters present
+    // iterate over remaining filters, appending compare operator and filter name
+    if ( m_filters.size() > 1 ) {        
+        for ( ++mapIter ; mapIter != m_filters.end(); ++mapIter )
+            ruleStream << ( (m_defaultCompareType == FilterCompareType::AND) ? " & " : " | " ) 
+                       << (*mapIter).first;
     }
+
+    // set m_ruleString from temp stream
+    m_ruleString = ruleStream.str();
+}
+
+// build expression queue based on ruleString
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::buildRuleQueue(void) {
+  
+    // skip if no filters present
+    if ( m_filters.empty() ) return;
+  
+    // clear out any prior expression queue data
+    while ( !m_ruleQueue.empty() )
+        m_ruleQueue.pop();
+  
+    // create a rule string, if not provided
+    if ( m_ruleString.empty() ) 
+        buildDefaultRuleString();
     
-    // string matching
-    if ( Value.is_type<std::string>() ) {
-        std::cerr << "Cannot compare different types - query is a string!" << std::endl;
-        return false;
-    } 
+    // initialize RuleParser, run, and retrieve results
+    RuleParser ruleParser(m_ruleString);
+    ruleParser.parse();
+    m_ruleQueue = ruleParser.results();
     
-    // numeric matching based on our filter type
-    switch ( Type ) {
-        case ( PropertyFilterValue::EXACT)              : return ( query == Value.get<T>() );
-        case ( PropertyFilterValue::GREATER_THAN)       : return ( query >  Value.get<T>() ); 
-        case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() ); 
-        case ( PropertyFilterValue::LESS_THAN)          : return ( query <  Value.get<T>() );
-        case ( PropertyFilterValue::LESS_THAN_EQUAL)    : return ( query <= Value.get<T>() );
-        case ( PropertyFilterValue::NOT)                : return ( query != Value.get<T>() );
-        default : BAMTOOLS_ASSERT_UNREACHABLE;
-    }
-    return false;
+    // set flag if rule queue contains any values
+    m_isRuleQueueGenerated = (!m_ruleQueue.empty());    
 }
 
-template<typename T>
-bool PropertyFilter::check(const std::string& propertyName, const T& query) const {
+// returns whether query value passes filter engine rules
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::check(const T& query) {
   
-    // if propertyName found for this filter, 
-    PropertyMap::const_iterator propIter = Properties.find(propertyName);
-    if ( propIter != Properties.end() ) {
-      const PropertyFilterValue& filterValue = (*propIter).second;
-      
-      // check 
-      switch ( Type ) {
-          case ( PropertyFilter::EXACT ) : return filterValue.check(query);
-          case ( PropertyFilter::NOT )   : return !filterValue.check(query);
-          case ( PropertyFilter::AND )   :
-          case ( PropertyFilter::OR )    : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a binary compare operator on 1 value");
-          default : BAMTOOLS_ASSERT_UNREACHABLE;
-      }
-      return false; // unreachable
+    // return result of querying against filter rules
+    return evaluateFilterRules(query);
+}
+
+// returns list of property names that are 'enabled' ( only those touched by setProperty() )
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::enabledPropertyNames(void) {
+    // initialize stringlist
+    std::vector<std::string> names;
+    names.reserve(m_properties.size());
+    // iterate over all properties, appending if enabled
+    std::vector<Property>::const_iterator propIter = m_properties.begin();
+    std::vector<Property>::const_iterator propEnd  = m_properties.end();
+    for ( ; propIter != propEnd; ++propIter )
+        if ( (*propIter).IsEnabled ) 
+            names.push_back( (*propIter).Name );    
+    // return stringlist
+    return names;
+}
+
+// evaluates postfix rule queue - with each filter as an operand, AND|OR|NOT as operators
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::evaluateFilterRules(const T& query) {
+  
+    // build ruleQueue if not done before
+    if ( !m_isRuleQueueGenerated ) 
+        buildRuleQueue();
+    
+    std::stack<bool> resultStack;
+    FilterMap::const_iterator filterIter;
+    FilterMap::const_iterator filterEnd  = m_filters.end();
+    std::queue<std::string> ruleQueueCopy = m_ruleQueue;
+    while ( !ruleQueueCopy.empty() ) {
+        const std::string& token = ruleQueueCopy.front();
+        
+        // token is NOT_OPERATOR
+        if ( token == FilterEngine<FilterChecker>::NOT_OPERATOR ) {
+            BAMTOOLS_ASSERT_MESSAGE( !resultStack.empty(), "Empty result stack - cannot apply operator: !" );
+            resultStack.top() = !resultStack.top();
+        }
+        
+        // token is AND_OPERATOR
+        else if ( token == FilterEngine<FilterChecker>::AND_OPERATOR ) {
+            BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: &" );
+            bool topResult = resultStack.top();
+            resultStack.pop();
+            resultStack.top() &= topResult;
+        }
+        
+        // token is OR_OPERATOR
+        else if ( token == FilterEngine<FilterChecker>::OR_OPERATOR ) {
+            BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: |" );
+            bool topResult = resultStack.top();
+            resultStack.pop();
+            resultStack.top() |= topResult;
+        }
+        
+        // token is an operand 
+        else {
+            // look up PropertyFilter that matches this token 
+            filterIter = m_filters.find(token);
+            BAMTOOLS_ASSERT_MESSAGE( (filterIter != filterEnd), "Filter mentioned in rule, not found in FilterEngine" );
+            const PropertyFilter& filter = (*filterIter).second;
+            bool result = m_checker.check(filter, query);
+            resultStack.push( result );
+        }
+        
+        // pop token from ruleQueue
+        ruleQueueCopy.pop();
     }
+    
+    // return last result
+    BAMTOOLS_ASSERT_MESSAGE( resultStack.size() == 1, "Result stack should only have one value remaining - cannot return result" );
+    return resultStack.top();
+}
 
-    // property unknown to this filter
-    else return true;
+// return list of current filter names
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::filterNames(void) {
+    // initialize stringlist
+    std::vector<std::string> names;
+    names.reserve(m_filters.size());
+    // iterate over all filters, appending filter name
+    FilterMap::const_iterator mapIter = m_filters.begin();
+    FilterMap::const_iterator mapEnd  = m_filters.end();
+    for ( ; mapIter != mapEnd; ++mapIter )
+        names.push_back( (*mapIter).first ); 
+    // return stringlist
+    return names;
 }
 
-template<typename T>
-bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
+// parse a filterValue token string that may contain comparison qualifiers (">50", "*SRR", etc.)
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
     
     // skip if token is empty
     if ( token.empty() ) return false;
@@ -244,20 +374,19 @@ bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilter
     // more than one character, check for special chars
     else {
         const int firstChar = (int)token.at(0);
-        
-        switch ( (int)firstChar ) {
+        switch ( firstChar ) {
           
-            case ( (int)FilterEngine::NOT_CHAR ) :
+            case ( FilterEngine<FilterChecker>::NOT_CHAR ) :
               
                 strippedToken = token.substr(1);       
                 type = PropertyFilterValue::NOT;
                 
                 break;
                 
-            case ( (int)FilterEngine::GREATER_THAN_CHAR ) :
+            case ( FilterEngine<FilterChecker>::GREATER_THAN_CHAR ) :
                 
                 // check for '>=' case
-                if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
+                if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
                     if ( token.length() == 2 ) return false;
                     strippedToken = token.substr(2);
                     type = PropertyFilterValue::GREATER_THAN_EQUAL;
@@ -271,10 +400,10 @@ bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilter
                 
                 break;
                 
-            case ( (int)FilterEngine::LESS_THAN_CHAR ) : 
+            case ( FilterEngine<FilterChecker>::LESS_THAN_CHAR ) : 
          
                 // check for '<=' case
-                if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
+                if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
                     if ( token.length() == 2 ) return false;
                     strippedToken = token.substr(2);
                     type = PropertyFilterValue::LESS_THAN_EQUAL;
@@ -288,10 +417,10 @@ bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilter
                 
                 break;
                 
-            case ( (int)FilterEngine::WILDCARD_CHAR ) : 
+            case ( FilterEngine<FilterChecker>::WILDCARD_CHAR ) : 
               
                 // check for *str* case (CONTAINS)
-                if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
+                if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
                     if ( token.length() == 2 ) return false;
                     strippedToken = token.substr(1, token.length() - 2);
                     type = PropertyFilterValue::CONTAINS;
@@ -305,11 +434,10 @@ bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilter
                 
                 break;
                
-                
             default :
               
                 // check for str* case (STARTS_WITH)
-                if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
+                if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
                     if ( token.length() == 2 ) return false;
                     strippedToken = token.substr(0, token.length() - 1);
                     type = PropertyFilterValue::STARTS_WITH;
@@ -348,13 +476,27 @@ bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilter
     return true;
 }
 
+// sets comparison operator between filters if no rule string given
+// default is to do an OR on each filter
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::setDefaultCompareType(const FilterCompareType::Type& type) {
+    // check for supported compare type
+    if ( type == FilterCompareType::AND || type == FilterCompareType::OR ) {
+        // if not the current compare type
+        if ( m_defaultCompareType != type ) {
+            m_defaultCompareType = type;
+            buildRuleQueue();
+        }
+    }
+}
+
 // sets property filter (value, type) for propertyName, on a particular filter set 
 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
-template<typename T>
-bool FilterEngine::setProperty(const std::string& filterName, 
-                               const std::string& propertyName, 
-                               const T& value,
-                               const PropertyFilterValue::ValueCompareType& type)
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::setProperty(const std::string& filterName, 
+                                              const std::string& propertyName, 
+                                              const T& value,
+                                              const PropertyFilterValue::ValueCompareType& type)
 {
     // lookup filter by name, return false if not found
     FilterMap::iterator filterIter = m_filters.find(filterName);
@@ -394,37 +536,19 @@ bool FilterEngine::setProperty(const std::string& filterName,
     } 
     
     // property already known, set as enabled
-    else 
-        (*knownPropertyIter).IsEnabled = true;
+    else (*knownPropertyIter).IsEnabled = true;
 
     // return success
     return true;
 }
 
-// returns false if query does not pass any filters on 'propertyName' 
-// returns true if property unknown (i.e. nothing has been set for this property... so query is considered to pass filter)
-template<typename T>
-bool FilterEngine::check(const std::string& propertyName, const T& query) {
-  
-    // check enabled properties list
-    // return true if no properties enabled at all OR if property is unknown to FilterEngine
-    const std::vector<std::string> enabledProperties = enabledPropertyNames();
-    if ( enabledProperties.empty() ) return true;
-    const bool found = std::binary_search( enabledProperties.begin(), enabledProperties.end(), propertyName );
-    if ( !found ) return true;
-    
-    // iterate over all filters in FilterEngine
-    FilterMap::const_iterator filterIter = m_filters.begin();
-    FilterMap::const_iterator filterEnd  = m_filters.end();
-    for ( ; filterIter != filterEnd; ++filterIter ) {
-      
-        // check query against this filter
-        const PropertyFilter& filter = (*filterIter).second;
-        if ( filter.check(propertyName, query) ) return true;
+// sets user-specified rule string & signals update of rule-expression queue
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::setRule(const std::string& ruleString) {
+    if ( m_ruleString != ruleString) {
+        m_ruleString = ruleString;
+        buildRuleQueue();
     }
-    // query passes none of the filters with current property enabled
-    return false;
 }
 
 } // namespace BamTools
diff --git a/src/utils/bamtools_filter_properties.h b/src/utils/bamtools_filter_properties.h
new file mode 100644 (file)
index 0000000..119da06
--- /dev/null
@@ -0,0 +1,195 @@
+// ***************************************************************************
+// bamtools_filter_properties.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 30 August 2010
+// ---------------------------------------------------------------------------
+// Provides support data structures & methods for FilterEngine
+//
+// The FilterEngine consists, most importantly, of :
+//
+//     a list of possible properties (each tagged whether it has been 'enabled' as a filter)
+//     a map of filterName => propertySet
+//     queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
+//     
+// Each propertySet is a list of properties enabled for this particular filter object
+//
+//     Implemented as a map of propertyNames to propertyFilterValue
+//     ( "property1" => pfv1 
+//       "property2" => pfv2 
+//       "property4" => pfv4
+//       etc. )  
+//
+//     Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled 
+//     via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true 
+//     for any query.  Meaning that if a property is not set on this filter, we don't care 
+//     about it here, so it passes though OK.
+//
+// A propertyFilterValue contains a value and comparison type
+//
+//    ( pfv1: Value = 50,    Type = GREATER_THAN_EQUAL
+//      pfv2: Value = "foo", Type = STARTS_WITH
+//      pfv4: Value = "bar", Type = CONTAINS
+//      etc. )  
+//
+//    This allows for more complex queries (than simple isEqual?) against a variety of data types.
+//
+// ***************************************************************************
+
+#ifndef BAMTOOLS_FILTER_PROPERTIES_H
+#define BAMTOOLS_FILTER_PROPERTIES_H
+
+#include <iostream>
+#include <map>
+#include <string>
+#include "bamtools_utilities.h"
+#include "bamtools_variant.h"
+
+namespace BamTools {
+
+// ----------------------------------------------------------
+// PropertyFilterValue
+  
+struct PropertyFilterValue {
+  
+    // define valid ValueCompareTypes
+    enum ValueCompareType { CONTAINS = 0
+                          , ENDS_WITH
+                          , EXACT
+                          , GREATER_THAN
+                          , GREATER_THAN_EQUAL
+                          , LESS_THAN
+                          , LESS_THAN_EQUAL
+                          , NOT
+                          , STARTS_WITH
+                          };
+                   
+    // ctor
+    PropertyFilterValue(const Variant& value = Variant(),
+                        const ValueCompareType& type = PropertyFilterValue::EXACT)
+        : Value(value)
+        , Type(type)
+    { }
+          
+    // filter check methods      
+    template<typename T>
+    bool check(const T& query) const;
+    bool check(const std::string& query) const;
+             
+    // data members
+    Variant Value;
+    ValueCompareType Type;
+};
+
+// checks a query against a filter (value, compare type)
+template<typename T>
+bool PropertyFilterValue::check(const T& query) const {
+  
+    // ensure filter value & query are same type
+    if ( !Value.is_type<T>() ) { 
+        std::cerr << "Cannot compare different types!" << std::endl;
+        return false;
+    }
+    
+    // string matching
+    if ( Value.is_type<std::string>() ) {
+        std::cerr << "Cannot compare different types - query is a string!" << std::endl;
+        return false;
+    } 
+    
+    // numeric matching based on our filter type
+    switch ( Type ) {
+        case ( PropertyFilterValue::EXACT)              : return ( query == Value.get<T>() );
+        case ( PropertyFilterValue::GREATER_THAN)       : return ( query >  Value.get<T>() ); 
+        case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() ); 
+        case ( PropertyFilterValue::LESS_THAN)          : return ( query <  Value.get<T>() );
+        case ( PropertyFilterValue::LESS_THAN_EQUAL)    : return ( query <= Value.get<T>() );
+        case ( PropertyFilterValue::NOT)                : return ( query != Value.get<T>() );
+        default : BAMTOOLS_ASSERT_UNREACHABLE;
+    }
+    return false;
+}
+
+// checks a string query against filter (value, compare type)
+inline
+bool PropertyFilterValue::check(const std::string& query) const {
+  
+    // ensure filter value & query are same type
+    if ( !Value.is_type<std::string>() ) {
+        std::cerr << "Cannot compare different types!" << std::endl;
+        return false;
+    }
+  
+    // localize string version of our filter value
+    const std::string& valueString = Value.get<std::string>();
+    
+    // string matching based on our filter type
+    switch ( Type ) {
+        case ( PropertyFilterValue::CONTAINS)           : return ( query.find(valueString) != std::string::npos );
+        case ( PropertyFilterValue::ENDS_WITH)          : return ( query.find(valueString) == (query.length() - valueString.length()) ); 
+        case ( PropertyFilterValue::EXACT)              : return ( query == valueString );
+        case ( PropertyFilterValue::GREATER_THAN)       : return ( query >  valueString ); 
+        case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= valueString ); 
+        case ( PropertyFilterValue::LESS_THAN)          : return ( query <  valueString );
+        case ( PropertyFilterValue::LESS_THAN_EQUAL)    : return ( query <= valueString );
+        case ( PropertyFilterValue::NOT)                : return ( query != valueString );
+        case ( PropertyFilterValue::STARTS_WITH)        : return ( query.find(valueString) == 0 );
+        default : BAMTOOLS_ASSERT_UNREACHABLE;
+    }
+    return false;
+}
+
+inline
+const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
+  
+    switch ( type ) {
+        case ( PropertyFilterValue::CONTAINS )           : return std::string( "CONTAINS");
+        case ( PropertyFilterValue::ENDS_WITH )          : return std::string( "ENDS_WITH");
+        case ( PropertyFilterValue::EXACT )              : return std::string( "EXACT");
+        case ( PropertyFilterValue::GREATER_THAN )       : return std::string( "GREATER_THAN");
+        case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
+        case ( PropertyFilterValue::LESS_THAN )          : return std::string( "LESS_THAN");
+        case ( PropertyFilterValue::LESS_THAN_EQUAL )    : return std::string( "LESS_THAN_EQUAL");
+        case ( PropertyFilterValue::NOT )                : return std::string( "NOT");
+        case ( PropertyFilterValue::STARTS_WITH )        : return std::string( "STARTS_WITH");
+        default : BAMTOOLS_ASSERT_UNREACHABLE;
+    }
+    return std::string();
+}
+
+// property name => property filter value 
+// ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...)
+typedef std::map<std::string, PropertyFilterValue> PropertyMap;
+
+// ----------------------------------------------------------
+// PropertyFilter
+
+struct PropertyFilter {  
+    // data members
+    PropertyMap Properties;
+};
+
+// filter name => properties  
+// ('filter1' => properties1, 'filter2' => properties2, etc...)
+typedef std::map<std::string, PropertyFilter> FilterMap;
+  
+// ----------------------------------------------------------
+// Property
+  
+// used to store properties known to engine & keep track of enabled state
+struct Property {
+    std::string Name;
+    bool IsEnabled;
+    Property(const std::string& name, bool isEnabled = false) 
+        : Name(name)
+        , IsEnabled(isEnabled) 
+    { }
+};
+
+inline bool operator<  (const Property& lhs, const Property& rhs) { return lhs.Name <  rhs.Name; }
+inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_FILTER_PROPERTIES_H
diff --git a/src/utils/bamtools_filter_ruleparser.h b/src/utils/bamtools_filter_ruleparser.h
new file mode 100644 (file)
index 0000000..7e3b1fd
--- /dev/null
@@ -0,0 +1,318 @@
+// ***************************************************************************
+// bamtools_filter_ruleparser.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 21 September 2010
+// ---------------------------------------------------------------------------
+// Provides a compound rule parser for FilterEngine.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_FILTER_RULEPARSER_H
+#define BAMTOOLS_FILTER_RULEPARSER_H
+
+#include <queue>
+#include <stack>
+#include <string>
+#include "bamtools_utilities.h"
+
+namespace BamTools {
+
+// -------------------------------------------
+// char constants  
+  
+const char LEFT_PARENTHESIS_CHAR  = '(';
+const char RIGHT_PARENTHESIS_CHAR = ')';
+const char AND_OPERATOR_CHAR      = '&';
+const char OR_OPERATOR_CHAR       = '|';
+const char NOT_OPERATOR_CHAR      = '!';
+const char SPACE_CHAR             = ' ';
+  
+// -------------------------------------------
+// RuleToken implementation
+  
+struct RuleToken {
+  
+    // enums
+    enum RuleTokenType { OPERAND = 0
+                       , AND_OPERATOR
+                       , OR_OPERATOR
+                       , NOT_OPERATOR
+                       , LEFT_PARENTHESIS
+                       , RIGHT_PARENTHESIS
+                       };
+    
+    // data members
+    RuleTokenType Type;
+    std::string Value;
+};
+
+inline int priority(const RuleToken& token) {
+    switch ( token.Type ) {
+        case ( RuleToken::NOT_OPERATOR )      : return 3;
+        case ( RuleToken::AND_OPERATOR )      : return 2;
+        case ( RuleToken::OR_OPERATOR  )      : return 1;
+        case ( RuleToken::LEFT_PARENTHESIS )  : return 0;
+        case ( RuleToken::RIGHT_PARENTHESIS ) : return 0;
+        default: BAMTOOLS_ASSERT_UNREACHABLE;
+    } 
+}
+
+inline bool isRightAssociative(const RuleToken& token) {
+    return (token.Type == RuleToken::NOT_OPERATOR || 
+            token.Type == RuleToken::LEFT_PARENTHESIS);
+}
+
+inline bool isLeftAssociative(const RuleToken& token) {
+    return !isRightAssociative(token);
+}
+
+inline bool isLeftParenthesis(const RuleToken& token) {
+    return ( token.Type == RuleToken::LEFT_PARENTHESIS );
+}
+
+inline bool isRightParenthesis(const RuleToken& token) {
+    return ( token.Type == RuleToken::RIGHT_PARENTHESIS );
+}
+
+inline bool isOperand(const RuleToken& token) {
+    return ( token.Type == RuleToken::OPERAND );
+}
+
+inline bool isOperator(const RuleToken& token) {
+    return ( token.Type == RuleToken::AND_OPERATOR ||
+             token.Type == RuleToken::OR_OPERATOR  ||
+             token.Type == RuleToken::NOT_OPERATOR);
+}
+  
+// -------------------------------------------
+// RuleParser implementation  
+  
+class RuleParser {
+
+    // ctor & dtor
+    public:
+        RuleParser(const std::string& ruleString)
+            : m_ruleString(ruleString)
+        { 
+            // initialize char markers
+            m_begin = (char*)m_ruleString.c_str();
+            m_end   = m_begin + m_ruleString.length();
+            ignoreQuotes();
+        }
+        
+        ~RuleParser(void) { }
+  
+    // public interface
+    public:
+        void parse(void);
+        std::queue<std::string> results(void) const { return m_ruleQueue; }
+
+    // internal methods
+    private:
+        char getNextChar(void);
+        void ignoreQuotes(void);
+        bool readToken(RuleToken& token);
+        void skipSpaces(void);
+      
+    // data members
+    private:
+        std::string m_ruleString;
+        char* m_begin;
+        char* m_current;
+        char* m_end;
+        
+        std::queue<std::string> m_ruleQueue;
+        std::stack<RuleToken> m_operatorStack;
+};
+
+inline
+char RuleParser::getNextChar(void) {
+   if ( m_current == m_end ) return 0;
+   return *m_current++;
+}
+
+inline
+void RuleParser::ignoreQuotes(void) {
+    if ( *m_begin == '\"' ) ++m_begin;
+    if ( *m_end   == '\"' ) --m_end;
+}
+
+inline
+void RuleParser::parse(void) {
+  
+    // clear out any prior data
+    while ( !m_ruleQueue.empty() ) 
+        m_ruleQueue.pop();
+    
+    // skip if no rule to parse
+    if ( m_ruleString.empty() ) return;
+  
+    // start at beginning of ruleString
+    m_current = m_begin;
+    
+    // iterate through tokens in rule string
+    RuleToken token;
+    while ( readToken(token) ) {
+      
+        if ( token.Value.empty() ) break;
+      
+        // if token is an operand
+        if ( isOperand(token) )
+            m_ruleQueue.push(token.Value);
+
+        // if token is an operator 
+        else if ( isOperator(token) ) {
+
+            // pop any operators at top of stack with higher priority
+            while ( !m_operatorStack.empty() ) {
+                const RuleToken& opToken = m_operatorStack.top();
+                if ( (isLeftAssociative(token) && (priority(token) <= priority(opToken))) ||
+                     (isRightAssociative(token) && (priority(token) < priority(opToken))) 
+                    )
+                {
+                    m_ruleQueue.push(opToken.Value);
+                    m_operatorStack.pop();
+                }
+                else break;
+            }
+            
+            // push current operator token onto stack
+            m_operatorStack.push(token);
+        }
+        
+        // if token is left parenthesis
+        else if ( isLeftParenthesis(token) )
+            m_operatorStack.push(token);
+        
+        // if token is right parenthesis
+        else if ( isRightParenthesis(token) ) {
+          
+            bool foundLeftParenthesis = false;
+          
+            // push operators into rule queue until left parenthesis found
+            while ( !m_operatorStack.empty() && !foundLeftParenthesis ) {
+                const RuleToken& opToken = m_operatorStack.top();
+                if ( !isLeftParenthesis(opToken) )
+                    m_ruleQueue.push(opToken.Value);
+                else 
+                    foundLeftParenthesis = true;
+                m_operatorStack.pop();
+            }
+          
+            // no left parenthesis found, error
+            BAMTOOLS_ASSERT_MESSAGE( foundLeftParenthesis, "ERROR: Mismatched parenthesis in rule string.1");
+        }
+        
+        // error: unknown operand
+        else BAMTOOLS_ASSERT_UNREACHABLE;
+    }    
+    
+    // while there are still operators on stack
+    while ( !m_operatorStack.empty() ) {
+        const RuleToken& token = m_operatorStack.top();
+        BAMTOOLS_ASSERT_MESSAGE( (!isLeftParenthesis(token) && !isRightParenthesis(token)), "ERROR: Mismatched parenthesis in rule string.2");
+        m_ruleQueue.push(token.Value);
+        m_operatorStack.pop();
+    }
+}
+
+inline
+bool RuleParser::readToken(RuleToken& token) {
+  
+    // skip any preceding whitespace
+    skipSpaces();
+    if ( m_current == m_end ) return false;
+
+    // clear out prior token value
+    token.Value.clear();
+    
+    // read chars while still in token
+    char c = 1;
+    bool keepReading = true;
+    bool inOperandString = false;
+    while ( keepReading && (c != 0) ) {
+      
+      // get next char
+      c = getNextChar();
+      switch (c) {
+        
+          // current char is '('
+          case ( LEFT_PARENTHESIS_CHAR ) :
+              token.Type = RuleToken::LEFT_PARENTHESIS;
+              token.Value.append(1, LEFT_PARENTHESIS_CHAR);
+              keepReading = false;
+              break;
+              
+          // current char is ')'
+          case ( RIGHT_PARENTHESIS_CHAR ) :
+              if ( inOperandString )
+                  --m_current;
+              else {
+                  token.Type = RuleToken::RIGHT_PARENTHESIS;
+                  token.Value.append(1, RIGHT_PARENTHESIS_CHAR);
+              }
+              keepReading = false;
+              break;
+        
+          // current char is '&'
+          case ( AND_OPERATOR_CHAR ) :
+              if ( inOperandString ) 
+                  --m_current;
+              else {
+                  token.Type = RuleToken::AND_OPERATOR;
+                  token.Value.append(1, AND_OPERATOR_CHAR);
+              }
+              keepReading = false;
+              break;
+              
+          // current char is '|' 
+          case ( OR_OPERATOR_CHAR ) :
+              if ( inOperandString )
+                  --m_current;
+              else {  
+                  token.Type = RuleToken::OR_OPERATOR;
+                  token.Value.append(1, OR_OPERATOR_CHAR);
+              }
+              keepReading = false;
+              break;
+              
+          // current char is '!'
+          case ( NOT_OPERATOR_CHAR ) :
+              token.Type = RuleToken::NOT_OPERATOR;
+              token.Value.append(1, NOT_OPERATOR_CHAR);
+              keepReading = false;
+              break;
+              
+          // current char is ' '
+          case ( SPACE_CHAR ) : 
+              keepReading = false;
+              break;
+            
+          // current char is a true value token
+          default:
+              if ( c != 0 ) {
+                  token.Type = RuleToken::OPERAND;
+                  token.Value.append(1, c);
+                  inOperandString = true;
+                  keepReading = true;
+              }
+        }         
+    }
+      
+    return true;
+}
+
+inline
+void RuleParser::skipSpaces(void) {
+    while ( m_current != m_end ) {
+        const char c = *m_current;
+        if ( c == ' ' || c == '\t' || c == '\r' || c == '\n')
+            ++m_current;
+        else break;
+    }
+}
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_FILTER_RULEPARSER_H
\ No newline at end of file