// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 31 August 2010
+// Last modified: 21 September 2010
// ---------------------------------------------------------------------------
-// Filters a single BAM file (or filters multiple BAM files and merges)
-// according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria.
// ***************************************************************************
-// std includes
#include <cstdio>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
-
-// BamTools includes
#include "bamtools_filter.h"
#include "bamtools_filter_engine.h"
#include "bamtools_options.h"
#include "BamReader.h"
#include "BamMultiReader.h"
#include "BamWriter.h"
-
-//JsonCPP includes
#include "jsoncpp/json.h"
-
using namespace std;
using namespace BamTools;
using namespace Json;
const string TRUE_STR = "true";
const string FALSE_STR = "false";
+RefVector filterToolReferences;
+
+struct BamAlignmentChecker {
+ bool check(const PropertyFilter& filter, const BamAlignment& al) {
+
+ bool keepAlignment = true;
+ const PropertyMap& properties = filter.Properties;
+ PropertyMap::const_iterator propertyIter = properties.begin();
+ PropertyMap::const_iterator propertyEnd = properties.end();
+ for ( ; propertyIter != propertyEnd; ++propertyIter ) {
+
+ // check alignment data field depending on propertyName
+ const string& propertyName = (*propertyIter).first;
+ const PropertyFilterValue& valueFilter = (*propertyIter).second;
+
+ if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
+ else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
+ else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
+ else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
+ else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
+ else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
+ else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
+ else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
+ else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
+ else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
+ else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
+ else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
+ else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
+ else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
+ else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
+ else if ( propertyName == MATEREFERENCE_PROPERTY ) {
+ if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
+ BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
+ const string& refName = filterToolReferences.at(al.MateRefID).RefName;
+ keepAlignment &= valueFilter.check(refName);
+ }
+ else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
+ else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
+ else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
+ else if ( propertyName == REFERENCE_PROPERTY ) {
+ BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
+ const string& refName = filterToolReferences.at(al.RefID).RefName;
+ keepAlignment &= valueFilter.check(refName);
+ }
+ else BAMTOOLS_ASSERT_UNREACHABLE;
+
+ // if alignment fails at ANY point, just quit and return false
+ if ( !keepAlignment )
+ return false;
+ }
+
+ BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
+ return keepAlignment;
+ }
+};
+
} // namespace BamTools
// ---------------------------------------------
private:
vector<string> m_propertyNames;
FilterTool::FilterSettings* m_settings;
- RefVector m_references;
+ FilterEngine<BamAlignmentChecker> m_filterEngine;
};
// ---------------------------------------------
bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
-
// dummy temp values for token parsing
bool boolValue;
int32_t int32Value;
propertyName == ISSECONDMATE_PROPERTY
)
{
- FilterEngine::parseToken(token, boolValue, type);
- FilterEngine::setProperty(filterName, propertyName, boolValue, type);
+ m_filterEngine.parseToken(token, boolValue, type);
+ m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
}
// int32_t conversion
propertyName == POSITION_PROPERTY
)
{
- FilterEngine::parseToken(token, int32Value, type);
- FilterEngine::setProperty(filterName, propertyName, int32Value, type);
+ m_filterEngine.parseToken(token, int32Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
}
// uint16_t conversion
else if ( propertyName == MAPQUALITY_PROPERTY )
{
- FilterEngine::parseToken(token, uint16Value, type);
- FilterEngine::setProperty(filterName, propertyName, uint16Value, type);
+ m_filterEngine.parseToken(token, uint16Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
}
// uint32_t conversion
else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
{
- FilterEngine::parseToken(token, uint32Value, type);
- FilterEngine::setProperty(filterName, propertyName, uint32Value, type);
+ m_filterEngine.parseToken(token, uint32Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
}
// string conversion
propertyName == REFERENCE_PROPERTY
)
{
- FilterEngine::parseToken(token, stringValue, type);
- FilterEngine::setProperty(filterName, propertyName, stringValue, type);
+ m_filterEngine.parseToken(token, stringValue, type);
+ m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
}
// else unknown property
}
bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
-
- bool keepAlignment = true;
-
- // only consider properties that are actually enabled
- // iterate over these enabled properties
- const vector<string> enabledProperties = FilterEngine::enabledPropertyNames();
- vector<string>::const_iterator propIter = enabledProperties.begin();
- vector<string>::const_iterator propEnd = enabledProperties.end();
- for ( ; propIter != propEnd; ++propIter ) {
-
- // check alignment data field depending on propertyName
- const string& propertyName = (*propIter);
- if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= FilterEngine::check(ALIGNMENTFLAG_PROPERTY, al.AlignmentFlag);
- else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= FilterEngine::check(INSERTSIZE_PROPERTY, al.InsertSize);
- else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISDUPLICATE_PROPERTY, al.IsDuplicate());
- else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= FilterEngine::check(ISFAILEDQC_PROPERTY, al.IsFailedQC());
- else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISFIRSTMATE_PROPERTY, al.IsFirstMate());
- else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= FilterEngine::check(ISMAPPED_PROPERTY, al.IsMapped());
- else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= FilterEngine::check(ISMATEMAPPED_PROPERTY, al.IsMateMapped());
- else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= FilterEngine::check(ISMATEREVERSESTRAND_PROPERTY, al.IsMateReverseStrand());
- else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= FilterEngine::check(ISPAIRED_PROPERTY, al.IsPaired());
- else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= FilterEngine::check(ISPRIMARYALIGNMENT_PROPERTY, al.IsPrimaryAlignment());
- else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= FilterEngine::check(ISPROPERPAIR_PROPERTY, al.IsProperPair());
- else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= FilterEngine::check(ISREVERSESTRAND_PROPERTY, al.IsReverseStrand());
- else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISSECONDMATE_PROPERTY, al.IsSecondMate());
- else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= FilterEngine::check(MAPQUALITY_PROPERTY, al.MapQuality);
- else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && FilterEngine::check(MATEPOSITION_PROPERTY, al.MateRefID) );
- else if ( propertyName == MATEREFERENCE_PROPERTY ) {
- if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
- BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)m_references.size())), "Invalid MateRefID");
- const string& refName = m_references.at(al.MateRefID).RefName;
- keepAlignment &= FilterEngine::check(MATEREFERENCE_PROPERTY, refName);
- }
- else if ( propertyName == NAME_PROPERTY ) keepAlignment &= FilterEngine::check(NAME_PROPERTY, al.Name);
- else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= FilterEngine::check(POSITION_PROPERTY, al.Position);
- else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= FilterEngine::check(QUERYBASES_PROPERTY, al.QueryBases);
- else if ( propertyName == REFERENCE_PROPERTY ) {
- BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)m_references.size())), "Invalid RefID");
- const string& refName = m_references.at(al.RefID).RefName;
- keepAlignment &= FilterEngine::check(REFERENCE_PROPERTY, refName);
- }
- else BAMTOOLS_ASSERT_MESSAGE( false, "Unknown property");
-
- // if alignment fails at ANY point, just quit and return false
- if ( !keepAlignment ) return false;
- }
-
- // return success (should still be true at this point)
- return keepAlignment;
+ return m_filterEngine.check(al);
}
const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
m_propertyNames.push_back(QUERYBASES_PROPERTY);
m_propertyNames.push_back(REFERENCE_PROPERTY);
- // add vector contents to FilterEngine
+ // add vector contents to FilterEngine<BamAlignmentChecker>
vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
- FilterEngine::addProperty((*propertyNameIter));
+ m_filterEngine.addProperty((*propertyNameIter));
}
bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
// add a rule set to filter engine
const string CMD = "COMMAND_LINE";
- FilterEngine::addFilter(CMD);
+ m_filterEngine.addFilter(CMD);
// map property names to command line args
map<string, string> propertyTokens;
}
// add this filter to engin
- FilterEngine::addFilter(filterName);
+ m_filterEngine.addFilter(filterName);
// add token list to this filter
return AddPropertyTokensToFilter(filterName, propertyTokens);
success &= ParseFilterObject(filterName, filter);
}
- // see if user defined "rule" for these filters
+ // see if user defined a "rule" for these filters
+ // otherwise, use filter engine's default rule behavior
+ string ruleString("");
const Json::Value rule = root["rule"];
- if ( !rule.isNull() ) {
- cout << "found rule: " << rule.asString() << endl;
- } else {
- cout << "no rule found!" << endl;
- }
+ if ( rule.isString() )
+ ruleString = rule.asString();
+ m_filterEngine.setRule(ruleString);
+ // return success/fail
return success;
}
BamMultiReader reader;
reader.Open(m_settings->InputFiles, false, true);
const string headerText = reader.GetHeaderText();
- m_references = reader.GetReferenceData();
+ filterToolReferences = reader.GetReferenceData();
// open writer
BamWriter writer;
bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
- writer.Open(m_settings->OutputFilename, headerText, m_references, writeUncompressed);
+ writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed);
BamAlignment al;
bool FilterTool::FilterToolPrivate::SetupFilters(void) {
- // add known properties to FilterEngine
+ // add known properties to FilterEngine<BamAlignmentChecker>
InitProperties();
// parse script for filter rules, if given
# define our source and object files
# ----------------------------------
SOURCES = bamtools_fasta.cpp \
- bamtools_filter_engine.cpp \
bamtools_options.cpp \
bamtools_pileup_engine.cpp \
bamtools_utilities.cpp
+++ /dev/null
-// ***************************************************************************
-// bamtools_filter_engine.cpp (c) 2010 Derek Barnett, Erik Garrison
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 18 September 2010
-// ---------------------------------------------------------------------------
-//
-// ***************************************************************************
-
-#include <iostream>
-#include "bamtools_filter_engine.h"
-using namespace std;
-using namespace BamTools;
-
-// ---------------------------------------------------------
-// FilterValue implementation
-
-// checks a string query against filter (value, compare type)
-bool PropertyFilterValue::check(const string& query) const {
-
- // ensure filter value & query are same type
- if ( !Value.is_type<string>() ) {
- cerr << "Cannot compare different types!" << endl;
- return false;
- }
-
- // localize string version of our filter value
- const string& valueString = Value.get<string>();
-
- // string matching based on our filter type
- switch ( Type ) {
- case ( PropertyFilterValue::CONTAINS) : return ( query.find(valueString) != string::npos );
- case ( PropertyFilterValue::ENDS_WITH) : return ( query.find(valueString) == (query.length() - valueString.length()) );
- case ( PropertyFilterValue::EXACT) : return ( query == valueString );
- case ( PropertyFilterValue::GREATER_THAN) : return ( query > valueString );
- case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= valueString );
- case ( PropertyFilterValue::LESS_THAN) : return ( query < valueString );
- case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= valueString );
- case ( PropertyFilterValue::NOT) : return ( query != valueString );
- case ( PropertyFilterValue::STARTS_WITH) : return ( query.find(valueString) == 0 );
- default : BAMTOOLS_ASSERT_UNREACHABLE;
- }
- return false;
-}
-
-// ---------------------------------------------------------
-// FilterEngine implementation
-
-// static FilterEngine data members
-FilterMap FilterEngine::m_filters;
-vector<Property> FilterEngine::m_properties;
-
-// creates a new filter set, returns true if created, false if error or already exists
-bool FilterEngine::addFilter(const string& filterName) {
- return (m_filters.insert(make_pair(filterName, PropertyFilter()))).second;
-}
-
-// return list of current filter names
-const vector<string> FilterEngine::filterNames(void) {
- vector<string> names;
- names.reserve(m_filters.size());
- FilterMap::const_iterator mapIter = m_filters.begin();
- FilterMap::const_iterator mapEnd = m_filters.end();
- for ( ; mapIter != mapEnd; ++mapIter )
- names.push_back( (*mapIter).first );
- return names;
-}
-
-// add a new known property (& type) to engine
-bool FilterEngine::addProperty(const string& propertyName) {
- const vector<string> propertyNames = allPropertyNames();
- bool found = binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
- if ( found ) return false;
- m_properties.push_back( Property(propertyName) );
- sort( m_properties.begin(), m_properties.end() );
- return true;
-}
-
-
-// returns list of all properties known by FilterEngine ( any created using addProperty() )
-const vector<string> FilterEngine::allPropertyNames(void) {
- vector<string> names;
- names.reserve(m_properties.size());
- vector<Property>::const_iterator propIter = m_properties.begin();
- vector<Property>::const_iterator propEnd = m_properties.end();
- for ( ; propIter != propEnd; ++propIter )
- names.push_back( (*propIter).Name );
- return names;
-}
-
-// returns list of property names that are 'enabled' ( only those touched by setProperty() )
-const vector<string> FilterEngine::enabledPropertyNames(void) {
- vector<string> names;
- names.reserve(m_properties.size());
- vector<Property>::const_iterator propIter = m_properties.begin();
- vector<Property>::const_iterator propEnd = m_properties.end();
- for ( ; propIter != propEnd; ++propIter )
- if ( (*propIter).IsEnabled ) names.push_back( (*propIter).Name );
- return names;
-}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 30 August 2010
+// Last modified: 21 September 2010
// ---------------------------------------------------------------------------
+// Provides a generic filter engine based on filter-sets of properties,
+// with possible "rules" (compound logical expressions) to create more complex
+// queries on a data set.
+//
+// FilterEngine consists, most importantly, of :
+//
+// a list of possible properties (each tagged whether it has been 'enabled' as a filter)
+// a map of filterName => propertySet
+// queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
+//
+// Each propertySet is a list of properties enabled for this particular filter object
+//
+// Implemented as a map of propertyNames to propertyFilterValue
+// ( "property1" => pfv1
+// "property2" => pfv2
+// "property4" => pfv4
+// etc. )
+//
+// Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled
+// via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true
+// for any query. Meaning that if a property is not set on this filter, we don't care
+// about it here, so it passes though OK.
+//
+// A propertyFilterValue contains a value and comparison type
+//
+// ( pfv1: Value = 50, Type = GREATER_THAN_EQUAL
+// pfv2: Value = "foo", Type = STARTS_WITH
+// pfv4: Value = "bar", Type = CONTAINS
+// etc. )
+//
+// This allows for more complex queries (than simple isEqual?) against a variety of data types.
//
// ***************************************************************************
#define BAMTOOLS_FILTER_ENGINE_H
#include <algorithm>
+#include <iostream>
#include <map>
+#include <queue>
+#include <sstream>
+#include <stack>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
+#include "bamtools_filter_properties.h"
+#include "bamtools_filter_ruleparser.h"
#include "bamtools_utilities.h"
-#include "bamtools_variant.h"
namespace BamTools {
-struct PropertyFilterValue {
-
- // define valid ValueCompareTypes
- enum ValueCompareType { CONTAINS = 0
- , ENDS_WITH
- , EXACT
- , GREATER_THAN
- , GREATER_THAN_EQUAL
- , LESS_THAN
- , LESS_THAN_EQUAL
- , NOT
- , STARTS_WITH
- };
-
- // ctor
- PropertyFilterValue(const Variant& value = Variant(),
- const ValueCompareType& type = PropertyFilterValue::EXACT)
- : Value(value)
- , Type(type)
- { }
-
- // filter check methods
- template<typename T>
- bool check(const T& query) const;
- bool check(const std::string& query) const;
-
- // data members
- Variant Value;
- ValueCompareType Type;
-};
-
-inline
-const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
-
- switch ( type ) {
- case ( PropertyFilterValue::CONTAINS ) : return std::string( "CONTAINS");
- case ( PropertyFilterValue::ENDS_WITH ) : return std::string( "ENDS_WITH");
- case ( PropertyFilterValue::EXACT ) : return std::string( "EXACT");
- case ( PropertyFilterValue::GREATER_THAN ) : return std::string( "GREATER_THAN");
- case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
- case ( PropertyFilterValue::LESS_THAN ) : return std::string( "LESS_THAN");
- case ( PropertyFilterValue::LESS_THAN_EQUAL ) : return std::string( "LESS_THAN_EQUAL");
- case ( PropertyFilterValue::NOT ) : return std::string( "NOT");
- case ( PropertyFilterValue::STARTS_WITH ) : return std::string( "STARTS_WITH");
- default : BAMTOOLS_ASSERT_UNREACHABLE;
- }
- return std::string();
-}
-
-// property name => property filter value
-// ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...)
-typedef std::map<std::string, PropertyFilterValue> PropertyMap;
-
-struct PropertyFilter {
-
- // will be used more later
- // if we implement a compound 'rules' system - i.e. "(filter1 AND filter2) OR filter 3"
- enum FilterCompareType { AND = 0
- , EXACT
- , NOT
- , OR
- };
-
- // data members
- PropertyMap Properties;
- FilterCompareType Type;
-
- // ctor
- PropertyFilter(void) : Type( PropertyFilter::EXACT ) { }
-
- // filter check methods
- template<typename T>
- bool check(const std::string& propertyName, const T& query) const;
+struct FilterCompareType {
+ enum Type { AND = 0
+ , NOT
+ , OR
+ };
};
-
-// filter name => properties
-// ('filter1' => properties1, 'filter2' => properties2, etc...)
-typedef std::map<std::string, PropertyFilter> FilterMap;
-// used to store properties known to engine & keep track of enabled state
-struct Property {
- std::string Name;
- bool IsEnabled;
- Property(const std::string& name, bool isEnabled = false)
- : Name(name)
- , IsEnabled(isEnabled)
- { }
-};
-
-inline bool operator< (const Property& lhs, const Property& rhs) { return lhs.Name < rhs.Name; }
-inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
+// -----------------------------------------------------------
+// FilterEngine
+template <typename FilterChecker>
class FilterEngine {
+ // ctor & dtor
+ public:
+ FilterEngine(void)
+ : m_ruleString("")
+ , m_isRuleQueueGenerated(false)
+ , m_defaultCompareType(FilterCompareType::OR)
+ , AND_OPERATOR("&")
+ , OR_OPERATOR("|")
+ , NOT_OPERATOR("!")
+ { }
+
+ ~FilterEngine(void) { }
+
// 'filter set' methods
public:
// creates a new filter set, returns true if created, false if error or already exists
- static bool addFilter(const std::string& filterName);
+ bool addFilter(const std::string& filterName);
// return list of current filter names
- static const std::vector<std::string> filterNames(void);
+ const std::vector<std::string> filterNames(void);
// 'property' methods
public:
// add a new known property (& type) to engine
- static bool addProperty(const std::string& propertyName);
+ bool addProperty(const std::string& propertyName);
// sets property filter (value, type) for propertyName, on a particular filter set
// setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
template<typename T>
- static bool setProperty(const std::string& filterName,
- const std::string& propertyName,
- const T& value,
- const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
+ bool setProperty(const std::string& filterName,
+ const std::string& propertyName,
+ const T& value,
+ const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
// returns list of all properties known by FilterEngine ( any created using addProperty() )
- static const std::vector<std::string> allPropertyNames(void);
+ const std::vector<std::string> allPropertyNames(void);
// returns list of property names that are 'enabled' ( only those touched by setProperty() )
- static const std::vector<std::string> enabledPropertyNames(void);
+ const std::vector<std::string> enabledPropertyNames(void);
+ // 'rule' methods
+ public:
+
+ // sets comparison operator between filters if no rule string given
+ // default is to do an OR on each filter
+ void setDefaultCompareType(const FilterCompareType::Type& type = FilterCompareType::OR);
+
+ // sets rule string for building expression queue
+ // if empty, creates
+ void setRule(const std::string& ruleString = "");
+
// token parsing (for property filter generation)
public:
template<typename T>
- static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
+ bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
// query evaluation
public:
- // returns true if query passes all filters on 'propertyName'
+ // returns true if query passes all filters in FilterEngine
template<typename T>
- static bool check(const std::string& propertyName, const T& query);
+ bool check(const T& query);
+ // internal rule-handling methods
+ private:
+ void buildDefaultRuleString(void);
+ void buildRuleQueue(void);
+ template<typename T>
+ bool evaluateFilterRules(const T& query);
+
// data members
private:
// all 'filter sets'
- static FilterMap m_filters;
+ FilterMap m_filters;
// all known properties
- static std::vector<Property> m_properties;
+ std::vector<Property> m_properties;
+
+ // infix expression of filter-set comparison rules
+ std::string m_ruleString;
+
+ // postfix expression of tokens (filterNames) and operators (as strings)
+ // if this is empty, uses m_compareType to build default expression queue
+ std::queue<std::string> m_ruleQueue;
+
+ // flag to test if the rule expression queue has been generated
+ bool m_isRuleQueueGenerated;
+
+ // 'default' comparison operator between filters if no rule string given
+ // if this is changed, m_ruleString is used to build new m_ruleQueue
+ FilterCompareType::Type m_defaultCompareType;
+
+ // client-specified checking type ( provides method: bool check(PropertyFilter, T object) )
+ FilterChecker m_checker;
// token-parsing constants
static const int NOT_CHAR = (int)'!';
static const int GREATER_THAN_CHAR = (int)'>';
static const int LESS_THAN_CHAR = (int)'<';
static const int WILDCARD_CHAR = (int)'*';
+
+ // filter evaluation constants
+ const std::string AND_OPERATOR;
+ const std::string OR_OPERATOR;
+ const std::string NOT_OPERATOR;
};
-// -------------------------------------------------------------------
-// template methods
+// creates a new filter set, returns true if created, false if error or already exists
+template<typename FilterChecker>
+inline bool FilterEngine<FilterChecker>::addFilter(const std::string& filterName) {
+ return (m_filters.insert(std::make_pair(filterName, PropertyFilter()))).second;
+}
+
+// add a new known property & type to engine
+template<typename FilterChecker>
+inline bool FilterEngine<FilterChecker>::addProperty(const std::string& propertyName) {
+ const std::vector<std::string> propertyNames = allPropertyNames();
+ bool found = std::binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
+ if ( found ) return false;
+ m_properties.push_back( Property(propertyName) );
+ std::sort( m_properties.begin(), m_properties.end() );
+ return true;
+}
+
+// returns list of all properties known by FilterEngine
+// ( any that were created using addProperty() )
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::allPropertyNames(void) {
+ // set up stringlist
+ std::vector<std::string> names;
+ names.reserve(m_properties.size());
+ // iterate through all properties, appending to stringlist
+ std::vector<Property>::const_iterator propIter = m_properties.begin();
+ std::vector<Property>::const_iterator propEnd = m_properties.end();
+ for ( ; propIter != propEnd; ++propIter )
+ names.push_back( (*propIter).Name );
+ // return stringlist
+ return names;
+}
-// checks a query against a filter (value, compare type)
-template<typename T>
-bool PropertyFilterValue::check(const T& query) const {
+// builds a default rule string based on m_defaultCompareType
+// used if user supplied an explicit rule string
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::buildDefaultRuleString(void) {
- // ensure filter value & query are same type
- if ( !Value.is_type<T>() ) {
- std::cerr << "Cannot compare different types!" << std::endl;
- return false;
+ // set up temp string stream
+ std::stringstream ruleStream("");
+
+ // get first filterName
+ FilterMap::const_iterator mapIter = m_filters.begin();
+ ruleStream << (*mapIter).first;
+
+ // if there are more filters present
+ // iterate over remaining filters, appending compare operator and filter name
+ if ( m_filters.size() > 1 ) {
+ for ( ++mapIter ; mapIter != m_filters.end(); ++mapIter )
+ ruleStream << ( (m_defaultCompareType == FilterCompareType::AND) ? " & " : " | " )
+ << (*mapIter).first;
}
+
+ // set m_ruleString from temp stream
+ m_ruleString = ruleStream.str();
+}
+
+// build expression queue based on ruleString
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::buildRuleQueue(void) {
+
+ // skip if no filters present
+ if ( m_filters.empty() ) return;
+
+ // clear out any prior expression queue data
+ while ( !m_ruleQueue.empty() )
+ m_ruleQueue.pop();
+
+ // create a rule string, if not provided
+ if ( m_ruleString.empty() )
+ buildDefaultRuleString();
- // string matching
- if ( Value.is_type<std::string>() ) {
- std::cerr << "Cannot compare different types - query is a string!" << std::endl;
- return false;
- }
+ // initialize RuleParser, run, and retrieve results
+ RuleParser ruleParser(m_ruleString);
+ ruleParser.parse();
+ m_ruleQueue = ruleParser.results();
- // numeric matching based on our filter type
- switch ( Type ) {
- case ( PropertyFilterValue::EXACT) : return ( query == Value.get<T>() );
- case ( PropertyFilterValue::GREATER_THAN) : return ( query > Value.get<T>() );
- case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() );
- case ( PropertyFilterValue::LESS_THAN) : return ( query < Value.get<T>() );
- case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= Value.get<T>() );
- case ( PropertyFilterValue::NOT) : return ( query != Value.get<T>() );
- default : BAMTOOLS_ASSERT_UNREACHABLE;
- }
- return false;
+ // set flag if rule queue contains any values
+ m_isRuleQueueGenerated = (!m_ruleQueue.empty());
}
-template<typename T>
-bool PropertyFilter::check(const std::string& propertyName, const T& query) const {
+// returns whether query value passes filter engine rules
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::check(const T& query) {
- // if propertyName found for this filter,
- PropertyMap::const_iterator propIter = Properties.find(propertyName);
- if ( propIter != Properties.end() ) {
- const PropertyFilterValue& filterValue = (*propIter).second;
-
- // check
- switch ( Type ) {
- case ( PropertyFilter::EXACT ) : return filterValue.check(query);
- case ( PropertyFilter::NOT ) : return !filterValue.check(query);
- case ( PropertyFilter::AND ) :
- case ( PropertyFilter::OR ) : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a binary compare operator on 1 value");
- default : BAMTOOLS_ASSERT_UNREACHABLE;
- }
- return false; // unreachable
+ // return result of querying against filter rules
+ return evaluateFilterRules(query);
+}
+
+// returns list of property names that are 'enabled' ( only those touched by setProperty() )
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::enabledPropertyNames(void) {
+ // initialize stringlist
+ std::vector<std::string> names;
+ names.reserve(m_properties.size());
+ // iterate over all properties, appending if enabled
+ std::vector<Property>::const_iterator propIter = m_properties.begin();
+ std::vector<Property>::const_iterator propEnd = m_properties.end();
+ for ( ; propIter != propEnd; ++propIter )
+ if ( (*propIter).IsEnabled )
+ names.push_back( (*propIter).Name );
+ // return stringlist
+ return names;
+}
+
+// evaluates postfix rule queue - with each filter as an operand, AND|OR|NOT as operators
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::evaluateFilterRules(const T& query) {
+
+ // build ruleQueue if not done before
+ if ( !m_isRuleQueueGenerated )
+ buildRuleQueue();
+
+ std::stack<bool> resultStack;
+ FilterMap::const_iterator filterIter;
+ FilterMap::const_iterator filterEnd = m_filters.end();
+ std::queue<std::string> ruleQueueCopy = m_ruleQueue;
+ while ( !ruleQueueCopy.empty() ) {
+ const std::string& token = ruleQueueCopy.front();
+
+ // token is NOT_OPERATOR
+ if ( token == FilterEngine<FilterChecker>::NOT_OPERATOR ) {
+ BAMTOOLS_ASSERT_MESSAGE( !resultStack.empty(), "Empty result stack - cannot apply operator: !" );
+ resultStack.top() = !resultStack.top();
+ }
+
+ // token is AND_OPERATOR
+ else if ( token == FilterEngine<FilterChecker>::AND_OPERATOR ) {
+ BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: &" );
+ bool topResult = resultStack.top();
+ resultStack.pop();
+ resultStack.top() &= topResult;
+ }
+
+ // token is OR_OPERATOR
+ else if ( token == FilterEngine<FilterChecker>::OR_OPERATOR ) {
+ BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: |" );
+ bool topResult = resultStack.top();
+ resultStack.pop();
+ resultStack.top() |= topResult;
+ }
+
+ // token is an operand
+ else {
+ // look up PropertyFilter that matches this token
+ filterIter = m_filters.find(token);
+ BAMTOOLS_ASSERT_MESSAGE( (filterIter != filterEnd), "Filter mentioned in rule, not found in FilterEngine" );
+ const PropertyFilter& filter = (*filterIter).second;
+ bool result = m_checker.check(filter, query);
+ resultStack.push( result );
+ }
+
+ // pop token from ruleQueue
+ ruleQueueCopy.pop();
}
+
+ // return last result
+ BAMTOOLS_ASSERT_MESSAGE( resultStack.size() == 1, "Result stack should only have one value remaining - cannot return result" );
+ return resultStack.top();
+}
- // property unknown to this filter
- else return true;
+// return list of current filter names
+template<typename FilterChecker>
+inline const std::vector<std::string> FilterEngine<FilterChecker>::filterNames(void) {
+ // initialize stringlist
+ std::vector<std::string> names;
+ names.reserve(m_filters.size());
+ // iterate over all filters, appending filter name
+ FilterMap::const_iterator mapIter = m_filters.begin();
+ FilterMap::const_iterator mapEnd = m_filters.end();
+ for ( ; mapIter != mapEnd; ++mapIter )
+ names.push_back( (*mapIter).first );
+ // return stringlist
+ return names;
}
-template<typename T>
-bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
+// parse a filterValue token string that may contain comparison qualifiers (">50", "*SRR", etc.)
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
// skip if token is empty
if ( token.empty() ) return false;
// more than one character, check for special chars
else {
const int firstChar = (int)token.at(0);
-
- switch ( (int)firstChar ) {
+ switch ( firstChar ) {
- case ( (int)FilterEngine::NOT_CHAR ) :
+ case ( FilterEngine<FilterChecker>::NOT_CHAR ) :
strippedToken = token.substr(1);
type = PropertyFilterValue::NOT;
break;
- case ( (int)FilterEngine::GREATER_THAN_CHAR ) :
+ case ( FilterEngine<FilterChecker>::GREATER_THAN_CHAR ) :
// check for '>=' case
- if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
+ if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
if ( token.length() == 2 ) return false;
strippedToken = token.substr(2);
type = PropertyFilterValue::GREATER_THAN_EQUAL;
break;
- case ( (int)FilterEngine::LESS_THAN_CHAR ) :
+ case ( FilterEngine<FilterChecker>::LESS_THAN_CHAR ) :
// check for '<=' case
- if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
+ if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
if ( token.length() == 2 ) return false;
strippedToken = token.substr(2);
type = PropertyFilterValue::LESS_THAN_EQUAL;
break;
- case ( (int)FilterEngine::WILDCARD_CHAR ) :
+ case ( FilterEngine<FilterChecker>::WILDCARD_CHAR ) :
// check for *str* case (CONTAINS)
- if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
+ if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
if ( token.length() == 2 ) return false;
strippedToken = token.substr(1, token.length() - 2);
type = PropertyFilterValue::CONTAINS;
break;
-
default :
// check for str* case (STARTS_WITH)
- if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
+ if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
if ( token.length() == 2 ) return false;
strippedToken = token.substr(0, token.length() - 1);
type = PropertyFilterValue::STARTS_WITH;
return true;
}
+// sets comparison operator between filters if no rule string given
+// default is to do an OR on each filter
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::setDefaultCompareType(const FilterCompareType::Type& type) {
+ // check for supported compare type
+ if ( type == FilterCompareType::AND || type == FilterCompareType::OR ) {
+ // if not the current compare type
+ if ( m_defaultCompareType != type ) {
+ m_defaultCompareType = type;
+ buildRuleQueue();
+ }
+ }
+}
+
// sets property filter (value, type) for propertyName, on a particular filter set
// setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
-template<typename T>
-bool FilterEngine::setProperty(const std::string& filterName,
- const std::string& propertyName,
- const T& value,
- const PropertyFilterValue::ValueCompareType& type)
+template<class FilterChecker> template<typename T>
+bool FilterEngine<FilterChecker>::setProperty(const std::string& filterName,
+ const std::string& propertyName,
+ const T& value,
+ const PropertyFilterValue::ValueCompareType& type)
{
// lookup filter by name, return false if not found
FilterMap::iterator filterIter = m_filters.find(filterName);
}
// property already known, set as enabled
- else
- (*knownPropertyIter).IsEnabled = true;
+ else (*knownPropertyIter).IsEnabled = true;
// return success
return true;
}
-// returns false if query does not pass any filters on 'propertyName'
-// returns true if property unknown (i.e. nothing has been set for this property... so query is considered to pass filter)
-template<typename T>
-bool FilterEngine::check(const std::string& propertyName, const T& query) {
-
- // check enabled properties list
- // return true if no properties enabled at all OR if property is unknown to FilterEngine
- const std::vector<std::string> enabledProperties = enabledPropertyNames();
- if ( enabledProperties.empty() ) return true;
- const bool found = std::binary_search( enabledProperties.begin(), enabledProperties.end(), propertyName );
- if ( !found ) return true;
-
- // iterate over all filters in FilterEngine
- FilterMap::const_iterator filterIter = m_filters.begin();
- FilterMap::const_iterator filterEnd = m_filters.end();
- for ( ; filterIter != filterEnd; ++filterIter ) {
-
- // check query against this filter
- const PropertyFilter& filter = (*filterIter).second;
- if ( filter.check(propertyName, query) ) return true;
+// sets user-specified rule string & signals update of rule-expression queue
+template<typename FilterChecker>
+inline void FilterEngine<FilterChecker>::setRule(const std::string& ruleString) {
+ if ( m_ruleString != ruleString) {
+ m_ruleString = ruleString;
+ buildRuleQueue();
}
-
- // query passes none of the filters with current property enabled
- return false;
}
} // namespace BamTools
--- /dev/null
+// ***************************************************************************
+// bamtools_filter_properties.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 30 August 2010
+// ---------------------------------------------------------------------------
+// Provides support data structures & methods for FilterEngine
+//
+// The FilterEngine consists, most importantly, of :
+//
+// a list of possible properties (each tagged whether it has been 'enabled' as a filter)
+// a map of filterName => propertySet
+// queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
+//
+// Each propertySet is a list of properties enabled for this particular filter object
+//
+// Implemented as a map of propertyNames to propertyFilterValue
+// ( "property1" => pfv1
+// "property2" => pfv2
+// "property4" => pfv4
+// etc. )
+//
+// Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled
+// via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true
+// for any query. Meaning that if a property is not set on this filter, we don't care
+// about it here, so it passes though OK.
+//
+// A propertyFilterValue contains a value and comparison type
+//
+// ( pfv1: Value = 50, Type = GREATER_THAN_EQUAL
+// pfv2: Value = "foo", Type = STARTS_WITH
+// pfv4: Value = "bar", Type = CONTAINS
+// etc. )
+//
+// This allows for more complex queries (than simple isEqual?) against a variety of data types.
+//
+// ***************************************************************************
+
+#ifndef BAMTOOLS_FILTER_PROPERTIES_H
+#define BAMTOOLS_FILTER_PROPERTIES_H
+
+#include <iostream>
+#include <map>
+#include <string>
+#include "bamtools_utilities.h"
+#include "bamtools_variant.h"
+
+namespace BamTools {
+
+// ----------------------------------------------------------
+// PropertyFilterValue
+
+struct PropertyFilterValue {
+
+ // define valid ValueCompareTypes
+ enum ValueCompareType { CONTAINS = 0
+ , ENDS_WITH
+ , EXACT
+ , GREATER_THAN
+ , GREATER_THAN_EQUAL
+ , LESS_THAN
+ , LESS_THAN_EQUAL
+ , NOT
+ , STARTS_WITH
+ };
+
+ // ctor
+ PropertyFilterValue(const Variant& value = Variant(),
+ const ValueCompareType& type = PropertyFilterValue::EXACT)
+ : Value(value)
+ , Type(type)
+ { }
+
+ // filter check methods
+ template<typename T>
+ bool check(const T& query) const;
+ bool check(const std::string& query) const;
+
+ // data members
+ Variant Value;
+ ValueCompareType Type;
+};
+
+// checks a query against a filter (value, compare type)
+template<typename T>
+bool PropertyFilterValue::check(const T& query) const {
+
+ // ensure filter value & query are same type
+ if ( !Value.is_type<T>() ) {
+ std::cerr << "Cannot compare different types!" << std::endl;
+ return false;
+ }
+
+ // string matching
+ if ( Value.is_type<std::string>() ) {
+ std::cerr << "Cannot compare different types - query is a string!" << std::endl;
+ return false;
+ }
+
+ // numeric matching based on our filter type
+ switch ( Type ) {
+ case ( PropertyFilterValue::EXACT) : return ( query == Value.get<T>() );
+ case ( PropertyFilterValue::GREATER_THAN) : return ( query > Value.get<T>() );
+ case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() );
+ case ( PropertyFilterValue::LESS_THAN) : return ( query < Value.get<T>() );
+ case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= Value.get<T>() );
+ case ( PropertyFilterValue::NOT) : return ( query != Value.get<T>() );
+ default : BAMTOOLS_ASSERT_UNREACHABLE;
+ }
+ return false;
+}
+
+// checks a string query against filter (value, compare type)
+inline
+bool PropertyFilterValue::check(const std::string& query) const {
+
+ // ensure filter value & query are same type
+ if ( !Value.is_type<std::string>() ) {
+ std::cerr << "Cannot compare different types!" << std::endl;
+ return false;
+ }
+
+ // localize string version of our filter value
+ const std::string& valueString = Value.get<std::string>();
+
+ // string matching based on our filter type
+ switch ( Type ) {
+ case ( PropertyFilterValue::CONTAINS) : return ( query.find(valueString) != std::string::npos );
+ case ( PropertyFilterValue::ENDS_WITH) : return ( query.find(valueString) == (query.length() - valueString.length()) );
+ case ( PropertyFilterValue::EXACT) : return ( query == valueString );
+ case ( PropertyFilterValue::GREATER_THAN) : return ( query > valueString );
+ case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= valueString );
+ case ( PropertyFilterValue::LESS_THAN) : return ( query < valueString );
+ case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= valueString );
+ case ( PropertyFilterValue::NOT) : return ( query != valueString );
+ case ( PropertyFilterValue::STARTS_WITH) : return ( query.find(valueString) == 0 );
+ default : BAMTOOLS_ASSERT_UNREACHABLE;
+ }
+ return false;
+}
+
+inline
+const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
+
+ switch ( type ) {
+ case ( PropertyFilterValue::CONTAINS ) : return std::string( "CONTAINS");
+ case ( PropertyFilterValue::ENDS_WITH ) : return std::string( "ENDS_WITH");
+ case ( PropertyFilterValue::EXACT ) : return std::string( "EXACT");
+ case ( PropertyFilterValue::GREATER_THAN ) : return std::string( "GREATER_THAN");
+ case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
+ case ( PropertyFilterValue::LESS_THAN ) : return std::string( "LESS_THAN");
+ case ( PropertyFilterValue::LESS_THAN_EQUAL ) : return std::string( "LESS_THAN_EQUAL");
+ case ( PropertyFilterValue::NOT ) : return std::string( "NOT");
+ case ( PropertyFilterValue::STARTS_WITH ) : return std::string( "STARTS_WITH");
+ default : BAMTOOLS_ASSERT_UNREACHABLE;
+ }
+ return std::string();
+}
+
+// property name => property filter value
+// ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...)
+typedef std::map<std::string, PropertyFilterValue> PropertyMap;
+
+// ----------------------------------------------------------
+// PropertyFilter
+
+struct PropertyFilter {
+ // data members
+ PropertyMap Properties;
+};
+
+// filter name => properties
+// ('filter1' => properties1, 'filter2' => properties2, etc...)
+typedef std::map<std::string, PropertyFilter> FilterMap;
+
+// ----------------------------------------------------------
+// Property
+
+// used to store properties known to engine & keep track of enabled state
+struct Property {
+ std::string Name;
+ bool IsEnabled;
+ Property(const std::string& name, bool isEnabled = false)
+ : Name(name)
+ , IsEnabled(isEnabled)
+ { }
+};
+
+inline bool operator< (const Property& lhs, const Property& rhs) { return lhs.Name < rhs.Name; }
+inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_FILTER_PROPERTIES_H
--- /dev/null
+// ***************************************************************************
+// bamtools_filter_ruleparser.h (c) 2010 Derek Barnett, Erik Garrison
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 21 September 2010
+// ---------------------------------------------------------------------------
+// Provides a compound rule parser for FilterEngine.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_FILTER_RULEPARSER_H
+#define BAMTOOLS_FILTER_RULEPARSER_H
+
+#include <queue>
+#include <stack>
+#include <string>
+#include "bamtools_utilities.h"
+
+namespace BamTools {
+
+// -------------------------------------------
+// char constants
+
+const char LEFT_PARENTHESIS_CHAR = '(';
+const char RIGHT_PARENTHESIS_CHAR = ')';
+const char AND_OPERATOR_CHAR = '&';
+const char OR_OPERATOR_CHAR = '|';
+const char NOT_OPERATOR_CHAR = '!';
+const char SPACE_CHAR = ' ';
+
+// -------------------------------------------
+// RuleToken implementation
+
+struct RuleToken {
+
+ // enums
+ enum RuleTokenType { OPERAND = 0
+ , AND_OPERATOR
+ , OR_OPERATOR
+ , NOT_OPERATOR
+ , LEFT_PARENTHESIS
+ , RIGHT_PARENTHESIS
+ };
+
+ // data members
+ RuleTokenType Type;
+ std::string Value;
+};
+
+inline int priority(const RuleToken& token) {
+ switch ( token.Type ) {
+ case ( RuleToken::NOT_OPERATOR ) : return 3;
+ case ( RuleToken::AND_OPERATOR ) : return 2;
+ case ( RuleToken::OR_OPERATOR ) : return 1;
+ case ( RuleToken::LEFT_PARENTHESIS ) : return 0;
+ case ( RuleToken::RIGHT_PARENTHESIS ) : return 0;
+ default: BAMTOOLS_ASSERT_UNREACHABLE;
+ }
+}
+
+inline bool isRightAssociative(const RuleToken& token) {
+ return (token.Type == RuleToken::NOT_OPERATOR ||
+ token.Type == RuleToken::LEFT_PARENTHESIS);
+}
+
+inline bool isLeftAssociative(const RuleToken& token) {
+ return !isRightAssociative(token);
+}
+
+inline bool isLeftParenthesis(const RuleToken& token) {
+ return ( token.Type == RuleToken::LEFT_PARENTHESIS );
+}
+
+inline bool isRightParenthesis(const RuleToken& token) {
+ return ( token.Type == RuleToken::RIGHT_PARENTHESIS );
+}
+
+inline bool isOperand(const RuleToken& token) {
+ return ( token.Type == RuleToken::OPERAND );
+}
+
+inline bool isOperator(const RuleToken& token) {
+ return ( token.Type == RuleToken::AND_OPERATOR ||
+ token.Type == RuleToken::OR_OPERATOR ||
+ token.Type == RuleToken::NOT_OPERATOR);
+}
+
+// -------------------------------------------
+// RuleParser implementation
+
+class RuleParser {
+
+ // ctor & dtor
+ public:
+ RuleParser(const std::string& ruleString)
+ : m_ruleString(ruleString)
+ {
+ // initialize char markers
+ m_begin = (char*)m_ruleString.c_str();
+ m_end = m_begin + m_ruleString.length();
+ ignoreQuotes();
+ }
+
+ ~RuleParser(void) { }
+
+ // public interface
+ public:
+ void parse(void);
+ std::queue<std::string> results(void) const { return m_ruleQueue; }
+
+ // internal methods
+ private:
+ char getNextChar(void);
+ void ignoreQuotes(void);
+ bool readToken(RuleToken& token);
+ void skipSpaces(void);
+
+ // data members
+ private:
+ std::string m_ruleString;
+ char* m_begin;
+ char* m_current;
+ char* m_end;
+
+ std::queue<std::string> m_ruleQueue;
+ std::stack<RuleToken> m_operatorStack;
+};
+
+inline
+char RuleParser::getNextChar(void) {
+ if ( m_current == m_end ) return 0;
+ return *m_current++;
+}
+
+inline
+void RuleParser::ignoreQuotes(void) {
+ if ( *m_begin == '\"' ) ++m_begin;
+ if ( *m_end == '\"' ) --m_end;
+}
+
+inline
+void RuleParser::parse(void) {
+
+ // clear out any prior data
+ while ( !m_ruleQueue.empty() )
+ m_ruleQueue.pop();
+
+ // skip if no rule to parse
+ if ( m_ruleString.empty() ) return;
+
+ // start at beginning of ruleString
+ m_current = m_begin;
+
+ // iterate through tokens in rule string
+ RuleToken token;
+ while ( readToken(token) ) {
+
+ if ( token.Value.empty() ) break;
+
+ // if token is an operand
+ if ( isOperand(token) )
+ m_ruleQueue.push(token.Value);
+
+ // if token is an operator
+ else if ( isOperator(token) ) {
+
+ // pop any operators at top of stack with higher priority
+ while ( !m_operatorStack.empty() ) {
+ const RuleToken& opToken = m_operatorStack.top();
+ if ( (isLeftAssociative(token) && (priority(token) <= priority(opToken))) ||
+ (isRightAssociative(token) && (priority(token) < priority(opToken)))
+ )
+ {
+ m_ruleQueue.push(opToken.Value);
+ m_operatorStack.pop();
+ }
+ else break;
+ }
+
+ // push current operator token onto stack
+ m_operatorStack.push(token);
+ }
+
+ // if token is left parenthesis
+ else if ( isLeftParenthesis(token) )
+ m_operatorStack.push(token);
+
+ // if token is right parenthesis
+ else if ( isRightParenthesis(token) ) {
+
+ bool foundLeftParenthesis = false;
+
+ // push operators into rule queue until left parenthesis found
+ while ( !m_operatorStack.empty() && !foundLeftParenthesis ) {
+ const RuleToken& opToken = m_operatorStack.top();
+ if ( !isLeftParenthesis(opToken) )
+ m_ruleQueue.push(opToken.Value);
+ else
+ foundLeftParenthesis = true;
+ m_operatorStack.pop();
+ }
+
+ // no left parenthesis found, error
+ BAMTOOLS_ASSERT_MESSAGE( foundLeftParenthesis, "ERROR: Mismatched parenthesis in rule string.1");
+ }
+
+ // error: unknown operand
+ else BAMTOOLS_ASSERT_UNREACHABLE;
+ }
+
+ // while there are still operators on stack
+ while ( !m_operatorStack.empty() ) {
+ const RuleToken& token = m_operatorStack.top();
+ BAMTOOLS_ASSERT_MESSAGE( (!isLeftParenthesis(token) && !isRightParenthesis(token)), "ERROR: Mismatched parenthesis in rule string.2");
+ m_ruleQueue.push(token.Value);
+ m_operatorStack.pop();
+ }
+}
+
+inline
+bool RuleParser::readToken(RuleToken& token) {
+
+ // skip any preceding whitespace
+ skipSpaces();
+ if ( m_current == m_end ) return false;
+
+ // clear out prior token value
+ token.Value.clear();
+
+ // read chars while still in token
+ char c = 1;
+ bool keepReading = true;
+ bool inOperandString = false;
+ while ( keepReading && (c != 0) ) {
+
+ // get next char
+ c = getNextChar();
+ switch (c) {
+
+ // current char is '('
+ case ( LEFT_PARENTHESIS_CHAR ) :
+ token.Type = RuleToken::LEFT_PARENTHESIS;
+ token.Value.append(1, LEFT_PARENTHESIS_CHAR);
+ keepReading = false;
+ break;
+
+ // current char is ')'
+ case ( RIGHT_PARENTHESIS_CHAR ) :
+ if ( inOperandString )
+ --m_current;
+ else {
+ token.Type = RuleToken::RIGHT_PARENTHESIS;
+ token.Value.append(1, RIGHT_PARENTHESIS_CHAR);
+ }
+ keepReading = false;
+ break;
+
+ // current char is '&'
+ case ( AND_OPERATOR_CHAR ) :
+ if ( inOperandString )
+ --m_current;
+ else {
+ token.Type = RuleToken::AND_OPERATOR;
+ token.Value.append(1, AND_OPERATOR_CHAR);
+ }
+ keepReading = false;
+ break;
+
+ // current char is '|'
+ case ( OR_OPERATOR_CHAR ) :
+ if ( inOperandString )
+ --m_current;
+ else {
+ token.Type = RuleToken::OR_OPERATOR;
+ token.Value.append(1, OR_OPERATOR_CHAR);
+ }
+ keepReading = false;
+ break;
+
+ // current char is '!'
+ case ( NOT_OPERATOR_CHAR ) :
+ token.Type = RuleToken::NOT_OPERATOR;
+ token.Value.append(1, NOT_OPERATOR_CHAR);
+ keepReading = false;
+ break;
+
+ // current char is ' '
+ case ( SPACE_CHAR ) :
+ keepReading = false;
+ break;
+
+ // current char is a true value token
+ default:
+ if ( c != 0 ) {
+ token.Type = RuleToken::OPERAND;
+ token.Value.append(1, c);
+ inOperandString = true;
+ keepReading = true;
+ }
+ }
+ }
+
+ return true;
+}
+
+inline
+void RuleParser::skipSpaces(void) {
+ while ( m_current != m_end ) {
+ const char c = *m_current;
+ if ( c == ' ' || c == '\t' || c == '\r' || c == '\n')
+ ++m_current;
+ else break;
+ }
+}
+
+} // namespace BamTools
+
+#endif // BAMTOOLS_FILTER_RULEPARSER_H
\ No newline at end of file