1 // ***************************************************************************
2 // bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 19 November 2010
6 // ---------------------------------------------------------------------------
7 // Provides a generic filter engine based on filter-sets of properties,
8 // with possible "rules" (compound logical expressions) to create more complex
9 // queries on a data set.
11 // FilterEngine consists, most importantly, of :
13 // a list of possible properties (each tagged whether it has been 'enabled' as a filter)
14 // a map of filterName => propertySet
15 // queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
17 // Each propertySet is a list of properties enabled for this particular filter object
19 // Implemented as a map of propertyNames to propertyFilterValue
20 // ( "property1" => pfv1
21 // "property2" => pfv2
22 // "property4" => pfv4
25 // Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled
26 // via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true
27 // for any query. Meaning that if a property is not set on this filter, we don't care
28 // about it here, so it passes though OK.
30 // A propertyFilterValue contains a value and comparison type
32 // ( pfv1: Value = 50, Type = GREATER_THAN_EQUAL
33 // pfv2: Value = "foo", Type = STARTS_WITH
34 // pfv4: Value = "bar", Type = CONTAINS
37 // This allows for more complex queries (than simple isEqual?) against a variety of data types.
39 // ***************************************************************************
41 #ifndef BAMTOOLS_FILTER_ENGINE_H
42 #define BAMTOOLS_FILTER_ENGINE_H
44 #include <utils/bamtools_filter_properties.h>
45 #include <utils/bamtools_filter_ruleparser.h>
46 #include <utils/bamtools_utilities.h>
47 #include <utils/utils_global.h>
60 struct UTILS_EXPORT FilterCompareType {
67 // -----------------------------------------------------------
70 template <typename FilterChecker>
71 class UTILS_EXPORT FilterEngine {
77 , m_isRuleQueueGenerated(false)
78 , m_defaultCompareType(FilterCompareType::OR)
84 ~FilterEngine(void) { }
86 // 'filter set' methods
88 // creates a new filter set, returns true if created, false if error or already exists
89 bool addFilter(const std::string& filterName);
91 // return list of current filter names
92 const std::vector<std::string> filterNames(void);
97 // add a new known property (& type) to engine
98 bool addProperty(const std::string& propertyName);
100 // sets property filter (value, type) for propertyName, on a particular filter set
101 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
103 bool setProperty(const std::string& filterName,
104 const std::string& propertyName,
106 const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
108 // returns list of all properties known by FilterEngine ( any created using addProperty() )
109 const std::vector<std::string> allPropertyNames(void);
111 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
112 const std::vector<std::string> enabledPropertyNames(void);
117 // sets comparison operator between filters if no rule string given
118 // default is to do an OR on each filter
119 void setDefaultCompareType(const FilterCompareType::Type& type = FilterCompareType::OR);
121 // sets rule string for building expression queue
123 void setRule(const std::string& ruleString = "");
125 // token parsing (for property filter generation)
128 static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
132 // returns true if query passes all filters in FilterEngine
134 bool check(const T& query);
136 // internal rule-handling methods
138 void buildDefaultRuleString(void);
139 void buildRuleQueue(void);
141 bool evaluateFilterRules(const T& query);
148 // all known properties
149 std::vector<Property> m_properties;
151 // infix expression of filter-set comparison rules
152 std::string m_ruleString;
154 // postfix expression of tokens (filterNames) and operators (as strings)
155 // if this is empty, uses m_compareType to build default expression queue
156 std::queue<std::string> m_ruleQueue;
158 // flag to test if the rule expression queue has been generated
159 bool m_isRuleQueueGenerated;
161 // 'default' comparison operator between filters if no rule string given
162 // if this is changed, m_ruleString is used to build new m_ruleQueue
163 FilterCompareType::Type m_defaultCompareType;
165 // client-specified checking type ( provides method: bool check(PropertyFilter, T object) )
166 FilterChecker m_checker;
168 // token-parsing constants
169 static const int NOT_CHAR = (int)'!';
170 static const int EQUAL_CHAR = (int)'=';
171 static const int GREATER_THAN_CHAR = (int)'>';
172 static const int LESS_THAN_CHAR = (int)'<';
173 static const int WILDCARD_CHAR = (int)'*';
175 // filter evaluation constants
176 const std::string AND_OPERATOR;
177 const std::string OR_OPERATOR;
178 const std::string NOT_OPERATOR;
181 // creates a new filter set, returns true if created, false if error or already exists
182 template<typename FilterChecker>
183 inline bool FilterEngine<FilterChecker>::addFilter(const std::string& filterName) {
184 return (m_filters.insert(std::make_pair(filterName, PropertyFilter()))).second;
187 // add a new known property & type to engine
188 template<typename FilterChecker>
189 inline bool FilterEngine<FilterChecker>::addProperty(const std::string& propertyName) {
190 const std::vector<std::string> propertyNames = allPropertyNames();
191 bool found = std::binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
192 if ( found ) return false;
193 m_properties.push_back( Property(propertyName) );
194 std::sort( m_properties.begin(), m_properties.end() );
198 // returns list of all properties known by FilterEngine
199 // ( any that were created using addProperty() )
200 template<typename FilterChecker>
201 inline const std::vector<std::string> FilterEngine<FilterChecker>::allPropertyNames(void) {
203 std::vector<std::string> names;
204 names.reserve(m_properties.size());
205 // iterate through all properties, appending to stringlist
206 std::vector<Property>::const_iterator propIter = m_properties.begin();
207 std::vector<Property>::const_iterator propEnd = m_properties.end();
208 for ( ; propIter != propEnd; ++propIter )
209 names.push_back( (*propIter).Name );
214 // builds a default rule string based on m_defaultCompareType
215 // used if user supplied an explicit rule string
216 template<typename FilterChecker>
217 inline void FilterEngine<FilterChecker>::buildDefaultRuleString(void) {
219 // set up temp string stream
220 std::stringstream ruleStream("");
222 // get first filterName
223 FilterMap::const_iterator mapIter = m_filters.begin();
224 ruleStream << (*mapIter).first;
226 // if there are more filters present
227 // iterate over remaining filters, appending compare operator and filter name
228 if ( m_filters.size() > 1 ) {
229 for ( ++mapIter ; mapIter != m_filters.end(); ++mapIter )
230 ruleStream << ( (m_defaultCompareType == FilterCompareType::AND) ? " & " : " | " )
234 // set m_ruleString from temp stream
235 m_ruleString = ruleStream.str();
238 // build expression queue based on ruleString
239 template<typename FilterChecker>
240 inline void FilterEngine<FilterChecker>::buildRuleQueue(void) {
242 // skip if no filters present
243 if ( m_filters.empty() ) return;
245 // clear out any prior expression queue data
246 while ( !m_ruleQueue.empty() )
249 // create a rule string, if not provided
250 if ( m_ruleString.empty() )
251 buildDefaultRuleString();
253 // initialize RuleParser, run, and retrieve results
254 RuleParser ruleParser(m_ruleString);
256 m_ruleQueue = ruleParser.results();
258 // set flag if rule queue contains any values
259 m_isRuleQueueGenerated = (!m_ruleQueue.empty());
262 // returns whether query value passes filter engine rules
263 template<class FilterChecker> template<typename T>
264 bool FilterEngine<FilterChecker>::check(const T& query) {
266 // return result of querying against filter rules
267 return evaluateFilterRules(query);
270 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
271 template<typename FilterChecker>
272 inline const std::vector<std::string> FilterEngine<FilterChecker>::enabledPropertyNames(void) {
273 // initialize stringlist
274 std::vector<std::string> names;
275 names.reserve(m_properties.size());
276 // iterate over all properties, appending if enabled
277 std::vector<Property>::const_iterator propIter = m_properties.begin();
278 std::vector<Property>::const_iterator propEnd = m_properties.end();
279 for ( ; propIter != propEnd; ++propIter )
280 if ( (*propIter).IsEnabled )
281 names.push_back( (*propIter).Name );
286 // evaluates postfix rule queue - with each filter as an operand, AND|OR|NOT as operators
287 template<class FilterChecker> template<typename T>
288 bool FilterEngine<FilterChecker>::evaluateFilterRules(const T& query) {
290 // build ruleQueue if not done before
291 if ( !m_isRuleQueueGenerated )
294 std::stack<bool> resultStack;
295 FilterMap::const_iterator filterIter;
296 FilterMap::const_iterator filterEnd = m_filters.end();
297 std::queue<std::string> ruleQueueCopy = m_ruleQueue;
298 while ( !ruleQueueCopy.empty() ) {
299 const std::string& token = ruleQueueCopy.front();
301 // token is NOT_OPERATOR
302 if ( token == FilterEngine<FilterChecker>::NOT_OPERATOR ) {
303 BAMTOOLS_ASSERT_MESSAGE( !resultStack.empty(), "Empty result stack - cannot apply operator: !" );
304 resultStack.top() = !resultStack.top();
307 // token is AND_OPERATOR
308 else if ( token == FilterEngine<FilterChecker>::AND_OPERATOR ) {
309 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: &" );
310 bool topResult = resultStack.top();
312 resultStack.top() &= topResult;
315 // token is OR_OPERATOR
316 else if ( token == FilterEngine<FilterChecker>::OR_OPERATOR ) {
317 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: |" );
318 bool topResult = resultStack.top();
320 resultStack.top() |= topResult;
323 // token is an operand
325 // look up PropertyFilter that matches this token
326 filterIter = m_filters.find(token);
327 BAMTOOLS_ASSERT_MESSAGE( (filterIter != filterEnd), "Filter mentioned in rule, not found in FilterEngine" );
328 const PropertyFilter& filter = (*filterIter).second;
329 bool result = m_checker.check(filter, query);
330 resultStack.push( result );
333 // pop token from ruleQueue
337 // return last result
338 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() == 1, "Result stack should only have one value remaining - cannot return result" );
339 return resultStack.top();
342 // return list of current filter names
343 template<typename FilterChecker>
344 inline const std::vector<std::string> FilterEngine<FilterChecker>::filterNames(void) {
345 // initialize stringlist
346 std::vector<std::string> names;
347 names.reserve(m_filters.size());
348 // iterate over all filters, appending filter name
349 FilterMap::const_iterator mapIter = m_filters.begin();
350 FilterMap::const_iterator mapEnd = m_filters.end();
351 for ( ; mapIter != mapEnd; ++mapIter )
352 names.push_back( (*mapIter).first );
357 // parse a filterValue token string that may contain comparison qualifiers (">50", "*SRR", etc.)
358 template<class FilterChecker> template<typename T>
359 bool FilterEngine<FilterChecker>::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
361 // skip if token is empty
362 if ( token.empty() ) return false;
364 // will store token after special chars are removed
365 std::string strippedToken;
367 // if only single character
368 if ( token.length() == 1 ) {
369 strippedToken = token;
370 type = PropertyFilterValue::EXACT;
373 // more than one character, check for special chars
375 const int firstChar = (int)token.at(0);
376 switch ( firstChar ) {
378 case ( FilterEngine<FilterChecker>::NOT_CHAR ) :
379 strippedToken = token.substr(1);
380 type = PropertyFilterValue::NOT;
383 case ( FilterEngine<FilterChecker>::GREATER_THAN_CHAR ) :
385 // check for '>=' case
386 if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
387 if ( token.length() == 2 ) return false;
388 strippedToken = token.substr(2);
389 type = PropertyFilterValue::GREATER_THAN_EQUAL;
392 // otherwise only '>'
394 strippedToken = token.substr(1);
395 type = PropertyFilterValue::GREATER_THAN;
400 case ( FilterEngine<FilterChecker>::LESS_THAN_CHAR ) :
402 // check for '<=' case
403 if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
404 if ( token.length() == 2 ) return false;
405 strippedToken = token.substr(2);
406 type = PropertyFilterValue::LESS_THAN_EQUAL;
409 // otherwise only '<'
411 strippedToken = token.substr(1);
412 type = PropertyFilterValue::LESS_THAN;
417 case ( FilterEngine<FilterChecker>::WILDCARD_CHAR ) :
419 // check for *str* case (CONTAINS)
420 if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
421 if ( token.length() == 2 ) return false;
422 strippedToken = token.substr(1, token.length() - 2);
423 type = PropertyFilterValue::CONTAINS;
426 // otherwise *str case (ENDS_WITH)
428 strippedToken = token.substr(1);
429 type = PropertyFilterValue::ENDS_WITH;
435 // check for str* case (STARTS_WITH)
436 if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
437 if ( token.length() == 2 ) return false;
438 strippedToken = token.substr(0, token.length() - 1);
439 type = PropertyFilterValue::STARTS_WITH;
444 strippedToken = token;
445 type = PropertyFilterValue::EXACT;
452 // convert stripped token to value
453 std::stringstream stream(strippedToken);
454 if ( strippedToken == "true" || strippedToken == "false" )
455 stream >> std::boolalpha >> value;
459 // check for valid CompareType on type T
460 Variant variantCheck = value;
462 // if T is not string AND CompareType is for string values, return false
463 if ( !variantCheck.is_type<std::string>() ) {
464 if ( type == PropertyFilterValue::CONTAINS ||
465 type == PropertyFilterValue::ENDS_WITH ||
466 type == PropertyFilterValue::STARTS_WITH )
475 // sets comparison operator between filters if no rule string given
476 // default is to do an OR on each filter
477 template<typename FilterChecker>
478 inline void FilterEngine<FilterChecker>::setDefaultCompareType(const FilterCompareType::Type& type) {
479 // check for supported compare type
480 if ( type == FilterCompareType::AND || type == FilterCompareType::OR ) {
481 // if not the current compare type
482 if ( m_defaultCompareType != type ) {
483 m_defaultCompareType = type;
489 // sets property filter (value, type) for propertyName, on a particular filter set
490 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
491 template<class FilterChecker> template<typename T>
492 bool FilterEngine<FilterChecker>::setProperty(const std::string& filterName,
493 const std::string& propertyName,
495 const PropertyFilterValue::ValueCompareType& type)
497 // lookup filter by name, return false if not found
498 FilterMap::iterator filterIter = m_filters.find(filterName);
499 if ( filterIter == m_filters.end() ) return false;
501 // lookup property for filter, add new PropertyFilterValue if not found, modify if already exists
502 PropertyFilter& filter = (*filterIter).second;
503 PropertyMap::iterator propertyIter = filter.Properties.find(propertyName);
507 // property not found for this filter, create new entry
508 if ( propertyIter == filter.Properties.end() )
509 success = (filter.Properties.insert(std::make_pair(propertyName, PropertyFilterValue(value, type)))).second;
511 // property already exists, modify
513 PropertyFilterValue& filterValue = (*propertyIter).second;
514 filterValue.Value = value;
515 filterValue.Type = type;
519 // if error so far, return false
520 if ( !success ) return false;
522 // --------------------------------------------
523 // otherwise, set Property.IsEnabled to true
526 std::vector<Property>::iterator knownPropertyIter = std::find( m_properties.begin(), m_properties.end(), propertyName);
528 // if not found, create a new (enabled) entry (& re-sort list)
529 if ( knownPropertyIter == m_properties.end() ) {
530 m_properties.push_back( Property(propertyName, true) );
531 std::sort( m_properties.begin(), m_properties.end() );
534 // property already known, set as enabled
535 else (*knownPropertyIter).IsEnabled = true;
541 // sets user-specified rule string & signals update of rule-expression queue
542 template<typename FilterChecker>
543 inline void FilterEngine<FilterChecker>::setRule(const std::string& ruleString) {
544 if ( m_ruleString != ruleString) {
545 m_ruleString = ruleString;
550 } // namespace BamTools
552 #endif // BAMTOOLS_FILTER_ENGINE_H