1 // ***************************************************************************
2 // bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 3 May 2013
6 // ---------------------------------------------------------------------------
7 // Provides a generic filter engine based on filter-sets of properties,
8 // with possible "rules" (compound logical expressions) to create more complex
9 // queries on a data set.
11 // FilterEngine consists, most importantly, of :
13 // a list of possible properties (each tagged whether it has been 'enabled' as a filter)
14 // a map of filterName => propertySet
15 // queue for compound rule expression (i.e. "(filter1 AND filter2) OR !filter3" )
17 // Each propertySet is a list of properties enabled for this particular filter object
19 // Implemented as a map of propertyNames to propertyFilterValue
20 // ( "property1" => pfv1
21 // "property2" => pfv2
22 // "property4" => pfv4
25 // Any properties that are 'possible', via FilterEngine::addProperty(), but not enabled
26 // via FilterEngine::setProperty() (in our example, say "property3"), evaluate to true
27 // for any query. Meaning that if a property is not set on this filter, we don't care
28 // about it here, so it passes though OK.
30 // A propertyFilterValue contains a value and comparison type
32 // ( pfv1: Value = 50, Type = GREATER_THAN_EQUAL
33 // pfv2: Value = "foo", Type = STARTS_WITH
34 // pfv4: Value = "bar", Type = CONTAINS
37 // This allows for more complex queries (than simple isEqual?) against a variety of data types.
39 // ***************************************************************************
41 #ifndef BAMTOOLS_FILTER_ENGINE_H
42 #define BAMTOOLS_FILTER_ENGINE_H
44 #include "utils/utils_global.h"
45 #include "utils/bamtools_filter_properties.h"
46 #include "utils/bamtools_filter_ruleparser.h"
47 #include "utils/bamtools_utilities.h"
61 struct UTILS_EXPORT FilterCompareType {
68 // -----------------------------------------------------------
71 template <typename FilterChecker>
72 class UTILS_EXPORT FilterEngine {
78 , m_isRuleQueueGenerated(false)
79 , m_defaultCompareType(FilterCompareType::OR)
85 ~FilterEngine(void) { }
87 // 'filter set' methods
89 // creates a new filter set, returns true if created, false if error or already exists
90 bool addFilter(const std::string& filterName);
92 // return list of current filter names
93 const std::vector<std::string> filterNames(void);
98 // add a new known property (& type) to engine
99 bool addProperty(const std::string& propertyName);
101 // sets property filter (value, type) for propertyName, on a particular filter set
102 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
104 bool setProperty(const std::string& filterName,
105 const std::string& propertyName,
107 const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
109 // returns list of all properties known by FilterEngine ( any created using addProperty() )
110 const std::vector<std::string> allPropertyNames(void);
112 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
113 const std::vector<std::string> enabledPropertyNames(void);
118 // sets comparison operator between filters if no rule string given
119 // default is to do an OR on each filter
120 void setDefaultCompareType(const FilterCompareType::Type& type = FilterCompareType::OR);
122 // sets rule string for building expression queue
124 void setRule(const std::string& ruleString = "");
126 // token parsing (for property filter generation)
129 static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
133 // returns true if query passes all filters in FilterEngine
135 bool check(const T& query);
137 // internal rule-handling methods
139 void buildDefaultRuleString(void);
140 void buildRuleQueue(void);
142 bool evaluateFilterRules(const T& query);
149 // all known properties
150 std::vector<Property> m_properties;
152 // infix expression of filter-set comparison rules
153 std::string m_ruleString;
155 // postfix expression of tokens (filterNames) and operators (as strings)
156 // if this is empty, uses m_compareType to build default expression queue
157 std::queue<std::string> m_ruleQueue;
159 // flag to test if the rule expression queue has been generated
160 bool m_isRuleQueueGenerated;
162 // 'default' comparison operator between filters if no rule string given
163 // if this is changed, m_ruleString is used to build new m_ruleQueue
164 FilterCompareType::Type m_defaultCompareType;
166 // client-specified checking type ( provides method: bool check(PropertyFilter, T object) )
167 FilterChecker m_checker;
169 // token-parsing constants
170 static const int NOT_CHAR = (int)'!';
171 static const int EQUAL_CHAR = (int)'=';
172 static const int GREATER_THAN_CHAR = (int)'>';
173 static const int LESS_THAN_CHAR = (int)'<';
174 static const int WILDCARD_CHAR = (int)'*';
176 // filter evaluation constants
177 const std::string AND_OPERATOR;
178 const std::string OR_OPERATOR;
179 const std::string NOT_OPERATOR;
182 // creates a new filter set, returns true if created, false if error or already exists
183 template<typename FilterChecker>
184 inline bool FilterEngine<FilterChecker>::addFilter(const std::string& filterName) {
185 return (m_filters.insert(std::make_pair(filterName, PropertyFilter()))).second;
188 // add a new known property & type to engine
189 template<typename FilterChecker>
190 inline bool FilterEngine<FilterChecker>::addProperty(const std::string& propertyName) {
191 const std::vector<std::string> propertyNames = allPropertyNames();
192 bool found = std::binary_search( propertyNames.begin(), propertyNames.end(), propertyName );
193 if ( found ) return false;
194 m_properties.push_back( Property(propertyName) );
195 std::sort( m_properties.begin(), m_properties.end() );
199 // returns list of all properties known by FilterEngine
200 // ( any that were created using addProperty() )
201 template<typename FilterChecker>
202 inline const std::vector<std::string> FilterEngine<FilterChecker>::allPropertyNames(void) {
204 std::vector<std::string> names;
205 names.reserve(m_properties.size());
206 // iterate through all properties, appending to stringlist
207 std::vector<Property>::const_iterator propIter = m_properties.begin();
208 std::vector<Property>::const_iterator propEnd = m_properties.end();
209 for ( ; propIter != propEnd; ++propIter )
210 names.push_back( (*propIter).Name );
215 // builds a default rule string based on m_defaultCompareType
216 // used if user supplied an explicit rule string
217 template<typename FilterChecker>
218 inline void FilterEngine<FilterChecker>::buildDefaultRuleString(void) {
220 // set up temp string stream
221 std::stringstream ruleStream("");
223 // get first filterName
224 FilterMap::const_iterator mapIter = m_filters.begin();
225 ruleStream << (*mapIter).first;
227 // if there are more filters present
228 // iterate over remaining filters, appending compare operator and filter name
229 if ( m_filters.size() > 1 ) {
230 for ( ++mapIter ; mapIter != m_filters.end(); ++mapIter )
231 ruleStream << ( (m_defaultCompareType == FilterCompareType::AND) ? " & " : " | " )
235 // set m_ruleString from temp stream
236 m_ruleString = ruleStream.str();
239 // build expression queue based on ruleString
240 template<typename FilterChecker>
241 inline void FilterEngine<FilterChecker>::buildRuleQueue(void) {
243 // skip if no filters present
244 if ( m_filters.empty() ) return;
246 // clear out any prior expression queue data
247 while ( !m_ruleQueue.empty() )
250 // create a rule string, if not provided
251 if ( m_ruleString.empty() )
252 buildDefaultRuleString();
254 // initialize RuleParser, run, and retrieve results
255 RuleParser ruleParser(m_ruleString);
257 m_ruleQueue = ruleParser.results();
259 // set flag if rule queue contains any values
260 m_isRuleQueueGenerated = (!m_ruleQueue.empty());
263 // returns whether query value passes filter engine rules
264 template<class FilterChecker> template<typename T>
265 bool FilterEngine<FilterChecker>::check(const T& query) {
267 // return result of querying against filter rules
268 return evaluateFilterRules(query);
271 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
272 template<typename FilterChecker>
273 inline const std::vector<std::string> FilterEngine<FilterChecker>::enabledPropertyNames(void) {
274 // initialize stringlist
275 std::vector<std::string> names;
276 names.reserve(m_properties.size());
277 // iterate over all properties, appending if enabled
278 std::vector<Property>::const_iterator propIter = m_properties.begin();
279 std::vector<Property>::const_iterator propEnd = m_properties.end();
280 for ( ; propIter != propEnd; ++propIter )
281 if ( (*propIter).IsEnabled )
282 names.push_back( (*propIter).Name );
287 // evaluates postfix rule queue - with each filter as an operand, AND|OR|NOT as operators
288 template<class FilterChecker> template<typename T>
289 bool FilterEngine<FilterChecker>::evaluateFilterRules(const T& query) {
291 // build ruleQueue if not done before
292 if ( !m_isRuleQueueGenerated )
295 std::stack<bool> resultStack;
296 FilterMap::const_iterator filterIter;
297 std::queue<std::string> ruleQueueCopy = m_ruleQueue;
298 while ( !ruleQueueCopy.empty() ) {
299 const std::string& token = ruleQueueCopy.front();
301 // token is NOT_OPERATOR
302 if ( token == FilterEngine<FilterChecker>::NOT_OPERATOR ) {
303 BAMTOOLS_ASSERT_MESSAGE( !resultStack.empty(), "Empty result stack - cannot apply operator: !" );
304 resultStack.top() = !resultStack.top();
307 // token is AND_OPERATOR
308 else if ( token == FilterEngine<FilterChecker>::AND_OPERATOR ) {
309 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: &" );
310 bool topResult = resultStack.top();
312 resultStack.top() &= topResult;
315 // token is OR_OPERATOR
316 else if ( token == FilterEngine<FilterChecker>::OR_OPERATOR ) {
317 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() >= 2 , "Not enough operands - cannot apply operator: |" );
318 bool topResult = resultStack.top();
320 resultStack.top() |= topResult;
323 // token is an operand
325 // look up PropertyFilter that matches this token
326 filterIter = m_filters.find(token);
327 BAMTOOLS_ASSERT_MESSAGE( (filterIter != m_filters.end() ), "Filter mentioned in rule, not found in FilterEngine" );
328 const PropertyFilter& filter = (*filterIter).second;
329 bool result = m_checker.check(filter, query);
330 resultStack.push( result );
333 // pop token from ruleQueue
337 // return last result
338 BAMTOOLS_ASSERT_MESSAGE( resultStack.size() == 1, "Result stack should only have one value remaining - cannot return result" );
339 return resultStack.top();
342 // return list of current filter names
343 template<typename FilterChecker>
344 inline const std::vector<std::string> FilterEngine<FilterChecker>::filterNames(void) {
345 // initialize stringlist
346 std::vector<std::string> names;
347 names.reserve(m_filters.size());
348 // iterate over all filters, appending filter name
349 FilterMap::const_iterator mapIter = m_filters.begin();
350 FilterMap::const_iterator mapEnd = m_filters.end();
351 for ( ; mapIter != mapEnd; ++mapIter )
352 names.push_back( (*mapIter).first );
357 // parse a filterValue token string that may contain comparison qualifiers (">50", "*SRR", etc.)
358 template<class FilterChecker> template<typename T>
359 bool FilterEngine<FilterChecker>::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
361 // skip if token is empty
362 if ( token.empty() ) return false;
364 // will store token after special chars are removed
365 std::string strippedToken;
367 // if only single character
368 if ( token.length() == 1 ) {
369 strippedToken = token;
370 type = PropertyFilterValue::EXACT;
373 // more than one character, check for special chars
375 const int firstChar = (int)token.at(0);
376 switch ( firstChar ) {
378 case ( FilterEngine<FilterChecker>::NOT_CHAR ) :
379 strippedToken = token.substr(1);
380 type = PropertyFilterValue::NOT;
383 case ( FilterEngine<FilterChecker>::GREATER_THAN_CHAR ) :
385 // check for '>=' case
386 if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
387 if ( token.length() == 2 ) return false;
388 strippedToken = token.substr(2);
389 type = PropertyFilterValue::GREATER_THAN_EQUAL;
392 // otherwise only '>'
394 strippedToken = token.substr(1);
395 type = PropertyFilterValue::GREATER_THAN;
400 case ( FilterEngine<FilterChecker>::LESS_THAN_CHAR ) :
402 // check for '<=' case
403 if ( token.at(1) == FilterEngine<FilterChecker>::EQUAL_CHAR ) {
404 if ( token.length() == 2 ) return false;
405 strippedToken = token.substr(2);
406 type = PropertyFilterValue::LESS_THAN_EQUAL;
409 // otherwise only '<'
411 strippedToken = token.substr(1);
412 type = PropertyFilterValue::LESS_THAN;
417 case ( FilterEngine<FilterChecker>::WILDCARD_CHAR ) :
419 // check for *str* case (CONTAINS)
420 if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
421 if ( token.length() == 2 ) return false;
422 strippedToken = token.substr(1, token.length() - 2);
423 type = PropertyFilterValue::CONTAINS;
426 // otherwise *str case (ENDS_WITH)
428 strippedToken = token.substr(1);
429 type = PropertyFilterValue::ENDS_WITH;
435 // check for str* case (STARTS_WITH)
436 if ( token.at( token.length() - 1 ) == FilterEngine<FilterChecker>::WILDCARD_CHAR ) {
437 if ( token.length() == 2 ) return false;
438 strippedToken = token.substr(0, token.length() - 1);
439 type = PropertyFilterValue::STARTS_WITH;
444 strippedToken = token;
445 type = PropertyFilterValue::EXACT;
452 // convert stripped token to value
453 std::stringstream stream(strippedToken);
454 if ( strippedToken == "true" || strippedToken == "false" )
455 stream >> std::boolalpha >> value;
459 // check for valid CompareType on type T
460 Variant variantCheck = value;
462 // if T is not string AND CompareType is for string values, return false
463 if ( !variantCheck.is_type<std::string>() ) {
464 if ( type == PropertyFilterValue::CONTAINS ||
465 type == PropertyFilterValue::ENDS_WITH ||
466 type == PropertyFilterValue::STARTS_WITH )
475 // sets comparison operator between filters if no rule string given
476 // default is to do an OR on each filter
477 template<typename FilterChecker>
478 inline void FilterEngine<FilterChecker>::setDefaultCompareType(const FilterCompareType::Type& type) {
479 // check for supported compare type
480 if ( type == FilterCompareType::AND || type == FilterCompareType::OR ) {
481 // if not the current compare type
482 if ( m_defaultCompareType != type ) {
483 m_defaultCompareType = type;
489 // sets property filter (value, type) for propertyName, on a particular filter set
490 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
491 template<class FilterChecker> template<typename T>
492 bool FilterEngine<FilterChecker>::setProperty(const std::string& filterName,
493 const std::string& propertyName,
495 const PropertyFilterValue::ValueCompareType& type)
497 // lookup filter by name, return false if not found
498 FilterMap::iterator filterIter = m_filters.find(filterName);
499 if ( filterIter == m_filters.end() ) return false;
501 // lookup property for filter, add new PropertyFilterValue if not found, modify if already exists
502 PropertyFilter& filter = (*filterIter).second;
503 PropertyMap::iterator propertyIter = filter.Properties.find(propertyName);
507 // property not found for this filter, create new entry
508 if ( propertyIter == filter.Properties.end() )
509 success = (filter.Properties.insert(std::make_pair(propertyName, PropertyFilterValue(value, type)))).second;
511 // property already exists, modify
513 PropertyFilterValue& filterValue = (*propertyIter).second;
514 filterValue.Value = value;
515 filterValue.Type = type;
519 // if error so far, return false
520 if ( !success ) return false;
522 // --------------------------------------------
523 // otherwise, set Property.IsEnabled to true
526 std::vector<Property>::iterator knownPropertyIter = std::find( m_properties.begin(), m_properties.end(), propertyName);
528 // if not found, create a new (enabled) entry (& re-sort list)
529 if ( knownPropertyIter == m_properties.end() ) {
530 m_properties.push_back( Property(propertyName, true) );
531 std::sort( m_properties.begin(), m_properties.end() );
534 // property already known, set as enabled
535 else (*knownPropertyIter).IsEnabled = true;
541 // sets user-specified rule string & signals update of rule-expression queue
542 template<typename FilterChecker>
543 inline void FilterEngine<FilterChecker>::setRule(const std::string& ruleString) {
544 if ( m_ruleString != ruleString) {
545 m_ruleString = ruleString;
550 } // namespace BamTools
552 #endif // BAMTOOLS_FILTER_ENGINE_H