1 // ***************************************************************************
2 // bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 30 August 2010
7 // ---------------------------------------------------------------------------
9 // ***************************************************************************
11 #ifndef BAMTOOLS_FILTER_ENGINE_H
12 #define BAMTOOLS_FILTER_ENGINE_H
21 #include "bamtools_utilities.h"
22 #include "bamtools_variant.h"
26 struct PropertyFilterValue {
28 // define valid ValueCompareTypes
29 enum ValueCompareType { CONTAINS = 0
41 PropertyFilterValue(const Variant& value = Variant(),
42 const ValueCompareType& type = PropertyFilterValue::EXACT)
47 // filter check methods
49 bool check(const T& query) const;
50 bool check(const std::string& query) const;
54 ValueCompareType Type;
58 const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
61 case ( PropertyFilterValue::CONTAINS ) : return std::string( "CONTAINS");
62 case ( PropertyFilterValue::ENDS_WITH ) : return std::string( "ENDS_WITH");
63 case ( PropertyFilterValue::EXACT ) : return std::string( "EXACT");
64 case ( PropertyFilterValue::GREATER_THAN ) : return std::string( "GREATER_THAN");
65 case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
66 case ( PropertyFilterValue::LESS_THAN ) : return std::string( "LESS_THAN");
67 case ( PropertyFilterValue::LESS_THAN_EQUAL ) : return std::string( "LESS_THAN_EQUAL");
68 case ( PropertyFilterValue::NOT ) : return std::string( "NOT");
69 case ( PropertyFilterValue::STARTS_WITH ) : return std::string( "STARTS_WITH");
70 default : BAMTOOLS_ASSERT_UNREACHABLE;
75 // property name => property filter value
76 // ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...)
77 typedef std::map<std::string, PropertyFilterValue> PropertyMap;
79 struct PropertyFilter {
81 enum FilterCompareType { AND = 0
88 PropertyMap Properties;
89 FilterCompareType Type;
90 PropertyFilter* LeftChild;
91 PropertyFilter* RightChild;
95 ~PropertyFilter(void);
97 // filter check methods
99 bool check(const std::string& propertyName, const T& query) const;
102 // filter name => properties
103 // ('filter1' => properties1, 'filter2' => properties2, etc...)
104 typedef std::map<std::string, PropertyFilter> FilterMap;
106 // used to store properties known to engine & keep track of enabled state
110 Property(const std::string& name, bool isEnabled = false)
112 , IsEnabled(isEnabled)
116 inline bool operator< (const Property& lhs, const Property& rhs) { return lhs.Name < rhs.Name; }
117 inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
121 // 'filter set' methods
123 // creates a new filter set, returns true if created, false if error or already exists
124 static bool addFilter(const std::string& filterName);
126 // return list of current filter names
127 static const std::vector<std::string> filterNames(void);
129 // 'property' methods
132 // add a new known property (& type) to engine
133 static bool addProperty(const std::string& propertyName);
135 // sets property filter (value, type) for propertyName, on a particular filter set
136 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
138 static bool setProperty(const std::string& filterName,
139 const std::string& propertyName,
141 const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
143 // returns list of all properties known by FilterEngine ( any created using addProperty() )
144 static const std::vector<std::string> allPropertyNames(void);
146 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
147 static const std::vector<std::string> enabledPropertyNames(void);
149 // token parsing (for property filter generation)
152 static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
156 // returns true if query passes all filters on 'propertyName'
158 static bool check(const std::string& propertyName, const T& query);
162 static void print(void);
163 static void printAllProperties(void);
164 static void printEnabledProperties(void);
165 static void printFilters(void);
170 static FilterMap m_filters;
172 // all known properties
173 static std::vector<Property> m_properties;
175 // token-parsing constants
176 static const int NOT_CHAR = (int)'!';
177 static const int EQUAL_CHAR = (int)'=';
178 static const int GREATER_THAN_CHAR = (int)'>';
179 static const int LESS_THAN_CHAR = (int)'<';
180 static const int WILDCARD_CHAR = (int)'*';
183 // -------------------------------------------------------------------
186 // checks a query against a filter (value, compare type)
188 bool PropertyFilterValue::check(const T& query) const {
190 // ensure filter value & query are same type
191 if ( !Value.is_type<T>() ) {
192 std::cerr << "Cannot compare different types!" << std::endl;
197 if ( Value.is_type<std::string>() ) {
198 std::cerr << "Cannot compare different types - query is a string!" << std::endl;
202 // numeric matching based on our filter type
204 case ( PropertyFilterValue::EXACT) : return ( query == Value.get<T>() );
205 case ( PropertyFilterValue::GREATER_THAN) : return ( query > Value.get<T>() );
206 case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() );
207 case ( PropertyFilterValue::LESS_THAN) : return ( query < Value.get<T>() );
208 case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= Value.get<T>() );
209 case ( PropertyFilterValue::NOT) : return ( query != Value.get<T>() );
210 default : BAMTOOLS_ASSERT_UNREACHABLE;
216 bool PropertyFilter::check(const std::string& propertyName, const T& query) const {
218 // if this filter is a 'leaf' filter
219 if ( (LeftChild == 0 ) && ( RightChild == 0 ) ) {
221 // if propertyName found for this filter,
222 PropertyMap::const_iterator propIter = Properties.find(propertyName);
223 if ( propIter != Properties.end() ) {
224 const PropertyFilterValue& filterValue = (*propIter).second;
228 case ( PropertyFilter::EXACT ) : return filterValue.check(query);
229 case ( PropertyFilter::NOT ) : return !filterValue.check(query);
230 case ( PropertyFilter::AND ) :
231 case ( PropertyFilter::OR ) : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a binary compare operator on 1 value");
232 default : BAMTOOLS_ASSERT_UNREACHABLE;
234 return false; // unreachable
237 // property unknown to this filter
241 // if this filter is a parent filter (contains valid left & right children)
242 else if ( LeftChild && RightChild ) {
244 // return result of children using this filter's compare type (AND, OR)
246 case ( PropertyFilter::AND ) : return LeftChild->check(propertyName, query) && RightChild->check(propertyName, query);
247 case ( PropertyFilter::OR ) : return LeftChild->check(propertyName, query) || RightChild->check(propertyName, query);
248 case ( PropertyFilter::EXACT) :
249 case ( PropertyFilter::NOT) : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a unary compare operator on 2 values");
250 default : BAMTOOLS_ASSERT_UNREACHABLE;
252 return false; // unreachable
255 // otherwise filter contains one child... invalid state
257 BAMTOOLS_ASSERT_MESSAGE(false, "PropertyFilter needs both children to do a binary compare");
263 bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
265 // skip if token is empty
266 if ( token.empty() ) return false;
268 // will store token after special chars are removed
269 std::string strippedToken;
271 // if only single character
272 if ( token.length() == 1 ) {
273 strippedToken = token;
274 type = PropertyFilterValue::EXACT;
277 // more than one character, check for special chars
279 const int firstChar = (int)token.at(0);
281 switch ( (int)firstChar ) {
283 case ( (int)FilterEngine::NOT_CHAR ) :
285 strippedToken = token.substr(1);
286 type = PropertyFilterValue::NOT;
290 case ( (int)FilterEngine::GREATER_THAN_CHAR ) :
292 // check for '>=' case
293 if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
294 if ( token.length() == 2 ) return false;
295 strippedToken = token.substr(2);
296 type = PropertyFilterValue::GREATER_THAN_EQUAL;
299 // otherwise only '>'
301 strippedToken = token.substr(1);
302 type = PropertyFilterValue::GREATER_THAN;
307 case ( (int)FilterEngine::LESS_THAN_CHAR ) :
309 // check for '<=' case
310 if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
311 if ( token.length() == 2 ) return false;
312 strippedToken = token.substr(2);
313 type = PropertyFilterValue::LESS_THAN_EQUAL;
316 // otherwise only '<'
318 strippedToken = token.substr(1);
319 type = PropertyFilterValue::LESS_THAN;
324 case ( (int)FilterEngine::WILDCARD_CHAR ) :
326 // check for *str* case (CONTAINS)
327 if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
328 if ( token.length() == 2 ) return false;
329 strippedToken = token.substr(1, token.length() - 2);
330 type = PropertyFilterValue::CONTAINS;
333 // otherwise *str case (ENDS_WITH)
335 strippedToken = token.substr(1);
336 type = PropertyFilterValue::ENDS_WITH;
344 // check for str* case (STARTS_WITH)
345 if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
346 if ( token.length() == 2 ) return false;
347 strippedToken = token.substr(0, token.length() - 1);
348 type = PropertyFilterValue::STARTS_WITH;
353 strippedToken = token;
354 type = PropertyFilterValue::EXACT;
361 // convert stripped token to value
362 std::stringstream stream(strippedToken);
363 if ( strippedToken == "true" || strippedToken == "false" )
364 stream >> std::boolalpha >> value;
368 // check for valid CompareType on type T
369 Variant variantCheck = value;
371 // if T is not string AND CompareType is for string values, return false
372 if ( !variantCheck.is_type<std::string>() ) {
373 if ( type == PropertyFilterValue::CONTAINS ||
374 type == PropertyFilterValue::ENDS_WITH ||
375 type == PropertyFilterValue::STARTS_WITH )
384 // sets property filter (value, type) for propertyName, on a particular filter set
385 // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
387 bool FilterEngine::setProperty(const std::string& filterName,
388 const std::string& propertyName,
390 const PropertyFilterValue::ValueCompareType& type)
392 // lookup filter by name, return false if not found
393 FilterMap::iterator filterIter = m_filters.find(filterName);
394 if ( filterIter == m_filters.end() ) return false;
396 // lookup property for filter, add new PropertyFilterValue if not found, modify if already exists
397 PropertyFilter& filter = (*filterIter).second;
398 PropertyMap::iterator propertyIter = filter.Properties.find(propertyName);
402 // property not found for this filter, create new entry
403 if ( propertyIter == filter.Properties.end() )
404 success = (filter.Properties.insert(std::make_pair(propertyName, PropertyFilterValue(value, type)))).second;
406 // property already exists, modify
408 PropertyFilterValue& filterValue = (*propertyIter).second;
409 filterValue.Value = value;
410 filterValue.Type = type;
414 // if error so far, return false
415 if ( !success ) return false;
417 // --------------------------------------------
418 // otherwise, set Property.IsEnabled to true
421 std::vector<Property>::iterator knownPropertyIter = std::find( m_properties.begin(), m_properties.end(), propertyName);
423 // if not found, create a new (enabled) entry (& re-sort list)
424 if ( knownPropertyIter == m_properties.end() ) {
425 m_properties.push_back( Property(propertyName, true) );
426 std::sort( m_properties.begin(), m_properties.end() );
429 // property already known, set as enabled
431 (*knownPropertyIter).IsEnabled = true;
437 // returns false if query does not pass any filters on 'propertyName'
438 // returns true if property unknown (i.e. nothing has been set for this property... so query is considered to pass filter)
440 bool FilterEngine::check(const std::string& propertyName, const T& query) {
442 // check enabled properties list
443 // return true if no properties enabled at all OR if property is unknown to FilterEngine
444 const std::vector<std::string> enabledProperties = enabledPropertyNames();
445 if ( enabledProperties.empty() ) return true;
446 const bool found = std::binary_search( enabledProperties.begin(), enabledProperties.end(), propertyName );
447 if ( !found ) return true;
449 // iterate over all filters in FilterEngine
450 FilterMap::const_iterator filterIter = m_filters.begin();
451 FilterMap::const_iterator filterEnd = m_filters.end();
452 for ( ; filterIter != filterEnd; ++filterIter ) {
454 // check query against this filter
455 const PropertyFilter& filter = (*filterIter).second;
456 if ( !filter.check(propertyName, query) ) return false;
459 // // see if filter set has this property enabled
460 // const PropertyFilter& filter = (*filterIter).second;
461 // PropertyMap::const_iterator propIter = filter.Properties.find(propertyName);
462 // if ( propIter != filter.Properties.end() ) {
464 // // if so, check query against filter, return false if check fails
465 // const PropertyFilterValue& propertyFilter = (*propIter).second;
466 // if ( !propertyFilter.check(query) ) return false;
470 // query passes all filters with property enabled
474 } // namespace BamTools
476 #endif // BAMTOOLS_FILTER_ENGINE_H