1 // ***************************************************************************
2 // bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 30 August 2010
7 // ---------------------------------------------------------------------------
9 // ***************************************************************************
11 #ifndef BAMTOOLS_FILTER_ENGINE_H
12 #define BAMTOOLS_FILTER_ENGINE_H
20 #include "bamtools_utilities.h"
21 #include "bamtools_variant.h"
// Couples a filter value (held in a Variant) with the comparison rule used when a query is checked against it.
25 struct PropertyFilterValue {
27 // valid comparison rules (enumerators start at CONTAINS = 0)
28 enum ValueCompareType { CONTAINS = 0
// ctor: both arguments are optional; the comparison rule defaults to EXACT
40 PropertyFilterValue(const Variant& value = Variant(),
41 const ValueCompareType& type = PropertyFilterValue::EXACT)
46 // filter check methods: one overload for a generic query type T, one for std::string queries
48 bool check(const T& query) const;
49 bool check(const std::string& query) const;
// comparison rule for this filter value
53 ValueCompareType Type;
// Returns the enumerator name of a ValueCompareType as a std::string
// (e.g. PropertyFilterValue::CONTAINS => "CONTAINS").
// Any value without a case below reaches BAMTOOLS_ASSERT_UNREACHABLE.
57 const std::string toString(const PropertyFilterValue::ValueCompareType& type) {
60 case ( PropertyFilterValue::CONTAINS ) : return std::string( "CONTAINS");
61 case ( PropertyFilterValue::ENDS_WITH ) : return std::string( "ENDS_WITH");
62 case ( PropertyFilterValue::EXACT ) : return std::string( "EXACT");
63 case ( PropertyFilterValue::GREATER_THAN ) : return std::string( "GREATER_THAN");
64 case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL");
65 case ( PropertyFilterValue::LESS_THAN ) : return std::string( "LESS_THAN");
66 case ( PropertyFilterValue::LESS_THAN_EQUAL ) : return std::string( "LESS_THAN_EQUAL");
67 case ( PropertyFilterValue::NOT ) : return std::string( "NOT");
68 case ( PropertyFilterValue::STARTS_WITH ) : return std::string( "STARTS_WITH");
69 default : BAMTOOLS_ASSERT_UNREACHABLE;
74 // maps a property name to the filter value applied to that property
75 // e.g. 'name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc.
76 typedef std::map<std::string, PropertyFilterValue> PropertyMap;
// A filter set: per-property filter values plus a filter-level compare type.
78 struct PropertyFilter {
80 // will be used more later
81 // if we implement a compound 'rules' system - e.g. "(filter1 AND filter2) OR filter3"
82 enum FilterCompareType { AND = 0
// property name => filter value for this filter set
89 PropertyMap Properties;
// filter-level compare type (set to EXACT by the default ctor)
90 FilterCompareType Type;
93 PropertyFilter(void) : Type( PropertyFilter::EXACT ) { }
95 // filter check methods
// checks 'query' against the filter value stored for 'propertyName'
97 bool check(const std::string& propertyName, const T& query) const;
100 // maps a filter name to that filter's property set
101 // e.g. 'filter1' => properties1, 'filter2' => properties2, etc.
102 typedef std::map<std::string, PropertyFilter> FilterMap;
104 // used to store properties known to engine & keep track of enabled state
// ctor: a property is disabled unless isEnabled is passed as true
// NOTE(review): this ctor is not explicit; FilterEngine::setProperty appears to rely on the
// implicit std::string => Property conversion when it calls std::find with a property name - confirm
108 Property(const std::string& name, bool isEnabled = false)
110 , IsEnabled(isEnabled)
// ordering and equality consider Name only; IsEnabled is ignored
114 inline bool operator< (const Property& lhs, const Property& rhs) { return lhs.Name < rhs.Name; }
115 inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; }
119 // 'filter set' methods
121 // creates a new filter set, returns true if created, false if error or already exists
122 static bool addFilter(const std::string& filterName);
124 // return list of current filter names
125 static const std::vector<std::string> filterNames(void);
127 // 'property' methods
130 // add a new known property to engine
131 static bool addProperty(const std::string& propertyName);
133 // sets property filter (value, type) for propertyName, on a particular filter set
134 // e.g. setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
// returns false if filterName does not name an existing filter set; the compare type defaults to EXACT
136 static bool setProperty(const std::string& filterName,
137 const std::string& propertyName,
139 const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT);
141 // returns list of all properties known by FilterEngine ( any created using addProperty() )
142 static const std::vector<std::string> allPropertyNames(void);
144 // returns list of property names that are 'enabled' ( only those touched by setProperty() )
145 static const std::vector<std::string> enabledPropertyNames(void);
147 // token parsing (for property filter generation)
// splits 'token' into a value and a compare type; returns false for an empty token
150 static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type);
154 // returns true if 'propertyName' is not an enabled property, or if query passes at least one filter set
156 static bool check(const std::string& propertyName, const T& query);
// filter sets, keyed by filter name
161 static FilterMap m_filters;
163 // all known properties
164 static std::vector<Property> m_properties;
166 // token-parsing constants
167 static const int NOT_CHAR = (int)'!';
168 static const int EQUAL_CHAR = (int)'=';
169 static const int GREATER_THAN_CHAR = (int)'>';
170 static const int LESS_THAN_CHAR = (int)'<';
171 static const int WILDCARD_CHAR = (int)'*';
174 // -------------------------------------------------------------------
177 // checks a query against a filter (value, compare type)
// generic overload: the stored Value must hold the same type T as the query
179 bool PropertyFilterValue::check(const T& query) const {
181 // ensure filter value & query are same type
182 if ( !Value.is_type<T>() ) {
183 std::cerr << "Cannot compare different types!" << std::endl;
// a string-valued filter is reported as an error in this overload
188 if ( Value.is_type<std::string>() ) {
189 std::cerr << "Cannot compare different types - query is a string!" << std::endl;
193 // numeric matching based on our filter type
// (CONTAINS, ENDS_WITH and STARTS_WITH have no case here and reach the default branch)
195 case ( PropertyFilterValue::EXACT) : return ( query == Value.get<T>() );
196 case ( PropertyFilterValue::GREATER_THAN) : return ( query > Value.get<T>() );
197 case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get<T>() );
198 case ( PropertyFilterValue::LESS_THAN) : return ( query < Value.get<T>() );
199 case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= Value.get<T>() );
200 case ( PropertyFilterValue::NOT) : return ( query != Value.get<T>() );
201 default : BAMTOOLS_ASSERT_UNREACHABLE;
// checks 'query' against this filter set's entry for 'propertyName', applying the filter-level compare type
207 bool PropertyFilter::check(const std::string& propertyName, const T& query) const {
209 // if propertyName found for this filter,
210 PropertyMap::const_iterator propIter = Properties.find(propertyName);
211 if ( propIter != Properties.end() ) {
212 const PropertyFilterValue& filterValue = (*propIter).second;
// EXACT forwards the result of the value check; NOT inverts it
216 case ( PropertyFilter::EXACT ) : return filterValue.check(query);
217 case ( PropertyFilter::NOT ) : return !filterValue.check(query);
// AND / OR are binary operators and cannot apply to a single value, so they trip an assertion
218 case ( PropertyFilter::AND ) :
219 case ( PropertyFilter::OR ) : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a binary compare operator on 1 value");
220 default : BAMTOOLS_ASSERT_UNREACHABLE;
222 return false; // unreachable
225 // property unknown to this filter
// Parses a filter token into a value and a compare type.
// Token syntax recognised below (x = remaining text):
//   "!x" => NOT            ">x"  => GREATER_THAN    ">=x" => GREATER_THAN_EQUAL
//   "<x" => LESS_THAN      "<=x" => LESS_THAN_EQUAL
//   "*x*" => CONTAINS      "*x"  => ENDS_WITH       "x*"  => STARTS_WITH
//   anything else => EXACT
// 'value' receives the stripped text converted through a std::stringstream; 'type' receives the compare type.
// Returns false for an empty token and for tokens that leave nothing after the operator (">=", "<=", "**").
230 bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) {
232 // skip if token is empty
233 if ( token.empty() ) return false;
235 // will store token after special chars are removed
236 std::string strippedToken;
238 // if only single character
// (a lone '!', '>', '<' or '*' is therefore taken literally as an EXACT value)
239 if ( token.length() == 1 ) {
240 strippedToken = token;
241 type = PropertyFilterValue::EXACT;
244 // more than one character, check for special chars
246 const int firstChar = (int)token.at(0);
248 switch ( (int)firstChar ) {
250 case ( (int)FilterEngine::NOT_CHAR ) :
252 strippedToken = token.substr(1);
253 type = PropertyFilterValue::NOT;
257 case ( (int)FilterEngine::GREATER_THAN_CHAR ) :
259 // check for '>=' case
// token.at(1) is in range: single-character tokens were handled above
260 if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
261 if ( token.length() == 2 ) return false;
262 strippedToken = token.substr(2);
263 type = PropertyFilterValue::GREATER_THAN_EQUAL;
266 // otherwise only '>'
268 strippedToken = token.substr(1);
269 type = PropertyFilterValue::GREATER_THAN;
274 case ( (int)FilterEngine::LESS_THAN_CHAR ) :
276 // check for '<=' case
277 if ( token.at(1) == FilterEngine::EQUAL_CHAR ) {
278 if ( token.length() == 2 ) return false;
279 strippedToken = token.substr(2);
280 type = PropertyFilterValue::LESS_THAN_EQUAL;
283 // otherwise only '<'
285 strippedToken = token.substr(1);
286 type = PropertyFilterValue::LESS_THAN;
291 case ( (int)FilterEngine::WILDCARD_CHAR ) :
293 // check for *str* case (CONTAINS)
294 if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
// "**" has nothing between the wildcards
295 if ( token.length() == 2 ) return false;
296 strippedToken = token.substr(1, token.length() - 2);
297 type = PropertyFilterValue::CONTAINS;
300 // otherwise *str case (ENDS_WITH)
302 strippedToken = token.substr(1);
303 type = PropertyFilterValue::ENDS_WITH;
311 // check for str* case (STARTS_WITH)
312 if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) {
// NOTE(review): the first character is not an operator here, so a two-character token such as "a*"
// still carries a one-character prefix; this guard rejects it - confirm that is intended
313 if ( token.length() == 2 ) return false;
314 strippedToken = token.substr(0, token.length() - 1);
315 type = PropertyFilterValue::STARTS_WITH;
320 strippedToken = token;
321 type = PropertyFilterValue::EXACT;
328 // convert stripped token to value
// the literal words "true" / "false" are extracted with std::boolalpha
// NOTE(review): no check of the stream state after extraction is visible here - confirm failed conversions are handled
329 std::stringstream stream(strippedToken);
330 if ( strippedToken == "true" || strippedToken == "false" )
331 stream >> std::boolalpha >> value;
335 // check for valid CompareType on type T
336 Variant variantCheck = value;
338 // if T is not string AND CompareType is for string values, return false
339 if ( !variantCheck.is_type<std::string>() ) {
340 if ( type == PropertyFilterValue::CONTAINS ||
341 type == PropertyFilterValue::ENDS_WITH ||
342 type == PropertyFilterValue::STARTS_WITH )
351 // sets property filter (value, type) for propertyName, on a particular filter set
352 // e.g. setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL)
// returns false if filterName is unknown or if inserting the new entry fails;
// on success the property is also marked as enabled in m_properties
354 bool FilterEngine::setProperty(const std::string& filterName,
355 const std::string& propertyName,
357 const PropertyFilterValue::ValueCompareType& type)
359 // lookup filter by name, return false if not found
360 FilterMap::iterator filterIter = m_filters.find(filterName);
361 if ( filterIter == m_filters.end() ) return false;
363 // lookup property for filter, add new PropertyFilterValue if not found, modify if already exists
364 PropertyFilter& filter = (*filterIter).second;
365 PropertyMap::iterator propertyIter = filter.Properties.find(propertyName);
369 // property not found for this filter, create new entry
// (std::map::insert returns a pair whose .second reports whether the insertion happened)
370 if ( propertyIter == filter.Properties.end() )
371 success = (filter.Properties.insert(std::make_pair(propertyName, PropertyFilterValue(value, type)))).second;
373 // property already exists, modify
375 PropertyFilterValue& filterValue = (*propertyIter).second;
376 filterValue.Value = value;
377 filterValue.Type = type;
381 // if error so far, return false
382 if ( !success ) return false;
384 // --------------------------------------------
385 // otherwise, set Property.IsEnabled to true
// linear search by name; propertyName is compared against Property entries via operator== (Name only)
388 std::vector<Property>::iterator knownPropertyIter = std::find( m_properties.begin(), m_properties.end(), propertyName);
390 // if not found, create a new (enabled) entry (& re-sort list)
391 if ( knownPropertyIter == m_properties.end() ) {
392 m_properties.push_back( Property(propertyName, true) );
// re-sorting keeps m_properties ordered by Name after the append
393 std::sort( m_properties.begin(), m_properties.end() );
396 // property already known, set as enabled
398 (*knownPropertyIter).IsEnabled = true;
404 // returns false if query does not pass any filters on 'propertyName'
405 // returns true if property unknown (i.e. nothing has been set for this property... so query is considered to pass filter)
// otherwise returns true as soon as one filter set accepts the query
407 bool FilterEngine::check(const std::string& propertyName, const T& query) {
409 // check enabled properties list
410 // return true if no properties enabled at all OR if property is unknown to FilterEngine
// NOTE(review): the name list is copied on every call; std::binary_search below also requires it to be
// sorted - assumes enabledPropertyNames() returns names in sorted order, TODO confirm
411 const std::vector<std::string> enabledProperties = enabledPropertyNames();
412 if ( enabledProperties.empty() ) return true;
413 const bool found = std::binary_search( enabledProperties.begin(), enabledProperties.end(), propertyName );
414 if ( !found ) return true;
416 // iterate over all filters in FilterEngine
417 FilterMap::const_iterator filterIter = m_filters.begin();
418 FilterMap::const_iterator filterEnd = m_filters.end();
419 for ( ; filterIter != filterEnd; ++filterIter ) {
421 // check query against this filter
// the first filter set that accepts the query ends the search
422 const PropertyFilter& filter = (*filterIter).second;
423 if ( filter.check(propertyName, query) ) return true;
426 // query passes none of the filters with current property enabled
430 } // namespace BamTools
432 #endif // BAMTOOLS_FILTER_ENGINE_H