1 // ***************************************************************************
2 // bamtools_filter_ruleparser.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 September 2010
7 // ---------------------------------------------------------------------------
8 // Provides a compound rule parser for FilterEngine.
9 // ***************************************************************************
11 #ifndef BAMTOOLS_FILTER_RULEPARSER_H
12 #define BAMTOOLS_FILTER_RULEPARSER_H
17 #include "bamtools_utilities.h"
21 // -------------------------------------------
24 const char LEFT_PARENTHESIS_CHAR = '(';
25 const char RIGHT_PARENTHESIS_CHAR = ')';
26 const char AND_OPERATOR_CHAR = '&';
27 const char OR_OPERATOR_CHAR = '|';
28 const char NOT_OPERATOR_CHAR = '!';
29 const char SPACE_CHAR = ' ';
31 // -------------------------------------------
32 // RuleToken implementation
37 enum RuleTokenType { OPERAND = 0
50 inline int priority(const RuleToken& token) {
51 switch ( token.Type ) {
52 case ( RuleToken::NOT_OPERATOR ) : return 3;
53 case ( RuleToken::AND_OPERATOR ) : return 2;
54 case ( RuleToken::OR_OPERATOR ) : return 1;
55 case ( RuleToken::LEFT_PARENTHESIS ) : return 0;
56 case ( RuleToken::RIGHT_PARENTHESIS ) : return 0;
57 default: BAMTOOLS_ASSERT_UNREACHABLE;
61 inline bool isRightAssociative(const RuleToken& token) {
62 return (token.Type == RuleToken::NOT_OPERATOR ||
63 token.Type == RuleToken::LEFT_PARENTHESIS);
66 inline bool isLeftAssociative(const RuleToken& token) {
67 return !isRightAssociative(token);
70 inline bool isLeftParenthesis(const RuleToken& token) {
71 return ( token.Type == RuleToken::LEFT_PARENTHESIS );
74 inline bool isRightParenthesis(const RuleToken& token) {
75 return ( token.Type == RuleToken::RIGHT_PARENTHESIS );
78 inline bool isOperand(const RuleToken& token) {
79 return ( token.Type == RuleToken::OPERAND );
82 inline bool isOperator(const RuleToken& token) {
83 return ( token.Type == RuleToken::AND_OPERATOR ||
84 token.Type == RuleToken::OR_OPERATOR ||
85 token.Type == RuleToken::NOT_OPERATOR);
88 // -------------------------------------------
89 // RuleParser implementation
95 RuleParser(const std::string& ruleString)
96 : m_ruleString(ruleString)
98 // initialize char markers
99 m_begin = (char*)m_ruleString.c_str();
100 m_end = m_begin + m_ruleString.length();
104 ~RuleParser(void) { }
109 std::queue<std::string> results(void) const { return m_ruleQueue; }
113 char getNextChar(void);
114 void ignoreQuotes(void);
115 bool readToken(RuleToken& token);
116 void skipSpaces(void);
120 std::string m_ruleString;
125 std::queue<std::string> m_ruleQueue;
126 std::stack<RuleToken> m_operatorStack;
130 char RuleParser::getNextChar(void) {
131 if ( m_current == m_end ) return 0;
136 void RuleParser::ignoreQuotes(void) {
137 if ( *m_begin == '\"' ) ++m_begin;
138 if ( *m_end == '\"' ) --m_end;
142 void RuleParser::parse(void) {
144 // clear out any prior data
145 while ( !m_ruleQueue.empty() )
148 // skip if no rule to parse
149 if ( m_ruleString.empty() ) return;
151 // start at beginning of ruleString
154 // iterate through tokens in rule string
156 while ( readToken(token) ) {
158 if ( token.Value.empty() ) break;
160 // if token is an operand
161 if ( isOperand(token) )
162 m_ruleQueue.push(token.Value);
164 // if token is an operator
165 else if ( isOperator(token) ) {
167 // pop any operators at top of stack with higher priority
168 while ( !m_operatorStack.empty() ) {
169 const RuleToken& opToken = m_operatorStack.top();
170 if ( (isLeftAssociative(token) && (priority(token) <= priority(opToken))) ||
171 (isRightAssociative(token) && (priority(token) < priority(opToken)))
174 m_ruleQueue.push(opToken.Value);
175 m_operatorStack.pop();
180 // push current operator token onto stack
181 m_operatorStack.push(token);
184 // if token is left parenthesis
185 else if ( isLeftParenthesis(token) )
186 m_operatorStack.push(token);
188 // if token is right parenthesis
189 else if ( isRightParenthesis(token) ) {
191 bool foundLeftParenthesis = false;
193 // push operators into rule queue until left parenthesis found
194 while ( !m_operatorStack.empty() && !foundLeftParenthesis ) {
195 const RuleToken& opToken = m_operatorStack.top();
196 if ( !isLeftParenthesis(opToken) )
197 m_ruleQueue.push(opToken.Value);
199 foundLeftParenthesis = true;
200 m_operatorStack.pop();
203 // no left parenthesis found, error
204 BAMTOOLS_ASSERT_MESSAGE( foundLeftParenthesis, "ERROR: Mismatched parenthesis in rule string.1");
207 // error: unknown operand
208 else BAMTOOLS_ASSERT_UNREACHABLE;
211 // while there are still operators on stack
212 while ( !m_operatorStack.empty() ) {
213 const RuleToken& token = m_operatorStack.top();
214 BAMTOOLS_ASSERT_MESSAGE( (!isLeftParenthesis(token) && !isRightParenthesis(token)), "ERROR: Mismatched parenthesis in rule string.2");
215 m_ruleQueue.push(token.Value);
216 m_operatorStack.pop();
221 bool RuleParser::readToken(RuleToken& token) {
223 // skip any preceding whitespace
225 if ( m_current == m_end ) return false;
227 // clear out prior token value
230 // read chars while still in token
232 bool keepReading = true;
233 bool inOperandString = false;
234 while ( keepReading && (c != 0) ) {
240 // current char is '('
241 case ( LEFT_PARENTHESIS_CHAR ) :
242 token.Type = RuleToken::LEFT_PARENTHESIS;
243 token.Value.append(1, LEFT_PARENTHESIS_CHAR);
247 // current char is ')'
248 case ( RIGHT_PARENTHESIS_CHAR ) :
249 if ( inOperandString )
252 token.Type = RuleToken::RIGHT_PARENTHESIS;
253 token.Value.append(1, RIGHT_PARENTHESIS_CHAR);
258 // current char is '&'
259 case ( AND_OPERATOR_CHAR ) :
260 if ( inOperandString )
263 token.Type = RuleToken::AND_OPERATOR;
264 token.Value.append(1, AND_OPERATOR_CHAR);
269 // current char is '|'
270 case ( OR_OPERATOR_CHAR ) :
271 if ( inOperandString )
274 token.Type = RuleToken::OR_OPERATOR;
275 token.Value.append(1, OR_OPERATOR_CHAR);
280 // current char is '!'
281 case ( NOT_OPERATOR_CHAR ) :
282 token.Type = RuleToken::NOT_OPERATOR;
283 token.Value.append(1, NOT_OPERATOR_CHAR);
287 // current char is ' '
288 case ( SPACE_CHAR ) :
292 // current char is a true value token
295 token.Type = RuleToken::OPERAND;
296 token.Value.append(1, c);
297 inOperandString = true;
307 void RuleParser::skipSpaces(void) {
308 while ( m_current != m_end ) {
309 const char c = *m_current;
310 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n')
316 } // namespace BamTools
318 #endif // BAMTOOLS_FILTER_RULEPARSER_H