1 // ***************************************************************************
2 // bamtools_filter_ruleparser.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 19 November 2010
7 // ---------------------------------------------------------------------------
8 // Provides a compound rule parser for FilterEngine.
9 // ***************************************************************************
11 #ifndef BAMTOOLS_FILTER_RULEPARSER_H
12 #define BAMTOOLS_FILTER_RULEPARSER_H
14 #include <utils/bamtools_utilities.h>
21 // -------------------------------------------
24 const char LEFT_PARENTHESIS_CHAR = '(';
25 const char RIGHT_PARENTHESIS_CHAR = ')';
26 const char AND_OPERATOR_CHAR = '&';
27 const char OR_OPERATOR_CHAR = '|';
28 const char NOT_OPERATOR_CHAR = '!';
29 const char SPACE_CHAR = ' ';
31 // -------------------------------------------
32 // RuleToken implementation
37 enum RuleTokenType { OPERAND = 0
50 inline int priority(const RuleToken& token) {
51 switch ( token.Type ) {
52 case ( RuleToken::NOT_OPERATOR ) : return 3;
53 case ( RuleToken::AND_OPERATOR ) : return 2;
54 case ( RuleToken::OR_OPERATOR ) : return 1;
55 case ( RuleToken::LEFT_PARENTHESIS ) : return 0;
56 case ( RuleToken::RIGHT_PARENTHESIS ) : return 0;
58 BAMTOOLS_ASSERT_UNREACHABLE;
63 inline bool isRightAssociative(const RuleToken& token) {
64 return (token.Type == RuleToken::NOT_OPERATOR ||
65 token.Type == RuleToken::LEFT_PARENTHESIS);
68 inline bool isLeftAssociative(const RuleToken& token) {
69 return !isRightAssociative(token);
72 inline bool isLeftParenthesis(const RuleToken& token) {
73 return ( token.Type == RuleToken::LEFT_PARENTHESIS );
76 inline bool isRightParenthesis(const RuleToken& token) {
77 return ( token.Type == RuleToken::RIGHT_PARENTHESIS );
80 inline bool isOperand(const RuleToken& token) {
81 return ( token.Type == RuleToken::OPERAND );
84 inline bool isOperator(const RuleToken& token) {
85 return ( token.Type == RuleToken::AND_OPERATOR ||
86 token.Type == RuleToken::OR_OPERATOR ||
87 token.Type == RuleToken::NOT_OPERATOR);
90 // -------------------------------------------
91 // RuleParser implementation
97 RuleParser(const std::string& ruleString)
98 : m_ruleString(ruleString)
100 // initialize char markers
101 m_begin = (char*)m_ruleString.c_str();
102 m_end = m_begin + m_ruleString.length();
106 ~RuleParser(void) { }
111 std::queue<std::string> results(void) const { return m_ruleQueue; }
115 char getNextChar(void);
116 void ignoreQuotes(void);
117 bool readToken(RuleToken& token);
118 void skipSpaces(void);
122 std::string m_ruleString;
127 std::queue<std::string> m_ruleQueue;
128 std::stack<RuleToken> m_operatorStack;
132 char RuleParser::getNextChar(void) {
133 if ( m_current == m_end ) return 0;
138 void RuleParser::ignoreQuotes(void) {
139 if ( *m_begin == '\"' ) ++m_begin;
140 if ( *m_end == '\"' ) --m_end;
144 void RuleParser::parse(void) {
146 // clear out any prior data
147 while ( !m_ruleQueue.empty() )
150 // skip if no rule to parse
151 if ( m_ruleString.empty() ) return;
153 // start at beginning of ruleString
156 // iterate through tokens in rule string
158 while ( readToken(token) ) {
160 if ( token.Value.empty() ) break;
162 // if token is an operand
163 if ( isOperand(token) )
164 m_ruleQueue.push(token.Value);
166 // if token is an operator
167 else if ( isOperator(token) ) {
169 // pop any operators at top of stack with higher priority
170 while ( !m_operatorStack.empty() ) {
171 const RuleToken& opToken = m_operatorStack.top();
172 if ( (isLeftAssociative(token) && (priority(token) <= priority(opToken))) ||
173 (isRightAssociative(token) && (priority(token) < priority(opToken)))
176 m_ruleQueue.push(opToken.Value);
177 m_operatorStack.pop();
182 // push current operator token onto stack
183 m_operatorStack.push(token);
186 // if token is left parenthesis
187 else if ( isLeftParenthesis(token) )
188 m_operatorStack.push(token);
190 // if token is right parenthesis
191 else if ( isRightParenthesis(token) ) {
193 bool foundLeftParenthesis = false;
195 // push operators into rule queue until left parenthesis found
196 while ( !m_operatorStack.empty() && !foundLeftParenthesis ) {
197 const RuleToken& opToken = m_operatorStack.top();
198 if ( !isLeftParenthesis(opToken) )
199 m_ruleQueue.push(opToken.Value);
201 foundLeftParenthesis = true;
202 m_operatorStack.pop();
205 // no left parenthesis found, error
206 BAMTOOLS_ASSERT_MESSAGE( foundLeftParenthesis, "ERROR: Mismatched parenthesis in rule string.1");
209 // error: unknown operand
210 else BAMTOOLS_ASSERT_UNREACHABLE;
213 // while there are still operators on stack
214 while ( !m_operatorStack.empty() ) {
215 const RuleToken& token = m_operatorStack.top();
216 BAMTOOLS_ASSERT_MESSAGE( (!isLeftParenthesis(token) && !isRightParenthesis(token)), "ERROR: Mismatched parenthesis in rule string.2");
217 m_ruleQueue.push(token.Value);
218 m_operatorStack.pop();
223 bool RuleParser::readToken(RuleToken& token) {
225 // skip any preceding whitespace
227 if ( m_current == m_end ) return false;
229 // clear out prior token value
232 // read chars while still in token
234 bool keepReading = true;
235 bool inOperandString = false;
236 while ( keepReading && (c != 0) ) {
242 // current char is '('
243 case ( LEFT_PARENTHESIS_CHAR ) :
244 token.Type = RuleToken::LEFT_PARENTHESIS;
245 token.Value.append(1, LEFT_PARENTHESIS_CHAR);
249 // current char is ')'
250 case ( RIGHT_PARENTHESIS_CHAR ) :
251 if ( inOperandString )
254 token.Type = RuleToken::RIGHT_PARENTHESIS;
255 token.Value.append(1, RIGHT_PARENTHESIS_CHAR);
260 // current char is '&'
261 case ( AND_OPERATOR_CHAR ) :
262 if ( inOperandString )
265 token.Type = RuleToken::AND_OPERATOR;
266 token.Value.append(1, AND_OPERATOR_CHAR);
271 // current char is '|'
272 case ( OR_OPERATOR_CHAR ) :
273 if ( inOperandString )
276 token.Type = RuleToken::OR_OPERATOR;
277 token.Value.append(1, OR_OPERATOR_CHAR);
282 // current char is '!'
283 case ( NOT_OPERATOR_CHAR ) :
284 token.Type = RuleToken::NOT_OPERATOR;
285 token.Value.append(1, NOT_OPERATOR_CHAR);
289 // current char is ' '
290 case ( SPACE_CHAR ) :
294 // current char is a true value token
297 token.Type = RuleToken::OPERAND;
298 token.Value.append(1, c);
299 inOperandString = true;
309 void RuleParser::skipSpaces(void) {
310 while ( m_current != m_end ) {
311 const char c = *m_current;
312 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n')
318 } // namespace BamTools
320 #endif // BAMTOOLS_FILTER_RULEPARSER_H