1 // ***************************************************************************
2 // bamtools_filter_ruleparser.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011
6 // ---------------------------------------------------------------------------
7 // Provides a compound rule parser for FilterEngine.
8 // ***************************************************************************
10 #ifndef BAMTOOLS_FILTER_RULEPARSER_H
11 #define BAMTOOLS_FILTER_RULEPARSER_H
13 #include "utils/bamtools_utilities.h"
20 // -------------------------------------------
23 const char LEFT_PARENTHESIS_CHAR = '(';
24 const char RIGHT_PARENTHESIS_CHAR = ')';
25 const char AND_OPERATOR_CHAR = '&';
26 const char OR_OPERATOR_CHAR = '|';
27 const char NOT_OPERATOR_CHAR = '!';
28 const char SPACE_CHAR = ' ';
30 // -------------------------------------------
31 // RuleToken implementation
36 enum RuleTokenType { OPERAND = 0
49 inline int priority(const RuleToken& token) {
50 switch ( token.Type ) {
51 case ( RuleToken::NOT_OPERATOR ) : return 3;
52 case ( RuleToken::AND_OPERATOR ) : return 2;
53 case ( RuleToken::OR_OPERATOR ) : return 1;
54 case ( RuleToken::LEFT_PARENTHESIS ) : return 0;
55 case ( RuleToken::RIGHT_PARENTHESIS ) : return 0;
57 BAMTOOLS_ASSERT_UNREACHABLE;
62 inline bool isRightAssociative(const RuleToken& token) {
63 return (token.Type == RuleToken::NOT_OPERATOR ||
64 token.Type == RuleToken::LEFT_PARENTHESIS);
67 inline bool isLeftAssociative(const RuleToken& token) {
68 return !isRightAssociative(token);
71 inline bool isLeftParenthesis(const RuleToken& token) {
72 return ( token.Type == RuleToken::LEFT_PARENTHESIS );
75 inline bool isRightParenthesis(const RuleToken& token) {
76 return ( token.Type == RuleToken::RIGHT_PARENTHESIS );
79 inline bool isOperand(const RuleToken& token) {
80 return ( token.Type == RuleToken::OPERAND );
83 inline bool isOperator(const RuleToken& token) {
84 return ( token.Type == RuleToken::AND_OPERATOR ||
85 token.Type == RuleToken::OR_OPERATOR ||
86 token.Type == RuleToken::NOT_OPERATOR);
89 // -------------------------------------------
90 // RuleParser implementation
96 RuleParser(const std::string& ruleString)
97 : m_ruleString(ruleString)
99 // initialize char markers
100 m_begin = (char*)m_ruleString.c_str();
101 m_end = m_begin + m_ruleString.length();
105 ~RuleParser(void) { }
110 std::queue<std::string> results(void) const { return m_ruleQueue; }
114 char getNextChar(void);
115 void ignoreQuotes(void);
116 bool readToken(RuleToken& token);
117 void skipSpaces(void);
121 std::string m_ruleString;
126 std::queue<std::string> m_ruleQueue;
127 std::stack<RuleToken> m_operatorStack;
131 char RuleParser::getNextChar(void) {
132 if ( m_current == m_end ) return 0;
137 void RuleParser::ignoreQuotes(void) {
138 if ( *m_begin == '\"' ) ++m_begin;
139 if ( *m_end == '\"' ) --m_end;
143 void RuleParser::parse(void) {
145 // clear out any prior data
146 while ( !m_ruleQueue.empty() )
149 // skip if no rule to parse
150 if ( m_ruleString.empty() ) return;
152 // start at beginning of ruleString
155 // iterate through tokens in rule string
157 while ( readToken(token) ) {
159 if ( token.Value.empty() ) break;
161 // if token is an operand
162 if ( isOperand(token) )
163 m_ruleQueue.push(token.Value);
165 // if token is an operator
166 else if ( isOperator(token) ) {
168 // pop any operators at top of stack with higher priority
169 while ( !m_operatorStack.empty() ) {
170 const RuleToken& opToken = m_operatorStack.top();
171 if ( (isLeftAssociative(token) && (priority(token) <= priority(opToken))) ||
172 (isRightAssociative(token) && (priority(token) < priority(opToken)))
175 m_ruleQueue.push(opToken.Value);
176 m_operatorStack.pop();
181 // push current operator token onto stack
182 m_operatorStack.push(token);
185 // if token is left parenthesis
186 else if ( isLeftParenthesis(token) )
187 m_operatorStack.push(token);
189 // if token is right parenthesis
190 else if ( isRightParenthesis(token) ) {
192 bool foundLeftParenthesis = false;
194 // push operators into rule queue until left parenthesis found
195 while ( !m_operatorStack.empty() && !foundLeftParenthesis ) {
196 const RuleToken& opToken = m_operatorStack.top();
197 if ( !isLeftParenthesis(opToken) )
198 m_ruleQueue.push(opToken.Value);
200 foundLeftParenthesis = true;
201 m_operatorStack.pop();
204 // no left parenthesis found, error
205 BAMTOOLS_ASSERT_MESSAGE( foundLeftParenthesis, "ERROR: Mismatched parenthesis in rule string.1");
208 // error: unknown operand
209 else BAMTOOLS_ASSERT_UNREACHABLE;
212 // while there are still operators on stack
213 while ( !m_operatorStack.empty() ) {
214 const RuleToken& token = m_operatorStack.top();
215 BAMTOOLS_ASSERT_MESSAGE( (!isLeftParenthesis(token) && !isRightParenthesis(token)), "ERROR: Mismatched parenthesis in rule string.2");
216 m_ruleQueue.push(token.Value);
217 m_operatorStack.pop();
222 bool RuleParser::readToken(RuleToken& token) {
224 // skip any preceding whitespace
226 if ( m_current == m_end ) return false;
228 // clear out prior token value
231 // read chars while still in token
233 bool keepReading = true;
234 bool inOperandString = false;
235 while ( keepReading && (c != 0) ) {
241 // current char is '('
242 case ( LEFT_PARENTHESIS_CHAR ) :
243 token.Type = RuleToken::LEFT_PARENTHESIS;
244 token.Value.append(1, LEFT_PARENTHESIS_CHAR);
248 // current char is ')'
249 case ( RIGHT_PARENTHESIS_CHAR ) :
250 if ( inOperandString )
253 token.Type = RuleToken::RIGHT_PARENTHESIS;
254 token.Value.append(1, RIGHT_PARENTHESIS_CHAR);
259 // current char is '&'
260 case ( AND_OPERATOR_CHAR ) :
261 if ( inOperandString )
264 token.Type = RuleToken::AND_OPERATOR;
265 token.Value.append(1, AND_OPERATOR_CHAR);
270 // current char is '|'
271 case ( OR_OPERATOR_CHAR ) :
272 if ( inOperandString )
275 token.Type = RuleToken::OR_OPERATOR;
276 token.Value.append(1, OR_OPERATOR_CHAR);
281 // current char is '!'
282 case ( NOT_OPERATOR_CHAR ) :
283 token.Type = RuleToken::NOT_OPERATOR;
284 token.Value.append(1, NOT_OPERATOR_CHAR);
288 // current char is ' '
289 case ( SPACE_CHAR ) :
293 // current char is a true value token
296 token.Type = RuleToken::OPERAND;
297 token.Value.append(1, c);
298 inOperandString = true;
308 void RuleParser::skipSpaces(void) {
309 while ( m_current != m_end ) {
310 const char c = *m_current;
311 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n')
317 } // namespace BamTools
319 #endif // BAMTOOLS_FILTER_RULEPARSER_H