stator
A math, geometry, and utility library
parser.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2017 Marcus N Campbell Bannerman <[email protected]>
3 
4  This file is part of stator.
5 
6  stator is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  stator is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with stator. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #pragma once
21 
23 #include <stator/exception.hpp>
24 #include <algorithm>
25 #include <cctype>
26 #include <map>
27 #include <sstream>
28 
29 namespace sym {
30  namespace detail {
56  class ExprTokenizer {
57  public:
58  ExprTokenizer(const std::string& str):
59  _str(str),
60  _start(0),
61  _end(0)
62  {
63  //Initialise the operators
64 
65  //These are left-associative operators, so we expect their RBP
66  //to be LBP+1, and they can group with themselves
67  //LBP, RBP, NBP
68 
70  //
71  //LBP: Left binding power is the precedence of the right operator.
72  //
73  //RBP: Right binding power is the precedence that the operator
74  //will collect as its right arguments (prefix operators have
75  //no right arguments, thus RBP is unused). If this is higher
76  //than the binding power, it will not collect itself (thus it
77  //is left-associative). If it is equal or greater, it will
78  //collect itself in its right argument (thus it is right
79  //associative).
80  //
81  //NBP: Next binding power
82 
87  //Power is right-associative, thus its RBP is equal to its LBP
88  //to ensure a^b^c is a^(b^c).
90 
91  //The unary operators
92 // _left_operators["+"].reset(new SkipToken<std::numeric_limits<int>::max()>());
93 // _left_operators["-"].reset(new UnaryNegative<std::numeric_limits<int>::max()>());
94 
97 
98  //The parenthesis operator is entirely handled by Parenthesis.
99  _left_operators["("].reset(new ParenthesisToken);
100  //A halt token stops parseExpression processing. The right
101  //parenthesis should be handled by the previous entry.
102  _right_operators[")"].reset(new HaltToken);
103 
104  //Unary operators have a maximum BP
105  _left_operators["sin"].reset(new UnaryOpToken<detail::Sine, std::numeric_limits<int>::max()>());
106  _left_operators["cos"].reset(new UnaryOpToken<detail::Cosine, std::numeric_limits<int>::max()>());
107  _left_operators["exp"].reset(new UnaryOpToken<detail::Exp, std::numeric_limits<int>::max()>());
108  _left_operators["ln"].reset(new UnaryOpToken<detail::Log, std::numeric_limits<int>::max()>());
109 
110  //The actual tokenisation is done in the consume() member
111  //function. The first call starts the process and clears the "end" state.
112  consume();
113  }
114 
115  std::string next() {
116  return empty() ? "" : _str.substr(_start, _end - _start);
117  }
118 
119  void expect(std::string token) {
120  if (next() != token)
121  stator_throw() << "Expected " << ((token.empty()) ? "end of expression": ("\""+token+"\"")) << " but found " << (empty() ? "the end of expression" : next()) << "\" instead?\n" << parserLoc();
122  consume();
123  }
124 
125  bool empty() const {
126  return _start == _str.size();
127  }
128 
129  void consume() {
130  _start = _end;
131 
132  //Skip whitespace
133  while ((_str[_start] == ' ') && (_start < _str.size())) ++_start;
134 
135  _end = _start;
136 
137  //Check for sequence end
138  if (empty()) return;
139 
140  //Not at end of sequence so at least one character in symbol
141  _end = _start + 1;
142 
143  //Parse numbers with decimal points and (possible signed)
144  //exponents. Signs at the front of numbers are parsed as unary
145  //operators.
146  if (std::isdigit(_str[_start])) {
147  consumeFloat();
148  return;
149  }
150 
151  //Parsing a string
152  if (std::isalpha(_str[_start])) {
153  while ((_end < _str.size()) && std::isalpha(_str[_end]))
154  ++_end;
155  return;
156  }
157 
158  //Allow non-alpha single character operators (longer operators are usually strings and caught above!)
159  if (_right_operators.find(_str.substr(_start, 1)) != _right_operators.end())
160  return;
161  if (_left_operators.find(_str.substr(_start, 1)) != _left_operators.end())
162  return;
163 
164  stator_throw() << "Unrecognised token \"" << _str[_start] << "\"\n" << parserLoc();
165  }
166 
167  void consumeFloat() {
168  bool decimal = false;
169  bool exponent = false;
170 
171  while (_end != _str.size()) {
172  if (_str[_end] == '.') {
173  if (!decimal && !exponent) {
174  decimal = true;
175  ++_end;
176  continue;
177  } else {
178  stator_throw() << "Unexpected decimal point?\n" << parserLoc();
179  }
180  }
181 
182  if ((_str[_end] == 'e') || (_str[_end] == 'E')) {
183  if (!exponent) {
184  exponent = true;
185  decimal = true; //Don't allow decimals in the exponent
186  ++_end;
187 
188  if (_end == _str.size())
189  stator_throw() << "String ended during parsing of exponent\n" << parserLoc();
190 
191  //Eat the exponent sign if present
192  if ((_str[_end] == '+') || (_str[_end] == '-'))
193  ++_end;
194 
195  if (_end == _str.size())
196  stator_throw() << "String ended during parsing of exponent\n" << parserLoc();
197 
198  if (!std::isdigit(_str[_end]))
199  stator_throw() << "Malformed exponent?\n" << parserLoc();
200 
201  continue;
202  } else
203  stator_throw() << "Double exponent?\n" << parserLoc();
204  }
205 
206  if (std::isdigit(_str[_end])) {
207  ++_end;
208  continue;
209  }
210 
211  break;
212  }
213  }
214 
215  std::string parserLoc() {
216  return _str + "\n"
217  + std::string(_start, ' ') + std::string((_start < _end) ? _end - _start -1: 0, '-') + "^";
218  }
219 
224  virtual Expr apply(Expr, ExprTokenizer&) const = 0;
226  virtual int LBP() const = 0;
228  virtual int NBP() const = 0;
229  };
230 
234  virtual Expr apply(Expr, ExprTokenizer&) const = 0;
236  virtual int BP() const = 0;
237  };
238 
239  template<class Op>
241  Expr apply(Expr l, ExprTokenizer& tk) const {
242  return Op::apply(l, tk.parseExpression(detail::RBP<Op>()));
243  }
244 
245  int LBP() const { return Op::leftBindingPower; }
246 
247  int NBP() const { return detail::NBP<Op>(); }
248  };
249 
251  virtual Expr apply(Expr, ExprTokenizer&) const { stator_throw() << "Should never be called!"; }
252  //To ensure it is always treated as a separate expression, its BP is negative (nothing can claim it)
253  virtual int LBP() const { return -1; }
254  int NBP() const { stator_throw() << "Should never be called!"; }
255  };
256 
258  Expr apply(Expr l, ExprTokenizer& tk) const {
259  tk.expect(")");
260  return l;
261  }
262 
263  //Parenthesis bind the whole following expression
264  int BP() const {
265  return 0;
266  }
267  };
268 
269  template<class Op, int tBP>
271  Expr apply(Expr l, ExprTokenizer& tk) const {
272  return Op::apply(l);
273  }
274 
275  int BP() const {
276  return tBP;
277  }
278  };
279 
280  template<int tBP>
281  struct SkipToken : public LeftOperatorBase {
282  Expr apply(Expr l, ExprTokenizer& tk) const {
283  return l;
284  }
285 
286  int BP() const {
287  return tBP;
288  }
289  };
290 
291  template<int tBP>
292  struct UnaryNegative : public LeftOperatorBase {
293  struct Visitor : VisitorHelper<Visitor> {
294  template<class T> Expr apply(const T& val) {
295  return -val;
296  }
297  };
298 
299  Expr apply(Expr l, ExprTokenizer& tk) const {
300  Visitor v;
301  return l->visit(v);
302  }
303 
304  int BP() const {
305  return tBP;
306  }
307  };
308 
309  std::map<std::string, shared_ptr<LeftOperatorBase> > _left_operators;
310  std::map<std::string, shared_ptr<RightOperatorBase> > _right_operators;
311 
316  std::string token = next();
317  consume();
318 
319  if (token.empty())
320  stator_throw() << "Unexpected end of expression?\n" << parserLoc();
321 
322  //Parse numbers
323  if (std::isdigit(token[0])) {
324  std::stringstream ss;
325  ss << token;
326  double val;
327  ss >> val;
328  return Expr(val);
329  }
330 
331  //Parse left operators
332  auto it = _left_operators.find(token);
333  if (it != _left_operators.end()) {
334  return it->second->apply(parseExpression(it->second->BP()), *this);
335  }
336 
337  //Its not a prefix operator or a number, if it is a single
338  //alpha character, then assume its a variable!
339  if ((token.size() == 1) && (std::isalpha(token[0])))
340  return Expr(VarRT(token[0]));
341 
342  stator_throw() << "Could not parse as a valid token?\n" << parserLoc();
343  }
344 
388  Expr parseExpression(int minLBP = 0) {
389  //Grab the first token (should be either a unary/prefix
390  //operator or a number/variable all of which may be a LHS of a
391  //multi arg operator (which are handled in this
392  //function). Unary/prefix operators are handled directly by
393  //parseToken()
394  Expr t = parseToken();
395 
396  //maxLBP is only allowed to decrease as it ensures the while
397  //loop climbs down the precedence tree (up the AST) to the
398  //root of the expression.
399  int maxLBP = std::numeric_limits<int>::max();
400  while (true) {
401  std::string token = next();
402 
403  //Handle the special case of end of string
404  if (token.empty()) break;
405 
406  //Determine the type of operator found
407  auto it = _right_operators.find(token);
408 
409  //If there is no match, then return an error
410  if (it == _right_operators.end())
411  stator_throw() << "Expected right operator but got \""<< token << "\"?\n" << parserLoc();
412 
413  //If the operator has a lower binding power than what this
414  //call can collect (as limited by minLBP), then return. If
415  //an operator has already been collected and its next
416  //binding power (stored in maxLBP) doesn't allow it to
417  //collect this operator, then exit and allow the calling
418  //function to handle this operator.
419  if ((minLBP > it->second->LBP()) || (it->second->LBP() > maxLBP)) break;
420 
421  consume();
422  t = it->second->apply(t, *this);
423  maxLBP = it->second->NBP();
424  }
425 
426  return t;
427  }
428 
429  private:
430 
431  std::string _str;
432  std::size_t _start;
433  std::size_t _end;
434  };
435  }
436 
437  Expr::Expr(const std::string& str)
438  {
439  auto tokenizer = detail::ExprTokenizer(str);
440  *this = simplify(tokenizer.parseExpression());
441 
442  //Check that the full string was parsed
443  if (!tokenizer.empty())
444  stator_throw() << "Parsing terminated unexpectedly early?\n" << tokenizer.parserLoc();
445  }
446 
447  Expr::Expr(const char* str) : Expr(std::string(str)) {}
448 }
A CRTP helper base class which transforms the visitor interface into a call to the derived classes ap...
Definition: runtime.hpp:135
int LBP() const
Left binding power (Precedence of this operator)
Definition: parser.hpp:245
Expr apply(Expr l, ExprTokenizer &tk) const
Takes one operand and returns the corresponding Expr.
Definition: parser.hpp:271
Expr apply(Expr l, ExprTokenizer &tk) const
Takes one operand and returns the corresponding Expr.
Definition: parser.hpp:299
Expr apply(Expr l, ExprTokenizer &tk) const
Takes left operand and returns the corresponding Expr, fetching the right operands from the tokenizer...
Definition: parser.hpp:241
virtual int LBP() const =0
Left binding power (Precedence of this operator)
int NBP() const
Next binding power (highest precedence of the operator that this operator can be a left operand of) ...
Definition: parser.hpp:247
void expect(std::string token)
Definition: parser.hpp:119
std::map< std::string, shared_ptr< LeftOperatorBase > > _left_operators
Definition: parser.hpp:309
int BP() const
Binding power to the right arguments of the Token.
Definition: parser.hpp:286
virtual Expr apply(Expr, ExprTokenizer &) const =0
Takes left operand and returns the corresponding Expr, fetching the right operands from the tokenizer...
std::string parserLoc()
Definition: parser.hpp:215
virtual int LBP() const
Left binding power (Precedence of this operator)
Definition: parser.hpp:253
Expr parseExpression(int minLBP=0)
Main parsing entry function.
Definition: parser.hpp:388
int BP() const
Binding power to the right arguments of the Token.
Definition: parser.hpp:275
Expr simplify(const Expr &f)
Definition: runtime.hpp:511
int NBP() const
Next binding power (highest precedence of the operator that this operator can be a left operand of) ...
Definition: parser.hpp:254
The generic holder/smart pointer for a runtime Abstract Syntax Tree (AST) (expression).
Definition: runtime.hpp:68
std::map< std::string, shared_ptr< RightOperatorBase > > _right_operators
Definition: parser.hpp:310
The stator symbolic math library.
Expr parseToken()
Parses a single token (unary/prefix op, variable, or number), where precedence issues do not arise...
Definition: parser.hpp:315
Implementation of expression tokenization and parsing into Expr types.
Definition: parser.hpp:56
Expr apply(Expr l, ExprTokenizer &tk) const
Takes one operand and returns the corresponding Expr.
Definition: parser.hpp:258
int BP() const
Binding power to the right arguments of the Token.
Definition: parser.hpp:304
#define stator_throw()
Definition: exception.hpp:26
Var< Dynamic > VarRT
Definition: runtime.hpp:58
int BP() const
Binding power to the right arguments of the Token.
Definition: parser.hpp:264
virtual Expr apply(Expr, ExprTokenizer &) const
Takes left operand and returns the corresponding Expr, fetching the right operands from the tokenizer...
Definition: parser.hpp:251
std::pair< int, int > BP(const T &v)
Returns the binding powers (precedence) of binary operators.
Definition: print.hpp:163
virtual int NBP() const =0
Next binding power (highest precedence of the operator that this operator can be a left operand of) ...
Expr apply(Expr l, ExprTokenizer &tk) const
Takes one operand and returns the corresponding Expr.
Definition: parser.hpp:282
ExprTokenizer(const std::string &str)
Definition: parser.hpp:58