1 /* -------------------------------------------------------------------------- *
2  *                                   Lepton                                   *
3  * -------------------------------------------------------------------------- *
4  * This is part of the Lepton expression parser originating from              *
5  * Simbios, the NIH National Center for Physics-Based Simulation of           *
6  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
7  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
8  *                                                                            *
9  * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
10  * Authors: Peter Eastman                                                     *
11  * Contributors:                                                              *
12  *                                                                            *
13  * Permission is hereby granted, free of charge, to any person obtaining a    *
14  * copy of this software and associated documentation files (the "Software"), *
15  * to deal in the Software without restriction, including without limitation  *
16  * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
17  * and/or sell copies of the Software, and to permit persons to whom the      *
18  * Software is furnished to do so, subject to the following conditions:       *
19  *                                                                            *
20  * The above copyright notice and this permission notice shall be included in *
21  * all copies or substantial portions of the Software.                        *
22  *                                                                            *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
26  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
27  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
28  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
29  * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
30  * -------------------------------------------------------------------------- */
31 
32 #include "lepton/Parser.h"
33 #include "lepton/CustomFunction.h"
34 #include "lepton/Exception.h"
35 #include "lepton/ExpressionTreeNode.h"
36 #include "lepton/Operation.h"
37 #include "lepton/ParsedExpression.h"
38 #include <cctype>
39 #include <iostream>
40 
41 using namespace Lepton;
42 using namespace std;
43 
44 static const string Digits = "0123456789";
45 static const string Operators = "+-*/^";
46 static const bool LeftAssociative[] = {true, true, true, true, false};
47 static const int Precedence[] = {0, 0, 1, 1, 3};
48 static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
49 
50 class Lepton::ParseToken {
51 public:
52     enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
53 
ParseToken(string text,Type type)54     ParseToken(string text, Type type) : text(text), type(type) {
55     }
getText() const56     const string& getText() const {
57         return text;
58     }
getType() const59     Type getType() const {
60         return type;
61     }
62 private:
63     string text;
64     Type type;
65 };
66 
trim(const string & expression)67 string Parser::trim(const string& expression) {
68     // Remove leading and trailing spaces.
69 
70     int start, end;
71     for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
72         ;
73     for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--)
74         ;
75     if (start == end && isspace(expression[end]))
76         return "";
77     return expression.substr(start, end-start+1);
78 }
79 
getNextToken(const string & expression,int start)80 ParseToken Parser::getNextToken(const string& expression, int start) {
81     char c = expression[start];
82     if (c == '(')
83         return ParseToken("(", ParseToken::LeftParen);
84     if (c == ')')
85         return ParseToken(")", ParseToken::RightParen);
86     if (c == ',')
87         return ParseToken(",", ParseToken::Comma);
88     if (Operators.find(c) != string::npos)
89         return ParseToken(string(1, c), ParseToken::Operator);
90     if (isspace(c)) {
91         // White space
92 
93         for (int pos = start+1; pos < (int) expression.size(); pos++) {
94             if (!isspace(expression[pos]))
95                 return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
96         }
97         return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
98     }
99     if (c == '.' || Digits.find(c) != string::npos) {
100         // A number
101 
102         bool foundDecimal = (c == '.');
103         bool foundExp = false;
104         int pos;
105         for (pos = start+1; pos < (int) expression.size(); pos++) {
106             c = expression[pos];
107             if (Digits.find(c) != string::npos)
108                 continue;
109             if (c == '.' && !foundDecimal) {
110                 foundDecimal = true;
111                 continue;
112             }
113             if ((c == 'e' || c == 'E') && !foundExp) {
114                 foundExp = true;
115                 if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
116                     pos++;
117                 continue;
118             }
119             break;
120         }
121         return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
122     }
123 
124     // A variable, function, or left parenthesis
125 
126     for (int pos = start; pos < (int) expression.size(); pos++) {
127         c = expression[pos];
128         if (c == '(')
129             return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
130         if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
131             return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
132     }
133     return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
134 }
135 
tokenize(const string & expression)136 vector<ParseToken> Parser::tokenize(const string& expression) {
137     vector<ParseToken> tokens;
138     int pos = 0;
139     while (pos < (int) expression.size()) {
140         ParseToken token = getNextToken(expression, pos);
141         if (token.getType() != ParseToken::Whitespace)
142             tokens.push_back(token);
143         pos += (int) token.getText().size();
144     }
145     return tokens;
146 }
147 
parse(const string & expression)148 ParsedExpression Parser::parse(const string& expression) {
149     return parse(expression, map<string, CustomFunction*>());
150 }
151 
parse(const string & expression,const map<string,CustomFunction * > & customFunctions)152 ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
153     try {
154         // First split the expression into subexpressions.
155 
156         string primaryExpression = expression;
157         vector<string> subexpressions;
158         while (true) {
159             string::size_type pos = primaryExpression.find_last_of(';');
160             if (pos == string::npos)
161                 break;
162             string sub = trim(primaryExpression.substr(pos+1));
163             if (sub.size() > 0)
164                 subexpressions.push_back(sub);
165             primaryExpression = primaryExpression.substr(0, pos);
166         }
167 
168         // Parse the subexpressions.
169 
170         map<string, ExpressionTreeNode> subexpDefs;
171         for (int i = 0; i < (int) subexpressions.size(); i++) {
172             string::size_type equalsPos = subexpressions[i].find('=');
173             if (equalsPos == string::npos)
174                 throw Exception("subexpression does not specify a name");
175             string name = trim(subexpressions[i].substr(0, equalsPos));
176             if (name.size() == 0)
177                 throw Exception("subexpression does not specify a name");
178             vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
179             int pos = 0;
180             subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
181             if (pos != tokens.size())
182                 throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
183         }
184 
185         // Now parse the primary expression.
186 
187         vector<ParseToken> tokens = tokenize(primaryExpression);
188         int pos = 0;
189         ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
190         if (pos != tokens.size())
191             throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
192         return ParsedExpression(result);
193     }
194     catch (Exception& ex) {
195         throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
196     }
197 }
198 
parsePrecedence(const vector<ParseToken> & tokens,int & pos,const map<string,CustomFunction * > & customFunctions,const map<string,ExpressionTreeNode> & subexpressionDefs,int precedence)199 ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
200             const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
201     if (pos == tokens.size())
202         throw Exception("unexpected end of expression");
203 
204     // Parse the next value (number, variable, function, parenthesized expression)
205 
206     ParseToken token = tokens[pos];
207     ExpressionTreeNode result;
208     if (token.getType() == ParseToken::Number) {
209         double value;
210         stringstream(token.getText()) >> value;
211         result = ExpressionTreeNode(new Operation::Constant(value));
212         pos++;
213     }
214     else if (token.getType() == ParseToken::Variable) {
215         map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
216         if (subexp == subexpressionDefs.end()) {
217             Operation* op = new Operation::Variable(token.getText());
218             result = ExpressionTreeNode(op);
219         }
220         else
221             result = subexp->second;
222         pos++;
223     }
224     else if (token.getType() == ParseToken::LeftParen) {
225         pos++;
226         result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
227         if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
228             throw Exception("unbalanced parentheses");
229         pos++;
230     }
231     else if (token.getType() == ParseToken::Function) {
232         pos++;
233         vector<ExpressionTreeNode> args;
234         bool moreArgs;
235         do {
236             args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
237             moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
238             if (moreArgs)
239                 pos++;
240         } while (moreArgs);
241         if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
242             throw Exception("unbalanced parentheses");
243         pos++;
244         Operation* op = getFunctionOperation(token.getText(), customFunctions);
245         try {
246             result = ExpressionTreeNode(op, args);
247         }
248         catch (...) {
249             delete op;
250             throw;
251         }
252     }
253     else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
254         pos++;
255         ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
256         result = ExpressionTreeNode(new Operation::Negate(), toNegate);
257     }
258     else
259         throw Exception("unexpected token: "+token.getText());
260 
261     // Now deal with the next binary operator.
262 
263     while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
264         token = tokens[pos];
265         int opIndex = (int) Operators.find(token.getText());
266         int opPrecedence = Precedence[opIndex];
267         if (opPrecedence < precedence)
268             return result;
269         pos++;
270         ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
271         Operation* op = getOperatorOperation(token.getText());
272         try {
273             result = ExpressionTreeNode(op, result, arg);
274         }
275         catch (...) {
276             delete op;
277             throw;
278         }
279     }
280     return result;
281 }
282 
getOperatorOperation(const std::string & name)283 Operation* Parser::getOperatorOperation(const std::string& name) {
284     switch (OperationId[Operators.find(name)]) {
285         case Operation::ADD:
286             return new Operation::Add();
287         case Operation::SUBTRACT:
288             return new Operation::Subtract();
289         case Operation::MULTIPLY:
290             return new Operation::Multiply();
291         case Operation::DIVIDE:
292             return new Operation::Divide();
293         case Operation::POWER:
294             return new Operation::Power();
295         default:
296             throw Exception("unknown operator");
297     }
298 }
299 
getFunctionOperation(const std::string & name,const map<string,CustomFunction * > & customFunctions)300 Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
301 
302     static map<string, Operation::Id> opMap;
303     if (opMap.size() == 0) {
304         opMap["sqrt"] = Operation::SQRT;
305         opMap["exp"] = Operation::EXP;
306         opMap["log"] = Operation::LOG;
307         opMap["sin"] = Operation::SIN;
308         opMap["cos"] = Operation::COS;
309         opMap["sec"] = Operation::SEC;
310         opMap["csc"] = Operation::CSC;
311         opMap["tan"] = Operation::TAN;
312         opMap["cot"] = Operation::COT;
313         opMap["asin"] = Operation::ASIN;
314         opMap["acos"] = Operation::ACOS;
315         opMap["atan"] = Operation::ATAN;
316         opMap["atan2"] = Operation::ATAN2;
317         opMap["sinh"] = Operation::SINH;
318         opMap["cosh"] = Operation::COSH;
319         opMap["tanh"] = Operation::TANH;
320         opMap["erf"] = Operation::ERF;
321         opMap["erfc"] = Operation::ERFC;
322         opMap["step"] = Operation::STEP;
323         opMap["delta"] = Operation::DELTA;
324         opMap["square"] = Operation::SQUARE;
325         opMap["cube"] = Operation::CUBE;
326         opMap["recip"] = Operation::RECIPROCAL;
327         opMap["min"] = Operation::MIN;
328         opMap["max"] = Operation::MAX;
329         opMap["abs"] = Operation::ABS;
330         opMap["floor"] = Operation::FLOOR;
331         opMap["ceil"] = Operation::CEIL;
332         opMap["select"] = Operation::SELECT;
333     }
334     string trimmed = name.substr(0, name.size()-1);
335 
336     // First check custom functions.
337 
338     map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
339     if (custom != customFunctions.end())
340         return new Operation::Custom(trimmed, custom->second->clone());
341 
342     // Now try standard functions.
343 
344     map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
345     if (iter == opMap.end())
346         throw Exception("unknown function: "+trimmed);
347     switch (iter->second) {
348         case Operation::SQRT:
349             return new Operation::Sqrt();
350         case Operation::EXP:
351             return new Operation::Exp();
352         case Operation::LOG:
353             return new Operation::Log();
354         case Operation::SIN:
355             return new Operation::Sin();
356         case Operation::COS:
357             return new Operation::Cos();
358         case Operation::SEC:
359             return new Operation::Sec();
360         case Operation::CSC:
361             return new Operation::Csc();
362         case Operation::TAN:
363             return new Operation::Tan();
364         case Operation::COT:
365             return new Operation::Cot();
366         case Operation::ASIN:
367             return new Operation::Asin();
368         case Operation::ACOS:
369             return new Operation::Acos();
370         case Operation::ATAN:
371             return new Operation::Atan();
372         case Operation::ATAN2:
373             return new Operation::Atan2();
374         case Operation::SINH:
375             return new Operation::Sinh();
376         case Operation::COSH:
377             return new Operation::Cosh();
378         case Operation::TANH:
379             return new Operation::Tanh();
380         case Operation::ERF:
381             return new Operation::Erf();
382         case Operation::ERFC:
383             return new Operation::Erfc();
384         case Operation::STEP:
385             return new Operation::Step();
386         case Operation::DELTA:
387             return new Operation::Delta();
388         case Operation::SQUARE:
389             return new Operation::Square();
390         case Operation::CUBE:
391             return new Operation::Cube();
392         case Operation::RECIPROCAL:
393             return new Operation::Reciprocal();
394         case Operation::MIN:
395             return new Operation::Min();
396         case Operation::MAX:
397             return new Operation::Max();
398         case Operation::ABS:
399             return new Operation::Abs();
400         case Operation::FLOOR:
401             return new Operation::Floor();
402         case Operation::CEIL:
403             return new Operation::Ceil();
404         case Operation::SELECT:
405             return new Operation::Select();
406         default:
407             throw Exception("unknown function");
408     }
409 }
410