1 /*
2  * The Doomsday Engine Project -- libcore
3  *
4  * Copyright © 2004-2017 Jaakko Keränen <jaakko.keranen@iki.fi>
5  *
6  * @par License
7  * LGPL: http://www.gnu.org/licenses/lgpl.html
8  *
9  * <small>This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser General Public License as published by
11  * the Free Software Foundation; either version 3 of the License, or (at your
12  * option) any later version. This program is distributed in the hope that it
13  * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
15  * General Public License for more details. You should have received a copy of
16  * the GNU Lesser General Public License along with this program; if not, see:
17  * http://www.gnu.org/licenses</small>
18  */
19 
20 #include "de/Parser"
21 #include "de/ArrayExpression"
22 #include "de/AssignStatement"
23 #include "de/BuiltInExpression"
24 #include "de/CatchStatement"
25 #include "de/ConstantExpression"
26 #include "de/DeleteStatement"
27 #include "de/DictionaryExpression"
28 #include "de/ExpressionStatement"
29 #include "de/FlowStatement"
30 #include "de/ForStatement"
31 #include "de/FunctionStatement"
32 #include "de/IfStatement"
33 #include "de/LogBuffer"
34 #include "de/NameExpression"
35 #include "de/NumberValue"
36 #include "de/Operator"
37 #include "de/OperatorExpression"
38 #include "de/PrintStatement"
39 #include "de/ScopeStatement"
40 #include "de/Script"
41 #include "de/ScriptLex"
42 #include "de/TextValue"
43 #include "de/TokenBuffer"
44 #include "de/TokenRange"
45 #include "de/TryStatement"
46 #include "de/WhileStatement"
47 
48 
49 #include <sstream>
50 
51 using namespace de;
52 
Parser()53 Parser::Parser()
54 {}
55 
~Parser()56 Parser::~Parser()
57 {}
58 
parse(String const & input,Script & output)59 void Parser::parse(String const &input, Script &output)
60 {
61     // Lexical analyzer for Haw scripts.
62     _analyzer = ScriptLex(input);
63 
64     // Get the tokens of the first statement.
65     if (nextStatement() > 0)
66     {
67         // Parse the bottom-level compound.
68         parseCompound(output.compound());
69     }
70 
71     // We're done, free the remaining tokens.
72     _tokens.clear();
73 }
74 
nextStatement()75 duint Parser::nextStatement()
76 {
77     duint result = _analyzer.getStatement(_tokens);
78 
79     // Begin with the whole thing.
80     _statementRange = TokenRange(_tokens);
81 
82     //std::cout << "Next statement: '" << _statementRange.asText() << "'\n";
83 
84     return result;
85 }
86 
parseCompound(Compound & compound)87 void Parser::parseCompound(Compound &compound)
88 {
89     while (_statementRange.size() > 0)
90     {
91         if (_statementRange.firstToken().equals(ScriptLex::ELSIF) ||
92             _statementRange.firstToken().equals(ScriptLex::ELSE)  ||
93             _statementRange.firstToken().equals(ScriptLex::CATCH) ||
94             (_statementRange.size() == 1 && _statementRange.firstToken().equals(ScriptLex::END)))
95         {
96             // End of compound.
97             break;
98         }
99 
100         // We have a list of tokens, which form a statement.
101         // Figure out what it is and generate the correct statement(s)
102         // and expressions.
103         parseStatement(compound);
104     }
105 }
106 
parseStatement(Compound & compound)107 void Parser::parseStatement(Compound &compound)
108 {
109     DENG2_ASSERT(!_statementRange.isEmpty());
110 
111     Token const &firstToken = _statementRange.firstToken();
112     const auto firstTokenLine = firstToken.line();
113 
114     // Statements with a compound: if, for, while, def.
115     if (firstToken.equals(ScriptLex::IF))
116     {
117         compound.add(parseIfStatement(), firstTokenLine);
118         return;
119     }
120     else if (firstToken.equals(ScriptLex::WHILE))
121     {
122         compound.add(parseWhileStatement(), firstTokenLine);
123         return;
124     }
125     else if (firstToken.equals(ScriptLex::FOR))
126     {
127         compound.add(parseForStatement(), firstTokenLine);
128         return;
129     }
130     else if (firstToken.equals(ScriptLex::DEF))
131     {
132         compound.add(parseFunctionStatement(), firstTokenLine);
133         return;
134     }
135     else if (firstToken.equals(ScriptLex::TRY))
136     {
137         parseTryCatchSequence(compound);
138         return;
139     }
140 
141     // Statements without a compound (must advance to next statement manually).
142     if (firstToken.equals(ScriptLex::IMPORT))
143     {
144         compound.add(parseImportStatement(), firstTokenLine);
145     }
146     else if (firstToken.equals(ScriptLex::RECORD))
147     {
148         compound.add(parseDeclarationStatement(), firstTokenLine);
149     }
150     else if (firstToken.equals(ScriptLex::DEL))
151     {
152         compound.add(parseDeleteStatement(), firstTokenLine);
153     }
154     else if (firstToken.equals(ScriptLex::PASS))
155     {
156         compound.add(new FlowStatement(FlowStatement::PASS), firstTokenLine);
157     }
158     else if (firstToken.equals(ScriptLex::CONTINUE))
159     {
160         compound.add(new FlowStatement(FlowStatement::CONTINUE), firstTokenLine);
161     }
162     else if (firstToken.equals(ScriptLex::BREAK))
163     {
164         // Break may have an expression argument that tells us how many
165         // nested compounds to break out of.
166         Expression *breakCount = 0;
167         if (_statementRange.size() > 1)
168         {
169             breakCount = parseExpression(_statementRange.startingFrom(1));
170         }
171         compound.add(new FlowStatement(FlowStatement::BREAK, breakCount), firstTokenLine);
172     }
173     else if (firstToken.equals(ScriptLex::RETURN) || firstToken.equals(ScriptLex::THROW))
174     {
175         Expression *argValue = 0;
176         if (_statementRange.size() > 1)
177         {
178             argValue = parseExpression(_statementRange.startingFrom(1));
179         }
180         compound.add(new FlowStatement(
181             firstToken.equals(ScriptLex::RETURN)? FlowStatement::RETURN : FlowStatement::THROW,
182             argValue), firstTokenLine);
183     }
184     else if (firstToken.equals(ScriptLex::PRINT))
185     {
186         compound.add(parsePrintStatement(), firstTokenLine);
187     }
188     else if (_statementRange.hasBracketless(ScriptLex::ASSIGN) ||
189              _statementRange.hasBracketless(ScriptLex::SCOPE_ASSIGN) ||
190              _statementRange.hasBracketless(ScriptLex::WEAK_ASSIGN))
191     {
192         compound.add(parseAssignStatement(), firstTokenLine);
193     }
194 #if 0
195     else if (firstToken.equals(ScriptLex::EXPORT))
196     {
197         compound.add(parseExportStatement(), firstTokenLine);
198     }
199 #endif
200     else
201     {
202         compound.add(parseExpressionStatement(), firstTokenLine);
203     }
204 
205     // We've fully parsed the current set of tokens, get the next statement.
206     nextStatement();
207 }
208 
parseIfStatement()209 IfStatement *Parser::parseIfStatement()
210 {
211     // The "end" keyword is necessary in the full form.
212     bool expectEnd = !_statementRange.hasBracketless(Token::COLON);
213 
214     std::unique_ptr<IfStatement> statement(new IfStatement());
215     statement->newBranch();
216     statement->setBranchCondition(
217         parseConditionalCompound(statement->branchCompound(),
218             HasCondition | StayAtClosingStatement));
219 
220     while (_statementRange.beginsWith(ScriptLex::ELSIF))
221     {
222         expectEnd = !_statementRange.hasBracketless(Token::COLON);
223         statement->newBranch();
224         statement->setBranchCondition(
225             parseConditionalCompound(statement->branchCompound(),
226                 HasCondition | StayAtClosingStatement));
227     }
228 
229     if (_statementRange.beginsWith(ScriptLex::ELSE))
230     {
231         expectEnd = !_statementRange.has(Token::COLON);
232         parseConditionalCompound(statement->elseCompound(), StayAtClosingStatement);
233     }
234 
235     if (expectEnd)
236     {
237         if (_statementRange.size() != 1 || !_statementRange.firstToken().equals(ScriptLex::END))
238         {
239             throw UnexpectedTokenError("Parser::parseIfStatement", "Expected '" + ScriptLex::END +
240                                        "', but got " + _statementRange.firstToken().asText());
241         }
242         nextStatement();
243     }
244 
245     return statement.release();
246 }
247 
parseWhileStatement()248 WhileStatement *Parser::parseWhileStatement()
249 {
250     // "while" expr ":" statement
251     // "while" expr "\n" compound
252 
253     std::unique_ptr<WhileStatement> statement(new WhileStatement());
254     statement->setCondition(parseConditionalCompound(statement->compound(), HasCondition));
255     return statement.release();
256 }
257 
parseForStatement()258 ForStatement *Parser::parseForStatement()
259 {
260     // "for" by-ref-expr "in" expr ":" statement
261     // "for" by-ref-expr "in" expr "\n" compound
262 
263     dint colonPos = _statementRange.find(Token::COLON);
264     dint inPos = _statementRange.find(ScriptLex::IN);
265     if (inPos < 0 || (colonPos > 0 && inPos > colonPos))
266     {
267         throw MissingTokenError("Parser::parseForStatement",
268             "Expected 'in' to follow " + _statementRange.firstToken().asText());
269     }
270 
271     std::unique_ptr<Expression> iter(parseExpression(_statementRange.between(1, inPos),
272         Expression::ByReference | Expression::NewVariable | Expression::LocalOnly));
273     Expression *iterable = parseExpression(_statementRange.between(inPos + 1, colonPos));
274 
275     std::unique_ptr<ForStatement> statement(new ForStatement(iter.release(), iterable));
276 
277     // Parse the statements of the method.
278     parseConditionalCompound(statement->compound(), IgnoreExtraBeforeColon);
279 
280     return statement.release();
281 }
282 
parseImportStatement()283 ExpressionStatement *Parser::parseImportStatement()
284 {
285     // "import" ["record"] name-expr ["," name-expr]*
286 
287     if (_statementRange.size() < 2)
288     {
289         throw MissingTokenError("Parser::parseImportStatement",
290             "Expected identifier to follow " + _statementRange.firstToken().asText());
291     }
292     dint startAt = 1;
293     Expression::Flags flags =
294             Expression::Import     |
295             //Expression::NotInScope |
296             Expression::LocalOnly;
297     if (_statementRange.size() >= 3 && _statementRange.token(1).equals(ScriptLex::RECORD))
298     {
299         // Take a copy of the imported record instead of referencing it.
300         flags |= Expression::ByValue;
301         startAt = 2;
302     }
303     return new ExpressionStatement(parseList(_statementRange.startingFrom(startAt), Token::COMMA, flags));
304 }
305 
306 #if 0
307 ExpressionStatement *Parser::parseExportStatement()
308 {
309     // "export" name-expr ["," name-expr]*
310 
311     if (_statementRange.size() < 2)
312     {
313         throw MissingTokenError("Parser::parseExportStatement",
314             "Expected identifier to follow " + _statementRange.firstToken().asText());
315     }
316 
317     return new ExpressionStatement(parseList(_statementRange.startingFrom(1), Token::COMMA,
318                                              Expression::Export | Expression::LocalOnly));
319 }
320 #endif
321 
parseDeclarationStatement()322 Statement *Parser::parseDeclarationStatement()
323 {
324     // "record" name-expr ["," name-expr]*
325     // "record" name-expr "(" [ name-expr ["," name-expr]* ] ")" members-compound
326 
327     if (_statementRange.size() < 2)
328     {
329         throw MissingTokenError("Parser::parseDeclarationStatement",
330             "Expected identifier to follow " + _statementRange.firstToken().asText());
331     }
332 
333     // Is this a class record declaration?
334     dint pos = _statementRange.find(Token::PARENTHESIS_OPEN);
335     if (pos >= 0)
336     {
337         QScopedPointer<Expression> name(parseExpression(_statementRange.between(1, pos),
338                                                         Expression::NewSubrecordIfNotInScope));
339         QScopedPointer<ScopeStatement> names(new ScopeStatement(name.take(),
340                 parseList(_statementRange.between(pos + 1, _statementRange.closingBracket(pos)))));
341 
342         parseConditionalCompound(names->compound(),
343                                  IgnoreExtraBeforeColon | StayAtClosingStatement);
344         return names.take();
345     }
346     else
347     {
348         // Regular record declaration.
349         Expression::Flags flags = Expression::LocalOnly | Expression::NewSubrecord;
350         return new ExpressionStatement(parseList(_statementRange.startingFrom(1), Token::COMMA, flags));
351     }
352 }
353 
parseDeleteStatement()354 DeleteStatement *Parser::parseDeleteStatement()
355 {
356     // "del" name-expr ["," name-expr]*
357 
358     if (_statementRange.size() < 2)
359     {
360         throw MissingTokenError("Parser::parseDeleteStatement",
361             "Expected identifier to follow " + _statementRange.firstToken().asText());
362     }
363     return new DeleteStatement(parseList(_statementRange.startingFrom(1), Token::COMMA,
364                                          Expression::LocalOnly | Expression::ByReference));
365 }
366 
parseFunctionStatement()367 FunctionStatement *Parser::parseFunctionStatement()
368 {
369     // "def" name-expr "(" [ name-expr ["," name-expr]* ] ")" cond-compound
370 
371     dint pos = _statementRange.find(Token::PARENTHESIS_OPEN);
372     if (pos < 0)
373     {
374         throw MissingTokenError("Parser::parseFunctionStatement",
375             "Expected arguments for " + _statementRange.firstToken().asText());
376     }
377 
378     // The function must have a name that is not already in use in the scope.
379     std::unique_ptr<FunctionStatement> statement(new FunctionStatement(
380         parseExpression(_statementRange.between(1, pos),
381                         Expression::LocalOnly   | Expression::ByReference |
382                         Expression::NewVariable | Expression::NotInScope)));
383 
384     // Collect the argument names.
385     TokenRange argRange = _statementRange.between(pos + 1, _statementRange.closingBracket(pos));
386     if (!argRange.isEmpty())
387     {
388         // The arguments are comma-separated.
389         TokenRange delim = argRange.undefinedRange();
390         while (argRange.getNextDelimited(Token::COMMA, delim))
391         {
392             if (delim.size() == 1 && delim.firstToken().type() == Token::IDENTIFIER)
393             {
394                 // Just the name of the argument.
395                 statement->addArgument(delim.firstToken().str());
396             }
397             else if (delim.size() >= 3 &&
398                 delim.token(0).type() == Token::IDENTIFIER &&
399                 delim.token(1).equals(ScriptLex::ASSIGN))
400             {
401                 // Argument with a default value.
402                 statement->addArgument(delim.firstToken().str(),
403                     parseExpression(delim.startingFrom(2)));
404             }
405             else
406             {
407                 throw UnexpectedTokenError("Parser::parseFunctionStatement",
408                     "'" + delim.asText() + "' was unexpected in argument definition at " +
409                     argRange.firstToken().asText());
410             }
411         }
412     }
413 
414     // Parse the statements of the function.
415     parseConditionalCompound(statement->compound(), IgnoreExtraBeforeColon);
416 
417     return statement.release();
418 }
419 
parseTryCatchSequence(Compound & compound)420 void Parser::parseTryCatchSequence(Compound &compound)
421 {
422     // "try" cond-compound catch-compound [catch-compound]*
423     // catch-compound: "catch" name-expr ["," ref-name-expr] cond-compound
424 
425     const duint lineNumber = _statementRange.firstToken().line();
426     std::unique_ptr<TryStatement> tryStat(new TryStatement);
427     parseConditionalCompound(tryStat->compound(), StayAtClosingStatement);
428     compound.add(tryStat.release(), lineNumber);
429 
430     // One catch is required.
431     if (!_statementRange.firstToken().equals(ScriptLex::CATCH))
432     {
433         throw UnexpectedTokenError("Parser::parseTryCatchSequence",
434             "Expected 'catch', but got " + _statementRange.firstToken().asText());
435     }
436     CatchStatement *finalCatch = nullptr;
437     bool expectEnd = false;
438     while (!_statementRange.isEmpty() &&
439            _statementRange.firstToken().equals(ScriptLex::CATCH))
440     {
441         dint colon = _statementRange.find(Token::COLON);
442         expectEnd = (colon < 0);
443 
444         // Parse the arguments.
445         std::unique_ptr<ArrayExpression> args;
446         if (_statementRange.size() > 1)
447         {
448             TokenRange argRange;
449             if (colon < 0)
450             {
451                 argRange = _statementRange.startingFrom(1);
452             }
453             else
454             {
455                 argRange = _statementRange.between(1, colon);
456             }
457             args.reset(parseList(argRange, Token::COMMA,
458                 Expression::ByReference | Expression::LocalOnly | Expression::NewVariable));
459         }
460 
461         std::unique_ptr<CatchStatement> catchStat(new CatchStatement(args.release()));
462         parseConditionalCompound(catchStat->compound(),
463             StayAtClosingStatement | IgnoreExtraBeforeColon);
464 
465         // The final catch will be flagged.
466         finalCatch = catchStat.get();
467 
468         // Add it to the compound.
469         compound.add(catchStat.release(), lineNumber);
470     }
471     if (finalCatch)
472     {
473         finalCatch->flags |= CatchStatement::FinalCompound;
474     }
475     if (expectEnd)
476     {
477         if (!_statementRange.firstToken().equals(ScriptLex::END))
478         {
479             throw UnexpectedTokenError("Parser::parseTryCatchSequence",
480             "Expected 'end', but got " + _statementRange.firstToken().asText());
481         }
482         nextStatement();
483     }
484 }
485 
parsePrintStatement()486 PrintStatement *Parser::parsePrintStatement()
487 {
488     ArrayExpression *args = 0;
489     if (_statementRange.size() == 1) // Just the keyword.
490     {
491         args = new ArrayExpression();
492     }
493     else
494     {
495         // Parse the arguments of the print statement.
496         args = parseList(_statementRange.startingFrom(1));
497     }
498     return new PrintStatement(args);
499 }
500 
parseAssignStatement()501 AssignStatement *Parser::parseAssignStatement()
502 {
503     Expression::Flags flags = Expression::NewVariable | Expression::ByReference | Expression::LocalOnly;
504 
505 #if 0
506     /// "export" will export the newly assigned variable.
507     if (_statementRange.firstToken().equals(ScriptLex::EXPORT))
508     {
509         flags |= Expression::Export;
510         _statementRange = _statementRange.startingFrom(1);
511     }
512 #endif
513 
514     /// "const" makes read-only variables.
515     if (_statementRange.firstToken().equals(ScriptLex::CONST))
516     {
517         flags |= Expression::ReadOnly;
518         _statementRange = _statementRange.startingFrom(1);
519     }
520 
521     dint pos = _statementRange.find(ScriptLex::ASSIGN);
522     if (pos < 0)
523     {
524         flags &= ~Expression::LocalOnly;
525         pos = _statementRange.find(ScriptLex::SCOPE_ASSIGN);
526         if (pos < 0)
527         {
528             // Must be weak assingment, then.
529             pos = _statementRange.find(ScriptLex::WEAK_ASSIGN);
530             flags |= Expression::ThrowawayIfInScope;
531         }
532     }
533 
534     // Has indices been specified?
535     AssignStatement::Indices indices;
536     dint nameEndPos = pos;
537     dint bracketPos = pos - 1;
538     try
539     {
540         while (_statementRange.token(bracketPos).equals(Token::BRACKET_CLOSE))
541         {
542             dint startPos = _statementRange.openingBracket(bracketPos);
543             nameEndPos = startPos;
544             Expression *indexExpr = parseExpression(
545                 _statementRange.between(startPos + 1, bracketPos));
546             indices.push_back(indexExpr);
547             bracketPos = nameEndPos - 1;
548         }
549 
550         if (indices.size() > 0 && (flags & Expression::ThrowawayIfInScope))
551         {
552             throw SyntaxError("Parser::parseAssignStatement",
553                 "Weak assignment cannot be used with indices");
554         }
555 
556         QScopedPointer<Expression> lValue(parseExpression(_statementRange.endingTo(nameEndPos), flags));
557         QScopedPointer<Expression> rValue(parseExpression(_statementRange.startingFrom(pos + 1)));
558 
559         AssignStatement *st = new AssignStatement(lValue.data(), indices, rValue.data());
560 
561         lValue.take();
562         rValue.take();
563 
564         return st;
565     }
566     catch (Error const &)
567     {
568         // Cleanup.
569         for (AssignStatement::Indices::iterator i = indices.begin(); i != indices.end(); ++i)
570         {
571             delete *i;
572         }
573         throw;
574     }
575 }
576 
parseExpressionStatement()577 ExpressionStatement *Parser::parseExpressionStatement()
578 {
579     return new ExpressionStatement(parseExpression(_statementRange));
580 }
581 
parseConditionalCompound(Compound & compound,CompoundFlags const & flags)582 Expression *Parser::parseConditionalCompound(Compound &compound, CompoundFlags const &flags)
583 {
584     // keyword [expr] ":" statement
585     // keyword [expr] "\n" compound
586 
587     TokenRange range = _statementRange;
588 
589     // See if there is a colon on this line.
590     dint colon = range.findBracketless(Token::COLON);
591 
592     QScopedPointer<Expression> condition;
593     if (flags.testFlag(HasCondition))
594     {
595         LOG_AS("parseConditionalCompound");
596         LOGDEV_SCR_XVERBOSE_DEBUGONLY("colon at %i", colon);
597 
598         TokenRange conditionRange = range.between(1, colon);
599         if (conditionRange.isEmpty())
600         {
601             throw MissingTokenError("Parser::parseConditionalCompound",
602                 "A condition expression was expected after " + range.token(0).asText());
603         }
604         condition.reset(parseExpression(conditionRange));
605     }
606     else if (colon > 0 && (colon > 1 && !flags.testFlag(IgnoreExtraBeforeColon)))
607     {
608         throw UnexpectedTokenError("Parser::parseConditionalCompound",
609             range.token(1).asText() + " was unexpected");
610     }
611 
612     if (colon > 0)
613     {
614         if (colon == dint(range.size()) - 1)
615         {
616             // The color is the last token: this is most likely a programmer error.
617             throw MissingTokenError("Parser::parseConditionalCompound",
618                                     "Expected at least one token to follow " +
619                                     range.token(colon).asText());
620         }
621         // There must be a statement continuing on the same line.
622         _statementRange = _statementRange.startingFrom(colon + 1);
623         parseStatement(compound);
624     }
625     else
626     {
627         nextStatement();
628         parseCompound(compound);
629         if (!flags.testFlag(StayAtClosingStatement))
630         {
631             nextStatement();
632         }
633     }
634     return condition.take();
635 }
636 
parseList(TokenRange const & range,QChar const * separator,Expression::Flags const & flags)637 ArrayExpression *Parser::parseList(TokenRange const &range, QChar const *separator,
638                                    Expression::Flags const &flags)
639 {
640     QScopedPointer<ArrayExpression> exp(new ArrayExpression);
641     if (range.size() > 0)
642     {
643         // The arguments are comma-separated.
644         TokenRange delim = range.undefinedRange();
645         while (range.getNextDelimited(separator, delim))
646         {
647             exp->add(parseExpression(delim, flags));
648         }
649     }
650     return exp.take();
651 }
652 
parseExpression(TokenRange const & fullRange,Expression::Flags const & flags)653 Expression *Parser::parseExpression(TokenRange const &fullRange, Expression::Flags const &flags)
654 {
655     TokenRange range = fullRange;
656 
657     LOG_AS("parseExpression");
658     LOGDEV_SCR_XVERBOSE_DEBUGONLY("%s (flags:%x)", range.asText() << flags);
659 
660     if (!range.size())
661     {
662         // Empty expression yields a None value.
663         return ConstantExpression::None();
664     }
665 
666     // We can ignore extra parenthesis around the range.
667     while (range.firstToken().equals(Token::PARENTHESIS_OPEN) && range.closingBracket(0) == range.size() - 1)
668     {
669         range = range.shrink(1);
670     }
671 
672     // Do we have a record declaration in the expression?
673     if (range.firstToken().type() == Token::KEYWORD &&
674        range.firstToken().equals(ScriptLex::RECORD))
675     {
676         LOGDEV_SCR_XVERBOSE_DEBUGONLY("declaration expression: RECORD %s", range.startingFrom(1).asText());
677 
678         if (range.size() == 1)
679         {
680             throw MissingTokenError("Parser::parseDeclarationExpression",
681                                     "Expected identifier to follow " + range.firstToken().asText());
682         }
683         return parseExpression(range.startingFrom(1),
684                                flags | Expression::LocalOnly | Expression::NewSubrecord);
685     }
686 
687     TokenRange leftSide = range.between(0, 0);
688     TokenRange rightSide = leftSide;
689 
690     // Locate the lowest-ranking operator.
691     Operator op = findLowestOperator(range, leftSide, rightSide);
692 
693     if (op == NONE)
694     {
695         // This is a constant or a variable reference.
696         return parseTokenExpression(range, flags);
697     }
698     else if (op == ARRAY)
699     {
700         return parseArrayExpression(range);
701     }
702     else if (op == DICTIONARY)
703     {
704         return parseDictionaryExpression(range);
705     }
706     else if (op == CALL)
707     {
708         return parseCallExpression(leftSide, rightSide);
709     }
710     else
711     {
712         // Left side is empty with unary operators.
713         // The right side inherits the flags of the expression (e.g., name-by-reference).
714         return parseOperatorExpression(op, leftSide, rightSide, flags);
715     }
716 }
717 
parseArrayExpression(TokenRange const & range)718 ArrayExpression *Parser::parseArrayExpression(TokenRange const &range)
719 {
720     if (!range.firstToken().equals(Token::BRACKET_OPEN) ||
721         range.closingBracket(0) != range.size() - 1)
722     {
723         throw MissingTokenError("Parser::parseArrayExpression",
724             "Expected brackets for the array expression beginning at " +
725             range.firstToken().asText());
726     }
727     return parseList(range.shrink(1));
728 }
729 
parseDictionaryExpression(TokenRange const & range)730 DictionaryExpression *Parser::parseDictionaryExpression(TokenRange const &range)
731 {
732     if (!range.firstToken().equals(Token::CURLY_OPEN) ||
733         range.closingBracket(0) != range.size() - 1)
734     {
735         throw MissingTokenError("Parser::parseDictionaryExpression",
736             "Expected brackets for the dictionary expression beginning at " +
737             range.firstToken().asText());
738     }
739     TokenRange shrunk = range.shrink(1);
740 
741     std::unique_ptr<DictionaryExpression> exp(new DictionaryExpression);
742     if (shrunk.size() > 0)
743     {
744         // The arguments are comma-separated.
745         TokenRange delim = shrunk.undefinedRange();
746         while (shrunk.getNextDelimited(Token::COMMA, delim))
747         {
748             dint colonPos = delim.findBracketless(Token::COLON);
749             if (colonPos < 0)
750             {
751                 throw MissingTokenError("Parser::parseDictionaryExpression",
752                     "Colon is missing from '" + delim.asText() + "' at " +
753                     delim.firstToken().asText());
754             }
755 
756             QScopedPointer<Expression> key(parseExpression(delim.endingTo(colonPos)));
757             Expression *value = parseExpression(delim.startingFrom(colonPos + 1));
758             exp->add(key.take(), value);
759         }
760     }
761     return exp.release();
762 }
763 
parseCallExpression(TokenRange const & nameRange,TokenRange const & argumentRange)764 Expression *Parser::parseCallExpression(TokenRange const &nameRange, TokenRange const &argumentRange)
765 {
766     //std::cerr << "call name: " << nameRange.asText() << "\n";
767     //std::cerr << "call args: " << argumentRange.asText() << "\n";
768 
769     if (!argumentRange.firstToken().equals(Token::PARENTHESIS_OPEN) ||
770          argumentRange.closingBracket(0) < argumentRange.size() - 1)
771     {
772         throw SyntaxError("Parser::parseCallExpression",
773             "Call arguments must be enclosed in parenthesis for " +
774             argumentRange.firstToken().asText());
775     }
776 
777     // Parse the arguments, with possible labels included.
778     // The named arguments are evaluated by a dictionary which is always
779     // included as the first expression in the array.
780     QScopedPointer<ArrayExpression> args(new ArrayExpression);
781     DictionaryExpression *namedArgs = new DictionaryExpression;
782     args->add(namedArgs);
783 
784     TokenRange argsRange = argumentRange.shrink(1);
785     if (!argsRange.isEmpty())
786     {
787         // The arguments are comma-separated.
788         TokenRange delim = argsRange.undefinedRange();
789         while (argsRange.getNextDelimited(Token::COMMA, delim))
790         {
791             if (delim.has(ScriptLex::ASSIGN))
792             {
793                 // A label is included.
794                 if (delim.size() < 3 ||
795                     delim.firstToken().type() != Token::IDENTIFIER ||
796                     !delim.token(1).equals(ScriptLex::ASSIGN))
797                 {
798                     throw UnexpectedTokenError("Parser::parseCallExpression",
799                         "Labeled argument '" + delim.asText() + "' is malformed");
800                 }
801                 // Create a dictionary entry for this.
802                 Expression *value = parseExpression(delim.startingFrom(2));
803                 namedArgs->add(new ConstantExpression(new TextValue(delim.firstToken().str())), value);
804             }
805             else
806             {
807                 // Unlabeled argument.
808                 args->add(parseExpression(delim));
809             }
810         }
811     }
812 
813     // Check for some built-in methods, which are usable everywhere.
814     if (nameRange.size() == 1)
815     {
816         BuiltInExpression::Type builtIn = BuiltInExpression::findType(nameRange.firstToken().str());
817         if (builtIn != BuiltInExpression::NONE)
818         {
819             return new BuiltInExpression(builtIn, args.take());
820         }
821     }
822     QScopedPointer<Expression> identifier(parseExpression(nameRange, Expression::ByReference));
823     return new OperatorExpression(CALL, identifier.take(), args.take());
824 }
825 
parseOperatorExpression(Operator op,TokenRange const & leftSide,TokenRange const & rightSide,Expression::Flags const & rightFlags)826 OperatorExpression *Parser::parseOperatorExpression(Operator op, TokenRange const &leftSide,
827     TokenRange const &rightSide, Expression::Flags const &rightFlags)
828 {
829     //std::cerr << "left: " << leftSide.asText() << ", right: " << rightSide.asText() << "\n";
830 
831     if (leftSide.isEmpty())
832     {
833         // Must be unary.
834         QScopedPointer<Expression> operand(parseExpression(rightSide));
835         OperatorExpression *x = new OperatorExpression(op, operand.data());
836         operand.take();
837         return x;
838     }
839     else
840     {
841         Expression::Flags leftOpFlags = (leftOperandByReference(op)?
842                                              Expression::ByReference : Expression::ByValue);
843 
844         Expression::Flags rightOpFlags = rightFlags;
845         if (op == MEMBER)
846         {
847             // Don't create new variables for the left side of the member. The only place
848             // where a new variable is created is on the right.
849             leftOpFlags &= ~Expression::NewVariable;
850         }
851         else
852         {
853             rightOpFlags &= ~Expression::ByReference;
854         }
855 
856         // Binary operation.
857         QScopedPointer<Expression> leftOperand(parseExpression(leftSide, leftOpFlags));
858         QScopedPointer<Expression> rightOperand(op == SLICE? parseList(rightSide, Token::COLON) :
859             parseExpression(rightSide, rightOpFlags));
860         OperatorExpression *x = new OperatorExpression(op, leftOperand.data(), rightOperand.data());
861         x->setFlags(rightFlags); // original flags
862         rightOperand.take();
863         leftOperand.take();
864         return x;
865     }
866 }
867 
parseTokenExpression(TokenRange const & range,Expression::Flags const & flags)868 Expression *Parser::parseTokenExpression(TokenRange const &range, Expression::Flags const &flags)
869 {
870     if (!range.size())
871     {
872         throw MissingTokenError("Parser::parseTokenExpression",
873             "Expected tokens, but got nothing -- near " +
874             range.buffer().at(range.tokenIndex(0)).asText());
875     }
876 
877     Token const &token = range.token(0);
878 
879     if (token.type() == Token::KEYWORD)
880     {
881         if (token.equals(ScriptLex::T_TRUE))
882         {
883             return ConstantExpression::True();
884         }
885         else if (token.equals(ScriptLex::T_FALSE))
886         {
887             return ConstantExpression::False();
888         }
889         else if (token.equals(ScriptLex::NONE))
890         {
891             return ConstantExpression::None();
892         }
893         else if (token.equals(ScriptLex::PI))
894         {
895             return ConstantExpression::Pi();
896         }
897         else if (token.equals(ScriptLex::SCOPE) &&
898                  range.size() == 2 &&
899                  range.token(1).type() == Token::IDENTIFIER)
900         {
901             // Explicit local scope.
902             return new NameExpression(StringList{NameExpression::LOCAL_SCOPE, range.token(1).str()},
903                                       flags);
904         }
905     }
906 
907     switch (token.type())
908     {
909     case Token::IDENTIFIER:
910         if (range.size() == 1)
911         {
912             return new NameExpression(range.token(0).str(), flags);
913         }
914         else if (range.size() >= 3 &&
915                  range.token(1).equals(ScriptLex::SCOPE) &&
916                  range.token(2).type() == Token::IDENTIFIER)
917         {
918             StringList identifierSequence;
919             identifierSequence << range.token(0).str()
920                                << range.token(2).str();
921             for (duint i = 3; i < range.size() - 1; i += 2)
922             {
923                 if (range.token(i).equals(ScriptLex::SCOPE) &&
924                     range.token(i + 1).type() == Token::IDENTIFIER)
925                 {
926                     identifierSequence << range.token(i + 1).str();
927                 }
928                 else
929                 {
930                     throw UnexpectedTokenError("Parser::parseTokenExpression",
931                                                "Unexpected tokens: " + range.token(i).asText() +
932                                                    " followed by " + range.token(i + 1).asText());
933                 }
934             }
935             // Scoped name. This is intended for allowing access to shadowed
936             // identifiers from super records.
937             return new NameExpression(identifierSequence, flags);
938         }
939         else
940         {
941             throw UnexpectedTokenError("Parser::parseTokenExpression",
942                 "Unexpected token " + range.token(1).asText());
943         }
944 
945     case Token::LITERAL_STRING_APOSTROPHE:
946     case Token::LITERAL_STRING_QUOTED:
947     case Token::LITERAL_STRING_LONG:
948         return new ConstantExpression(new TextValue(token.unescapeStringLiteral()));
949 
950     case Token::LITERAL_NUMBER:
951         return new ConstantExpression(new NumberValue(token.toNumber()));
952 
953     default:
954         throw UnexpectedTokenError("Parser::parseTokenExpression",
955             "Unexpected " + token.asText() + " which was identified as " +
956             Token::typeToText(token.type()));
957     }
958 }
959 
findLowestOperator(TokenRange const & range,TokenRange & leftSide,TokenRange & rightSide)960 Operator Parser::findLowestOperator(TokenRange const &range, TokenRange &leftSide, TokenRange &rightSide)
961 {
962     enum {
963         MAX_RANK         = 0x7fffffff,
964         RANK_MEMBER      = 23,
965         RANK_CALL        = 24,
966         RANK_INDEX       = 24,
967         RANK_SLICE       = 24,
968         RANK_DOT         = 25,
969         RANK_ARRAY       = MAX_RANK - 1,
970         RANK_DICTIONARY  = RANK_ARRAY,
971         RANK_PARENTHESIS = MAX_RANK - 1
972     };
973     enum Direction {
974         LEFT_TO_RIGHT,
975         RIGHT_TO_LEFT
976     };
977 
978     Operator previousOp = NONE;
979     Operator lowestOp = NONE;
980     int lowestRank = MAX_RANK;
981 
982     for (duint i = 0, continueFrom = 0; i < range.size(); i = continueFrom)
983     {
984         continueFrom = i + 1;
985 
986         int rank = MAX_RANK;
987         Operator op = NONE;
988         Direction direction = LEFT_TO_RIGHT;
989 
990         Token const &token = range.token(i);
991 
992         if (token.equals(Token::PARENTHESIS_OPEN))
993         {
994             continueFrom = range.closingBracket(i) + 1;
995             if ((previousOp == NONE || previousOp == INDEX || previousOp == SLICE ||
996                 previousOp == PARENTHESIS || previousOp == CALL) && i > 0)
997             {
998                 // The previous token was not an operator, but there
999                 // was something before this one. It must be a function
1000                 // call.
1001                 op = CALL;
1002                 rank = RANK_CALL;
1003             }
1004             else
1005             {
1006                 // It's parenthesis. Skip past it.
1007                 op = PARENTHESIS;
1008                 rank = RANK_PARENTHESIS;
1009             }
1010         }
1011         else if (token.equals(Token::BRACKET_OPEN))
1012         {
1013             continueFrom = range.closingBracket(i) + 1;
1014             if ((previousOp == NONE || previousOp == PARENTHESIS ||
1015                 previousOp == INDEX || previousOp == SLICE || previousOp == CALL) && i > 0)
1016             {
1017                 if (range.between(i + 1, continueFrom - 1).has(Token::COLON))
1018                 {
1019                     op = SLICE;
1020                     rank = RANK_SLICE;
1021                 }
1022                 else
1023                 {
1024                     op = INDEX;
1025                     rank = RANK_INDEX;
1026                 }
1027             }
1028             else
1029             {
1030                 op = ARRAY;
1031                 rank = RANK_ARRAY;
1032             }
1033         }
1034         else if (token.equals(Token::CURLY_OPEN))
1035         {
1036             continueFrom = range.closingBracket(i) + 1;
1037             op = DICTIONARY;
1038             rank = RANK_DICTIONARY;
1039         }
1040         else
1041         {
1042             const struct {
1043                 char const *token;
1044                 Operator op;
1045                 int rank;
1046                 Direction direction;
1047             } rankings[] = { // Ascending order.
1048                 { "+=",     PLUS_ASSIGN,        0,          RIGHT_TO_LEFT },
1049                 { "-=",     MINUS_ASSIGN,       0,          RIGHT_TO_LEFT },
1050                 { "*=",     MULTIPLY_ASSIGN,    0,          RIGHT_TO_LEFT },
1051                 { "/=",     DIVIDE_ASSIGN,      0,          RIGHT_TO_LEFT },
1052                 { "%=",     MODULO_ASSIGN,      0,          RIGHT_TO_LEFT },
1053                 { "or",     OR,                 1,          LEFT_TO_RIGHT },
1054                 { "and",    AND,                2,          LEFT_TO_RIGHT },
1055                 { "not",    NOT,                3,          RIGHT_TO_LEFT },
1056                 { "in",     IN,                 4,          LEFT_TO_RIGHT },
1057                 { "|",      BITWISE_OR,         5,          LEFT_TO_RIGHT },
1058                 { "^",      BITWISE_XOR,        6,          LEFT_TO_RIGHT },
1059                 { "&",      BITWISE_AND,        7,          LEFT_TO_RIGHT },
1060                 { "==",     EQUAL,              8,          LEFT_TO_RIGHT },
1061                 { "!=",     NOT_EQUAL,          8,          LEFT_TO_RIGHT },
1062                 { "<",      LESS,               9,          LEFT_TO_RIGHT },
1063                 { ">",      GREATER,            9,          LEFT_TO_RIGHT },
1064                 { "<=",     LEQUAL,             9,          LEFT_TO_RIGHT },
1065                 { ">=",     GEQUAL,             9,          LEFT_TO_RIGHT },
1066                 { "+",      PLUS,               12,         LEFT_TO_RIGHT },
1067                 { "-",      MINUS,              12,         LEFT_TO_RIGHT },
1068                 { "*",      MULTIPLY,           13,         LEFT_TO_RIGHT },
1069                 { "/",      DIVIDE,             13,         LEFT_TO_RIGHT },
1070                 { "%",      MODULO,             13,         LEFT_TO_RIGHT },
1071                 { "~",      BITWISE_NOT,        14,         LEFT_TO_RIGHT },
1072                 { ".",      DOT,                RANK_DOT,   LEFT_TO_RIGHT },
1073                 { 0,        NONE,               MAX_RANK,   LEFT_TO_RIGHT }
1074             };
1075 
1076             // Operator precedence:
1077             // .
1078             // function call
1079             // member
1080             // []
1081             // ()
1082             // * / %
1083             // + -
1084             // & | ^
1085             // << >>
1086             // < > <= >=
1087             // == !=
1088             // in
1089             // not
1090             // and
1091             // or
1092 
1093             // Check the rankings table.
1094             for (int k = 0; rankings[k].token; ++k)
1095             {
1096                 if (token.equals(String(rankings[k].token)))
1097                 {
1098                     op = rankings[k].op;
1099                     rank = rankings[k].rank;
1100                     direction = rankings[k].direction;
1101 
1102                     if (op == DOT) // && previousOp == NONE)
1103                     {
1104                         op = MEMBER;
1105                         rank = RANK_MEMBER;
1106                         direction = LEFT_TO_RIGHT;
1107                     }
1108                     else if (!i || (previousOp != NONE && previousOp != PARENTHESIS &&
1109                         previousOp != CALL && previousOp != INDEX && previousOp != SLICE &&
1110                         previousOp != ARRAY && previousOp != DICTIONARY))
1111                     {
1112                         // There already was an operator before this one.
1113                         // Must be unary?
1114                         if (op == PLUS || op == MINUS)
1115                         {
1116                             rank += 100;
1117                             //std::cerr << "Rank boost for " << rankings[k].token << "\n";
1118                         }
1119                     }
1120                     break;
1121                 }
1122             }
1123         }
1124 
1125         if (op != NONE &&
1126             ((direction == LEFT_TO_RIGHT && rank <= lowestRank) ||
1127              (direction == RIGHT_TO_LEFT && rank < lowestRank)))
1128         {
1129             lowestOp = op;
1130             lowestRank = rank;
1131             leftSide = range.endingTo(i);
1132             if (op == INDEX || op == SLICE)
1133             {
1134                 rightSide = range.between(i + 1, continueFrom - 1);
1135             }
1136             else
1137             {
1138                 rightSide = range.startingFrom(op == CALL || op == ARRAY ||
1139                     op == DICTIONARY? i : i + 1);
1140             }
1141         }
1142 
1143         previousOp = op;
1144     }
1145 
1146     return lowestOp;
1147 }
1148