1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/parser.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14 
15 const char kGrammar_Help[] =
16     R"*(Language and grammar for GN build files
17 
18 Tokens
19 
20   GN build files are read as sequences of tokens.  While splitting the file
21   into tokens, the next token is the longest sequence of characters that form a
22   valid token.
23 
24 White space and comments
25 
26   White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27   carriage returns (U+000D), and newlines (U+000A).
28 
29   Comments start at the character "#" and stop at the next newline.
30 
31   White space and comments are ignored except that they may separate tokens
32   that would otherwise combine into a single token.
33 
34 Identifiers
35 
36   Identifiers name variables and functions.
37 
38       identifier = letter { letter | digit } .
39       letter     = "A" ... "Z" | "a" ... "z" | "_" .
40       digit      = "0" ... "9" .
41 
42 Keywords
43 
44   The following keywords are reserved and may not be used as identifiers:
45 
46           else    false   if      true
47 
48 Integer literals
49 
50   An integer literal represents a decimal integer value.
51 
52       integer = [ "-" ] digit { digit } .
53 
54   Leading zeros and negative zero are disallowed.
55 
56 String literals
57 
58   A string literal represents a string value consisting of the quoted
59   characters with possible escape sequences and variable expansions.
60 
61       string           = `"` { char | escape | expansion } `"` .
62       escape           = `\` ( "$" | `"` | char ) .
63       BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" .
64       Hex              = "0x" [0-9A-Fa-f][0-9A-Fa-f]
65       expansion        = "$" ( identifier | BracketExpansion | Hex ) .
66       char             = /* any character except "$", `"`, or newline */ .
67 
68   After a backslash, certain sequences represent special characters:
69 
70           \"    U+0022    quotation mark
71           \$    U+0024    dollar sign
72           \\    U+005C    backslash
73 
74   All other backslashes represent themselves.
75 
76   To insert an arbitrary byte value, use $0xFF. For example, to insert a
77   newline character: "Line one$0x0ALine two".
78 
79   An expansion will evaluate the variable following the '$' and insert a
80   stringified version of it into the result. For example, to concat two path
81   components with a slash separating them:
82     "$var_one/$var_two"
83   Use the "${var_one}" format to be explicitly deliniate the variable for
84   otherwise-ambiguous cases.
85 
86 Punctuation
87 
88   The following character sequences represent punctuation:
89 
90           +       +=      ==      !=      (       )
91           -       -=      <       <=      [       ]
92           !       =       >       >=      {       }
93                           &&      ||      .       ,
94 
95 Grammar
96 
97   The input tokens form a syntax tree following a context-free grammar:
98 
99       File = StatementList .
100 
101       Statement     = Assignment | Call | Condition .
102       LValue        = identifier | ArrayAccess | ScopeAccess .
103       Assignment    = LValue AssignOp Expr .
104       Call          = identifier "(" [ ExprList ] ")" [ Block ] .
105       Condition     = "if" "(" Expr ")" Block
106                       [ "else" ( Condition | Block ) ] .
107       Block         = "{" StatementList "}" .
108       StatementList = { Statement } .
109 
110       ArrayAccess = identifier "[" Expr "]" .
111       ScopeAccess = identifier "." identifier .
112       Expr        = UnaryExpr | Expr BinaryOp Expr .
113       UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .
114       PrimaryExpr = identifier | integer | string | Call
115                   | ArrayAccess | ScopeAccess | Block
116                   | "(" Expr ")"
117                   | "[" [ ExprList [ "," ] ] "]" .
118       ExprList    = Expr { "," Expr } .
119 
120       AssignOp = "=" | "+=" | "-=" .
121       UnaryOp  = "!" .
122       BinaryOp = "+" | "-"                  // highest priority
123                | "<" | "<=" | ">" | ">="
124                | "==" | "!="
125                | "&&"
126                | "||" .                     // lowest priority
127 
128   All binary operators are left-associative.
129 
130 Types
131 
132   The GN language is dynamically typed. The following types are used:
133 
134    - Boolean: Uses the keywords "true" and "false". There is no implicit
135      conversion between booleans and integers.
136 
137    - Integers: All numbers in GN are signed 64-bit integers.
138 
139    - Strings: Strings are 8-bit with no enforced encoding. When a string is
140      used to interact with other systems with particular encodings (like the
141      Windows and Mac filesystems) it is assumed to be UTF-8. See "String
142      literals" above for more.
143 
144    - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
145      below for more.
146 
147    - Scopes: Scopes are like dictionaries that use variable names for keys. See
148      "Scopes" below for more.
149 
150 Lists
151 
152   Lists are created with [] and using commas to separate items:
153 
154        mylist = [ 0, 1, 2, "some string" ]
155 
156   A comma after the last item is optional. Lists are dereferenced using 0-based
157   indexing:
158 
159        mylist[0] += 1
160        var = mylist[2]
161 
162   Lists can be concatenated using the '+' and '+=' operators. Bare values can
163   not be concatenated with lists, to add a single item, it must be put into a
164   list of length one.
165 
166   Items can be removed from lists using the '-' and '-=' operators. This will
167   remove all occurrences of every item in the right-hand list from the
168   left-hand list. It is an error to remove an item not in the list. This is to
169   prevent common typos and to detect dead code that is removing things that no
170   longer apply.
171 
172   It is an error to use '=' to replace a nonempty list with another nonempty
173   list. This is to prevent accidentally overwriting data when in most cases
174   '+=' was intended. To overwrite a list on purpose, first assign it to the
175   empty list:
176 
177     mylist = []
178     mylist = otherlist
179 
180 Scopes
181 
182   All execution happens in the context of a scope which holds the current state
183   (like variables). With the exception of loops and conditions, '{' introduces
184   a new scope that has a parent reference to the old scope.
185 
186   Variable reads recursively search all nested scopes until the variable is
187   found or there are no more scopes. Variable writes always go into the current
188   scope. This means that after the closing '}' (again excepting loops and
189   conditions), all local variables will be restored to the previous values.
190   This also means that "foo = foo" can do useful work by copying a variable
191   into the current scope that was defined in a containing scope.
192 
193   Scopes can also be assigned to variables. Such scopes can be created by
194   functions like exec_script, when invoking a template (the template code
195   refers to the variables set by the invoking code by the implicitly-created
196   "invoker" scope), or explicitly like:
197 
198     empty_scope = {}
199     myvalues = {
200       foo = 21
201       bar = "something"
202     }
203 
204   Inside such a scope definition can be any GN code including conditionals and
205   function calls. After the close of the scope, it will contain all variables
206   explicitly set by the code contained inside it. After this, the values can be
207   read, modified, or added to:
208 
209     myvalues.foo += 2
210     empty_scope.new_thing = [ 1, 2, 3 ]
211 
212   Scope equality is defined as single-level scopes identical within the current
213   scope. That is, all values in the first scope must be present and identical
214   within the second, and vice versa. Note that this means inherited scopes are
215   always unequal by definition.
216 )*";
217 
218 // Precedence constants.
219 //
220 // Currently all operators are left-associative so this list is sequential. To
221 // implement a right-assocative operators in a Pratt parser we would leave gaps
222 // in between the constants, and right-associative operators get a precedence
223 // of "<left-associated-precedence> - 1".
224 enum Precedence {
225   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
226   PRECEDENCE_OR = 2,
227   PRECEDENCE_AND = 3,
228   PRECEDENCE_EQUALITY = 4,
229   PRECEDENCE_RELATION = 5,
230   PRECEDENCE_SUM = 6,
231   PRECEDENCE_PREFIX = 7,
232   PRECEDENCE_CALL = 8,
233   PRECEDENCE_DOT = 9,  // Highest precedence.
234 };
235 
236 // The top-level for blocks/ifs is recursive descent, the expression parser is
237 // a Pratt parser. The basic idea there is to have the precedences (and
238 // associativities) encoded relative to each other and only parse up until you
239 // hit something of that precedence. There's a dispatch table in expressions_
240 // at the top of parser.cc that describes how each token dispatches if it's
241 // seen as either a prefix or infix operator, and if it's infix, what its
242 // precedence is.
243 //
244 // References:
245 // http://javascript.crockford.com/tdop/tdop.html
246 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
247 
248 // Indexed by Token::Type.
249 ParserHelper Parser::expressions_[] = {
250     {nullptr, nullptr, -1},                                   // INVALID
251     {&Parser::Literal, nullptr, -1},                          // INTEGER
252     {&Parser::Literal, nullptr, -1},                          // STRING
253     {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
254     {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
255     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
256     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
257     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
258     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
259     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
260     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
261     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
262     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
263     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
264     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
265     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
266     {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
267     {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
268     {&Parser::Not, nullptr, -1},                              // BANG
269     {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
270     {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
271     {nullptr, nullptr, -1},                                   // RIGHT_PAREN
272     {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
273     {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
274     {&Parser::Block, nullptr, -1},                            // LEFT_BRACE
275     {nullptr, nullptr, -1},                                   // RIGHT_BRACE
276     {nullptr, nullptr, -1},                                   // IF
277     {nullptr, nullptr, -1},                                   // ELSE
278     {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
279     {nullptr, nullptr, -1},                                       // COMMA
280     {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
281     {nullptr, nullptr, -1},                // LINE_COMMENT
282     {nullptr, nullptr, -1},                // SUFFIX_COMMENT
283     {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
284 };
285 
Parser(const std::vector<Token> & tokens,Err * err)286 Parser::Parser(const std::vector<Token>& tokens, Err* err)
287     : invalid_token_(Location(), Token::INVALID, std::string_view()),
288       err_(err),
289       cur_(0) {
290   for (const auto& token : tokens) {
291     switch (token.type()) {
292       case Token::LINE_COMMENT:
293         line_comment_tokens_.push_back(token);
294         break;
295       case Token::SUFFIX_COMMENT:
296         suffix_comment_tokens_.push_back(token);
297         break;
298       default:
299         // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
300         // through the real parser.
301         tokens_.push_back(token);
302         break;
303     }
304   }
305 }
306 
307 Parser::~Parser() = default;
308 
309 // static
Parse(const std::vector<Token> & tokens,Err * err)310 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
311                                          Err* err) {
312   Parser p(tokens, err);
313   return p.ParseFile();
314 }
315 
316 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)317 std::unique_ptr<ParseNode> Parser::ParseExpression(
318     const std::vector<Token>& tokens,
319     Err* err) {
320   Parser p(tokens, err);
321   std::unique_ptr<ParseNode> expr = p.ParseExpression();
322   if (!p.at_end() && !err->has_error()) {
323     *err = Err(p.cur_token(), "Trailing garbage");
324     return nullptr;
325   }
326   return expr;
327 }
328 
329 // static
ParseValue(const std::vector<Token> & tokens,Err * err)330 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
331                                               Err* err) {
332   for (const Token& token : tokens) {
333     switch (token.type()) {
334       case Token::INTEGER:
335       case Token::STRING:
336       case Token::TRUE_TOKEN:
337       case Token::FALSE_TOKEN:
338       case Token::LEFT_BRACKET:
339       case Token::RIGHT_BRACKET:
340       case Token::COMMA:
341         continue;
342       default:
343         *err = Err(token, "Invalid token in literal value");
344         return nullptr;
345     }
346   }
347 
348   return ParseExpression(tokens, err);
349 }
350 
IsAssignment(const ParseNode * node) const351 bool Parser::IsAssignment(const ParseNode* node) const {
352   return node && node->AsBinaryOp() &&
353          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
354           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
355           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
356 }
357 
IsStatementBreak(Token::Type token_type) const358 bool Parser::IsStatementBreak(Token::Type token_type) const {
359   switch (token_type) {
360     case Token::IDENTIFIER:
361     case Token::LEFT_BRACE:
362     case Token::RIGHT_BRACE:
363     case Token::IF:
364     case Token::ELSE:
365       return true;
366     default:
367       return false;
368   }
369 }
370 
LookAhead(Token::Type type)371 bool Parser::LookAhead(Token::Type type) {
372   if (at_end())
373     return false;
374   return cur_token().type() == type;
375 }
376 
Match(Token::Type type)377 bool Parser::Match(Token::Type type) {
378   if (!LookAhead(type))
379     return false;
380   Consume();
381   return true;
382 }
383 
Consume(Token::Type type,const char * error_message)384 const Token& Parser::Consume(Token::Type type, const char* error_message) {
385   Token::Type types[1] = {type};
386   return Consume(types, 1, error_message);
387 }
388 
Consume(Token::Type * types,size_t num_types,const char * error_message)389 const Token& Parser::Consume(Token::Type* types,
390                              size_t num_types,
391                              const char* error_message) {
392   if (has_error()) {
393     // Don't overwrite current error, but make progress through tokens so that
394     // a loop that's expecting a particular token will still terminate.
395     if (!at_end())
396       cur_++;
397     return invalid_token_;
398   }
399   if (at_end()) {
400     const char kEOFMsg[] = "I hit EOF instead.";
401     if (tokens_.empty())
402       *err_ = Err(Location(), error_message, kEOFMsg);
403     else
404       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
405     return invalid_token_;
406   }
407 
408   for (size_t i = 0; i < num_types; ++i) {
409     if (cur_token().type() == types[i])
410       return Consume();
411   }
412   *err_ = Err(cur_token(), error_message);
413   return invalid_token_;
414 }
415 
Consume()416 const Token& Parser::Consume() {
417   return tokens_[cur_++];
418 }
419 
ParseExpression()420 std::unique_ptr<ParseNode> Parser::ParseExpression() {
421   return ParseExpression(0);
422 }
423 
ParseExpression(int precedence)424 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
425   if (at_end())
426     return std::unique_ptr<ParseNode>();
427 
428   const Token& token = Consume();
429   PrefixFunc prefix = expressions_[token.type()].prefix;
430 
431   if (prefix == nullptr) {
432     *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
433     return std::unique_ptr<ParseNode>();
434   }
435 
436   std::unique_ptr<ParseNode> left = (this->*prefix)(token);
437   if (has_error())
438     return left;
439 
440   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
441          precedence <= expressions_[cur_token().type()].precedence) {
442     const Token& next_token = Consume();
443     InfixFunc infix = expressions_[next_token.type()].infix;
444     if (infix == nullptr) {
445       *err_ = Err(next_token,
446                   "Unexpected token '" + std::string(next_token.value()) + "'");
447       return std::unique_ptr<ParseNode>();
448     }
449     left = (this->*infix)(std::move(left), next_token);
450     if (has_error())
451       return std::unique_ptr<ParseNode>();
452   }
453 
454   return left;
455 }
456 
Block(const Token & token)457 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
458   // This entrypoint into ParseBlock means it's part of an expression and we
459   // always want the result.
460   return ParseBlock(token, BlockNode::RETURNS_SCOPE);
461 }
462 
Literal(const Token & token)463 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
464   return std::make_unique<LiteralNode>(token);
465 }
466 
Name(const Token & token)467 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
468   return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
469 }
470 
BlockComment(const Token & token)471 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
472   std::unique_ptr<BlockCommentNode> comment =
473       std::make_unique<BlockCommentNode>();
474   comment->set_comment(token);
475   return std::move(comment);
476 }
477 
Group(const Token & token)478 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
479   std::unique_ptr<ParseNode> expr = ParseExpression();
480   if (has_error())
481     return std::unique_ptr<ParseNode>();
482   Consume(Token::RIGHT_PAREN, "Expected ')'");
483   return expr;
484 }
485 
Not(const Token & token)486 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
487   std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
488   if (has_error())
489     return std::unique_ptr<ParseNode>();
490   if (!expr) {
491     if (!has_error())
492       *err_ = Err(token, "Expected right-hand side for '!'.");
493     return std::unique_ptr<ParseNode>();
494   }
495   std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
496   unary_op->set_op(token);
497   unary_op->set_operand(std::move(expr));
498   return std::move(unary_op);
499 }
500 
List(const Token & node)501 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
502   std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
503   if (!has_error() && !at_end())
504     Consume(Token::RIGHT_BRACKET, "Expected ']'");
505   return list;
506 }
507 
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)508 std::unique_ptr<ParseNode> Parser::BinaryOperator(
509     std::unique_ptr<ParseNode> left,
510     const Token& token) {
511   std::unique_ptr<ParseNode> right =
512       ParseExpression(expressions_[token.type()].precedence + 1);
513   if (!right) {
514     if (!has_error()) {
515       *err_ = Err(token, "Expected right-hand side for '" +
516                              std::string(token.value()) + "'");
517     }
518     return std::unique_ptr<ParseNode>();
519   }
520   std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
521   binary_op->set_op(token);
522   binary_op->set_left(std::move(left));
523   binary_op->set_right(std::move(right));
524   return std::move(binary_op);
525 }
526 
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)527 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
528     std::unique_ptr<ParseNode> left,
529     const Token& token) {
530   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
531   list->set_begin_token(token);
532   list->set_end(std::make_unique<EndNode>(token));
533   std::unique_ptr<BlockNode> block;
534   bool has_arg = false;
535   if (LookAhead(Token::LEFT_PAREN)) {
536     const Token& start_token = Consume();
537     // Parsing a function call.
538     has_arg = true;
539     if (Match(Token::RIGHT_PAREN)) {
540       // Nothing, just an empty call.
541     } else {
542       list = ParseList(start_token, Token::RIGHT_PAREN, false);
543       if (has_error())
544         return std::unique_ptr<ParseNode>();
545       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
546     }
547     // Optionally with a scope.
548     if (LookAhead(Token::LEFT_BRACE)) {
549       block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
550       if (has_error())
551         return std::unique_ptr<ParseNode>();
552     }
553   }
554 
555   if (!left && !has_arg) {
556     // Not a function call, just a standalone identifier.
557     return std::make_unique<IdentifierNode>(token);
558   }
559   std::unique_ptr<FunctionCallNode> func_call =
560       std::make_unique<FunctionCallNode>();
561   func_call->set_function(token);
562   func_call->set_args(std::move(list));
563   if (block)
564     func_call->set_block(std::move(block));
565   return std::move(func_call);
566 }
567 
Assignment(std::unique_ptr<ParseNode> left,const Token & token)568 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
569                                               const Token& token) {
570   if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
571     *err_ = Err(left.get(),
572                 "The left-hand side of an assignment must be an identifier, "
573                 "scope access, or array access.");
574     return std::unique_ptr<ParseNode>();
575   }
576   std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
577   if (!value) {
578     if (!has_error())
579       *err_ = Err(token, "Expected right-hand side for assignment.");
580     return std::unique_ptr<ParseNode>();
581   }
582   std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
583   assign->set_op(token);
584   assign->set_left(std::move(left));
585   assign->set_right(std::move(value));
586   return std::move(assign);
587 }
588 
Subscript(std::unique_ptr<ParseNode> left,const Token & token)589 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
590                                              const Token& token) {
591   // TODO: Maybe support more complex expressions like a[0][0]. This would
592   // require work on the evaluator too.
593   if (left->AsIdentifier() == nullptr) {
594     *err_ = Err(
595         left.get(), "May only subscript identifiers.",
596         "The thing on the left hand side of the [] must be an identifier\n"
597         "and not an expression. If you need this, you'll have to assign the\n"
598         "value to a temporary before subscripting. Sorry.");
599     return std::unique_ptr<ParseNode>();
600   }
601   std::unique_ptr<ParseNode> value = ParseExpression();
602   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
603   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
604   accessor->set_base(left->AsIdentifier()->value());
605   accessor->set_subscript(std::move(value));
606   return std::move(accessor);
607 }
608 
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)609 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
610                                                const Token& token) {
611   if (left->AsIdentifier() == nullptr) {
612     *err_ = Err(
613         left.get(), "May only use \".\" for identifiers.",
614         "The thing on the left hand side of the dot must be an identifier\n"
615         "and not an expression. If you need this, you'll have to assign the\n"
616         "value to a temporary first. Sorry.");
617     return std::unique_ptr<ParseNode>();
618   }
619 
620   std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
621   if (!right || !right->AsIdentifier()) {
622     *err_ = Err(
623         token, "Expected identifier for right-hand-side of \".\"",
624         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
625     return std::unique_ptr<ParseNode>();
626   }
627 
628   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
629   accessor->set_base(left->AsIdentifier()->value());
630   accessor->set_member(std::unique_ptr<IdentifierNode>(
631       static_cast<IdentifierNode*>(right.release())));
632   return std::move(accessor);
633 }
634 
635 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)636 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
637                                             Token::Type stop_before,
638                                             bool allow_trailing_comma) {
639   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
640   list->set_begin_token(start_token);
641   bool just_got_comma = false;
642   bool first_time = true;
643   while (!LookAhead(stop_before)) {
644     if (!first_time) {
645       if (!just_got_comma) {
646         // Require commas separate things in lists.
647         *err_ = Err(cur_token(), "Expected comma between items.");
648         return std::unique_ptr<ListNode>();
649       }
650     }
651     first_time = false;
652 
653     // Why _OR? We're parsing things that are higher precedence than the ,
654     // that separates the items of the list. , should appear lower than
655     // boolean expressions (the lowest of which is OR), but above assignments.
656     list->append_item(ParseExpression(PRECEDENCE_OR));
657     if (has_error())
658       return std::unique_ptr<ListNode>();
659     if (at_end()) {
660       *err_ =
661           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
662       return std::unique_ptr<ListNode>();
663     }
664     if (list->contents().back()->AsBlockComment()) {
665       // If there was a comment inside the list, we don't need a comma to the
666       // next item, so pretend we got one, if we're expecting one.
667       just_got_comma = allow_trailing_comma;
668     } else {
669       just_got_comma = Match(Token::COMMA);
670     }
671   }
672   if (just_got_comma && !allow_trailing_comma) {
673     *err_ = Err(cur_token(), "Trailing comma");
674     return std::unique_ptr<ListNode>();
675   }
676   list->set_end(std::make_unique<EndNode>(cur_token()));
677   return list;
678 }
679 
ParseFile()680 std::unique_ptr<ParseNode> Parser::ParseFile() {
681   std::unique_ptr<BlockNode> file =
682       std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
683   for (;;) {
684     if (at_end())
685       break;
686     std::unique_ptr<ParseNode> statement = ParseStatement();
687     if (!statement)
688       break;
689     file->append_statement(std::move(statement));
690   }
691   if (!at_end() && !has_error())
692     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
693   if (has_error())
694     return std::unique_ptr<ParseNode>();
695 
696   // TODO(scottmg): If this is measurably expensive, it could be done only
697   // when necessary (when reformatting, or during tests). Comments are
698   // separate from the parse tree at this point, so downstream code can remain
699   // ignorant of them.
700   AssignComments(file.get());
701 
702   return std::move(file);
703 }
704 
ParseStatement()705 std::unique_ptr<ParseNode> Parser::ParseStatement() {
706   if (LookAhead(Token::IF)) {
707     return ParseCondition();
708   } else if (LookAhead(Token::BLOCK_COMMENT)) {
709     return BlockComment(Consume());
710   } else {
711     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
712     // to allow "pointless" expressions and return ParseExpression() directly.
713     std::unique_ptr<ParseNode> stmt = ParseExpression();
714     if (stmt) {
715       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
716         return stmt;
717     }
718     if (!has_error()) {
719       const Token& token = cur_or_last_token();
720       *err_ = Err(token, "Expecting assignment or function call.");
721     }
722     return std::unique_ptr<ParseNode>();
723   }
724 }
725 
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)726 std::unique_ptr<BlockNode> Parser::ParseBlock(
727     const Token& begin_brace,
728     BlockNode::ResultMode result_mode) {
729   if (has_error())
730     return std::unique_ptr<BlockNode>();
731   std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
732   block->set_begin_token(begin_brace);
733 
734   for (;;) {
735     if (LookAhead(Token::RIGHT_BRACE)) {
736       block->set_end(std::make_unique<EndNode>(Consume()));
737       break;
738     }
739 
740     std::unique_ptr<ParseNode> statement = ParseStatement();
741     if (!statement)
742       return std::unique_ptr<BlockNode>();
743     block->append_statement(std::move(statement));
744   }
745   return block;
746 }
747 
ParseCondition()748 std::unique_ptr<ParseNode> Parser::ParseCondition() {
749   std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
750   condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
751   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
752   condition->set_condition(ParseExpression());
753   if (IsAssignment(condition->condition()))
754     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
755   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
756   condition->set_if_true(ParseBlock(
757       Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
758       BlockNode::DISCARDS_RESULT));
759   if (Match(Token::ELSE)) {
760     if (LookAhead(Token::LEFT_BRACE)) {
761       condition->set_if_false(
762           ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
763     } else if (LookAhead(Token::IF)) {
764       condition->set_if_false(ParseStatement());
765     } else {
766       *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
767       return std::unique_ptr<ParseNode>();
768     }
769   }
770   if (has_error())
771     return std::unique_ptr<ParseNode>();
772   return std::move(condition);
773 }
774 
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)775 void Parser::TraverseOrder(const ParseNode* root,
776                            std::vector<const ParseNode*>* pre,
777                            std::vector<const ParseNode*>* post) {
778   if (root) {
779     pre->push_back(root);
780 
781     if (const AccessorNode* accessor = root->AsAccessor()) {
782       TraverseOrder(accessor->subscript(), pre, post);
783       TraverseOrder(accessor->member(), pre, post);
784     } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
785       TraverseOrder(binop->left(), pre, post);
786       TraverseOrder(binop->right(), pre, post);
787     } else if (const BlockNode* block = root->AsBlock()) {
788       for (const auto& statement : block->statements())
789         TraverseOrder(statement.get(), pre, post);
790       TraverseOrder(block->End(), pre, post);
791     } else if (const ConditionNode* condition = root->AsCondition()) {
792       TraverseOrder(condition->condition(), pre, post);
793       TraverseOrder(condition->if_true(), pre, post);
794       TraverseOrder(condition->if_false(), pre, post);
795     } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
796       TraverseOrder(func_call->args(), pre, post);
797       TraverseOrder(func_call->block(), pre, post);
798     } else if (root->AsIdentifier()) {
799       // Nothing.
800     } else if (const ListNode* list = root->AsList()) {
801       for (const auto& node : list->contents())
802         TraverseOrder(node.get(), pre, post);
803       TraverseOrder(list->End(), pre, post);
804     } else if (root->AsLiteral()) {
805       // Nothing.
806     } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
807       TraverseOrder(unaryop->operand(), pre, post);
808     } else if (root->AsBlockComment()) {
809       // Nothing.
810     } else if (root->AsEnd()) {
811       // Nothing.
812     } else {
813       CHECK(false) << "Unhandled case in TraverseOrder.";
814     }
815 
816     post->push_back(root);
817   }
818 }
819 
AssignComments(ParseNode * file)820 void Parser::AssignComments(ParseNode* file) {
821   // Start by generating a pre- and post- order traversal of the tree so we
822   // can determine what's before and after comments.
823   std::vector<const ParseNode*> pre;
824   std::vector<const ParseNode*> post;
825   TraverseOrder(file, &pre, &post);
826 
827   // Assign line comments to syntax immediately following.
828   int cur_comment = 0;
829   for (auto* node : pre) {
830     if (node->GetRange().is_null()) {
831       CHECK_EQ(node, file) << "Only expected on top file node";
832       continue;
833     }
834     const Location start = node->GetRange().begin();
835     while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
836       if (start >= line_comment_tokens_[cur_comment].location()) {
837         const_cast<ParseNode*>(node)->comments_mutable()->append_before(
838             line_comment_tokens_[cur_comment]);
839         ++cur_comment;
840       } else {
841         break;
842       }
843     }
844   }
845 
846   // Remaining line comments go at end of file.
847   for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
848        ++cur_comment)
849     file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
850 
851   // Assign suffix to syntax immediately before.
852   cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
853   for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
854        i != post.rend(); ++i) {
855     // Don't assign suffix comments to the function, list, or block, but instead
856     // to the last thing inside.
857     if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
858       continue;
859 
860     Location start = (*i)->GetRange().begin();
861     Location end = (*i)->GetRange().end();
862 
863     // Don't assign suffix comments to something that starts on an earlier
864     // line, so that in:
865     //
866     // sources = [ "a",
867     //     "b" ] # comment
868     //
869     // it's attached to "b", not sources = [ ... ].
870     if (start.line_number() != end.line_number())
871       continue;
872 
873     while (cur_comment >= 0) {
874       if (end <= suffix_comment_tokens_[cur_comment].location()) {
875         const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
876             suffix_comment_tokens_[cur_comment]);
877         --cur_comment;
878       } else {
879         break;
880       }
881     }
882 
883     // Suffix comments were assigned in reverse, so if there were multiple on
884     // the same node, they need to be reversed.
885     if ((*i)->comments() && !(*i)->comments()->suffix().empty())
886       const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
887   }
888 }
889 
IndentFor(int value)890 std::string IndentFor(int value) {
891   return std::string(value, ' ');
892 }
893 
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)894 void RenderToText(const base::Value& node,
895                   int indent_level,
896                   std::ostringstream& os) {
897   const base::Value* child = node.FindKey(std::string("child"));
898   std::string node_type(node.FindKey("type")->GetString());
899   if (node_type == "ACCESSOR") {
900     // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
901     // for the base.
902     os << IndentFor(indent_level) << node_type << std::endl;
903     os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
904        << std::endl;
905   } else {
906     os << IndentFor(indent_level) << node_type;
907     if (node.FindKey("value")) {
908       os << "(" << node.FindKey("value")->GetString() << ")";
909     }
910     os << std::endl;
911   }
912   if (node.FindKey(kJsonBeforeComment)) {
913     for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
914       os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
915          << "\")\n";
916     }
917   }
918   if (node.FindKey(kJsonSuffixComment)) {
919     for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
920       os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
921          << "\")\n";
922     }
923   }
924   if (node.FindKey(kJsonAfterComment)) {
925     for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
926       os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
927          << "\")\n";
928     }
929   }
930   if (child) {
931     for (const base::Value& n : child->GetList()) {
932       RenderToText(n, indent_level + 1, os);
933     }
934   }
935   const base::Value* end = node.FindKey("end");
936   if (end &&
937       (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) ||
938        end->FindKey(kJsonAfterComment))) {
939     RenderToText(*end, indent_level + 1, os);
940   }
941 }
942