1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "gn/parser.h"
6
7 #include <memory>
8 #include <utility>
9
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14
15 const char kGrammar_Help[] =
16 R"*(Language and grammar for GN build files
17
18 Tokens
19
20 GN build files are read as sequences of tokens. While splitting the file
21 into tokens, the next token is the longest sequence of characters that form a
22 valid token.
23
24 White space and comments
25
26 White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27 carriage returns (U+000D), and newlines (U+000A).
28
29 Comments start at the character "#" and stop at the next newline.
30
31 White space and comments are ignored except that they may separate tokens
32 that would otherwise combine into a single token.
33
34 Identifiers
35
36 Identifiers name variables and functions.
37
38 identifier = letter { letter | digit } .
39 letter = "A" ... "Z" | "a" ... "z" | "_" .
40 digit = "0" ... "9" .
41
42 Keywords
43
44 The following keywords are reserved and may not be used as identifiers:
45
46 else false if true
47
48 Integer literals
49
50 An integer literal represents a decimal integer value.
51
52 integer = [ "-" ] digit { digit } .
53
54 Leading zeros and negative zero are disallowed.
55
56 String literals
57
58 A string literal represents a string value consisting of the quoted
59 characters with possible escape sequences and variable expansions.
60
61 string = `"` { char | escape | expansion } `"` .
62 escape = `\` ( "$" | `"` | char ) .
63 BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" .
64 Hex = "0x" [0-9A-Fa-f][0-9A-Fa-f]
65 expansion = "$" ( identifier | BracketExpansion | Hex ) .
66 char = /* any character except "$", `"`, or newline */ .
67
68 After a backslash, certain sequences represent special characters:
69
70 \" U+0022 quotation mark
71 \$ U+0024 dollar sign
72 \\ U+005C backslash
73
74 All other backslashes represent themselves.
75
76 To insert an arbitrary byte value, use $0xFF. For example, to insert a
77 newline character: "Line one$0x0ALine two".
78
79 An expansion will evaluate the variable following the '$' and insert a
80 stringified version of it into the result. For example, to concat two path
81 components with a slash separating them:
82 "$var_one/$var_two"
83 Use the "${var_one}" format to be explicitly deliniate the variable for
84 otherwise-ambiguous cases.
85
86 Punctuation
87
88 The following character sequences represent punctuation:
89
90 + += == != ( )
91 - -= < <= [ ]
92 ! = > >= { }
93 && || . ,
94
95 Grammar
96
97 The input tokens form a syntax tree following a context-free grammar:
98
99 File = StatementList .
100
101 Statement = Assignment | Call | Condition .
102 LValue = identifier | ArrayAccess | ScopeAccess .
103 Assignment = LValue AssignOp Expr .
104 Call = identifier "(" [ ExprList ] ")" [ Block ] .
105 Condition = "if" "(" Expr ")" Block
106 [ "else" ( Condition | Block ) ] .
107 Block = "{" StatementList "}" .
108 StatementList = { Statement } .
109
110 ArrayAccess = identifier "[" Expr "]" .
111 ScopeAccess = identifier "." identifier .
112 Expr = UnaryExpr | Expr BinaryOp Expr .
113 UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .
114 PrimaryExpr = identifier | integer | string | Call
115 | ArrayAccess | ScopeAccess | Block
116 | "(" Expr ")"
117 | "[" [ ExprList [ "," ] ] "]" .
118 ExprList = Expr { "," Expr } .
119
120 AssignOp = "=" | "+=" | "-=" .
121 UnaryOp = "!" .
122 BinaryOp = "+" | "-" // highest priority
123 | "<" | "<=" | ">" | ">="
124 | "==" | "!="
125 | "&&"
126 | "||" . // lowest priority
127
128 All binary operators are left-associative.
129
130 Types
131
132 The GN language is dynamically typed. The following types are used:
133
134 - Boolean: Uses the keywords "true" and "false". There is no implicit
135 conversion between booleans and integers.
136
137 - Integers: All numbers in GN are signed 64-bit integers.
138
139 - Strings: Strings are 8-bit with no enforced encoding. When a string is
140 used to interact with other systems with particular encodings (like the
141 Windows and Mac filesystems) it is assumed to be UTF-8. See "String
142 literals" above for more.
143
144 - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
145 below for more.
146
147 - Scopes: Scopes are like dictionaries that use variable names for keys. See
148 "Scopes" below for more.
149
150 Lists
151
152 Lists are created with [] and using commas to separate items:
153
154 mylist = [ 0, 1, 2, "some string" ]
155
156 A comma after the last item is optional. Lists are dereferenced using 0-based
157 indexing:
158
159 mylist[0] += 1
160 var = mylist[2]
161
162 Lists can be concatenated using the '+' and '+=' operators. Bare values can
163 not be concatenated with lists, to add a single item, it must be put into a
164 list of length one.
165
166 Items can be removed from lists using the '-' and '-=' operators. This will
167 remove all occurrences of every item in the right-hand list from the
168 left-hand list. It is an error to remove an item not in the list. This is to
169 prevent common typos and to detect dead code that is removing things that no
170 longer apply.
171
172 It is an error to use '=' to replace a nonempty list with another nonempty
173 list. This is to prevent accidentally overwriting data when in most cases
174 '+=' was intended. To overwrite a list on purpose, first assign it to the
175 empty list:
176
177 mylist = []
178 mylist = otherlist
179
180 Scopes
181
182 All execution happens in the context of a scope which holds the current state
183 (like variables). With the exception of loops and conditions, '{' introduces
184 a new scope that has a parent reference to the old scope.
185
186 Variable reads recursively search all nested scopes until the variable is
187 found or there are no more scopes. Variable writes always go into the current
188 scope. This means that after the closing '}' (again excepting loops and
189 conditions), all local variables will be restored to the previous values.
190 This also means that "foo = foo" can do useful work by copying a variable
191 into the current scope that was defined in a containing scope.
192
193 Scopes can also be assigned to variables. Such scopes can be created by
194 functions like exec_script, when invoking a template (the template code
195 refers to the variables set by the invoking code by the implicitly-created
196 "invoker" scope), or explicitly like:
197
198 empty_scope = {}
199 myvalues = {
200 foo = 21
201 bar = "something"
202 }
203
204 Inside such a scope definition can be any GN code including conditionals and
205 function calls. After the close of the scope, it will contain all variables
206 explicitly set by the code contained inside it. After this, the values can be
207 read, modified, or added to:
208
209 myvalues.foo += 2
210 empty_scope.new_thing = [ 1, 2, 3 ]
211
212 Scope equality is defined as single-level scopes identical within the current
213 scope. That is, all values in the first scope must be present and identical
214 within the second, and vice versa. Note that this means inherited scopes are
215 always unequal by definition.
216 )*";
217
218 // Precedence constants.
219 //
220 // Currently all operators are left-associative so this list is sequential. To
221 // implement a right-assocative operators in a Pratt parser we would leave gaps
222 // in between the constants, and right-associative operators get a precedence
223 // of "<left-associated-precedence> - 1".
224 enum Precedence {
225 PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence.
226 PRECEDENCE_OR = 2,
227 PRECEDENCE_AND = 3,
228 PRECEDENCE_EQUALITY = 4,
229 PRECEDENCE_RELATION = 5,
230 PRECEDENCE_SUM = 6,
231 PRECEDENCE_PREFIX = 7,
232 PRECEDENCE_CALL = 8,
233 PRECEDENCE_DOT = 9, // Highest precedence.
234 };
235
236 // The top-level for blocks/ifs is recursive descent, the expression parser is
237 // a Pratt parser. The basic idea there is to have the precedences (and
238 // associativities) encoded relative to each other and only parse up until you
239 // hit something of that precedence. There's a dispatch table in expressions_
240 // at the top of parser.cc that describes how each token dispatches if it's
241 // seen as either a prefix or infix operator, and if it's infix, what its
242 // precedence is.
243 //
244 // References:
245 // http://javascript.crockford.com/tdop/tdop.html
246 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
247
248 // Indexed by Token::Type.
249 ParserHelper Parser::expressions_[] = {
250 {nullptr, nullptr, -1}, // INVALID
251 {&Parser::Literal, nullptr, -1}, // INTEGER
252 {&Parser::Literal, nullptr, -1}, // STRING
253 {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN
254 {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN
255 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
256 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
257 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
258 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
259 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
260 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
261 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
262 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
263 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
264 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
265 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
266 {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
267 {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
268 {&Parser::Not, nullptr, -1}, // BANG
269 {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT
270 {&Parser::Group, nullptr, -1}, // LEFT_PAREN
271 {nullptr, nullptr, -1}, // RIGHT_PAREN
272 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
273 {nullptr, nullptr, -1}, // RIGHT_BRACKET
274 {&Parser::Block, nullptr, -1}, // LEFT_BRACE
275 {nullptr, nullptr, -1}, // RIGHT_BRACE
276 {nullptr, nullptr, -1}, // IF
277 {nullptr, nullptr, -1}, // ELSE
278 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
279 {nullptr, nullptr, -1}, // COMMA
280 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
281 {nullptr, nullptr, -1}, // LINE_COMMENT
282 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
283 {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT
284 };
285
Parser(const std::vector<Token> & tokens,Err * err)286 Parser::Parser(const std::vector<Token>& tokens, Err* err)
287 : invalid_token_(Location(), Token::INVALID, std::string_view()),
288 err_(err),
289 cur_(0) {
290 for (const auto& token : tokens) {
291 switch (token.type()) {
292 case Token::LINE_COMMENT:
293 line_comment_tokens_.push_back(token);
294 break;
295 case Token::SUFFIX_COMMENT:
296 suffix_comment_tokens_.push_back(token);
297 break;
298 default:
299 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
300 // through the real parser.
301 tokens_.push_back(token);
302 break;
303 }
304 }
305 }
306
307 Parser::~Parser() = default;
308
309 // static
Parse(const std::vector<Token> & tokens,Err * err)310 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
311 Err* err) {
312 Parser p(tokens, err);
313 return p.ParseFile();
314 }
315
316 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)317 std::unique_ptr<ParseNode> Parser::ParseExpression(
318 const std::vector<Token>& tokens,
319 Err* err) {
320 Parser p(tokens, err);
321 std::unique_ptr<ParseNode> expr = p.ParseExpression();
322 if (!p.at_end() && !err->has_error()) {
323 *err = Err(p.cur_token(), "Trailing garbage");
324 return nullptr;
325 }
326 return expr;
327 }
328
329 // static
ParseValue(const std::vector<Token> & tokens,Err * err)330 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
331 Err* err) {
332 for (const Token& token : tokens) {
333 switch (token.type()) {
334 case Token::INTEGER:
335 case Token::STRING:
336 case Token::TRUE_TOKEN:
337 case Token::FALSE_TOKEN:
338 case Token::LEFT_BRACKET:
339 case Token::RIGHT_BRACKET:
340 case Token::COMMA:
341 continue;
342 default:
343 *err = Err(token, "Invalid token in literal value");
344 return nullptr;
345 }
346 }
347
348 return ParseExpression(tokens, err);
349 }
350
IsAssignment(const ParseNode * node) const351 bool Parser::IsAssignment(const ParseNode* node) const {
352 return node && node->AsBinaryOp() &&
353 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
354 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
355 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
356 }
357
IsStatementBreak(Token::Type token_type) const358 bool Parser::IsStatementBreak(Token::Type token_type) const {
359 switch (token_type) {
360 case Token::IDENTIFIER:
361 case Token::LEFT_BRACE:
362 case Token::RIGHT_BRACE:
363 case Token::IF:
364 case Token::ELSE:
365 return true;
366 default:
367 return false;
368 }
369 }
370
LookAhead(Token::Type type)371 bool Parser::LookAhead(Token::Type type) {
372 if (at_end())
373 return false;
374 return cur_token().type() == type;
375 }
376
Match(Token::Type type)377 bool Parser::Match(Token::Type type) {
378 if (!LookAhead(type))
379 return false;
380 Consume();
381 return true;
382 }
383
Consume(Token::Type type,const char * error_message)384 const Token& Parser::Consume(Token::Type type, const char* error_message) {
385 Token::Type types[1] = {type};
386 return Consume(types, 1, error_message);
387 }
388
Consume(Token::Type * types,size_t num_types,const char * error_message)389 const Token& Parser::Consume(Token::Type* types,
390 size_t num_types,
391 const char* error_message) {
392 if (has_error()) {
393 // Don't overwrite current error, but make progress through tokens so that
394 // a loop that's expecting a particular token will still terminate.
395 if (!at_end())
396 cur_++;
397 return invalid_token_;
398 }
399 if (at_end()) {
400 const char kEOFMsg[] = "I hit EOF instead.";
401 if (tokens_.empty())
402 *err_ = Err(Location(), error_message, kEOFMsg);
403 else
404 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
405 return invalid_token_;
406 }
407
408 for (size_t i = 0; i < num_types; ++i) {
409 if (cur_token().type() == types[i])
410 return Consume();
411 }
412 *err_ = Err(cur_token(), error_message);
413 return invalid_token_;
414 }
415
Consume()416 const Token& Parser::Consume() {
417 return tokens_[cur_++];
418 }
419
ParseExpression()420 std::unique_ptr<ParseNode> Parser::ParseExpression() {
421 return ParseExpression(0);
422 }
423
ParseExpression(int precedence)424 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
425 if (at_end())
426 return std::unique_ptr<ParseNode>();
427
428 const Token& token = Consume();
429 PrefixFunc prefix = expressions_[token.type()].prefix;
430
431 if (prefix == nullptr) {
432 *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
433 return std::unique_ptr<ParseNode>();
434 }
435
436 std::unique_ptr<ParseNode> left = (this->*prefix)(token);
437 if (has_error())
438 return left;
439
440 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
441 precedence <= expressions_[cur_token().type()].precedence) {
442 const Token& next_token = Consume();
443 InfixFunc infix = expressions_[next_token.type()].infix;
444 if (infix == nullptr) {
445 *err_ = Err(next_token,
446 "Unexpected token '" + std::string(next_token.value()) + "'");
447 return std::unique_ptr<ParseNode>();
448 }
449 left = (this->*infix)(std::move(left), next_token);
450 if (has_error())
451 return std::unique_ptr<ParseNode>();
452 }
453
454 return left;
455 }
456
Block(const Token & token)457 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
458 // This entrypoint into ParseBlock means it's part of an expression and we
459 // always want the result.
460 return ParseBlock(token, BlockNode::RETURNS_SCOPE);
461 }
462
Literal(const Token & token)463 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
464 return std::make_unique<LiteralNode>(token);
465 }
466
Name(const Token & token)467 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
468 return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
469 }
470
BlockComment(const Token & token)471 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
472 std::unique_ptr<BlockCommentNode> comment =
473 std::make_unique<BlockCommentNode>();
474 comment->set_comment(token);
475 return std::move(comment);
476 }
477
Group(const Token & token)478 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
479 std::unique_ptr<ParseNode> expr = ParseExpression();
480 if (has_error())
481 return std::unique_ptr<ParseNode>();
482 Consume(Token::RIGHT_PAREN, "Expected ')'");
483 return expr;
484 }
485
Not(const Token & token)486 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
487 std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
488 if (has_error())
489 return std::unique_ptr<ParseNode>();
490 if (!expr) {
491 if (!has_error())
492 *err_ = Err(token, "Expected right-hand side for '!'.");
493 return std::unique_ptr<ParseNode>();
494 }
495 std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
496 unary_op->set_op(token);
497 unary_op->set_operand(std::move(expr));
498 return std::move(unary_op);
499 }
500
List(const Token & node)501 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
502 std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
503 if (!has_error() && !at_end())
504 Consume(Token::RIGHT_BRACKET, "Expected ']'");
505 return list;
506 }
507
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)508 std::unique_ptr<ParseNode> Parser::BinaryOperator(
509 std::unique_ptr<ParseNode> left,
510 const Token& token) {
511 std::unique_ptr<ParseNode> right =
512 ParseExpression(expressions_[token.type()].precedence + 1);
513 if (!right) {
514 if (!has_error()) {
515 *err_ = Err(token, "Expected right-hand side for '" +
516 std::string(token.value()) + "'");
517 }
518 return std::unique_ptr<ParseNode>();
519 }
520 std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
521 binary_op->set_op(token);
522 binary_op->set_left(std::move(left));
523 binary_op->set_right(std::move(right));
524 return std::move(binary_op);
525 }
526
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)527 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
528 std::unique_ptr<ParseNode> left,
529 const Token& token) {
530 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
531 list->set_begin_token(token);
532 list->set_end(std::make_unique<EndNode>(token));
533 std::unique_ptr<BlockNode> block;
534 bool has_arg = false;
535 if (LookAhead(Token::LEFT_PAREN)) {
536 const Token& start_token = Consume();
537 // Parsing a function call.
538 has_arg = true;
539 if (Match(Token::RIGHT_PAREN)) {
540 // Nothing, just an empty call.
541 } else {
542 list = ParseList(start_token, Token::RIGHT_PAREN, false);
543 if (has_error())
544 return std::unique_ptr<ParseNode>();
545 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
546 }
547 // Optionally with a scope.
548 if (LookAhead(Token::LEFT_BRACE)) {
549 block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
550 if (has_error())
551 return std::unique_ptr<ParseNode>();
552 }
553 }
554
555 if (!left && !has_arg) {
556 // Not a function call, just a standalone identifier.
557 return std::make_unique<IdentifierNode>(token);
558 }
559 std::unique_ptr<FunctionCallNode> func_call =
560 std::make_unique<FunctionCallNode>();
561 func_call->set_function(token);
562 func_call->set_args(std::move(list));
563 if (block)
564 func_call->set_block(std::move(block));
565 return std::move(func_call);
566 }
567
Assignment(std::unique_ptr<ParseNode> left,const Token & token)568 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
569 const Token& token) {
570 if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
571 *err_ = Err(left.get(),
572 "The left-hand side of an assignment must be an identifier, "
573 "scope access, or array access.");
574 return std::unique_ptr<ParseNode>();
575 }
576 std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
577 if (!value) {
578 if (!has_error())
579 *err_ = Err(token, "Expected right-hand side for assignment.");
580 return std::unique_ptr<ParseNode>();
581 }
582 std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
583 assign->set_op(token);
584 assign->set_left(std::move(left));
585 assign->set_right(std::move(value));
586 return std::move(assign);
587 }
588
Subscript(std::unique_ptr<ParseNode> left,const Token & token)589 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
590 const Token& token) {
591 // TODO: Maybe support more complex expressions like a[0][0]. This would
592 // require work on the evaluator too.
593 if (left->AsIdentifier() == nullptr) {
594 *err_ = Err(
595 left.get(), "May only subscript identifiers.",
596 "The thing on the left hand side of the [] must be an identifier\n"
597 "and not an expression. If you need this, you'll have to assign the\n"
598 "value to a temporary before subscripting. Sorry.");
599 return std::unique_ptr<ParseNode>();
600 }
601 std::unique_ptr<ParseNode> value = ParseExpression();
602 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
603 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
604 accessor->set_base(left->AsIdentifier()->value());
605 accessor->set_subscript(std::move(value));
606 return std::move(accessor);
607 }
608
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)609 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
610 const Token& token) {
611 if (left->AsIdentifier() == nullptr) {
612 *err_ = Err(
613 left.get(), "May only use \".\" for identifiers.",
614 "The thing on the left hand side of the dot must be an identifier\n"
615 "and not an expression. If you need this, you'll have to assign the\n"
616 "value to a temporary first. Sorry.");
617 return std::unique_ptr<ParseNode>();
618 }
619
620 std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
621 if (!right || !right->AsIdentifier()) {
622 *err_ = Err(
623 token, "Expected identifier for right-hand-side of \".\"",
624 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
625 return std::unique_ptr<ParseNode>();
626 }
627
628 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
629 accessor->set_base(left->AsIdentifier()->value());
630 accessor->set_member(std::unique_ptr<IdentifierNode>(
631 static_cast<IdentifierNode*>(right.release())));
632 return std::move(accessor);
633 }
634
635 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)636 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
637 Token::Type stop_before,
638 bool allow_trailing_comma) {
639 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
640 list->set_begin_token(start_token);
641 bool just_got_comma = false;
642 bool first_time = true;
643 while (!LookAhead(stop_before)) {
644 if (!first_time) {
645 if (!just_got_comma) {
646 // Require commas separate things in lists.
647 *err_ = Err(cur_token(), "Expected comma between items.");
648 return std::unique_ptr<ListNode>();
649 }
650 }
651 first_time = false;
652
653 // Why _OR? We're parsing things that are higher precedence than the ,
654 // that separates the items of the list. , should appear lower than
655 // boolean expressions (the lowest of which is OR), but above assignments.
656 list->append_item(ParseExpression(PRECEDENCE_OR));
657 if (has_error())
658 return std::unique_ptr<ListNode>();
659 if (at_end()) {
660 *err_ =
661 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
662 return std::unique_ptr<ListNode>();
663 }
664 if (list->contents().back()->AsBlockComment()) {
665 // If there was a comment inside the list, we don't need a comma to the
666 // next item, so pretend we got one, if we're expecting one.
667 just_got_comma = allow_trailing_comma;
668 } else {
669 just_got_comma = Match(Token::COMMA);
670 }
671 }
672 if (just_got_comma && !allow_trailing_comma) {
673 *err_ = Err(cur_token(), "Trailing comma");
674 return std::unique_ptr<ListNode>();
675 }
676 list->set_end(std::make_unique<EndNode>(cur_token()));
677 return list;
678 }
679
ParseFile()680 std::unique_ptr<ParseNode> Parser::ParseFile() {
681 std::unique_ptr<BlockNode> file =
682 std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
683 for (;;) {
684 if (at_end())
685 break;
686 std::unique_ptr<ParseNode> statement = ParseStatement();
687 if (!statement)
688 break;
689 file->append_statement(std::move(statement));
690 }
691 if (!at_end() && !has_error())
692 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
693 if (has_error())
694 return std::unique_ptr<ParseNode>();
695
696 // TODO(scottmg): If this is measurably expensive, it could be done only
697 // when necessary (when reformatting, or during tests). Comments are
698 // separate from the parse tree at this point, so downstream code can remain
699 // ignorant of them.
700 AssignComments(file.get());
701
702 return std::move(file);
703 }
704
ParseStatement()705 std::unique_ptr<ParseNode> Parser::ParseStatement() {
706 if (LookAhead(Token::IF)) {
707 return ParseCondition();
708 } else if (LookAhead(Token::BLOCK_COMMENT)) {
709 return BlockComment(Consume());
710 } else {
711 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
712 // to allow "pointless" expressions and return ParseExpression() directly.
713 std::unique_ptr<ParseNode> stmt = ParseExpression();
714 if (stmt) {
715 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
716 return stmt;
717 }
718 if (!has_error()) {
719 const Token& token = cur_or_last_token();
720 *err_ = Err(token, "Expecting assignment or function call.");
721 }
722 return std::unique_ptr<ParseNode>();
723 }
724 }
725
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)726 std::unique_ptr<BlockNode> Parser::ParseBlock(
727 const Token& begin_brace,
728 BlockNode::ResultMode result_mode) {
729 if (has_error())
730 return std::unique_ptr<BlockNode>();
731 std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
732 block->set_begin_token(begin_brace);
733
734 for (;;) {
735 if (LookAhead(Token::RIGHT_BRACE)) {
736 block->set_end(std::make_unique<EndNode>(Consume()));
737 break;
738 }
739
740 std::unique_ptr<ParseNode> statement = ParseStatement();
741 if (!statement)
742 return std::unique_ptr<BlockNode>();
743 block->append_statement(std::move(statement));
744 }
745 return block;
746 }
747
ParseCondition()748 std::unique_ptr<ParseNode> Parser::ParseCondition() {
749 std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
750 condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
751 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
752 condition->set_condition(ParseExpression());
753 if (IsAssignment(condition->condition()))
754 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
755 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
756 condition->set_if_true(ParseBlock(
757 Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
758 BlockNode::DISCARDS_RESULT));
759 if (Match(Token::ELSE)) {
760 if (LookAhead(Token::LEFT_BRACE)) {
761 condition->set_if_false(
762 ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
763 } else if (LookAhead(Token::IF)) {
764 condition->set_if_false(ParseStatement());
765 } else {
766 *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
767 return std::unique_ptr<ParseNode>();
768 }
769 }
770 if (has_error())
771 return std::unique_ptr<ParseNode>();
772 return std::move(condition);
773 }
774
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)775 void Parser::TraverseOrder(const ParseNode* root,
776 std::vector<const ParseNode*>* pre,
777 std::vector<const ParseNode*>* post) {
778 if (root) {
779 pre->push_back(root);
780
781 if (const AccessorNode* accessor = root->AsAccessor()) {
782 TraverseOrder(accessor->subscript(), pre, post);
783 TraverseOrder(accessor->member(), pre, post);
784 } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
785 TraverseOrder(binop->left(), pre, post);
786 TraverseOrder(binop->right(), pre, post);
787 } else if (const BlockNode* block = root->AsBlock()) {
788 for (const auto& statement : block->statements())
789 TraverseOrder(statement.get(), pre, post);
790 TraverseOrder(block->End(), pre, post);
791 } else if (const ConditionNode* condition = root->AsCondition()) {
792 TraverseOrder(condition->condition(), pre, post);
793 TraverseOrder(condition->if_true(), pre, post);
794 TraverseOrder(condition->if_false(), pre, post);
795 } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
796 TraverseOrder(func_call->args(), pre, post);
797 TraverseOrder(func_call->block(), pre, post);
798 } else if (root->AsIdentifier()) {
799 // Nothing.
800 } else if (const ListNode* list = root->AsList()) {
801 for (const auto& node : list->contents())
802 TraverseOrder(node.get(), pre, post);
803 TraverseOrder(list->End(), pre, post);
804 } else if (root->AsLiteral()) {
805 // Nothing.
806 } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
807 TraverseOrder(unaryop->operand(), pre, post);
808 } else if (root->AsBlockComment()) {
809 // Nothing.
810 } else if (root->AsEnd()) {
811 // Nothing.
812 } else {
813 CHECK(false) << "Unhandled case in TraverseOrder.";
814 }
815
816 post->push_back(root);
817 }
818 }
819
AssignComments(ParseNode * file)820 void Parser::AssignComments(ParseNode* file) {
821 // Start by generating a pre- and post- order traversal of the tree so we
822 // can determine what's before and after comments.
823 std::vector<const ParseNode*> pre;
824 std::vector<const ParseNode*> post;
825 TraverseOrder(file, &pre, &post);
826
827 // Assign line comments to syntax immediately following.
828 int cur_comment = 0;
829 for (auto* node : pre) {
830 if (node->GetRange().is_null()) {
831 CHECK_EQ(node, file) << "Only expected on top file node";
832 continue;
833 }
834 const Location start = node->GetRange().begin();
835 while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
836 if (start >= line_comment_tokens_[cur_comment].location()) {
837 const_cast<ParseNode*>(node)->comments_mutable()->append_before(
838 line_comment_tokens_[cur_comment]);
839 ++cur_comment;
840 } else {
841 break;
842 }
843 }
844 }
845
846 // Remaining line comments go at end of file.
847 for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
848 ++cur_comment)
849 file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
850
851 // Assign suffix to syntax immediately before.
852 cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
853 for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
854 i != post.rend(); ++i) {
855 // Don't assign suffix comments to the function, list, or block, but instead
856 // to the last thing inside.
857 if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
858 continue;
859
860 Location start = (*i)->GetRange().begin();
861 Location end = (*i)->GetRange().end();
862
863 // Don't assign suffix comments to something that starts on an earlier
864 // line, so that in:
865 //
866 // sources = [ "a",
867 // "b" ] # comment
868 //
869 // it's attached to "b", not sources = [ ... ].
870 if (start.line_number() != end.line_number())
871 continue;
872
873 while (cur_comment >= 0) {
874 if (end <= suffix_comment_tokens_[cur_comment].location()) {
875 const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
876 suffix_comment_tokens_[cur_comment]);
877 --cur_comment;
878 } else {
879 break;
880 }
881 }
882
883 // Suffix comments were assigned in reverse, so if there were multiple on
884 // the same node, they need to be reversed.
885 if ((*i)->comments() && !(*i)->comments()->suffix().empty())
886 const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
887 }
888 }
889
IndentFor(int value)890 std::string IndentFor(int value) {
891 return std::string(value, ' ');
892 }
893
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)894 void RenderToText(const base::Value& node,
895 int indent_level,
896 std::ostringstream& os) {
897 const base::Value* child = node.FindKey(std::string("child"));
898 std::string node_type(node.FindKey("type")->GetString());
899 if (node_type == "ACCESSOR") {
900 // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
901 // for the base.
902 os << IndentFor(indent_level) << node_type << std::endl;
903 os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
904 << std::endl;
905 } else {
906 os << IndentFor(indent_level) << node_type;
907 if (node.FindKey("value")) {
908 os << "(" << node.FindKey("value")->GetString() << ")";
909 }
910 os << std::endl;
911 }
912 if (node.FindKey(kJsonBeforeComment)) {
913 for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
914 os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
915 << "\")\n";
916 }
917 }
918 if (node.FindKey(kJsonSuffixComment)) {
919 for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
920 os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
921 << "\")\n";
922 }
923 }
924 if (node.FindKey(kJsonAfterComment)) {
925 for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
926 os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
927 << "\")\n";
928 }
929 }
930 if (child) {
931 for (const base::Value& n : child->GetList()) {
932 RenderToText(n, indent_level + 1, os);
933 }
934 }
935 const base::Value* end = node.FindKey("end");
936 if (end &&
937 (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) ||
938 end->FindKey(kJsonAfterComment))) {
939 RenderToText(*end, indent_level + 1, os);
940 }
941 }
942