src/gn/parser.cc

// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "gn/parser.h"

#include <memory>
#include <utility>

#include "base/logging.h"
#include "gn/functions.h"
#include "gn/operators.h"
#include "gn/token.h"

const char kGrammar_Help[] =
    R"*(Language and grammar for GN build files

Tokens

  GN build files are read as sequences of tokens.  While splitting the file
  into tokens, the next token is the longest sequence of characters that form a
  valid token.

White space and comments

  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
  carriage returns (U+000D), and newlines (U+000A).

  Comments start at the character "#" and stop at the next newline.

  White space and comments are ignored except that they may separate tokens
  that would otherwise combine into a single token.

Identifiers

  Identifiers name variables and functions.

      identifier = letter { letter | digit } .
      letter     = "A" ... "Z" | "a" ... "z" | "_" .
      digit      = "0" ... "9" .

Keywords

  The following keywords are reserved and may not be used as identifiers:

          else    false   if      true

Integer literals

  An integer literal represents a decimal integer value.

      integer = [ "-" ] digit { digit } .

  Leading zeros and negative zero are disallowed.

String literals

  A string literal represents a string value consisting of the quoted
  characters with possible escape sequences and variable expansions.

      string           = `"` { char | escape | expansion } `"` .
      escape           = `\` ( "$" | `"` | char ) .
      BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" .
      Hex              = "0x" [0-9A-Fa-f][0-9A-Fa-f]
      expansion        = "$" ( identifier | BracketExpansion | Hex ) .
      char             = /* any character except "$", `"`, or newline */ .

  After a backslash, certain sequences represent special characters:

          \"    U+0022    quotation mark
          \$    U+0024    dollar sign
          \\    U+005C    backslash

  All other backslashes represent themselves.

  To insert an arbitrary byte value, use $0xFF. For example, to insert a
  newline character: "Line one$0x0ALine two".

  An expansion will evaluate the variable following the '$' and insert a
  stringified version of it into the result. For example, to concat two path
  components with a slash separating them:
    "$var_one/$var_two"
  Use the "${var_one}" format to be explicitly deliniate the variable for
  otherwise-ambiguous cases.

Punctuation

  The following character sequences represent punctuation:

          +       +=      ==      !=      (       )
          -       -=      <       <=      [       ]
          !       =       >       >=      {       }
                          &&      ||      .       ,

Grammar

  The input tokens form a syntax tree following a context-free grammar:

      File = StatementList .

      Statement     = Assignment | Call | Condition .
      LValue        = identifier | ArrayAccess | ScopeAccess .
      Assignment    = LValue AssignOp Expr .
      Call          = identifier "(" [ ExprList ] ")" [ Block ] .
      Condition     = "if" "(" Expr ")" Block
                      [ "else" ( Condition | Block ) ] .
      Block         = "{" StatementList "}" .
      StatementList = { Statement } .

      ArrayAccess = identifier "[" Expr "]" .
      ScopeAccess = identifier "." identifier .
      Expr        = UnaryExpr | Expr BinaryOp Expr .
      UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .
      PrimaryExpr = identifier | integer | string | Call
                  | ArrayAccess | ScopeAccess | Block
                  | "(" Expr ")"
                  | "[" [ ExprList [ "," ] ] "]" .
      ExprList    = Expr { "," Expr } .

      AssignOp = "=" | "+=" | "-=" .
      UnaryOp  = "!" .
      BinaryOp = "+" | "-"                  // highest priority
               | "<" | "<=" | ">" | ">="
               | "==" | "!="
               | "&&"
               | "||" .                     // lowest priority

  All binary operators are left-associative.

Types

  The GN language is dynamically typed. The following types are used:

   - Boolean: Uses the keywords "true" and "false". There is no implicit
     conversion between booleans and integers.

   - Integers: All numbers in GN are signed 64-bit integers.

   - Strings: Strings are 8-bit with no enforced encoding. When a string is
     used to interact with other systems with particular encodings (like the
     Windows and Mac filesystems) it is assumed to be UTF-8. See "String
     literals" above for more.

   - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
     below for more.

   - Scopes: Scopes are like dictionaries that use variable names for keys. See
     "Scopes" below for more.

Lists

  Lists are created with [] and using commas to separate items:

       mylist = [ 0, 1, 2, "some string" ]

  A comma after the last item is optional. Lists are dereferenced using 0-based
  indexing:

       mylist[0] += 1
       var = mylist[2]

  Lists can be concatenated using the '+' and '+=' operators. Bare values can
  not be concatenated with lists, to add a single item, it must be put into a
  list of length one.

  Items can be removed from lists using the '-' and '-=' operators. This will
  remove all occurrences of every item in the right-hand list from the
  left-hand list. It is an error to remove an item not in the list. This is to
  prevent common typos and to detect dead code that is removing things that no
  longer apply.

  It is an error to use '=' to replace a nonempty list with another nonempty
  list. This is to prevent accidentally overwriting data when in most cases
  '+=' was intended. To overwrite a list on purpose, first assign it to the
  empty list:

    mylist = []
    mylist = otherlist

Scopes

  All execution happens in the context of a scope which holds the current state
  (like variables). With the exception of loops and conditions, '{' introduces
  a new scope that has a parent reference to the old scope.

  Variable reads recursively search all nested scopes until the variable is
  found or there are no more scopes. Variable writes always go into the current
  scope. This means that after the closing '}' (again excepting loops and
  conditions), all local variables will be restored to the previous values.
  This also means that "foo = foo" can do useful work by copying a variable
  into the current scope that was defined in a containing scope.

  Scopes can also be assigned to variables. Such scopes can be created by
  functions like exec_script, when invoking a template (the template code
  refers to the variables set by the invoking code by the implicitly-created
  "invoker" scope), or explicitly like:

    empty_scope = {}
    myvalues = {
      foo = 21
      bar = "something"
    }

  Inside such a scope definition can be any GN code including conditionals and
  function calls. After the close of the scope, it will contain all variables
  explicitly set by the code contained inside it. After this, the values can be
  read, modified, or added to:

    myvalues.foo += 2
    empty_scope.new_thing = [ 1, 2, 3 ]

  Scope equality is defined as single-level scopes identical within the current
  scope. That is, all values in the first scope must be present and identical
  within the second, and vice versa. Note that this means inherited scopes are
  always unequal by definition.
)*";

// Precedence constants.
//
// Currently all operators are left-associative so this list is sequential. To
// implement a right-assocative operators in a Pratt parser we would leave gaps
// in between the constants, and right-associative operators get a precedence
// of "<left-associated-precedence> - 1".
enum Precedence {
  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  PRECEDENCE_OR = 2,
  PRECEDENCE_AND = 3,
  PRECEDENCE_EQUALITY = 4,
  PRECEDENCE_RELATION = 5,
  PRECEDENCE_SUM = 6,
  PRECEDENCE_PREFIX = 7,
  PRECEDENCE_CALL = 8,
  PRECEDENCE_DOT = 9,  // Highest precedence.
};

// The top-level for blocks/ifs is recursive descent, the expression parser is
// a Pratt parser. The basic idea there is to have the precedences (and
// associativities) encoded relative to each other and only parse up until you
// hit something of that precedence. There's a dispatch table in expressions_
// at the top of parser.cc that describes how each token dispatches if it's
// seen as either a prefix or infix operator, and if it's infix, what its
// precedence is.
//
// References:
// http://javascript.crockford.com/tdop/tdop.html
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/

// Indexed by Token::Type.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},                                   // INVALID
    {&Parser::Literal, nullptr, -1},                          // INTEGER
    {&Parser::Literal, nullptr, -1},                          // STRING
    {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},                              // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
    {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
    {nullptr, nullptr, -1},                                   // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
    {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
    {&Parser::Block, nullptr, -1},                            // LEFT_BRACE
    {nullptr, nullptr, -1},                                   // RIGHT_BRACE
    {nullptr, nullptr, -1},                                   // IF
    {nullptr, nullptr, -1},                                   // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},                                       // COMMA
    {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},                // LINE_COMMENT
    {nullptr, nullptr, -1},                // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};

Parser::Parser(const std::vector<Token>& tokens, Err* err)
    : invalid_token_(Location(), Token::INVALID, std::string_view()),
      err_(err),
      cur_(0) {
  for (const auto& token : tokens) {
    switch (token.type()) {
      case Token::LINE_COMMENT:
        line_comment_tokens_.push_back(token);
        break;
      case Token::SUFFIX_COMMENT:
        suffix_comment_tokens_.push_back(token);
        break;
      default:
        // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
        // through the real parser.
        tokens_.push_back(token);
        break;
    }
  }
}

Parser::~Parser() = default;

// static
std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
                                         Err* err) {
  Parser p(tokens, err);
  return p.ParseFile();
}

// static
std::unique_ptr<ParseNode> Parser::ParseExpression(
    const std::vector<Token>& tokens,
    Err* err) {
  Parser p(tokens, err);
  std::unique_ptr<ParseNode> expr = p.ParseExpression();
  if (!p.at_end() && !err->has_error()) {
    *err = Err(p.cur_token(), "Trailing garbage");
    return nullptr;
  }
  return expr;
}

// static
std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
                                              Err* err) {
  for (const Token& token : tokens) {
    switch (token.type()) {
      case Token::INTEGER:
      case Token::STRING:
      case Token::TRUE_TOKEN:
      case Token::FALSE_TOKEN:
      case Token::LEFT_BRACKET:
      case Token::RIGHT_BRACKET:
      case Token::COMMA:
        continue;
      default:
        *err = Err(token, "Invalid token in literal value");
        return nullptr;
    }
  }

  return ParseExpression(tokens, err);
}

bool Parser::IsAssignment(const ParseNode* node) const {
  return node && node->AsBinaryOp() &&
         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
}

bool Parser::IsStatementBreak(Token::Type token_type) const {
  switch (token_type) {
    case Token::IDENTIFIER:
    case Token::LEFT_BRACE:
    case Token::RIGHT_BRACE:
    case Token::IF:
    case Token::ELSE:
      return true;
    default:
      return false;
  }
}

bool Parser::LookAhead(Token::Type type) {
  if (at_end())
    return false;
  return cur_token().type() == type;
}

bool Parser::Match(Token::Type type) {
  if (!LookAhead(type))
    return false;
  Consume();
  return true;
}

const Token& Parser::Consume(Token::Type type, const char* error_message) {
  Token::Type types[1] = {type};
  return Consume(types, 1, error_message);
}

const Token& Parser::Consume(Token::Type* types,
                             size_t num_types,
                             const char* error_message) {
  if (has_error()) {
    // Don't overwrite current error, but make progress through tokens so that
    // a loop that's expecting a particular token will still terminate.
    if (!at_end())
      cur_++;
    return invalid_token_;
  }
  if (at_end()) {
    const char kEOFMsg[] = "I hit EOF instead.";
    if (tokens_.empty())
      *err_ = Err(Location(), error_message, kEOFMsg);
    else
      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    return invalid_token_;
  }

  for (size_t i = 0; i < num_types; ++i) {
    if (cur_token().type() == types[i])
      return Consume();
  }
  *err_ = Err(cur_token(), error_message);
  return invalid_token_;
}

const Token& Parser::Consume() {
  return tokens_[cur_++];
}

std::unique_ptr<ParseNode> Parser::ParseExpression() {
  return ParseExpression(0);
}

std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
  if (at_end())
    return std::unique_ptr<ParseNode>();

  const Token& token = Consume();
  PrefixFunc prefix = expressions_[token.type()].prefix;

  if (prefix == nullptr) {
    *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
    return std::unique_ptr<ParseNode>();
  }

  std::unique_ptr<ParseNode> left = (this->*prefix)(token);
  if (has_error())
    return left;

  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
         precedence <= expressions_[cur_token().type()].precedence) {
    const Token& next_token = Consume();
    InfixFunc infix = expressions_[next_token.type()].infix;
    if (infix == nullptr) {
      *err_ = Err(next_token,
                  "Unexpected token '" + std::string(next_token.value()) + "'");
      return std::unique_ptr<ParseNode>();
    }
    left = (this->*infix)(std::move(left), next_token);
    if (has_error())
      return std::unique_ptr<ParseNode>();
  }

  return left;
}

std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
  // This entrypoint into ParseBlock means it's part of an expression and we
  // always want the result.
  return ParseBlock(token, BlockNode::RETURNS_SCOPE);
}

std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
  return std::make_unique<LiteralNode>(token);
}

std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
  return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
}

std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
  std::unique_ptr<BlockCommentNode> comment =
      std::make_unique<BlockCommentNode>();
  comment->set_comment(token);
  return std::move(comment);
}

std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
  std::unique_ptr<ParseNode> expr = ParseExpression();
  if (has_error())
    return std::unique_ptr<ParseNode>();
  Consume(Token::RIGHT_PAREN, "Expected ')'");
  return expr;
}

std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
  std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
  if (has_error())
    return std::unique_ptr<ParseNode>();
  if (!expr) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for '!'.");
    return std::unique_ptr<ParseNode>();
  }
  std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
  unary_op->set_op(token);
  unary_op->set_operand(std::move(expr));
  return std::move(unary_op);
}

std::unique_ptr<ParseNode> Parser::List(const Token& node) {
  std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
  if (!has_error() && !at_end())
    Consume(Token::RIGHT_BRACKET, "Expected ']'");
  return list;
}

std::unique_ptr<ParseNode> Parser::BinaryOperator(
    std::unique_ptr<ParseNode> left,
    const Token& token) {
  std::unique_ptr<ParseNode> right =
      ParseExpression(expressions_[token.type()].precedence + 1);
  if (!right) {
    if (!has_error()) {
      *err_ = Err(token, "Expected right-hand side for '" +
                             std::string(token.value()) + "'");
    }
    return std::unique_ptr<ParseNode>();
  }
  std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
  binary_op->set_op(token);
  binary_op->set_left(std::move(left));
  binary_op->set_right(std::move(right));
  return std::move(binary_op);
}

std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
    std::unique_ptr<ParseNode> left,
    const Token& token) {
  std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
  list->set_begin_token(token);
  list->set_end(std::make_unique<EndNode>(token));
  std::unique_ptr<BlockNode> block;
  bool has_arg = false;
  if (LookAhead(Token::LEFT_PAREN)) {
    const Token& start_token = Consume();
    // Parsing a function call.
    has_arg = true;
    if (Match(Token::RIGHT_PAREN)) {
      // Nothing, just an empty call.
    } else {
      list = ParseList(start_token, Token::RIGHT_PAREN, false);
      if (has_error())
        return std::unique_ptr<ParseNode>();
      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    }
    // Optionally with a scope.
    if (LookAhead(Token::LEFT_BRACE)) {
      block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
      if (has_error())
        return std::unique_ptr<ParseNode>();
    }
  }

  if (!left && !has_arg) {
    // Not a function call, just a standalone identifier.
    return std::make_unique<IdentifierNode>(token);
  }
  std::unique_ptr<FunctionCallNode> func_call =
      std::make_unique<FunctionCallNode>();
  func_call->set_function(token);
  func_call->set_args(std::move(list));
  if (block)
    func_call->set_block(std::move(block));
  return std::move(func_call);
}

std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
                                              const Token& token) {
  if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
    *err_ = Err(left.get(),
                "The left-hand side of an assignment must be an identifier, "
                "scope access, or array access.");
    return std::unique_ptr<ParseNode>();
  }
  std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
  if (!value) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for assignment.");
    return std::unique_ptr<ParseNode>();
  }
  std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
  assign->set_op(token);
  assign->set_left(std::move(left));
  assign->set_right(std::move(value));
  return std::move(assign);
}

std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
                                             const Token& token) {
  // TODO: Maybe support more complex expressions like a[0][0]. This would
  // require work on the evaluator too.
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(
        left.get(), "May only subscript identifiers.",
        "The thing on the left hand side of the [] must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary before subscripting. Sorry.");
    return std::unique_ptr<ParseNode>();
  }
  std::unique_ptr<ParseNode> value = ParseExpression();
  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
  std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_subscript(std::move(value));
  return std::move(accessor);
}

std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
                                               const Token& token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(
        left.get(), "May only use \".\" for identifiers.",
        "The thing on the left hand side of the dot must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary first. Sorry.");
    return std::unique_ptr<ParseNode>();
  }

  std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
  if (!right || !right->AsIdentifier()) {
    *err_ = Err(
        token, "Expected identifier for right-hand-side of \".\"",
        "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    return std::unique_ptr<ParseNode>();
  }

  std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_member(std::unique_ptr<IdentifierNode>(
      static_cast<IdentifierNode*>(right.release())));
  return std::move(accessor);
}

// Does not Consume the start or end token.
std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
                                            Token::Type stop_before,
                                            bool allow_trailing_comma) {
  std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
  list->set_begin_token(start_token);
  bool just_got_comma = false;
  bool first_time = true;
  while (!LookAhead(stop_before)) {
    if (!first_time) {
      if (!just_got_comma) {
        // Require commas separate things in lists.
        *err_ = Err(cur_token(), "Expected comma between items.");
        return std::unique_ptr<ListNode>();
      }
    }
    first_time = false;

    // Why _OR? We're parsing things that are higher precedence than the ,
    // that separates the items of the list. , should appear lower than
    // boolean expressions (the lowest of which is OR), but above assignments.
    list->append_item(ParseExpression(PRECEDENCE_OR));
    if (has_error())
      return std::unique_ptr<ListNode>();
    if (at_end()) {
      *err_ =
          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
      return std::unique_ptr<ListNode>();
    }
    if (list->contents().back()->AsBlockComment()) {
      // If there was a comment inside the list, we don't need a comma to the
      // next item, so pretend we got one, if we're expecting one.
      just_got_comma = allow_trailing_comma;
    } else {
      just_got_comma = Match(Token::COMMA);
    }
  }
  if (just_got_comma && !allow_trailing_comma) {
    *err_ = Err(cur_token(), "Trailing comma");
    return std::unique_ptr<ListNode>();
  }
  list->set_end(std::make_unique<EndNode>(cur_token()));
  return list;
}

std::unique_ptr<ParseNode> Parser::ParseFile() {
  std::unique_ptr<BlockNode> file =
      std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
  for (;;) {
    if (at_end())
      break;
    std::unique_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      break;
    file->append_statement(std::move(statement));
  }
  if (!at_end() && !has_error())
    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
  if (has_error())
    return std::unique_ptr<ParseNode>();

  // TODO(scottmg): If this is measurably expensive, it could be done only
  // when necessary (when reformatting, or during tests). Comments are
  // separate from the parse tree at this point, so downstream code can remain
  // ignorant of them.
  AssignComments(file.get());

  return std::move(file);
}

std::unique_ptr<ParseNode> Parser::ParseStatement() {
  if (LookAhead(Token::IF)) {
    return ParseCondition();
  } else if (LookAhead(Token::BLOCK_COMMENT)) {
    return BlockComment(Consume());
  } else {
    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    // to allow "pointless" expressions and return ParseExpression() directly.
    std::unique_ptr<ParseNode> stmt = ParseExpression();
    if (stmt) {
      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
        return stmt;
    }
    if (!has_error()) {
      const Token& token = cur_or_last_token();
      *err_ = Err(token, "Expecting assignment or function call.");
    }
    return std::unique_ptr<ParseNode>();
  }
}

std::unique_ptr<BlockNode> Parser::ParseBlock(
    const Token& begin_brace,
    BlockNode::ResultMode result_mode) {
  if (has_error())
    return std::unique_ptr<BlockNode>();
  std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
  block->set_begin_token(begin_brace);

  for (;;) {
    if (LookAhead(Token::RIGHT_BRACE)) {
      block->set_end(std::make_unique<EndNode>(Consume()));
      break;
    }

    std::unique_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      return std::unique_ptr<BlockNode>();
    block->append_statement(std::move(statement));
  }
  return block;
}

std::unique_ptr<ParseNode> Parser::ParseCondition() {
  std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
  condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
  condition->set_condition(ParseExpression());
  if (IsAssignment(condition->condition()))
    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
  condition->set_if_true(ParseBlock(
      Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
      BlockNode::DISCARDS_RESULT));
  if (Match(Token::ELSE)) {
    if (LookAhead(Token::LEFT_BRACE)) {
      condition->set_if_false(
          ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
    } else if (LookAhead(Token::IF)) {
      condition->set_if_false(ParseStatement());
    } else {
      *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
      return std::unique_ptr<ParseNode>();
    }
  }
  if (has_error())
    return std::unique_ptr<ParseNode>();
  return std::move(condition);
}

void Parser::TraverseOrder(const ParseNode* root,
                           std::vector<const ParseNode*>* pre,
                           std::vector<const ParseNode*>* post) {
  if (root) {
    pre->push_back(root);

    if (const AccessorNode* accessor = root->AsAccessor()) {
      TraverseOrder(accessor->subscript(), pre, post);
      TraverseOrder(accessor->member(), pre, post);
    } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
      TraverseOrder(binop->left(), pre, post);
      TraverseOrder(binop->right(), pre, post);
    } else if (const BlockNode* block = root->AsBlock()) {
      for (const auto& statement : block->statements())
        TraverseOrder(statement.get(), pre, post);
      TraverseOrder(block->End(), pre, post);
    } else if (const ConditionNode* condition = root->AsCondition()) {
      TraverseOrder(condition->condition(), pre, post);
      TraverseOrder(condition->if_true(), pre, post);
      TraverseOrder(condition->if_false(), pre, post);
    } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
      TraverseOrder(func_call->args(), pre, post);
      TraverseOrder(func_call->block(), pre, post);
    } else if (root->AsIdentifier()) {
      // Nothing.
    } else if (const ListNode* list = root->AsList()) {
      for (const auto& node : list->contents())
        TraverseOrder(node.get(), pre, post);
      TraverseOrder(list->End(), pre, post);
    } else if (root->AsLiteral()) {
      // Nothing.
    } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
      TraverseOrder(unaryop->operand(), pre, post);
    } else if (root->AsBlockComment()) {
      // Nothing.
    } else if (root->AsEnd()) {
      // Nothing.
    } else {
      CHECK(false) << "Unhandled case in TraverseOrder.";
    }

    post->push_back(root);
  }
}

void Parser::AssignComments(ParseNode* file) {
  // Start by generating a pre- and post- order traversal of the tree so we
  // can determine what's before and after comments.
  std::vector<const ParseNode*> pre;
  std::vector<const ParseNode*> post;
  TraverseOrder(file, &pre, &post);

  // Assign line comments to syntax immediately following.
  int cur_comment = 0;
  for (auto* node : pre) {
    if (node->GetRange().is_null()) {
      CHECK_EQ(node, file) << "Only expected on top file node";
      continue;
    }
    const Location start = node->GetRange().begin();
    while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
      if (start >= line_comment_tokens_[cur_comment].location()) {
        const_cast<ParseNode*>(node)->comments_mutable()->append_before(
            line_comment_tokens_[cur_comment]);
        ++cur_comment;
      } else {
        break;
      }
    }
  }

  // Remaining line comments go at end of file.
  for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
       ++cur_comment)
    file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);

  // Assign suffix to syntax immediately before.
  cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
  for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
       i != post.rend(); ++i) {
    // Don't assign suffix comments to the function, list, or block, but instead
    // to the last thing inside.
    if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
      continue;

    Location start = (*i)->GetRange().begin();
    Location end = (*i)->GetRange().end();

    // Don't assign suffix comments to something that starts on an earlier
    // line, so that in:
    //
    // sources = [ "a",
    //     "b" ] # comment
    //
    // it's attached to "b", not sources = [ ... ].
    if (start.line_number() != end.line_number())
      continue;

    while (cur_comment >= 0) {
      if (end <= suffix_comment_tokens_[cur_comment].location()) {
        const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
            suffix_comment_tokens_[cur_comment]);
        --cur_comment;
      } else {
        break;
      }
    }

    // Suffix comments were assigned in reverse, so if there were multiple on
    // the same node, they need to be reversed.
    if ((*i)->comments() && !(*i)->comments()->suffix().empty())
      const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
  }
}

std::string IndentFor(int value) {
  return std::string(value, ' ');
}

void RenderToText(const base::Value& node,
                  int indent_level,
                  std::ostringstream& os) {
  const base::Value* child = node.FindKey(std::string("child"));
  std::string node_type(node.FindKey("type")->GetString());
  if (node_type == "ACCESSOR") {
    // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
    // for the base.
    os << IndentFor(indent_level) << node_type << std::endl;
    os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
       << std::endl;
  } else {
    os << IndentFor(indent_level) << node_type;
    if (node.FindKey("value")) {
      os << "(" << node.FindKey("value")->GetString() << ")";
    }
    os << std::endl;
  }
  if (node.FindKey(kJsonBeforeComment)) {
    for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
      os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
         << "\")\n";
    }
  }
  if (node.FindKey(kJsonSuffixComment)) {
    for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
      os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
         << "\")\n";
    }
  }
  if (node.FindKey(kJsonAfterComment)) {
    for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
      os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
         << "\")\n";
    }
  }
  if (child) {
    for (const base::Value& n : child->GetList()) {
      RenderToText(n, indent_level + 1, os);
    }
  }
  const base::Value* end = node.FindKey("end");
  if (end &&
      (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) ||
       end->FindKey(kJsonAfterComment))) {
    RenderToText(*end, indent_level + 1, os);
  }
}