1/**
2 * Copyright (c) 2015-present, Facebook, Inc.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8%{
9#include <algorithm>
10#include <cassert>
11#include <cctype>
12#include <climits>
13#include <cstdio>
14#include <string>
15#include <vector>
16#include "location.hh"
17#include "position.hh"
18#include "parser.tab.hpp"
19#include "syntaxdefs.h"
20
21// Keep track of token lengths.
22#define YY_USER_ACTION yyextra->loc.columns(yyleng);
23
24static void escape(char c, char *buf);
25
26static std::string clean_up_block_string(const std::string &str);
27
28%}
29
/* Scanner configuration. bison-bridge and bison-locations give yylex() the
   yylval and yylloc arguments used by the rules below; reentrant keeps the
   scanner state out of globals; extra-type makes yyextra a pointer to
   struct LexerExtra. */
%option bison-bridge bison-locations
%option noyywrap batch noinput nounput
%option reentrant
%option extra-type="struct LexerExtra *"

/* Exclusive start conditions: while one is active, only rules tagged with it
   can match. C_COMMENT_STATE is declared, but no rule in the visible source
   uses it. */
%x STRING_STATE
%x BLOCK_STRING_STATE
%x C_COMMENT_STATE
%x LINE_COMMENT_STATE

/* Numeric literals. FLOAT makes both the fraction and the exponent optional,
   so it also matches every INTEGER. */
FLOAT -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?
INTEGER -?(0|[1-9][0-9]*)
/* Names, and variables written as a dollar sign followed by one or more name
   characters (a leading digit is accepted by this pattern). */
IDENTIFIER [_A-Za-z][_0-9A-Za-z]*
VARIABLE $[_0-9A-Za-z]+
/* UTF-8 encoded byte order mark. */
BOM \xef\xbb\xbf
CRLF \r\n
/* BADCHAR: control bytes 0x00-0x1f except tab (0x09), LF (0x0a) and CR (0x0d).
   GOODCHAR: every other byte. */
BADCHAR [\x00-\x08\x0b\x0c\x0e-\x1f]
GOODCHAR [^\x00-\x08\x0b\x0c\x0e-\x1f]
/* Bytes allowed unescaped inside a quoted string: anything except control
   bytes, the backslash and the double quote (0x22). */
STRINGCHAR [^\x00-\x1f\\\x22]

/* Note that blank includes the comma. */
blank [ \t,]
newline [\n\r]
notnewline [^\n\r]
53
54%%
55
%{
  /* flex copies this code to the top of the scanning routine, so it runs on
     every call to yylex() before any rule is matched. Presumably (bison
     location::step) this moves the start of the tracked location up to its
     current end -- confirm against location.hh. */
  yyextra->loc.step();
%}
59
<STRING_STATE>{
  \"    {
    /* Closing quote: hand the accumulated, already unescaped text to the
       parser and go back to normal scanning. */
    BEGIN(INITIAL);
    yylval->str = yyextra->str.c_str();
    *yylloc = yyextra->loc;
    return yy::GraphQLParserImpl::token::TOK_STRING;
  }

  {newline} {
    throw make_error(yyextra->loc, "Unterminated string");
  }

  <<EOF>> {
    throw make_error(yyextra->loc, "Unterminated string at EOF");
  }

  {STRINGCHAR}+  {
    /* Run of ordinary characters: copy it verbatim. STRINGCHAR excludes the
       NUL byte, so the whole match is yyleng bytes long. */
    yyextra->str.append(yytext, yyleng);
  }

  \\\" { yyextra->str.push_back('"'); }
  \\\\ { yyextra->str.push_back('\\'); }
  \\\/ { yyextra->str.push_back('/'); }
  \\n { yyextra->str.push_back('\n'); }
  \\t { yyextra->str.push_back('\t'); }
  \\r { yyextra->str.push_back('\r'); }
  \\b { yyextra->str.push_back('\b'); }
  \\f { yyextra->str.push_back('\f'); }

  \\u[dD][89abAB][0-9A-Fa-f]{2}\\u[dD][c-fC-F][0-9A-Fa-f]{2} |
  \\u[0-9A-Fa-f]{4} {
    /* Unicode escape. The first pattern is a UTF-16 surrogate pair (12 bytes
       of input) and wins over the second by being the longer match; the
       second is a single four-digit escape (6 bytes). The resulting code
       point is appended UTF-8 encoded instead of being truncated to one
       byte. */
    unsigned int cp = 0;
    sscanf(yytext + 2, "%4x", &cp);
    if (yyleng == 12) {
      unsigned int low = 0;
      sscanf(yytext + 8, "%4x", &low);
      cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
    } else if (cp >= 0xD800 && cp <= 0xDFFF) {
      /* A surrogate that is not part of a pair has no UTF-8 encoding. */
      throw make_error(yyextra->loc, "bad Unicode escape sequence");
    }

    auto &out = yyextra->str;
    if (cp < 0x80) {
      out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
      out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
      out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
      out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
  }

  \\u { throw make_error(yyextra->loc, "bad Unicode escape sequence"); }
  \\. { throw make_error(yyextra->loc, std::string("bad escape sequence \\") + yytext[1]); }

}
102
<BLOCK_STRING_STATE>{
  <<EOF>> {
     throw make_error(yyextra->loc, "Unterminated block string at EOF");
  }

  {BADCHAR} {
     throw make_error(yyextra->loc, std::string("Invalid character ") + yytext[0]);
  }

  {CRLF} {
    /* Line terminators are kept in the raw text (clean_up_block_string splits
       on them later) but must also advance the line counter; otherwise every
       location after a multi-line block string reports the wrong line. */
    yyextra->str.append(yytext, yyleng);
    yyextra->loc.lines(1);
  }

  {newline} {
    /* Must stay ahead of GOODCHAR: both match one byte and flex breaks the
       tie in favour of the earlier rule. */
    yyextra->str.push_back(*yytext);
    yyextra->loc.lines(1);
  }

  {GOODCHAR} {
    /* Can't use {GOODCHAR}+ because that would be a better match for
       """ than the explicit rule! */
    yyextra->str.push_back(*yytext);
  }

  \\\"\"\" {
    /* Escaped triple quote: contributes three literal quotes to the text. */
    yyextra->str.append(3, '"');
  }

  \"\"\" {
    /* Closing delimiter: normalise indentation and blank lines, then hand the
       result to the parser. */
    BEGIN(INITIAL);
    yyextra->str = clean_up_block_string(yyextra->str);
    yylval->str = yyextra->str.c_str();
    *yylloc = yyextra->loc;
    return yy::GraphQLParserImpl::token::TOK_STRING;
  }
}
130
<LINE_COMMENT_STATE>{
  {CRLF} { /* A CRLF ends the comment; yyleng is 2 here, so one line is counted. */ yyextra->loc.lines(yyleng / 2); yyextra->loc.step(); BEGIN(INITIAL); }
  {newline} { /* A lone LF or CR ends the comment as well. */ yyextra->loc.lines(yyleng); yyextra->loc.step(); BEGIN(INITIAL); }
  {notnewline}+ /* eat comment character */
}
136
<INITIAL>{
  {blank}+ { /* blank is space, tab or comma: skipped, never returned as a token. */ yyextra->loc.step(); }
  {BOM}+ { /* Byte order marks are skipped too. */ yyextra->loc.step(); yyextra->loc.step(); yyextra->loc.step(); }
  {CRLF}+ { /* Each CRLF is two bytes, hence yyleng / 2 lines. */ yyextra->loc.lines(yyleng / 2); yyextra->loc.step(); }
  {newline}+ { /* Each lone LF or CR counts as one line. */ yyextra->loc.lines(yyleng); yyextra->loc.step(); }

  # {yyextra->loc.step(); BEGIN(LINE_COMMENT_STATE); }

  directive   { /* Keyword rules come before IDENTIFIER: on an equal-length match flex picks the earlier rule. */ yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_DIRECTIVE; }
  enum   { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ENUM; }
  extend   { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EXTEND; }
  false   { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FALSE; }
  fragment { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FRAGMENT; }
  implements { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_IMPLEMENTS; }
  input { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INPUT; }
  interface { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INTERFACE; }
  mutation { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_MUTATION; }
  null { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_NULL; }
  on { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ON; }
  query { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_QUERY; }
  scalar { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SCALAR; }
  schema { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SCHEMA; }
  subscription { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SUBSCRIPTION; }
  true { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_TRUE; }
  type { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_TYPE; }
  union { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_UNION; }

  {INTEGER} { /* Listed before FLOAT: FLOAT also matches plain integers, and rule order breaks the tie. */ yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INTEGER; }
  {FLOAT} { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FLOAT; }
  {IDENTIFIER} { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_IDENTIFIER; }
  {VARIABLE} { /* yytext + 1 skips the leading dollar sign, so the parser sees only the name. */ yylval->str = yytext + 1; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_VARIABLE; }

  "!" { /* Punctuators carry a location but no text. */ *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_BANG; }
  "(" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LPAREN; }
  ")" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RPAREN; }
  "..." { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ELLIPSIS; }
  ":" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_COLON; }
  "=" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EQUAL; }
  "@" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_AT; }
  "[" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LBRACKET; }
  "]" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RBRACKET; }
  "{" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LBRACE; }
  "|" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_PIPE; }
  "}" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RBRACE; }


  <<EOF>> { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EOF; }

  \"\"\" {
    /* Opening delimiter of a block string: nothing is returned yet, the text
       is collected in yyextra->str by the BLOCK_STRING_STATE rules. */
    BEGIN(BLOCK_STRING_STATE);
    yyextra->str.clear();
  }

  \"   {
    /* Opening quote of an ordinary string, collected by the STRING_STATE
       rules. */
    BEGIN(STRING_STATE);
    yyextra->str.clear();
  }
}
195
<INITIAL,STRING_STATE,LINE_COMMENT_STATE>. {
  /* Fallback for a single character that no other rule of these states
     accepts. escape() writes at most five bytes (backslash, x, two hex digits
     and the terminating NUL), so six bytes are enough for buf. */
  char buf[6];
  escape(yytext[0], buf);
  throw make_error(
    yyextra->loc,
    std::string("unrecognized character ") + buf);
}
203
204%%
205
/**
 * Writes a printable, NUL-terminated rendering of `c` into `buf`.
 *
 * A character for which std::isgraph() is true is copied unchanged. The
 * control characters \a \b \f \n \r \t \v are written as their two-character
 * C escape. Everything else (the space character included) is written as
 * \x followed by the byte value in lowercase hex without zero padding.
 *
 * @param c   character to render
 * @param buf destination; must have room for at least 5 bytes
 *            (backslash, 'x', two hex digits, NUL)
 */
static void escape(char c, char *buf) {
  // The <cctype> classifiers are undefined for negative values other than
  // EOF, and plain char may be signed, so classify the unsigned byte value.
  const unsigned char byte = static_cast<unsigned char>(c);

  if (std::isgraph(byte)) {
    buf[0] = c;
    buf[1] = '\0';
    return;
  }

  buf[0] = '\\';
  buf[2] = '\0';  // Terminator for the two-character escapes below.
  switch (c) {
  case '\a':
    buf[1] = 'a';
    break;
  case '\b':
    buf[1] = 'b';
    break;
  case '\f':
    buf[1] = 'f';
    break;
  case '\n':
    buf[1] = 'n';
    break;
  case '\r':
    buf[1] = 'r';
    break;
  case '\t':
    buf[1] = 't';
    break;
  case '\v':
    buf[1] = 'v';
    break;
  default:
    // No mnemonic escape: fall back to hex. snprintf overwrites buf[2]
    // onwards and NUL-terminates within the 3 bytes it is given.
    buf[1] = 'x';
    std::snprintf(buf + 2, 3, "%x", static_cast<unsigned int>(byte));
    break;
  }
}
242
// Splits `str` into lines. A line ends at "\r\n", a lone '\r' or a lone '\n';
// the terminator itself is dropped. Text after the last terminator forms a
// final line, but a terminator at the very end does not add an empty one, and
// an empty input yields no lines at all.
static std::vector<std::string> splitLines(const std::string &str) {
  std::vector<std::string> result;
  const std::string::size_type total = str.size();
  std::string::size_type cursor = 0;

  while (cursor < total) {
    const std::string::size_type breakPos = str.find_first_of("\r\n", cursor);
    if (breakPos == std::string::npos) {
      // No further terminator: the remainder is the last line.
      result.push_back(str.substr(cursor));
      break;
    }

    result.push_back(str.substr(cursor, breakPos - cursor));

    // Step over the terminator, treating "\r\n" as a single one.
    cursor = breakPos + 1;
    if (str[breakPos] == '\r' && cursor < total && str[cursor] == '\n') {
      ++cursor;
    }
  }

  return result;
}
263
// Returns the number of leading space and tab characters in `str`. For a
// string made up only of spaces and tabs (or an empty one) that is its length.
static int count_leading_whitespace(const std::string &str) {
  // The C-string overload needs no explicit length, so this no longer relies
  // on strlen being declared by some transitive include.
  const std::string::size_type pos = str.find_first_not_of(" \t");
  const std::string::size_type count =
      (pos == std::string::npos) ? str.length() : pos;
  // The return type stays int for existing callers; make the narrowing
  // explicit instead of implicit.
  return static_cast<int>(count);
}
271
// Returns true when `str` is empty or consists solely of spaces and tabs.
static bool is_all_whitespace(const std::string &str) {
  // Asking directly for the first character that is neither a space nor a tab
  // avoids comparing a signed count with an unsigned length and never passes
  // the string length through int.
  return str.find_first_not_of(" \t") == std::string::npos;
}
275
276static std::string clean_up_block_string(const std::string &str) {
277  auto lines = splitLines(str);
278  bool first = true;
279  int commonIndent = INT_MAX;
280  for (const auto &line : lines) {
281    if (first) {
282      first = false;
283      continue;
284    }
285    const auto indent = count_leading_whitespace(line);
286    if (indent < line.length()) {
287      if (indent < commonIndent) {
288        commonIndent = indent;
289      }
290    }
291  }
292  if (commonIndent != INT_MAX) {
293    first = true;
294    for (auto &line : lines) {
295      if (first) {
296        first = false;
297        continue;
298      }
299      line.erase(0, commonIndent);
300    }
301  }
302
303  const auto firstNonBlankIt = std::find_if(lines.begin(), lines.end(), [](const std::string &line) {
304      return !is_all_whitespace(line);
305  });
306  lines.erase(lines.begin(), firstNonBlankIt);
307
308  const auto firstNonBlankReverseIt = std::find_if(lines.rbegin(), lines.rend(), [](const std::string &line) {
309      return !is_all_whitespace(line);
310  });
311  lines.erase(lines.end() - (firstNonBlankReverseIt - lines.rbegin()), lines.end());
312
313  std::string formatted;
314  first = true;
315  for (const auto &line: lines) {
316    if (first) {
317      first = false;
318    } else {
319      formatted.push_back('\n');
320    }
321    formatted.append(line);
322  }
323  return formatted;
324}
325