/**
 * Copyright (c) 2015-present, Facebook, Inc.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

%{
#include <algorithm>
#include <cassert>
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include "location.hh"
#include "position.hh"
#include "parser.tab.hpp"
#include "syntaxdefs.h"

// Keep track of token lengths: every matched rule first widens the current
// location by the number of bytes matched, before its action runs.
#define YY_USER_ACTION yyextra->loc.columns(yyleng);

// Writes a printable, NUL-terminated rendering of `c` into `buf`.
// `buf` must have room for at least 5 bytes ("\xHH" plus the terminator).
static void escape(char c, char *buf);

// Appends the UTF-8 encoding of `codepoint` to `out`.
static void append_utf8(std::string &out, unsigned long codepoint);

// Applies block-string ("""...""") indentation and blank-line stripping to
// the raw text collected between the delimiters.
static std::string clean_up_block_string(const std::string &str);

%}

%option bison-bridge bison-locations
%option noyywrap batch noinput nounput
%option reentrant
%option extra-type="struct LexerExtra *"

/* Exclusive start conditions. C_COMMENT_STATE is declared but no rule in
   this file enters or uses it. */
%x STRING_STATE
%x BLOCK_STRING_STATE
%x C_COMMENT_STATE
%x LINE_COMMENT_STATE

FLOAT -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?
INTEGER -?(0|[1-9][0-9]*)
IDENTIFIER [_A-Za-z][_0-9A-Za-z]*
VARIABLE $[_0-9A-Za-z]+
BOM \xef\xbb\xbf
CRLF \r\n
BADCHAR [\x00-\x08\x0b\x0c\x0e-\x1f]
GOODCHAR [^\x00-\x08\x0b\x0c\x0e-\x1f]
STRINGCHAR [^\x00-\x1f\\\x22]

blank [ \t,]
newline [\n\r]
notnewline [^\n\r]

%%

%{
  /* Runs on every entry to the scanner: start a fresh location span. */
  yyextra->loc.step();
%}

<STRING_STATE>{
  \" {
    /* Closing quote: hand the accumulated, unescaped text to the parser. */
    BEGIN(INITIAL);
    yylval->str = yyextra->str.c_str();
    *yylloc = yyextra->loc;
    return yy::GraphQLParserImpl::token::TOK_STRING;
  }

  {newline} {
    throw make_error(yyextra->loc, "Unterminated string");
  }

  <<EOF>> {
    throw make_error(yyextra->loc, "Unterminated string at EOF");
  }

  {STRINGCHAR}+ {
    /* STRINGCHAR excludes NUL, so the match is exactly yyleng bytes. */
    yyextra->str.append(yytext, yyleng);
  }

  \\\" { yyextra->str.push_back('"'); }
  \\\\ { yyextra->str.push_back('\\'); }
  \\\/ { yyextra->str.push_back('/'); }
  \\n { yyextra->str.push_back('\n'); }
  \\t { yyextra->str.push_back('\t'); }
  \\r { yyextra->str.push_back('\r'); }
  \\b { yyextra->str.push_back('\b'); }
  \\f { yyextra->str.push_back('\f'); }

  \\u[dD][89abAB][0-9A-Fa-f]{2}\\u[dD][c-fC-F][0-9A-Fa-f]{2} {
    /* An escaped UTF-16 surrogate pair: combine the high (yytext[2..5]) and
       low (yytext[8..11]) halves into one supplementary code point. strtoul
       stops at the backslash that follows the first four hex digits. */
    const unsigned long high = std::strtoul(yytext + 2, nullptr, 16);
    const unsigned long low = std::strtoul(yytext + 8, nullptr, 16);
    append_utf8(yyextra->str, 0x10000UL + ((high - 0xD800UL) << 10) + (low - 0xDC00UL));
  }

  \\u[0-9A-Fa-f]{4} {
    /* The pattern guarantees exactly four hex digits after "\u". The code
       point is stored as UTF-8 rather than truncated to a single byte. */
    append_utf8(yyextra->str, std::strtoul(yytext + 2, nullptr, 16));
  }

  \\u { throw make_error(yyextra->loc, "bad Unicode escape sequence"); }
  \\. { throw make_error(yyextra->loc, std::string("bad escape sequence \\") + yytext[1]); }

}

<BLOCK_STRING_STATE>{
  <<EOF>> {
    throw make_error(yyextra->loc, "Unterminated block string at EOF");
  }

  {BADCHAR} {
    throw make_error(yyextra->loc, std::string("Invalid character ") + yytext[0]);
  }

  {CRLF} {
    /* Keep the raw line terminator (clean_up_block_string normalizes it)
       and advance the location by one line so later tokens report the
       correct line number. */
    yyextra->str.append(yytext, yyleng);
    yyextra->loc.lines(1);
  }

  {newline} {
    /* Must precede {GOODCHAR}: both match one byte, so rule order decides. */
    yyextra->str.push_back(*yytext);
    yyextra->loc.lines(1);
  }

  {GOODCHAR} {
    /* Can't use {GOODCHAR}+ because that would be a better match for
       """ than the explicit rule! */
    yyextra->str.push_back(*yytext);
  }

  \\\"\"\" {
    yyextra->str.append(3, '"');
  }

  \"\"\" {
    BEGIN(INITIAL);
    yyextra->str = clean_up_block_string(yyextra->str);
    yylval->str = yyextra->str.c_str();
    *yylloc = yyextra->loc;
    return yy::GraphQLParserImpl::token::TOK_STRING;
  }
}

<LINE_COMMENT_STATE>{
  {CRLF} { yyextra->loc.lines(yyleng / 2); yyextra->loc.step(); BEGIN(INITIAL); }
  {newline} { yyextra->loc.lines(yyleng); yyextra->loc.step(); BEGIN(INITIAL); }
  {notnewline}+ /* eat comment character */
}

<INITIAL>{
  {blank}+ { yyextra->loc.step(); }
  {BOM}+ { yyextra->loc.step(); yyextra->loc.step(); yyextra->loc.step(); }
  {CRLF}+ { yyextra->loc.lines(yyleng / 2); yyextra->loc.step(); }
  {newline}+ { yyextra->loc.lines(yyleng); yyextra->loc.step(); }

  # {yyextra->loc.step(); BEGIN(LINE_COMMENT_STATE); }

  directive { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_DIRECTIVE; }
  enum { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ENUM; }
  extend { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EXTEND; }
  false { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FALSE; }
  fragment { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FRAGMENT; }
  implements { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_IMPLEMENTS; }
  input { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INPUT; }
  interface { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INTERFACE; }
  mutation { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_MUTATION; }
  null { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_NULL; }
  on { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ON; }
  query { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_QUERY; }
  scalar { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SCALAR; }
  schema { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SCHEMA; }
  subscription { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_SUBSCRIPTION; }
  true { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_TRUE; }
  type { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_TYPE; }
  union { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_UNION; }

  {INTEGER} { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_INTEGER; }
  {FLOAT} { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_FLOAT; }
  {IDENTIFIER} { yylval->str = yytext; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_IDENTIFIER; }
  {VARIABLE} { yylval->str = yytext + 1; *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_VARIABLE; }

  "!" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_BANG; }
  "(" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LPAREN; }
  ")" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RPAREN; }
  "..." { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_ELLIPSIS; }
  ":" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_COLON; }
  "=" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EQUAL; }
  "@" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_AT; }
  "[" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LBRACKET; }
  "]" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RBRACKET; }
  "{" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_LBRACE; }
  "|" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_PIPE; }
  "}" { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_RBRACE; }


  <<EOF>> { *yylloc = yyextra->loc; return yy::GraphQLParserImpl::token::TOK_EOF; }

  \"\"\" {
    BEGIN(BLOCK_STRING_STATE);
    yyextra->str.clear();
  }

  \" {
    BEGIN(STRING_STATE);
    yyextra->str.clear();
  }
}

<INITIAL,STRING_STATE,LINE_COMMENT_STATE>. {
  char buf[6];
  escape(yytext[0], buf);
  throw make_error(
    yyextra->loc,
    std::string("unrecognized character ") + buf);
}

%%

// Renders `c` for use in an error message.
//
// Graphic characters are copied as-is; the usual C control characters become
// their two-character backslash escapes; anything else becomes "\x" followed
// by the byte value in lowercase hex. The result is NUL-terminated and never
// longer than 5 bytes including the terminator.
static void escape(char c, char *buf) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behaviour.
  if (std::isgraph(static_cast<unsigned char>(c))) {
    *buf = c;
    buf[1] = '\0';
  } else {
    buf[0] = '\\';
    buf[2] = '\0';
    switch (c) {
    case '\a':
      buf[1] = 'a';
      break;
    case '\b':
      buf[1] = 'b';
      break;
    case '\f':
      buf[1] = 'f';
      break;
    case '\n':
      buf[1] = 'n';
      break;
    case '\r':
      buf[1] = 'r';
      break;
    case '\t':
      buf[1] = 't';
      break;
    case '\v':
      buf[1] = 'v';
      break;
    default:
      buf[1] = 'x';
      // At most two hex digits plus the terminator, overwriting buf[2..4].
      std::snprintf(buf + 2, 3, "%x", ((int)c & 0xFF));
      break;
    }
  }
}

// Appends `codepoint` to `out` as a 1- to 4-byte UTF-8 sequence.
// No validation is performed: the value is encoded purely by magnitude.
static void append_utf8(std::string &out, unsigned long codepoint) {
  if (codepoint < 0x80UL) {
    out.push_back(static_cast<char>(codepoint));
  } else if (codepoint < 0x800UL) {
    out.push_back(static_cast<char>(0xC0UL | (codepoint >> 6)));
    out.push_back(static_cast<char>(0x80UL | (codepoint & 0x3FUL)));
  } else if (codepoint < 0x10000UL) {
    out.push_back(static_cast<char>(0xE0UL | (codepoint >> 12)));
    out.push_back(static_cast<char>(0x80UL | ((codepoint >> 6) & 0x3FUL)));
    out.push_back(static_cast<char>(0x80UL | (codepoint & 0x3FUL)));
  } else {
    out.push_back(static_cast<char>(0xF0UL | ((codepoint >> 18) & 0x07UL)));
    out.push_back(static_cast<char>(0x80UL | ((codepoint >> 12) & 0x3FUL)));
    out.push_back(static_cast<char>(0x80UL | ((codepoint >> 6) & 0x3FUL)));
    out.push_back(static_cast<char>(0x80UL | (codepoint & 0x3FUL)));
  }
}

// Splits `str` on "\r\n", "\r" or "\n". The terminators are not included in
// the returned lines, and a terminator at the very end of `str` does not
// produce a trailing empty line. An empty input yields an empty vector.
static std::vector<std::string> splitLines(const std::string &str) {
  std::vector<std::string> lines;
  std::string::size_type start = 0;
  while (start < str.size()) {
    const auto eol = str.find_first_of("\r\n", start);
    if (eol == std::string::npos) {
      lines.emplace_back(str, start);
      break;
    }
    lines.emplace_back(str, start, eol - start);
    start = eol + 1;
    // Treat "\r\n" as a single terminator.
    if (str[eol] == '\r' && start < str.size() && str[start] == '\n') {
      ++start;
    }
  }
  return lines;
}

// Returns the number of leading spaces/tabs in `str`; for a string made up
// only of spaces and tabs (including the empty string) this is its length.
static std::string::size_type count_leading_whitespace(const std::string &str) {
  const auto pos = str.find_first_not_of(" \t");
  return pos == std::string::npos ? str.length() : pos;
}

// True when `str` is empty or contains nothing but spaces and tabs.
static bool is_all_whitespace(const std::string &str) {
  return count_leading_whitespace(str) == str.length();
}

// Normalizes the raw contents of a block string:
//   1. splits into lines;
//   2. removes the smallest indentation shared by all non-blank lines after
//      the first from every line after the first;
//   3. drops leading and trailing whitespace-only lines;
//   4. joins what remains with '\n'.
static std::string clean_up_block_string(const std::string &str) {
  auto lines = splitLines(str);

  // npos doubles as "no indented, non-blank line seen yet".
  const auto noIndent = std::string::npos;
  auto commonIndent = noIndent;
  for (std::vector<std::string>::size_type i = 1; i < lines.size(); ++i) {
    const auto indent = count_leading_whitespace(lines[i]);
    if (indent < lines[i].length()) {
      commonIndent = std::min(commonIndent, indent);
    }
  }
  if (commonIndent != noIndent) {
    for (std::vector<std::string>::size_type i = 1; i < lines.size(); ++i) {
      // erase() clamps the count, so shorter (blank) lines simply empty out.
      lines[i].erase(0, commonIndent);
    }
  }

  const auto isNotBlank = [](const std::string &line) {
    return !is_all_whitespace(line);
  };
  lines.erase(lines.begin(), std::find_if(lines.begin(), lines.end(), isNotBlank));
  // base() of the last non-blank line's reverse iterator points one past it.
  lines.erase(std::find_if(lines.rbegin(), lines.rend(), isNotBlank).base(), lines.end());

  std::string formatted;
  bool first = true;
  for (const auto &line : lines) {
    if (first) {
      first = false;
    } else {
      formatted.push_back('\n');
    }
    formatted.append(line);
  }
  return formatted;
}