/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/**
 * Thrift scanner.
 *
 * Tokenizes a thrift definition file.
 *
 * Layout of this file: a C++ prologue (warning suppression, includes and two
 * fatal-error helpers), the flex options, the named helper patterns, the
 * scanner rules, and a short epilogue.
 */

%{

/* This is redundant with some of the flags in Makefile.am, but it works
 * when people override CXXFLAGS without being careful. The pragmas are
 * the 'right' way to do it, but don't work on old-enough GCC (in particular
 * the GCC that ships on Mac OS X 10.6.5, *counter* to what the GNU docs say)
 *
 * We should revert the Makefile.am changes once Apple ships a reasonable
 * GCC.
 */
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-label"
#endif

#ifdef _MSC_VER
#pragma warning( push )

// warning C4102: 'find_rule' : unreferenced label
#pragma warning( disable : 4102 )

// warning C4267: 'argument' : conversion from 'size_t' to 'int', possible loss of data
#pragma warning( disable : 4267 )

// avoid isatty redefinition
#define YY_NEVER_INTERACTIVE 1

#define YY_NO_UNISTD_H 1
#endif

#include <cassert>
#include <string>
#include <errno.h>
#include <stdlib.h>

#ifdef _MSC_VER
#include "thrift/windows/config.h"
#endif
#include "thrift/main.h"
#include "thrift/common.h"
#include "thrift/globals.h"
#include "thrift/parse/t_program.h"

/**
 * Must be included AFTER parse/t_program.h, but I can't remember why anymore
 * because I wrote this a while ago.
 *
 * The header name depends on BISON_USE_PARSER_H_EXTENSION: thrifty.h when it
 * is defined, thrifty.hh otherwise.
 */
#if defined(BISON_USE_PARSER_H_EXTENSION)
#include "thrift/thrifty.h"
#else
#include "thrift/thrifty.hh"
#endif

/**
 * Reports through yyerror() that the integer literal in `text` is too big,
 * then terminates the process with exit status 1. Does not return.
 *
 * @param text the offending token text, echoed in the error message
 */
void integer_overflow(char* text) {
  yyerror("This integer is too big: \"%s\"\n", text);
  exit(1);
}

/**
 * Reports through yyerror() that `text` is not a token the scanner accepts,
 * then terminates the process with exit status 1. Does not return.
 *
 * @param text the offending input text, echoed in the error message
 */
void unexpected_token(char* text) {
  yyerror("Unexpected token in input: \"%s\"\n", text);
  exit(1);
}

%}

/**
 * Provides the yylineno global, useful for debugging output
 */
%option lex-compat

/**
 * Our inputs are all single files, so no need for yywrap
 */
%option noyywrap

/**
 * We don't use it, and it fires up warnings at -Wall
 */
%option nounput

/**
 * Helper definitions, comments, constants, and whatnot
 *
 * Numeric patterns:
 *   intconstant  optional sign followed by one or more decimal digits
 *   hexconstant  optional sign, the prefix 0x, then one or more hex digits
 *   dubconstant  optional sign, digits, fraction and exponent; every part of
 *                this pattern is optional, so on its own it also accepts a
 *                bare sign
 */

intconstant   ([+-]?[0-9]+)
hexconstant   ([+-]?"0x"[0-9A-Fa-f]+)
dubconstant   ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
identifier    ([a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*)
whitespace    ([ \t\r\n]*)
sillycomm     ("/*""*"*"*/")
multicm_begin ("/*")
doctext_begin ("/**")
comment       ("//"[^\n]*)
unixcomment   ("#"[^\n]*)
symbol        ([:;\,\{\}\(\)\=<>\[\]])
literal_begin (['\"])

/**
 * Scanner rules.
 *
 * flex picks the longest match and, on a tie, the rule listed first. The
 * keyword rules therefore have to stay ahead of the identifier rule, and the
 * identifier rule ahead of the double-constant rule.
 *
 * Doc comments, multi-line comments and string literals are consumed by hand
 * with yyinput(). Those loops treat both EOF and 0 as end of input, because
 * the value yyinput() returns at end of input differs between flex releases.
 */

%%

{whitespace}         { /* do nothing */                 }
{sillycomm}          { /* do nothing */                 }

{doctext_begin} {
  /* Reads the remainder of a doc comment and, in PROGRAM mode, publishes its
   * text through the g_doctext globals instead of returning a token. */
  std::string parsed("/**");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while(state < 2)
  {
    int ch = yyinput();
    switch (ch) {
      case EOF:
      case 0: /* some flex releases report end of input as 0 instead of EOF */
        yyerror("Unexpected end of file in doc-comment at %d\n", yylineno);
        exit(1);
      case '*':
        state = 1;
        break;
      case '/':
        state = (state == 1) ? 2 : 0;
        break;
      default:
        state = 0;
        break;
    }
    parsed.push_back(static_cast<char>(ch));
  }
  pdebug("doctext = \"%s\"\n",parsed.c_str());

 /* This does not show up in the parse tree. */
 /* Rather, the parser will grab it out of the global. */
  if (g_parse_mode == PROGRAM) {
    clear_doctext();
    /* Skip the three opening characters, then overwrite the two closing
     * characters with a single space plus the terminator. */
    g_doctext = strdup(parsed.c_str() + 3);
    assert(strlen(g_doctext) >= 2);
    g_doctext[strlen(g_doctext) - 2] = ' ';
    g_doctext[strlen(g_doctext) - 1] = '\0';
    g_doctext = clean_up_doctext(g_doctext);
    g_doctext_lineno = yylineno;
    /* Only the first doc comment seen while the status is still INVALID is
     * remembered as the program-level candidate. */
    if( (g_program_doctext_candidate == nullptr) && (g_program_doctext_status == INVALID)){
      g_program_doctext_candidate = strdup(g_doctext);
      g_program_doctext_lineno = g_doctext_lineno;
      g_program_doctext_status = STILL_CANDIDATE;
      pdebug("%s","program doctext set to STILL_CANDIDATE");
    }
  }
}

{multicm_begin}  { /* parsed, but thrown away */
  std::string parsed("/*");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while(state < 2)
  {
    int ch = yyinput();
    switch (ch) {
      case EOF:
      case 0: /* some flex releases report end of input as 0 instead of EOF */
        yyerror("Unexpected end of file in multiline comment at %d\n", yylineno);
        exit(1);
      case '*':
        state = 1;
        break;
      case '/':
        state = (state == 1) ? 2 : 0;
        break;
      default:
        state = 0;
        break;
    }
    parsed.push_back(static_cast<char>(ch));
  }
  pdebug("multi_comm = \"%s\"\n",parsed.c_str());
}

{comment}            { /* do nothing */                 }
{unixcomment}        { /* do nothing */                 }

{symbol}             { return yytext[0];                }
"*"                  { return yytext[0];                }

"false"              { yylval.iconst=0; return tok_int_constant; }
"true"               { yylval.iconst=1; return tok_int_constant; }

"namespace"          { return tok_namespace;            }
"cpp_namespace"      { error_unsupported_namespace_decl("cpp"); /* do nothing */ }
"cpp_include"        { return tok_cpp_include;          }
"cpp_type"           { return tok_cpp_type;             }
"java_package"       { error_unsupported_namespace_decl("java_package", "java"); /* do nothing */ }
"delphi_namespace"   { error_unsupported_namespace_decl("delphi"); /* do nothing */ }
"php_namespace"      { error_unsupported_namespace_decl("php"); /* do nothing */ }
"py_module"          { error_unsupported_namespace_decl("py_module", "py"); /* do nothing */ }
"perl_package"       { error_unsupported_namespace_decl("perl_package", "perl"); /* do nothing */ }
"ruby_namespace"     { error_unsupported_namespace_decl("ruby"); /* do nothing */ }
"smalltalk_category" { error_unsupported_namespace_decl("smalltalk_category", "st"); /* do nothing */ }
"smalltalk_prefix"   { error_unsupported_namespace_decl("smalltalk_prefix", "st"); /* do nothing */ }
"xsd_all"            { return tok_xsd_all;              }
"xsd_optional"       { return tok_xsd_optional;         }
"xsd_nillable"       { return tok_xsd_nillable;         }
"xsd_namespace"      { error_unsupported_namespace_decl("xsd"); /* do nothing */ }
"xsd_attrs"          { return tok_xsd_attrs;            }
"include"            { return tok_include;              }
"void"               { return tok_void;                 }
"bool"               { return tok_bool;                 }
"byte"               {
  emit_byte_type_warning();
  return tok_i8;
}
"i8"                 { return tok_i8;                   }
"i16"                { return tok_i16;                  }
"i32"                { return tok_i32;                  }
"i64"                { return tok_i64;                  }
"double"             { return tok_double;               }
"string"             { return tok_string;               }
"binary"             { return tok_binary;               }
"slist" {
  pwarning(0, "\"slist\" is deprecated and will be removed in a future compiler version.  This type should be replaced with \"string\".\n");
  return tok_slist;
}
"senum" {
  pwarning(0, "\"senum\" is deprecated and will be removed in a future compiler version.  This type should be replaced with \"string\".\n");
  return tok_senum;
}
"map"                { return tok_map;                  }
"list"               { return tok_list;                 }
"set"                { return tok_set;                  }
"oneway"             { return tok_oneway;               }
"typedef"            { return tok_typedef;              }
"struct"             { return tok_struct;               }
"union"              { return tok_union;                }
"exception"          { return tok_xception;             }
"extends"            { return tok_extends;              }
"throws"             { return tok_throws;               }
"service"            { return tok_service;              }
"enum"               { return tok_enum;                 }
"const"              { return tok_const;                }
"required"           { return tok_required;             }
"optional"           { return tok_optional;             }
"async" {
  pwarning(0, "\"async\" is deprecated.  It is called \"oneway\" now.\n");
  return tok_oneway;
}
"&"                  { return tok_reference;            }

{intconstant} {
  /* errno is cleared first so that ERANGE afterwards can only come from
   * this strtoll() call. */
  errno = 0;
  yylval.iconst = strtoll(yytext, nullptr, 10);
  if (errno == ERANGE) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}

{hexconstant} {
  /* strtoll() is handed only the hex digits: shift skips the 0x prefix
   * (2 characters) plus a leading sign when there is one (3 characters),
   * and a minus sign is re-applied by hand afterwards. */
  errno = 0;
  char sign = yytext[0];
  int shift = sign == '0' ? 2 : 3;
  yylval.iconst = strtoll(yytext+shift, nullptr, 16);
  if (sign == '-') {
    yylval.iconst = -yylval.iconst;
  }
  if (errno == ERANGE) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}

{identifier} {
  /* The token text is duplicated because yytext is reused by the scanner. */
  yylval.id = strdup(yytext);
  return tok_identifier;
}

{dubconstant} {
 /* Deliberately placed after identifier, since "e10" is NOT a double literal (THRIFT-3477) */
  yylval.dconst = atof(yytext);
  return tok_dub_constant;
}

{literal_begin} {
  /* Reads a quoted string up to the matching quote character, decoding the
   * backslash escapes r, n, t, double quote, single quote and backslash.
   * A raw newline or the end of input inside the string is fatal. */
  char mark = yytext[0];
  std::string result;
  for(;;)
  {
    int ch = yyinput();
    switch (ch) {
      case EOF:
      case 0: /* some flex releases report end of input as 0 instead of EOF */
        yyerror("End of file while read string at %d\n", yylineno);
        exit(1);
      case '\n':
        yyerror("End of line while read string at %d\n", yylineno - 1);
        exit(1);
      case '\\':
        ch = yyinput();
        switch (ch) {
          case 'r':
            result.push_back('\r');
            continue;
          case 'n':
            result.push_back('\n');
            continue;
          case 't':
            result.push_back('\t');
            continue;
          case '"':
            result.push_back('"');
            continue;
          case '\'':
            result.push_back('\'');
            continue;
          case '\\':
            result.push_back('\\');
            continue;
          default:
            /* NOTE(review): unlike the other error paths in this file this
             * one returns -1 to the caller instead of exiting; confirm the
             * parser treats that as a failure. */
            yyerror("Bad escape character\n");
            return -1;
        }
        break;
      default:
        if (ch == mark) {
          yylval.id = strdup(result.c_str());
          return tok_literal;
        } else {
          result.push_back(ch);
        }
    }
  }
}


. {
  unexpected_token(yytext);
}

%%

#ifdef _MSC_VER
#pragma warning( pop )
#endif

/* vim: filetype=lex
*/