1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 *   http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/**
21 * Thrift scanner.
22 *
23 * Tokenizes a thrift definition file.
24 */
25
26%{
27
28/* This is redundant with some of the flags in Makefile.am, but it works
29 * when people override CXXFLAGS without being careful. The pragmas are
30 * the 'right' way to do it, but don't work on old-enough GCC (in particular
31 * the GCC that ship on Mac OS X 10.6.5, *counter* to what the GNU docs say)
32 *
33 * We should revert the Makefile.am changes once Apple ships a reasonable
34 * GCC.
35 */
36#ifdef __GNUC__
37#pragma GCC diagnostic ignored "-Wunused-function"
38#pragma GCC diagnostic ignored "-Wunused-label"
39#endif
40
41#ifdef _MSC_VER
42#pragma warning( push )
43
44// warning C4102: 'find_rule' : unreferenced label
45#pragma warning( disable : 4102 )
46
47// warning C4267: 'argument' : conversion from 'size_t' to 'int', possible loss of data
48#pragma warning( disable : 4267 )
49
50// avoid isatty redefinition
51#define YY_NEVER_INTERACTIVE 1
52
53#define YY_NO_UNISTD_H 1
54#endif
55
56#include <cassert>
57#include <string>
58#include <errno.h>
59#include <stdlib.h>
60
61#ifdef _MSC_VER
62#include "thrift/windows/config.h"
63#endif
64#include "thrift/main.h"
65#include "thrift/common.h"
66#include "thrift/globals.h"
67#include "thrift/parse/t_program.h"
68
69/**
70 * Must be included AFTER parse/t_program.h, but I can't remember why anymore
71 * because I wrote this a while ago.
72 */
73#if defined(BISON_USE_PARSER_H_EXTENSION)
74#include "thrift/thrifty.h"
75#else
76#include "thrift/thrifty.hh"
77#endif
78
/**
 * Reports an integer literal that is too large through yyerror() and then
 * terminates the process with exit status 1. Never returns to the caller.
 *
 * @param text the text of the offending literal, echoed in the message
 */
void integer_overflow(char* text) {
  const int failure_status = 1;
  yyerror("This integer is too big: \"%s\"\n", text);
  exit(failure_status);
}
83
/**
 * Reports input that no scanner rule accepted through yyerror() and then
 * terminates the process with exit status 1. Never returns to the caller.
 *
 * @param text the text of the unexpected token, echoed in the message
 */
void unexpected_token(char* text) {
  const int failure_status = 1;
  yyerror("Unexpected token in input: \"%s\"\n", text);
  exit(failure_status);
}
88
89%}
90
91/**
92 * Provides the yylineno global, useful for debugging output
93 */
94%option lex-compat
95
96/**
97 * Our inputs are all single files, so no need for yywrap
98 */
99%option noyywrap
100
101/**
102 * We don't use it, and it fires up warnings at -Wall
103 */
104%option nounput
105
106/**
107 * Helper definitions, comments, constants, and whatnot
108 */
109
/* Optional sign followed by one or more decimal digits. */
intconstant    ([+-]?[0-9]+)
/* Optional sign, a lowercase 0x prefix, then one or more hex digits. */
hexconstant    ([+-]?"0x"[0-9A-Fa-f]+)
/* Optional sign, digits, fraction and exponent. Every part is optional. */
dubconstant    ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
/* Letter or underscore first; afterwards letters, digits and underscores,
 * each of which may be preceded by a single dot. */
identifier     ([a-zA-Z_](\.?[a-zA-Z_0-9])*)
/* Any run of blanks, tabs, carriage returns and line feeds. */
whitespace     ([ \t\r\n]*)
/* A block comment whose body consists of nothing but stars. */
sillycomm      ("/*""*"*"*/")
/* Opener of a plain block comment. */
multicm_begin  ("/*")
/* Opener of a doc comment. */
doctext_begin  ("/**")
/* Two slashes and everything up to the end of the line. */
comment        ("//".*)
/* A hash sign and everything up to the end of the line. */
unixcomment    ("#".*)
/* Single punctuation characters returned to the parser as themselves. */
symbol         ([:;\,\{\}\(\)\=<>\[\]])
/* Opening quote of a string literal, single or double. */
literal_begin  (['\"])
122
123%%
124
{whitespace}  { /* blanks, tabs and line breaks produce no token */ }
{sillycomm}   { /* a comment made only of stars produces no token */ }
127
{doctext_begin} {
 /* Doc comment: the opener is already matched. Read the remainder by hand
  * up to the closing delimiter and, in PROGRAM parse mode, publish the
  * cleaned-up text through the g_doctext globals. No token is returned. */
  std::string parsed("/**");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while (state < 2) {
    const int ch = yyinput();
   /* End of input is reported as EOF by some flex releases and as 0 by
    * others. Check both before storing the character, otherwise an
    * unterminated comment loops forever appending NUL bytes. */
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in doc-comment at %d\n", yylineno);
      exit(1);
    }
    parsed.push_back(static_cast<char>(ch));
    switch (ch) {
      case '*':
        state = 1;
        break;
      case '/':
        // A slash closes the comment only when it directly follows a star.
        state = (state == 1) ? 2 : 0;
        break;
      default:
        state = 0;
        break;
    }
  }
  pdebug("doctext = \"%s\"\n", parsed.c_str());

 /* This does not show up in the parse tree. */
 /* Rather, the parser will grab it out of the global. */
  if (g_parse_mode == PROGRAM) {
    clear_doctext();
    // Copy everything after the three-character opener.
    g_doctext = strdup(parsed.c_str() + 3);
    const size_t length = strlen(g_doctext);
    assert(length >= 2);
    // Drop the closing delimiter: a blank replaces the star and the string
    // now ends where the slash was.
    g_doctext[length - 2] = ' ';
    g_doctext[length - 1] = '\0';
    g_doctext = clean_up_doctext(g_doctext);
    g_doctext_lineno = yylineno;
    // Only the first doc comment seen while the status is still INVALID
    // becomes the program-level candidate.
    if ((g_program_doctext_candidate == nullptr) && (g_program_doctext_status == INVALID)) {
      g_program_doctext_candidate = strdup(g_doctext);
      g_program_doctext_lineno = g_doctext_lineno;
      g_program_doctext_status = STILL_CANDIDATE;
      pdebug("%s","program doctext set to STILL_CANDIDATE");
    }
  }
}
170
{multicm_begin}  { /* parsed, but thrown away */
 /* Plain block comment: the opener is already matched. Read the remainder
  * by hand up to the closing delimiter. The text is only used for debug
  * output and no token is returned. */
  std::string parsed("/*");
  int state = 0;  // 0 = normal, 1 = "*" seen, 2 = "*/" seen
  while (state < 2) {
    const int ch = yyinput();
   /* End of input is reported as EOF by some flex releases and as 0 by
    * others. Check both before storing the character, otherwise an
    * unterminated comment loops forever appending NUL bytes. */
    if (ch == EOF || ch == 0) {
      yyerror("Unexpected end of file in multiline comment at %d\n", yylineno);
      exit(1);
    }
    parsed.push_back(static_cast<char>(ch));
    switch (ch) {
      case '*':
        state = 1;
        break;
      case '/':
        // A slash closes the comment only when it directly follows a star.
        state = (state == 1) ? 2 : 0;
        break;
      default:
        state = 0;
        break;
    }
  }
  pdebug("multi_comm = \"%s\"\n", parsed.c_str());
}
195
{comment}            { /* line comment introduced by two slashes: no token */ }
{unixcomment}        { /* line comment introduced by a hash sign: no token */ }

{symbol}             { return yytext[0]; }
"*"                  { return yytext[0]; }

"false"              { yylval.iconst = 0; return tok_int_constant; }
"true"               { yylval.iconst = 1; return tok_int_constant; }

"namespace"          { return tok_namespace; }
"cpp_namespace"      { error_unsupported_namespace_decl("cpp"); /* no token is returned */ }
"cpp_include"        { return tok_cpp_include; }
"cpp_type"           { return tok_cpp_type; }
"java_package"       { error_unsupported_namespace_decl("java_package", "java"); /* no token is returned */ }
"delphi_namespace"   { error_unsupported_namespace_decl("delphi"); /* no token is returned */ }
"php_namespace"      { error_unsupported_namespace_decl("php"); /* no token is returned */ }
"py_module"          { error_unsupported_namespace_decl("py_module", "py"); /* no token is returned */ }
"perl_package"       { error_unsupported_namespace_decl("perl_package", "perl"); /* no token is returned */ }
"ruby_namespace"     { error_unsupported_namespace_decl("ruby"); /* no token is returned */ }
"smalltalk_category" { error_unsupported_namespace_decl("smalltalk_category", "st"); /* no token is returned */ }
"smalltalk_prefix"   { error_unsupported_namespace_decl("smalltalk_prefix", "st"); /* no token is returned */ }
"xsd_all"            { return tok_xsd_all; }
"xsd_optional"       { return tok_xsd_optional; }
"xsd_nillable"       { return tok_xsd_nillable; }
"xsd_namespace"      { error_unsupported_namespace_decl("xsd"); /* no token is returned */ }
"xsd_attrs"          { return tok_xsd_attrs; }
"include"            { return tok_include; }
"void"               { return tok_void; }
"bool"               { return tok_bool; }
"byte" {
 /* Alternate spelling that yields the same token as i8, after a call to
  * emit_byte_type_warning. */
  emit_byte_type_warning();
  return tok_i8;
}
"i8"                 { return tok_i8; }
"i16"                { return tok_i16; }
"i32"                { return tok_i32; }
"i64"                { return tok_i64; }
"double"             { return tok_double; }
"string"             { return tok_string; }
"binary"             { return tok_binary; }
"slist" {
 /* Still tokenized, but flagged as deprecated first. */
  pwarning(0, "\"slist\" is deprecated and will be removed in a future compiler version.  This type should be replaced with \"string\".\n");
  return tok_slist;
}
"senum" {
 /* Still tokenized, but flagged as deprecated first. */
  pwarning(0, "\"senum\" is deprecated and will be removed in a future compiler version.  This type should be replaced with \"string\".\n");
  return tok_senum;
}
"map"                { return tok_map; }
"list"               { return tok_list; }
"set"                { return tok_set; }
"oneway"             { return tok_oneway; }
"typedef"            { return tok_typedef; }
"struct"             { return tok_struct; }
"union"              { return tok_union; }
"exception"          { return tok_xception; }
"extends"            { return tok_extends; }
"throws"             { return tok_throws; }
"service"            { return tok_service; }
"enum"               { return tok_enum; }
"const"              { return tok_const; }
"required"           { return tok_required; }
"optional"           { return tok_optional; }
"async" {
 /* Deprecated spelling that yields the same token as oneway. */
  pwarning(0, "\"async\" is deprecated.  It is called \"oneway\" now.\n");
  return tok_oneway;
}
"&"                  { return tok_reference; }
264
{intconstant} {
 /* Decimal integer literal. errno is cleared first so that an ERANGE seen
  * afterwards can only come from this conversion. */
  errno = 0;
  const long long converted = strtoll(yytext, nullptr, 10);
  yylval.iconst = converted;
  if (ERANGE == errno) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
273
{hexconstant} {
 /* Hexadecimal integer literal. With base 16 strtoll accepts the optional
  * sign and the 0x prefix itself, so the whole token is converted in one
  * call. Converting the digits alone and negating afterwards wrongly
  * reported the most negative representable value as an overflow. */
  errno = 0;
  yylval.iconst = strtoll(yytext, nullptr, 16);
  if (errno == ERANGE) {
    integer_overflow(yytext);
  }
  return tok_int_constant;
}
287
{identifier} {
 /* The token value is a strdup copy of the matched text. */
  char* const name_copy = strdup(yytext);
  yylval.id = name_copy;
  return tok_identifier;
}
292
{dubconstant} {
 /* Deliberately placed after identifier, since "e10" is NOT a double literal (THRIFT-3477) */
  const double converted = atof(yytext);
  yylval.dconst = converted;
  return tok_dub_constant;
}
298
{literal_begin} {
 /* String literal: the opening quote is already matched. Read characters
  * by hand up to the same quote character, translating backslash escapes,
  * and return a strdup copy of the text as a tok_literal value. */
  const char mark = yytext[0];
  std::string result;
  for (;;) {
    int ch = yyinput();
    switch (ch) {
      case EOF:
      case 0:
       /* End of input is reported as EOF by some flex releases and as 0 by
        * others. Accepting both keeps an unterminated literal from looping
        * forever while appending NUL bytes. */
        yyerror("End of file while read string at %d\n", yylineno);
        exit(1);
      case '\n':
       /* Literals may not span lines. NOTE(review): the minus one presumably
        * compensates for the line break that was just consumed. */
        yyerror("End of line while read string at %d\n", yylineno - 1);
        exit(1);
      case '\\':
        // The character after a backslash selects the escape.
        ch = yyinput();
        switch (ch) {
          case 'r':
            result.push_back('\r');
            continue;
          case 'n':
            result.push_back('\n');
            continue;
          case 't':
            result.push_back('\t');
            continue;
          case '"':
            result.push_back('"');
            continue;
          case '\'':
            result.push_back('\'');
            continue;
          case '\\':
            result.push_back('\\');
            continue;
          default:
            yyerror("Bad escape character\n");
            return -1;
        }
        break;
      default:
        // The quote that opened the literal also closes it.
        if (ch == mark) {
          yylval.id = strdup(result.c_str());
          return tok_literal;
        } else {
          result.push_back(static_cast<char>(ch));
        }
    }
  }
}
348
349
. {
 /* Last rule: any character nothing above accepted is handed to
  * unexpected_token. */
  unexpected_token(yytext);
}
353
354%%
355
356#ifdef _MSC_VER
357#pragma warning( pop )
358#endif
359
360/* vim: filetype=lex
361*/
362