1%include {
2/* protobuf_lang_parser.lemon
3 *
4 * C Protocol Buffers Language (PBL) Parser (for *.proto files)
5 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
6 *
7 * SPDX-License-Identifier: GPL-2.0-or-later
8 */
9
10/* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
11 * There are two formats of *.proto files:
12 * 1) Protocol Buffers Version 3 Language Specification:
13 *      https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
14 * 2) Protocol Buffers Version 2 Language Specification:
15 *      https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
 * There are some errors about 'proto', 'option' (value) and 'reserved' (fieldName) definitions on those sites.
 * This parser was created because Wireshark is mainly implemented in plain ANSI C but the official
 * Protocol Buffers Language parser is implemented in C++.
19 */
20#include "config.h"
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <stdarg.h>
25#include <glib.h>
26#include <assert.h>
27#include <wsutil/file_util.h>
28#include "protobuf_lang_tree.h"
29#include "protobuf_lang_parser.h"
30#include "protobuf_lang_scanner_lex.h"
31
32#define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
33#define NEED_NOT_NAME "<NEED_NOT_NAME>"
34
35static void *ProtobufLangParserAlloc(void *(*mallocProc)(size_t));
36static void ProtobufLangParser(void *yyp, int yymajor, protobuf_lang_token_t *yyminor, protobuf_lang_state_t *state);
37static void ProtobufLangParserFree(void *p, void (*freeProc)(void*));
38
39/* Error handling function for parser */
40void protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg);
41
42/* Extended error handling function */
43void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
44
45/* It's just the approximate line number which is gotten when a grammar rule is reduced
46   by the parser (lemon). That might be overridden by the lineno argument of
47   pbl_set_node_name() later. */
48#define CUR_LINENO (protobuf_lang_get_lineno(state->scanner))
49
50} /* end of %include */
51
52%name ProtobufLangParser
53
54%extra_argument { protobuf_lang_state_t *state }
55
56%token_type { protobuf_lang_token_t* }
57
58%token_destructor {
59    /* We manage memory allocated for token values by ourself */
60    (void) state; /* Mark unused, similar to Q_UNUSED */
61    (void) $$; /* Mark unused, similar to Q_UNUSED */
62}
63
%syntax_error {
    /* Report a syntax error and flag the parse as failed.
     *
     * The end-of-input marker is fed to the parser with a NULL token value
     * (see run_pbl_parser()), so yyminor must not be dereferenced blindly:
     * a truncated file triggers this handler with yyminor == NULL.
     */
    if (yyminor && yyminor->v) {
        pbl_parser_error(state, "Syntax Error: unexpected token \"%s\"!", yyminor->v);
    } else {
        pbl_parser_error(state, "Syntax Error: unexpected end of file!");
    }
    state->grammar_error = TRUE;
}
68
69%parse_failure {
70    pbl_parser_error(state, "Parse Error");
71    state->grammar_error = TRUE;
72}
73
/* Keywords like 'syntax', 'message', etc. can be used as the names of messages, fields or enums.
   So we tell lemon: "If you are unable to parse this keyword, try treating it as an identifier instead." */
76%fallback PT_IDENT  PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL.
77%fallback PT_IDENT  PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS.
78%fallback PT_IDENT  PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO.
79
80%type strLit { gchar* }
81%type label { gchar* }
82%type type { gchar* }
83%type keyType { gchar* }
84%type messageType { gchar* }
85%type constant { gchar* }
86
87%type exIdent { protobuf_lang_token_t* }
88%type optionName { protobuf_lang_token_t* }
89%type messageName { protobuf_lang_token_t* }
90%type enumName { protobuf_lang_token_t* }
91%type streamName { protobuf_lang_token_t* }
92%type fieldName { protobuf_lang_token_t* }
93%type oneofName { protobuf_lang_token_t* }
94%type mapName { protobuf_lang_token_t* }
95%type serviceName { protobuf_lang_token_t* }
96%type rpcName { protobuf_lang_token_t* }
97%type groupName { protobuf_lang_token_t* }
98
99%type protoBody { pbl_node_t* }
100%type topLevelDef { pbl_node_t* }
101%type message { pbl_node_t* }
102%type messageBody { pbl_node_t* }
103%type rpc { pbl_node_t* }
104%type rpcDecl { pbl_node_t* }
105%type field { pbl_node_t* }
106%type oneofField { pbl_node_t* }
107%type enum { pbl_node_t* }
108%type enumBody { pbl_node_t* }
109%type enumField { pbl_node_t* }
110%type service { pbl_node_t* }
111%type serviceBody { pbl_node_t* }
112%type stream { pbl_node_t* }
113%type streamDecl { pbl_node_t* }
114%type fieldOptions { pbl_node_t* }
115%type fieldOption { pbl_node_t* }
116%type oneof { pbl_node_t* }
117%type oneofBody { pbl_node_t* }
118%type mapField { pbl_node_t* }
119%type group { pbl_node_t* }
120%type extend { pbl_node_t* }
121%type extendBody { pbl_node_t* }
122
123%type intLit { guint64 }
124
125%type fieldNumber { int }
126%type enumNumber { int }
127
128/* We don't care about the types of following nodes:
129syntax import package option enumValueOptions enumValueOption rpcBody streamBody
130extensions reserved ranges range quoteFieldNames emptyStatement
131*/
132
133%start_symbol proto
134
135/* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
/* Official PBL bugfix: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
   The default syntax version is "proto2". */
138proto ::= wholeProtoBody.
139proto ::= syntax wholeProtoBody.
140
/* Finalize the package node built from the whole file body: give it the real
   package name and register it in (or merge it into) the pool's package table. */
wholeProtoBody ::= protoBody(B).
    {
        pbl_node_t* packnode;

        /* set real package name */
        pbl_set_node_name(B, state->file->package_name_lineno, state->file->package_name);
        /* use the allocated memory of the name of the package node, because the
           token that held the original name is freed after parsing */
        state->file->package_name = pbl_get_node_name(B);
        /* put this file data into package tables */
        packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
        if (packnode) {
            /* Another file already declared this package: move our children into
               the existing package node. */
            pbl_merge_children(packnode, B);
            /* B is released below, so package_name must not keep pointing at the
               name owned by B. Re-point it at the surviving package node, whose
               name matched the lookup key. (Assumes pbl_free_node() releases the
               node's name - confirm against protobuf_lang_tree.c.) */
            state->file->package_name = pbl_get_node_name(packnode);
            pbl_free_node(B);
        } else {
            g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), B);
        }
    }
156
157/* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
158/* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
159syntax ::= PT_SYNTAX PT_ASSIGN strLit(B) PT_SEMICOLON.
160    {
161        if (!strcmp(B, "proto3")) {
162            state->file->syntax_version = 3;
163        } else if (!strcmp(B, "proto2")) {
164            state->file->syntax_version = 2;
165        } else {
166            pbl_parser_error(state, "Unrecognized syntax identifier [%s]. This parser only recognizes \"proto3\" or \"proto2\"!", B);
167            state->grammar_error = TRUE;
168        }
169    }
170
171protoBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_PACKAGE, NAME_TO_BE_SET); } /* create am empty package node */
172protoBody ::= protoBody import. /* default action is {A = B; } */
173protoBody ::= protoBody package.
174protoBody ::= protoBody option.
175protoBody(A) ::= protoBody(B) topLevelDef(C).  { A = B; pbl_add_child(A, C); }
176protoBody ::= protoBody emptyStatement.
177
178/* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
179import ::=  PT_IMPORT strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); } /* append file to todo list */
180import ::=  PT_IMPORT PT_PUBLIC strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
181import ::=  PT_IMPORT PT_WEAK strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
182
183/* v2/v3: package = "package" fullIdent ";" */
184package ::= PT_PACKAGE exIdent(B) PT_SEMICOLON.
185    {   /* The memory of (B) will be freed after parsing, but the package_name will
186           be replaced by the new-allocated name of package node late */
187        state->file->package_name = B->v;
188        state->file->package_name_lineno = B->ln;
189    }
190
191/* v2/v3: option = "option" optionName  "=" constant ";" */
192/* Offical PBL bugfix: option = "option" optionName  "=" ( constant | customOptionValue ) ";" */
193option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON.
194option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON.
195
196/* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
197/* Offical PBL bugfix: optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) } */
198extIdentInParentheses(A) ::= PT_LPAREN exIdent(B) PT_RPAREN.
199    { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); }
200optionName ::= exIdent.
201optionName ::= extIdentInParentheses.
202optionName(A) ::= optionName(B) exIdent(C). // Note that the exIdent contains "."
203    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, C->v, NULL)); }
204optionName(A) ::= optionName(B) PT_DOT extIdentInParentheses(C).
205    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
206optionName(A) ::= optionName(B) extIdentInParentheses(C).
207    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
208
/* Allow formats which are not defined in the official PBL specification, like:
210    option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
211    option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
212    option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
213*/
214customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY.
215
216/* The formal EBNF of customOptionBody seems to be */
217/*
218customOptionBody ::= .
219customOptionBody ::= customOptionBody optionField.
220customOptionBody ::= customOptionBody PT_COMMA optionField.
221customOptionBody ::= customOptionBody PT_SEMICOLON optionField.
222
223optionField ::= optionName PT_COLON constant.
224optionField ::= optionName PT_COLON customOptionValue.
225optionField ::= optionName customOptionValue.
226optionField ::= optionName PT_COLON array.
227
228array ::= PT_LBRACKET arrayBody PT_RBRACKET.
229arrayBodyConst ::= constant.
230arrayBodyConst ::= arrayBody PT_COMMA constant.
231arrayBodyCustom ::= customOptionValue.
232arrayBodyCustom ::= arrayBody PT_COMMA customOptionValue.
233arrayBody ::= arrayBodyConst.
234arrayBody ::= arrayBodyCustom.
235*/
236/* but for handling unexpected situations, we still use following EBNF  */
237customOptionBody ::= .
238customOptionBody ::= customOptionBody exIdent.
239customOptionBody ::= customOptionBody PT_STRLIT.
240customOptionBody ::= customOptionBody symbolsWithoutCurly.
241customOptionBody ::= customOptionBody intLit.
242customOptionBody ::= customOptionBody customOptionValue.
243
244symbolsWithoutCurly ::= PT_LPAREN.
245symbolsWithoutCurly ::= PT_RPAREN.
246symbolsWithoutCurly ::= PT_LBRACKET.
247symbolsWithoutCurly ::= PT_RBRACKET.
248symbolsWithoutCurly ::= PT_EQUAL.
249symbolsWithoutCurly ::= PT_NOTEQUAL.
250symbolsWithoutCurly ::= PT_NOTEQUAL2.
251symbolsWithoutCurly ::= PT_GEQUAL.
252symbolsWithoutCurly ::= PT_LEQUAL.
253symbolsWithoutCurly ::= PT_ASSIGN_PLUS.
254symbolsWithoutCurly ::= PT_ASSIGN.
255symbolsWithoutCurly ::= PT_PLUS.
256symbolsWithoutCurly ::= PT_MINUS.
257symbolsWithoutCurly ::= PT_MULTIPLY.
258symbolsWithoutCurly ::= PT_DIV.
259symbolsWithoutCurly ::= PT_LOGIC_OR.
260symbolsWithoutCurly ::= PT_OR.
261symbolsWithoutCurly ::= PT_LOGIC_AND.
262symbolsWithoutCurly ::= PT_AND.
263symbolsWithoutCurly ::= PT_NOT.
264symbolsWithoutCurly ::= PT_NEG.
265symbolsWithoutCurly ::= PT_XOR.
266symbolsWithoutCurly ::= PT_SHL.
267symbolsWithoutCurly ::= PT_SHR.
268symbolsWithoutCurly ::= PT_PERCENT.
269symbolsWithoutCurly ::= PT_DOLLAR.
270symbolsWithoutCurly ::= PT_COND.
271symbolsWithoutCurly ::= PT_SEMICOLON.
272symbolsWithoutCurly ::= PT_DOT.
273symbolsWithoutCurly ::= PT_COMMA.
274symbolsWithoutCurly ::= PT_COLON.
275symbolsWithoutCurly ::= PT_LESS.
276symbolsWithoutCurly ::= PT_GREATER.
277
278/* v2: topLevelDef = message | enum | extend | service */
279/* v3: topLevelDef = message | enum | service */
280topLevelDef ::= message.
281topLevelDef ::= enum.
282topLevelDef ::= extend. /*v2 only */
283topLevelDef ::= service.
284
285/* v2/v3: message = "message" messageName messageBody */
286message(A) ::= PT_MESSAGE messageName(B) PT_LCURLY messageBody(C) PT_RCURLY.
287    { A = C; pbl_set_node_name(A, B->ln, B->v); }
288
289/* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
290/* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
291messageBody(A) ::= .  { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
292messageBody(A) ::= messageBody(B) field(C).  { A = B; pbl_add_child(A, C); }
293messageBody(A) ::= messageBody(B) enum(C).  { A = B; pbl_add_child(A, C); }
294messageBody(A) ::= messageBody(B) message(C).  { A = B; pbl_add_child(A, C); }
295messageBody ::= messageBody extend.  /* v2 only */
296messageBody ::= messageBody extensions. /* v2 only */
297messageBody(A) ::= messageBody(B) group(C). /* v2 only */  { A = B; pbl_add_child(A, C); }
298messageBody ::= messageBody option.
299messageBody(A) ::= messageBody(B) oneof(C).  { A = B; pbl_merge_children(A, C); pbl_free_node(C); }
300messageBody(A) ::= messageBody(B) mapField(C).  { A = B; pbl_add_child(A, C); }
301messageBody ::= messageBody reserved.
302messageBody ::= messageBody emptyStatement.
303
304/* v2/v3: enum = "enum" enumName enumBody */
305enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY.
306    { A = C; pbl_set_node_name(A, B->ln, B->v); }
307
308/* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
309/* Offical PBL bugfix: enumBody = "{" { reserved | option | enumField | emptyStatement } "}" */
310enumBody(A) ::= .  { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); }
311enumBody ::= enumBody reserved.
312enumBody ::= enumBody option.
313enumBody(A) ::= enumBody(B) enumField(C).  { A = B; pbl_add_child(A, C); }
314enumBody ::= enumBody emptyStatement.
315
316/* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { ","  enumValueOption } "]" ]";" */
317enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C) PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON.
318    { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
319enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C).
320    { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
321
322/* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
/* Convert the unsigned 64-bit literal into the int used for enum values.
   Values outside the 32-bit range are narrowed, not rejected. */
enumNumber(A) ::= intLit(B). { A = (int)B; }
enumNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
/* Negate in unsigned arithmetic (well-defined wrap-around) before narrowing:
   "-(int)B" overflows a signed int when B is 2147483648, i.e. for INT_MIN. */
enumNumber(A) ::= PT_MINUS intLit(B). { A = (int)(gint64)(G_GUINT64_CONSTANT(0) - B); }
326
327/* v2/v3: enumValueOption { ","  enumValueOption } */
328enumValueOptions ::= enumValueOption.
329enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption.
330
331/* v2/v3: enumValueOption = optionName "=" constant */
332/* Offical PBL bugfix: enumValueOption = optionName  "=" ( constant | customOptionValue ) ";" */
333enumValueOption ::= optionName PT_ASSIGN constant.
334enumValueOption ::= optionName PT_ASSIGN customOptionValue.
335
336/* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
337/* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
338service(A) ::= PT_SERVICE serviceName(B) PT_LCURLY serviceBody(C) PT_RCURLY.
339    { A = C; pbl_set_node_name(A, B->ln, B->v); }
340
341serviceBody(A) ::= .  { A = pbl_create_node(state->file, CUR_LINENO, PBL_SERVICE, NAME_TO_BE_SET); }
342serviceBody ::= serviceBody option.
343serviceBody(A) ::= serviceBody(B) rpc(C).  { A = B; pbl_add_child(A, C); }
344serviceBody ::= serviceBody emptyStatement.
345serviceBody(A) ::= serviceBody(B) stream(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
346
347/* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
348rpc ::= rpcDecl PT_SEMICOLON.
349rpc ::= rpcDecl PT_LCURLY rpcBody PT_RCURLY.
350
351/* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
352rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
353    { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, FALSE); }
354rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
355    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, FALSE); }
356rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
357    { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, TRUE); }
358rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
359    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
360
361rpcBody ::= .
362rpcBody ::= rpcBody option.
363rpcBody ::= rpcBody emptyStatement.
364
365/* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
366stream ::= streamDecl PT_SEMICOLON.
367stream ::= streamDecl PT_LCURLY streamBody PT_RCURLY.
368
369/* v2 only */
370streamDecl(A) ::= PT_STREAM streamName(B) PT_LPAREN messageType(C) PT_COMMA messageType(D) PT_RPAREN.
371    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
372
373/* v2 only */
374streamBody ::= .
375streamBody ::= streamBody option.
376streamBody ::= streamBody emptyStatement.
377
378/* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
379/* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
380field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
381    { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, NULL); }
382field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
383    { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, F); }
384field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
385    { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, NULL); }
386field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
387    { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, F); }
388
389/* v2: label = "required" | "optional" | "repeated" */
390label(A) ::= PT_REQUIRED(B).  { A = B->v; }
391label(A) ::= PT_OPTIONAL(B).  { A = B->v; }
392label(A) ::= PT_REPEATED(B).  { A = B->v; }
393
394/* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
395      | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
396      | "bool" | "string" | "bytes" | messageType | enumType
397*/
398type(A) ::= exIdent(B).  { A = B->v; }
399
400/* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
401fieldNumber(A) ::= intLit(B). { A = (int)B; }
402fieldNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
403
404/* v2/v3: fieldOptions = fieldOption { ","  fieldOption } */
405fieldOptions(A) ::= fieldOption(B).
406    { A = pbl_create_node(state->file, CUR_LINENO, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child(A, B); }
407fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C).
408    { A = B; pbl_add_child(A, C); }
409
410/* v2/v3: fieldOption = optionName "=" constant */
411/* Offical PBL bugfix: fieldOption = optionName  "=" ( constant | customOptionValue ) ";" */
412fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C).
413    { A = pbl_create_option_node(state->file, B->ln, B->v, C); }
414fieldOption(A) ::= optionName(B) PT_ASSIGN customOptionValue.
415    { A = pbl_create_option_node(state->file, B->ln, B->v, pbl_store_string_token(state, g_strdup("{ ... }"))); }
416
417/* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
418/* Offical PBL bugfix: there is no label if the 'group' is a member of oneof body */
419group(A) ::= PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
420    { A = C; pbl_set_node_name(A, B->ln, B->v); }
421group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
422    { A = C; pbl_set_node_name(A, B->ln, B->v); }
423
424groupName ::= exIdent.
425
426/* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
427/* Offical PBL bugfix: oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}" */
428oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY.
429    { A = C; pbl_set_node_name(A, B->ln, B->v); }
430
431oneofBody(A) ::= .  { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); }
432oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); }
433oneofBody ::= oneofBody option.
434oneofBody ::= oneofBody group.
435oneofBody ::= oneofBody emptyStatement.
436
437/* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
438oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_LBRACKET fieldOptions(E) PT_RBRACKET PT_SEMICOLON.
439    { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, E); }
440oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_SEMICOLON.
441    { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, NULL); }
442
443/* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
444mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
445    {
446        A = pbl_create_map_field_node(state->file, D->ln, D->v, E, F);
447        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
448        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
449    }
450mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
451    {
452        A = pbl_create_map_field_node(state->file, D->ln, D->v, E, NULL);
453        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
454        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
455    }
456
457/* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
458          "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
459keyType(A) ::= exIdent(B). { A = B->v; }
460
461/* v2 only: extensions = "extensions" ranges ";" */
462extensions ::= PT_EXTENSIONS ranges PT_SEMICOLON.
463
464/* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
465reserved ::= PT_RESERVED ranges PT_SEMICOLON.
466reserved ::= PT_RESERVED quoteFieldNames PT_SEMICOLON.
467
468/* v2/v3: ranges = range { "," range } */
469ranges ::= range.
470ranges ::= ranges PT_COMMA range.
471
472/* v2/v3: range =  intLit [ "to" ( intLit | "max" ) ] */
473range ::= intLit.
474range ::= intLit PT_TO intLit.
475range ::= intLit PT_TO exIdent.
476
477/* v2/v3: fieldNames = fieldName { "," fieldName }
478Note that there is an error in BNF definition about reserved fieldName. It's strLit, not ident.
479*/
480quoteFieldNames ::= strLit.
481quoteFieldNames ::= quoteFieldNames PT_COMMA strLit.
482
483/* v2/v3: extend = "extend" messageType "{" {field | group | emptyStatement} "}"
484Note that creating custom options uses extensions, which are permitted only for custom options in proto3.
485We don't use custom options while parsing packet, so we just ignore the 'extend'.
486*/
487extend(A) ::= PT_EXTEND messageType PT_LCURLY extendBody(B) PT_RCURLY.
488    { A = NULL; pbl_free_node(B); }
489
490extendBody(A) ::= .  { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
491extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); }
492extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); }
493extendBody ::= extendBody emptyStatement.
494
495messageName ::= exIdent.
496enumName ::= exIdent.
497streamName ::= exIdent.
498fieldName ::= exIdent.
499oneofName ::= exIdent.
500mapName ::= exIdent.
501serviceName ::= exIdent.
502rpcName ::= exIdent.
503
504/* messageType = [ "." ] { ident "." } messageName */
505messageType(A) ::= exIdent(B).  { A = B->v; }
506
507/* enumType = [ "." ] { ident "." } enumName */
508/*enumType ::= exIdent.*/
509
510/* intLit     = decimalLit | octalLit | hexLit */
511intLit(A) ::= PT_DECIMALLIT(B).  { A = g_ascii_strtoull(B->v, NULL, 10); }
512intLit(A) ::= PT_OCTALLIT(B).  { A = g_ascii_strtoull(B->v+1, NULL, 8); }
513intLit(A) ::= PT_HEXLIT(B).  { A = g_ascii_strtoull(B->v+2, NULL, 16); }
514
515/* emptyStatement = ";" */
516emptyStatement ::= PT_SEMICOLON.
517
518/* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
519constant(A) ::= exIdent(B).  { A = B->v; } /* boolLit is parsed as exIdent */
520constant ::= strLit.
521constant(A) ::= intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
522constant(A) ::= PT_PLUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
523constant(A) ::= PT_MINUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("-%" G_GUINT64_FORMAT, B)); }
524constant(A) ::= PT_PLUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("+", B->v, NULL)); } /* This cover floatLit. */
525constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("-", B->v, NULL)); }
526
527exIdent ::= PT_IDENT.
528
529strLit(A) ::= PT_STRLIT(B).  { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); }
/* support one string being split across multiple lines */
531strLit(A) ::= strLit(B) PT_STRLIT(C). { gchar *v = g_strndup(C->v + 1, strlen(C->v) - 2); A = pbl_store_string_token(state, g_strconcat(B, v, NULL)); g_free(v); }
532
533%code {
534
535void
536protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
537{
538    int lineno;
539    void(*error_cb)(const char *format, ...);
540    const char* filepath = (state && state->file) ?
541                            state->file->filename : "UNKNOWN";
542
543    error_cb = (state && state->pool->error_cb) ?
544                state->pool->error_cb : pbl_printf;
545
546    lineno = yyscanner ? protobuf_lang_get_lineno(yyscanner) : -1;
547
548    if (lineno > -1) {
549        error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
550    } else {
551        error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
552    }
553}
554
555void
556pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
557{
558    char* msg;
559    void* scanner;
560    va_list ap;
561    va_start(ap, fmt);
562    msg = g_strdup_vprintf(fmt, ap);
563    scanner = state ? state->scanner : NULL;
564    protobuf_lang_error(scanner, state, msg);
565    va_end(ap);
566    g_free(msg);
567}
568
569static void
570pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
571{
572    if (state == NULL) {
573        return;
574    }
575
576    state->pool = NULL;
577    state->file = NULL;
578    state->grammar_error = FALSE;
579    state->tmp_token = NULL;
580
581    if (state->scanner) {
582        protobuf_lang_lex_destroy(state->scanner);
583        state->scanner = NULL;
584    }
585
586    if (state->pParser) {
587        ProtobufLangParserFree(state->pParser, g_free);
588        state->pParser = NULL;
589    }
590
591    if (state->lex_string_tokens) {
592        g_slist_free_full(state->lex_string_tokens, g_free);
593        state->lex_string_tokens = NULL;
594    }
595
596    if (state->lex_struct_tokens) {
597        g_slist_free_full(state->lex_struct_tokens, g_free);
598        state->lex_struct_tokens = NULL;
599    }
600
601    if (pool) {
602        pool->parser_state = NULL;
603    }
604}
605
606static void
607pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
608{
609    if (state == NULL) {
610        return;
611    }
612    pbl_clear_state(state, pool);
613
614    state->pool = pool;
615    state->file = (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath);
616    state->pParser = ProtobufLangParserAlloc(g_malloc);
617
618    if (pool) {
619        pool->parser_state = state;
620    }
621}
622
623int run_pbl_parser(pbl_descriptor_pool_t* pool)
624{
625    protobuf_lang_state_t state = {0};
626    yyscan_t scanner;
627    GSList* it;
628    FILE * fp;
629    int status = 0;
630    int token_id;
631    const char* filepath;
632
633    it = pool->proto_files_to_be_parsed;
634    while (it) {
635        filepath = (const char*) it->data;
636        /* reinit state and scanner */
637        pbl_reinit_state(&state, pool, filepath);
638        scanner = NULL;
639
640        /* Note that filepath is absolute path in proto_files */
641        fp = ws_fopen(filepath, "r");
642        if (fp == NULL) {
643            pbl_parser_error(&state, "File does not exists!");
644            status = -1;
645            goto finish;
646        }
647
648        status = protobuf_lang_lex_init(&scanner);
649        if (status != 0) {
650            pbl_parser_error(&state, "Initialize Protocol Buffers Language scanner failed!\n");
651            fclose(fp);
652            goto finish;
653        }
654
655        /* associate the parser state with the lexical analyzer state */
656        protobuf_lang_set_extra(&state, scanner);
657        state.scanner = scanner;
658
659        protobuf_lang_restart(fp, scanner);
660        /* uncomment the next line for debugging */
661        /* ProtobufLangParserTrace(stdout, ">>>"); */
662        while (!state.grammar_error && (token_id = protobuf_lang_lex(scanner))) {
663            /* state.tmp_token contains token string value and lineno information */
664            ProtobufLangParser(state.pParser, token_id, state.tmp_token, &state);
665        }
666        fclose(fp);
667
668        if (state.grammar_error) {
669            status = -2;
670            goto finish;
671        } else {
672            ProtobufLangParser(state.pParser, 0, NULL, &state);
673        }
674
675        /* remove the parsed file from list */
676        pool->proto_files_to_be_parsed = it = g_slist_delete_link(pool->proto_files_to_be_parsed, it);
677    }
678
679finish:
680    pbl_clear_state(&state, pool);
681    return status;
682}
683
684} /* end of %code block */
685