%include {
/* protobuf_lang_parser.lemon
 *
 * C Protocol Buffers Language (PBL) Parser (for *.proto files)
 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
 * There are two formats of *.proto files:
 * 1) Protocol Buffers Version 3 Language Specification:
 *      https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
 * 2) Protocol Buffers Version 2 Language Specification:
 *      https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
 * There are some errors about 'proto', 'option' (value) and 'reserved' (fieldName) definitions on those sites.
 * This parser is created because Wireshark is mainly implemented in plain ANSI C but the official
 * Protocol Buffers Language parser is implemented in C++.
 */
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <glib.h>
#include <assert.h>
#include <wsutil/file_util.h>
#include "protobuf_lang_tree.h"
#include "protobuf_lang_parser.h"
#include "protobuf_lang_scanner_lex.h"

/* Placeholder name given to a node created by an empty "body" rule; the
   enclosing rule replaces it through pbl_set_node_name(). */
#define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
/* Name given to nodes that are never renamed afterwards (the options list). */
#define NEED_NOT_NAME "<NEED_NOT_NAME>"

/* Entry points of the parser generated by lemon (see %name below). */
static void *ProtobufLangParserAlloc(void *(*mallocProc)(size_t));
static void ProtobufLangParser(void *yyp, int yymajor, protobuf_lang_token_t *yyminor, protobuf_lang_state_t *state);
static void ProtobufLangParserFree(void *p, void (*freeProc)(void*));

/* Error handling function for parser */
void protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg);

/* Extended error handling function */
void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);

/* It's just the approximate line number which is gotten when a grammar rule is reduced
   by the parser (lemon). That might be overridden by the lineno argument of
   pbl_set_node_name() later. */
#define CUR_LINENO (protobuf_lang_get_lineno(state->scanner))

} /* end of %include */

%name ProtobufLangParser

%extra_argument { protobuf_lang_state_t *state }

%token_type { protobuf_lang_token_t* }

%token_destructor {
    /* We manage memory allocated for token values by ourself */
    (void) state; /* Mark unused, similar to Q_UNUSED */
    (void) $$; /* Mark unused, similar to Q_UNUSED */
}

%syntax_error {
    /* yyminor is NULL when the offending token is the end-of-input marker:
       run_pbl_parser() feeds token 0 with a NULL value. Dereferencing it
       unconditionally crashed on truncated files (e.g. an unclosed '{'). */
    if (yyminor && yyminor->v) {
        pbl_parser_error(state, "Syntax Error: unexpected token \"%s\"!", yyminor->v);
    } else {
        pbl_parser_error(state, "Syntax Error: unexpected end of input!");
    }
    state->grammar_error = TRUE;
}

%parse_failure {
    pbl_parser_error(state, "Parse Error");
    state->grammar_error = TRUE;
}

/* Keywords like 'syntax', 'message', etc can be used as the names of messages, fields or enums.
   So we tell the lemon: "If you are unable to parse this keyword, try treating it as an identifier instead." */
%fallback PT_IDENT PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL.
%fallback PT_IDENT PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS.
%fallback PT_IDENT PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO.

/* Non-terminals whose value is a plain string. */
%type strLit { gchar* }
%type label { gchar* }
%type type { gchar* }
%type keyType { gchar* }
%type messageType { gchar* }
%type constant { gchar* }

/* Non-terminals that carry the whole token (string value 'v' and line number 'ln'). */
%type exIdent { protobuf_lang_token_t* }
%type optionName { protobuf_lang_token_t* }
%type messageName { protobuf_lang_token_t* }
%type enumName { protobuf_lang_token_t* }
%type streamName { protobuf_lang_token_t* }
%type fieldName { protobuf_lang_token_t* }
%type oneofName { protobuf_lang_token_t* }
%type mapName { protobuf_lang_token_t* }
%type serviceName { protobuf_lang_token_t* }
%type rpcName { protobuf_lang_token_t* }
%type groupName { protobuf_lang_token_t* }

/* Non-terminals that produce a tree node. */
%type protoBody { pbl_node_t* }
%type topLevelDef { pbl_node_t* }
%type message { pbl_node_t* }
%type messageBody { pbl_node_t* }
%type rpc { pbl_node_t* }
%type rpcDecl { pbl_node_t* }
%type field { pbl_node_t* }
%type oneofField { pbl_node_t* }
%type enum { pbl_node_t* }
%type enumBody { pbl_node_t* }
%type enumField { pbl_node_t* }
%type service { pbl_node_t* }
%type serviceBody { pbl_node_t* }
%type stream { pbl_node_t* }
%type streamDecl { pbl_node_t* }
%type fieldOptions { pbl_node_t* }
%type fieldOption { pbl_node_t* }
%type oneof { pbl_node_t* }
%type oneofBody { pbl_node_t* }
%type mapField { pbl_node_t* }
%type group { pbl_node_t* }
%type extend { pbl_node_t* }
%type extendBody { pbl_node_t* }

%type intLit { guint64 }

%type fieldNumber { int }
%type enumNumber { int }

/* We don't care about the types of following nodes:
syntax import package option enumValueOptions enumValueOption rpcBody streamBody
extensions reserved ranges range quoteFieldNames emptyStatement
*/

%start_symbol proto

/* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
/* Official PBL bugfix: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
   The default syntax version is "proto2". */
proto ::= wholeProtoBody.
proto ::= syntax wholeProtoBody.

/* Reduced once the whole file body has been parsed: names the package node
   and registers it (or merges it) in the pool's package table. */
wholeProtoBody ::= protoBody(B).
    {
        /* set real package name */
        pbl_set_node_name(B, state->file->package_name_lineno, state->file->package_name);
        /* use the allocated memory of the name of the package node */
        state->file->package_name = pbl_get_node_name(B);
        /* put this file data into package tables */
        pbl_node_t* packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
        if (packnode) {
            /* a package of this name already exists: move the children over and drop the new node */
            pbl_merge_children(packnode, B);
            pbl_free_node(B);
        } else {
            g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), B);
        }
    }

/* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
/* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
syntax ::= PT_SYNTAX PT_ASSIGN strLit(B) PT_SEMICOLON.
    {
        if (!strcmp(B, "proto3")) {
            state->file->syntax_version = 3;
        } else if (!strcmp(B, "proto2")) {
            state->file->syntax_version = 2;
        } else {
            /* any other version string is reported as a grammar error */
            pbl_parser_error(state, "Unrecognized syntax identifier [%s]. This parser only recognizes \"proto3\" or \"proto2\"!", B);
            state->grammar_error = TRUE;
        }
    }

protoBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_PACKAGE, NAME_TO_BE_SET); } /* create an empty package node */
protoBody ::= protoBody import. /* default action is {A = B; } */
protoBody ::= protoBody package.
protoBody ::= protoBody option.
protoBody(A) ::= protoBody(B) topLevelDef(C). { A = B; pbl_add_child(A, C); }
protoBody ::= protoBody emptyStatement.

/* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
import ::= PT_IMPORT strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); } /* append file to todo list */
import ::= PT_IMPORT PT_PUBLIC strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
import ::= PT_IMPORT PT_WEAK strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }

/* v2/v3: package = "package" fullIdent ";" */
package ::= PT_PACKAGE exIdent(B) PT_SEMICOLON.
    {   /* The memory of (B) will be freed after parsing, but the package_name will
           be replaced by the newly allocated name of the package node later */
        state->file->package_name = B->v;
        state->file->package_name_lineno = B->ln;
    }

/* v2/v3: option = "option" optionName "=" constant ";" */
/* Official PBL bugfix: option = "option" optionName "=" ( constant | customOptionValue ) ";" */
/* Neither rule has an action: statement-level options are parsed and ignored. */
option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON.
option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON.

/* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
/* Official PBL bugfix: optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) } */
/* The token of the first operand is reused as the result; only its string
   value is replaced by the newly built (and state-owned) string. */
extIdentInParentheses(A) ::= PT_LPAREN exIdent(B) PT_RPAREN.
    { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); }
optionName ::= exIdent.
optionName ::= extIdentInParentheses.
optionName(A) ::= optionName(B) exIdent(C). // Note that the exIdent contains "."
    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, C->v, NULL)); }
optionName(A) ::= optionName(B) PT_DOT extIdentInParentheses(C).
    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
optionName(A) ::= optionName(B) extIdentInParentheses(C).
    { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }

/* Allow format which is not defined in the official PBL specification like:
    option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
    option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
    option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
*/
customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY.

/* The formal EBNF of customOptionBody seems to be */
/*
customOptionBody ::= .
customOptionBody ::= customOptionBody optionField.
customOptionBody ::= customOptionBody PT_COMMA optionField.
customOptionBody ::= customOptionBody PT_SEMICOLON optionField.

optionField ::= optionName PT_COLON constant.
optionField ::= optionName PT_COLON customOptionValue.
optionField ::= optionName customOptionValue.
optionField ::= optionName PT_COLON array.

array ::= PT_LBRACKET arrayBody PT_RBRACKET.
arrayBodyConst ::= constant.
arrayBodyConst ::= arrayBody PT_COMMA constant.
arrayBodyCustom ::= customOptionValue.
arrayBodyCustom ::= arrayBody PT_COMMA customOptionValue.
arrayBody ::= arrayBodyConst.
arrayBody ::= arrayBodyCustom.
*/
/* but for handling unexpected situations, we still use following EBNF */
/* i.e. any sequence of identifiers, literals, symbols and nested "{ ... }" is accepted */
customOptionBody ::= .
customOptionBody ::= customOptionBody exIdent.
customOptionBody ::= customOptionBody PT_STRLIT.
customOptionBody ::= customOptionBody symbolsWithoutCurly.
customOptionBody ::= customOptionBody intLit.
customOptionBody ::= customOptionBody customOptionValue.

/* Every symbol token except the curly braces, which delimit customOptionValue. */
symbolsWithoutCurly ::= PT_LPAREN.
symbolsWithoutCurly ::= PT_RPAREN.
symbolsWithoutCurly ::= PT_LBRACKET.
symbolsWithoutCurly ::= PT_RBRACKET.
symbolsWithoutCurly ::= PT_EQUAL.
symbolsWithoutCurly ::= PT_NOTEQUAL.
symbolsWithoutCurly ::= PT_NOTEQUAL2.
symbolsWithoutCurly ::= PT_GEQUAL.
symbolsWithoutCurly ::= PT_LEQUAL.
symbolsWithoutCurly ::= PT_ASSIGN_PLUS.
symbolsWithoutCurly ::= PT_ASSIGN.
symbolsWithoutCurly ::= PT_PLUS.
symbolsWithoutCurly ::= PT_MINUS.
symbolsWithoutCurly ::= PT_MULTIPLY.
symbolsWithoutCurly ::= PT_DIV.
symbolsWithoutCurly ::= PT_LOGIC_OR.
symbolsWithoutCurly ::= PT_OR.
symbolsWithoutCurly ::= PT_LOGIC_AND.
symbolsWithoutCurly ::= PT_AND.
symbolsWithoutCurly ::= PT_NOT.
symbolsWithoutCurly ::= PT_NEG.
symbolsWithoutCurly ::= PT_XOR.
symbolsWithoutCurly ::= PT_SHL.
symbolsWithoutCurly ::= PT_SHR.
symbolsWithoutCurly ::= PT_PERCENT.
symbolsWithoutCurly ::= PT_DOLLAR.
symbolsWithoutCurly ::= PT_COND.
symbolsWithoutCurly ::= PT_SEMICOLON.
symbolsWithoutCurly ::= PT_DOT.
symbolsWithoutCurly ::= PT_COMMA.
symbolsWithoutCurly ::= PT_COLON.
symbolsWithoutCurly ::= PT_LESS.
symbolsWithoutCurly ::= PT_GREATER.

/* v2: topLevelDef = message | enum | extend | service */
/* v3: topLevelDef = message | enum | service */
topLevelDef ::= message.
topLevelDef ::= enum.
topLevelDef ::= extend. /*v2 only */
topLevelDef ::= service.

/* v2/v3: message = "message" messageName messageBody */
/* The node is created by the empty messageBody rule; here it only gets its real name. */
message(A) ::= PT_MESSAGE messageName(B) PT_LCURLY messageBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }

/* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
/* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
messageBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
messageBody(A) ::= messageBody(B) field(C). { A = B; pbl_add_child(A, C); }
messageBody(A) ::= messageBody(B) enum(C). { A = B; pbl_add_child(A, C); }
messageBody(A) ::= messageBody(B) message(C). { A = B; pbl_add_child(A, C); }
messageBody ::= messageBody extend. /* v2 only */
messageBody ::= messageBody extensions. /* v2 only */
messageBody(A) ::= messageBody(B) group(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
messageBody ::= messageBody option.
/* the fields of a oneof become direct children of the message; the oneof node itself is discarded */
messageBody(A) ::= messageBody(B) oneof(C). { A = B; pbl_merge_children(A, C); pbl_free_node(C); }
messageBody(A) ::= messageBody(B) mapField(C). { A = B; pbl_add_child(A, C); }
messageBody ::= messageBody reserved.
messageBody ::= messageBody emptyStatement.

/* v2/v3: enum = "enum" enumName enumBody */
enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }

/* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
/* Official PBL bugfix: enumBody = "{" { reserved | option | enumField | emptyStatement } "}" */
enumBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); }
enumBody ::= enumBody reserved.
enumBody ::= enumBody option.
enumBody(A) ::= enumBody(B) enumField(C). { A = B; pbl_add_child(A, C); }
enumBody ::= enumBody emptyStatement.

/* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
/* The second rule has no trailing PT_SEMICOLON: the ";" that follows is then
   consumed by the 'enumBody emptyStatement' rule. The enum value options are
   parsed but not stored. */
enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C) PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON.
    { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C).
    { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }

/* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
enumNumber(A) ::= intLit(B). { A = (int)B; }
enumNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
/* Negate in the unsigned domain and convert afterwards: '-(int)B' overflowed
   a signed int (undefined behaviour) for the legal value -2147483648. */
enumNumber(A) ::= PT_MINUS intLit(B). { A = (int)(0 - B); }

/* v2/v3: enumValueOption { "," enumValueOption } */
enumValueOptions ::= enumValueOption.
enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption.

/* v2/v3: enumValueOption = optionName "=" constant */
/* Official PBL bugfix: enumValueOption = optionName "=" ( constant | customOptionValue ) ";" */
enumValueOption ::= optionName PT_ASSIGN constant.
enumValueOption ::= optionName PT_ASSIGN customOptionValue.

/* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
/* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
/* The node is created by the empty serviceBody rule; here it only gets its real name. */
service(A) ::= PT_SERVICE serviceName(B) PT_LCURLY serviceBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }

serviceBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_SERVICE, NAME_TO_BE_SET); }
serviceBody ::= serviceBody option.
serviceBody(A) ::= serviceBody(B) rpc(C). { A = B; pbl_add_child(A, C); }
serviceBody ::= serviceBody emptyStatement.
serviceBody(A) ::= serviceBody(B) stream(C). /* v2 only */ { A = B; pbl_add_child(A, C); }

/* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
/* No explicit action: presumably the rpcDecl node is carried through as the
   value of rpc by lemon's default action. The rpcBody content is ignored. */
rpc ::= rpcDecl PT_SEMICOLON.
rpc ::= rpcDecl PT_LCURLY rpcBody PT_RCURLY.

/* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
/* The boolean passed after each message type is TRUE exactly when PT_STREAM
   precedes that type (first: input type C, second: output type D). */
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
    { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, FALSE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, FALSE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
    { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, TRUE); }
rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }

rpcBody ::= .
rpcBody ::= rpcBody option.
rpcBody ::= rpcBody emptyStatement.

/* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
stream ::= streamDecl PT_SEMICOLON.
stream ::= streamDecl PT_LCURLY streamBody PT_RCURLY.

/* v2 only */
/* A v2 'stream' is stored as a method node with both stream flags set to TRUE. */
streamDecl(A) ::= PT_STREAM streamName(B) PT_LPAREN messageType(C) PT_COMMA messageType(D) PT_RPAREN.
    { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }

/* v2 only */
streamBody ::= .
streamBody ::= streamBody option.
streamBody ::= streamBody emptyStatement.

/* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* Four variants: with or without a label, with or without "[ options ]".
   A missing label or options list is passed to pbl_create_field_node() as NULL. */
field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, NULL); }
field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, F); }
field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, NULL); }
field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, F); }

/* v2: label = "required" | "optional" | "repeated" */
label(A) ::= PT_REQUIRED(B). { A = B->v; }
label(A) ::= PT_OPTIONAL(B). { A = B->v; }
label(A) ::= PT_REPEATED(B). { A = B->v; }

/* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
      | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
      | "bool" | "string" | "bytes" | messageType | enumType
*/
/* Any identifier is accepted here; the type name is not validated by the grammar. */
type(A) ::= exIdent(B). { A = B->v; }

/* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
/* NOTE(review): the range above is not enforced here; the 64-bit literal is simply cast to int. */
fieldNumber(A) ::= intLit(B). { A = (int)B; }
fieldNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }

/* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
/* The first option creates the PBL_OPTIONS container node; later ones are appended to it. */
fieldOptions(A) ::= fieldOption(B).
    { A = pbl_create_node(state->file, CUR_LINENO, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child(A, B); }
fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C).
    { A = B; pbl_add_child(A, C); }

/* v2/v3: fieldOption = optionName "=" constant */
/* Official PBL bugfix: fieldOption = optionName "=" ( constant | customOptionValue ) ";" */
/* A custom "{ ... }" value is not kept; the literal placeholder "{ ... }" is stored instead. */
fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C).
    { A = pbl_create_option_node(state->file, B->ln, B->v, C); }
fieldOption(A) ::= optionName(B) PT_ASSIGN customOptionValue.
    { A = pbl_create_option_node(state->file, B->ln, B->v, pbl_store_string_token(state, g_strdup("{ ... }"))); }

/* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
/* Official PBL bugfix: there is no label if the 'group' is a member of oneof body */
/* The group reuses the node built by messageBody; label and fieldNumber are parsed but not used. */
group(A) ::= PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }
group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }

groupName ::= exIdent.

/* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
/* Official PBL bugfix: oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}" */
oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY.
    { A = C; pbl_set_node_name(A, B->ln, B->v); }

oneofBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); }
oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); }
oneofBody ::= oneofBody option.
oneofBody ::= oneofBody group.
oneofBody ::= oneofBody emptyStatement.

/* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_LBRACKET fieldOptions(E) PT_RBRACKET PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, E); }
oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_SEMICOLON.
    { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, NULL); }

/* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
/* A map field node gets two child fields: "key" (number 1) and "value" (number 2). */
mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
    {
        A = pbl_create_map_field_node(state->file, D->ln, D->v, E, F);
        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
    }
mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
    {
        A = pbl_create_map_field_node(state->file, D->ln, D->v, E, NULL);
        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
        pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
    }

/* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
          "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
keyType(A) ::= exIdent(B). { A = B->v; }

/* v2 only: extensions = "extensions" ranges ";" */
extensions ::= PT_EXTENSIONS ranges PT_SEMICOLON.

/* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
reserved ::= PT_RESERVED ranges PT_SEMICOLON.
reserved ::= PT_RESERVED quoteFieldNames PT_SEMICOLON.

/* v2/v3: ranges = range { "," range } */
ranges ::= range.
ranges ::= ranges PT_COMMA range.

/* v2/v3: range = intLit [ "to" ( intLit | "max" ) ] */
range ::= intLit.
range ::= intLit PT_TO intLit.
range ::= intLit PT_TO exIdent.

/* v2/v3: fieldNames = fieldName { "," fieldName }
Note that there is an error in BNF definition about reserved fieldName. It's strLit, not ident.
*/
quoteFieldNames ::= strLit.
quoteFieldNames ::= quoteFieldNames PT_COMMA strLit.

/* v2/v3: extend = "extend" messageType "{" {field | group | emptyStatement} "}"
Note that creating custom options uses extensions, which are permitted only for custom options in proto3.
We don't use custom options while parsing packet, so we just ignore the 'extend'.
*/
extend(A) ::= PT_EXTEND messageType PT_LCURLY extendBody(B) PT_RCURLY.
    { A = NULL; pbl_free_node(B); }

extendBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); }
extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); }
extendBody ::= extendBody emptyStatement.

messageName ::= exIdent.
enumName ::= exIdent.
streamName ::= exIdent.
fieldName ::= exIdent.
oneofName ::= exIdent.
mapName ::= exIdent.
serviceName ::= exIdent.
rpcName ::= exIdent.

/* messageType = [ "." ] { ident "." } messageName */
messageType(A) ::= exIdent(B). { A = B->v; }

/* enumType = [ "." ] { ident "." } enumName */
/*enumType ::= exIdent.*/

/* intLit = decimalLit | octalLit | hexLit */
/* The octal rule skips one leading character and the hex rule two before converting. */
intLit(A) ::= PT_DECIMALLIT(B). { A = g_ascii_strtoull(B->v, NULL, 10); }
intLit(A) ::= PT_OCTALLIT(B). { A = g_ascii_strtoull(B->v+1, NULL, 8); }
intLit(A) ::= PT_HEXLIT(B). { A = g_ascii_strtoull(B->v+2, NULL, 16); }

/* emptyStatement = ";" */
emptyStatement ::= PT_SEMICOLON.

/* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
constant(A) ::= exIdent(B). { A = B->v; } /* boolLit is parsed as exIdent */
constant ::= strLit.
constant(A) ::= intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_PLUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_MINUS intLit(B). { A = pbl_store_string_token(state, g_strdup_printf("-%" G_GUINT64_FORMAT, B)); }
constant(A) ::= PT_PLUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("+", B->v, NULL)); } /* This covers floatLit. */
constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("-", B->v, NULL)); }

exIdent ::= PT_IDENT.

/* The first and last character of the token text (the quotes) are stripped. */
strLit(A) ::= PT_STRLIT(B). { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); }
/* support one string being split into multiple lines */
strLit(A) ::= strLit(B) PT_STRLIT(C).
    { gchar *v = g_strndup(C->v + 1, strlen(C->v) - 2); A = pbl_store_string_token(state, g_strconcat(B, v, NULL)); g_free(v); }

%code {

/* Report one parser error message.
 *
 * yyscanner: scanner used to obtain the current line number; may be NULL,
 *            in which case the message is printed without a line number.
 * state:     parser state; may be NULL. Supplies the file name and the
 *            error callback of the pool, falling back to "UNKNOWN" and
 *            pbl_printf respectively.
 * msg:       the already formatted message text.
 */
void
protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
{
    int lineno;
    void(*error_cb)(const char *format, ...);
    const char* filepath = (state && state->file) ?
                            state->file->filename : "UNKNOWN";

    /* pbl_clear_state() leaves state->pool set to NULL, so the pool itself
       must be checked before its callback is read. */
    error_cb = (state && state->pool && state->pool->error_cb) ?
                state->pool->error_cb : pbl_printf;

    lineno = yyscanner ? protobuf_lang_get_lineno(yyscanner) : -1;

    if (lineno > -1) {
        error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
    } else {
        error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
    }
}

/* printf-style front end of protobuf_lang_error(): formats the message and
 * reports it with the scanner taken from state (state may be NULL). */
void
pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
{
    char* msg;
    void* scanner;
    va_list ap;

    va_start(ap, fmt);
    msg = g_strdup_vprintf(fmt, ap);
    va_end(ap);

    scanner = state ? state->scanner : NULL;
    protobuf_lang_error(scanner, state, msg);
    g_free(msg);
}

/* Release everything owned by the parser state (scanner, lemon parser and
 * the two token lists) and reset its fields. When pool is not NULL its
 * parser_state back pointer is cleared as well. */
static void
pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
{
    if (state == NULL) {
        return;
    }

    state->pool = NULL;
    state->file = NULL;
    state->grammar_error = FALSE;
    state->tmp_token = NULL;

    if (state->scanner) {
        protobuf_lang_lex_destroy(state->scanner);
        state->scanner = NULL;
    }

    if (state->pParser) {
        ProtobufLangParserFree(state->pParser, g_free);
        state->pParser = NULL;
    }

    if (state->lex_string_tokens) {
        g_slist_free_full(state->lex_string_tokens, g_free);
        state->lex_string_tokens = NULL;
    }

    if (state->lex_struct_tokens) {
        g_slist_free_full(state->lex_struct_tokens, g_free);
        state->lex_struct_tokens = NULL;
    }

    if (pool) {
        pool->parser_state = NULL;
    }
}

/* Prepare the state for parsing the file 'filepath': clear what is left from
 * the previous file, look up the file descriptor in pool->proto_files and
 * allocate a fresh lemon parser. */
static void
pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
{
    if (state == NULL) {
        return;
    }
    pbl_clear_state(state, pool);

    state->pool = pool;
    /* pool was dereferenced here before the NULL check below; guard it the same way */
    state->file = pool ?
        (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath) : NULL;
    state->pParser = ProtobufLangParserAlloc(g_malloc);

    if (pool) {
        pool->parser_state = state;
    }
}

/* Parse every file queued in pool->proto_files_to_be_parsed.
 *
 * Each successfully parsed file is removed from the list; parsing stops at
 * the first failure.
 *
 * Returns 0 on success, -1 if a file cannot be opened, -2 on a grammar
 * error, or the non-zero status of protobuf_lang_lex_init() if the scanner
 * cannot be created.
 */
int run_pbl_parser(pbl_descriptor_pool_t* pool)
{
    protobuf_lang_state_t state = {0};
    yyscan_t scanner;
    GSList* it;
    FILE * fp;
    int status = 0;
    int token_id;
    const char* filepath;

    it = pool->proto_files_to_be_parsed;
    while (it) {
        filepath = (const char*) it->data;
        /* reinit state and scanner */
        pbl_reinit_state(&state, pool, filepath);
        scanner = NULL;

        /* Note that filepath is absolute path in proto_files */
        fp = ws_fopen(filepath, "r");
        if (fp == NULL) {
            pbl_parser_error(&state, "File does not exists!");
            status = -1;
            goto finish;
        }

        status = protobuf_lang_lex_init(&scanner);
        if (status != 0) {
            pbl_parser_error(&state, "Initialize Protocol Buffers Language scanner failed!\n");
            fclose(fp);
            goto finish;
        }

        /* associate the parser state with the lexical analyzer state */
        protobuf_lang_set_extra(&state, scanner);
        state.scanner = scanner;

        protobuf_lang_restart(fp, scanner);
        /* uncomment the next line for debugging */
        /* ProtobufLangParserTrace(stdout, ">>>"); */
        while (!state.grammar_error && (token_id = protobuf_lang_lex(scanner))) {
            /* state.tmp_token contains token string value and lineno information */
            ProtobufLangParser(state.pParser, token_id, state.tmp_token, &state);
        }
        fclose(fp);

        if (!state.grammar_error) {
            /* token 0 tells the parser that the input is complete */
            ProtobufLangParser(state.pParser, 0, NULL, &state);
        }

        /* Checked after the end-of-input token too: an error that is only
           detected at EOF (e.g. a missing '}') used to be reported while the
           file was still treated as parsed and 0 was returned. */
        if (state.grammar_error) {
            status = -2;
            goto finish;
        }

        /* remove the parsed file from list */
        pool->proto_files_to_be_parsed = it = g_slist_delete_link(pool->proto_files_to_be_parsed, it);
    }

finish:
    pbl_clear_state(&state, pool);
    return status;
}

} /* end of %code block */
