1 /* Copyright (C) 2005-2008,2011 G.P. Halkes
2 This program is free software: you can redistribute it and/or modify
3 it under the terms of the GNU General Public License version 3, as
4 published by the Free Software Foundation.
5
6 This program is distributed in the hope that it will be useful,
7 but WITHOUT ANY WARRANTY; without even the implied warranty of
8 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 GNU General Public License for more details.
10
11 You should have received a copy of the GNU General Public License
12 along with this program. If not, see <http://www.gnu.org/licenses/>.
13 */
14
15 #include <string.h>
16 #include <ctype.h>
17 #include <stdlib.h>
18
19 #include "generate.h"
20 #include "nonRuleAnalysis.h"
21 #include "bool.h"
22 #include "list.h"
23 #include "option.h"
24 #include "globals.h"
25 #include "io.h"
26
27 /** Construct the name of an output file, from the name of an input file.
28 @param originalName The name of the input file.
29 @param extension The extension of the output file.
30
31 The file constructed will be the originalName with a trailing '.g'
32 removed, after which the extension is added.
33 */
createNameWithExtension(const char * originalName,const char * extension)34 char *createNameWithExtension(const char *originalName, const char *extension) {
35 bool addExtension;
36 char *fullName;
37 size_t length, extensionLength;
38
39 extensionLength = strlen(extension) + 1;
40
41 length = strlen(originalName);
42
43 /* Check for '.g' */
44 addExtension = !(length >= 2 && originalName[length - 1] == 'g' && originalName[length - 2] == '.');
45 length += extensionLength;
46
47 if (!addExtension)
48 length -= 2;
49
50 /* Add 1 for the 0 byte */
51 fullName = (char *) safeMalloc(length + 1, "openOutputs");
52 strcpy(fullName, originalName);
53
54 if (addExtension)
55 fullName[length - extensionLength] = '.';
56
57 strcpy(fullName + length - extensionLength + 1, extension);
58 return fullName;
59 }
60
61 /** Write a code fragment that was read from the input to the output.
62 @param output The @a File to write to.
63 @param code The code to write.
64 @param keepMarkers Boolean to indicate whether to keep the parentheses or
65 braces that delimited the input.
66 */
outputCode(File * output,CCode * code,bool keepMarkers)67 void outputCode(File *output, CCode *code, bool keepMarkers) {
68 tfprintDirective(output, code);
69
70 if (keepMarkers) {
71 tfputs(output, code->text);
72 } else {
73 size_t length;
74 length = strlen(code->text);
75
76 tfputsn(output, code->text + 1, length - 2);
77 }
78 tfputc(output, '\n');
79
80 tfprintDirective(output, NULL);
81 }
82
83 /*================== Code to generate the header file ========================*/
84 /** Write the #define's for the tokens.
85 @param output The @a File to write to.
86 */
generateTokenDeclarations(File * output)87 static void generateTokenDeclarations(File *output) {
88 int i;
89 Declaration *declaration;
90 Directive *directive;
91
92 for (i = 0; i < listSize(declarations); i++) {
93 declaration = (Declaration *) listIndex(declarations, i);
94 if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == TOKEN_DIRECTIVE) {
95 directive = declaration->uDirective;
96 tfprintf(output, "#define %s %d\n", directive->token[0]->text, directive->uNumber);
97 }
98 }
99 }
100
generateParserHeader(File * output,Directive * directive)101 void generateParserHeader(File *output, Directive *directive) {
102 Declaration *startRule;
103
104 tfprintf(output, "%s %s(", option.abort ? "int" : "void", directive->token[0]->text);
105
106 startRule = (Declaration *) lookup(globalScope, directive->token[1]->text);
107 ASSERT(startRule != NULL);
108
109 if (option.threadSafe) {
110 tfprintf(output, "%s LLuserData", userDataTypeTS);
111 if (startRule->uNonTerminal->retvalIdent != NULL)
112 tfprintf(output, ", %s *LLretval", startRule->uNonTerminal->retvalIdent->text);
113 } else if (startRule->uNonTerminal->retvalIdent != NULL) {
114 tfprintf(output, "%s *LLretval", startRule->uNonTerminal->retvalIdent->text);
115 } else {
116 tfputs(output, "void");
117 }
118
119 tfputs(output, ")");
120 }
121
122 /** Write a prototype for each parser.
123 @param output The @a File to write to.
124 */
generateParserDeclarations(File * output)125 void generateParserDeclarations(File *output) {
126 int i;
127 Declaration *declaration;
128 Directive *directive;
129
130 for (i = 0; i < listSize(declarations); i++) {
131 declaration = (Declaration *) listIndex(declarations, i);
132 if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == START_DIRECTIVE) {
133 directive = declaration->uDirective;
134 generateParserHeader(output, directive);
135 tfputs(output, ";\n");
136 }
137 }
138 }
139
140 /** Write the top of a header file.
141 @param output The @a File to write to.
142 */
generateHeaderFileTop(File * output)143 void generateHeaderFileTop(File *output) {
144 /* Include guard. */
145 tfprintf(output, "#ifndef __LLNEXTGEN_%s_H__\n#define __LLNEXTGEN_%s_H__\n", prefix, prefix);
146
147 tfputs(output, "#ifndef LL_NOTOKENS\n");
148 tfputs(output, "#define EOFILE 256\n");
149 generateTokenDeclarations(output);
150 tfputs(output, "#endif\n");
151 /* Add some defines in the way that the prefix directive has effect. */
152 tfprintf(output, "#define %s_MAXTOKNO %d\n", prefix, maxTokenNumber - 1);
153 tfprintf(output, "#define %s_MISSINGEOF (-1)\n", prefix);
154 tfprintf(output, "#define %s_DELETE (%d)\n", prefix, option.noEOFZero ? -2 : 0);
155 tfprintf(output, "#define %s_VERSION " VERSION_HEX "\n", prefix);
156 if (!option.noLLreissue)
157 tfprintf(output, "#define %s_NEW_TOKEN (-2)\n", prefix);
158 }
159 /** Write the header file.
160 @param output The @a File to write to.
161 */
generateHeaderFile(File * output)162 void generateHeaderFile(File *output) {
163 generateHeaderFileTop(output);
164
165 /* LLgen does not declare the parsing routines in the header file,
166 but we should. This makes tidy programming so much easer. */
167 if (!option.noPrototypesHeader) {
168 generateParserDeclarations(output);
169 if (option.generateSymbolTable)
170 tfprintf(output, "const char *%sgetSymbol(int);\n", prefix);
171 if (option.abort)
172 tfprintf(output, "void %sabort(int);\n", prefix);
173 tfprintf(output, "extern int %ssymb;\n", prefix);
174 if (!option.noLLreissue && !option.generateLexerWrapper)
175 tfprintf(output, "extern int %sreissue;\n", prefix);
176 }
177
178 /* Include guard. */
179 tfprintf(output, "#endif\n");
180 }
181
182 /*=============== Code to generate the first part of a C file ================*/
183 /** Write the set of translations from LL to <prefix> variants.
184 @param output The @a File to write to.
185 */
generateDefaultPrefixTranslations(File * output)186 void generateDefaultPrefixTranslations(File *output) {
187 /* Add some defines so that the prefix directive has effect. Note
188 that only externally visible symbols need to be renamed. */
189 tfprintf(output, "#define LLsymb %ssymb\n", prefix);
190 tfprintf(output, "#define LLmessage %smessage\n", prefix);
191 if (!option.noLLreissue && !option.generateLexerWrapper)
192 tfprintf(output, "#define LLreissue %sreissue\n", prefix);
193 if (option.abort)
194 tfprintf(output, "#define LLabort %sabort\n", prefix);
195 if (option.generateSymbolTable)
196 tfprintf(output, "#define LLgetSymbol %sgetSymbol\n", prefix);
197 /* We want these available under their LL names in the C file regardless */
198 tfputs(output, "#define LL_MISSINGEOF (-1)\n");
199 tfprintf(output, "#define LL_DELETE (%d)\n", option.noEOFZero ? -2 : 0);
200 tfputs(output, "#define LL_VERSION " VERSION_HEX "\n");
201 if (!option.noLLreissue)
202 tfputs(output, "#define LL_NEW_TOKEN (-2)\n");
203 }
204
205 /** Write the set of declarations and #define's common to all .c files.
206 @param output The @a File to write to.
207 @param storageType The storage type ('static', 'extern' or nothing) for
208 the variables.
209
210 Most #define's are simply there to allow use of LL<name> instead of
211 <prefix><name>. In the case that %prefix isn't used, they are omitted.
212 Further, #define's are generated for the number of tokens and sets, and
213 some utility macro's. The variables LLscnt, LLtcnt and LLcsymb are also
214 declared.
215 */
generateDefaultDeclarations(File * output,const char * storageType)216 static void generateDefaultDeclarations(File *output, const char *storageType) {
217 size_t setSize;
218 if (prefixDirective != NULL) {
219 generateDefaultPrefixTranslations(output);
220 if (option.LLgenStyleOutputs) {
221 /* In the case of LLgen style outputs, we need to do renaming of
222 these as well, as these symbols are not static as they are for
223 LLnextgen style outputs. */
224 tfprintf(output, "#define LLread %sread\n", prefix);
225 tfprintf(output, "#define LLskip %sskip\n", prefix);
226 tfprintf(output, "#define LLfirst %sfirst\n", prefix);
227 tfprintf(output, "#define LLerror %serror\n", prefix);
228 tfprintf(output, "#define LLsets %ssets\n", prefix);
229 tfprintf(output, "#define LLindex %sindex\n", prefix);
230 tfprintf(output, "#define LLcsymb %scsymb\n", prefix);
231 tfprintf(output, "#define LLscnt %sscnt\n", prefix);
232 tfprintf(output, "#define LLtcnt %stcnt\n", prefix);
233 }
234 }
235
236 /* Define the number of tokens and the sets */
237 tfprintf(output, "#define LL_NTERMINALS %d\n", condensedNumber);
238 /* The number of sets is the number bytes in a set. Because we start from
239 int's, we round up the needed number of bytes to a multiple of the size
240 of an int. This makes it easier to generate the sets. */
241 setSize = ((condensedNumber + sizeof(int) * 8 - 1) / (sizeof(int)*8)) * sizeof(int);
242 tfprintf(output, "#define LL_NSETS %d\n#define LL_SSETS %u\n", numberOfSets, (unsigned int) setSize);
243 /* Define LLinset macro. Note that any decent compiler will
244 replace the division with a shift */
245 tfprintf(output, "#define LLinset(LLx) (LLsets[LLx*LL_SSETS + (LLcsymb/8)] & (1<<(LLcsymb & 7)))\n");
246 tfputs(output, "#define LL_SCANDONE(LLx) if (LLsymb != LLx) LLerror(LLx);\n");
247
248 /* For reentrant scanners, we don't use the global variables as arrays,
249 but as pointers to stack arrays. This saves us global memory space and
250 memory copying. */
251 if (!option.reentrant)
252 /* Add one the the number of sets, to make sure we don't end up with an
253 array of size 0. */
254 tfprintf(output, "%sint LLscnt[LL_NSETS+1], LLtcnt[LL_NTERMINALS];\n", storageType);
255 else
256 tfprintf(output, "%sint *LLscnt, *LLtcnt;\n", storageType);
257
258 tfprintf(output, "%sint LLcsymb;\n", storageType);
259 }
260
261 /** Write the header of an LLgen style .c file.
262 @param output The @a File to write to.
263 @param headerName The name of the header file to include.
264
265 These declarations are only necessary for LLgen style .c files. The
266 LLnextgen style .c file includes the definitions, so these declarations
267 are superfluous.
268 */
generateDefaultLocalDeclarations(File * output,const char * headerName)269 void generateDefaultLocalDeclarations(File *output, const char *headerName) {
270 /* Include our header file */
271 tfprintf(output, "#include \"%s\"\n", headerName);
272 generateDefaultDeclarations(output, STORAGE_EXTERN);
273
274 /* LLsets and LLindex are defined here different from the rest because
275 they need to be initialised when the storage class is not extern */
276 tfputs(output, "extern const char LLsets[];\n");
277 tfputs(output, "extern const int LLindex[];\n");
278 /* LLsymb is defined here because it needs to be extern or global, and
279 never static (not even for LLnextgen outputs). This is because a
280 user can supply a LLmessage routine outside of the generated files and
281 in LLmessage the user needs LLsymb. */
282 tfputs(output, "extern int LLsymb;\n");
283 /* For LLreissue a similar reasoning holds as for LLsymb, only based on
284 the lexer(-wrapper). */
285 if (!option.noLLreissue && !option.generateLexerWrapper)
286 tfprintf(output, "extern int LLreissue;\n");
287
288 /* These functions are here because we don't need these headers at all
289 when generating LLnextgen style outputs. */
290 tfputs(output, "void LLread(void);\n");
291 tfputs(output, "int LLskip(void);\n");
292 if (hasFirst)
293 tfputs(output, "int LLfirst(int LLtoken, int LLset);\n");
294 tfputs(output, "void LLerror(int LLtoken);\n");
295 }
296
297 /** Write code for a directive.
298 @param output The @a File to write to.
299 @param directive The directive to write code for.
300
301 At this point the only directive that needs code written here is the
302 %first directive. The other directives apart from %start are integrated
303 in the code. The %start directive is not generated with this routine, as
304 for LLgen we need to generate the parsers only in Lpars.c and not for each
305 .c file.
306 */
generateDirectiveDeclaration(File * output,Directive * directive)307 static void generateDirectiveDeclaration(File *output, Directive *directive) {
308 switch (directive->subtype) {
309 case START_DIRECTIVE:
310 case TOKEN_DIRECTIVE:
311 case LEXICAL_DIRECTIVE:
312 case PREFIX_DIRECTIVE:
313 case LABEL_DIRECTIVE:
314 case DATATYPE_DIRECTIVE:
315 /* Generated elsewhere */
316 break;
317 case FIRST_DIRECTIVE: {
318 Declaration *declaration;
319 NonTerminal *nonTerminal;
320 int fill;
321
322 declaration = (Declaration *) lookup(globalScope, directive->token[1]->text);
323 ASSERT(declaration != NULL && declaration->subtype == NONTERMINAL);
324 nonTerminal = declaration->uNonTerminal;
325
326 fill = setFill(nonTerminal->term.first);
327 if (fill > 1) {
328 tfprintf(output, "#define %s(LLx) LLfirst(LLx, %d)\n", directive->token[0]->text, setFindIndex(nonTerminal->term.first, false));
329 } else if (fill == 1) {
330 int token = setFirstToken(nonTerminal->term.first);
331 if (condensedToTerminal[token].flags & CONDENSED_ISTOKEN)
332 token = condensedToTerminal[token].uToken->uNumber;
333 else
334 token = condensedToTerminal[token].uLiteral;
335 tfprintf(output, "#define %s(LLx) (LLx == %d)\n", directive->token[0]->text, token);
336 } else {
337 tfprintf(output, "#define %s(LLx) 0\n", directive->token[0]->text);
338 }
339 break;
340 }
341 case ONERROR_DIRECTIVE:
342 /* NOTE: not supported yet */
343 break;
344 default:
345 PANIC();
346 }
347
348 }
349
350 /** Write code for all directives.
351 @param output The @a File to write to.
352 */
generateDirectiveDeclarations(File * output)353 void generateDirectiveDeclarations(File *output) {
354 int i;
355 Declaration *declaration;
356
357 for (i = 0; i < listSize(declarations); i++) {
358 declaration = (Declaration *) listIndex(declarations, i);
359 if (declaration->subtype == DIRECTIVE)
360 generateDirectiveDeclaration(output, declaration->uDirective);
361 }
362 }
363
364 /** Write the name (and return type) of a rule.
365 @param output The @a File to write to.
366 @param nonTerminal The rule to write the name for.
367 */
generateNonTerminalName(File * output,NonTerminal * nonTerminal)368 static void generateNonTerminalName(File *output, NonTerminal *nonTerminal) {
369 if (!option.LLgenStyleOutputs)
370 tfputs(output, "static ");
371 tfprintf(output, "%s %s%d_%s", nonTerminal->retvalIdent == NULL ? "void" : nonTerminal->retvalIdent->text, prefix, nonTerminal->number, nonTerminal->token->text);
372 }
373
374 /** Write the parameters of a rule.
375 @param output The @a File to write to.
376 @param nonTerminal The rule to write the parameters for.
377 */
generateNonTerminalParameters(File * output,NonTerminal * nonTerminal)378 static void generateNonTerminalParameters(File *output, NonTerminal *nonTerminal) {
379 if (nonTerminal->parameters != NULL) {
380 tfputc(output, '\n');
381 outputCode(output, nonTerminal->parameters, true);
382 } else {
383 tfprintf(output, "(void)");
384 }
385 }
386
387 /** Write the prototype for a rule.
388 @param output The @a File to write to.
389 @param nonTerminal The rule to write the prototype for.
390 */
generateNonTerminalDeclaration(File * output,NonTerminal * nonTerminal)391 void generateNonTerminalDeclaration(File *output, NonTerminal *nonTerminal) {
392 /* Don't generate code for unreachable non-terminals */
393 if (!(nonTerminal->flags & NONTERMINAL_REACHABLE))
394 return;
395 /* Generate declarations for non-terminals. LLgen is minimalistic in the
396 declarations it generates, but there is not really a reason to do so. */
397 generateNonTerminalName(output, nonTerminal);
398 if (!option.threadSafe)
399 generateNonTerminalParameters(output, nonTerminal);
400 else
401 generateNonTerminalParametersTS(output, nonTerminal);
402 tfprintf(output, ";\n");
403 }
404
405 /** Write the prototypes for all the rules.
406 @param output The @a File to write to.
407 */
generatePrototypes(File * output)408 void generatePrototypes(File *output) {
409 int i;
410 Declaration *declaration;
411
412 /* Note: unfortunately this can't be done with walkRules, because
413 generateNonTerminalDeclaration needs the file to output to */
414 for (i = 0; i < listSize(declarations); i++) {
415 declaration = (Declaration *) listIndex(declarations, i);
416 if (declaration->subtype == NONTERMINAL)
417 generateNonTerminalDeclaration(output, declaration->uNonTerminal);
418 }
419 }
420
421
422 /*=============== Code to generate the first part of LLnextgen C file or Lpars.c ================*/
423 static const char *defaultSymbolTable[] = { "\"<EOF>\"", /* For converting -1 */
424 /* NOTE: the EOF name for -1 includes the quotes, to make it easier to write
425 the code for overriding the default name. */
426 "<EOF>", "<SOH>", "<STX>", "<ETX>", "<EOT>", "<ENQ>", "<ACK>", "<BEL>",
427 "<BS>", "<TAB>", "<NL>", "<VT>", "<FF>", "<CR>", "<SO>", "<SI>",
428 "<DLE>", "<DC1>", "<DC2>", "<DC3>", "<DC4>", "<NAK>", "<SYN>", "<ETB>",
429 "<CAN>", "<EM>", "<SUB>", "<ESC>", "<FS>", "<GS>", "<RS>", "<US>",
430 "<SP>", "'!'", "'\\\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
431 "'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
432 "'0'", "'1'", "'2'", "'3'", "'4'", "'5'","'6'", "'7'",
433 "'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
434 "'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
435 "'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
436 "'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
437 "'X'", "'Y'", "'Z'", "'['", "'\\\\'", "']'", "'^'", "'_'",
438 "'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
439 "'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
440 "'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
441 "'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "<DEL>",
442 "'\\x80'", "'\\x81'", "'\\x82'", "'\\x83'", "'\\x84'", "'\\x85'", "'\\x86'", "'\\x87'",
443 "'\\x88'", "'\\x89'", "'\\x8A'", "'\\x8B'", "'\\x8C'", "'\\x8D'", "'\\x8E'", "'\\x8F'",
444 "'\\x90'", "'\\x91'", "'\\x92'", "'\\x93'", "'\\x94'", "'\\x95'", "'\\x96'", "'\\x97'",
445 "'\\x98'", "'\\x99'", "'\\x9A'", "'\\x9B'", "'\\x9C'", "'\\x9D'", "'\\x9E'", "'\\x9F'",
446 "'\\xA0'", "'\\xA1'", "'\\xA2'", "'\\xA3'", "'\\xA4'", "'\\xA5'", "'\\xA6'", "'\\xA7'",
447 "'\\xA8'", "'\\xA9'", "'\\xAA'", "'\\xAB'", "'\\xAC'", "'\\xAD'", "'\\xAE'", "'\\xAF'",
448 "'\\xB0'", "'\\xB1'", "'\\xB2'", "'\\xB3'", "'\\xB4'", "'\\xB5'", "'\\xB6'", "'\\xB7'",
449 "'\\xB8'", "'\\xB9'", "'\\xBA'", "'\\xBB'", "'\\xBC'", "'\\xBD'", "'\\xBE'", "'\\xBF'",
450 "'\\xC0'", "'\\xC1'", "'\\xC2'", "'\\xC3'", "'\\xC4'", "'\\xC5'", "'\\xC6'", "'\\xC7'",
451 "'\\xC8'", "'\\xC9'", "'\\xCA'", "'\\xCB'", "'\\xCC'", "'\\xCD'", "'\\xCE'", "'\\xCF'",
452 "'\\xD0'", "'\\xD1'", "'\\xD2'", "'\\xD3'", "'\\xD4'", "'\\xD5'", "'\\xD6'", "'\\xD7'",
453 "'\\xD8'", "'\\xD9'", "'\\xDA'", "'\\xDB'", "'\\xDC'", "'\\xDD'", "'\\xDE'", "'\\xDF'",
454 "'\\xE0'", "'\\xE1'", "'\\xE2'", "'\\xE3'", "'\\xE4'", "'\\xE5'", "'\\xE6'", "'\\xE7'",
455 "'\\xE8'", "'\\xE9'", "'\\xEA'", "'\\xEB'", "'\\xEC'", "'\\xED'", "'\\xEE'", "'\\xEF'",
456 "'\\xF0'", "'\\xF1'", "'\\xF2'", "'\\xF3'", "'\\xF4'", "'\\xF5'", "'\\xF6'", "'\\xF7'",
457 "'\\xF8'", "'\\xF9'", "'\\xFA'", "'\\xFB'", "'\\xFC'", "'\\xFD'", "'\\xFE'", "'\\xFF'" };
458
459 /** Header for regualar lexer wrapper. */
460 static const char *lexerWrapperHeader = "int LLlexerWrapper(void) {\n";
461
462 /** The code for the lexer wrapper, with a %s in it as a place holder for the
463 real lexer. */
464 const char *lexerWrapperString =
465 "\tif (LLreissue == %s) {\n"
466 "\t\treturn %s(%s);\n"
467 "\t} else {\n"
468 "\t\tint LLretval = LLreissue;\n"
469 "\t\tLLreissue = %s;\n"
470 "\t\treturn LLretval;\n"
471 "\t}\n"
472 "}\n";
473
474 /** Header for regular llmessage. */
475 static const char *llmessageHeader = "void LLmessage(int LLtoken) {\n";
476
477 /* Note: this string is too long to be one string (at least according to the
478 ANSI C standard). That is why it is an array of two strings. */
479 /** The code for the default LLmessage.
480
481 NOTE: DON'T use printf to write this string. It contains fprintf
482 statements with format strings that are not escaped.
483 */
484 const char *llmessageString[2] = {
485 "\tswitch (LLtoken) {\n"
486 "\t\tcase LL_MISSINGEOF:\n"
487 "\t\t\tfprintf(stderr, \"Expected %s, found %s. Skipping.\\n\", LLgetSymbol(EOFILE), LLgetSymbol(LLsymb));\n"
488 "\t\t\tbreak;\n", /* Note the comma, to break it into two strings. */
489 "\t\tcase LL_DELETE:\n"
490 "\t\t\tfprintf(stderr, \"Skipping unexpected %s.\\n\", LLgetSymbol(LLsymb));\n"
491 "\t\t\tbreak;\n"
492 "\t\tdefault:\n"
493 "\t\t\tfprintf(stderr, \"Expected %s, found %s. Inserting.\\n\", LLgetSymbol(LLtoken), LLgetSymbol(LLsymb));\n"
494 "\t\t\tbreak;\n"
495 "\t}\n"
496 "}\n"};
497
498 /** Write the conversion tables used in the parser to file.
499 @param output The @a File to write to.
500 @param storageType The storage type ('static', 'extern' or nothing) for
501 the variables.
502
503 These tables can be used both for thread-safe and regular parsers, as no
504 changes to them are made.
505 */
generateConversionTables(File * output,const char * storageType)506 void generateConversionTables(File *output, const char *storageType) {
507 size_t j, k, setSize;
508 int i;
509 /* Generate the table with token sets. To allow generating code on a
510 machine with for example 4 byte words for a machine with 2 byte words
511 we use char's here instead of int's. */
512 setSize = (condensedNumber + sizeof(int) * 8 - 1) / (sizeof(int)*8);
513 tfprintf(output, "%sconst char LLsets[] = {\n", storageType);
514 for (i = 0; i < numberOfSets; i++) {
515 for (j = 0; j < setSize; j++)
516 for (k = 0; k < sizeof(int); k++)
517 tfprintf(output, "\t'\\x%02X', ", (setList[i].bits[j] >> (k*8)) & 0xff);
518 tfputc(output, '\n');
519 }
520 tfprintf(output, "\t0\n};\n");
521
522 /* Output token number conversion table */
523 tfprintf(output, "%sconst int LLindex[] = { 0", storageType);
524 for (i = 0; i < maxTokenNumber; i++) {
525 if ((i % 8) == 0)
526 tfprintf(output, ",\n\t");
527 else
528 tfprintf(output, ", ");
529 tfprintf(output, "%4d", terminalToCondensed[i]);
530 }
531 tfprintf(output, "};\n");
532 }
533
534 /** Write a symbol table and the LLgetSymbol routine.
535 @param output The @a File to write to.
536 */
generateSymbolTable(File * output)537 void generateSymbolTable(File *output) {
538 Declaration *declaration;
539 const char *eofLabel = NULL;
540 size_t j;
541 int i;
542
543 if (option.noEOFZero)
544 defaultSymbolTable[1] = "<NUL>";
545
546 /* use the EOFILE for -1 and also for 0 if !option.noEOFZero */
547 declaration = (Declaration *) lookup(globalScope, "EOFILE");
548 ASSERT(declaration != NULL && declaration->subtype == DIRECTIVE);
549 if (declaration->uDirective->token[1] != NULL) {
550 eofLabel = declaration->uDirective->token[1]->text;
551 if (!option.noEOFZero)
552 literalLabels[0] = declaration->uDirective->token[1];
553 }
554
555 /* Write the first part of the symbol table, using the user supplied
556 symbolnames if available. */
557 /* NOTE: The EOF names already include the quotes. */
558 tfputs(output, "static const char *LLsymbolTable[] = {\n");
559
560 /* Gettext-noop macro definition. Note: we do this AFTER the definition of
561 the variable, so that we avoid any chance of name collisions. */
562 if (option.gettext)
563 tfprintf(output, "#define %s(LLx) LLx\n", option.gettextMacro);
564
565 if (option.gettext && eofLabel)
566 tfprintf(output, "%s(", option.gettextMacro);
567 tfputs(output, eofLabel == NULL ? defaultSymbolTable[0] : eofLabel);
568 tfputs(output, option.gettext && eofLabel ? "), " : ", ");
569
570 for (i = 0; i < 256; i++) {
571 if ((i % 4) == 0)
572 tfputc(output, '\n');
573 if (literalLabels[i] == NULL) {
574 tfprintf(output, "\"%s\", ", defaultSymbolTable[i + 1]);
575 } else {
576 if (option.gettext)
577 tfprintf(output, "%s(", option.gettextMacro);
578 tfprintf(output, "%s", literalLabels[i]->text);
579 tfputs(output, option.gettext ? "), " : ", ");
580 }
581 }
582 tfputc(output, '\n');
583 /* NOTE: The EOF names already include the quotes. */
584 if (option.gettext && eofLabel)
585 tfprintf(output, "%s(", option.gettextMacro);
586 tfputs(output, eofLabel == NULL ? defaultSymbolTable[0] : eofLabel);
587 if (option.gettext && eofLabel)
588 tfputs(output, ")");
589
590 /* Write the symbol names for the %token defined tokens. */
591 for (i = 257; i < maxTokenNumber; i++) {
592 Directive *directive;
593 tfputs(output, ", ");
594
595 if ((i % 4) == 0)
596 tfputc(output, '\n');
597 directive = condensedToTerminal[terminalToCondensed[i]].uToken;
598 /* If no %label has been specified, create the name from the
599 token name itself. */
600 if (directive->token[1] == NULL) {
601 char *text;
602 if (option.lowercaseSymbols) {
603 size_t length;
604 text = safeStrdup(directive->token[0]->text, "generateDefaultGlobalDeclarations");
605 length = strlen(text);
606 for (j = 0; j < length; j++)
607 text[j] = (char) tolower(text[j]);
608 } else {
609 text = directive->token[0]->text;
610 }
611
612 tfprintf(output, "\"%s\"", text);
613
614 if (option.lowercaseSymbols)
615 free(text);
616 } else {
617 if (option.gettext)
618 tfprintf(output, "%s(", option.gettextMacro);
619 tfputs(output, directive->token[1]->text);
620 if (option.gettext)
621 tfputc(output, ')');
622 }
623 }
624 tfputs(output, "};\n");
625
626 /* Make sure the macro doesn't interfere with our code (or the user's). */
627 if (option.gettext)
628 tfprintf(output, "#undef %s\n#ifdef %s\nchar *gettext(const char *);\n#else\n#define gettext(LLx) LLx\n#endif\n", option.gettextMacro, option.gettextGuard);
629
630 tfprintf(output,
631 "const char *LLgetSymbol(int LLtoken) {\n"
632 "\tif (LLtoken < -1 || LLtoken > %d /* == LL_MAXTOKNO */)\n"
633 "\t\treturn (char *) 0;\n", maxTokenNumber - 1);
634 if (option.gettext)
635 tfputs(output, "\treturn gettext(LLsymbolTable[LLtoken+1]);\n");
636 else
637 tfputs(output, "\treturn LLsymbolTable[LLtoken+1];\n");
638 tfputs(output, "}\n");
639
640 if (option.gettext)
641 tfprintf(output, "#ifndef %s\n#undef gettext\n#endif\n", option.gettextGuard);
642 }
643
644 /** Write code to define all internal variables and utility functions.
645 @param output The @a File to write to.
646 @param headerName The name of the header file to include.
647
648 These variables and functions end up in the .c file, or in the case of
649 LLgen style outputs in the Lpars.c file.
650 */
generateDefaultGlobalDeclarations(File * output,const char * headerName)651 void generateDefaultGlobalDeclarations(File *output, const char *headerName) {
652 const char *lexer, *lexerInternal = NULL; /* The = NULL is there to shut up the compiler */
653 const char *storageType = option.LLgenStyleOutputs ? STORAGE_GLOBAL : STORAGE_STATIC;
654
655 lexer = lexicalDirective ? lexicalDirective->token[0]->text : "yylex";
656 if (option.generateLexerWrapper) {
657 lexerInternal = lexer;
658 lexer = "LLlexerWrapper";
659 }
660
661 generateDefaultDeclarations(output, storageType);
662 tfputs(output, "#include <string.h>\n");
663
664 generateConversionTables(output, storageType);
665
666 /* Because LLmessage and the lexer(-wrapper) can be supplied externally,
667 these have to be made global. */
668 tfputs(output, "int LLsymb;\n");
669 if (!option.noLLreissue)
670 tfprintf(output, "%sint LLreissue;\n", option.generateLexerWrapper ? STORAGE_STATIC : STORAGE_GLOBAL);
671
672 /* Declare the lexer */
673 tfprintf(output, "int %s(void);\n", lexerInternal != NULL ? lexerInternal : lexer);
674
675 if (option.generateLexerWrapper) {
676 tfputs(output, "static ");
677 tfputs(output, lexerWrapperHeader);
678 tfprintf(output, lexerWrapperString, "-2 /* LL_NEW_TOKEN */", lexerInternal, "", "-2 /* LL_NEW_TOKEN */");
679 }
680 /* As we generate our standard LLmessage as a static routine, we need to
681 declare it static here as well to prevent compiler warnings. */
682 if (option.generateLLmessage)
683 tfputs(output, "static ");
684 /* Declare LLmessage (or actually <%prefix>message, because of the defines) */
685 tfputs(output, "void LLmessage(int);\n");
686
687 /* Define LLread. For correcting error recovery, it can be really simple.
688 As a good compiler will inline this anyway, we don't have to bother
689 with a #define. */
690 tfprintf(output, "%svoid LLread(void) { LLcsymb = LLindex[(LLsymb = %s()) + 1]; }\n", storageType, lexer);
691
692 /* Define the error handling routines LLskip and LLerror
693 Note: LLskip is spread over two tfprintf statements because otherwise
694 the format string would exceed the maximum length for a string constant.
695 */
696 tfprintf(output,
697 "%sint LLskip(void) {\n"
698 "\tint LL_i;\n"
699 "\tif (LLcsymb >= 0) {\n"
700 "\t\tif (LLtcnt[LLcsymb] != 0) return 0;\n"
701 "\t\tfor (LL_i = 0; LL_i < LL_NSETS; LL_i++)\n"
702 "\t\t\tif (LLscnt[LL_i] != 0 && LLinset(LL_i))\n"
703 "\t\t\t\treturn 0;\n"
704 "\t}\n\n", storageType);
705 tfprintf(output,
706 "\tfor (;;) {\n"
707 "\t\tLLmessage(%d /* LL_DELETE */);\n"
708 "\t\twhile ((LLcsymb = LLindex[(LLsymb = %s()) + 1]) < 0) LLmessage(%d /* LL_DELETE */);\n"
709 "\t\tif (LLtcnt[LLcsymb] != 0)\n"
710 "\t\t\treturn 1;\n"
711 "\t\tfor (LL_i = 0; LL_i < LL_NSETS; LL_i++)\n"
712 "\t\t\tif (LLscnt[LL_i] != 0 && LLinset(LL_i))\n"
713 "\t\t\t\treturn 1;\n"
714 "\t}\n}\n", option.noEOFZero ? -2 : 0, lexer, option.noEOFZero ? -2 : 0);
715
716 tfprintf(output,
717 "%svoid LLerror(int LLtoken) {\n"
718 "\tif (LLtoken == 256 /* EOFILE */) {\n"
719 "\t\tLLmessage(-1 /* LL_MISSINGEOF */);\n"
720 "\t\twhile (LLindex[(LLsymb = %s()) + 1] != 0) /*NOTHING*/ ;\n"
721 "\t\treturn;\n"
722 "\t}\n"
723 "\tLLtcnt[LLindex[LLtoken + 1]]++;\n"
724 "\tLLskip();\n"
725 "\tLLtcnt[LLindex[LLtoken + 1]]--;\n"
726 "\tif (LLsymb != LLtoken) {%s LLmessage(LLtoken); }\n"
727 "}\n", storageType, lexer, option.noLLreissue ? "" : " LLreissue = LLsymb;");
728
729 /* Generate the LLfirst routine, but only if a %first directive is used. */
730 if (hasFirst) {
731 tfprintf(output,
732 "%sint LLfirst(int LLtoken, int LLset) {\n"
733 "\tint LLctoken;\n"
734 "\treturn (LLtoken >= -1 && LLtoken < %d && (LLctoken = LLindex[LLtoken+1]) >= 0 &&\n"
735 "\t\t(LLsets[LLset*LL_SSETS + (LLctoken/8)] & (1<<(LLctoken & 7))));\n"
736 "}\n", storageType, maxTokenNumber);
737 }
738
739 /* Generate the LLabort routine, but only if a the --abort option is used. */
740 if (option.abort) {
741 tfputs(output,
742 "#include <setjmp.h>\n"
743 "static jmp_buf LLjmp_buf;\n"
744 "void LLabort(int LLretval) {\n"
745 "\tlongjmp(LLjmp_buf, LLretval);\n"
746 "}\n");
747 }
748
749 /* Generate the symbol table and LLgetSymbol routine, but only if a the
750 --generateSymbolTable option is used. */
751 if (option.generateSymbolTable)
752 generateSymbolTable(output);
753
754
755 if (headerName != NULL) {
756 /* Include our header file */
757 tfprintf(output, "#include \"%s\"\n", headerName);
758 }
759
760 if (option.generateLLmessage) {
761 tfputs(output, "#include <stdio.h>\nstatic ");
762 tfputs(output, llmessageHeader);
763 tfputs(output, llmessageString[0]);
764 tfputs(output, llmessageString[1]);
765 }
766 }
767
768 /*============== Code to generate the actual rules and parsers ================*/
769 /*===== General routines =====*/
770 /** Write code snippet for tracking the contains set.
771 @param output The @a File to write to.
772 @param tokens The set to push or pop.
773 @param direction Whether to push or pop.
774 */
generateSetPushPop(File * output,const set tokens,PushDirection direction)775 void generateSetPushPop(File *output, const set tokens, PushDirection direction) {
776 int size = setFill(tokens);
777 /* For empty sets, there is nothing we need to do. */
778 if (size == 0)
779 return;
780 /* Take note of the character after LL. */
781 if (size == 1)
782 tfprintf(output, "LLtcnt[%d]%s;\n", setFirstToken(tokens), direction == DIR_PUSH ? "++" : "--");
783 else
784 tfprintf(output, "LLscnt[%d]%s;\n", setFindIndex(tokens, false), direction == DIR_PUSH ? "++" : "--");
785 }
786
787 /** Write case labels for all tokens in a set.
788 @param output The @a File to write to.
789 @param tokens The set to write case labels for.
790
791 The generated code makes extensive use of switch statements. This routine
792 writes the case labels needed for one set.
793 */
generateCaseLabels(File * output,const set tokens)794 static void generateCaseLabels(File *output, const set tokens) {
795 int i;
796 for (i = 0; i < condensedNumber; i++) {
797 if (setContains(tokens, i)) {
798 tfprintf(output, "case %d:", i);
799 tfprintTokenComment(output, i);
800 }
801 }
802 }
803
804 /*===== Code to generate parser =====*/
805 /** Write the code for a parser.
806 @param output The @a File to write to.
807 @param directive The %start directive to write code for.
808 */
generateParser(File * output,Directive * directive)809 static void generateParser(File *output, Directive *directive) {
810 NonTerminal *nonTerminal;
811
812 nonTerminal = ((Declaration *) lookup(globalScope, directive->token[1]->text))->uNonTerminal;
813 if (option.LLgenStyleOutputs)
814 generateNonTerminalDeclaration(output, nonTerminal);
815
816 /* NOTE: as we cannot move this function to before the header file inclusion
817 (thread safe parsers will likely have some user defined type as
818 argument), symbols created by the user might collide with ours (although
819 not for LLgen-style output). Therefore we prefix our vars so we can
820 consider it a user problem (don't say we didn't warn you in the manual). */
821
822 #ifdef DEBUG
823 /* To prevent warnings about implicit declarations, we add these includes */
824 tfputs(output, "#include <stdio.h>\n#include <stdlib.h>\n");
825 #endif
826
827 generateParserHeader(output, directive);
828 tfputs(output, " {\n");
829
830 if (option.abort)
831 tfputs(output, "int LLsetjmpRetval;\n");
832 if (option.reentrant) {
833 const char *volatileMarker = "";
834
835 /* According to the C rationale, after returning to a setjmp call context
836 through a longjmp call, the only way to make sure that all local
837 variables contain the values present at setjmp call time is to mark
838 the variables as volatile. Otherwise the last values written may be
839 in a register that is not restored after longjmp. */
840 if (option.abort)
841 volatileMarker = "volatile ";
842
843 /* For reentrant parsers, the LLtcnt and LLscnt are actually
844 arrays on the stack, and the variables are pointers to this
845 stack array. */
846 tfprintf(output, "\t%sint LLcounts[LL_NTERMINALS + LL_NSETS + 2];\n", volatileMarker);
847 tfprintf(output, "\t%sint *LLbackupLLscnt = LLscnt, *LLbackupLLtcnt = LLtcnt;\n", volatileMarker);
848 if (!option.noLLreissue)
849 tfprintf(output, "\t%sint LLbackupLLreissue = LLreissue;\n", volatileMarker);
850 if (option.abort)
851 tfprintf(output, "\t%sjmp_buf LLbackupJmp_buf;\n\tmemcpy(&LLbackupJmp_buf, &LLjmp_buf, sizeof(jmp_buf));\n", volatileMarker);
852 tfputs(output, "\tLLcounts[0] = LLsymb; LLcounts[1] = LLcsymb;\n");
853 tfputs(output, "\tLLtcnt = LLcounts + 2; LLscnt = LLcounts + 2 + LL_NTERMINALS;\n");
854 }
855 /* Clear the contains set tracking variables. */
856 tfputs(output, "\tmemset(LLscnt, 0, LL_NSETS * sizeof(int));\n");
857 tfputs(output, "\tmemset(LLtcnt, 0, LL_NTERMINALS * sizeof(int));\n");
858 /* EOFILE is always in the contains set: */
859 tfputs(output, "\tLLtcnt[0]++;\n");
860
861 tfputs(output, "\tLLsymb = 0;\n");
862
863 if (!option.noLLreissue)
864 tfputs(output, "\tLLreissue = -2 /* LL_NEW_TOKEN */;\n");
865
866 if (option.abort)
867 tfputs(output, "\tLLsetjmpRetval = setjmp(LLjmp_buf);\n\tif (LLsetjmpRetval == 0) {\n");
868 tfputs(output, "\tLLread();\n");
869
870 if (nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE)
871 generateSetPushPop(output, nonTerminal->term.contains, DIR_PUSH);
872
873 tfputc(output, '\t');
874 if (nonTerminal->retvalIdent != NULL)
875 tfputs(output, "*LLretval = ");
876
877 tfprintf(output, "%s%d_%s();\n", prefix, nonTerminal->number, directive->token[1]->text);
878 if (nonTerminal->term.flags & TERM_NO_FINAL_READ)
879 tfputs(output, "\tLLread();\n");
880 tfputs(output, "\tif (LLcsymb != 0) LLerror(256 /* EOFILE*/);\n");
881 #ifdef DEBUG
882 tfputs(output, "\tLLtcnt[0]--;\n");
883 tfputs(output, "{ int LL_i; for(LL_i = 0; LL_i < LL_NTERMINALS; LL_i++) if (LLtcnt[LL_i] != 0) { printf(\"Counts are not all zero!!!!\\n\"); exit(10); }}\n");
884 tfputs(output, "{ int LL_i; for(LL_i = 0; LL_i < LL_NSETS; LL_i++) if (LLscnt[LL_i] != 0) { printf(\"Counts are not all zero!!!!\\n\"); exit(10); }}\n");
885 #endif
886 if (option.abort)
887 tfputs(output, "\t}\n");
888
889 if (option.reentrant) {
890 /* Restore the previous values */
891 tfputs(output, "\tLLscnt = LLbackupLLscnt; LLtcnt = LLbackupLLtcnt;\n");
892 tfputs(output, "\tLLsymb = LLcounts[0]; LLcsymb = LLcounts[1];\n");
893 if (!option.noLLreissue)
894 tfputs(output, "\tLLreissue = LLbackupLLreissue;\n");
895 if (option.abort)
896 tfputs(output, "\tmemcpy(&LLjmp_buf, &LLbackupJmp_buf, sizeof(jmp_buf));\n");
897 }
898
899 if (option.abort)
900 tfputs(output, "\treturn LLsetjmpRetval;\n");
901 tfputs(output, "}\n\n");
902 }
903
904 /** Write code for directives that can only appear at the end of the .c file.
905 @param output The @a File to write to.
906 */
generateDirectiveCodeAtEnd(File * output)907 void generateDirectiveCodeAtEnd(File *output) {
908 int i;
909 Declaration *declaration;
910
911 for (i = 0; i < listSize(declarations); i++) {
912 declaration = (Declaration *) listIndex(declarations, i);
913 if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == START_DIRECTIVE)
914 generateParser(output, declaration->uDirective);
915 }
916 }
917 /*===== Code to generate rules =====*/
918 int labelCounter;
919 static void alternativeGenerateCode(File *output, Alternative *alternative, bool needsFinalRead);
920
921 /** Write the code for the @a Alternatives of a @a Term.
922 @param output The @a File to write to.
923 @param term The @a Term to write code for.
924 @param canSkip Boolean to indicate whether the current token when taking
925 the switch is not always valid, which may require jumping out of the
926 switch and take it again after skipping the current token.
927 @param popType One of POP_NEVER, POP_ALWAYS and POP_CONDITIONAL.
928 Determines whether the contains set of the term is popped after the
929 correct alternative has been determined. POP_ALWAYS is used for terms
930 that don't repeat, POP_CONDITIONAL is used for terms with fixed
931 repetition counts, while POP_NEVER is used for variable repetition
932 count terms.
933 */
termGenerateCode(File * output,Term * term,const bool canSkip,PopType popType)934 void termGenerateCode(File *output, Term *term, const bool canSkip, PopType popType) {
935 int i, j, label = 0; /* The = 0 is there to shut up the compiler */
936 Alternative *alternative, *otherAlternative;
937 set conflicts, temporary;
938 CCode *lastExpression;
939 int lastLabel;
940
941 if (!(term->flags & TERM_MULTIPLE_ALTERNATIVE)) {
942 /* This is mainly for the case where we have a single alternative that
943 has a repetition other than 1. */
944 if (popType) {
945 if (popType == POP_CONDITIONAL)
946 tfputs(output, "if (!LL_i)\n");
947 generateSetPushPop(output, term->contains, DIR_POP);
948 }
949 alternativeGenerateCode(output, (Alternative *) listIndex(term->rule, 0), !(term->flags & TERM_NO_FINAL_READ));
950 return;
951 }
952
953 /* If the default case can skip the current token, it needs to be able to
954 jump out of the switch so that it can chose an alternative again. This
955 requires a label. */
956 if (canSkip) {
957 label = labelCounter++;
958 tfprintf(output, "LL_%d:\n", label);
959 }
960 tfprintf(output, "switch (LLcsymb) {\n");
961
962 conflicts = newSet(condensedNumber);
963 temporary = newSet(condensedNumber);
964
965 for (i = 0; i < listSize(term->rule); i++) {
966 alternative = (Alternative *) listIndex(term->rule, i);
967 /* For alternatives that have conflicting tokens, special handling is
968 required. */
969 if (alternative->flags & ALTERNATIVE_IF) {
970 setClear(conflicts);
971 for (j = i + 1; j < listSize(term->rule); j++) {
972 otherAlternative = (Alternative *) listIndex(term->rule, j);
973 if (!setIntersectionEmpty(alternative->first, otherAlternative->first)) {
974 setCopy(temporary, alternative->first);
975 setIntersection(temporary, otherAlternative->first);
976 setUnion(conflicts, temporary);
977 if (otherAlternative->label < 0)
978 otherAlternative->label = labelCounter++;
979 }
980 }
981
982 /* The %if may have been for tokens that have already been handled
983 when processing a previous alternative (also with a %if). So
984 the conflicting set may be empty now. */
985 if (!setEmpty(conflicts)) {
986 /* Give the alternative a label, if it doesn't already have
987 one. */
988 if (alternative->label < 0)
989 alternative->label = labelCounter++;
990 /* Remove conflicting tokens from first set of this alternative,
991 to prevent duplicate case labels. */
992 setMinus(alternative->first, conflicts);
993
994 while (!setEmpty(conflicts)) {
995 /* Steps:
996 - find a set of tokens that share conflicting alternatives (these can be put in one case)
997 - generate case labels for that set
998 - generate a sequence of "if (condition) goto label;" statements, ending with a "goto label;"
999 for the final alternative.
1000 - remove conflicting tokens from first set of any affected alternatives
1001 - repeat until there are no more conflicting tokens
1002 */
1003 setCopy(temporary, conflicts);
1004 for (j = i + 1; j < listSize(term->rule); j++) {
1005 otherAlternative = (Alternative *) listIndex(term->rule, j);
1006 if (!setIntersectionEmpty(temporary, otherAlternative->first)) {
1007 setIntersection(temporary, otherAlternative->first);
1008 }
1009 }
1010 /* Now we have a set with conflict tokens, which share
1011 the same conflicting alternatives. */
1012 generateCaseLabels(output, temporary);
1013 lastExpression = alternative->expression;
1014 lastLabel = alternative->label;
1015 for (j = i + 1; j < listSize(term->rule); j++) {
1016 otherAlternative = (Alternative *) listIndex(term->rule, j);
1017 if (!setIntersectionEmpty(temporary, otherAlternative->first)) {
1018 setMinus(otherAlternative->first, temporary);
1019 tfputs(output, "if\n");
1020 outputCode(output, lastExpression, true);
1021 tfprintf(output, "goto LL_%d;\n", lastLabel);
1022 lastExpression = otherAlternative->expression;
1023 lastLabel = otherAlternative->label;
1024 }
1025 }
1026 tfprintf(output, "goto LL_%d;\n", lastLabel);
1027 setMinus(conflicts, temporary);
1028 }
1029 }
1030 }
1031
1032
1033 if (alternative->flags & ALTERNATIVE_DEFAULT) {
1034 tfprintf(output, "default:\n");
1035 if (canSkip) {
1036 tfprintf(output, "if (LLskip())\ngoto LL_%d;\n/*FALLTHROUGH*/\n", label);
1037 generateCaseLabels(output, alternative->first);
1038 }
1039 } else {
1040 /* This may not actually generate any case labels, i.e. if an
1041 alternative only has tokens that conflict with other
1042 alternatives. The alternative WILL have a regular label then. */
1043 generateCaseLabels(output, alternative->first);
1044 }
1045 if (alternative->label >= 0)
1046 tfprintf(output, "LL_%d:\n", alternative->label);
1047 if (popType) {
1048 /* For use in fixed maximum repetition loops. */
1049 if (popType == POP_CONDITIONAL)
1050 tfputs(output, "if (!LL_i)\n");
1051 generateSetPushPop(output, term->contains, DIR_POP);
1052 }
1053 alternativeGenerateCode(output, alternative, !(term->flags & TERM_NO_FINAL_READ));
1054 tfputs(output, "break;\n");
1055 }
1056 tfprintf(output, "}\n");
1057 deleteSet(conflicts);
1058 deleteSet(temporary);
1059 }
1060
1061 /** Write code for the ..? operator.
1062 @param output The @a File to write to.
1063 @param term The @a Term to write code for.
1064 */
finoptGenerateCode(File * output,Term * term)1065 static void finoptGenerateCode(File *output, Term *term) {
1066 /* Info needed:
1067 conflict set from enclosing term
1068
1069 [ fixed:
1070 if (LL_i)
1071 goto first;
1072 ]
1073 switch (LLcsymb) {
1074 default:
1075 LLskip...
1076 case follow-outer-only: //only in follow set of outer term
1077 goto outLabel;
1078 [ not fixed:
1079 case conflict-outer: //only in conflict set of outer term
1080 if ((LL_checked = (.1. != 0) + 1) == 1)
1081 break;
1082 goto first; //generated only if other conflict cases follow
1083 case conflict-first-conflict-outer: //in both first set and conflict set of outer term
1084 if ((.2. || (LL_checked = (.1. != 0) + 1) == 1)
1085 break;
1086 goto first; //generated only if other conflict cases follow
1087 ]
1088 case conflict-first-follow-outer: //in first set and follow set of outer term
1089 if (!.2.)
1090 break;
1091 [ not fixed:
1092 case decision-outer-only: //only in decision set of outer term
1093 ]
1094 case first: //first set of this term
1095 ...
1096 }
1097 */
1098 Term *outerTerm = term->enclosing.alternative->enclosing;
1099 int label = labelCounter++, firstLabel = -1;
1100 bool needsGotoBeforeNewCase = false;
1101 set resultSet;
1102
1103 /* The firstLabel is initially set to -1, because it may not be necessary
1104 to generate the label at all. It all depends on any repetition conflicts
1105 and whether it is a fixed repetition type. */
1106
1107 /* Note that if this is in a fixed repetition, than the number of
1108 repetitions will be greater than 1. In the event that the finopt
1109 operator is specified within a single fixed repetition the finopt
1110 operator will have been converted to a STAR operator. */
1111 if (outerTerm->repeats.subtype & FIXED) {
1112 ASSERT(outerTerm->repeats.number > 1);
1113 /* firstLabel is always -1 here. */
1114 firstLabel = labelCounter++;
1115 tfprintf(output, "if (LL_i) goto LL_%d;\n", firstLabel);
1116 }
1117
1118 tfprintf(output, "LL_%d:\n"
1119 "switch (LLcsymb) {\n"
1120 "default:\n"
1121 "if (LLskip())\ngoto LL_%d;\n", label, label);
1122
1123 resultSet = newSet(condensedNumber);
1124
1125 /* =========================================================
1126 Generate case labels for tokens only in the follow set of the outer term */
1127 setCopy(resultSet, outerTerm->follow);
1128 setMinus(resultSet, term->first);
1129 if (!setEmpty(resultSet)) {
1130 tfputs(output, "/*FALLTHROUGH*/\n");
1131 generateCaseLabels(output, resultSet);
1132 }
1133
1134 generateSetPushPop(output, term->contains, DIR_POP);
1135
1136 /* This jump out the switch needs to be here regardless of the tokens in the
1137 outerTerm follow set, because of the default case. */
1138 tfputs(output, "break;\n");
1139
1140 if (outerTerm->flags & TERM_REQUIRES_LLCHECKED) {
1141 /* =========================================================
1142 Generate the case for tokens only in the conflict set of the
1143 outer term */
1144 setCopy(resultSet, *(outerTerm->conflicts));
1145 setMinus(resultSet, term->first);
1146
1147 if (!setEmpty(resultSet)) {
1148 generateCaseLabels(output, resultSet);
1149 tfputs(output, "if ((LL_checked = (\n");
1150 outputCode(output, outerTerm->expression, true);
1151 tfputs(output, "!= 0) + 1) == 1) {\n");
1152 generateSetPushPop(output, term->contains, DIR_POP);
1153 tfprintf(output, "break;\n}\n");
1154 needsGotoBeforeNewCase = true;
1155 }
1156
1157 /* =========================================================
1158 Generate the case for tokens in both the conflict set of the
1159 outer term and the first set of this term */
1160 setCopy(resultSet, *(outerTerm->conflicts));
1161 setIntersection(resultSet, term->first);
1162 if (!setEmpty(resultSet)) {
1163 if (needsGotoBeforeNewCase) {
1164 if (firstLabel == -1)
1165 firstLabel = labelCounter++;
1166 tfprintf(output, "goto LL_%d;\n", firstLabel);
1167 }
1168 generateCaseLabels(output, resultSet);
1169 tfputs(output, "if ((\n");
1170 outputCode(output, term->expression, true);
1171 tfputs(output, "|| (LL_checked = (\n");
1172 outputCode(output, outerTerm->expression, true);
1173 tfputs(output, "!= 0) + 1) == 1)) {\n");
1174 generateSetPushPop(output, term->contains, DIR_POP);
1175 tfprintf(output, "break;\n}\n");
1176 needsGotoBeforeNewCase = true;
1177 }
1178 }
1179
1180 /* =========================================================
1181 Generate the case for tokens in both the follow set of the
1182 outer term and the first set of this term */
1183 setCopy(resultSet, outerTerm->follow);
1184 setIntersection(resultSet, term->first);
1185 if (!setEmpty(resultSet)) {
1186 if (needsGotoBeforeNewCase) {
1187 if (firstLabel == -1)
1188 firstLabel = labelCounter++;
1189 tfprintf(output, "goto LL_%d;\n", firstLabel);
1190 }
1191 generateCaseLabels(output, resultSet);
1192 tfputs(output, "if (!\n");
1193 outputCode(output, term->expression, true);
1194 tfputs(output, ") {\n");
1195 generateSetPushPop(output, term->contains, DIR_POP);
1196 tfputs(output, "break;}\n");
1197 /* Eventhough this is the last case, we set the flag anyway, so that
1198 we can properly generate the fallthrough. */
1199 needsGotoBeforeNewCase = true;
1200 }
1201
1202 if (needsGotoBeforeNewCase)
1203 tfputs(output, "/*FALLTHROUGH*/\n");
1204
1205 /* =========================================================
1206 Generate the case for tokens in either the first set of the
1207 outer term and the first set of this term */
1208 if (!(outerTerm->repeats.subtype & FIXED)) {
1209 if (outerTerm->repeats.subtype == PLUS && !(outerTerm->flags & TERM_PERSISTENT))
1210 setCopy(resultSet, outerTerm->first);
1211 else
1212 setCopy(resultSet, outerTerm->contains);
1213
1214 setMinus(resultSet, term->first);
1215 if (outerTerm->flags & TERM_REQUIRES_LLCHECKED)
1216 setMinus(resultSet, *(outerTerm->conflicts));
1217
1218 if (!setEmpty(resultSet))
1219 generateCaseLabels(output, resultSet);
1220 }
1221
1222 /* Now for all tokens in the term's first set that have not been handled
1223 in previous cases. */
1224 setCopy(resultSet, term->first);
1225 setMinus(resultSet, outerTerm->follow);
1226 if (outerTerm->flags & TERM_REQUIRES_LLCHECKED)
1227 setMinus(resultSet, *(outerTerm->conflicts));
1228
1229 generateCaseLabels(output, resultSet);
1230
1231 if (firstLabel != -1)
1232 tfprintf(output, "LL_%d:\n", firstLabel);
1233
1234 generateSetPushPop(output, term->contains, DIR_POP);
1235 termGenerateCode(output, term, outerTerm->repeats.subtype & FIXED, POP_NEVER);
1236
1237 tfputs(output, "}\n");
1238 deleteSet(resultSet);
1239 }
1240
1241 /** Write the code for the repeat-structure of a @a Term.
1242 @param output The @a File to write to.
1243 @param term The @a Term to write code for.
1244 */
termGenerateRepeatsCode(File * output,Term * term)1245 static void termGenerateRepeatsCode(File *output, Term *term) {
1246 /* Initialize the firstSetLabel only in the cases we actually need it */
1247 int firstSetLabel = (term->repeats.subtype & PLUS) ? labelCounter++ : -1;
1248 int braces = 0;
1249
1250 if (term->repeats.number > 1 || term->flags & TERM_REQUIRES_LLCHECKED) {
1251 tfputc(output, '{');
1252 braces++;
1253 }
1254
1255 if (term->repeats.number > 1) {
1256 /* Initialize it here, so that when we jump into the
1257 for-loop (+ repetitions) it is initialized correctly. */
1258 tfprintf(output, "int LL_i = %d;\n", term->repeats.number - 1);
1259 }
1260
1261 if (term->flags & TERM_REQUIRES_LLCHECKED) {
1262 /* Create flag for checking whether a first-follow conflict has
1263 already been checked for. This flag has the following values:
1264 - 0: no checking has been done.
1265 - 1: checking has been done, and the result was false.
1266 - 2: checking has been done, and the result was true.
1267 */
1268 tfprintf(output, "int LL_checked = 0;\n");
1269 }
1270
1271 /* For + repetitions we need to make sure we do any necessary
1272 skips before jumping into the for-loop. Also, we need to do
1273 the skips before changing from contains set to firstset
1274 (if that is necessary). */
1275 if (term->repeats.subtype & PLUS) {
1276 tfputs(output, "switch (LLcsymb) {\n");
1277 generateCaseLabels(output, term->first);
1278 tfputs(output, "break;\n"
1279 "default: LLskip(); break;\n"
1280 "}\n");
1281
1282 /* For persistent + repetitions the contains set equals
1283 the first set. */
1284 if (!(term->flags & TERM_PERSISTENT)) {
1285 generateSetPushPop(output, term->contains, DIR_POP);
1286 generateSetPushPop(output, term->first, DIR_PUSH);
1287 }
1288 /* Jump into the for-loop, so that we at least do one
1289 iteration. */
1290 tfprintf(output, "goto LL_%d;\n", firstSetLabel);
1291 }
1292
1293 if (term->repeats.number < 0) {
1294 tfprintf(output, "for (;;) {\n");
1295 braces++;
1296 } else if (term->repeats.number > 1) {
1297 tfputs(output, "for(; LL_i >= 0; LL_i--) {\n");
1298 braces++;
1299 }
1300
1301 if (term->repeats.subtype & (STAR|PLUS)) {
1302 int label = labelCounter++;
1303 set decisionSet, labelSet, conflicts;
1304
1305 /* Note that because there is no switch surrounding the term
1306 code, it may skip (at least the first time). */
1307 tfprintf(output, "LL_%d:\n"
1308 "switch (LLcsymb) {\n", label);
1309
1310 if (term->repeats.subtype == PLUS && !(term->flags & TERM_PERSISTENT))
1311 decisionSet = term->first;
1312 else
1313 decisionSet = term->contains;
1314 labelSet = newSet(condensedNumber);
1315 setCopy(labelSet, decisionSet);
1316
1317 /* =========================================================
1318 Generate the code for tokens neither in the first nor
1319 the follow set. */
1320 tfprintf(output, "default:\n"
1321 "if (LLskip()) goto LL_%d;\n", label);
1322
1323 /* The fallthrough LLgen tries to support here is useless. The only time
1324 it would do the fallthrough is the case where no tokens are skipped
1325 and the current token is in the decision set. However, if that is
1326 the case the switch would have jumped to the code rather than to the
1327 default case. */
1328 if (term->repeats.subtype == STAR && term->repeats.number == 1)
1329 generateSetPushPop(output, decisionSet, DIR_POP);
1330 tfprintf(output, "break;\n");
1331
1332 /* =========================================================
1333 Generate code for tokens in the follow set. */
1334
1335 /* If a repetition conflict was there, it also has a %while
1336 associated with it. Therefore we need to remove the
1337 conflicting tokens from the first and follow set, and
1338 generate separate code for that. */
1339 if (term->flags & TERM_RCONFLICT) {
1340 conflicts = newSet(condensedNumber);
1341 /* Determine the set of conflicting tokens */
1342 setCopy(conflicts, term->follow);
1343 setIntersection(conflicts, labelSet);
1344 /* Remove conflicting tokens from first and follow set */
1345 setMinus(term->follow, conflicts);
1346 setMinus(labelSet, conflicts);
1347 }
1348
1349 /* The follow set can be empty if only conflicting tokens
1350 can follow the repetition. */
1351 if (!setEmpty(term->follow)) {
1352 generateCaseLabels(output, term->follow);
1353 if (term->repeats.subtype == STAR && term->repeats.number == 1)
1354 generateSetPushPop(output, decisionSet, DIR_POP);
1355 tfprintf(output, "break;\n");
1356 }
1357
1358 if (term->flags & TERM_RCONFLICT) {
1359 /* Generate the code for the conflicting tokens */
1360 generateCaseLabels(output, conflicts);
1361 tfputs(output, "if (!(");
1362 if (term->flags & TERM_REQUIRES_LLCHECKED)
1363 tfputs(output, "LL_checked == 2 || (LL_checked == 0 &&");
1364 tfputc(output, '\n');
1365 outputCode(output, term->expression, true);
1366 if (term->flags & TERM_REQUIRES_LLCHECKED)
1367 tfputc(output, ')');
1368 tfputs(output, ")) {\n");
1369 if (term->repeats.subtype == STAR && term->repeats.number == 1)
1370 generateSetPushPop(output, decisionSet, DIR_POP);
1371 tfputs(output, "break;\n}\n");
1372 if (!setEmpty(labelSet))
1373 tfputs(output, "/*FALLTHROUGH*/\n");
1374 }
1375
1376 /* =========================================================
1377 Generate code for tokens in the first set. */
1378
1379 /* Although the first set can be empty after removing any
1380 conflicting tokens, we might as well just call
1381 generateCaseLabels because the code following it will
1382 need to be generated anyway. */
1383 generateCaseLabels(output, labelSet);
1384 deleteSet(labelSet);
1385
1386 if (firstSetLabel >= 0)
1387 tfprintf(output, "LL_%d:\n", firstSetLabel);
1388
1389 if (term->flags & TERM_REQUIRES_LLCHECKED)
1390 tfputs(output, "LL_checked = 0;\n");
1391
1392 if (term->repeats.number > 1)
1393 tfputs(output, "if (!LL_i)\n");
1394 if (term->repeats.number >= 1)
1395 generateSetPushPop(output, decisionSet, DIR_POP);
1396
1397 if (term->flags & TERM_CONTAINS_FINOPT) {
1398 term->conflicts = &conflicts;
1399 }
1400
1401 termGenerateCode(output, term, false, POP_NEVER);
1402 if (!(term->repeats.subtype == STAR && term->repeats.number == 1)) {
1403 tfputs(output, "continue;\n}\n");
1404 generateSetPushPop(output, decisionSet, DIR_POP);
1405 tfputs(output, "break;\n");
1406 } else {
1407 tfputs(output, "}\n");
1408 }
1409
1410 if (term->flags & TERM_RCONFLICT)
1411 deleteSet(conflicts);
1412 } else {
1413 termGenerateCode(output, term, true, term->repeats.number > 1 ? POP_CONDITIONAL : POP_ALWAYS);
1414 }
1415 /* Write the closing braces (if there are any). Write a \n
1416 after the last one. */
1417 for (; braces > 1; braces--)
1418 tfputc(output, '}');
1419 if (braces)
1420 tfputs(output, "}\n");
1421 }
1422
1423 /** Write code for an alternative.
1424 @param output The @a File to write to.
1425 @param alternative The @a Alternative to write code for.
1426 @param needsFinalRead Boolean to indicate whether the alternative should
1427 end with a LLread call.
1428 */
alternativeGenerateCode(File * output,Alternative * alternative,bool needsFinalRead)1429 static void alternativeGenerateCode(File *output, Alternative *alternative, bool needsFinalRead) {
1430 int i;
1431 GrammarPart *grammarPart;
1432 bool needsRead = false;
1433
1434 bool needsPushPop = false;
1435
1436 /* Push contains sets for all parts, although refrain from pushing for the
1437 first part. TERMs and non-terminals with multiple alternatives are
1438 exceptions in this respect because they always need to be pushed. */
1439 for (i = 0; i < listSize(alternative->parts); i++) {
1440 grammarPart = (GrammarPart *) listIndex(alternative->parts, i);
1441 if (needsPushPop) {
1442 /* Don't push TERMs here, as they will be pushed outside this if */
1443 if (grammarPart->subtype & (PART_TERMINAL|PART_LITERAL))
1444 tfprintf(output, "LLtcnt[%d]++;\n", grammarPart->uTerminal.terminal);
1445 else if (grammarPart->subtype == PART_NONTERMINAL)
1446 generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_PUSH);
1447 } else if (grammarPart->subtype != PART_ACTION) {
1448 needsPushPop = true;
1449 /* This is here mainly for efficiency reasons. There are two
1450 options:
1451 - add an LLincr to the start of each rule, which may result in
1452 an LLdecr in the calling rule followed by an LLincr in the rule
1453 - always do the LLincr in the calling rule, thereby avoiding
1454 possible LLdecr's */
1455 if (grammarPart->subtype == PART_NONTERMINAL && (grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE))
1456 generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_PUSH);
1457 }
1458 /* TERMs alway need to be pushed due to the possibility of skipping
1459 tokens. We don't want to skip anything in the TERMs contains set. */
1460 if (grammarPart->subtype == PART_TERM)
1461 generateSetPushPop(output, grammarPart->uTerm.contains, DIR_PUSH);
1462 }
1463
1464 needsPushPop = false;
1465 for (i = 0; i < listSize(alternative->parts); i++) {
1466 grammarPart = (GrammarPart *) listIndex(alternative->parts, i);
1467 switch (grammarPart->subtype) {
1468 case PART_ACTION:
1469 outputCode(output, grammarPart->uAction, true);
1470 break;
1471 case PART_TERMINAL:
1472 case PART_LITERAL:
1473 if (needsRead)
1474 tfputs(output, "LLread();\n");
1475 if (needsPushPop)
1476 tfprintf(output, "LLtcnt[%d]--;\n", grammarPart->uTerminal.terminal);
1477
1478 if (grammarPart->uTerminal.terminal == 0)
1479 tfputs(output, "\tif (LLcsymb != 0) LLerror(EOFILE);\n");
1480 else if (condensedToTerminal[grammarPart->uTerminal.terminal].flags & CONDENSED_ISTOKEN)
1481 tfprintf(output, "LL_SCANDONE(%d);", condensedToTerminal[grammarPart->uTerminal.terminal].uToken->uNumber);
1482 else
1483 tfprintf(output, "LL_SCANDONE(%d);", condensedToTerminal[grammarPart->uTerminal.terminal].uLiteral);
1484
1485 tfprintTokenComment(output, grammarPart->uTerminal.terminal);
1486 needsRead = true;
1487 break;
1488 case PART_NONTERMINAL:
1489 if (needsRead)
1490 tfputs(output, "LLread();\n");
1491 /* As explained above (where sets are pushed), non-terminals
1492 with multiple alternatives never need their contains sets
1493 decremented here. */
1494 if (needsPushPop && !(grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE))
1495 generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_POP);
1496
1497 if (!(grammarPart->uNonTerminal.flags & CALL_LLDISCARD)) {
1498 if (grammarPart->uNonTerminal.retvalIdent)
1499 tfprintf(output, "%s = ", grammarPart->uNonTerminal.retvalIdent->text);
1500 else if (grammarPart->uNonTerminal.nonTerminal->retvalIdent)
1501 tfprintf(output, "%s = ", grammarPart->token->text);
1502 }
1503
1504 tfprintf(output, "%s%d_%s", prefix, grammarPart->uNonTerminal.nonTerminal->number, grammarPart->token->text);
1505 /* Thread-safe parsers require LLthis as the first argument.
1506 Therefore we need to handle call generation for thread-safe
1507 parsers quite differently. */
1508 if (!option.threadSafe) {
1509 if (grammarPart->uNonTerminal.expression != NULL) {
1510 tfputc(output, '\n');
1511 outputCode(output, grammarPart->uNonTerminal.expression, true);
1512 tfputs(output, ";\n");
1513 } else {
1514 tfputs(output, "();\n");
1515 }
1516 } else {
1517 if (grammarPart->uNonTerminal.expression != NULL) {
1518 tfputs(output, "(LLthis");
1519 if (grammarPart->uNonTerminal.nonTerminal->argCount > 0)
1520 tfputc(output, ',');
1521 tfputc(output, '\n');
1522 outputCode(output, grammarPart->uNonTerminal.expression, false);
1523 tfputs(output, ");\n");
1524 } else {
1525 tfprintf(output, "(LLthis);\n");
1526 }
1527 }
1528 needsRead = grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_NO_FINAL_READ;
1529 break;
1530 case PART_TERM:
1531 if (needsRead)
1532 tfputs(output, "LLread();\n");
1533
1534 if (grammarPart->uTerm.repeats.subtype == FINOPT)
1535 finoptGenerateCode(output, &grammarPart->uTerm);
1536 else
1537 termGenerateRepeatsCode(output, &grammarPart->uTerm);
1538
1539 needsRead = grammarPart->uTerm.flags & TERM_NO_FINAL_READ;
1540 break;
1541 default:
1542 PANIC();
1543 }
1544 /* If the part didn't require a pop and it isn't an action, then the
1545 next part does need a pop. */
1546 if (!needsPushPop && grammarPart->subtype != PART_ACTION)
1547 needsPushPop = true;
1548 }
1549 if (needsRead && needsFinalRead)
1550 tfputs(output, "LLread();\n");
1551 }
1552
1553 /** Generate code for a variable to hold return values.
1554 @param output The @a File to write to.
1555 @param retvalIdent The type of the variable.
1556 @param name The name of the variable.
1557 */
generateReturnVariable(File * output,Token * retvalIdent,const char * name)1558 static void generateReturnVariable(File *output, Token *retvalIdent, const char *name) {
1559 tfprintDirective(output, retvalIdent);
1560 tfputs(output, retvalIdent->text);
1561 tfputc(output, '\n');
1562 tfprintDirective(output, NULL);
1563 tfprintf(output, "%s;\n", name);
1564 }
1565
1566 /** Iterator routine to generate variables for return values.
1567 @param key The name of the variable.
1568 @param data The first @a grammarPart that names this variable.
1569 @param userData The @a File to write to.
1570 */
generateVariable(const char * key,void * data,void * userData)1571 static void generateVariable(const char *key, void *data, void *userData) {
1572 GrammarPart *grammarPart = (GrammarPart *) data;
1573
1574 generateReturnVariable((File *) userData, grammarPart->uNonTerminal.nonTerminal->retvalIdent, key);
1575 }
1576
1577 /** Iterator routine to initialise generated variables for return values.
1578 @param key The name of the variable.
1579 @param data The first @a grammarPart that names this variable (unused).
1580 @param userData The @a File to write to.
1581 */
generateVariableInit(const char * key,void * data,void * userData)1582 static void generateVariableInit(const char *key, void *data, void *userData) {
1583 /* Parameter data is unused in this function, so make the compiler shut up. */
1584 (void) data;
1585
1586 tfprintf((File *) userData, "memset(&%s, 0, sizeof(%s));\n", key, key);
1587 }
1588
1589 /** Write code for a non-terminal.
1590 @param output The @a File to write to.
1591 @param nonTerminal The @a NonTerminal to write code for.
1592 */
generateNonTerminalCode(File * output,NonTerminal * nonTerminal)1593 void generateNonTerminalCode(File *output, NonTerminal *nonTerminal) {
1594 bool memsetsGenerated = false;
1595
1596 /* Don't generate code for unreachable non-terminals */
1597 if (!(nonTerminal->flags & NONTERMINAL_REACHABLE))
1598 return;
1599
1600 labelCounter = 0;
1601
1602 /* Generate the header for the function, with parameters if it has them */
1603 generateNonTerminalName(output, nonTerminal);
1604 if (!option.threadSafe)
1605 generateNonTerminalParameters(output, nonTerminal);
1606 else
1607 generateNonTerminalParametersTS(output, nonTerminal);
1608
1609 tfprintf(output, "{\n");
1610
1611 /* Declare variables for return values. */
1612 if (nonTerminal->retvalIdent != NULL)
1613 generateReturnVariable(output, nonTerminal->retvalIdent, "LLretval");
1614
1615 iterateScope(nonTerminal->retvalScope, generateVariable, output);
1616
1617 if (!option.noInitLLretval) {
1618 /* Clear LLretval and variables generated to hold return values, and open
1619 a new scope so that the variables defined by the user will be defined
1620 at the start of the scope. */
1621 if (nonTerminal->retvalIdent != NULL) {
1622 tfputs(output, "memset(&LLretval, 0, sizeof(LLretval));\n");
1623 memsetsGenerated = true;
1624 }
1625
1626 if (iterateScope(nonTerminal->retvalScope, generateVariableInit, output))
1627 memsetsGenerated = true;
1628
1629 if (memsetsGenerated)
1630 tfputs(output, "{\n");
1631 }
1632
1633 /* Generate the declarations if available */
1634 if (nonTerminal->declarations != NULL)
1635 outputCode(output, nonTerminal->declarations, false);
1636
1637 termGenerateCode(output, &nonTerminal->term, true, nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE ? POP_ALWAYS : POP_NEVER);
1638
1639 if (memsetsGenerated)
1640 /* Close scope opened after clearing LLretval (if it was opened). */
1641 tfputs(output, "}\n");
1642
1643 if (nonTerminal->retvalIdent != NULL)
1644 tfputs(output, "return LLretval;\n");
1645
1646 tfputs(output, "}\n");
1647 }
1648