1 /* Copyright (C) 2005-2008,2011 G.P. Halkes
2    This program is free software: you can redistribute it and/or modify
3    it under the terms of the GNU General Public License version 3, as
4    published by the Free Software Foundation.
5 
6    This program is distributed in the hope that it will be useful,
7    but WITHOUT ANY WARRANTY; without even the implied warranty of
8    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9    GNU General Public License for more details.
10 
11    You should have received a copy of the GNU General Public License
12    along with this program.  If not, see <http://www.gnu.org/licenses/>.
13 */
14 
15 #include <string.h>
16 #include <ctype.h>
17 #include <stdlib.h>
18 
19 #include "generate.h"
20 #include "nonRuleAnalysis.h"
21 #include "bool.h"
22 #include "list.h"
23 #include "option.h"
24 #include "globals.h"
25 #include "io.h"
26 
27 /** Construct the name of an output file, from the name of an input file.
28 	@param originalName The name of the input file.
29 	@param extension The extension of the output file.
30 
31 	The file constructed will be the originalName with a trailing '.g'
32 	removed, after which the extension is added.
33 */
createNameWithExtension(const char * originalName,const char * extension)34 char *createNameWithExtension(const char *originalName, const char *extension) {
35 	bool addExtension;
36 	char *fullName;
37 	size_t length, extensionLength;
38 
39 	extensionLength = strlen(extension) + 1;
40 
41 	length = strlen(originalName);
42 
43 	/* Check for '.g' */
44 	addExtension = !(length >= 2 && originalName[length - 1] == 'g' && originalName[length - 2] == '.');
45 	length += extensionLength;
46 
47 	if (!addExtension)
48 		length -= 2;
49 
50 	/* Add 1 for the 0 byte */
51 	fullName = (char *) safeMalloc(length + 1, "openOutputs");
52 	strcpy(fullName, originalName);
53 
54 	if (addExtension)
55 		fullName[length - extensionLength] = '.';
56 
57 	strcpy(fullName + length - extensionLength + 1, extension);
58 	return fullName;
59 }
60 
61 /** Write a code fragment that was read from the input to the output.
62 	@param output The @a File to write to.
63 	@param code The code to write.
64 	@param keepMarkers Boolean to indicate whether to keep the parentheses or
65 		braces that delimited the input.
66 */
outputCode(File * output,CCode * code,bool keepMarkers)67 void outputCode(File *output, CCode *code, bool keepMarkers) {
68 	tfprintDirective(output, code);
69 
70 	if (keepMarkers) {
71 		tfputs(output, code->text);
72 	} else {
73 		size_t length;
74 		length = strlen(code->text);
75 
76 		tfputsn(output, code->text + 1, length - 2);
77 	}
78 	tfputc(output, '\n');
79 
80 	tfprintDirective(output, NULL);
81 }
82 
83 /*================== Code to generate the header file ========================*/
84 /** Write the #define's for the tokens.
85 	@param output The @a File to write to.
86 */
generateTokenDeclarations(File * output)87 static void generateTokenDeclarations(File *output) {
88 	int i;
89 	Declaration *declaration;
90 	Directive *directive;
91 
92 	for (i = 0; i < listSize(declarations); i++) {
93 		declaration = (Declaration *) listIndex(declarations, i);
94 		if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == TOKEN_DIRECTIVE) {
95 			directive = declaration->uDirective;
96 			tfprintf(output, "#define %s %d\n", directive->token[0]->text, directive->uNumber);
97 		}
98 	}
99 }
100 
generateParserHeader(File * output,Directive * directive)101 void generateParserHeader(File *output, Directive *directive) {
102 	Declaration *startRule;
103 
104 	tfprintf(output, "%s %s(", option.abort ? "int" : "void", directive->token[0]->text);
105 
106 	startRule = (Declaration *) lookup(globalScope, directive->token[1]->text);
107 	ASSERT(startRule != NULL);
108 
109 	if (option.threadSafe) {
110 		tfprintf(output, "%s LLuserData", userDataTypeTS);
111 		if (startRule->uNonTerminal->retvalIdent != NULL)
112 			tfprintf(output, ", %s *LLretval", startRule->uNonTerminal->retvalIdent->text);
113 	} else if (startRule->uNonTerminal->retvalIdent != NULL) {
114 		tfprintf(output, "%s *LLretval", startRule->uNonTerminal->retvalIdent->text);
115 	} else {
116 		tfputs(output, "void");
117 	}
118 
119 	tfputs(output, ")");
120 }
121 
122 /** Write a prototype for each parser.
123 	@param output The @a File to write to.
124 */
generateParserDeclarations(File * output)125 void generateParserDeclarations(File *output) {
126 	int i;
127 	Declaration *declaration;
128 	Directive *directive;
129 
130 	for (i = 0; i < listSize(declarations); i++) {
131 		declaration = (Declaration *) listIndex(declarations, i);
132 		if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == START_DIRECTIVE) {
133 			directive = declaration->uDirective;
134 			generateParserHeader(output, directive);
135 			tfputs(output, ";\n");
136 		}
137 	}
138 }
139 
140 /** Write the top of a header file.
141 	@param output The @a File to write to.
142 */
generateHeaderFileTop(File * output)143 void generateHeaderFileTop(File *output) {
144 	/* Include guard. */
145 	tfprintf(output, "#ifndef  __LLNEXTGEN_%s_H__\n#define __LLNEXTGEN_%s_H__\n", prefix, prefix);
146 
147 	tfputs(output, "#ifndef LL_NOTOKENS\n");
148 	tfputs(output, "#define EOFILE 256\n");
149 	generateTokenDeclarations(output);
150 	tfputs(output, "#endif\n");
151 	/* Add some defines in the way that the prefix directive has effect. */
152 	tfprintf(output, "#define %s_MAXTOKNO %d\n", prefix, maxTokenNumber - 1);
153 	tfprintf(output, "#define %s_MISSINGEOF (-1)\n", prefix);
154 	tfprintf(output, "#define %s_DELETE (%d)\n", prefix, option.noEOFZero ? -2 : 0);
155 	tfprintf(output, "#define %s_VERSION " VERSION_HEX "\n", prefix);
156 	if (!option.noLLreissue)
157 		tfprintf(output, "#define %s_NEW_TOKEN (-2)\n", prefix);
158 }
159 /** Write the header file.
160 	@param output The @a File to write to.
161 */
generateHeaderFile(File * output)162 void generateHeaderFile(File *output) {
163 	generateHeaderFileTop(output);
164 
165 	/* LLgen does not declare the parsing routines in the header file,
166 	   but we should. This makes tidy programming so much easer. */
167 	if (!option.noPrototypesHeader) {
168 		generateParserDeclarations(output);
169 		if (option.generateSymbolTable)
170 			tfprintf(output, "const char *%sgetSymbol(int);\n", prefix);
171 		if (option.abort)
172 			tfprintf(output, "void %sabort(int);\n", prefix);
173 		tfprintf(output, "extern int %ssymb;\n", prefix);
174 		if (!option.noLLreissue && !option.generateLexerWrapper)
175 			tfprintf(output, "extern int %sreissue;\n", prefix);
176 	}
177 
178 	/* Include guard. */
179 	tfprintf(output, "#endif\n");
180 }
181 
182 /*=============== Code to generate the first part of a C file ================*/
183 /** Write the set of translations from LL to <prefix> variants.
184 	@param output The @a File to write to.
185 */
generateDefaultPrefixTranslations(File * output)186 void generateDefaultPrefixTranslations(File *output) {
187 	/* Add some defines so that the prefix directive has effect. Note
188 	   that only externally visible symbols need to be renamed. */
189 	tfprintf(output, "#define LLsymb %ssymb\n", prefix);
190 	tfprintf(output, "#define LLmessage %smessage\n", prefix);
191 	if (!option.noLLreissue && !option.generateLexerWrapper)
192 		tfprintf(output, "#define LLreissue %sreissue\n", prefix);
193 	if (option.abort)
194 		tfprintf(output, "#define LLabort %sabort\n", prefix);
195 	if (option.generateSymbolTable)
196 		tfprintf(output, "#define LLgetSymbol %sgetSymbol\n", prefix);
197 	/* We want these available under their LL names in the C file regardless */
198 	tfputs(output, "#define LL_MISSINGEOF (-1)\n");
199 	tfprintf(output, "#define LL_DELETE (%d)\n", option.noEOFZero ? -2 : 0);
200 	tfputs(output, "#define LL_VERSION " VERSION_HEX "\n");
201 	if (!option.noLLreissue)
202 		tfputs(output, "#define LL_NEW_TOKEN (-2)\n");
203 }
204 
205 /** Write the set of declarations and #define's common to all .c files.
206 	@param output The @a File to write to.
207 	@param storageType The storage type ('static', 'extern' or nothing) for
208 		the variables.
209 
210 	Most #define's are simply there to allow use of LL<name> instead of
211 	<prefix><name>. In the case that %prefix isn't used, they are omitted.
212 	Further, #define's are generated for the number of tokens and sets, and
213 	some utility macro's. The variables LLscnt, LLtcnt and LLcsymb are also
214 	declared.
215 */
generateDefaultDeclarations(File * output,const char * storageType)216 static void generateDefaultDeclarations(File *output, const char *storageType) {
217 	size_t setSize;
218 	if (prefixDirective != NULL) {
219 		generateDefaultPrefixTranslations(output);
220 		if (option.LLgenStyleOutputs) {
221 			/* In the case of LLgen style outputs, we need to do renaming of
222 			   these as well, as these symbols are not static as they are for
223 			   LLnextgen style outputs. */
224 			tfprintf(output, "#define LLread %sread\n", prefix);
225 			tfprintf(output, "#define LLskip %sskip\n", prefix);
226 			tfprintf(output, "#define LLfirst %sfirst\n", prefix);
227 			tfprintf(output, "#define LLerror %serror\n", prefix);
228 			tfprintf(output, "#define LLsets %ssets\n", prefix);
229 			tfprintf(output, "#define LLindex %sindex\n", prefix);
230 			tfprintf(output, "#define LLcsymb %scsymb\n", prefix);
231 			tfprintf(output, "#define LLscnt %sscnt\n", prefix);
232 			tfprintf(output, "#define LLtcnt %stcnt\n", prefix);
233 		}
234 	}
235 
236 	/* Define the number of tokens and the sets */
237 	tfprintf(output, "#define LL_NTERMINALS %d\n", condensedNumber);
238 	/* The number of sets is the number bytes in a set. Because we start from
239 	   int's, we round up the needed number of bytes to a multiple of the size
240 	   of an int. This makes it easier to generate the sets. */
241 	setSize = ((condensedNumber + sizeof(int) * 8 - 1) / (sizeof(int)*8)) * sizeof(int);
242 	tfprintf(output, "#define LL_NSETS %d\n#define LL_SSETS %u\n", numberOfSets, (unsigned int) setSize);
243 	/* Define LLinset macro. Note that any decent compiler will
244 	   replace the division with a shift */
245 	tfprintf(output, "#define LLinset(LLx) (LLsets[LLx*LL_SSETS + (LLcsymb/8)] & (1<<(LLcsymb & 7)))\n");
246 	tfputs(output, "#define LL_SCANDONE(LLx) if (LLsymb != LLx) LLerror(LLx);\n");
247 
248 	/* For reentrant scanners, we don't use the global variables as arrays,
249 	   but as pointers to stack arrays. This saves us global memory space and
250 	   memory copying. */
251 	if (!option.reentrant)
252 		/* Add one the the number of sets, to make sure we don't end up with an
253 		   array of size 0. */
254 		tfprintf(output, "%sint LLscnt[LL_NSETS+1], LLtcnt[LL_NTERMINALS];\n", storageType);
255 	else
256 		tfprintf(output, "%sint *LLscnt, *LLtcnt;\n", storageType);
257 
258 	tfprintf(output, "%sint LLcsymb;\n", storageType);
259 }
260 
261 /** Write the header of an LLgen style .c file.
262 	@param output The @a File to write to.
263 	@param headerName The name of the header file to include.
264 
265 	These declarations are only necessary for LLgen style .c files. The
266 	LLnextgen style .c file includes the definitions, so these declarations
267 	are superfluous.
268 */
generateDefaultLocalDeclarations(File * output,const char * headerName)269 void generateDefaultLocalDeclarations(File *output, const char *headerName) {
270 	/* Include our header file */
271 	tfprintf(output, "#include \"%s\"\n", headerName);
272 	generateDefaultDeclarations(output, STORAGE_EXTERN);
273 
274 	/* LLsets and LLindex are defined here different from the rest because
275 	   they need to be initialised when the storage class is not extern */
276 	tfputs(output, "extern const char LLsets[];\n");
277 	tfputs(output, "extern const int LLindex[];\n");
278 	/* LLsymb is defined here because it needs to be extern or global, and
279 	   never static (not even for LLnextgen outputs). This is because a
280 	   user can supply a LLmessage routine outside of the generated files and
281 	   in LLmessage the user needs LLsymb. */
282 	tfputs(output, "extern int LLsymb;\n");
283 	/* For LLreissue a similar reasoning holds as for LLsymb, only based on
284 	   the lexer(-wrapper). */
285 	if (!option.noLLreissue && !option.generateLexerWrapper)
286 		tfprintf(output, "extern int LLreissue;\n");
287 
288 	/* These functions are here because we don't need these headers at all
289 	   when generating LLnextgen style outputs. */
290 	tfputs(output, "void LLread(void);\n");
291 	tfputs(output, "int LLskip(void);\n");
292 	if (hasFirst)
293 		tfputs(output, "int LLfirst(int LLtoken, int LLset);\n");
294 	tfputs(output, "void LLerror(int LLtoken);\n");
295 }
296 
297 /** Write code for a directive.
298 	@param output The @a File to write to.
299 	@param directive The directive to write code for.
300 
301 	At this point the only directive that needs code written here is the
302 	%first directive. The other directives apart from %start are integrated
303 	in the code. The %start directive is not generated with this routine, as
304 	for LLgen we need to generate the parsers only in Lpars.c and not for each
305 	.c file.
306 */
generateDirectiveDeclaration(File * output,Directive * directive)307 static void generateDirectiveDeclaration(File *output, Directive *directive) {
308 	switch (directive->subtype) {
309 		case START_DIRECTIVE:
310 		case TOKEN_DIRECTIVE:
311  		case LEXICAL_DIRECTIVE:
312 		case PREFIX_DIRECTIVE:
313 		case LABEL_DIRECTIVE:
314 		case DATATYPE_DIRECTIVE:
315 			/* Generated elsewhere */
316 			break;
317 		case FIRST_DIRECTIVE: {
318 			Declaration *declaration;
319 			NonTerminal *nonTerminal;
320 			int fill;
321 
322 			declaration = (Declaration *) lookup(globalScope, directive->token[1]->text);
323 			ASSERT(declaration != NULL && declaration->subtype == NONTERMINAL);
324 			nonTerminal = declaration->uNonTerminal;
325 
326 			fill = setFill(nonTerminal->term.first);
327 			if (fill > 1) {
328 				tfprintf(output, "#define %s(LLx) LLfirst(LLx, %d)\n", directive->token[0]->text, setFindIndex(nonTerminal->term.first, false));
329 			} else if (fill == 1) {
330 				int token = setFirstToken(nonTerminal->term.first);
331 				if (condensedToTerminal[token].flags & CONDENSED_ISTOKEN)
332 					token = condensedToTerminal[token].uToken->uNumber;
333 				else
334 					token = condensedToTerminal[token].uLiteral;
335 				tfprintf(output, "#define %s(LLx) (LLx == %d)\n", directive->token[0]->text, token);
336 			} else {
337 				tfprintf(output, "#define %s(LLx) 0\n", directive->token[0]->text);
338 			}
339 			break;
340 		}
341 		case ONERROR_DIRECTIVE:
342 			/* NOTE: not supported yet */
343 			break;
344 		default:
345 			PANIC();
346 	}
347 
348 }
349 
350 /** Write code for all directives.
351 	@param output The @a File to write to.
352 */
generateDirectiveDeclarations(File * output)353 void generateDirectiveDeclarations(File *output) {
354 	int i;
355 	Declaration *declaration;
356 
357 	for (i = 0; i < listSize(declarations); i++) {
358 		declaration = (Declaration *) listIndex(declarations, i);
359 		if (declaration->subtype == DIRECTIVE)
360 			generateDirectiveDeclaration(output, declaration->uDirective);
361 	}
362 }
363 
364 /** Write the name (and return type) of a rule.
365 	@param output The @a File to write to.
366 	@param nonTerminal The rule to write the name for.
367 */
generateNonTerminalName(File * output,NonTerminal * nonTerminal)368 static void generateNonTerminalName(File *output, NonTerminal *nonTerminal) {
369 	if (!option.LLgenStyleOutputs)
370 		tfputs(output, "static ");
371 	tfprintf(output, "%s %s%d_%s", nonTerminal->retvalIdent == NULL ? "void" : nonTerminal->retvalIdent->text, prefix, nonTerminal->number, nonTerminal->token->text);
372 }
373 
374 /** Write the parameters of a rule.
375 	@param output The @a File to write to.
376 	@param nonTerminal The rule to write the parameters for.
377 */
generateNonTerminalParameters(File * output,NonTerminal * nonTerminal)378 static void generateNonTerminalParameters(File *output, NonTerminal *nonTerminal) {
379 	if (nonTerminal->parameters != NULL) {
380 		tfputc(output, '\n');
381 		outputCode(output, nonTerminal->parameters, true);
382 	} else {
383 		tfprintf(output, "(void)");
384 	}
385 }
386 
387 /** Write the prototype for a rule.
388 	@param output The @a File to write to.
389 	@param nonTerminal The rule to write the prototype for.
390 */
generateNonTerminalDeclaration(File * output,NonTerminal * nonTerminal)391 void generateNonTerminalDeclaration(File *output, NonTerminal *nonTerminal) {
392 	/* Don't generate code for unreachable non-terminals */
393 	if (!(nonTerminal->flags & NONTERMINAL_REACHABLE))
394 		return;
395 	/* Generate declarations for non-terminals. LLgen is minimalistic in the
396 	   declarations it generates, but there is not really a reason to do so. */
397 	generateNonTerminalName(output, nonTerminal);
398 	if (!option.threadSafe)
399 		generateNonTerminalParameters(output, nonTerminal);
400 	else
401 		generateNonTerminalParametersTS(output, nonTerminal);
402 	tfprintf(output, ";\n");
403 }
404 
405 /** Write the prototypes for all the rules.
406 	@param output The @a File to write to.
407 */
generatePrototypes(File * output)408 void generatePrototypes(File *output) {
409 	int i;
410 	Declaration *declaration;
411 
412 	/* Note: unfortunately this can't be done with walkRules, because
413 		generateNonTerminalDeclaration needs the file to output to */
414 	for (i = 0; i < listSize(declarations); i++) {
415 		declaration = (Declaration *) listIndex(declarations, i);
416 		if (declaration->subtype == NONTERMINAL)
417 			generateNonTerminalDeclaration(output, declaration->uNonTerminal);
418 	}
419 }
420 
421 
422 /*=============== Code to generate the first part of LLnextgen C file or Lpars.c ================*/
423 static const char *defaultSymbolTable[] = { "\"<EOF>\"", /* For converting -1 */
424 	/* NOTE: the EOF name for -1 includes the quotes, to make it easier to write
425 	   the code for overriding the default name. */
426 	"<EOF>", "<SOH>", "<STX>", "<ETX>", "<EOT>", "<ENQ>", "<ACK>", "<BEL>",
427 	"<BS>", "<TAB>", "<NL>", "<VT>", "<FF>", "<CR>", "<SO>", "<SI>",
428 	"<DLE>", "<DC1>", "<DC2>", "<DC3>", "<DC4>", "<NAK>", "<SYN>", "<ETB>",
429 	"<CAN>", "<EM>", "<SUB>", "<ESC>", "<FS>", "<GS>", "<RS>", "<US>",
430 	"<SP>", "'!'", "'\\\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
431 	"'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
432 	"'0'", "'1'", "'2'", "'3'", "'4'", "'5'","'6'", "'7'",
433 	"'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
434 	"'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
435 	"'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
436 	"'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
437 	"'X'", "'Y'", "'Z'", "'['", "'\\\\'", "']'", "'^'", "'_'",
438 	"'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
439 	"'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
440 	"'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
441 	"'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "<DEL>",
442 	"'\\x80'", "'\\x81'", "'\\x82'", "'\\x83'", "'\\x84'", "'\\x85'", "'\\x86'", "'\\x87'",
443 	"'\\x88'", "'\\x89'", "'\\x8A'", "'\\x8B'", "'\\x8C'", "'\\x8D'", "'\\x8E'", "'\\x8F'",
444 	"'\\x90'", "'\\x91'", "'\\x92'", "'\\x93'", "'\\x94'", "'\\x95'", "'\\x96'", "'\\x97'",
445 	"'\\x98'", "'\\x99'", "'\\x9A'", "'\\x9B'", "'\\x9C'", "'\\x9D'", "'\\x9E'", "'\\x9F'",
446 	"'\\xA0'", "'\\xA1'", "'\\xA2'", "'\\xA3'", "'\\xA4'", "'\\xA5'", "'\\xA6'", "'\\xA7'",
447 	"'\\xA8'", "'\\xA9'", "'\\xAA'", "'\\xAB'", "'\\xAC'", "'\\xAD'", "'\\xAE'", "'\\xAF'",
448 	"'\\xB0'", "'\\xB1'", "'\\xB2'", "'\\xB3'", "'\\xB4'", "'\\xB5'", "'\\xB6'", "'\\xB7'",
449 	"'\\xB8'", "'\\xB9'", "'\\xBA'", "'\\xBB'", "'\\xBC'", "'\\xBD'", "'\\xBE'", "'\\xBF'",
450 	"'\\xC0'", "'\\xC1'", "'\\xC2'", "'\\xC3'", "'\\xC4'", "'\\xC5'", "'\\xC6'", "'\\xC7'",
451 	"'\\xC8'", "'\\xC9'", "'\\xCA'", "'\\xCB'", "'\\xCC'", "'\\xCD'", "'\\xCE'", "'\\xCF'",
452 	"'\\xD0'", "'\\xD1'", "'\\xD2'", "'\\xD3'", "'\\xD4'", "'\\xD5'", "'\\xD6'", "'\\xD7'",
453 	"'\\xD8'", "'\\xD9'", "'\\xDA'", "'\\xDB'", "'\\xDC'", "'\\xDD'", "'\\xDE'", "'\\xDF'",
454 	"'\\xE0'", "'\\xE1'", "'\\xE2'", "'\\xE3'", "'\\xE4'", "'\\xE5'", "'\\xE6'", "'\\xE7'",
455 	"'\\xE8'", "'\\xE9'", "'\\xEA'", "'\\xEB'", "'\\xEC'", "'\\xED'", "'\\xEE'", "'\\xEF'",
456 	"'\\xF0'", "'\\xF1'", "'\\xF2'", "'\\xF3'", "'\\xF4'", "'\\xF5'", "'\\xF6'", "'\\xF7'",
457 	"'\\xF8'", "'\\xF9'", "'\\xFA'", "'\\xFB'", "'\\xFC'", "'\\xFD'", "'\\xFE'", "'\\xFF'" };
458 
459 /** Header for regualar lexer wrapper. */
460 static const char *lexerWrapperHeader = "int LLlexerWrapper(void) {\n";
461 
462 /** The code for the lexer wrapper, with a %s in it as a place holder for the
463 	real lexer. */
464 const char *lexerWrapperString =
465 		"\tif (LLreissue == %s) {\n"
466 			"\t\treturn %s(%s);\n"
467 		"\t} else {\n"
468 			"\t\tint LLretval = LLreissue;\n"
469 			"\t\tLLreissue = %s;\n"
470 			"\t\treturn LLretval;\n"
471 		"\t}\n"
472 	"}\n";
473 
474 /** Header for regular llmessage. */
475 static const char *llmessageHeader = "void LLmessage(int LLtoken) {\n";
476 
477 /* Note: this string is too long to be one string (at least according to the
478    ANSI C standard). That is why it is an array of two strings. */
479 /** The code for the default LLmessage.
480 
481 	NOTE: DON'T use printf to write this string. It contains fprintf
482 	statements with format strings that are not escaped.
483 */
484 const char *llmessageString[2] = {
485 		"\tswitch (LLtoken) {\n"
486 			"\t\tcase LL_MISSINGEOF:\n"
487 				"\t\t\tfprintf(stderr, \"Expected %s, found %s. Skipping.\\n\", LLgetSymbol(EOFILE), LLgetSymbol(LLsymb));\n"
488 				"\t\t\tbreak;\n", /* Note the comma, to break it into two strings. */
489 			"\t\tcase LL_DELETE:\n"
490 				"\t\t\tfprintf(stderr, \"Skipping unexpected %s.\\n\", LLgetSymbol(LLsymb));\n"
491 				"\t\t\tbreak;\n"
492 			"\t\tdefault:\n"
493 				"\t\t\tfprintf(stderr, \"Expected %s, found %s. Inserting.\\n\", LLgetSymbol(LLtoken), LLgetSymbol(LLsymb));\n"
494 				"\t\t\tbreak;\n"
495 		"\t}\n"
496 	"}\n"};
497 
498 /** Write the conversion tables used in the parser to file.
499 	@param output The @a File to write to.
500 	@param storageType The storage type ('static', 'extern' or nothing) for
501 		the variables.
502 
503 	These tables can be used both for thread-safe and regular parsers, as no
504 	changes to them are made.
505 */
generateConversionTables(File * output,const char * storageType)506 void generateConversionTables(File *output, const char *storageType) {
507 	size_t j, k, setSize;
508 	int i;
509 	/* Generate the table with token sets. To allow generating code on a
510 	   machine with for example 4 byte words for a machine with 2 byte words
511 	   we use char's here instead of int's. */
512 	setSize = (condensedNumber + sizeof(int) * 8 - 1) / (sizeof(int)*8);
513 	tfprintf(output, "%sconst char LLsets[] = {\n", storageType);
514 	for (i = 0; i < numberOfSets; i++) {
515 		for (j = 0; j < setSize; j++)
516 			for (k = 0; k < sizeof(int); k++)
517 				tfprintf(output, "\t'\\x%02X', ", (setList[i].bits[j] >> (k*8)) & 0xff);
518 		tfputc(output, '\n');
519 	}
520 	tfprintf(output, "\t0\n};\n");
521 
522 	/* Output token number conversion table */
523 	tfprintf(output, "%sconst int LLindex[] = { 0", storageType);
524 	for (i = 0; i < maxTokenNumber; i++) {
525 		if ((i % 8) == 0)
526 			tfprintf(output, ",\n\t");
527 		else
528 			tfprintf(output, ", ");
529 		tfprintf(output, "%4d", terminalToCondensed[i]);
530 	}
531 	tfprintf(output, "};\n");
532 }
533 
534 /** Write a symbol table and the LLgetSymbol routine.
535 	@param output The @a File to write to.
536 */
generateSymbolTable(File * output)537 void generateSymbolTable(File *output) {
538 	Declaration *declaration;
539 	const char *eofLabel = NULL;
540 	size_t j;
541 	int i;
542 
543 	if (option.noEOFZero)
544 		defaultSymbolTable[1] = "<NUL>";
545 
546 	/* use the EOFILE for -1 and also for 0 if !option.noEOFZero */
547 	declaration = (Declaration *) lookup(globalScope, "EOFILE");
548 	ASSERT(declaration != NULL && declaration->subtype == DIRECTIVE);
549 	if (declaration->uDirective->token[1] != NULL) {
550 		eofLabel = declaration->uDirective->token[1]->text;
551 		if (!option.noEOFZero)
552 			literalLabels[0] = declaration->uDirective->token[1];
553 	}
554 
555 	/* Write the first part of the symbol table, using the user supplied
556 	   symbolnames if available. */
557 	/* NOTE: The EOF names already include the quotes. */
558 	tfputs(output, "static const char *LLsymbolTable[] = {\n");
559 
560 	/* Gettext-noop macro definition. Note: we do this AFTER the definition of
561 	   the variable, so that we avoid any chance of name collisions. */
562 	if (option.gettext)
563 		tfprintf(output, "#define %s(LLx) LLx\n", option.gettextMacro);
564 
565 	if (option.gettext && eofLabel)
566 		tfprintf(output, "%s(", option.gettextMacro);
567 	tfputs(output, eofLabel == NULL ? defaultSymbolTable[0] : eofLabel);
568 	tfputs(output, option.gettext && eofLabel ? "), " : ", ");
569 
570 	for (i = 0; i < 256; i++) {
571 		if ((i % 4) == 0)
572 			tfputc(output, '\n');
573 		if (literalLabels[i] == NULL) {
574 			tfprintf(output, "\"%s\", ", defaultSymbolTable[i + 1]);
575 		} else {
576 			if (option.gettext)
577 				tfprintf(output, "%s(", option.gettextMacro);
578 			tfprintf(output, "%s", literalLabels[i]->text);
579 			tfputs(output, option.gettext ? "), " : ", ");
580 		}
581 	}
582 	tfputc(output, '\n');
583 	/* NOTE: The EOF names already include the quotes. */
584 	if (option.gettext && eofLabel)
585 		tfprintf(output, "%s(", option.gettextMacro);
586 	tfputs(output, eofLabel == NULL ? defaultSymbolTable[0] : eofLabel);
587 	if (option.gettext && eofLabel)
588 		tfputs(output, ")");
589 
590 	/* Write the symbol names for the %token defined tokens. */
591 	for (i = 257; i < maxTokenNumber; i++) {
592 		Directive *directive;
593 		tfputs(output, ", ");
594 
595 		if ((i % 4) == 0)
596 			tfputc(output, '\n');
597 		directive = condensedToTerminal[terminalToCondensed[i]].uToken;
598 		/* If no %label has been specified, create the name from the
599 		   token name itself. */
600 		if (directive->token[1] == NULL) {
601 			char *text;
602 			if (option.lowercaseSymbols) {
603 				size_t length;
604 				text = safeStrdup(directive->token[0]->text, "generateDefaultGlobalDeclarations");
605 				length = strlen(text);
606 				for (j = 0; j < length; j++)
607 					text[j] = (char) tolower(text[j]);
608 			} else {
609 				text = directive->token[0]->text;
610 			}
611 
612 			tfprintf(output, "\"%s\"", text);
613 
614 			if (option.lowercaseSymbols)
615 				free(text);
616 		} else {
617 			if (option.gettext)
618 				tfprintf(output, "%s(", option.gettextMacro);
619 			tfputs(output, directive->token[1]->text);
620 			if (option.gettext)
621 				tfputc(output, ')');
622 		}
623 	}
624 	tfputs(output, "};\n");
625 
626 	/* Make sure the macro doesn't interfere with our code (or the user's). */
627 	if (option.gettext)
628 		tfprintf(output, "#undef %s\n#ifdef %s\nchar *gettext(const char *);\n#else\n#define gettext(LLx) LLx\n#endif\n", option.gettextMacro, option.gettextGuard);
629 
630 	tfprintf(output,
631 		"const char *LLgetSymbol(int LLtoken) {\n"
632 			"\tif (LLtoken < -1 || LLtoken > %d /* == LL_MAXTOKNO */)\n"
633 				"\t\treturn (char *) 0;\n", maxTokenNumber - 1);
634 	if (option.gettext)
635 		tfputs(output, "\treturn gettext(LLsymbolTable[LLtoken+1]);\n");
636 	else
637 		tfputs(output, "\treturn LLsymbolTable[LLtoken+1];\n");
638 	tfputs(output, "}\n");
639 
640 	if (option.gettext)
641 		tfprintf(output, "#ifndef %s\n#undef gettext\n#endif\n", option.gettextGuard);
642 }
643 
644 /** Write code to define all internal variables and utility functions.
645 	@param output The @a File to write to.
646 	@param headerName The name of the header file to include.
647 
648 	These variables and functions end up in the .c file, or in the case of
649 	LLgen style outputs in the Lpars.c file.
650 */
generateDefaultGlobalDeclarations(File * output,const char * headerName)651 void generateDefaultGlobalDeclarations(File *output, const char *headerName) {
652 	const char *lexer, *lexerInternal = NULL; /* The = NULL is there to shut up the compiler */
653 	const char *storageType = option.LLgenStyleOutputs ? STORAGE_GLOBAL : STORAGE_STATIC;
654 
655 	lexer = lexicalDirective ? lexicalDirective->token[0]->text : "yylex";
656 	if (option.generateLexerWrapper) {
657 		lexerInternal = lexer;
658 		lexer = "LLlexerWrapper";
659 	}
660 
661 	generateDefaultDeclarations(output, storageType);
662 	tfputs(output, "#include <string.h>\n");
663 
664 	generateConversionTables(output, storageType);
665 
666 	/* Because LLmessage and the lexer(-wrapper) can be supplied externally,
667 	   these have to be made global. */
668 	tfputs(output, "int LLsymb;\n");
669 	if (!option.noLLreissue)
670 		tfprintf(output, "%sint LLreissue;\n", option.generateLexerWrapper ? STORAGE_STATIC : STORAGE_GLOBAL);
671 
672 	/* Declare the lexer */
673 	tfprintf(output, "int %s(void);\n", lexerInternal != NULL ? lexerInternal : lexer);
674 
675 	if (option.generateLexerWrapper) {
676 		tfputs(output, "static ");
677 		tfputs(output, lexerWrapperHeader);
678 		tfprintf(output, lexerWrapperString, "-2 /* LL_NEW_TOKEN */", lexerInternal, "", "-2 /* LL_NEW_TOKEN */");
679 	}
680 	/* As we generate our standard LLmessage as a static routine, we need to
681 	   declare it static here as well to prevent compiler warnings. */
682 	if (option.generateLLmessage)
683 		tfputs(output, "static ");
684 	/* Declare LLmessage (or actually <%prefix>message, because of the defines) */
685 	tfputs(output, "void LLmessage(int);\n");
686 
687 	/* Define LLread. For correcting error recovery, it can be really simple.
688 	   As a good compiler will inline this anyway, we don't have to bother
689 	   with a #define. */
690 	tfprintf(output, "%svoid LLread(void) { LLcsymb = LLindex[(LLsymb = %s()) + 1]; }\n", storageType, lexer);
691 
692 	/* Define the error handling routines LLskip and LLerror
693 	   Note: LLskip is spread over two tfprintf statements because otherwise
694 	   the format string would exceed the maximum length for a string constant.
695 	*/
696 	tfprintf(output,
697 		"%sint LLskip(void) {\n"
698 			"\tint LL_i;\n"
699 			"\tif (LLcsymb >= 0) {\n"
700 				"\t\tif (LLtcnt[LLcsymb] != 0) return 0;\n"
701 				"\t\tfor (LL_i = 0; LL_i < LL_NSETS; LL_i++)\n"
702 					"\t\t\tif (LLscnt[LL_i] != 0 && LLinset(LL_i))\n"
703 						"\t\t\t\treturn 0;\n"
704 			"\t}\n\n", storageType);
705 	tfprintf(output,
706 			"\tfor (;;) {\n"
707 				"\t\tLLmessage(%d /* LL_DELETE */);\n"
708 				"\t\twhile ((LLcsymb = LLindex[(LLsymb = %s()) + 1]) < 0) LLmessage(%d /* LL_DELETE */);\n"
709 				"\t\tif (LLtcnt[LLcsymb] != 0)\n"
710 					"\t\t\treturn 1;\n"
711 				"\t\tfor (LL_i = 0; LL_i < LL_NSETS; LL_i++)\n"
712 					"\t\t\tif (LLscnt[LL_i] != 0 && LLinset(LL_i))\n"
713 						"\t\t\t\treturn 1;\n"
714 		"\t}\n}\n", option.noEOFZero ? -2 : 0, lexer, option.noEOFZero ? -2 : 0);
715 
716 	tfprintf(output,
717 		"%svoid LLerror(int LLtoken) {\n"
718 			"\tif (LLtoken == 256 /* EOFILE */) {\n"
719 				"\t\tLLmessage(-1 /* LL_MISSINGEOF */);\n"
720 				"\t\twhile (LLindex[(LLsymb = %s()) + 1] != 0) /*NOTHING*/ ;\n"
721 				"\t\treturn;\n"
722 			"\t}\n"
723 			"\tLLtcnt[LLindex[LLtoken + 1]]++;\n"
724 			"\tLLskip();\n"
725 			"\tLLtcnt[LLindex[LLtoken + 1]]--;\n"
726 			"\tif (LLsymb != LLtoken) {%s LLmessage(LLtoken); }\n"
727 		"}\n", storageType, lexer, option.noLLreissue ? "" : " LLreissue = LLsymb;");
728 
729 	/* Generate the LLfirst routine, but only if a %first directive is used. */
730 	if (hasFirst) {
731 		tfprintf(output,
732 			"%sint LLfirst(int LLtoken, int LLset) {\n"
733 				"\tint LLctoken;\n"
734 				"\treturn (LLtoken >= -1 && LLtoken < %d && (LLctoken = LLindex[LLtoken+1]) >= 0 &&\n"
735 					"\t\t(LLsets[LLset*LL_SSETS + (LLctoken/8)] & (1<<(LLctoken & 7))));\n"
736 			"}\n", storageType, maxTokenNumber);
737 	}
738 
739 	/* Generate the LLabort routine, but only if a the --abort option is used. */
740 	if (option.abort) {
741 		tfputs(output,
742 			"#include <setjmp.h>\n"
743 			"static jmp_buf LLjmp_buf;\n"
744 			"void LLabort(int LLretval) {\n"
745 				"\tlongjmp(LLjmp_buf, LLretval);\n"
746 			"}\n");
747 	}
748 
749 	/* Generate the symbol table and LLgetSymbol routine, but only if a the
750 	   --generateSymbolTable option is used. */
751 	if (option.generateSymbolTable)
752 		generateSymbolTable(output);
753 
754 
755 	if (headerName != NULL) {
756 		/* Include our header file */
757 		tfprintf(output, "#include \"%s\"\n", headerName);
758 	}
759 
760 	if (option.generateLLmessage) {
761 		tfputs(output, "#include <stdio.h>\nstatic ");
762 		tfputs(output, llmessageHeader);
763 		tfputs(output, llmessageString[0]);
764 		tfputs(output, llmessageString[1]);
765 	}
766 }
767 
768 /*============== Code to generate the actual rules and parsers ================*/
769 /*===== General routines =====*/
770 /** Write code snippet for tracking the contains set.
771 	@param output The @a File to write to.
772 	@param tokens The set to push or pop.
773 	@param direction Whether to push or pop.
774 */
generateSetPushPop(File * output,const set tokens,PushDirection direction)775 void generateSetPushPop(File *output, const set tokens, PushDirection direction) {
776 	int size = setFill(tokens);
777 	/* For empty sets, there is nothing we need to do. */
778 	if (size == 0)
779 		return;
780 	/* Take note of the character after LL. */
781 	if (size == 1)
782 		tfprintf(output, "LLtcnt[%d]%s;\n", setFirstToken(tokens), direction == DIR_PUSH ? "++" : "--");
783 	else
784 		tfprintf(output, "LLscnt[%d]%s;\n", setFindIndex(tokens, false), direction == DIR_PUSH ? "++" : "--");
785 }
786 
787 /** Write case labels for all tokens in a set.
788 	@param output The @a File to write to.
789 	@param tokens The set to write case labels for.
790 
791 	The generated code makes extensive use of switch statements. This routine
792 	writes the case labels needed for one set.
793 */
generateCaseLabels(File * output,const set tokens)794 static void generateCaseLabels(File *output, const set tokens) {
795 	int i;
796 	for (i = 0; i < condensedNumber; i++) {
797 		if (setContains(tokens, i)) {
798 			tfprintf(output, "case %d:", i);
799 			tfprintTokenComment(output, i);
800 		}
801 	}
802 }
803 
804 /*===== Code to generate parser =====*/
805 /** Write the code for a parser.
806 	@param output The @a File to write to.
807 	@param directive The %start directive to write code for.
808 */
generateParser(File * output,Directive * directive)809 static void generateParser(File *output, Directive *directive) {
810 	NonTerminal *nonTerminal;
811 
812 	nonTerminal = ((Declaration *) lookup(globalScope, directive->token[1]->text))->uNonTerminal;
813 	if (option.LLgenStyleOutputs)
814 		generateNonTerminalDeclaration(output, nonTerminal);
815 
816 	/* NOTE: as we cannot move this function to before the header file inclusion
817 	   (thread safe parsers will likely have some user defined type as
818 	   argument), symbols created by the user might collide with ours (although
819 	   not for LLgen-style output). Therefore we prefix our vars so we can
820 	   consider it a user problem (don't say we didn't warn you in the manual). */
821 
822 #ifdef DEBUG
823 	/* To prevent warnings about implicit declarations, we add these includes */
824 	tfputs(output, "#include <stdio.h>\n#include <stdlib.h>\n");
825 #endif
826 
827 	generateParserHeader(output, directive);
828 	tfputs(output, " {\n");
829 
830 	if (option.abort)
831 		tfputs(output, "int LLsetjmpRetval;\n");
832 	if (option.reentrant) {
833 		const char *volatileMarker = "";
834 
835 		/* According to the C rationale, after returning to a setjmp call context
836 		   through a longjmp call, the only way to make sure that all local
837 		   variables contain the values present at setjmp call time is to mark
838 		   the variables as volatile. Otherwise the last values written may be
839 		   in a register that is not restored after longjmp. */
840 		if (option.abort)
841 			volatileMarker = "volatile ";
842 
843 		/* For reentrant parsers, the LLtcnt and LLscnt are actually
844 		   arrays on the stack, and the variables are pointers to this
845 		   stack array. */
846 		tfprintf(output, "\t%sint LLcounts[LL_NTERMINALS + LL_NSETS + 2];\n", volatileMarker);
847 		tfprintf(output, "\t%sint *LLbackupLLscnt = LLscnt, *LLbackupLLtcnt = LLtcnt;\n", volatileMarker);
848 		if (!option.noLLreissue)
849 			tfprintf(output, "\t%sint LLbackupLLreissue = LLreissue;\n", volatileMarker);
850 		if (option.abort)
851 			tfprintf(output, "\t%sjmp_buf LLbackupJmp_buf;\n\tmemcpy(&LLbackupJmp_buf, &LLjmp_buf, sizeof(jmp_buf));\n", volatileMarker);
852 		tfputs(output, "\tLLcounts[0] = LLsymb; LLcounts[1] = LLcsymb;\n");
853 		tfputs(output, "\tLLtcnt = LLcounts + 2; LLscnt = LLcounts + 2 + LL_NTERMINALS;\n");
854 	}
855 	/* Clear the contains set tracking variables. */
856 	tfputs(output, "\tmemset(LLscnt, 0, LL_NSETS * sizeof(int));\n");
857 	tfputs(output, "\tmemset(LLtcnt, 0, LL_NTERMINALS * sizeof(int));\n");
858 	/* EOFILE is always in the contains set: */
859 	tfputs(output, "\tLLtcnt[0]++;\n");
860 
861 	tfputs(output, "\tLLsymb = 0;\n");
862 
863 	if (!option.noLLreissue)
864 		tfputs(output, "\tLLreissue = -2 /* LL_NEW_TOKEN */;\n");
865 
866 	if (option.abort)
867 		tfputs(output, "\tLLsetjmpRetval = setjmp(LLjmp_buf);\n\tif (LLsetjmpRetval == 0) {\n");
868 	tfputs(output, "\tLLread();\n");
869 
870 	if (nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE)
871 		generateSetPushPop(output, nonTerminal->term.contains, DIR_PUSH);
872 
873 	tfputc(output, '\t');
874 	if (nonTerminal->retvalIdent != NULL)
875 		tfputs(output, "*LLretval = ");
876 
877 	tfprintf(output, "%s%d_%s();\n", prefix, nonTerminal->number, directive->token[1]->text);
878 	if (nonTerminal->term.flags & TERM_NO_FINAL_READ)
879 		tfputs(output, "\tLLread();\n");
880 	tfputs(output, "\tif (LLcsymb != 0) LLerror(256 /* EOFILE*/);\n");
881 #ifdef DEBUG
882 	tfputs(output, "\tLLtcnt[0]--;\n");
883 	tfputs(output, "{ int LL_i; for(LL_i = 0; LL_i < LL_NTERMINALS; LL_i++) if (LLtcnt[LL_i] != 0) { printf(\"Counts are not all zero!!!!\\n\"); exit(10); }}\n");
884 	tfputs(output, "{ int LL_i; for(LL_i = 0; LL_i < LL_NSETS; LL_i++) if (LLscnt[LL_i] != 0) { printf(\"Counts are not all zero!!!!\\n\"); exit(10); }}\n");
885 #endif
886 	if (option.abort)
887 		tfputs(output, "\t}\n");
888 
889 	if (option.reentrant) {
890 		/* Restore the previous values */
891 		tfputs(output, "\tLLscnt = LLbackupLLscnt; LLtcnt = LLbackupLLtcnt;\n");
892 		tfputs(output, "\tLLsymb = LLcounts[0]; LLcsymb = LLcounts[1];\n");
893 		if (!option.noLLreissue)
894 			tfputs(output, "\tLLreissue = LLbackupLLreissue;\n");
895 		if (option.abort)
896 			tfputs(output, "\tmemcpy(&LLjmp_buf, &LLbackupJmp_buf, sizeof(jmp_buf));\n");
897 	}
898 
899 	if (option.abort)
900 		tfputs(output, "\treturn LLsetjmpRetval;\n");
901 	tfputs(output, "}\n\n");
902 }
903 
904 /** Write code for directives that can only appear at the end of the .c file.
905 	@param output The @a File to write to.
906 */
generateDirectiveCodeAtEnd(File * output)907 void generateDirectiveCodeAtEnd(File *output) {
908 	int i;
909 	Declaration *declaration;
910 
911 	for (i = 0; i < listSize(declarations); i++) {
912 		declaration = (Declaration *) listIndex(declarations, i);
913 		if (declaration->subtype == DIRECTIVE && declaration->uDirective->subtype == START_DIRECTIVE)
914 			generateParser(output, declaration->uDirective);
915 	}
916 }
917 /*===== Code to generate rules =====*/
918 int labelCounter;
919 static void alternativeGenerateCode(File *output, Alternative *alternative, bool needsFinalRead);
920 
921 /** Write the code for the @a Alternatives of a @a Term.
922 	@param output The @a File to write to.
923 	@param term The @a Term to write code for.
924 	@param canSkip Boolean to indicate whether the current token when taking
925 		the switch is not always valid, which may require jumping out of the
926 		switch and take it again after skipping the current token.
927 	@param popType One of POP_NEVER, POP_ALWAYS and POP_CONDITIONAL.
928 		Determines whether the contains set of the term is popped after the
929 		correct alternative has been determined. POP_ALWAYS is used for terms
930 		that don't repeat, POP_CONDITIONAL is used for terms with fixed
931 		repetition counts, while POP_NEVER is used for variable repetition
932 		count terms.
933 */
termGenerateCode(File * output,Term * term,const bool canSkip,PopType popType)934 void termGenerateCode(File *output, Term *term, const bool canSkip, PopType popType) {
935 	int i, j, label = 0; /* The = 0 is there to shut up the compiler */
936 	Alternative *alternative, *otherAlternative;
937 	set conflicts, temporary;
938 	CCode *lastExpression;
939 	int lastLabel;
940 
941 	if (!(term->flags & TERM_MULTIPLE_ALTERNATIVE)) {
942 		/* This is mainly for the case where we have a single alternative that
943 		   has a repetition other than 1. */
944 		if (popType) {
945 			if (popType == POP_CONDITIONAL)
946 				tfputs(output, "if (!LL_i)\n");
947 			generateSetPushPop(output, term->contains, DIR_POP);
948 		}
949 		alternativeGenerateCode(output, (Alternative *) listIndex(term->rule, 0), !(term->flags & TERM_NO_FINAL_READ));
950 		return;
951 	}
952 
953 	/* If the default case can skip the current token, it needs to be able to
954 	   jump out of the switch so that it can chose an alternative again. This
955 	   requires a label. */
956 	if (canSkip) {
957 		label = labelCounter++;
958 		tfprintf(output, "LL_%d:\n", label);
959 	}
960 	tfprintf(output, "switch (LLcsymb) {\n");
961 
962 	conflicts = newSet(condensedNumber);
963 	temporary = newSet(condensedNumber);
964 
965 	for (i = 0; i < listSize(term->rule); i++) {
966 		alternative = (Alternative *) listIndex(term->rule, i);
967 		/* For alternatives that have conflicting tokens, special handling is
968 		   required. */
969 		if (alternative->flags & ALTERNATIVE_IF) {
970 			setClear(conflicts);
971 			for (j = i + 1; j < listSize(term->rule); j++) {
972 				otherAlternative = (Alternative *) listIndex(term->rule, j);
973 				if (!setIntersectionEmpty(alternative->first, otherAlternative->first)) {
974 					setCopy(temporary, alternative->first);
975 					setIntersection(temporary, otherAlternative->first);
976 					setUnion(conflicts, temporary);
977 					if (otherAlternative->label < 0)
978 						otherAlternative->label = labelCounter++;
979 				}
980 			}
981 
982 			/* The %if may have been for tokens that have already been handled
983 			   when processing a previous alternative (also with a %if). So
984 			   the conflicting set may be empty now. */
985 			if (!setEmpty(conflicts)) {
986 				/* Give the alternative a label, if it doesn't already have
987 				   one. */
988 				if (alternative->label < 0)
989 					alternative->label = labelCounter++;
990 				/* Remove conflicting tokens from first set of this alternative,
991 				   to prevent duplicate case labels. */
992 				setMinus(alternative->first, conflicts);
993 
994 				while (!setEmpty(conflicts)) {
995 				/* Steps:
996 					- find a set of tokens that share conflicting alternatives (these can be put in one case)
997 					- generate case labels for that set
998 					- generate a sequence of "if (condition) goto label;" statements, ending with a "goto label;"
999 						for the final alternative.
1000 					- remove conflicting tokens from first set of any affected alternatives
1001 					- repeat until there are no more conflicting tokens
1002 				*/
1003 					setCopy(temporary, conflicts);
1004 					for (j = i + 1; j < listSize(term->rule); j++) {
1005 						otherAlternative = (Alternative *) listIndex(term->rule, j);
1006 						if (!setIntersectionEmpty(temporary, otherAlternative->first)) {
1007 							setIntersection(temporary, otherAlternative->first);
1008 						}
1009 					}
1010 					/* Now we have a set with conflict tokens, which share
1011 					   the same conflicting alternatives. */
1012 					generateCaseLabels(output, temporary);
1013 					lastExpression = alternative->expression;
1014 					lastLabel = alternative->label;
1015 					for (j = i + 1; j < listSize(term->rule); j++) {
1016 						otherAlternative = (Alternative *) listIndex(term->rule, j);
1017 						if (!setIntersectionEmpty(temporary, otherAlternative->first)) {
1018 							setMinus(otherAlternative->first, temporary);
1019 							tfputs(output, "if\n");
1020 							outputCode(output, lastExpression, true);
1021 							tfprintf(output, "goto LL_%d;\n", lastLabel);
1022 							lastExpression = otherAlternative->expression;
1023 							lastLabel = otherAlternative->label;
1024 						}
1025 					}
1026 					tfprintf(output, "goto LL_%d;\n", lastLabel);
1027 					setMinus(conflicts, temporary);
1028 				}
1029 			}
1030 		}
1031 
1032 
1033 		if (alternative->flags & ALTERNATIVE_DEFAULT) {
1034 			tfprintf(output, "default:\n");
1035 			if (canSkip) {
1036 				tfprintf(output, "if (LLskip())\ngoto LL_%d;\n/*FALLTHROUGH*/\n", label);
1037 				generateCaseLabels(output, alternative->first);
1038 			}
1039 		} else {
1040 			/* This may not actually generate any case labels, i.e. if an
1041 			   alternative only has tokens that conflict with other
1042 			   alternatives. The alternative WILL have a regular label then. */
1043 			generateCaseLabels(output, alternative->first);
1044 		}
1045 		if (alternative->label >= 0)
1046 			tfprintf(output, "LL_%d:\n", alternative->label);
1047 		if (popType) {
1048 			/* For use in fixed maximum repetition loops. */
1049 			if (popType == POP_CONDITIONAL)
1050 				tfputs(output, "if (!LL_i)\n");
1051 			generateSetPushPop(output, term->contains, DIR_POP);
1052 		}
1053 		alternativeGenerateCode(output, alternative, !(term->flags & TERM_NO_FINAL_READ));
1054 		tfputs(output, "break;\n");
1055 	}
1056 	tfprintf(output, "}\n");
1057 	deleteSet(conflicts);
1058 	deleteSet(temporary);
1059 }
1060 
1061 /** Write code for the ..? operator.
1062 	@param output The @a File to write to.
1063 	@param term The @a Term to write code for.
1064 */
finoptGenerateCode(File * output,Term * term)1065 static void finoptGenerateCode(File *output, Term *term) {
1066 	/* Info needed:
1067 		conflict set from enclosing term
1068 
1069 	  [ fixed:
1070 		if (LL_i)
1071 			goto first;
1072 	  ]
1073 		switch (LLcsymb) {
1074 			default:
1075 				LLskip...
1076 			case follow-outer-only:	//only in follow set of outer term
1077 				goto outLabel;
1078 	  [ not fixed:
1079 			case conflict-outer:	//only in conflict set of outer term
1080 				if ((LL_checked = (.1. != 0) + 1) == 1)
1081 					break;
1082 				goto first; //generated only if other conflict cases follow
1083 			case conflict-first-conflict-outer: //in both first set and conflict set of outer term
1084 				if ((.2. || (LL_checked = (.1. != 0) + 1) == 1)
1085 					break;
1086 				goto first; //generated only if other conflict cases follow
1087 	  ]
1088 			case conflict-first-follow-outer:	//in first set and follow set of outer term
1089 				if (!.2.)
1090 					break;
1091 	  [ not fixed:
1092 			case decision-outer-only:	//only in decision set of outer term
1093 	  ]
1094 			case first:	//first set of this term
1095 				...
1096 		}
1097 	*/
1098 	Term *outerTerm = term->enclosing.alternative->enclosing;
1099 	int label = labelCounter++, firstLabel = -1;
1100 	bool needsGotoBeforeNewCase = false;
1101 	set resultSet;
1102 
1103 	/* The firstLabel is initially set to -1, because it may not be necessary
1104 	   to generate the label at all. It all depends on any repetition conflicts
1105 	   and whether it is a fixed repetition type. */
1106 
1107 	/* Note that if this is in a fixed repetition, than the number of
1108 	   repetitions will be greater than 1. In the event that the finopt
1109 	   operator is specified within a single fixed repetition the finopt
1110 	   operator will have been converted to a STAR operator. */
1111 	if (outerTerm->repeats.subtype & FIXED) {
1112 		ASSERT(outerTerm->repeats.number > 1);
1113 		/* firstLabel is always -1 here. */
1114 		firstLabel = labelCounter++;
1115 		tfprintf(output, "if (LL_i) goto LL_%d;\n", firstLabel);
1116 	}
1117 
1118 	tfprintf(output, "LL_%d:\n"
1119 		"switch (LLcsymb) {\n"
1120 			"default:\n"
1121 				"if (LLskip())\ngoto LL_%d;\n", label, label);
1122 
1123 	resultSet = newSet(condensedNumber);
1124 
1125 	/* =========================================================
1126 	   Generate case labels for tokens only in the follow set of the outer term */
1127 	setCopy(resultSet, outerTerm->follow);
1128 	setMinus(resultSet, term->first);
1129 	if (!setEmpty(resultSet)) {
1130 		tfputs(output, "/*FALLTHROUGH*/\n");
1131 		generateCaseLabels(output, resultSet);
1132 	}
1133 
1134 	generateSetPushPop(output, term->contains, DIR_POP);
1135 
1136 	/* This jump out the switch needs to be here regardless of the tokens in the
1137 	   outerTerm follow set, because of the default case. */
1138 	tfputs(output, "break;\n");
1139 
1140 	if (outerTerm->flags & TERM_REQUIRES_LLCHECKED) {
1141 		/* =========================================================
1142 		   Generate the case for tokens only in the conflict set of the
1143 		   outer term */
1144 		setCopy(resultSet, *(outerTerm->conflicts));
1145 		setMinus(resultSet, term->first);
1146 
1147 		if (!setEmpty(resultSet)) {
1148 			generateCaseLabels(output, resultSet);
1149 			tfputs(output, "if ((LL_checked = (\n");
1150 			outputCode(output, outerTerm->expression, true);
1151 			tfputs(output, "!= 0) + 1) == 1) {\n");
1152 			generateSetPushPop(output, term->contains, DIR_POP);
1153 			tfprintf(output, "break;\n}\n");
1154 			needsGotoBeforeNewCase = true;
1155 		}
1156 
1157 		/* =========================================================
1158 		   Generate the case for tokens in both the conflict set of the
1159 		   outer term and the first set of this term */
1160 		setCopy(resultSet, *(outerTerm->conflicts));
1161 		setIntersection(resultSet, term->first);
1162 		if (!setEmpty(resultSet)) {
1163 			if (needsGotoBeforeNewCase) {
1164 				if (firstLabel == -1)
1165 					firstLabel = labelCounter++;
1166 				tfprintf(output, "goto LL_%d;\n", firstLabel);
1167 			}
1168 			generateCaseLabels(output, resultSet);
1169 			tfputs(output, "if ((\n");
1170 			outputCode(output, term->expression, true);
1171 			tfputs(output, "|| (LL_checked = (\n");
1172 			outputCode(output, outerTerm->expression, true);
1173 			tfputs(output, "!= 0) + 1) == 1)) {\n");
1174 			generateSetPushPop(output, term->contains, DIR_POP);
1175 			tfprintf(output, "break;\n}\n");
1176 			needsGotoBeforeNewCase = true;
1177 		}
1178 	}
1179 
1180 	/* =========================================================
1181 	   Generate the case for tokens in both the follow set of the
1182 	   outer term and the first set of this term */
1183 	setCopy(resultSet, outerTerm->follow);
1184 	setIntersection(resultSet, term->first);
1185 	if (!setEmpty(resultSet)) {
1186 		if (needsGotoBeforeNewCase) {
1187 			if (firstLabel == -1)
1188 				firstLabel = labelCounter++;
1189 			tfprintf(output, "goto LL_%d;\n", firstLabel);
1190 		}
1191 		generateCaseLabels(output, resultSet);
1192 		tfputs(output, "if (!\n");
1193 		outputCode(output, term->expression, true);
1194 		tfputs(output, ") {\n");
1195 		generateSetPushPop(output, term->contains, DIR_POP);
1196 		tfputs(output, "break;}\n");
1197 		/* Eventhough this is the last case, we set the flag anyway, so that
1198 		   we can properly generate the fallthrough. */
1199 		needsGotoBeforeNewCase = true;
1200 	}
1201 
1202 	if (needsGotoBeforeNewCase)
1203 		tfputs(output, "/*FALLTHROUGH*/\n");
1204 
1205 	/* =========================================================
1206 	   Generate the case for tokens in either the first set of the
1207 	   outer term and the first set of this term */
1208 	if (!(outerTerm->repeats.subtype & FIXED)) {
1209 		if (outerTerm->repeats.subtype == PLUS && !(outerTerm->flags & TERM_PERSISTENT))
1210 			setCopy(resultSet, outerTerm->first);
1211 		else
1212 			setCopy(resultSet, outerTerm->contains);
1213 
1214 		setMinus(resultSet, term->first);
1215 		if (outerTerm->flags & TERM_REQUIRES_LLCHECKED)
1216 			setMinus(resultSet, *(outerTerm->conflicts));
1217 
1218 		if (!setEmpty(resultSet))
1219 			generateCaseLabels(output, resultSet);
1220 	}
1221 
1222 	/* Now for all tokens in the term's first set that have not been handled
1223 	   in previous cases. */
1224 	setCopy(resultSet, term->first);
1225 	setMinus(resultSet, outerTerm->follow);
1226 	if (outerTerm->flags & TERM_REQUIRES_LLCHECKED)
1227 		setMinus(resultSet, *(outerTerm->conflicts));
1228 
1229 	generateCaseLabels(output, resultSet);
1230 
1231 	if (firstLabel != -1)
1232 		tfprintf(output, "LL_%d:\n", firstLabel);
1233 
1234 	generateSetPushPop(output, term->contains, DIR_POP);
1235 	termGenerateCode(output, term, outerTerm->repeats.subtype & FIXED, POP_NEVER);
1236 
1237 	tfputs(output, "}\n");
1238 	deleteSet(resultSet);
1239 }
1240 
1241 /** Write the code for the repeat-structure of a @a Term.
1242 	@param output The @a File to write to.
1243 	@param term The @a Term to write code for.
1244 */
termGenerateRepeatsCode(File * output,Term * term)1245 static void termGenerateRepeatsCode(File *output, Term *term) {
1246 	/* Initialize the firstSetLabel only in the cases we actually need it */
1247 	int firstSetLabel = (term->repeats.subtype & PLUS) ? labelCounter++ : -1;
1248 	int braces = 0;
1249 
1250 	if (term->repeats.number > 1 || term->flags & TERM_REQUIRES_LLCHECKED) {
1251 		tfputc(output, '{');
1252 		braces++;
1253 	}
1254 
1255 	if (term->repeats.number > 1) {
1256 		/* Initialize it here, so that when we jump into the
1257 		   for-loop (+ repetitions) it is initialized correctly. */
1258 		tfprintf(output, "int LL_i = %d;\n", term->repeats.number - 1);
1259 	}
1260 
1261 	if (term->flags & TERM_REQUIRES_LLCHECKED) {
1262 		/* Create flag for checking whether a first-follow conflict has
1263 		   already been checked for. This flag has the following values:
1264 		   - 0: no checking has been done.
1265 		   - 1: checking has been done, and the result was false.
1266 		   - 2: checking has been done, and the result was true.
1267 		*/
1268 		tfprintf(output, "int LL_checked = 0;\n");
1269 	}
1270 
1271 	/* For + repetitions we need to make sure we do any necessary
1272 	   skips before jumping into the for-loop. Also, we need to do
1273 	   the skips before changing from contains set to firstset
1274 	   (if that is necessary). */
1275 	if (term->repeats.subtype & PLUS) {
1276 		tfputs(output, "switch (LLcsymb) {\n");
1277 		generateCaseLabels(output, term->first);
1278 		tfputs(output, "break;\n"
1279 			"default: LLskip(); break;\n"
1280 			"}\n");
1281 
1282 		/* For persistent + repetitions the contains set equals
1283 		   the first set. */
1284 		if (!(term->flags & TERM_PERSISTENT)) {
1285 			generateSetPushPop(output, term->contains, DIR_POP);
1286 			generateSetPushPop(output, term->first, DIR_PUSH);
1287 		}
1288 		/* Jump into the for-loop, so that we at least do one
1289 		   iteration. */
1290 		tfprintf(output, "goto LL_%d;\n", firstSetLabel);
1291 	}
1292 
1293 	if (term->repeats.number < 0) {
1294 		tfprintf(output, "for (;;) {\n");
1295 		braces++;
1296 	} else if (term->repeats.number > 1) {
1297 		tfputs(output, "for(; LL_i >= 0; LL_i--) {\n");
1298 		braces++;
1299 	}
1300 
1301 	if (term->repeats.subtype & (STAR|PLUS)) {
1302 		int label = labelCounter++;
1303 		set decisionSet, labelSet, conflicts;
1304 
1305 		/* Note that because there is no switch surrounding the term
1306 		   code, it may skip (at least the first time). */
1307 		tfprintf(output, "LL_%d:\n"
1308 			"switch (LLcsymb) {\n", label);
1309 
1310 		if (term->repeats.subtype == PLUS && !(term->flags & TERM_PERSISTENT))
1311 			decisionSet = term->first;
1312 		else
1313 			decisionSet = term->contains;
1314 		labelSet = newSet(condensedNumber);
1315 		setCopy(labelSet, decisionSet);
1316 
1317 		/* =========================================================
1318 		   Generate the code for tokens neither in the first nor
1319 		   the follow set. */
1320 		tfprintf(output, "default:\n"
1321 			"if (LLskip()) goto LL_%d;\n", label);
1322 
1323 		/* The fallthrough LLgen tries to support here is useless. The only time
1324 		   it would do the fallthrough is the case where no tokens are skipped
1325 		   and the current token is in the decision set. However, if that is
1326 		   the case the switch would have jumped to the code rather than to the
1327 		   default case. */
1328 		if (term->repeats.subtype == STAR && term->repeats.number == 1)
1329 			generateSetPushPop(output, decisionSet, DIR_POP);
1330 		tfprintf(output, "break;\n");
1331 
1332 		/* =========================================================
1333 		   Generate code for tokens in the follow set. */
1334 
1335 		/* If a repetition conflict was there, it also has a %while
1336 		   associated with it. Therefore we need to remove the
1337 		   conflicting tokens from the first and follow set, and
1338 		   generate separate code for that. */
1339 		if (term->flags & TERM_RCONFLICT) {
1340 			conflicts = newSet(condensedNumber);
1341 			/* Determine the set of conflicting tokens */
1342 			setCopy(conflicts, term->follow);
1343 			setIntersection(conflicts, labelSet);
1344 			/* Remove conflicting tokens from first and follow set */
1345 			setMinus(term->follow, conflicts);
1346 			setMinus(labelSet, conflicts);
1347 		}
1348 
1349 		/* The follow set can be empty if only conflicting tokens
1350 		   can follow the repetition. */
1351 		if (!setEmpty(term->follow)) {
1352 			generateCaseLabels(output, term->follow);
1353 			if (term->repeats.subtype == STAR && term->repeats.number == 1)
1354 				generateSetPushPop(output, decisionSet, DIR_POP);
1355 			tfprintf(output, "break;\n");
1356 		}
1357 
1358 		if (term->flags & TERM_RCONFLICT) {
1359 			/* Generate the code for the conflicting tokens */
1360 			generateCaseLabels(output, conflicts);
1361 			tfputs(output, "if (!(");
1362 			if (term->flags & TERM_REQUIRES_LLCHECKED)
1363 				tfputs(output, "LL_checked == 2 || (LL_checked == 0 &&");
1364 			tfputc(output, '\n');
1365 			outputCode(output, term->expression, true);
1366 			if (term->flags & TERM_REQUIRES_LLCHECKED)
1367 				tfputc(output, ')');
1368 			tfputs(output, ")) {\n");
1369 			if (term->repeats.subtype == STAR && term->repeats.number == 1)
1370 				generateSetPushPop(output, decisionSet, DIR_POP);
1371 			tfputs(output, "break;\n}\n");
1372 			if (!setEmpty(labelSet))
1373 				tfputs(output, "/*FALLTHROUGH*/\n");
1374 		}
1375 
1376 		/* =========================================================
1377 		   Generate code for tokens in the first set. */
1378 
1379 		/* Although the first set can be empty after removing any
1380 		   conflicting tokens, we might as well just call
1381 		   generateCaseLabels because the code following it will
1382 		   need to be generated anyway. */
1383 		generateCaseLabels(output, labelSet);
1384 		deleteSet(labelSet);
1385 
1386 		if (firstSetLabel >= 0)
1387 			tfprintf(output, "LL_%d:\n", firstSetLabel);
1388 
1389 		if (term->flags & TERM_REQUIRES_LLCHECKED)
1390 			tfputs(output, "LL_checked = 0;\n");
1391 
1392 		if (term->repeats.number > 1)
1393 			tfputs(output, "if (!LL_i)\n");
1394 		if (term->repeats.number >= 1)
1395 			generateSetPushPop(output, decisionSet, DIR_POP);
1396 
1397 		if (term->flags & TERM_CONTAINS_FINOPT) {
1398 			term->conflicts = &conflicts;
1399 		}
1400 
1401 		termGenerateCode(output, term, false, POP_NEVER);
1402 		if (!(term->repeats.subtype == STAR && term->repeats.number == 1)) {
1403 			tfputs(output, "continue;\n}\n");
1404 			generateSetPushPop(output, decisionSet, DIR_POP);
1405 			tfputs(output, "break;\n");
1406 		} else {
1407 			tfputs(output, "}\n");
1408 		}
1409 
1410 		if (term->flags & TERM_RCONFLICT)
1411 			deleteSet(conflicts);
1412 	} else {
1413 		termGenerateCode(output, term, true, term->repeats.number > 1 ? POP_CONDITIONAL : POP_ALWAYS);
1414 	}
1415 	/* Write the closing braces (if there are any). Write a \n
1416 	   after the last one. */
1417 	for (; braces > 1; braces--)
1418 		tfputc(output, '}');
1419 	if (braces)
1420 		tfputs(output, "}\n");
1421 }
1422 
1423 /** Write code for an alternative.
1424 	@param output The @a File to write to.
1425 	@param alternative The @a Alternative to write code for.
1426 	@param needsFinalRead Boolean to indicate whether the alternative should
1427 		end with a LLread call.
1428 */
alternativeGenerateCode(File * output,Alternative * alternative,bool needsFinalRead)1429 static void alternativeGenerateCode(File *output, Alternative *alternative, bool needsFinalRead) {
1430 	int i;
1431 	GrammarPart *grammarPart;
1432 	bool needsRead = false;
1433 
1434 	bool needsPushPop = false;
1435 
1436 	/* Push contains sets for all parts, although refrain from pushing for the
1437 	   first part. TERMs and non-terminals with multiple alternatives are
1438 	   exceptions in this respect because they always need to be pushed. */
1439 	for (i = 0; i < listSize(alternative->parts); i++) {
1440 		grammarPart = (GrammarPart *) listIndex(alternative->parts, i);
1441 		if (needsPushPop) {
1442 			/* Don't push TERMs here, as they will be pushed outside this if */
1443 			if (grammarPart->subtype & (PART_TERMINAL|PART_LITERAL))
1444 				tfprintf(output, "LLtcnt[%d]++;\n", grammarPart->uTerminal.terminal);
1445 			else if (grammarPart->subtype == PART_NONTERMINAL)
1446 				generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_PUSH);
1447 		} else if (grammarPart->subtype != PART_ACTION) {
1448 			needsPushPop = true;
1449 			/* This is here mainly for efficiency reasons. There are two
1450 			   options:
1451 			   - add an LLincr to the start of each rule, which may result in
1452 			   an LLdecr in the calling rule followed by an LLincr in the rule
1453 			   - always do the LLincr in the calling rule, thereby avoiding
1454 			   possible LLdecr's */
1455 			if (grammarPart->subtype == PART_NONTERMINAL && (grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE))
1456 				generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_PUSH);
1457 		}
1458 		/* TERMs alway need to be pushed due to the possibility of skipping
1459 		   tokens. We don't want to skip anything in the TERMs contains set. */
1460 		if (grammarPart->subtype == PART_TERM)
1461 			generateSetPushPop(output, grammarPart->uTerm.contains, DIR_PUSH);
1462 	}
1463 
1464 	needsPushPop = false;
1465 	for (i = 0; i < listSize(alternative->parts); i++) {
1466 		grammarPart = (GrammarPart *) listIndex(alternative->parts, i);
1467 		switch (grammarPart->subtype) {
1468 			case PART_ACTION:
1469 				outputCode(output, grammarPart->uAction, true);
1470 				break;
1471 			case PART_TERMINAL:
1472 			case PART_LITERAL:
1473 				if (needsRead)
1474 					tfputs(output, "LLread();\n");
1475 				if (needsPushPop)
1476 					tfprintf(output, "LLtcnt[%d]--;\n", grammarPart->uTerminal.terminal);
1477 
1478 				if (grammarPart->uTerminal.terminal == 0)
1479 					tfputs(output, "\tif (LLcsymb != 0) LLerror(EOFILE);\n");
1480 				else if (condensedToTerminal[grammarPart->uTerminal.terminal].flags & CONDENSED_ISTOKEN)
1481 					tfprintf(output, "LL_SCANDONE(%d);", condensedToTerminal[grammarPart->uTerminal.terminal].uToken->uNumber);
1482 				else
1483 					tfprintf(output, "LL_SCANDONE(%d);", condensedToTerminal[grammarPart->uTerminal.terminal].uLiteral);
1484 
1485 				tfprintTokenComment(output, grammarPart->uTerminal.terminal);
1486 				needsRead = true;
1487 				break;
1488 			case PART_NONTERMINAL:
1489 				if (needsRead)
1490 					tfputs(output, "LLread();\n");
1491 				/* As explained above (where sets are pushed), non-terminals
1492 				   with multiple alternatives never need their contains sets
1493 				   decremented here. */
1494 				if (needsPushPop && !(grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE))
1495 					generateSetPushPop(output, grammarPart->uNonTerminal.nonTerminal->term.contains, DIR_POP);
1496 
1497 				if (!(grammarPart->uNonTerminal.flags & CALL_LLDISCARD)) {
1498 					if (grammarPart->uNonTerminal.retvalIdent)
1499 						tfprintf(output, "%s = ", grammarPart->uNonTerminal.retvalIdent->text);
1500 					else if (grammarPart->uNonTerminal.nonTerminal->retvalIdent)
1501 						tfprintf(output, "%s = ", grammarPart->token->text);
1502 				}
1503 
1504 				tfprintf(output, "%s%d_%s", prefix, grammarPart->uNonTerminal.nonTerminal->number, grammarPart->token->text);
1505 				/* Thread-safe parsers require LLthis as the first argument.
1506 				   Therefore we need to handle call generation for thread-safe
1507 				   parsers quite differently. */
1508 				if (!option.threadSafe) {
1509 					if (grammarPart->uNonTerminal.expression != NULL) {
1510 						tfputc(output, '\n');
1511 						outputCode(output, grammarPart->uNonTerminal.expression, true);
1512 						tfputs(output, ";\n");
1513 					} else {
1514 						tfputs(output, "();\n");
1515 					}
1516 				} else {
1517 					if (grammarPart->uNonTerminal.expression != NULL) {
1518 						tfputs(output, "(LLthis");
1519 						if (grammarPart->uNonTerminal.nonTerminal->argCount > 0)
1520 							tfputc(output, ',');
1521 						tfputc(output, '\n');
1522 						outputCode(output, grammarPart->uNonTerminal.expression, false);
1523 						tfputs(output, ");\n");
1524 					} else {
1525 						tfprintf(output, "(LLthis);\n");
1526 					}
1527 				}
1528 				needsRead = grammarPart->uNonTerminal.nonTerminal->term.flags & TERM_NO_FINAL_READ;
1529 				break;
1530 			case PART_TERM:
1531 				if (needsRead)
1532 					tfputs(output, "LLread();\n");
1533 
1534 				if (grammarPart->uTerm.repeats.subtype == FINOPT)
1535 					finoptGenerateCode(output, &grammarPart->uTerm);
1536 				else
1537 					termGenerateRepeatsCode(output, &grammarPart->uTerm);
1538 
1539 				needsRead = grammarPart->uTerm.flags & TERM_NO_FINAL_READ;
1540 				break;
1541 			default:
1542 				PANIC();
1543 		}
1544 		/* If the part didn't require a pop and it isn't an action, then the
1545 		   next part does need a pop. */
1546 		if (!needsPushPop && grammarPart->subtype != PART_ACTION)
1547 			needsPushPop = true;
1548 	}
1549 	if (needsRead && needsFinalRead)
1550 		tfputs(output, "LLread();\n");
1551 }
1552 
1553 /** Generate code for a variable to hold return values.
1554 	@param output The @a File to write to.
1555 	@param retvalIdent The type of the variable.
1556 	@param name The name of the variable.
1557 */
generateReturnVariable(File * output,Token * retvalIdent,const char * name)1558 static void generateReturnVariable(File *output, Token *retvalIdent, const char *name) {
1559 	tfprintDirective(output, retvalIdent);
1560 	tfputs(output, retvalIdent->text);
1561 	tfputc(output, '\n');
1562 	tfprintDirective(output, NULL);
1563 	tfprintf(output, "%s;\n", name);
1564 }
1565 
1566 /** Iterator routine to generate variables for return values.
1567 	@param key The name of the variable.
1568 	@param data The first @a grammarPart that names this variable.
1569 	@param userData The @a File to write to.
1570 */
generateVariable(const char * key,void * data,void * userData)1571 static void generateVariable(const char *key, void *data, void *userData) {
1572 	GrammarPart *grammarPart = (GrammarPart *) data;
1573 
1574 	generateReturnVariable((File *) userData, grammarPart->uNonTerminal.nonTerminal->retvalIdent, key);
1575 }
1576 
1577 /** Iterator routine to initialise generated variables for return values.
1578 	@param key The name of the variable.
1579 	@param data The first @a grammarPart that names this variable (unused).
1580 	@param userData The @a File to write to.
1581 */
generateVariableInit(const char * key,void * data,void * userData)1582 static void generateVariableInit(const char *key, void *data, void *userData) {
1583 	/* Parameter data is unused in this function, so make the compiler shut up. */
1584 	(void) data;
1585 
1586 	tfprintf((File *) userData, "memset(&%s, 0, sizeof(%s));\n", key, key);
1587 }
1588 
1589 /** Write code for a non-terminal.
1590 	@param output The @a File to write to.
1591 	@param nonTerminal The @a NonTerminal to write code for.
1592 */
generateNonTerminalCode(File * output,NonTerminal * nonTerminal)1593 void generateNonTerminalCode(File *output, NonTerminal *nonTerminal) {
1594 	bool memsetsGenerated = false;
1595 
1596 	/* Don't generate code for unreachable non-terminals */
1597 	if (!(nonTerminal->flags & NONTERMINAL_REACHABLE))
1598 		return;
1599 
1600 	labelCounter = 0;
1601 
1602 	/* Generate the header for the function, with parameters if it has them */
1603 	generateNonTerminalName(output, nonTerminal);
1604 	if (!option.threadSafe)
1605 		generateNonTerminalParameters(output, nonTerminal);
1606 	else
1607 		generateNonTerminalParametersTS(output, nonTerminal);
1608 
1609 	tfprintf(output, "{\n");
1610 
1611 	/* Declare variables for return values. */
1612 	if (nonTerminal->retvalIdent != NULL)
1613 		generateReturnVariable(output, nonTerminal->retvalIdent, "LLretval");
1614 
1615 	iterateScope(nonTerminal->retvalScope, generateVariable, output);
1616 
1617 	if (!option.noInitLLretval) {
1618 		/* Clear LLretval and variables generated to hold return values, and open
1619 		   a new scope so that the variables defined by the user will be defined
1620 		   at the start of the scope. */
1621 		if (nonTerminal->retvalIdent != NULL) {
1622 			tfputs(output, "memset(&LLretval, 0, sizeof(LLretval));\n");
1623 			memsetsGenerated = true;
1624 		}
1625 
1626 		if (iterateScope(nonTerminal->retvalScope, generateVariableInit, output))
1627 			memsetsGenerated = true;
1628 
1629 		if (memsetsGenerated)
1630 			tfputs(output, "{\n");
1631 	}
1632 
1633 	/* Generate the declarations if available */
1634 	if (nonTerminal->declarations != NULL)
1635 		outputCode(output, nonTerminal->declarations, false);
1636 
1637 	termGenerateCode(output, &nonTerminal->term, true, nonTerminal->term.flags & TERM_MULTIPLE_ALTERNATIVE ? POP_ALWAYS : POP_NEVER);
1638 
1639 	if (memsetsGenerated)
1640 		/* Close scope opened after clearing LLretval (if it was opened). */
1641 		tfputs(output, "}\n");
1642 
1643 	if (nonTerminal->retvalIdent != NULL)
1644 		tfputs(output, "return LLretval;\n");
1645 
1646 	tfputs(output, "}\n");
1647 }
1648