1 /*
2  * This file is part of RGBDS.
3  *
4  * Copyright (c) 2019, Eldred Habert and RGBDS contributors.
5  *
6  * SPDX-License-Identifier: MIT
7  */
8 
9 #include <ctype.h>
10 #include <inttypes.h>
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include "link/main.h"
17 #include "link/script.h"
18 #include "link/section.h"
19 
20 #include "error.h"
21 
/* Stream that the lexer (`nextChar`) currently reads characters from */
FILE *linkerScript;
/* NOTE(review): not referenced anywhere in this part of the file */
char *includeFileName;

/* Line number within the current linker script, used in diagnostics */
static uint32_t lineNo;

/* State of one suspended linker script, saved while an INCLUDE is read */
struct FileStackEntry {
	FILE *file;      /* Stream of the suspended script */
	uint32_t lineNo; /* Line that was being read when it got suspended */
	char *name;      /* Name of the suspended script */
};

/* Dynamically grown stack of suspended scripts */
static struct FileStackEntry *fileStack;
/* Number of entries `fileStack` has room for */
static uint32_t fileStackSize;
/* Number of entries currently in use */
static uint32_t fileStackIndex;
35 
pushFile(char * newFileName)36 static void pushFile(char *newFileName)
37 {
38 	if (fileStackIndex == UINT32_MAX)
39 		errx("%s(%" PRIu32 "): INCLUDE recursion limit reached",
40 		     linkerScriptName, lineNo);
41 
42 	if (fileStackIndex == fileStackSize) {
43 		if (!fileStackSize) /* Init file stack */
44 			fileStackSize = 4;
45 		fileStackSize *= 2;
46 		fileStack = realloc(fileStack, sizeof(*fileStack) * fileStackSize);
47 		if (!fileStack)
48 			err("%s(%" PRIu32 "): Internal INCLUDE error",
49 			    linkerScriptName, lineNo);
50 	}
51 
52 	fileStack[fileStackIndex].file = linkerScript;
53 	fileStack[fileStackIndex].lineNo = lineNo;
54 	fileStack[fileStackIndex].name = linkerScriptName;
55 	fileStackIndex++;
56 
57 	linkerScript = fopen(newFileName, "r");
58 	if (!linkerScript)
59 		err("%s(%" PRIu32 "): Could not open \"%s\"",
60 		    linkerScriptName, lineNo, newFileName);
61 	lineNo = 1;
62 	linkerScriptName = newFileName;
63 }
64 
popFile(void)65 static bool popFile(void)
66 {
67 	if (!fileStackIndex)
68 		return false;
69 
70 	free(linkerScriptName);
71 
72 	fileStackIndex--;
73 	linkerScript = fileStack[fileStackIndex].file;
74 	lineNo = fileStack[fileStackIndex].lineNo;
75 	linkerScriptName = fileStack[fileStackIndex].name;
76 
77 	return true;
78 }
79 
/* Tell whether `c` is a blank that separates tokens (space or tab) */
static bool isWhiteSpace(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
84 
/* Tell whether `c` is a line terminator character (CR or LF) */
static bool isNewline(int c)
{
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
89 
/**
 * Try parsing a number, in base 16 if it begins with a dollar,
 * in base 10 otherwise. Hexadecimal digits are accepted in either case.
 * The whole string must be made of digits; an empty string, a lone `$`,
 * and values that do not fit in 32 bits are all rejected.
 * @param str The number to parse
 * @param number A pointer where the number will be written to;
 *               it is only written when parsing succeeds
 * @return True if parsing was successful, false otherwise
 */
static bool tryParseNumber(char const *str, uint32_t *number)
{
	static char const digits[] = "0123456789ABCDEF";
	uint32_t base = 10;

	if (*str == '$') {
		str++;
		base = 16;
	}

	/* An empty string is not a number */
	if (!*str)
		return false;

	uint32_t value = 0;

	for (; *str; str++) {
		/* <ctype.h> functions are only defined for `unsigned char` values */
		int chr = toupper((unsigned char)*str);
		uint32_t digit = 0;

		/* Only the first `base` entries are valid digits in this base */
		while (digit < base && chr != digits[digit])
			digit++;
		if (digit == base)
			return false;

		/* Reject the number instead of letting it wrap around */
		if (value > (UINT32_MAX - digit) / base)
			return false;
		value = value * base + digit;
	}

	*number = value;
	return true;
}
131 
/* Kinds of tokens the linker script lexer can produce */
enum LinkerScriptTokenType {
	TOKEN_NEWLINE,
	TOKEN_COMMAND,
	TOKEN_BANK,
	TOKEN_INCLUDE,
	TOKEN_NUMBER,
	TOKEN_STRING,
	TOKEN_EOF,

	/* Not a real token: marks "nothing matched yet", and the type count */
	TOKEN_INVALID
};

/*
 * Human-readable name of each token type, for use in diagnostics.
 * Every real token type needs an entry: the names are handed to `%s`,
 * for which a missing (NULL) entry is undefined behavior.
 */
char const *tokenTypes[] = {
	[TOKEN_NEWLINE] = "newline",
	[TOKEN_COMMAND] = "command",
	[TOKEN_BANK]    = "bank command",
	[TOKEN_INCLUDE] = "include command",
	[TOKEN_NUMBER]  = "number",
	[TOKEN_STRING]  = "string",
	[TOKEN_EOF]     = "end of file"
};
152 
/* Commands that move the current address; values index `commands` */
enum LinkerScriptCommand {
	COMMAND_ORG = 0, /* Jump to an explicit address */
	COMMAND_ALIGN,   /* Round the address up to a power-of-two boundary */

	/* Not a real command: also serves as the number of commands */
	COMMAND_INVALID
};
159 
160 struct LinkerScriptToken {
161 	enum LinkerScriptTokenType type;
162 	union LinkerScriptTokenAttr {
163 		enum LinkerScriptCommand command;
164 		enum SectionType secttype;
165 		uint32_t number;
166 		char *string;
167 	} attr;
168 };
169 
170 static char const * const commands[] = {
171 	[COMMAND_ORG] = "ORG",
172 	[COMMAND_ALIGN] = "ALIGN"
173 };
174 
nextChar(void)175 static int nextChar(void)
176 {
177 	int curchar = getc(linkerScript);
178 
179 	if (curchar == EOF && ferror(linkerScript))
180 		err("%s(%" PRIu32 "): Unexpected error in %s",
181 		    linkerScriptName, lineNo, __func__);
182 	return curchar;
183 }
184 
nextToken(void)185 static struct LinkerScriptToken *nextToken(void)
186 {
187 	static struct LinkerScriptToken token;
188 	int curchar;
189 
190 	/* If the token has a string, make sure to avoid leaking it */
191 	if (token.type == TOKEN_STRING)
192 		free(token.attr.string);
193 
194 	/* Skip initial whitespace... */
195 	do
196 		curchar = nextChar();
197 	while (isWhiteSpace(curchar));
198 
199 	/* If this is a comment, skip to the end of the line */
200 	if (curchar == ';') {
201 		do {
202 			curchar = nextChar();
203 		} while (!isNewline(curchar) && curchar != EOF);
204 	}
205 
206 	if (curchar == EOF) {
207 		token.type = TOKEN_EOF;
208 	} else if (isNewline(curchar)) {
209 		/* If we have a newline char, this is a newline token */
210 		token.type = TOKEN_NEWLINE;
211 
212 		if (curchar == '\r') {
213 			/* Handle CRLF */
214 			curchar = nextChar();
215 			if (curchar != '\n')
216 				ungetc(curchar, linkerScript);
217 		}
218 	} else if (curchar == '"') {
219 		/* If we have a string start, this is a string */
220 		token.type = TOKEN_STRING;
221 		token.attr.string = NULL; /* Force initial alloc */
222 
223 		size_t size = 0;
224 		size_t capacity = 16; /* Half of the default capacity */
225 
226 		do {
227 			curchar = nextChar();
228 			if (curchar == EOF || isNewline(curchar)) {
229 				errx("%s(%" PRIu32 "): Unterminated string",
230 				     linkerScriptName, lineNo);
231 			} else if (curchar == '"') {
232 				/* Quotes force a string termination */
233 				curchar = '\0';
234 			} else if (curchar == '\\') {
235 				/* Backslashes are escape sequences */
236 				curchar = nextChar();
237 				if (curchar == EOF || isNewline(curchar))
238 					errx("%s(%" PRIu32 "): Unterminated string",
239 					     linkerScriptName, lineNo);
240 				else if (curchar == 'n')
241 					curchar = '\n';
242 				else if (curchar == 'r')
243 					curchar = '\r';
244 				else if (curchar == 't')
245 					curchar = '\t';
246 				else if (curchar != '\\' && curchar != '"')
247 					errx("%s(%" PRIu32 "): Illegal character escape",
248 					     linkerScriptName, lineNo);
249 			}
250 
251 			if (size >= capacity || token.attr.string == NULL) {
252 				capacity *= 2;
253 				token.attr.string = realloc(token.attr.string, capacity);
254 				if (!token.attr.string)
255 					err("%s: Failed to allocate memory for string",
256 					    __func__);
257 			}
258 			token.attr.string[size++] = curchar;
259 		} while (curchar);
260 	} else {
261 		/* This is either a number, command or bank, that is: a word */
262 		char *str = NULL;
263 		size_t size = 0;
264 		size_t capacity = 8; /* Half of the default capacity */
265 
266 		for (;;) {
267 			if (size >= capacity || str == NULL) {
268 				capacity *= 2;
269 				str = realloc(str, capacity);
270 				if (!str)
271 					err("%s: Failed to allocate memory for token",
272 					    __func__);
273 			}
274 			str[size] = toupper(curchar);
275 			size++;
276 
277 			if (!curchar)
278 				break;
279 
280 			curchar = nextChar();
281 			/* Whitespace, a newline or a comment end the token */
282 			if (isWhiteSpace(curchar) || isNewline(curchar) || curchar == ';') {
283 				ungetc(curchar, linkerScript);
284 				curchar = '\0';
285 			}
286 		}
287 
288 		token.type = TOKEN_INVALID;
289 
290 		/* Try to match a command */
291 		for (enum LinkerScriptCommand i = 0; i < COMMAND_INVALID; i++) {
292 			if (!strcmp(commands[i], str)) {
293 				token.type = TOKEN_COMMAND;
294 				token.attr.command = i;
295 				break;
296 			}
297 		}
298 
299 		if (token.type == TOKEN_INVALID) {
300 			/* Try to match a bank specifier */
301 			for (enum SectionType type = 0; type < SECTTYPE_INVALID; type++) {
302 				if (!strcmp(typeNames[type], str)) {
303 					token.type = TOKEN_BANK;
304 					token.attr.secttype = type;
305 					break;
306 				}
307 			}
308 		}
309 
310 		if (token.type == TOKEN_INVALID) {
311 			/* Try to match an include token */
312 			if (!strcmp("INCLUDE", str))
313 				token.type = TOKEN_INCLUDE;
314 		}
315 
316 		if (token.type == TOKEN_INVALID) {
317 			/* None of the strings matched, do we have a number? */
318 			if (tryParseNumber(str, &token.attr.number))
319 				token.type = TOKEN_NUMBER;
320 			else
321 				errx("%s(%" PRIu32 "): Unknown token \"%s\"",
322 				     linkerScriptName, lineNo, str);
323 		}
324 
325 		free(str);
326 	}
327 
328 	return &token;
329 }
330 
processCommand(enum LinkerScriptCommand command,uint16_t arg,uint16_t * pc)331 static void processCommand(enum LinkerScriptCommand command, uint16_t arg, uint16_t *pc)
332 {
333 	switch (command) {
334 	case COMMAND_INVALID:
335 		unreachable_();
336 
337 	case COMMAND_ORG:
338 		break;
339 
340 	case COMMAND_ALIGN:
341 		if (arg >= 16)
342 			arg = 0;
343 		else
344 			arg = (*pc + (1 << arg) - 1) & ~((1 << arg) - 1);
345 	}
346 
347 	if (arg < *pc)
348 		errx("%s(%" PRIu32 "): `%s` cannot be used to go backwards (currently at $%x)",
349 		     linkerScriptName, lineNo, commands[command], *pc);
350 	*pc = arg;
351 }
352 
353 enum LinkerScriptParserState {
354 	PARSER_FIRSTTIME,
355 	PARSER_LINESTART,
356 	PARSER_INCLUDE, /* After an INCLUDE token */
357 	PARSER_LINEEND
358 };
359 
360 /* Part of internal state, but has data that needs to be freed */
361 static uint16_t *curaddr[SECTTYPE_INVALID];
362 
363 /* Put as global to ensure it's initialized only once */
364 static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME;
365 
script_NextSection(void)366 struct SectionPlacement *script_NextSection(void)
367 {
368 	static struct SectionPlacement section;
369 	static enum SectionType type;
370 	static uint32_t bank;
371 	static uint32_t bankID;
372 
373 	if (parserState == PARSER_FIRSTTIME) {
374 		lineNo = 1;
375 
376 		/* Init PC for all banks */
377 		for (enum SectionType i = 0; i < SECTTYPE_INVALID; i++) {
378 			curaddr[i] = malloc(sizeof(*curaddr[i]) * nbbanks(i));
379 			for (uint32_t b = 0; b < nbbanks(i); b++)
380 				curaddr[i][b] = startaddr[i];
381 		}
382 
383 		type = SECTTYPE_INVALID;
384 
385 		parserState = PARSER_LINESTART;
386 	}
387 
388 	for (;;) {
389 		struct LinkerScriptToken *token = nextToken();
390 		enum LinkerScriptTokenType tokType;
391 		union LinkerScriptTokenAttr attr;
392 		bool hasArg;
393 		uint32_t arg;
394 
395 		if (type != SECTTYPE_INVALID) {
396 			if (curaddr[type][bankID] > endaddr(type) + 1)
397 				errx("%s(%" PRIu32 "): Sections would extend past the end of %s ($%04" PRIx16 " > $%04" PRIx16 ")",
398 				     linkerScriptName, lineNo, typeNames[type],
399 				     curaddr[type][bankID], endaddr(type));
400 			if (curaddr[type][bankID] < startaddr[type])
401 				errx("%s(%" PRIu32 "): PC underflowed ($%04" PRIx16 " < $%04" PRIx16 ")",
402 				     linkerScriptName, lineNo,
403 				     curaddr[type][bankID], startaddr[type]);
404 		}
405 
406 		switch (parserState) {
407 		case PARSER_FIRSTTIME:
408 			unreachable_();
409 
410 		case PARSER_LINESTART:
411 			switch (token->type) {
412 			case TOKEN_INVALID:
413 				unreachable_();
414 
415 			case TOKEN_EOF:
416 				if (!popFile())
417 					return NULL;
418 				parserState = PARSER_LINEEND;
419 				break;
420 
421 			case TOKEN_NUMBER:
422 				errx("%s(%" PRIu32 "): stray number \"%" PRIu32 "\"",
423 				     linkerScriptName, lineNo,
424 				     token->attr.number);
425 
426 			case TOKEN_NEWLINE:
427 				lineNo++;
428 				break;
429 
430 			/* A stray string is a section name */
431 			case TOKEN_STRING:
432 				parserState = PARSER_LINEEND;
433 
434 				if (type == SECTTYPE_INVALID)
435 					errx("%s(%" PRIu32 "): Didn't specify a location before the section",
436 					     linkerScriptName, lineNo);
437 
438 				section.section =
439 					sect_GetSection(token->attr.string);
440 				if (!section.section)
441 					errx("%s(%" PRIu32 "): Unknown section \"%s\"",
442 					     linkerScriptName, lineNo,
443 					     token->attr.string);
444 				section.org = curaddr[type][bankID];
445 				section.bank = bank;
446 
447 				curaddr[type][bankID] += section.section->size;
448 				return &section;
449 
450 			case TOKEN_COMMAND:
451 			case TOKEN_BANK:
452 				tokType = token->type;
453 				attr = token->attr;
454 
455 				token = nextToken();
456 				hasArg = token->type == TOKEN_NUMBER;
457 				/*
458 				 * Leaving `arg` uninitialized when `!hasArg`
459 				 * causes GCC to warn about its use as an
460 				 * argument to `processCommand`. This cannot
461 				 * happen because `hasArg` has to be true, but
462 				 * silence the warning anyways.
463 				 * I dislike doing this because it could swallow
464 				 * actual errors, but I don't have a choice.
465 				 */
466 				arg = hasArg ? token->attr.number : 0;
467 
468 				if (tokType == TOKEN_COMMAND) {
469 					if (type == SECTTYPE_INVALID)
470 						errx("%s(%" PRIu32 "): Didn't specify a location before the command",
471 						     linkerScriptName, lineNo);
472 					if (!hasArg)
473 						errx("%s(%" PRIu32 "): Command specified without an argument",
474 						     linkerScriptName, lineNo);
475 
476 					processCommand(attr.command, arg, &curaddr[type][bankID]);
477 				} else { /* TOKEN_BANK */
478 					type = attr.secttype;
479 					/*
480 					 * If there's only one bank,
481 					 * specifying the number is optional.
482 					 */
483 					if (!hasArg && nbbanks(type) != 1)
484 						errx("%s(%" PRIu32 "): Didn't specify a bank number",
485 						     linkerScriptName, lineNo);
486 					else if (!hasArg)
487 						arg = bankranges[type][0];
488 					else if (arg < bankranges[type][0])
489 						errx("%s(%" PRIu32 "): specified bank number is too low (%" PRIu32 " < %" PRIu32 ")",
490 						     linkerScriptName, lineNo,
491 						     arg, bankranges[type][0]);
492 					else if (arg > bankranges[type][1])
493 						errx("%s(%" PRIu32 "): specified bank number is too high (%" PRIu32 " > %" PRIu32 ")",
494 						     linkerScriptName, lineNo,
495 						     arg, bankranges[type][1]);
496 					bank = arg;
497 					bankID = arg - bankranges[type][0];
498 				}
499 
500 				/* If we read a token we shouldn't have... */
501 				if (token->type != TOKEN_NUMBER)
502 					goto lineend;
503 				break;
504 
505 			case TOKEN_INCLUDE:
506 				parserState = PARSER_INCLUDE;
507 				break;
508 			}
509 			break;
510 
511 		case PARSER_INCLUDE:
512 			if (token->type != TOKEN_STRING)
513 				errx("%s(%" PRIu32 "): Expected a file name after INCLUDE",
514 				     linkerScriptName, lineNo);
515 
516 			/* Switch to that file */
517 			pushFile(token->attr.string);
518 			/* The file stack took ownership of the string */
519 			token->attr.string = NULL;
520 
521 			parserState = PARSER_LINESTART;
522 			break;
523 
524 		case PARSER_LINEEND:
525 lineend:
526 			lineNo++;
527 			parserState = PARSER_LINESTART;
528 			if (token->type == TOKEN_EOF) {
529 				if (!popFile())
530 					return NULL;
531 				parserState = PARSER_LINEEND;
532 			} else if (token->type != TOKEN_NEWLINE)
533 				errx("%s(%" PRIu32 "): Unexpected %s at the end of the line",
534 				     linkerScriptName, lineNo,
535 				     tokenTypes[token->type]);
536 			break;
537 		}
538 	}
539 }
540 
script_Cleanup(void)541 void script_Cleanup(void)
542 {
543 	for (enum SectionType type = 0; type < SECTTYPE_INVALID; type++)
544 		free(curaddr[type]);
545 }
546