1 /*
2  * This file is part of RGBDS.
3  *
4  * Copyright (c) 2013-2018, stag019 and RGBDS contributors.
5  *
6  * SPDX-License-Identifier: MIT
7  */
8 
9 #include <errno.h>
10 #include <stdbool.h>
11 #include <stdint.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 #include "asm/charmap.h"
17 #include "asm/main.h"
18 #include "asm/output.h"
19 #include "asm/util.h"
20 #include "asm/warning.h"
21 
22 #include "hashmap.h"
23 
24 /*
25  * Charmaps are stored using a structure known as "trie".
26  * Essentially a tree, where each nodes stores a single character's worth of info:
27  * whether there exists a mapping that ends at the current character,
28  */
29 struct Charnode {
30 	bool isTerminal; /* Whether there exists a mapping that ends here */
31 	uint8_t value; /* If the above is true, its corresponding value */
32 	/* This MUST be indexes and not pointers, because pointers get invalidated by `realloc`!! */
33 	size_t next[255]; /* Indexes of where to go next, 0 = nowhere */
34 };
35 
36 #define INITIAL_CAPACITY 32
37 
38 struct Charmap {
39 	char *name;
40 	size_t usedNodes; /* How many nodes are being used */
41 	size_t capacity; /* How many nodes have been allocated */
42 	struct Charnode nodes[]; /* first node is reserved for the root node */
43 };
44 
45 static HashMap charmaps;
46 
47 /*
48  * Store pointers to hashmap nodes, so that there is only one pointer to the memory block
49  * that gets reallocated.
50  */
51 static struct Charmap **currentCharmap;
52 
53 struct CharmapStackEntry {
54 	struct Charmap **charmap;
55 	struct CharmapStackEntry *next;
56 };
57 
58 struct CharmapStackEntry *charmapStack;
59 
charmap_Get(char const * name)60 static struct Charmap *charmap_Get(char const *name)
61 {
62 	return hash_GetElement(charmaps, name);
63 }
64 
resizeCharmap(struct Charmap ** map,size_t capacity)65 static void resizeCharmap(struct Charmap **map, size_t capacity)
66 {
67 	*map = realloc(*map, sizeof(**map) + sizeof(*(*map)->nodes) * capacity);
68 
69 	if (!*map)
70 		fatalerror("Failed to %s charmap: %s\n",
71 			   *map ? "create" : "resize", strerror(errno));
72 	(**map).capacity = capacity;
73 }
74 
initNode(struct Charnode * node)75 static void initNode(struct Charnode *node)
76 {
77 	node->isTerminal = false;
78 	memset(node->next, 0, sizeof(node->next));
79 }
80 
charmap_New(char const * name,char const * baseName)81 struct Charmap *charmap_New(char const *name, char const *baseName)
82 {
83 	struct Charmap *base = NULL;
84 
85 	if (baseName != NULL) {
86 		base = charmap_Get(baseName);
87 
88 		if (base == NULL)
89 			error("Base charmap '%s' doesn't exist\n", baseName);
90 	}
91 
92 	struct Charmap *charmap = charmap_Get(name);
93 
94 	if (charmap) {
95 		error("Charmap '%s' already exists\n", name);
96 		return charmap;
97 	}
98 
99 	/* Init the new charmap's fields */
100 	if (base) {
101 		resizeCharmap(&charmap, base->capacity);
102 		charmap->usedNodes = base->usedNodes;
103 
104 		memcpy(charmap->nodes, base->nodes, sizeof(base->nodes[0]) * charmap->usedNodes);
105 	} else {
106 		resizeCharmap(&charmap, INITIAL_CAPACITY);
107 		charmap->usedNodes = 1;
108 		initNode(&charmap->nodes[0]); /* Init the root node */
109 	}
110 	charmap->name = strdup(name);
111 
112 	currentCharmap = (struct Charmap **)hash_AddElement(charmaps, charmap->name, charmap);
113 
114 	return charmap;
115 }
116 
charmap_Delete(struct Charmap * charmap)117 void charmap_Delete(struct Charmap *charmap)
118 {
119 	free(charmap->name);
120 	free(charmap);
121 }
122 
charmap_Set(char const * name)123 void charmap_Set(char const *name)
124 {
125 	struct Charmap **charmap = (struct Charmap **)hash_GetNode(charmaps, name);
126 
127 	if (charmap == NULL)
128 		error("Charmap '%s' doesn't exist\n", name);
129 	else
130 		currentCharmap = charmap;
131 }
132 
charmap_Push(void)133 void charmap_Push(void)
134 {
135 	struct CharmapStackEntry *stackEntry;
136 
137 	stackEntry = malloc(sizeof(*stackEntry));
138 	if (stackEntry == NULL)
139 		fatalerror("Failed to alloc charmap stack entry: %s\n", strerror(errno));
140 
141 	stackEntry->charmap = currentCharmap;
142 	stackEntry->next = charmapStack;
143 
144 	charmapStack = stackEntry;
145 }
146 
charmap_Pop(void)147 void charmap_Pop(void)
148 {
149 	if (charmapStack == NULL) {
150 		error("No entries in the charmap stack\n");
151 		return;
152 	}
153 
154 	struct CharmapStackEntry *top = charmapStack;
155 
156 	currentCharmap = top->charmap;
157 	charmapStack = top->next;
158 	free(top);
159 }
160 
charmap_Add(char * mapping,uint8_t value)161 void charmap_Add(char *mapping, uint8_t value)
162 {
163 	struct Charmap *charmap = *currentCharmap;
164 	struct Charnode *node = &charmap->nodes[0];
165 
166 	for (uint8_t c; *mapping; mapping++) {
167 		c = *mapping - 1;
168 
169 		if (node->next[c]) {
170 			node = &charmap->nodes[node->next[c]];
171 		} else {
172 			/* Register next available node */
173 			node->next[c] = charmap->usedNodes;
174 			/* If no more nodes are available, get new ones */
175 			if (charmap->usedNodes == charmap->capacity) {
176 				charmap->capacity *= 2;
177 				resizeCharmap(currentCharmap, charmap->capacity);
178 				charmap = *currentCharmap;
179 			}
180 
181 			/* Switch to and init new node */
182 			node = &charmap->nodes[charmap->usedNodes++];
183 			initNode(node);
184 		}
185 	}
186 
187 	if (node->isTerminal)
188 		warning(WARNING_CHARMAP_REDEF, "Overriding charmap mapping\n");
189 
190 	node->isTerminal = true;
191 	node->value = value;
192 }
193 
charmap_Convert(char const * input,uint8_t * output)194 size_t charmap_Convert(char const *input, uint8_t *output)
195 {
196 	uint8_t *start = output;
197 
198 	while (charmap_ConvertNext(&input, &output))
199 		;
200 
201 	return output - start;
202 }
203 
charmap_ConvertNext(char const ** input,uint8_t ** output)204 size_t charmap_ConvertNext(char const **input, uint8_t **output)
205 {
206 	/*
207 	 * The goal is to match the longest mapping possible.
208 	 * For that, advance through the trie with each character read.
209 	 * If that would lead to a dead end, rewind characters until the last match, and output.
210 	 * If no match, read a UTF-8 codepoint and output that.
211 	 */
212 	struct Charmap const *charmap = *currentCharmap;
213 	struct Charnode const *node = &charmap->nodes[0];
214 	struct Charnode const *match = NULL;
215 	size_t rewindDistance = 0;
216 
217 	for (;;) {
218 		uint8_t c = **input - 1;
219 
220 		if (**input && node->next[c]) {
221 			// Consume that char
222 			(*input)++;
223 			rewindDistance++;
224 
225 			// Advance to next node (index starts at 1)
226 			node = &charmap->nodes[node->next[c]];
227 			if (node->isTerminal) {
228 				// This node matches, register it
229 				match = node;
230 				rewindDistance = 0; // If no longer match is found, rewind here
231 			}
232 
233 		} else {
234 			// We are at a dead end (either because we reached the end of input, or of
235 			// the trie), so rewind up to the last match, and output.
236 			*input -= rewindDistance; // This will rewind all the way if no match found
237 
238 			if (match) { // A match was found, use it
239 				if (output)
240 					*(*output)++ = match->value;
241 
242 				return 1;
243 
244 			} else if (**input) { // No match found, but there is some input left
245 				// This will write the codepoint's value to `output`, little-endian
246 				size_t codepointLen = readUTF8Char(output ? *output : NULL,
247 								   *input);
248 
249 				if (codepointLen == 0)
250 					error("Input string is not valid UTF-8!\n");
251 
252 				// OK because UTF-8 has no NUL in multi-byte chars
253 				*input += codepointLen;
254 				if (output)
255 					*output += codepointLen;
256 
257 				return codepointLen;
258 
259 			} else { // End of input
260 				return 0;
261 			}
262 		}
263 	}
264 }
265