1 /* liblouis Braille Translation and Back-Translation Library
2 
3    Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4    BRLTTY Team
5 
6    Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7    Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8    Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9    Copyright (C) 2016 Davy Kager, Dedicon
10 
11    This file is part of liblouis.
12 
13    liblouis is free software: you can redistribute it and/or modify it
14    under the terms of the GNU Lesser General Public License as published
15    by the Free Software Foundation, either version 2.1 of the License, or
16    (at your option) any later version.
17 
18    liblouis is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21    Lesser General Public License for more details.
22 
23    You should have received a copy of the GNU Lesser General Public
24    License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 /**
28  * @file
29  * @brief Read and compile translation tables
30  */
31 
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <stdarg.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39 
40 #include "internal.h"
41 #include "config.h"
42 
43 #define QUOTESUB 28 /* Stand-in for double quotes in strings */
44 
/* needed to make debugging easier */
46 #ifdef DEBUG
47 wchar_t wchar;
48 #endif
49 
50 /* The following variables and functions make it possible to specify the
51  * path on which all tables for liblouis and all files for liblouisutdml,
52  * in their proper directories, will be found.
53  */
54 
/* Currently configured data path; NULL while no path is set. */
static char *dataPathPtr;
56 
57 char *EXPORT_CALL
lou_setDataPath(const char * path)58 lou_setDataPath(const char *path) {
59 	static char dataPath[MAXSTRING];
60 	dataPathPtr = NULL;
61 	if (path == NULL) return NULL;
62 	strcpy(dataPath, path);
63 	dataPathPtr = dataPath;
64 	return dataPathPtr;
65 }
66 
67 char *EXPORT_CALL
lou_getDataPath(void)68 lou_getDataPath(void) {
69 	return dataPathPtr;
70 }
71 
72 /* End of dataPath code. */
73 
74 static int
eqasc2uni(const unsigned char * a,const widechar * b,const int len)75 eqasc2uni(const unsigned char *a, const widechar *b, const int len) {
76 	int k;
77 	for (k = 0; k < len; k++)
78 		if ((widechar)a[k] != b[k]) return 0;
79 	return 1;
80 }
81 
82 typedef struct CharsString {
83 	widechar length;
84 	widechar chars[MAXSTRING];
85 } CharsString;
86 
/* Incremented once per compileError() call. */
static int errorCount;
/* Incremented once per compileWarning() call. */
static int warningCount;
89 
90 typedef struct TranslationTableChainEntry {
91 	struct TranslationTableChainEntry *next;
92 	TranslationTableHeader *table;
93 	int tableListLength;
94 	char tableList[1];
95 } TranslationTableChainEntry;
96 
97 static TranslationTableChainEntry *translationTableChain = NULL;
98 
99 typedef struct DisplayTableChainEntry {
100 	struct DisplayTableChainEntry *next;
101 	DisplayTableHeader *table;
102 	int tableListLength;
103 	char tableList[1];
104 } DisplayTableChainEntry;
105 
106 static DisplayTableChainEntry *displayTableChain = NULL;
107 
/* predefined character classes; the list is terminated by a NULL entry */
static const char *characterClassNames[] = {
	"space", "letter", "digit", "punctuation", "uppercase",
	"lowercase", "math", "sign", "litdigit", NULL,
};
121 
122 static const char *opcodeNames[CTO_None] = {
123 	"include",
124 	"locale",
125 	"undefined",
126 	"capsletter",
127 	"begcapsword",
128 	"endcapsword",
129 	"begcaps",
130 	"endcaps",
131 	"begcapsphrase",
132 	"endcapsphrase",
133 	"lencapsphrase",
134 	"letsign",
135 	"noletsignbefore",
136 	"noletsign",
137 	"noletsignafter",
138 	"numsign",
139 	"numericmodechars",
140 	"midendnumericmodechars",
141 	"numericnocontchars",
142 	"seqdelimiter",
143 	"seqbeforechars",
144 	"seqafterchars",
145 	"seqafterpattern",
146 	"seqafterexpression",
147 	"emphclass",
148 	"emphletter",
149 	"begemphword",
150 	"endemphword",
151 	"begemph",
152 	"endemph",
153 	"begemphphrase",
154 	"endemphphrase",
155 	"lenemphphrase",
156 	"capsmodechars",
157 	"emphmodechars",
158 	"begcomp",
159 	"compbegemph1",
160 	"compendemph1",
161 	"compbegemph2",
162 	"compendemph2",
163 	"compbegemph3",
164 	"compendemph3",
165 	"compcapsign",
166 	"compbegcaps",
167 	"compendcaps",
168 	"endcomp",
169 	"nocontractsign",
170 	"multind",
171 	"compdots",
172 	"comp6",
173 	"class",
174 	"after",
175 	"before",
176 	"noback",
177 	"nofor",
178 	"empmatchbefore",
179 	"empmatchafter",
180 	"swapcc",
181 	"swapcd",
182 	"swapdd",
183 	"space",
184 	"digit",
185 	"punctuation",
186 	"math",
187 	"sign",
188 	"letter",
189 	"uppercase",
190 	"lowercase",
191 	"grouping",
192 	"uplow",
193 	"litdigit",
194 	"display",
195 	"replace",
196 	"context",
197 	"correct",
198 	"pass2",
199 	"pass3",
200 	"pass4",
201 	"repeated",
202 	"repword",
203 	"capsnocont",
204 	"always",
205 	"exactdots",
206 	"nocross",
207 	"syllable",
208 	"nocont",
209 	"compbrl",
210 	"literal",
211 	"largesign",
212 	"word",
213 	"partword",
214 	"joinnum",
215 	"joinword",
216 	"lowword",
217 	"contraction",
218 	"sufword",
219 	"prfword",
220 	"begword",
221 	"begmidword",
222 	"midword",
223 	"midendword",
224 	"endword",
225 	"prepunc",
226 	"postpunc",
227 	"begnum",
228 	"midnum",
229 	"endnum",
230 	"decpoint",
231 	"hyphen",
232 	// "apostrophe",
233 	// "initial",
234 	"nobreak",
235 	"match",
236 	"backmatch",
237 	"attribute",
238 };
239 
240 static short opcodeLengths[CTO_None] = { 0 };
241 
242 static void
243 compileError(FileInfo *nested, const char *format, ...);
244 
245 static void
246 free_tablefiles(char **tables);
247 
248 static int
getAChar(FileInfo * nested)249 getAChar(FileInfo *nested) {
250 	/* Read a big endian, little endian or ASCII 8 file and convert it to
251 	 * 16- or 32-bit unsigned integers */
252 	int ch1 = 0, ch2 = 0;
253 	widechar character;
254 	if (nested->encoding == ascii8)
255 		if (nested->status == 2) {
256 			nested->status++;
257 			return nested->checkencoding[1];
258 		}
259 	while ((ch1 = fgetc(nested->in)) != EOF) {
260 		if (nested->status < 2) nested->checkencoding[nested->status] = ch1;
261 		nested->status++;
262 		if (nested->status == 2) {
263 			if (nested->checkencoding[0] == 0xfe && nested->checkencoding[1] == 0xff)
264 				nested->encoding = bigEndian;
265 			else if (nested->checkencoding[0] == 0xff && nested->checkencoding[1] == 0xfe)
266 				nested->encoding = littleEndian;
267 			else if (nested->checkencoding[0] < 128 && nested->checkencoding[1] < 128) {
268 				nested->encoding = ascii8;
269 				return nested->checkencoding[0];
270 			} else {
271 				compileError(nested,
272 						"encoding is neither big-endian, little-endian nor ASCII 8.");
273 				ch1 = EOF;
274 				break;
275 				;
276 			}
277 			continue;
278 		}
279 		switch (nested->encoding) {
280 		case noEncoding:
281 			break;
282 		case ascii8:
283 			return ch1;
284 			break;
285 		case bigEndian:
286 			ch2 = fgetc(nested->in);
287 			if (ch2 == EOF) break;
288 			character = (widechar)(ch1 << 8) | ch2;
289 			return (int)character;
290 			break;
291 		case littleEndian:
292 			ch2 = fgetc(nested->in);
293 			if (ch2 == EOF) break;
294 			character = (widechar)(ch2 << 8) | ch1;
295 			return (int)character;
296 			break;
297 		}
298 		if (ch1 == EOF || ch2 == EOF) break;
299 	}
300 	return EOF;
301 }
302 
303 int EXPORT_CALL
_lou_getALine(FileInfo * nested)304 _lou_getALine(FileInfo *nested) {
305 	/* Read a line of widechar's from an input file */
306 	int ch;
307 	int pch = 0;
308 	nested->linelen = 0;
309 	while ((ch = getAChar(nested)) != EOF) {
310 		if (ch == 13) continue;
311 		if (pch == '\\' && ch == 10) {
312 			nested->linelen--;
313 			pch = ch;
314 			continue;
315 		}
316 		if (ch == 10 || nested->linelen >= MAXSTRING - 1) break;
317 		nested->line[nested->linelen++] = (widechar)ch;
318 		pch = ch;
319 	}
320 	nested->line[nested->linelen] = 0;
321 	nested->linepos = 0;
322 	if (ch == EOF) return 0;
323 	nested->lineNumber++;
324 	return 1;
325 }
326 
327 static inline int
atEndOfLine(FileInfo * nested)328 atEndOfLine(FileInfo *nested) {
329 	return nested->linepos >= nested->linelen;
330 }
331 
332 static inline int
atTokenDelimiter(FileInfo * nested)333 atTokenDelimiter(FileInfo *nested) {
334 	return nested->line[nested->linepos] <= 32;
335 }
336 
337 static int
getToken(FileInfo * nested,CharsString * result,const char * description,int * lastToken)338 getToken(FileInfo *nested, CharsString *result, const char *description, int *lastToken) {
339 	/* Find the next string of contiguous non-whitespace characters. If this
340 	 * is the last token on the line, return 2 instead of 1. */
341 	while (!atEndOfLine(nested) && atTokenDelimiter(nested)) nested->linepos++;
342 	result->length = 0;
343 	while (!atEndOfLine(nested) && !atTokenDelimiter(nested)) {
344 		int maxlen = MAXSTRING;
345 		if (result->length >= maxlen) {
346 			compileError(nested, "more than %d characters (bytes)", maxlen);
347 			return 0;
348 		} else
349 			result->chars[result->length++] = nested->line[nested->linepos++];
350 	}
351 	if (!result->length) {
352 		/* Not enough tokens */
353 		if (description) compileError(nested, "%s not specified.", description);
354 		return 0;
355 	}
356 	result->chars[result->length] = 0;
357 	while (!atEndOfLine(nested) && atTokenDelimiter(nested)) nested->linepos++;
358 	return (*lastToken = atEndOfLine(nested)) ? 2 : 1;
359 }
360 
361 static void
compileError(FileInfo * nested,const char * format,...)362 compileError(FileInfo *nested, const char *format, ...) {
363 #ifndef __SYMBIAN32__
364 	char buffer[MAXSTRING];
365 	va_list arguments;
366 	va_start(arguments, format);
367 	vsnprintf(buffer, sizeof(buffer), format, arguments);
368 	va_end(arguments);
369 	if (nested)
370 		_lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", nested->fileName,
371 				nested->lineNumber, buffer);
372 	else
373 		_lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer);
374 	errorCount++;
375 #endif
376 }
377 
378 static void
compileWarning(FileInfo * nested,const char * format,...)379 compileWarning(FileInfo *nested, const char *format, ...) {
380 #ifndef __SYMBIAN32__
381 	char buffer[MAXSTRING];
382 	va_list arguments;
383 	va_start(arguments, format);
384 	vsnprintf(buffer, sizeof(buffer), format, arguments);
385 	va_end(arguments);
386 	if (nested)
387 		_lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", nested->fileName,
388 				nested->lineNumber, buffer);
389 	else
390 		_lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer);
391 	warningCount++;
392 #endif
393 }
394 
395 static int
allocateSpaceInTranslationTable(FileInfo * nested,TranslationTableOffset * offset,int count,TranslationTableHeader ** table)396 allocateSpaceInTranslationTable(FileInfo *nested, TranslationTableOffset *offset,
397 		int count, TranslationTableHeader **table) {
398 	/* allocate memory for table and expand previously allocated memory if necessary */
399 	int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
400 	TranslationTableOffset newSize = (*table)->bytesUsed + spaceNeeded;
401 	TranslationTableOffset size = (*table)->tableSize;
402 	if (newSize > size) {
403 		TranslationTableHeader *newTable;
404 		newSize += (newSize / OFFSETSIZE);
405 		newTable = realloc(*table, newSize);
406 		if (!newTable) {
407 			compileError(nested, "Not enough memory for translation table.");
408 			_lou_outOfMemory();
409 		}
410 		memset(((unsigned char *)newTable) + size, 0, newSize - size);
411 		/* update references to the old table */
412 		{
413 			TranslationTableChainEntry *entry;
414 			for (entry = translationTableChain; entry != NULL; entry = entry->next)
415 				if (entry->table == *table)
416 					entry->table = (TranslationTableHeader *)newTable;
417 		}
418 		*table = (TranslationTableHeader *)newTable;
419 		(*table)->tableSize = newSize;
420 	}
421 	if (offset != NULL) {
422 		*offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE;
423 		(*table)->bytesUsed += spaceNeeded;
424 	}
425 	return 1;
426 }
427 
428 static int
allocateSpaceInDisplayTable(FileInfo * nested,TranslationTableOffset * offset,int count,DisplayTableHeader ** table)429 allocateSpaceInDisplayTable(FileInfo *nested, TranslationTableOffset *offset, int count,
430 		DisplayTableHeader **table) {
431 	/* allocate memory for table and expand previously allocated memory if necessary */
432 	int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
433 	TranslationTableOffset newSize = (*table)->bytesUsed + spaceNeeded;
434 	TranslationTableOffset size = (*table)->tableSize;
435 	if (newSize > size) {
436 		DisplayTableHeader *newTable;
437 		newSize += (newSize / OFFSETSIZE);
438 		newTable = realloc(*table, newSize);
439 		if (!newTable) {
440 			compileError(nested, "Not enough memory for display table.");
441 			_lou_outOfMemory();
442 		}
443 		memset(((unsigned char *)newTable) + size, 0, newSize - size);
444 		/* update references to the old table */
445 		{
446 			DisplayTableChainEntry *entry;
447 			for (entry = displayTableChain; entry != NULL; entry = entry->next)
448 				if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable;
449 		}
450 		*table = (DisplayTableHeader *)newTable;
451 		(*table)->tableSize = newSize;
452 	}
453 	if (offset != NULL) {
454 		*offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE;
455 		(*table)->bytesUsed += spaceNeeded;
456 	}
457 	return 1;
458 }
459 
460 static int
allocateTranslationTable(FileInfo * nested,TranslationTableHeader ** table)461 allocateTranslationTable(FileInfo *nested, TranslationTableHeader **table) {
462 	/* Allocate memory for the table and a guess on the number of rules */
463 	const TranslationTableOffset startSize = 2 * sizeof(**table);
464 	if (*table) return 1;
465 	TranslationTableOffset bytesUsed =
466 			sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */
467 	if (!(*table = malloc(startSize))) {
468 		compileError(nested, "Not enough memory");
469 		if (*table != NULL) free(*table);
470 		*table = NULL;
471 		_lou_outOfMemory();
472 	}
473 	memset(*table, 0, startSize);
474 	(*table)->tableSize = startSize;
475 	(*table)->bytesUsed = bytesUsed;
476 	return 1;
477 }
478 
479 static int
allocateDisplayTable(FileInfo * nested,DisplayTableHeader ** table)480 allocateDisplayTable(FileInfo *nested, DisplayTableHeader **table) {
481 	/* Allocate memory for the table and a guess on the number of rules */
482 	const TranslationTableOffset startSize = 2 * sizeof(**table);
483 	if (*table) return 1;
484 	TranslationTableOffset bytesUsed =
485 			sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */
486 	if (!(*table = malloc(startSize))) {
487 		compileError(nested, "Not enough memory");
488 		if (*table != NULL) free(*table);
489 		*table = NULL;
490 		_lou_outOfMemory();
491 	}
492 	memset(*table, 0, startSize);
493 	(*table)->tableSize = startSize;
494 	(*table)->bytesUsed = bytesUsed;
495 	return 1;
496 }
497 
498 static TranslationTableCharacter *
compile_findCharOrDots(widechar c,int m,TranslationTableHeader * table)499 compile_findCharOrDots(widechar c, int m, TranslationTableHeader *table) {
500 	/* Look up a character or dot pattern. If m is 0 look up a character,
501 	 * otherwise look up a dot pattern. Although the algorithms are almost
502 	 * identical, different tables are needed for characters and dots because
503 	 * of the possibility of conflicts. */
504 	TranslationTableCharacter *character;
505 	TranslationTableOffset bucket;
506 	unsigned long int makeHash = _lou_charHash(c);
507 	if (m == 0)
508 		bucket = table->characters[makeHash];
509 	else
510 		bucket = table->dots[makeHash];
511 	while (bucket) {
512 		character = (TranslationTableCharacter *)&table->ruleArea[bucket];
513 		if (character->realchar == c) return character;
514 		bucket = character->next;
515 	}
516 	return NULL;
517 }
518 
519 static TranslationTableCharacter *
addCharOrDots(FileInfo * nested,widechar c,int m,TranslationTableHeader ** table)520 addCharOrDots(FileInfo *nested, widechar c, int m, TranslationTableHeader **table) {
521 	/* See if a character or dot pattern is in the appropriate table. If not,
522 	 * insert it. In either
523 	 * case, return a pointer to it. */
524 	TranslationTableOffset bucket;
525 	TranslationTableCharacter *character;
526 	TranslationTableCharacter *oldchar;
527 	TranslationTableOffset offset;
528 	unsigned long int makeHash;
529 	if ((character = compile_findCharOrDots(c, m, *table))) return character;
530 	if (!allocateSpaceInTranslationTable(nested, &offset, sizeof(*character), table))
531 		return NULL;
532 	character = (TranslationTableCharacter *)&(*table)->ruleArea[offset];
533 	memset(character, 0, sizeof(*character));
534 	character->realchar = c;
535 	makeHash = _lou_charHash(c);
536 	if (m == 0)
537 		bucket = (*table)->characters[makeHash];
538 	else
539 		bucket = (*table)->dots[makeHash];
540 	if (!bucket) {
541 		if (m == 0)
542 			(*table)->characters[makeHash] = offset;
543 		else
544 			(*table)->dots[makeHash] = offset;
545 	} else {
546 		oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[bucket];
547 		while (oldchar->next)
548 			oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next];
549 		oldchar->next = offset;
550 	}
551 	return character;
552 }
553 
554 static CharOrDots *
getCharOrDots(widechar c,int m,const DisplayTableHeader * table)555 getCharOrDots(widechar c, int m, const DisplayTableHeader *table) {
556 	CharOrDots *cdPtr;
557 	TranslationTableOffset bucket;
558 	unsigned long int makeHash = _lou_charHash(c);
559 	if (m == 0)
560 		bucket = table->charToDots[makeHash];
561 	else
562 		bucket = table->dotsToChar[makeHash];
563 	while (bucket) {
564 		cdPtr = (CharOrDots *)&table->ruleArea[bucket];
565 		if (cdPtr->lookFor == c) return cdPtr;
566 		bucket = cdPtr->next;
567 	}
568 	return NULL;
569 }
570 
571 widechar EXPORT_CALL
_lou_getDotsForChar(widechar c,const DisplayTableHeader * table)572 _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) {
573 	CharOrDots *cdPtr = getCharOrDots(c, 0, table);
574 	if (cdPtr) return cdPtr->found;
575 	return LOU_DOTS;
576 }
577 
578 widechar EXPORT_CALL
_lou_getCharFromDots(widechar d,const DisplayTableHeader * table)579 _lou_getCharFromDots(widechar d, const DisplayTableHeader *table) {
580 	CharOrDots *cdPtr = getCharOrDots(d, 1, table);
581 	if (cdPtr) return cdPtr->found;
582 	return '\0';
583 }
584 
585 static int
putCharAndDots(FileInfo * nested,widechar c,widechar d,DisplayTableHeader ** table)586 putCharAndDots(FileInfo *nested, widechar c, widechar d, DisplayTableHeader **table) {
587 	TranslationTableOffset bucket;
588 	CharOrDots *cdPtr;
589 	CharOrDots *oldcdPtr = NULL;
590 	TranslationTableOffset offset;
591 	unsigned long int makeHash;
592 	if (!(cdPtr = getCharOrDots(c, 0, *table))) {
593 		if (!allocateSpaceInDisplayTable(nested, &offset, sizeof(*cdPtr), table))
594 			return 0;
595 		cdPtr = (CharOrDots *)&(*table)->ruleArea[offset];
596 		cdPtr->next = 0;
597 		cdPtr->lookFor = c;
598 		cdPtr->found = d;
599 		makeHash = _lou_charHash(c);
600 		bucket = (*table)->charToDots[makeHash];
601 		if (!bucket)
602 			(*table)->charToDots[makeHash] = offset;
603 		else {
604 			oldcdPtr = (CharOrDots *)&(*table)->ruleArea[bucket];
605 			while (oldcdPtr->next)
606 				oldcdPtr = (CharOrDots *)&(*table)->ruleArea[oldcdPtr->next];
607 			oldcdPtr->next = offset;
608 		}
609 	}
610 	if (!(cdPtr = getCharOrDots(d, 1, *table))) {
611 		if (!allocateSpaceInDisplayTable(nested, &offset, sizeof(*cdPtr), table))
612 			return 0;
613 		cdPtr = (CharOrDots *)&(*table)->ruleArea[offset];
614 		cdPtr->next = 0;
615 		cdPtr->lookFor = d;
616 		cdPtr->found = c;
617 		makeHash = _lou_charHash(d);
618 		bucket = (*table)->dotsToChar[makeHash];
619 		if (!bucket)
620 			(*table)->dotsToChar[makeHash] = offset;
621 		else {
622 			oldcdPtr = (CharOrDots *)&(*table)->ruleArea[bucket];
623 			while (oldcdPtr->next)
624 				oldcdPtr = (CharOrDots *)&(*table)->ruleArea[oldcdPtr->next];
625 			oldcdPtr->next = offset;
626 		}
627 	}
628 	return 1;
629 }
630 
static inline const char *
getPartName(int actionPart) {
	/* Name of a rule part: "test" for 0, "action" for any other value. */
	if (actionPart == 0) return "test";
	return "action";
}
635 
636 static int
passFindCharacters(FileInfo * nested,widechar * instructions,int end,widechar ** characters,int * length)637 passFindCharacters(FileInfo *nested, widechar *instructions, int end,
638 		widechar **characters, int *length) {
639 	int IC = 0;
640 	int lookback = 0;
641 
642 	*characters = NULL;
643 	*length = 0;
644 
645 	while (IC < end) {
646 		widechar instruction = instructions[IC];
647 
648 		switch (instruction) {
649 		case pass_string:
650 		case pass_dots: {
651 			int count = instructions[IC + 1];
652 			IC += 2;
653 			if (count > lookback) {
654 				*characters = &instructions[IC + lookback];
655 				*length = count - lookback;
656 				return 1;
657 			} else {
658 				lookback -= count;
659 			}
660 			IC += count;
661 			continue;
662 		}
663 
664 		case pass_attributes:
665 			IC += 5;
666 			if (instructions[IC - 2] == instructions[IC - 1] &&
667 					instructions[IC - 1] <= lookback) {
668 				lookback -= instructions[IC - 1];
669 				continue;
670 			}
671 			goto NO_CHARACTERS;
672 
673 		case pass_swap:
674 			IC += 2;
675 			/* fall through */
676 
677 		case pass_groupstart:
678 		case pass_groupend:
679 		case pass_groupreplace:
680 			IC += 3;
681 
682 		NO_CHARACTERS : { return 1; }
683 
684 		case pass_eq:
685 		case pass_lt:
686 		case pass_gt:
687 		case pass_lteq:
688 		case pass_gteq:
689 			IC += 3;
690 			continue;
691 
692 		case pass_lookback:
693 			lookback += instructions[IC + 1];
694 			IC += 2;
695 			continue;
696 
697 		case pass_not:
698 		case pass_startReplace:
699 		case pass_endReplace:
700 		case pass_first:
701 		case pass_last:
702 		case pass_copy:
703 		case pass_omit:
704 		case pass_plus:
705 		case pass_hyphen:
706 			IC += 1;
707 			continue;
708 
709 		case pass_endTest:
710 			goto NO_CHARACTERS;
711 
712 		default:
713 			compileError(nested, "unhandled test suboperand: \\x%02x", instruction);
714 			return 0;
715 		}
716 	}
717 	goto NO_CHARACTERS;
718 }
719 
720 /* The following functions are called by addRule to handle various cases. */
721 
722 static void
addForwardRuleWithSingleChar(FileInfo * nested,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader ** table)723 addForwardRuleWithSingleChar(FileInfo *nested, TranslationTableOffset newRuleOffset,
724 		TranslationTableRule *newRule, TranslationTableHeader **table) {
725 	/* direction = 0, newRule->charslen = 1 */
726 	TranslationTableRule *currentRule;
727 	TranslationTableOffset *currentOffsetPtr;
728 	TranslationTableCharacter *character;
729 	int m = 0;
730 	if (newRule->opcode == CTO_CompDots || newRule->opcode == CTO_Comp6) return;
731 	if (newRule->opcode >= CTO_Pass2 && newRule->opcode <= CTO_Pass4) m = 1;
732 	// get the character from the table, or if the character is not defined yet, define it
733 	// (without adding attributes)
734 	character = addCharOrDots(nested, newRule->charsdots[0], m, table);
735 	if (m != 1 && character->attributes & CTC_Letter &&
736 			(newRule->opcode == CTO_WholeWord || newRule->opcode == CTO_LargeSign)) {
737 		if ((*table)->noLetsignCount < LETSIGNSIZE)
738 			(*table)->noLetsign[(*table)->noLetsignCount++] = newRule->charsdots[0];
739 	}
740 	// if the new rule is a character definition rule, set the main definition rule of
741 	// this character to it
742 	// (possibly overwriting previous definition rules)
743 	// adding the attributes to the character has already been done elsewhere
744 	if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
745 		character->definitionRule = newRuleOffset;
746 	// add the new rule to the list of rules associated with this character
747 	// if the new rule is a character definition rule, it is inserted at the end of the
748 	// list
749 	// otherwise it is inserted before the first character definition rule
750 	currentOffsetPtr = &character->otherRules;
751 	while (*currentOffsetPtr) {
752 		currentRule = (TranslationTableRule *)&(*table)->ruleArea[*currentOffsetPtr];
753 		if (currentRule->charslen == 0) break;
754 		if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
755 			if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)) break;
756 		currentOffsetPtr = &currentRule->charsnext;
757 	}
758 	newRule->charsnext = *currentOffsetPtr;
759 	*currentOffsetPtr = newRuleOffset;
760 }
761 
762 static void
addForwardRuleWithMultipleChars(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)763 addForwardRuleWithMultipleChars(TranslationTableOffset newRuleOffset,
764 		TranslationTableRule *newRule, TranslationTableHeader *table) {
765 	/* direction = 0 newRule->charslen > 1 */
766 	TranslationTableRule *currentRule = NULL;
767 	TranslationTableOffset *currentOffsetPtr =
768 			&table->forRules[_lou_stringHash(&newRule->charsdots[0], 0, NULL)];
769 	while (*currentOffsetPtr) {
770 		currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
771 		if (newRule->charslen > currentRule->charslen) break;
772 		if (newRule->charslen == currentRule->charslen)
773 			if ((currentRule->opcode == CTO_Always) && (newRule->opcode != CTO_Always))
774 				break;
775 		currentOffsetPtr = &currentRule->charsnext;
776 	}
777 	newRule->charsnext = *currentOffsetPtr;
778 	*currentOffsetPtr = newRuleOffset;
779 }
780 
781 static void
addBackwardRuleWithSingleCell(FileInfo * nested,widechar cell,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader ** table)782 addBackwardRuleWithSingleCell(FileInfo *nested, widechar cell,
783 		TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
784 		TranslationTableHeader **table) {
785 	/* direction = 1, newRule->dotslen = 1 */
786 	TranslationTableRule *currentRule;
787 	TranslationTableOffset *currentOffsetPtr;
788 	TranslationTableCharacter *dots;
789 	if (newRule->opcode == CTO_SwapCc || newRule->opcode == CTO_Repeated)
790 		return; /* too ambiguous */
791 	// get the cell from the table, or if the cell is not defined yet, define it (without
792 	// adding attributes)
793 	dots = addCharOrDots(nested, cell, 1, table);
794 	if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
795 		dots->definitionRule = newRuleOffset;
796 	currentOffsetPtr = &dots->otherRules;
797 	while (*currentOffsetPtr) {
798 		currentRule = (TranslationTableRule *)&(*table)->ruleArea[*currentOffsetPtr];
799 		if (newRule->charslen > currentRule->charslen || currentRule->dotslen == 0) break;
800 		if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
801 			if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)) break;
802 		currentOffsetPtr = &currentRule->dotsnext;
803 	}
804 	newRule->dotsnext = *currentOffsetPtr;
805 	*currentOffsetPtr = newRuleOffset;
806 }
807 
808 static void
addBackwardRuleWithMultipleCells(widechar * cells,int count,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)809 addBackwardRuleWithMultipleCells(widechar *cells, int count,
810 		TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
811 		TranslationTableHeader *table) {
812 	/* direction = 1, newRule->dotslen > 1 */
813 	TranslationTableRule *currentRule = NULL;
814 	TranslationTableOffset *currentOffsetPtr =
815 			&table->backRules[_lou_stringHash(cells, 0, NULL)];
816 	if (newRule->opcode == CTO_SwapCc) return;
817 	while (*currentOffsetPtr) {
818 		int currentLength;
819 		int newLength;
820 		currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
821 		currentLength = currentRule->dotslen + currentRule->charslen;
822 		newLength = count + newRule->charslen;
823 		if (newLength > currentLength) break;
824 		if (currentLength == newLength)
825 			if ((currentRule->opcode == CTO_Always) && (newRule->opcode != CTO_Always))
826 				break;
827 		currentOffsetPtr = &currentRule->dotsnext;
828 	}
829 	newRule->dotsnext = *currentOffsetPtr;
830 	*currentOffsetPtr = newRuleOffset;
831 }
832 
833 static int
addForwardPassRule(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)834 addForwardPassRule(TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
835 		TranslationTableHeader *table) {
836 	TranslationTableOffset *currentOffsetPtr;
837 	TranslationTableRule *currentRule;
838 	switch (newRule->opcode) {
839 	case CTO_Correct:
840 		currentOffsetPtr = &table->forPassRules[0];
841 		break;
842 	case CTO_Context:
843 		currentOffsetPtr = &table->forPassRules[1];
844 		break;
845 	case CTO_Pass2:
846 		currentOffsetPtr = &table->forPassRules[2];
847 		break;
848 	case CTO_Pass3:
849 		currentOffsetPtr = &table->forPassRules[3];
850 		break;
851 	case CTO_Pass4:
852 		currentOffsetPtr = &table->forPassRules[4];
853 		break;
854 	default:
855 		return 0;
856 	}
857 	while (*currentOffsetPtr) {
858 		currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
859 		if (newRule->charslen > currentRule->charslen) break;
860 		currentOffsetPtr = &currentRule->charsnext;
861 	}
862 	newRule->charsnext = *currentOffsetPtr;
863 	*currentOffsetPtr = newRuleOffset;
864 	return 1;
865 }
866 
867 static int
addBackwardPassRule(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)868 addBackwardPassRule(TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
869 		TranslationTableHeader *table) {
870 	TranslationTableOffset *currentOffsetPtr;
871 	TranslationTableRule *currentRule;
872 	switch (newRule->opcode) {
873 	case CTO_Correct:
874 		currentOffsetPtr = &table->backPassRules[0];
875 		break;
876 	case CTO_Context:
877 		currentOffsetPtr = &table->backPassRules[1];
878 		break;
879 	case CTO_Pass2:
880 		currentOffsetPtr = &table->backPassRules[2];
881 		break;
882 	case CTO_Pass3:
883 		currentOffsetPtr = &table->backPassRules[3];
884 		break;
885 	case CTO_Pass4:
886 		currentOffsetPtr = &table->backPassRules[4];
887 		break;
888 	default:
889 		return 0;
890 	}
891 	while (*currentOffsetPtr) {
892 		currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
893 		if (newRule->charslen > currentRule->charslen) break;
894 		currentOffsetPtr = &currentRule->dotsnext;
895 	}
896 	newRule->dotsnext = *currentOffsetPtr;
897 	*currentOffsetPtr = newRuleOffset;
898 	return 1;
899 }
900 
901 static int
addRule(FileInfo * nested,TranslationTableOpcode opcode,CharsString * ruleChars,CharsString * ruleDots,TranslationTableCharacterAttributes after,TranslationTableCharacterAttributes before,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)902 addRule(FileInfo *nested, TranslationTableOpcode opcode, CharsString *ruleChars,
903 		CharsString *ruleDots, TranslationTableCharacterAttributes after,
904 		TranslationTableCharacterAttributes before, TranslationTableOffset *newRuleOffset,
905 		TranslationTableRule **newRule, int noback, int nofor,
906 		TranslationTableHeader **table) {
907 	/* Add a rule to the table, using the hash function to find the start of
908 	 * chains and chaining both the chars and dots strings */
909 	TranslationTableOffset ruleOffset;
910 	int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE);
911 	if (ruleChars) ruleSize += CHARSIZE * ruleChars->length;
912 	if (ruleDots) ruleSize += CHARSIZE * ruleDots->length;
913 	if (!allocateSpaceInTranslationTable(nested, &ruleOffset, ruleSize, table)) return 0;
914 	TranslationTableRule *rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
915 	if (newRule) *newRule = rule;
916 	if (newRuleOffset) *newRuleOffset = ruleOffset;
917 	rule->opcode = opcode;
918 	rule->after = after;
919 	rule->before = before;
920 	if (ruleChars)
921 		memcpy(&rule->charsdots[0], &ruleChars->chars[0],
922 				CHARSIZE * (rule->charslen = ruleChars->length));
923 	else
924 		rule->charslen = 0;
925 	if (ruleDots)
926 		memcpy(&rule->charsdots[rule->charslen], &ruleDots->chars[0],
927 				CHARSIZE * (rule->dotslen = ruleDots->length));
928 	else
929 		rule->dotslen = 0;
930 
931 	/* link new rule into table. */
932 	if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1;
933 	if (opcode >= CTO_Context && opcode <= CTO_Pass4)
934 		if (!(opcode == CTO_Context && rule->charslen > 0)) {
935 			if (!nofor)
936 				if (!addForwardPassRule(ruleOffset, rule, *table)) return 0;
937 			if (!noback)
938 				if (!addBackwardPassRule(ruleOffset, rule, *table)) return 0;
939 			return 1;
940 		}
941 	if (!nofor) {
942 		if (rule->charslen == 1)
943 			addForwardRuleWithSingleChar(nested, ruleOffset, rule, table);
944 		else if (rule->charslen > 1)
945 			addForwardRuleWithMultipleChars(ruleOffset, rule, *table);
946 	}
947 	if (!noback) {
948 		widechar *cells;
949 		int count;
950 
951 		if (rule->opcode == CTO_Context) {
952 			cells = &rule->charsdots[0];
953 			count = rule->charslen;
954 		} else {
955 			cells = &rule->charsdots[rule->charslen];
956 			count = rule->dotslen;
957 		}
958 
959 		if (count == 1)
960 			addBackwardRuleWithSingleCell(nested, *cells, ruleOffset, rule, table);
961 		else if (count > 1)
962 			addBackwardRuleWithMultipleCells(cells, count, ruleOffset, rule, *table);
963 	}
964 	return 1;
965 }
966 
967 static const CharacterClass *
findCharacterClass(const CharsString * name,const TranslationTableHeader * table)968 findCharacterClass(const CharsString *name, const TranslationTableHeader *table) {
969 	/* Find a character class, whether predefined or user-defined */
970 	const CharacterClass *class = table->characterClasses;
971 	while (class) {
972 		if ((name->length == class->length) &&
973 				(memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0))
974 			return class;
975 		class = class->next;
976 	}
977 	return NULL;
978 }
979 
980 static CharacterClass *
addCharacterClass(FileInfo * nested,const widechar * name,int length,TranslationTableHeader * table)981 addCharacterClass(FileInfo *nested, const widechar *name, int length,
982 		TranslationTableHeader *table) {
983 	/* Define a character class, Whether predefined or user-defined */
984 	CharacterClass **classes = &table->characterClasses;
985 	;
986 	TranslationTableCharacterAttributes *nextAttribute =
987 			&table->nextCharacterClassAttribute;
988 	CharacterClass *class;
989 	if (*nextAttribute) {
990 		if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1))))
991 			_lou_outOfMemory();
992 		else {
993 			memset(class, 0, sizeof(*class));
994 			memcpy(class->name, name, CHARSIZE * (class->length = length));
995 			class->attribute = *nextAttribute;
996 			if (*nextAttribute == CTC_Class4)
997 				*nextAttribute = CTC_UserDefined0;
998 			else if (*nextAttribute == CTC_UserDefined7)
999 				*nextAttribute = CTC_Class13;
1000 			else
1001 				*nextAttribute <<= 1;
1002 			class->next = *classes;
1003 			*classes = class;
1004 			return class;
1005 		}
1006 	}
1007 	compileError(nested, "character class table overflow.");
1008 	return NULL;
1009 }
1010 
1011 static void
deallocateCharacterClasses(TranslationTableHeader * table)1012 deallocateCharacterClasses(TranslationTableHeader *table) {
1013 	CharacterClass **classes = &table->characterClasses;
1014 	while (*classes) {
1015 		CharacterClass *class = *classes;
1016 		*classes = (*classes)->next;
1017 		if (class) free(class);
1018 	}
1019 }
1020 
1021 static int
allocateCharacterClasses(TranslationTableHeader * table)1022 allocateCharacterClasses(TranslationTableHeader *table) {
1023 	/* Allocate memory for predifined character classes */
1024 	int k = 0;
1025 	table->characterClasses = NULL;
1026 	table->nextCharacterClassAttribute = 1;
1027 	while (characterClassNames[k]) {
1028 		widechar wname[MAXSTRING];
1029 		int length = (int)strlen(characterClassNames[k]);
1030 		int kk;
1031 		for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk];
1032 		if (!addCharacterClass(NULL, wname, length, table)) {
1033 			deallocateCharacterClasses(table);
1034 			return 0;
1035 		}
1036 		k++;
1037 	}
1038 	return 1;
1039 }
1040 
1041 static TranslationTableOpcode
getOpcode(FileInfo * nested,const CharsString * token)1042 getOpcode(FileInfo *nested, const CharsString *token) {
1043 	static TranslationTableOpcode lastOpcode = 0;
1044 	TranslationTableOpcode opcode = lastOpcode;
1045 
1046 	do {
1047 		if (token->length == opcodeLengths[opcode])
1048 			if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0],
1049 						token->length)) {
1050 				lastOpcode = opcode;
1051 				return opcode;
1052 			}
1053 		opcode++;
1054 		if (opcode >= CTO_None) opcode = 0;
1055 	} while (opcode != lastOpcode);
1056 	compileError(nested, "opcode %s not defined.",
1057 			_lou_showString(&token->chars[0], token->length, 0));
1058 	return CTO_None;
1059 }
1060 
1061 TranslationTableOpcode EXPORT_CALL
_lou_findOpcodeNumber(const char * toFind)1062 _lou_findOpcodeNumber(const char *toFind) {
1063 	/* Used by tools such as lou_debug */
1064 	static TranslationTableOpcode lastOpcode = 0;
1065 	TranslationTableOpcode opcode = lastOpcode;
1066 	int length = (int)strlen(toFind);
1067 	do {
1068 		if (length == opcodeLengths[opcode] &&
1069 				strcasecmp(toFind, opcodeNames[opcode]) == 0) {
1070 			lastOpcode = opcode;
1071 			return opcode;
1072 		}
1073 		opcode++;
1074 		if (opcode >= CTO_None) opcode = 0;
1075 	} while (opcode != lastOpcode);
1076 	return CTO_None;
1077 }
1078 
1079 const char *EXPORT_CALL
_lou_findOpcodeName(TranslationTableOpcode opcode)1080 _lou_findOpcodeName(TranslationTableOpcode opcode) {
1081 	static char scratchBuf[MAXSTRING];
1082 	/* Used by tools such as lou_debug */
1083 	if (opcode < 0 || opcode >= CTO_None) {
1084 		sprintf(scratchBuf, "%u", opcode);
1085 		return scratchBuf;
1086 	}
1087 	return opcodeNames[opcode];
1088 }
1089 
1090 static widechar
hexValue(FileInfo * nested,const widechar * digits,int length)1091 hexValue(FileInfo *nested, const widechar *digits, int length) {
1092 	int k;
1093 	unsigned int binaryValue = 0;
1094 	for (k = 0; k < length; k++) {
1095 		unsigned int hexDigit = 0;
1096 		if (digits[k] >= '0' && digits[k] <= '9')
1097 			hexDigit = digits[k] - '0';
1098 		else if (digits[k] >= 'a' && digits[k] <= 'f')
1099 			hexDigit = digits[k] - 'a' + 10;
1100 		else if (digits[k] >= 'A' && digits[k] <= 'F')
1101 			hexDigit = digits[k] - 'A' + 10;
1102 		else {
1103 			compileError(nested, "invalid %d-digit hexadecimal number", length);
1104 			return (widechar)0xffffffff;
1105 		}
1106 		binaryValue |= hexDigit << (4 * (length - 1 - k));
1107 	}
1108 	return (widechar)binaryValue;
1109 }
1110 
/* Thresholds used by parseChars() when decoding UTF-8: entry i has its i+1
 * most significant bits (of 8) set. parseChars() picks the largest entry
 * that the lead byte reaches to get the number of continuation units, and
 * masks the lead byte with 0xFF minus that entry to get its payload bits. */
#define MAXBYTES 7
static const unsigned int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC,
	0XFE };
1114 
1115 static int
parseChars(FileInfo * nested,CharsString * result,CharsString * token)1116 parseChars(FileInfo *nested, CharsString *result, CharsString *token) {
1117 	int in = 0;
1118 	int out = 0;
1119 	int lastOutSize = 0;
1120 	int lastIn;
1121 	unsigned int ch = 0;
1122 	int numBytes = 0;
1123 	unsigned int utf32 = 0;
1124 	int k;
1125 	while (in < token->length) {
1126 		ch = token->chars[in++] & 0xff;
1127 		if (ch < 128) {
1128 			if (ch == '\\') { /* escape sequence */
1129 				switch (ch = token->chars[in]) {
1130 				case '\\':
1131 					break;
1132 				case 'e':
1133 					ch = 0x1b;
1134 					break;
1135 				case 'f':
1136 					ch = 12;
1137 					break;
1138 				case 'n':
1139 					ch = 10;
1140 					break;
1141 				case 'r':
1142 					ch = 13;
1143 					break;
1144 				case 's':
1145 					ch = ' ';
1146 					break;
1147 				case 't':
1148 					ch = 9;
1149 					break;
1150 				case 'v':
1151 					ch = 11;
1152 					break;
1153 				case 'w':
1154 					ch = LOU_ENDSEGMENT;
1155 					break;
1156 				case 34:
1157 					ch = QUOTESUB;
1158 					break;
1159 				case 'X':
1160 					compileWarning(nested, "\\Xhhhh (with a capital 'X') is deprecated.");
1161 				case 'x':
1162 					if (token->length - in > 4) {
1163 						ch = hexValue(nested, &token->chars[in + 1], 4);
1164 						in += 4;
1165 					}
1166 					break;
1167 				case 'Y':
1168 					compileWarning(
1169 							nested, "\\Yhhhhh (with a capital 'Y') is deprecated.");
1170 				case 'y':
1171 					if (CHARSIZE == 2) {
1172 					not32:
1173 						compileError(nested,
1174 								"liblouis has not been compiled for 32-bit Unicode");
1175 						break;
1176 					}
1177 					if (token->length - in > 5) {
1178 						ch = hexValue(nested, &token->chars[in + 1], 5);
1179 						in += 5;
1180 					}
1181 					break;
1182 				case 'Z':
1183 					compileWarning(
1184 							nested, "\\Zhhhhhhhh (with a capital 'Z') is deprecated.");
1185 				case 'z':
1186 					if (CHARSIZE == 2) goto not32;
1187 					if (token->length - in > 8) {
1188 						ch = hexValue(nested, &token->chars[in + 1], 8);
1189 						in += 8;
1190 					}
1191 					break;
1192 				default:
1193 					compileError(nested, "invalid escape sequence '\\%c'", ch);
1194 					break;
1195 				}
1196 				in++;
1197 			}
1198 			if (out >= MAXSTRING - 1) {
1199 				compileError(nested, "Token too long");
1200 				result->length = MAXSTRING - 1;
1201 				return 1;
1202 			}
1203 			result->chars[out++] = (widechar)ch;
1204 			continue;
1205 		}
1206 		lastOutSize = out;
1207 		lastIn = in;
1208 		for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--)
1209 			if (ch >= first0Bit[numBytes]) break;
1210 		utf32 = ch & (0XFF - first0Bit[numBytes]);
1211 		for (k = 0; k < numBytes; k++) {
1212 			if (in >= MAXSTRING - 1) break;
1213 			if (out >= MAXSTRING - 1) {
1214 				compileError(nested, "Token too long");
1215 				result->length = lastOutSize;
1216 				return 1;
1217 			}
1218 			if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) {
1219 				compileWarning(nested, "invalid UTF-8. Assuming Latin-1.");
1220 				result->chars[out++] = token->chars[lastIn];
1221 				in = lastIn + 1;
1222 				continue;
1223 			}
1224 			utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f);
1225 		}
1226 		if (out >= MAXSTRING - 1) {
1227 			compileError(nested, "Token too long");
1228 			result->length = lastOutSize;
1229 			return 1;
1230 		}
1231 		if (CHARSIZE == 2 && utf32 > 0xffff) utf32 = 0xffff;
1232 		result->chars[out++] = (widechar)utf32;
1233 	}
1234 	result->length = out;
1235 	return 1;
1236 }
1237 
1238 int EXPORT_CALL
_lou_extParseChars(const char * inString,widechar * outString)1239 _lou_extParseChars(const char *inString, widechar *outString) {
1240 	/* Parse external character strings */
1241 	CharsString wideIn;
1242 	CharsString result;
1243 	int k;
1244 	for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k];
1245 	wideIn.chars[k] = 0;
1246 	wideIn.length = k;
1247 	parseChars(NULL, &result, &wideIn);
1248 	if (errorCount) {
1249 		errorCount = 0;
1250 		return 0;
1251 	}
1252 	for (k = 0; k < result.length; k++) outString[k] = result.chars[k];
1253 	return result.length;
1254 }
1255 
1256 static int
parseDots(FileInfo * nested,CharsString * cells,const CharsString * token)1257 parseDots(FileInfo *nested, CharsString *cells, const CharsString *token) {
1258 	/* get dot patterns */
1259 	widechar cell = 0; /* assembly place for dots */
1260 	int cellCount = 0;
1261 	int index;
1262 	int start = 0;
1263 
1264 	for (index = 0; index < token->length; index++) {
1265 		int started = index != start;
1266 		widechar character = token->chars[index];
1267 		switch (character) { /* or dots to make up Braille cell */
1268 			{
1269 				int dot;
1270 			case '1':
1271 				dot = LOU_DOT_1;
1272 				goto haveDot;
1273 			case '2':
1274 				dot = LOU_DOT_2;
1275 				goto haveDot;
1276 			case '3':
1277 				dot = LOU_DOT_3;
1278 				goto haveDot;
1279 			case '4':
1280 				dot = LOU_DOT_4;
1281 				goto haveDot;
1282 			case '5':
1283 				dot = LOU_DOT_5;
1284 				goto haveDot;
1285 			case '6':
1286 				dot = LOU_DOT_6;
1287 				goto haveDot;
1288 			case '7':
1289 				dot = LOU_DOT_7;
1290 				goto haveDot;
1291 			case '8':
1292 				dot = LOU_DOT_8;
1293 				goto haveDot;
1294 			case '9':
1295 				dot = LOU_DOT_9;
1296 				goto haveDot;
1297 			case 'a':
1298 			case 'A':
1299 				dot = LOU_DOT_10;
1300 				goto haveDot;
1301 			case 'b':
1302 			case 'B':
1303 				dot = LOU_DOT_11;
1304 				goto haveDot;
1305 			case 'c':
1306 			case 'C':
1307 				dot = LOU_DOT_12;
1308 				goto haveDot;
1309 			case 'd':
1310 			case 'D':
1311 				dot = LOU_DOT_13;
1312 				goto haveDot;
1313 			case 'e':
1314 			case 'E':
1315 				dot = LOU_DOT_14;
1316 				goto haveDot;
1317 			case 'f':
1318 			case 'F':
1319 				dot = LOU_DOT_15;
1320 			haveDot:
1321 				if (started && !cell) goto invalid;
1322 				if (cell & dot) {
1323 					compileError(nested, "dot specified more than once.");
1324 					return 0;
1325 				}
1326 				cell |= dot;
1327 				break;
1328 			}
1329 		case '0': /* blank */
1330 			if (started) goto invalid;
1331 			break;
1332 		case '-': /* got all dots for this cell */
1333 			if (!started) {
1334 				compileError(nested, "missing cell specification.");
1335 				return 0;
1336 			}
1337 			cells->chars[cellCount++] = cell | LOU_DOTS;
1338 			cell = 0;
1339 			start = index + 1;
1340 			break;
1341 		default:
1342 		invalid:
1343 			compileError(
1344 					nested, "invalid dot number %s.", _lou_showString(&character, 1, 0));
1345 			return 0;
1346 		}
1347 	}
1348 	if (index == start) {
1349 		compileError(nested, "missing cell specification.");
1350 		return 0;
1351 	}
1352 	cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */
1353 	cells->length = cellCount;
1354 	return 1;
1355 }
1356 
1357 int EXPORT_CALL
_lou_extParseDots(const char * inString,widechar * outString)1358 _lou_extParseDots(const char *inString, widechar *outString) {
1359 	/* Parse external dot patterns */
1360 	CharsString wideIn;
1361 	CharsString result;
1362 	int k;
1363 	for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k];
1364 	wideIn.chars[k] = 0;
1365 	wideIn.length = k;
1366 	parseDots(NULL, &result, &wideIn);
1367 	if (errorCount) {
1368 		errorCount = 0;
1369 		return 0;
1370 	}
1371 	for (k = 0; k < result.length; k++) outString[k] = result.chars[k];
1372 	outString[k] = 0;
1373 	return result.length;
1374 }
1375 
1376 static int
getCharacters(FileInfo * nested,CharsString * characters,int * lastToken)1377 getCharacters(FileInfo *nested, CharsString *characters, int *lastToken) {
1378 	/* Get ruleChars string */
1379 	CharsString token;
1380 	if (getToken(nested, &token, "characters", lastToken))
1381 		if (parseChars(nested, characters, &token)) return 1;
1382 	return 0;
1383 }
1384 
1385 static int
getRuleCharsText(FileInfo * nested,CharsString * ruleChars,int * lastToken)1386 getRuleCharsText(FileInfo *nested, CharsString *ruleChars, int *lastToken) {
1387 	CharsString token;
1388 	if (getToken(nested, &token, "Characters operand", lastToken))
1389 		if (parseChars(nested, ruleChars, &token)) return 1;
1390 	return 0;
1391 }
1392 
1393 static int
getRuleDotsText(FileInfo * nested,CharsString * ruleDots,int * lastToken)1394 getRuleDotsText(FileInfo *nested, CharsString *ruleDots, int *lastToken) {
1395 	CharsString token;
1396 	if (getToken(nested, &token, "characters", lastToken))
1397 		if (parseChars(nested, ruleDots, &token)) return 1;
1398 	return 0;
1399 }
1400 
1401 static int
getRuleDotsPattern(FileInfo * nested,CharsString * ruleDots,int * lastToken)1402 getRuleDotsPattern(FileInfo *nested, CharsString *ruleDots, int *lastToken) {
1403 	/* Interpret the dets operand */
1404 	CharsString token;
1405 	if (getToken(nested, &token, "Dots operand", lastToken)) {
1406 		if (token.length == 1 && token.chars[0] == '=') {
1407 			ruleDots->length = 0;
1408 			return 1;
1409 		}
1410 		if (parseDots(nested, ruleDots, &token)) return 1;
1411 	}
1412 	return 0;
1413 }
1414 
1415 static int
getCharacterClass(FileInfo * nested,const CharacterClass ** class,const TranslationTableHeader * table,int * lastToken)1416 getCharacterClass(FileInfo *nested, const CharacterClass **class,
1417 		const TranslationTableHeader *table, int *lastToken) {
1418 	CharsString token;
1419 	if (getToken(nested, &token, "character class name", lastToken)) {
1420 		if ((*class = findCharacterClass(&token, table))) return 1;
1421 		compileError(nested, "character class not defined.");
1422 	}
1423 	return 0;
1424 }
1425 
/* Forward declaration only; includeFile has internal linkage, so its
 * definition lives elsewhere in this translation unit. */
static int
includeFile(FileInfo *nested, CharsString *includedFile, TranslationTableHeader **table,
		DisplayTableHeader **displayTable);
1429 
1430 static TranslationTableOffset
findRuleName(const CharsString * name,const TranslationTableHeader * table)1431 findRuleName(const CharsString *name, const TranslationTableHeader *table) {
1432 	const RuleName *nameRule = table->ruleNames;
1433 	while (nameRule) {
1434 		if ((name->length == nameRule->length) &&
1435 				(memcmp(&name->chars[0], nameRule->name, CHARSIZE * name->length) == 0))
1436 			return nameRule->ruleOffset;
1437 		nameRule = nameRule->next;
1438 	}
1439 	return 0;
1440 }
1441 
1442 static int
addRuleName(FileInfo * nested,CharsString * name,TranslationTableOffset newRuleOffset,TranslationTableHeader * table)1443 addRuleName(FileInfo *nested, CharsString *name, TranslationTableOffset newRuleOffset,
1444 		TranslationTableHeader *table) {
1445 	int k;
1446 	RuleName *nameRule;
1447 	if (!(nameRule = malloc(sizeof(*nameRule) + CHARSIZE * (name->length - 1)))) {
1448 		compileError(nested, "not enough memory");
1449 		_lou_outOfMemory();
1450 	}
1451 	memset(nameRule, 0, sizeof(*nameRule));
1452 	// a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z'
1453 	for (k = 0; k < name->length; k++) {
1454 		widechar c = name->chars[k];
1455 		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
1456 			nameRule->name[k] = c;
1457 		else {
1458 			compileError(nested, "a name may contain only letters");
1459 			return 0;
1460 		}
1461 	}
1462 	nameRule->length = name->length;
1463 	nameRule->ruleOffset = newRuleOffset;
1464 	nameRule->next = table->ruleNames;
1465 	table->ruleNames = nameRule;
1466 	return 1;
1467 }
1468 
1469 static void
deallocateRuleNames(TranslationTableHeader * table)1470 deallocateRuleNames(TranslationTableHeader *table) {
1471 	RuleName **ruleNames = &table->ruleNames;
1472 	while (*ruleNames) {
1473 		RuleName *nameRule = *ruleNames;
1474 		*ruleNames = nameRule->next;
1475 		if (nameRule) free(nameRule);
1476 	}
1477 }
1478 
1479 static int
compileSwapDots(FileInfo * nested,CharsString * source,CharsString * dest)1480 compileSwapDots(FileInfo *nested, CharsString *source, CharsString *dest) {
1481 	int k = 0;
1482 	int kk = 0;
1483 	CharsString dotsSource;
1484 	CharsString dotsDest;
1485 	dest->length = 0;
1486 	dotsSource.length = 0;
1487 	while (k <= source->length) {
1488 		if (source->chars[k] != ',' && k != source->length)
1489 			dotsSource.chars[dotsSource.length++] = source->chars[k];
1490 		else {
1491 			if (!parseDots(nested, &dotsDest, &dotsSource)) return 0;
1492 			dest->chars[dest->length++] = dotsDest.length + 1;
1493 			for (kk = 0; kk < dotsDest.length; kk++)
1494 				dest->chars[dest->length++] = dotsDest.chars[kk];
1495 			dotsSource.length = 0;
1496 		}
1497 		k++;
1498 	}
1499 	return 1;
1500 }
1501 
1502 static int
compileSwap(FileInfo * nested,TranslationTableOpcode opcode,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)1503 compileSwap(FileInfo *nested, TranslationTableOpcode opcode, int *lastToken,
1504 		TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
1505 		int nofor, TranslationTableHeader **table) {
1506 	CharsString ruleChars;
1507 	CharsString ruleDots;
1508 	CharsString name;
1509 	CharsString matches;
1510 	CharsString replacements;
1511 	TranslationTableOffset ruleOffset;
1512 	if (!getToken(nested, &name, "name operand", lastToken)) return 0;
1513 	if (!getToken(nested, &matches, "matches operand", lastToken)) return 0;
1514 	if (!getToken(nested, &replacements, "replacements operand", lastToken)) return 0;
1515 	if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) {
1516 		if (!parseChars(nested, &ruleChars, &matches)) return 0;
1517 	} else {
1518 		if (!compileSwapDots(nested, &matches, &ruleChars)) return 0;
1519 	}
1520 	if (opcode == CTO_SwapCc) {
1521 		if (!parseChars(nested, &ruleDots, &replacements)) return 0;
1522 	} else {
1523 		if (!compileSwapDots(nested, &replacements, &ruleDots)) return 0;
1524 	}
1525 	if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, newRule,
1526 				noback, nofor, table))
1527 		return 0;
1528 	if (!addRuleName(nested, &name, ruleOffset, *table)) return 0;
1529 	if (newRuleOffset) *newRuleOffset = ruleOffset;
1530 	return 1;
1531 }
1532 
1533 static int
getNumber(widechar * source,widechar * dest)1534 getNumber(widechar *source, widechar *dest) {
1535 	/* Convert a string of wide character digits to an integer */
1536 	int k = 0;
1537 	*dest = 0;
1538 	while (source[k] >= '0' && source[k] <= '9') *dest = 10 * *dest + (source[k++] - '0');
1539 	return k;
1540 }
1541 
1542 /* Start of multipass compiler */
1543 
1544 static int
passGetAttributes(CharsString * passLine,int * passLinepos,TranslationTableCharacterAttributes * passAttributes,FileInfo * passNested)1545 passGetAttributes(CharsString *passLine, int *passLinepos,
1546 		TranslationTableCharacterAttributes *passAttributes, FileInfo *passNested) {
1547 	int more = 1;
1548 	*passAttributes = 0;
1549 	while (more) {
1550 		switch (passLine->chars[*passLinepos]) {
1551 		case pass_any:
1552 			*passAttributes = 0xffffffff;
1553 			break;
1554 		case pass_digit:
1555 			*passAttributes |= CTC_Digit;
1556 			break;
1557 		case pass_litDigit:
1558 			*passAttributes |= CTC_LitDigit;
1559 			break;
1560 		case pass_letter:
1561 			*passAttributes |= CTC_Letter;
1562 			break;
1563 		case pass_math:
1564 			*passAttributes |= CTC_Math;
1565 			break;
1566 		case pass_punctuation:
1567 			*passAttributes |= CTC_Punctuation;
1568 			break;
1569 		case pass_sign:
1570 			*passAttributes |= CTC_Sign;
1571 			break;
1572 		case pass_space:
1573 			*passAttributes |= CTC_Space;
1574 			break;
1575 		case pass_uppercase:
1576 			*passAttributes |= CTC_UpperCase;
1577 			break;
1578 		case pass_lowercase:
1579 			*passAttributes |= CTC_LowerCase;
1580 			break;
1581 		case pass_class1:
1582 			*passAttributes |= CTC_Class1;
1583 			break;
1584 		case pass_class2:
1585 			*passAttributes |= CTC_Class2;
1586 			break;
1587 		case pass_class3:
1588 			*passAttributes |= CTC_Class3;
1589 			break;
1590 		case pass_class4:
1591 			*passAttributes |= CTC_Class4;
1592 			break;
1593 		default:
1594 			more = 0;
1595 			break;
1596 		}
1597 		if (more) (*passLinepos)++;
1598 	}
1599 	if (!*passAttributes) {
1600 		compileError(passNested, "missing attribute");
1601 		(*passLinepos)--;
1602 		return 0;
1603 	}
1604 	return 1;
1605 }
1606 
1607 static int
passGetDots(CharsString * passLine,int * passLinepos,CharsString * passHoldString,FileInfo * passNested)1608 passGetDots(CharsString *passLine, int *passLinepos, CharsString *passHoldString,
1609 		FileInfo *passNested) {
1610 	CharsString collectDots;
1611 	collectDots.length = 0;
1612 	while (*passLinepos < passLine->length &&
1613 			(passLine->chars[*passLinepos] == '-' ||
1614 					(passLine->chars[*passLinepos] >= '0' &&
1615 							passLine->chars[*passLinepos] <= '9') ||
1616 					((passLine->chars[*passLinepos] | 32) >= 'a' &&
1617 							(passLine->chars[*passLinepos] | 32) <= 'f')))
1618 		collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++];
1619 	if (!parseDots(passNested, passHoldString, &collectDots)) return 0;
1620 	return 1;
1621 }
1622 
1623 static int
passGetString(CharsString * passLine,int * passLinepos,CharsString * passHoldString,FileInfo * passNested)1624 passGetString(CharsString *passLine, int *passLinepos, CharsString *passHoldString,
1625 		FileInfo *passNested) {
1626 	passHoldString->length = 0;
1627 	while (1) {
1628 		if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) {
1629 			compileError(passNested, "unterminated string");
1630 			return 0;
1631 		}
1632 		if (passLine->chars[*passLinepos] == 34) break;
1633 		if (passLine->chars[*passLinepos] == QUOTESUB)
1634 			passHoldString->chars[passHoldString->length++] = 34;
1635 		else
1636 			passHoldString->chars[passHoldString->length++] =
1637 					passLine->chars[*passLinepos];
1638 		(*passLinepos)++;
1639 	}
1640 	passHoldString->chars[passHoldString->length] = 0;
1641 	(*passLinepos)++;
1642 	return 1;
1643 }
1644 
1645 static int
passGetNumber(CharsString * passLine,int * passLinepos,widechar * passHoldNumber)1646 passGetNumber(CharsString *passLine, int *passLinepos, widechar *passHoldNumber) {
1647 	/* Convert a string of wide character digits to an integer */
1648 	*passHoldNumber = 0;
1649 	while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') &&
1650 			(passLine->chars[*passLinepos] <= '9'))
1651 		*passHoldNumber =
1652 				10 * (*passHoldNumber) + (passLine->chars[(*passLinepos)++] - '0');
1653 	return 1;
1654 }
1655 
1656 static int
passGetVariableNumber(FileInfo * nested,CharsString * passLine,int * passLinepos,widechar * passHoldNumber)1657 passGetVariableNumber(FileInfo *nested, CharsString *passLine, int *passLinepos,
1658 		widechar *passHoldNumber) {
1659 	if (!passGetNumber(passLine, passLinepos, passHoldNumber)) return 0;
1660 	if ((*passHoldNumber >= 0) && (*passHoldNumber < NUMVAR)) return 1;
1661 	compileError(nested, "variable number out of range");
1662 	return 0;
1663 }
1664 
1665 static int
passGetName(CharsString * passLine,int * passLinepos,CharsString * passHoldString)1666 passGetName(CharsString *passLine, int *passLinepos, CharsString *passHoldString) {
1667 	passHoldString->length = 0;
1668 	// a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z'
1669 	do {
1670 		widechar c = passLine->chars[*passLinepos];
1671 		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
1672 			passHoldString->chars[passHoldString->length++] = c;
1673 			(*passLinepos)++;
1674 		} else {
1675 			break;
1676 		}
1677 	} while (*passLinepos < passLine->length);
1678 	return 1;
1679 }
1680 
1681 static inline int
wantsString(TranslationTableOpcode opcode,int actionPart,int nofor)1682 wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) {
1683 	if (opcode == CTO_Correct) return 1;
1684 	if (opcode != CTO_Context) return 0;
1685 	return !nofor == !actionPart;
1686 }
1687 
1688 static int
verifyStringOrDots(FileInfo * nested,TranslationTableOpcode opcode,int isString,int actionPart,int nofor)1689 verifyStringOrDots(FileInfo *nested, TranslationTableOpcode opcode, int isString,
1690 		int actionPart, int nofor) {
1691 	if (!wantsString(opcode, actionPart, nofor) == !isString) return 1;
1692 
1693 	compileError(nested, "%s are not allowed in the %s part of a %s translation %s rule.",
1694 			isString ? "strings" : "dots", getPartName(actionPart),
1695 			nofor ? "backward" : "forward", _lou_findOpcodeName(opcode));
1696 
1697 	return 0;
1698 }
1699 
1700 static int
compilePassOpcode(FileInfo * nested,TranslationTableOpcode opcode,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)1701 compilePassOpcode(FileInfo *nested, TranslationTableOpcode opcode,
1702 		TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
1703 		int nofor, TranslationTableHeader **table) {
1704 	static CharsString passRuleChars;
1705 	static CharsString passRuleDots;
1706 	/* Compile the operands of a pass opcode */
1707 	widechar passSubOp;
1708 	const CharacterClass *class;
1709 	TranslationTableOffset ruleOffset = 0;
1710 	TranslationTableRule *rule = NULL;
1711 	int k;
1712 	int kk = 0;
1713 	int endTest = 0;
1714 	widechar *passInstructions = passRuleDots.chars;
1715 	int passIC = 0; /* Instruction counter */
1716 	passRuleChars.length = 0;
1717 	FileInfo *passNested = nested;
1718 	CharsString passHoldString;
1719 	widechar passHoldNumber;
1720 	CharsString passLine;
1721 	int passLinepos = 0;
1722 	TranslationTableCharacterAttributes passAttributes;
1723 	passHoldString.length = 0;
1724 	for (k = nested->linepos; k < nested->linelen; k++)
1725 		passHoldString.chars[passHoldString.length++] = nested->line[k];
1726 #define SEPCHAR 0x0001
1727 	for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++)
1728 		;
1729 	if (k < passHoldString.length)
1730 		passHoldString.chars[k] = SEPCHAR;
1731 	else {
1732 		compileError(passNested, "Invalid multipass operands");
1733 		return 0;
1734 	}
1735 	parseChars(passNested, &passLine, &passHoldString);
1736 	/* Compile test part */
1737 	for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++)
1738 		;
1739 	endTest = k;
1740 	passLine.chars[endTest] = pass_endTest;
1741 	passLinepos = 0;
1742 	while (passLinepos <= endTest) {
1743 		if (passIC >= MAXSTRING) {
1744 			compileError(passNested, "Test part in multipass operand too long");
1745 			return 0;
1746 		}
1747 		switch ((passSubOp = passLine.chars[passLinepos])) {
1748 		case pass_lookback:
1749 			passInstructions[passIC++] = pass_lookback;
1750 			passLinepos++;
1751 			passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1752 			if (passHoldNumber == 0) passHoldNumber = 1;
1753 			passInstructions[passIC++] = passHoldNumber;
1754 			break;
1755 		case pass_not:
1756 			passInstructions[passIC++] = pass_not;
1757 			passLinepos++;
1758 			break;
1759 		case pass_first:
1760 			passInstructions[passIC++] = pass_first;
1761 			passLinepos++;
1762 			break;
1763 		case pass_last:
1764 			passInstructions[passIC++] = pass_last;
1765 			passLinepos++;
1766 			break;
1767 		case pass_search:
1768 			passInstructions[passIC++] = pass_search;
1769 			passLinepos++;
1770 			break;
1771 		case pass_string:
1772 			if (!verifyStringOrDots(nested, opcode, 1, 0, nofor)) {
1773 				return 0;
1774 			}
1775 			passLinepos++;
1776 			passInstructions[passIC++] = pass_string;
1777 			passGetString(&passLine, &passLinepos, &passHoldString, passNested);
1778 			goto testDoCharsDots;
1779 		case pass_dots:
1780 			if (!verifyStringOrDots(nested, opcode, 0, 0, nofor)) {
1781 				return 0;
1782 			}
1783 			passLinepos++;
1784 			passInstructions[passIC++] = pass_dots;
1785 			passGetDots(&passLine, &passLinepos, &passHoldString, passNested);
1786 		testDoCharsDots:
1787 			if (passHoldString.length == 0) return 0;
1788 			if (passIC >= MAXSTRING) {
1789 				compileError(passNested,
1790 						"@ operand in test part of multipass operand too long");
1791 				return 0;
1792 			}
1793 			passInstructions[passIC++] = passHoldString.length;
1794 			for (kk = 0; kk < passHoldString.length; kk++) {
1795 				if (passIC >= MAXSTRING) {
1796 					compileError(passNested,
1797 							"@ operand in test part of multipass operand too long");
1798 					return 0;
1799 				}
1800 				passInstructions[passIC++] = passHoldString.chars[kk];
1801 			}
1802 			break;
1803 		case pass_startReplace:
1804 			passInstructions[passIC++] = pass_startReplace;
1805 			passLinepos++;
1806 			break;
1807 		case pass_endReplace:
1808 			passInstructions[passIC++] = pass_endReplace;
1809 			passLinepos++;
1810 			break;
1811 		case pass_variable:
1812 			passLinepos++;
1813 			if (!passGetVariableNumber(nested, &passLine, &passLinepos, &passHoldNumber))
1814 				return 0;
1815 			switch (passLine.chars[passLinepos]) {
1816 			case pass_eq:
1817 				passInstructions[passIC++] = pass_eq;
1818 				goto doComp;
1819 			case pass_lt:
1820 				if (passLine.chars[passLinepos + 1] == pass_eq) {
1821 					passLinepos++;
1822 					passInstructions[passIC++] = pass_lteq;
1823 				} else
1824 					passInstructions[passIC++] = pass_lt;
1825 				goto doComp;
1826 			case pass_gt:
1827 				if (passLine.chars[passLinepos + 1] == pass_eq) {
1828 					passLinepos++;
1829 					passInstructions[passIC++] = pass_gteq;
1830 				} else
1831 					passInstructions[passIC++] = pass_gt;
1832 			doComp:
1833 				passInstructions[passIC++] = passHoldNumber;
1834 				passLinepos++;
1835 				passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1836 				passInstructions[passIC++] = passHoldNumber;
1837 				break;
1838 			default:
1839 				compileError(passNested, "incorrect comparison operator");
1840 				return 0;
1841 			}
1842 			break;
1843 		case pass_attributes:
1844 			passLinepos++;
1845 			if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, passNested))
1846 				return 0;
1847 		insertAttributes:
1848 			passInstructions[passIC++] = pass_attributes;
1849 			passInstructions[passIC++] = passAttributes >> 16;
1850 			passInstructions[passIC++] = passAttributes & 0xffff;
1851 		getRange:
1852 			if (passLine.chars[passLinepos] == pass_until) {
1853 				passLinepos++;
1854 				passInstructions[passIC++] = 1;
1855 				passInstructions[passIC++] = 0xffff;
1856 				break;
1857 			}
1858 			passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1859 			if (passHoldNumber == 0) {
1860 				passHoldNumber = passInstructions[passIC++] = 1;
1861 				passInstructions[passIC++] = 1; /* This is not an error */
1862 				break;
1863 			}
1864 			passInstructions[passIC++] = passHoldNumber;
1865 			if (passLine.chars[passLinepos] != pass_hyphen) {
1866 				passInstructions[passIC++] = passHoldNumber;
1867 				break;
1868 			}
1869 			passLinepos++;
1870 			passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1871 			if (passHoldNumber == 0) {
1872 				compileError(passNested, "invalid range");
1873 				return 0;
1874 			}
1875 			passInstructions[passIC++] = passHoldNumber;
1876 			break;
1877 		case pass_groupstart:
1878 		case pass_groupend:
1879 			passLinepos++;
1880 			passGetName(&passLine, &passLinepos, &passHoldString);
1881 			ruleOffset = findRuleName(&passHoldString, *table);
1882 			if (ruleOffset)
1883 				rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
1884 			if (rule && rule->opcode == CTO_Grouping) {
1885 				passInstructions[passIC++] = passSubOp;
1886 				passInstructions[passIC++] = ruleOffset >> 16;
1887 				passInstructions[passIC++] = ruleOffset & 0xffff;
1888 				break;
1889 			} else {
1890 				compileError(passNested, "%s is not a grouping name",
1891 						_lou_showString(
1892 								&passHoldString.chars[0], passHoldString.length, 0));
1893 				return 0;
1894 			}
1895 			break;
1896 		case pass_swap:
1897 			passLinepos++;
1898 			passGetName(&passLine, &passLinepos, &passHoldString);
1899 			if ((class = findCharacterClass(&passHoldString, *table))) {
1900 				passAttributes = class->attribute;
1901 				goto insertAttributes;
1902 			}
1903 			ruleOffset = findRuleName(&passHoldString, *table);
1904 			if (ruleOffset)
1905 				rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
1906 			if (rule &&
1907 					(rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd ||
1908 							rule->opcode == CTO_SwapDd)) {
1909 				passInstructions[passIC++] = pass_swap;
1910 				passInstructions[passIC++] = ruleOffset >> 16;
1911 				passInstructions[passIC++] = ruleOffset & 0xffff;
1912 				goto getRange;
1913 			}
1914 			compileError(passNested, "%s is neither a class name nor a swap name.",
1915 					_lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
1916 			return 0;
1917 		case pass_endTest:
1918 			passInstructions[passIC++] = pass_endTest;
1919 			passLinepos++;
1920 			break;
1921 		default:
1922 			compileError(passNested, "incorrect operator '%c ' in test part",
1923 					passLine.chars[passLinepos]);
1924 			return 0;
1925 		}
1926 
1927 	} /* Compile action part */
1928 
1929 	/* Compile action part */
1930 	while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32)
1931 		passLinepos++;
1932 	while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) {
1933 		if (passIC >= MAXSTRING) {
1934 			compileError(passNested, "Action part in multipass operand too long");
1935 			return 0;
1936 		}
1937 		switch ((passSubOp = passLine.chars[passLinepos])) {
1938 		case pass_string:
1939 			if (!verifyStringOrDots(nested, opcode, 1, 1, nofor)) {
1940 				return 0;
1941 			}
1942 			passLinepos++;
1943 			passInstructions[passIC++] = pass_string;
1944 			passGetString(&passLine, &passLinepos, &passHoldString, passNested);
1945 			goto actionDoCharsDots;
1946 		case pass_dots:
1947 			if (!verifyStringOrDots(nested, opcode, 0, 1, nofor)) {
1948 				return 0;
1949 			}
1950 			passLinepos++;
1951 			passGetDots(&passLine, &passLinepos, &passHoldString, passNested);
1952 			passInstructions[passIC++] = pass_dots;
1953 		actionDoCharsDots:
1954 			if (passHoldString.length == 0) return 0;
1955 			if (passIC >= MAXSTRING) {
1956 				compileError(passNested,
1957 						"@ operand in action part of multipass operand too long");
1958 				return 0;
1959 			}
1960 			passInstructions[passIC++] = passHoldString.length;
1961 			for (kk = 0; kk < passHoldString.length; kk++) {
1962 				if (passIC >= MAXSTRING) {
1963 					compileError(passNested,
1964 							"@ operand in action part of multipass operand too long");
1965 					return 0;
1966 				}
1967 				passInstructions[passIC++] = passHoldString.chars[kk];
1968 			}
1969 			break;
1970 		case pass_variable:
1971 			passLinepos++;
1972 			if (!passGetVariableNumber(nested, &passLine, &passLinepos, &passHoldNumber))
1973 				return 0;
1974 			switch (passLine.chars[passLinepos]) {
1975 			case pass_eq:
1976 				passInstructions[passIC++] = pass_eq;
1977 				passInstructions[passIC++] = passHoldNumber;
1978 				passLinepos++;
1979 				passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1980 				passInstructions[passIC++] = passHoldNumber;
1981 				break;
1982 			case pass_plus:
1983 			case pass_hyphen:
1984 				passInstructions[passIC++] = passLine.chars[passLinepos++];
1985 				passInstructions[passIC++] = passHoldNumber;
1986 				break;
1987 			default:
1988 				compileError(passNested, "incorrect variable operator in action part");
1989 				return 0;
1990 			}
1991 			break;
1992 		case pass_copy:
1993 			passInstructions[passIC++] = pass_copy;
1994 			passLinepos++;
1995 			break;
1996 		case pass_omit:
1997 			passInstructions[passIC++] = pass_omit;
1998 			passLinepos++;
1999 			break;
2000 		case pass_groupreplace:
2001 		case pass_groupstart:
2002 		case pass_groupend:
2003 			passLinepos++;
2004 			passGetName(&passLine, &passLinepos, &passHoldString);
2005 			ruleOffset = findRuleName(&passHoldString, *table);
2006 			if (ruleOffset)
2007 				rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2008 			if (rule && rule->opcode == CTO_Grouping) {
2009 				passInstructions[passIC++] = passSubOp;
2010 				passInstructions[passIC++] = ruleOffset >> 16;
2011 				passInstructions[passIC++] = ruleOffset & 0xffff;
2012 				break;
2013 			}
2014 			compileError(passNested, "%s is not a grouping name",
2015 					_lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
2016 			return 0;
2017 		case pass_swap:
2018 			passLinepos++;
2019 			passGetName(&passLine, &passLinepos, &passHoldString);
2020 			ruleOffset = findRuleName(&passHoldString, *table);
2021 			if (ruleOffset)
2022 				rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2023 			if (rule &&
2024 					(rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd ||
2025 							rule->opcode == CTO_SwapDd)) {
2026 				passInstructions[passIC++] = pass_swap;
2027 				passInstructions[passIC++] = ruleOffset >> 16;
2028 				passInstructions[passIC++] = ruleOffset & 0xffff;
2029 				break;
2030 			}
2031 			compileError(passNested, "%s is not a swap name.",
2032 					_lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
2033 			return 0;
2034 			break;
2035 		default:
2036 			compileError(passNested, "incorrect operator in action part");
2037 			return 0;
2038 		}
2039 	}
2040 
2041 	/* Analyze and add rule */
2042 	passRuleDots.length = passIC;
2043 
2044 	{
2045 		widechar *characters;
2046 		int length;
2047 		int found = passFindCharacters(
2048 				passNested, passInstructions, passRuleDots.length, &characters, &length);
2049 
2050 		if (!found) return 0;
2051 
2052 		if (characters) {
2053 			for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k];
2054 			passRuleChars.length = k;
2055 		}
2056 	}
2057 
2058 	if (!addRule(passNested, opcode, &passRuleChars, &passRuleDots, 0, 0, newRuleOffset,
2059 				newRule, noback, nofor, table))
2060 		return 0;
2061 	return 1;
2062 }
2063 
2064 /* End of multipass compiler */
2065 
2066 static int
compileBrailleIndicator(FileInfo * nested,const char * ermsg,TranslationTableOpcode opcode,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)2067 compileBrailleIndicator(FileInfo *nested, const char *ermsg,
2068 		TranslationTableOpcode opcode, int *lastToken,
2069 		TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
2070 		int nofor, TranslationTableHeader **table) {
2071 	CharsString token;
2072 	CharsString cells;
2073 	if (getToken(nested, &token, ermsg, lastToken))
2074 		if (parseDots(nested, &cells, &token))
2075 			if (!addRule(nested, opcode, NULL, &cells, 0, 0, newRuleOffset, newRule,
2076 						noback, nofor, table))
2077 				return 0;
2078 	return 1;
2079 }
2080 
2081 static int
compileNumber(FileInfo * nested,int * lastToken)2082 compileNumber(FileInfo *nested, int *lastToken) {
2083 	CharsString token;
2084 	widechar dest;
2085 	if (!getToken(nested, &token, "number", lastToken)) return 0;
2086 	getNumber(&token.chars[0], &dest);
2087 	if (!(dest > 0)) {
2088 		compileError(nested, "a nonzero positive number is required");
2089 		return 0;
2090 	}
2091 	return dest;
2092 }
2093 
2094 static int
compileGrouping(FileInfo * nested,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2095 compileGrouping(FileInfo *nested, int *lastToken, TranslationTableOffset *newRuleOffset,
2096 		TranslationTableRule **newRule, int noback, int nofor,
2097 		TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2098 	int k;
2099 	CharsString name;
2100 	CharsString groupChars;
2101 	CharsString groupDots;
2102 	CharsString dotsParsed;
2103 	if (!getToken(nested, &name, "name operand", lastToken)) return 0;
2104 	if (!getRuleCharsText(nested, &groupChars, lastToken)) return 0;
2105 	if (!getToken(nested, &groupDots, "dots operand", lastToken)) return 0;
2106 	for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++)
2107 		;
2108 	if (k == groupDots.length) {
2109 		compileError(
2110 				nested, "Dots operand must consist of two cells separated by a comma");
2111 		return 0;
2112 	}
2113 	groupDots.chars[k] = '-';
2114 	if (!parseDots(nested, &dotsParsed, &groupDots)) return 0;
2115 	if (groupChars.length != 2 || dotsParsed.length != 2) {
2116 		compileError(nested,
2117 				"two Unicode characters and two cells separated by a comma are needed.");
2118 		return 0;
2119 	}
2120 	if (table) {
2121 		TranslationTableOffset ruleOffset;
2122 		TranslationTableCharacter *charsDotsPtr;
2123 		charsDotsPtr = addCharOrDots(nested, groupChars.chars[0], 0, table);
2124 		charsDotsPtr->attributes |= CTC_Math;
2125 		charsDotsPtr->uppercase = charsDotsPtr->realchar;
2126 		charsDotsPtr->lowercase = charsDotsPtr->realchar;
2127 		charsDotsPtr = addCharOrDots(nested, groupChars.chars[1], 0, table);
2128 		charsDotsPtr->attributes |= CTC_Math;
2129 		charsDotsPtr->uppercase = charsDotsPtr->realchar;
2130 		charsDotsPtr->lowercase = charsDotsPtr->realchar;
2131 		charsDotsPtr = addCharOrDots(nested, dotsParsed.chars[0], 1, table);
2132 		charsDotsPtr->attributes |= CTC_Math;
2133 		charsDotsPtr->uppercase = charsDotsPtr->realchar;
2134 		charsDotsPtr->lowercase = charsDotsPtr->realchar;
2135 		charsDotsPtr = addCharOrDots(nested, dotsParsed.chars[1], 1, table);
2136 		charsDotsPtr->attributes |= CTC_Math;
2137 		charsDotsPtr->uppercase = charsDotsPtr->realchar;
2138 		charsDotsPtr->lowercase = charsDotsPtr->realchar;
2139 		if (!addRule(nested, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset,
2140 					newRule, noback, nofor, table))
2141 			return 0;
2142 		if (!addRuleName(nested, &name, ruleOffset, *table)) return 0;
2143 		if (newRuleOffset) *newRuleOffset = ruleOffset;
2144 	}
2145 	if (displayTable) {
2146 		putCharAndDots(nested, groupChars.chars[0], dotsParsed.chars[0], displayTable);
2147 		putCharAndDots(nested, groupChars.chars[1], dotsParsed.chars[1], displayTable);
2148 	}
2149 	if (table) {
2150 		widechar endChar;
2151 		widechar endDots;
2152 		endChar = groupChars.chars[1];
2153 		endDots = dotsParsed.chars[1];
2154 		groupChars.length = dotsParsed.length = 1;
2155 		if (!addRule(nested, CTO_Math, &groupChars, &dotsParsed, 0, 0, newRuleOffset,
2156 					newRule, noback, nofor, table))
2157 			return 0;
2158 		groupChars.chars[0] = endChar;
2159 		dotsParsed.chars[0] = endDots;
2160 		if (!addRule(nested, CTO_Math, &groupChars, &dotsParsed, 0, 0, newRuleOffset,
2161 					newRule, noback, nofor, table))
2162 			return 0;
2163 	}
2164 	return 1;
2165 }
2166 
2167 static int
compileUplow(FileInfo * nested,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2168 compileUplow(FileInfo *nested, int *lastToken, TranslationTableOffset *newRuleOffset,
2169 		TranslationTableRule **newRule, int noback, int nofor,
2170 		TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2171 	int k;
2172 	TranslationTableCharacter *upperChar;
2173 	TranslationTableCharacter *lowerChar;
2174 	TranslationTableCharacter *upperCell = NULL;
2175 	TranslationTableCharacter *lowerCell = NULL;
2176 	CharsString ruleChars;
2177 	CharsString ruleDots;
2178 	CharsString upperDots;
2179 	CharsString lowerDots;
2180 	int haveLowerDots = 0;
2181 	TranslationTableCharacterAttributes attr;
2182 	if (!getRuleCharsText(nested, &ruleChars, lastToken)) return 0;
2183 	if (!getToken(nested, &ruleDots, "dots operand", lastToken)) return 0;
2184 	for (k = 0; k < ruleDots.length && ruleDots.chars[k] != ','; k++)
2185 		;
2186 	if (k == ruleDots.length) {
2187 		if (!parseDots(nested, &upperDots, &ruleDots)) return 0;
2188 		lowerDots.length = upperDots.length;
2189 		for (k = 0; k < upperDots.length; k++) lowerDots.chars[k] = upperDots.chars[k];
2190 		lowerDots.chars[k] = 0;
2191 	} else {
2192 		haveLowerDots = ruleDots.length;
2193 		ruleDots.length = k;
2194 		if (!parseDots(nested, &upperDots, &ruleDots)) return 0;
2195 		ruleDots.length = 0;
2196 		k++;
2197 		for (; k < haveLowerDots; k++)
2198 			ruleDots.chars[ruleDots.length++] = ruleDots.chars[k];
2199 		if (!parseDots(nested, &lowerDots, &ruleDots)) return 0;
2200 	}
2201 	if (ruleChars.length != 2 || upperDots.length < 1) {
2202 		compileError(nested,
2203 				"Exactly two Unicode characters and at least one cell are required.");
2204 		return 0;
2205 	}
2206 	if (haveLowerDots && lowerDots.length < 1) {
2207 		compileError(nested, "at least one cell is required after the comma.");
2208 		return 0;
2209 	}
2210 	if (table) {
2211 		upperChar = addCharOrDots(nested, ruleChars.chars[0], 0, table);
2212 		upperChar->attributes |= CTC_Letter | CTC_UpperCase;
2213 		upperChar->uppercase = ruleChars.chars[0];
2214 		upperChar->lowercase = ruleChars.chars[1];
2215 		lowerChar = addCharOrDots(nested, ruleChars.chars[1], 0, table);
2216 		lowerChar->attributes |= CTC_Letter | CTC_LowerCase;
2217 		lowerChar->uppercase = ruleChars.chars[0];
2218 		lowerChar->lowercase = ruleChars.chars[1];
2219 		for (k = 0; k < upperDots.length; k++)
2220 			if (!compile_findCharOrDots(upperDots.chars[k], 1, *table)) {
2221 				attr = CTC_Letter | CTC_UpperCase;
2222 				upperCell = addCharOrDots(nested, upperDots.chars[k], 1, table);
2223 				upperCell->attributes |= attr;
2224 				upperCell->uppercase = upperCell->realchar;
2225 			}
2226 		if (haveLowerDots) {
2227 			for (k = 0; k < lowerDots.length; k++)
2228 				if (!compile_findCharOrDots(lowerDots.chars[k], 1, *table)) {
2229 					attr = CTC_Letter | CTC_LowerCase;
2230 					lowerCell = addCharOrDots(nested, lowerDots.chars[k], 1, table);
2231 					if (lowerDots.length != 1) attr = CTC_Space;
2232 					lowerCell->attributes |= attr;
2233 					lowerCell->lowercase = lowerCell->realchar;
2234 				}
2235 		} else if (upperCell != NULL && upperDots.length == 1)
2236 			upperCell->attributes |= CTC_LowerCase;
2237 		if (upperCell != NULL) upperCell->lowercase = lowerDots.chars[0];
2238 		if (lowerCell != NULL) lowerCell->uppercase = upperDots.chars[0];
2239 	}
2240 	if (displayTable) {
2241 		if (lowerDots.length == 1)
2242 			putCharAndDots(nested, ruleChars.chars[1], lowerDots.chars[0], displayTable);
2243 		if (upperDots.length == 1)
2244 			putCharAndDots(nested, ruleChars.chars[0], upperDots.chars[0], displayTable);
2245 	}
2246 	if (table) {
2247 		ruleChars.length = 1;
2248 		ruleChars.chars[2] = ruleChars.chars[0];
2249 		ruleChars.chars[0] = ruleChars.chars[1];
2250 		if (!addRule(nested, CTO_LowerCase, &ruleChars, &lowerDots, 0, 0, newRuleOffset,
2251 					newRule, noback, nofor, table))
2252 			return 0;
2253 		ruleChars.chars[0] = ruleChars.chars[2];
2254 		if (!addRule(nested, CTO_UpperCase, &ruleChars, &upperDots, 0, 0, newRuleOffset,
2255 					newRule, noback, nofor, table))
2256 			return 0;
2257 	}
2258 	return 1;
2259 }
2260 
2261 /* Functions for compiling hyphenation tables */
2262 
2263 typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */
2264 	int numStates;
2265 	HyphenationState *states;
2266 } HyphenDict;
2267 
2268 #define DEFAULTSTATE 0xffff
2269 #define HYPHENHASHSIZE 8191
2270 
2271 typedef struct HyphenHashEntry {
2272 	struct HyphenHashEntry *next;
2273 	CharsString *key;
2274 	int val;
2275 } HyphenHashEntry;
2276 
2277 typedef struct HyphenHashTab {
2278 	HyphenHashEntry *entries[HYPHENHASHSIZE];
2279 } HyphenHashTab;
2280 
2281 /* a hash function from ASU - adapted from Gtk+ */
2282 static unsigned int
hyphenStringHash(const CharsString * s)2283 hyphenStringHash(const CharsString *s) {
2284 	int k;
2285 	unsigned int h = 0, g;
2286 	for (k = 0; k < s->length; k++) {
2287 		h = (h << 4) + s->chars[k];
2288 		if ((g = h & 0xf0000000)) {
2289 			h = h ^ (g >> 24);
2290 			h = h ^ g;
2291 		}
2292 	}
2293 	return h;
2294 }
2295 
2296 static HyphenHashTab *
hyphenHashNew(void)2297 hyphenHashNew(void) {
2298 	HyphenHashTab *hashTab;
2299 	if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory();
2300 	memset(hashTab, 0, sizeof(HyphenHashTab));
2301 	return hashTab;
2302 }
2303 
2304 static void
hyphenHashFree(HyphenHashTab * hashTab)2305 hyphenHashFree(HyphenHashTab *hashTab) {
2306 	int i;
2307 	HyphenHashEntry *e, *next;
2308 	for (i = 0; i < HYPHENHASHSIZE; i++)
2309 		for (e = hashTab->entries[i]; e; e = next) {
2310 			next = e->next;
2311 			free(e->key);
2312 			free(e);
2313 		}
2314 	free(hashTab);
2315 }
2316 
2317 /* assumes that key is not already present! */
2318 static void
hyphenHashInsert(HyphenHashTab * hashTab,const CharsString * key,int val)2319 hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) {
2320 	int i, j;
2321 	HyphenHashEntry *e;
2322 	i = hyphenStringHash(key) % HYPHENHASHSIZE;
2323 	if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory();
2324 	e->next = hashTab->entries[i];
2325 	e->key = malloc((key->length + 1) * CHARSIZE);
2326 	if (!e->key) _lou_outOfMemory();
2327 	e->key->length = key->length;
2328 	for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j];
2329 	e->val = val;
2330 	hashTab->entries[i] = e;
2331 }
2332 
2333 /* return val if found, otherwise DEFAULTSTATE */
2334 static int
hyphenHashLookup(HyphenHashTab * hashTab,const CharsString * key)2335 hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) {
2336 	int i, j;
2337 	HyphenHashEntry *e;
2338 	if (key->length == 0) return 0;
2339 	i = hyphenStringHash(key) % HYPHENHASHSIZE;
2340 	for (e = hashTab->entries[i]; e; e = e->next) {
2341 		if (key->length != e->key->length) continue;
2342 		for (j = 0; j < key->length; j++)
2343 			if (key->chars[j] != e->key->chars[j]) break;
2344 		if (j == key->length) return e->val;
2345 	}
2346 	return DEFAULTSTATE;
2347 }
2348 
2349 static int
hyphenGetNewState(HyphenDict * dict,HyphenHashTab * hashTab,const CharsString * string)2350 hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) {
2351 	hyphenHashInsert(hashTab, string, dict->numStates);
2352 	/* predicate is true if dict->numStates is a power of two */
2353 	if (!(dict->numStates & (dict->numStates - 1)))
2354 		dict->states =
2355 				realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState));
2356 	if (!dict->states) _lou_outOfMemory();
2357 	dict->states[dict->numStates].hyphenPattern = 0;
2358 	dict->states[dict->numStates].fallbackState = DEFAULTSTATE;
2359 	dict->states[dict->numStates].numTrans = 0;
2360 	dict->states[dict->numStates].trans.pointer = NULL;
2361 	return dict->numStates++;
2362 }
2363 
2364 /* add a transition from state1 to state2 through ch - assumes that the
2365  * transition does not already exist */
2366 static void
hyphenAddTrans(HyphenDict * dict,int state1,int state2,widechar ch)2367 hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) {
2368 	int numTrans;
2369 	numTrans = dict->states[state1].numTrans;
2370 	if (numTrans == 0)
2371 		dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans));
2372 	else if (!(numTrans & (numTrans - 1)))
2373 		dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer,
2374 				(numTrans << 1) * sizeof(HyphenationTrans));
2375 	dict->states[state1].trans.pointer[numTrans].ch = ch;
2376 	dict->states[state1].trans.pointer[numTrans].newState = state2;
2377 	dict->states[state1].numTrans++;
2378 }
2379 
2380 static int
compileHyphenation(FileInfo * nested,CharsString * encoding,int * lastToken,TranslationTableHeader ** table)2381 compileHyphenation(FileInfo *nested, CharsString *encoding, int *lastToken,
2382 		TranslationTableHeader **table) {
2383 	CharsString hyph;
2384 	HyphenationTrans *holdPointer;
2385 	HyphenHashTab *hashTab;
2386 	CharsString word;
2387 	char pattern[MAXSTRING + 1];
2388 	unsigned int stateNum = 0, lastState = 0;
2389 	int i, j, k = encoding->length;
2390 	widechar ch;
2391 	int found;
2392 	HyphenHashEntry *e;
2393 	HyphenDict dict;
2394 	TranslationTableOffset holdOffset;
2395 	/* Set aside enough space for hyphenation states and transitions in
2396 	 * translation table. Must be done before anything else */
2397 	allocateSpaceInTranslationTable(nested, NULL, 250000, table);
2398 	hashTab = hyphenHashNew();
2399 	dict.numStates = 1;
2400 	dict.states = malloc(sizeof(HyphenationState));
2401 	if (!dict.states) _lou_outOfMemory();
2402 	dict.states[0].hyphenPattern = 0;
2403 	dict.states[0].fallbackState = DEFAULTSTATE;
2404 	dict.states[0].numTrans = 0;
2405 	dict.states[0].trans.pointer = NULL;
2406 	do {
2407 		if (encoding->chars[0] == 'I') {
2408 			if (!getToken(nested, &hyph, NULL, lastToken)) continue;
2409 		} else {
2410 			/* UTF-8 */
2411 			if (!getToken(nested, &word, NULL, lastToken)) continue;
2412 			parseChars(nested, &hyph, &word);
2413 		}
2414 		if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' ||
2415 				hyph.chars[0] == '<')
2416 			continue; /* comment */
2417 		j = 0;
2418 		pattern[j] = '0';
2419 		for (i = 0; i < hyph.length; i++) {
2420 			if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9')
2421 				pattern[j] = (char)hyph.chars[i];
2422 			else {
2423 				word.chars[j] = hyph.chars[i];
2424 				pattern[++j] = '0';
2425 			}
2426 		}
2427 		word.chars[j] = 0;
2428 		word.length = j;
2429 		pattern[j + 1] = 0;
2430 		for (i = 0; pattern[i] == '0'; i++)
2431 			;
2432 		found = hyphenHashLookup(hashTab, &word);
2433 		if (found != DEFAULTSTATE)
2434 			stateNum = found;
2435 		else
2436 			stateNum = hyphenGetNewState(&dict, hashTab, &word);
2437 		k = j + 2 - i;
2438 		if (k > 0) {
2439 			allocateSpaceInTranslationTable(
2440 					nested, &dict.states[stateNum].hyphenPattern, k, table);
2441 			memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i],
2442 					k);
2443 		}
2444 		/* now, put in the prefix transitions */
2445 		while (found == DEFAULTSTATE) {
2446 			lastState = stateNum;
2447 			ch = word.chars[word.length-- - 1];
2448 			found = hyphenHashLookup(hashTab, &word);
2449 			if (found != DEFAULTSTATE)
2450 				stateNum = found;
2451 			else
2452 				stateNum = hyphenGetNewState(&dict, hashTab, &word);
2453 			hyphenAddTrans(&dict, stateNum, lastState, ch);
2454 		}
2455 	} while (_lou_getALine(nested));
2456 	/* put in the fallback states */
2457 	for (i = 0; i < HYPHENHASHSIZE; i++) {
2458 		for (e = hashTab->entries[i]; e; e = e->next) {
2459 			for (j = 1; j <= e->key->length; j++) {
2460 				word.length = 0;
2461 				for (k = j; k < e->key->length; k++)
2462 					word.chars[word.length++] = e->key->chars[k];
2463 				stateNum = hyphenHashLookup(hashTab, &word);
2464 				if (stateNum != DEFAULTSTATE) break;
2465 			}
2466 			if (e->val) dict.states[e->val].fallbackState = stateNum;
2467 		}
2468 	}
2469 	hyphenHashFree(hashTab);
2470 	/* Transfer hyphenation information to table */
2471 	for (i = 0; i < dict.numStates; i++) {
2472 		if (dict.states[i].numTrans == 0)
2473 			dict.states[i].trans.offset = 0;
2474 		else {
2475 			holdPointer = dict.states[i].trans.pointer;
2476 			allocateSpaceInTranslationTable(nested, &dict.states[i].trans.offset,
2477 					dict.states[i].numTrans * sizeof(HyphenationTrans), table);
2478 			memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer,
2479 					dict.states[i].numTrans * sizeof(HyphenationTrans));
2480 			free(holdPointer);
2481 		}
2482 	}
2483 	allocateSpaceInTranslationTable(
2484 			nested, &holdOffset, dict.numStates * sizeof(HyphenationState), table);
2485 	(*table)->hyphenStatesArray = holdOffset;
2486 	/* Prevents segmentation fault if table is reallocated */
2487 	memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0],
2488 			dict.numStates * sizeof(HyphenationState));
2489 	free(dict.states);
2490 	return 1;
2491 }
2492 
2493 static int
compileCharDef(FileInfo * nested,TranslationTableOpcode opcode,TranslationTableCharacterAttributes attributes,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2494 compileCharDef(FileInfo *nested, TranslationTableOpcode opcode,
2495 		TranslationTableCharacterAttributes attributes, int *lastToken,
2496 		TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
2497 		int nofor, TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2498 	CharsString ruleChars;
2499 	CharsString ruleDots;
2500 	if (!getRuleCharsText(nested, &ruleChars, lastToken)) return 0;
2501 	if (!getRuleDotsPattern(nested, &ruleDots, lastToken)) return 0;
2502 	if (ruleChars.length != 1) {
2503 		compileError(nested, "Exactly one character is required.");
2504 		return 0;
2505 	}
2506 	if (ruleDots.length < 1) {
2507 		compileError(nested, "At least one cell is required.");
2508 		return 0;
2509 	}
2510 	if (table) {
2511 		TranslationTableCharacter *character;
2512 		TranslationTableCharacter *cell = NULL;
2513 		int k;
2514 		if (attributes & (CTC_UpperCase | CTC_LowerCase)) attributes |= CTC_Letter;
2515 		character = addCharOrDots(nested, ruleChars.chars[0], 0, table);
2516 		character->attributes |= attributes;
2517 		character->uppercase = character->lowercase = character->realchar;
2518 		for (k = ruleDots.length - 1; k >= 0; k -= 1) {
2519 			cell = compile_findCharOrDots(ruleDots.chars[k], 1, *table);
2520 			if (!cell) {
2521 				cell = addCharOrDots(nested, ruleDots.chars[k], 1, table);
2522 				cell->uppercase = cell->lowercase = cell->realchar;
2523 			}
2524 		}
2525 		if (ruleDots.length == 1) cell->attributes |= attributes;
2526 	}
2527 	if (displayTable && ruleDots.length == 1)
2528 		putCharAndDots(nested, ruleChars.chars[0], ruleDots.chars[0], displayTable);
2529 	if (table)
2530 		if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, newRuleOffset, newRule,
2531 					noback, nofor, table))
2532 			return 0;
2533 	return 1;
2534 }
2535 
2536 static int
compileBeforeAfter(FileInfo * nested,int * lastToken)2537 compileBeforeAfter(FileInfo *nested, int *lastToken) {
2538 	/* 1=before, 2=after, 0=error */
2539 	CharsString token;
2540 	CharsString tmp;
2541 	if (getToken(nested, &token, "last word before or after", lastToken))
2542 		if (parseChars(nested, &tmp, &token)) {
2543 			if (eqasc2uni((unsigned char *)"before", tmp.chars, 6)) return 1;
2544 			if (eqasc2uni((unsigned char *)"after", tmp.chars, 5)) return 2;
2545 		}
2546 	return 0;
2547 }
2548 
2549 static int
compileRule(FileInfo * nested,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2550 compileRule(FileInfo *nested, TranslationTableOffset *newRuleOffset,
2551 		TranslationTableRule **newRule, TranslationTableHeader **table,
2552 		DisplayTableHeader **displayTable) {
2553 	int lastToken = 0;
2554 	int ok = 1;
2555 	CharsString token;
2556 	TranslationTableOpcode opcode;
2557 	CharsString ruleChars;
2558 	CharsString ruleDots;
2559 	CharsString cells;
2560 	CharsString scratchPad;
2561 	CharsString emphClass;
2562 	TranslationTableCharacterAttributes after = 0;
2563 	TranslationTableCharacterAttributes before = 0;
2564 	TranslationTableCharacter *c = NULL;
2565 	widechar *patterns = NULL;
2566 	int k, i;
2567 	int noback, nofor;
2568 	noback = nofor = 0;
2569 doOpcode:
2570 	if (!getToken(nested, &token, NULL, &lastToken)) return 1;	/* blank line */
2571 	if (token.chars[0] == '#' || token.chars[0] == '<') return 1; /* comment */
2572 	if (nested->lineNumber == 1 &&
2573 			(eqasc2uni((unsigned char *)"ISO", token.chars, 3) ||
2574 					eqasc2uni((unsigned char *)"UTF-8", token.chars, 5))) {
2575 		if (table)
2576 			compileHyphenation(nested, &token, &lastToken, table);
2577 		else
2578 			/* ignore the whole file */
2579 			while (_lou_getALine(nested))
2580 				;
2581 		return 1;
2582 	}
2583 	opcode = getOpcode(nested, &token);
2584 	switch (opcode) {
2585 	case CTO_IncludeFile: {
2586 		CharsString includedFile;
2587 		if (getToken(nested, &token, "include file name", &lastToken))
2588 			if (parseChars(nested, &includedFile, &token))
2589 				if (!includeFile(nested, &includedFile, table, displayTable)) ok = 0;
2590 		break;
2591 	}
2592 	case CTO_NoBack:
2593 		if (nofor) {
2594 			compileError(nested, "%s already specified.", _lou_findOpcodeName(CTO_NoFor));
2595 			ok = 0;
2596 			break;
2597 		}
2598 		noback = 1;
2599 		goto doOpcode;
2600 	case CTO_NoFor:
2601 		if (noback) {
2602 			compileError(
2603 					nested, "%s already specified.", _lou_findOpcodeName(CTO_NoBack));
2604 			ok = 0;
2605 			break;
2606 		}
2607 		nofor = 1;
2608 		goto doOpcode;
2609 	case CTO_Space:
2610 		compileCharDef(nested, opcode, CTC_Space, &lastToken, newRuleOffset, newRule,
2611 				noback, nofor, table, displayTable);
2612 		break;
2613 	case CTO_Digit:
2614 		compileCharDef(nested, opcode, CTC_Digit, &lastToken, newRuleOffset, newRule,
2615 				noback, nofor, table, displayTable);
2616 		break;
2617 	case CTO_LitDigit:
2618 		compileCharDef(nested, opcode, CTC_LitDigit, &lastToken, newRuleOffset, newRule,
2619 				noback, nofor, table, displayTable);
2620 		break;
2621 	case CTO_Punctuation:
2622 		compileCharDef(nested, opcode, CTC_Punctuation, &lastToken, newRuleOffset,
2623 				newRule, noback, nofor, table, displayTable);
2624 		break;
2625 	case CTO_Math:
2626 		compileCharDef(nested, opcode, CTC_Math, &lastToken, newRuleOffset, newRule,
2627 				noback, nofor, table, displayTable);
2628 		break;
2629 	case CTO_Sign:
2630 		compileCharDef(nested, opcode, CTC_Sign, &lastToken, newRuleOffset, newRule,
2631 				noback, nofor, table, displayTable);
2632 		break;
2633 	case CTO_Letter:
2634 		compileCharDef(nested, opcode, CTC_Letter, &lastToken, newRuleOffset, newRule,
2635 				noback, nofor, table, displayTable);
2636 		break;
2637 	case CTO_UpperCase:
2638 		compileCharDef(nested, opcode, CTC_UpperCase, &lastToken, newRuleOffset, newRule,
2639 				noback, nofor, table, displayTable);
2640 		break;
2641 	case CTO_LowerCase:
2642 		compileCharDef(nested, opcode, CTC_LowerCase, &lastToken, newRuleOffset, newRule,
2643 				noback, nofor, table, displayTable);
2644 		break;
2645 	case CTO_Grouping:
2646 		ok = compileGrouping(nested, &lastToken, newRuleOffset, newRule, noback, nofor,
2647 				table, displayTable);
2648 		break;
2649 	case CTO_UpLow:
2650 		ok = compileUplow(nested, &lastToken, newRuleOffset, newRule, noback, nofor,
2651 				table, displayTable);
2652 		break;
2653 	case CTO_Display:
2654 		if (!displayTable) break;
2655 		if (getRuleCharsText(nested, &ruleChars, &lastToken))
2656 			if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
2657 				if (ruleChars.length != 1 || ruleDots.length != 1) {
2658 					compileError(
2659 							nested, "Exactly one character and one cell are required.");
2660 					ok = 0;
2661 				}
2662 				putCharAndDots(
2663 						nested, ruleChars.chars[0], ruleDots.chars[0], displayTable);
2664 			}
2665 		break;
2666 	/* now only opcodes follow that don't modify the display table */
2667 	default:
2668 		if (!table) break;
2669 		switch (opcode) {
2670 		case CTO_None:
2671 			break;
2672 		case CTO_Locale:
2673 			compileWarning(nested,
2674 					"The locale opcode is not implemented. Use the locale meta data "
2675 					"instead.");
2676 			break;
2677 		case CTO_Undefined: {
2678 			// not passing pointer because compileBrailleIndicator may reallocate table
2679 			TranslationTableOffset ruleOffset = (*table)->undefined;
2680 			ok = compileBrailleIndicator(nested, "undefined character opcode",
2681 					CTO_Undefined, &lastToken, &ruleOffset, newRule, noback, nofor,
2682 					table);
2683 			(*table)->undefined = ruleOffset;
2684 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2685 			break;
2686 		}
2687 		case CTO_Match: {
2688 			TranslationTableRule *rule;
2689 			TranslationTableOffset ruleOffset;
2690 			CharsString ptn_before, ptn_after;
2691 			TranslationTableOffset patternsOffset;
2692 			int len, mrk;
2693 
2694 			size_t patternsByteSize = sizeof(*patterns) * 27720;
2695 			patterns = (widechar *)malloc(patternsByteSize);
2696 			if (!patterns) _lou_outOfMemory();
2697 			memset(patterns, 0xffff, patternsByteSize);
2698 
2699 			noback = 1;
2700 			getCharacters(nested, &ptn_before, &lastToken);
2701 			getRuleCharsText(nested, &ruleChars, &lastToken);
2702 			getCharacters(nested, &ptn_after, &lastToken);
2703 			getRuleDotsPattern(nested, &ruleDots, &lastToken);
2704 
2705 			if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
2706 						&ruleOffset, &rule, noback, nofor, table)) {
2707 				ok = 0;
2708 				break;
2709 			}
2710 			if (ptn_before.chars[0] == '-' && ptn_before.length == 1)
2711 				len = _lou_pattern_compile(
2712 						&ptn_before.chars[0], 0, &patterns[1], 13841, *table);
2713 			else
2714 				len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length,
2715 						&patterns[1], 13841, *table);
2716 			if (!len) {
2717 				ok = 0;
2718 				break;
2719 			}
2720 			mrk = patterns[0] = len + 1;
2721 			_lou_pattern_reverse(&patterns[1]);
2722 
2723 			if (ptn_after.chars[0] == '-' && ptn_after.length == 1)
2724 				len = _lou_pattern_compile(
2725 						&ptn_after.chars[0], 0, &patterns[mrk], 13841, *table);
2726 			else
2727 				len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length,
2728 						&patterns[mrk], 13841, *table);
2729 			if (!len) {
2730 				ok = 0;
2731 				break;
2732 			}
2733 			len += mrk;
2734 
2735 			if (!allocateSpaceInTranslationTable(
2736 						nested, &patternsOffset, len * sizeof(widechar), table)) {
2737 				ok = 0;
2738 				break;
2739 			}
2740 
2741 			/* realloc may have moved table, so make sure rule is still valid */
2742 			rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2743 			memcpy(&(*table)->ruleArea[patternsOffset], patterns, len * sizeof(widechar));
2744 			rule->patterns = patternsOffset;
2745 
2746 			if (newRule) *newRule = rule;
2747 			if (newRuleOffset) *newRuleOffset = ruleOffset;
2748 			break;
2749 		}
2750 
2751 		case CTO_BackMatch: {
2752 			TranslationTableRule *rule;
2753 			TranslationTableOffset ruleOffset;
2754 			CharsString ptn_before, ptn_after;
2755 			TranslationTableOffset patternOffset;
2756 			int len, mrk;
2757 
2758 			size_t patternsByteSize = sizeof(*patterns) * 27720;
2759 			patterns = (widechar *)malloc(patternsByteSize);
2760 			if (!patterns) _lou_outOfMemory();
2761 			memset(patterns, 0xffff, patternsByteSize);
2762 
2763 			nofor = 1;
2764 			getCharacters(nested, &ptn_before, &lastToken);
2765 			getRuleCharsText(nested, &ruleChars, &lastToken);
2766 			getCharacters(nested, &ptn_after, &lastToken);
2767 			getRuleDotsPattern(nested, &ruleDots, &lastToken);
2768 
2769 			if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, &rule,
2770 						noback, nofor, table)) {
2771 				ok = 0;
2772 				break;
2773 			}
2774 			if (ptn_before.chars[0] == '-' && ptn_before.length == 1)
2775 				len = _lou_pattern_compile(
2776 						&ptn_before.chars[0], 0, &patterns[1], 13841, *table);
2777 			else
2778 				len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length,
2779 						&patterns[1], 13841, *table);
2780 			if (!len) {
2781 				ok = 0;
2782 				break;
2783 			}
2784 			mrk = patterns[0] = len + 1;
2785 			_lou_pattern_reverse(&patterns[1]);
2786 
2787 			if (ptn_after.chars[0] == '-' && ptn_after.length == 1)
2788 				len = _lou_pattern_compile(
2789 						&ptn_after.chars[0], 0, &patterns[mrk], 13841, *table);
2790 			else
2791 				len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length,
2792 						&patterns[mrk], 13841, *table);
2793 			if (!len) {
2794 				ok = 0;
2795 				break;
2796 			}
2797 			len += mrk;
2798 
2799 			if (!allocateSpaceInTranslationTable(
2800 						nested, &patternOffset, len * sizeof(widechar), table)) {
2801 				ok = 0;
2802 				break;
2803 			}
2804 
2805 			/* realloc may have moved table, so make sure rule is still valid */
2806 			rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2807 
2808 			memcpy(&(*table)->ruleArea[patternOffset], patterns, len * sizeof(widechar));
2809 			rule->patterns = patternOffset;
2810 
2811 			if (newRule) *newRule = rule;
2812 			if (newRuleOffset) *newRuleOffset = ruleOffset;
2813 			break;
2814 		}
2815 
2816 		case CTO_BegCapsPhrase: {
2817 			// not passing pointer because compileBrailleIndicator may reallocate table
2818 			TranslationTableOffset ruleOffset =
2819 					(*table)->emphRules[capsRule][begPhraseOffset];
2820 			ok = compileBrailleIndicator(nested, "first word capital sign",
2821 					CTO_BegCapsPhraseRule, &lastToken, &ruleOffset, newRule, noback,
2822 					nofor, table);
2823 			(*table)->emphRules[capsRule][begPhraseOffset] = ruleOffset;
2824 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2825 			break;
2826 		}
2827 		case CTO_EndCapsPhrase: {
2828 			TranslationTableOffset ruleOffset;
2829 			switch (compileBeforeAfter(nested, &lastToken)) {
2830 			case 1:  // before
2831 				if ((*table)->emphRules[capsRule][endPhraseAfterOffset]) {
2832 					compileError(nested, "Capital sign after last word already defined.");
2833 					ok = 0;
2834 					break;
2835 				}
2836 				// not passing pointer because compileBrailleIndicator may reallocate
2837 				// table
2838 				ruleOffset = (*table)->emphRules[capsRule][endPhraseBeforeOffset];
2839 				ok = compileBrailleIndicator(nested, "capital sign before last word",
2840 						CTO_EndCapsPhraseBeforeRule, &lastToken, &ruleOffset, newRule,
2841 						noback, nofor, table);
2842 				(*table)->emphRules[capsRule][endPhraseBeforeOffset] = ruleOffset;
2843 				if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2844 				break;
2845 			case 2:  // after
2846 				if ((*table)->emphRules[capsRule][endPhraseBeforeOffset]) {
2847 					compileError(
2848 							nested, "Capital sign before last word already defined.");
2849 					ok = 0;
2850 					break;
2851 				}
2852 				// not passing pointer because compileBrailleIndicator may reallocate
2853 				// table
2854 				ruleOffset = (*table)->emphRules[capsRule][endPhraseAfterOffset];
2855 				ok = compileBrailleIndicator(nested, "capital sign after last word",
2856 						CTO_EndCapsPhraseAfterRule, &lastToken, &ruleOffset, newRule,
2857 						noback, nofor, table);
2858 				(*table)->emphRules[capsRule][endPhraseAfterOffset] = ruleOffset;
2859 				if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2860 				break;
2861 			default:  // error
2862 				compileError(nested, "Invalid lastword indicator location.");
2863 				ok = 0;
2864 				break;
2865 			}
2866 			break;
2867 		}
2868 		case CTO_BegCaps: {
2869 			// not passing pointer because compileBrailleIndicator may reallocate table
2870 			TranslationTableOffset ruleOffset = (*table)->emphRules[capsRule][begOffset];
2871 			ok = compileBrailleIndicator(nested, "first letter capital sign",
2872 					CTO_BegCapsRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2873 					table);
2874 			(*table)->emphRules[capsRule][begOffset] = ruleOffset;
2875 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2876 			break;
2877 		}
2878 		case CTO_EndCaps: {
2879 			// not passing pointer because compileBrailleIndicator may reallocate table
2880 			TranslationTableOffset ruleOffset = (*table)->emphRules[capsRule][endOffset];
2881 			ok = compileBrailleIndicator(nested, "last letter capital sign",
2882 					CTO_EndCapsRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2883 					table);
2884 			(*table)->emphRules[capsRule][endOffset] = ruleOffset;
2885 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2886 			break;
2887 		}
2888 		case CTO_CapsLetter: {
2889 			// not passing pointer because compileBrailleIndicator may reallocate table
2890 			TranslationTableOffset ruleOffset =
2891 					(*table)->emphRules[capsRule][letterOffset];
2892 			ok = compileBrailleIndicator(nested, "single letter capital sign",
2893 					CTO_CapsLetterRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2894 					table);
2895 			(*table)->emphRules[capsRule][letterOffset] = ruleOffset;
2896 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2897 			break;
2898 		}
2899 		case CTO_BegCapsWord: {
2900 			// not passing pointer because compileBrailleIndicator may reallocate table
2901 			TranslationTableOffset ruleOffset =
2902 					(*table)->emphRules[capsRule][begWordOffset];
2903 			ok = compileBrailleIndicator(nested, "capital word", CTO_BegCapsWordRule,
2904 					&lastToken, &ruleOffset, newRule, noback, nofor, table);
2905 			(*table)->emphRules[capsRule][begWordOffset] = ruleOffset;
2906 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2907 			break;
2908 		}
2909 		case CTO_EndCapsWord: {
2910 			// not passing pointer because compileBrailleIndicator may reallocate table
2911 			TranslationTableOffset ruleOffset =
2912 					(*table)->emphRules[capsRule][endWordOffset];
2913 			ok = compileBrailleIndicator(nested, "capital word stop", CTO_EndCapsWordRule,
2914 					&lastToken, &ruleOffset, newRule, noback, nofor, table);
2915 			(*table)->emphRules[capsRule][endWordOffset] = ruleOffset;
2916 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2917 			break;
2918 		}
2919 		case CTO_LenCapsPhrase:
2920 			ok = (*table)->emphRules[capsRule][lenPhraseOffset] =
2921 					compileNumber(nested, &lastToken);
2922 			break;
2923 
2924 		/* these 9 general purpose emphasis opcodes are compiled further down to more
2925 		 * specific internal opcodes:
2926 		 * - emphletter
2927 		 * - begemphword
2928 		 * - endemphword
2929 		 * - begemph
2930 		 * - endemph
2931 		 * - begemphphrase
2932 		 * - endemphphrase
2933 		 * - lenemphphrase
2934 		 */
2935 		case CTO_EmphClass:
2936 			if (getToken(nested, &token, "emphasis class", &lastToken))
2937 				if (parseChars(nested, &emphClass, &token)) {
2938 					char *s = malloc(sizeof(char) * (emphClass.length + 1));
2939 					for (k = 0; k < emphClass.length; k++)
2940 						s[k] = (char)emphClass.chars[k];
2941 					s[k++] = '\0';
2942 					for (i = 0; (*table)->emphClasses[i]; i++)
2943 						if (strcmp(s, (*table)->emphClasses[i]) == 0) {
2944 							_lou_logMessage(
2945 									LOU_LOG_WARN, "Duplicate emphasis class: %s", s);
2946 							warningCount++;
2947 							free(s);
2948 							return 1;
2949 						}
2950 					if (i < MAX_EMPH_CLASSES) {
2951 						switch (i) {
2952 						/* For backwards compatibility (i.e. because programs will assume
2953 						 * the first 3 typeform bits are `italic', `underline' and `bold')
2954 						 * we require that the first 3 emphclass definitions are (in that
2955 						 * order):
2956 						 *
2957 						 *   emphclass italic
2958 						 *   emphclass underline
2959 						 *   emphclass bold
2960 						 *
2961 						 * While it would be possible to use the emphclass opcode only for
2962 						 * defining
2963 						 * _additional_ classes (not allowing for them to be called
2964 						 * italic, underline or bold), thereby reducing the amount of
2965 						 * boilerplate, we deliberately choose not to do that in order to
2966 						 * not give italic, underline and bold any special status. The
2967 						 * hope is that eventually all programs will use liblouis for
2968 						 * emphasis the recommended way (i.e. by looking up the supported
2969 						 * typeforms in
2970 						 * the documentation or API) so that we can drop this restriction.
2971 						 */
2972 						case 0:
2973 							if (strcmp(s, "italic") != 0) {
2974 								_lou_logMessage(LOU_LOG_ERROR,
2975 										"First emphasis class must be \"italic\" but got "
2976 										"%s",
2977 										s);
2978 								errorCount++;
2979 								free(s);
2980 								return 0;
2981 							}
2982 							break;
2983 						case 1:
2984 							if (strcmp(s, "underline") != 0) {
2985 								_lou_logMessage(LOU_LOG_ERROR,
2986 										"Second emphasis class must be \"underline\" but "
2987 										"got "
2988 										"%s",
2989 										s);
2990 								errorCount++;
2991 								free(s);
2992 								return 0;
2993 							}
2994 							break;
2995 						case 2:
2996 							if (strcmp(s, "bold") != 0) {
2997 								_lou_logMessage(LOU_LOG_ERROR,
2998 										"Third emphasis class must be \"bold\" but got "
2999 										"%s",
3000 										s);
3001 								errorCount++;
3002 								free(s);
3003 								return 0;
3004 							}
3005 							break;
3006 						}
3007 						(*table)->emphClasses[i] = s;
3008 						(*table)->emphClasses[i + 1] = NULL;
3009 						ok = 1;
3010 						break;
3011 					} else {
3012 						_lou_logMessage(LOU_LOG_ERROR,
3013 								"Max number of emphasis classes (%i) reached",
3014 								MAX_EMPH_CLASSES);
3015 						errorCount++;
3016 						free(s);
3017 						ok = 0;
3018 						break;
3019 					}
3020 				}
3021 			compileError(nested, "emphclass must be followed by a valid class name.");
3022 			ok = 0;
3023 			break;
3024 		case CTO_EmphLetter:
3025 		case CTO_BegEmphWord:
3026 		case CTO_EndEmphWord:
3027 		case CTO_BegEmph:
3028 		case CTO_EndEmph:
3029 		case CTO_BegEmphPhrase:
3030 		case CTO_EndEmphPhrase:
3031 		case CTO_LenEmphPhrase: {
3032 			ok = 0;
3033 			TranslationTableOffset ruleOffset = 0;
3034 			if (getToken(nested, &token, "emphasis class", &lastToken))
3035 				if (parseChars(nested, &emphClass, &token)) {
3036 					char *s = malloc(sizeof(char) * (emphClass.length + 1));
3037 					for (k = 0; k < emphClass.length; k++)
3038 						s[k] = (char)emphClass.chars[k];
3039 					s[k++] = '\0';
3040 					for (i = 0; (*table)->emphClasses[i]; i++)
3041 						if (strcmp(s, (*table)->emphClasses[i]) == 0) break;
3042 					if (!(*table)->emphClasses[i]) {
3043 						_lou_logMessage(
3044 								LOU_LOG_ERROR, "Emphasis class %s not declared", s);
3045 						errorCount++;
3046 						free(s);
3047 						break;
3048 					}
3049 					i++;  // in table->emphRules the first index is used for caps
3050 					if (opcode == CTO_EmphLetter) {
3051 						// not passing pointer because compileBrailleIndicator may
3052 						// reallocate table
3053 						ruleOffset = (*table)->emphRules[i][letterOffset];
3054 						ok = compileBrailleIndicator(nested, "single letter",
3055 								CTO_Emph1LetterRule + letterOffset + (8 * i), &lastToken,
3056 								&ruleOffset, newRule, noback, nofor, table);
3057 						(*table)->emphRules[i][letterOffset] = ruleOffset;
3058 					} else if (opcode == CTO_BegEmphWord) {
3059 						// not passing pointer because compileBrailleIndicator may
3060 						// reallocate table
3061 						ruleOffset = (*table)->emphRules[i][begWordOffset];
3062 						ok = compileBrailleIndicator(nested, "word",
3063 								CTO_Emph1LetterRule + begWordOffset + (8 * i), &lastToken,
3064 								&ruleOffset, newRule, noback, nofor, table);
3065 						(*table)->emphRules[i][begWordOffset] = ruleOffset;
3066 					} else if (opcode == CTO_EndEmphWord) {
3067 						// not passing pointer because compileBrailleIndicator may
3068 						// reallocate table
3069 						ruleOffset = (*table)->emphRules[i][endWordOffset];
3070 						ok = compileBrailleIndicator(nested, "word stop",
3071 								CTO_Emph1LetterRule + endWordOffset + (8 * i), &lastToken,
3072 								&ruleOffset, newRule, noback, nofor, table);
3073 						(*table)->emphRules[i][endWordOffset] = ruleOffset;
3074 					} else if (opcode == CTO_BegEmph) {
3075 						/* fail if both begemph and any of begemphphrase or begemphword
3076 						 * are defined */
3077 						if ((*table)->emphRules[i][begWordOffset] ||
3078 								(*table)->emphRules[i][begPhraseOffset]) {
3079 							compileError(nested,
3080 									"Cannot define emphasis for both no context and word "
3081 									"or "
3082 									"phrase context, i.e. cannot have both begemph and "
3083 									"begemphword or begemphphrase.");
3084 							ok = 0;
3085 							break;
3086 						}
3087 						// not passing pointer because compileBrailleIndicator may
3088 						// reallocate table
3089 						ruleOffset = (*table)->emphRules[i][begOffset];
3090 						ok = compileBrailleIndicator(nested, "first letter",
3091 								CTO_Emph1LetterRule + begOffset + (8 * i), &lastToken,
3092 								&ruleOffset, newRule, noback, nofor, table);
3093 						(*table)->emphRules[i][begOffset] = ruleOffset;
3094 					} else if (opcode == CTO_EndEmph) {
3095 						if ((*table)->emphRules[i][endWordOffset] ||
3096 								(*table)->emphRules[i][endPhraseBeforeOffset] ||
3097 								(*table)->emphRules[i][endPhraseAfterOffset]) {
3098 							compileError(nested,
3099 									"Cannot define emphasis for both no context and word "
3100 									"or "
3101 									"phrase context, i.e. cannot have both endemph and "
3102 									"endemphword or endemphphrase.");
3103 							ok = 0;
3104 							break;
3105 						}
3106 						// not passing pointer because compileBrailleIndicator may
3107 						// reallocate table
3108 						ruleOffset = (*table)->emphRules[i][endOffset];
3109 						ok = compileBrailleIndicator(nested, "last letter",
3110 								CTO_Emph1LetterRule + endOffset + (8 * i), &lastToken,
3111 								&ruleOffset, newRule, noback, nofor, table);
3112 						(*table)->emphRules[i][endOffset] = ruleOffset;
3113 					} else if (opcode == CTO_BegEmphPhrase) {
3114 						// not passing pointer because compileBrailleIndicator may
3115 						// reallocate table
3116 						ruleOffset = (*table)->emphRules[i][begPhraseOffset];
3117 						ok = compileBrailleIndicator(nested, "first word",
3118 								CTO_Emph1LetterRule + begPhraseOffset + (8 * i),
3119 								&lastToken, &ruleOffset, newRule, noback, nofor, table);
3120 						(*table)->emphRules[i][begPhraseOffset] = ruleOffset;
3121 					} else if (opcode == CTO_EndEmphPhrase)
3122 						switch (compileBeforeAfter(nested, &lastToken)) {
3123 						case 1:  // before
3124 							if ((*table)->emphRules[i][endPhraseAfterOffset]) {
3125 								compileError(nested, "last word after already defined.");
3126 								ok = 0;
3127 								break;
3128 							}
3129 							// not passing pointer because compileBrailleIndicator may
3130 							// reallocate table
3131 							ruleOffset = (*table)->emphRules[i][endPhraseBeforeOffset];
3132 							ok = compileBrailleIndicator(nested, "last word before",
3133 									CTO_Emph1LetterRule + endPhraseBeforeOffset + (8 * i),
3134 									&lastToken, &ruleOffset, newRule, noback, nofor,
3135 									table);
3136 							(*table)->emphRules[i][endPhraseBeforeOffset] = ruleOffset;
3137 							break;
3138 						case 2:  // after
3139 							if ((*table)->emphRules[i][endPhraseBeforeOffset]) {
3140 								compileError(nested, "last word before already defined.");
3141 								ok = 0;
3142 								break;
3143 							}
3144 							// not passing pointer because compileBrailleIndicator may
3145 							// reallocate table
3146 							ruleOffset = (*table)->emphRules[i][endPhraseAfterOffset];
3147 							ok = compileBrailleIndicator(nested, "last word after",
3148 									CTO_Emph1LetterRule + endPhraseAfterOffset + (8 * i),
3149 									&lastToken, &ruleOffset, newRule, noback, nofor,
3150 									table);
3151 							(*table)->emphRules[i][endPhraseAfterOffset] = ruleOffset;
3152 							break;
3153 						default:  // error
3154 							compileError(nested, "Invalid lastword indicator location.");
3155 							ok = 0;
3156 							break;
3157 						}
3158 					else if (opcode == CTO_LenEmphPhrase)
3159 						ok = (*table)->emphRules[i][lenPhraseOffset] =
3160 								compileNumber(nested, &lastToken);
3161 					free(s);
3162 				}
3163 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3164 			break;
3165 		}
3166 		case CTO_LetterSign: {
3167 			// not passing pointer because compileBrailleIndicator may reallocate table
3168 			TranslationTableOffset ruleOffset = (*table)->letterSign;
3169 			ok = compileBrailleIndicator(nested, "letter sign", CTO_LetterRule,
3170 					&lastToken, &ruleOffset, newRule, noback, nofor, table);
3171 			(*table)->letterSign = ruleOffset;
3172 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3173 			break;
3174 		}
3175 		case CTO_NoLetsignBefore:
3176 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3177 				if (((*table)->noLetsignBeforeCount + ruleChars.length) > LETSIGNSIZE) {
3178 					compileError(nested, "More than %d characters", LETSIGNSIZE);
3179 					ok = 0;
3180 					break;
3181 				}
3182 				for (k = 0; k < ruleChars.length; k++)
3183 					(*table)->noLetsignBefore[(*table)->noLetsignBeforeCount++] =
3184 							ruleChars.chars[k];
3185 			}
3186 			break;
3187 		case CTO_NoLetsign:
3188 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3189 				if (((*table)->noLetsignCount + ruleChars.length) > LETSIGNSIZE) {
3190 					compileError(nested, "More than %d characters", LETSIGNSIZE);
3191 					ok = 0;
3192 					break;
3193 				}
3194 				for (k = 0; k < ruleChars.length; k++)
3195 					(*table)->noLetsign[(*table)->noLetsignCount++] = ruleChars.chars[k];
3196 			}
3197 			break;
3198 		case CTO_NoLetsignAfter:
3199 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3200 				if (((*table)->noLetsignAfterCount + ruleChars.length) > LETSIGNSIZE) {
3201 					compileError(nested, "More than %d characters", LETSIGNSIZE);
3202 					ok = 0;
3203 					break;
3204 				}
3205 				for (k = 0; k < ruleChars.length; k++)
3206 					(*table)->noLetsignAfter[(*table)->noLetsignAfterCount++] =
3207 							ruleChars.chars[k];
3208 			}
3209 			break;
3210 		case CTO_NumberSign: {
3211 			// not passing pointer because compileBrailleIndicator may reallocate table
3212 			TranslationTableOffset ruleOffset = (*table)->numberSign;
3213 			ok = compileBrailleIndicator(nested, "number sign", CTO_NumberRule,
3214 					&lastToken, &ruleOffset, newRule, noback, nofor, table);
3215 			(*table)->numberSign = ruleOffset;
3216 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3217 			break;
3218 		}
3219 		case CTO_Attribute:
3220 
3221 			c = NULL;
3222 			ok = 1;
3223 			if (!getToken(nested, &ruleChars, "attribute number", &lastToken)) {
3224 				compileError(nested, "Expected attribute number.");
3225 				ok = 0;
3226 				break;
3227 			}
3228 
3229 			k = -1;
3230 			switch (ruleChars.chars[0]) {
3231 			case '0':
3232 				k = 0;
3233 				break;
3234 			case '1':
3235 				k = 1;
3236 				break;
3237 			case '2':
3238 				k = 2;
3239 				break;
3240 			case '3':
3241 				k = 3;
3242 				break;
3243 			case '4':
3244 				k = 4;
3245 				break;
3246 			case '5':
3247 				k = 5;
3248 				break;
3249 			case '6':
3250 				k = 6;
3251 				break;
3252 			case '7':
3253 				k = 7;
3254 				break;
3255 			}
3256 			if (k == -1) {
3257 				compileError(nested, "Invalid attribute number.");
3258 				ok = 0;
3259 				break;
3260 			}
3261 
3262 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3263 				for (i = 0; i < ruleChars.length; i++) {
3264 					c = compile_findCharOrDots(ruleChars.chars[i], 0, *table);
3265 					if (c)
3266 						c->attributes |= (CTC_UserDefined0 << k);
3267 					else {
3268 						compileError(nested, "Attribute character undefined");
3269 						ok = 0;
3270 						break;
3271 					}
3272 				}
3273 			}
3274 			break;
3275 
3276 		case CTO_NumericModeChars:
3277 
3278 			c = NULL;
3279 			ok = 1;
3280 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3281 				for (k = 0; k < ruleChars.length; k++) {
3282 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3283 					if (c)
3284 						c->attributes |= CTC_NumericMode;
3285 					else {
3286 						compileError(nested, "Numeric mode character undefined");
3287 						ok = 0;
3288 						break;
3289 					}
3290 				}
3291 				(*table)->usesNumericMode = 1;
3292 			}
3293 			break;
3294 
3295 		case CTO_MidEndNumericModeChars:
3296 
3297 			c = NULL;
3298 			ok = 1;
3299 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3300 				for (k = 0; k < ruleChars.length; k++) {
3301 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3302 					if (c)
3303 						c->attributes |= CTC_MidEndNumericMode;
3304 					else {
3305 						compileError(nested, "Midendnumeric mode character undefined");
3306 						ok = 0;
3307 						break;
3308 					}
3309 				}
3310 				(*table)->usesNumericMode = 1;
3311 			}
3312 			break;
3313 
3314 		case CTO_NumericNoContractChars:
3315 
3316 			c = NULL;
3317 			ok = 1;
3318 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3319 				for (k = 0; k < ruleChars.length; k++) {
3320 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3321 					if (c)
3322 						c->attributes |= CTC_NumericNoContract;
3323 					else {
3324 						compileError(
3325 								nested, "Numeric no contraction character undefined");
3326 						ok = 0;
3327 						break;
3328 					}
3329 				}
3330 				(*table)->usesNumericMode = 1;
3331 			}
3332 			break;
3333 
3334 		case CTO_NoContractSign: {
3335 			// not passing pointer because compileBrailleIndicator may reallocate table
3336 			TranslationTableOffset ruleOffset = (*table)->noContractSign;
3337 			ok = compileBrailleIndicator(nested, "no contractions sign",
3338 					CTO_NoContractRule, &lastToken, &ruleOffset, newRule, noback, nofor,
3339 					table);
3340 			(*table)->noContractSign = ruleOffset;
3341 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3342 			break;
3343 		}
3344 		case CTO_SeqDelimiter:
3345 
3346 			c = NULL;
3347 			ok = 1;
3348 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3349 				for (k = 0; k < ruleChars.length; k++) {
3350 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3351 					if (c)
3352 						c->attributes |= CTC_SeqDelimiter;
3353 					else {
3354 						compileError(nested, "Sequence delimiter character undefined");
3355 						ok = 0;
3356 						break;
3357 					}
3358 				}
3359 				(*table)->usesSequences = 1;
3360 			}
3361 			break;
3362 
3363 		case CTO_SeqBeforeChars:
3364 
3365 			c = NULL;
3366 			ok = 1;
3367 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3368 				for (k = 0; k < ruleChars.length; k++) {
3369 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3370 					if (c)
3371 						c->attributes |= CTC_SeqBefore;
3372 					else {
3373 						compileError(nested, "Sequence before character undefined");
3374 						ok = 0;
3375 						break;
3376 					}
3377 				}
3378 			}
3379 			break;
3380 
3381 		case CTO_SeqAfterChars:
3382 
3383 			c = NULL;
3384 			ok = 1;
3385 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3386 				for (k = 0; k < ruleChars.length; k++) {
3387 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3388 					if (c)
3389 						c->attributes |= CTC_SeqAfter;
3390 					else {
3391 						compileError(nested, "Sequence after character undefined");
3392 						ok = 0;
3393 						break;
3394 					}
3395 				}
3396 			}
3397 			break;
3398 
3399 		case CTO_SeqAfterPattern:
3400 
3401 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3402 				if (((*table)->seqPatternsCount + ruleChars.length + 1) >
3403 						SEQPATTERNSIZE) {
3404 					compileError(nested, "More than %d characters", SEQPATTERNSIZE);
3405 					ok = 0;
3406 					break;
3407 				}
3408 				for (k = 0; k < ruleChars.length; k++)
3409 					(*table)->seqPatterns[(*table)->seqPatternsCount++] =
3410 							ruleChars.chars[k];
3411 				(*table)->seqPatterns[(*table)->seqPatternsCount++] = 0;
3412 			}
3413 			break;
3414 		case CTO_SeqAfterExpression:
3415 
3416 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3417 				for ((*table)->seqAfterExpressionLength = 0;
3418 						(*table)->seqAfterExpressionLength < ruleChars.length;
3419 						(*table)->seqAfterExpressionLength++)
3420 					(*table)->seqAfterExpression[(*table)->seqAfterExpressionLength] =
3421 							ruleChars.chars[(*table)->seqAfterExpressionLength];
3422 				(*table)->seqAfterExpression[(*table)->seqAfterExpressionLength] = 0;
3423 			}
3424 			break;
3425 
3426 		case CTO_CapsModeChars:
3427 
3428 			c = NULL;
3429 			ok = 1;
3430 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3431 				for (k = 0; k < ruleChars.length; k++) {
3432 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3433 					if (c)
3434 						c->attributes |= CTC_CapsMode;
3435 					else {
3436 						compileError(nested, "Capital mode character undefined");
3437 						ok = 0;
3438 						break;
3439 					}
3440 				}
3441 			}
3442 			break;
3443 
3444 		case CTO_EmphModeChars:
3445 
3446 			c = NULL;
3447 			ok = 1;
3448 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3449 				for (k = 0; k < ruleChars.length; k++) {
3450 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3451 					if (c)
3452 						c->attributes |= CTC_EmphMode;
3453 					else {
3454 						compileError(nested, "Emphasis mode character undefined");
3455 						ok = 0;
3456 						break;
3457 					}
3458 				}
3459 			}
3460 			(*table)->usesEmphMode = 1;
3461 			break;
3462 
3463 		case CTO_BegComp: {
3464 			// not passing pointer because compileBrailleIndicator may reallocate table
3465 			TranslationTableOffset ruleOffset = (*table)->begComp;
3466 			ok = compileBrailleIndicator(nested, "begin computer braille",
3467 					CTO_BegCompRule, &lastToken, &ruleOffset, newRule, noback, nofor,
3468 					table);
3469 			(*table)->begComp = ruleOffset;
3470 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3471 			break;
3472 		}
3473 		case CTO_EndComp: {
3474 			// not passing pointer because compileBrailleIndicator may reallocate table
3475 			TranslationTableOffset ruleOffset = (*table)->endComp;
3476 			ok = compileBrailleIndicator(nested, "end computer braslle", CTO_EndCompRule,
3477 					&lastToken, &ruleOffset, newRule, noback, nofor, table);
3478 			(*table)->endComp = ruleOffset;
3479 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3480 			break;
3481 		}
3482 		case CTO_Syllable:
3483 			(*table)->syllables = 1;
3484 		case CTO_Always:
3485 		case CTO_NoCross:
3486 		case CTO_LargeSign:
3487 		case CTO_WholeWord:
3488 		case CTO_PartWord:
3489 		case CTO_JoinNum:
3490 		case CTO_JoinableWord:
3491 		case CTO_LowWord:
3492 		case CTO_SuffixableWord:
3493 		case CTO_PrefixableWord:
3494 		case CTO_BegWord:
3495 		case CTO_BegMidWord:
3496 		case CTO_MidWord:
3497 		case CTO_MidEndWord:
3498 		case CTO_EndWord:
3499 		case CTO_PrePunc:
3500 		case CTO_PostPunc:
3501 		case CTO_BegNum:
3502 		case CTO_MidNum:
3503 		case CTO_EndNum:
3504 		case CTO_Repeated:
3505 		case CTO_RepWord:
3506 			if (getRuleCharsText(nested, &ruleChars, &lastToken))
3507 				if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
3508 					if (ruleDots.length == 0)  // `=`
3509 						for (k = 0; k < ruleChars.length; k++) {
3510 							c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3511 							if (!c || !c->definitionRule) {
3512 								compileError(nested, "Character %s is not defined",
3513 										_lou_showString(&ruleChars.chars[k], 1, 0));
3514 								return 0;
3515 							}
3516 						}
3517 					if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3518 								newRuleOffset, newRule, noback, nofor, table))
3519 						ok = 0;
3520 				}
3521 			// if (opcode == CTO_MidNum)
3522 			// {
3523 			//   TranslationTableCharacter *c = compile_findCharOrDots(ruleChars.chars[0],
3524 			//   0); if(c)
3525 			//     c->attributes |= CTC_NumericMode;
3526 			// }
3527 			break;
3528 		case CTO_CompDots:
3529 		case CTO_Comp6: {
3530 			TranslationTableOffset ruleOffset;
3531 			if (!getRuleCharsText(nested, &ruleChars, &lastToken)) return 0;
3532 			if (ruleChars.length != 1 || ruleChars.chars[0] > 255) {
3533 				compileError(nested, "first operand must be 1 character and < 256");
3534 				return 0;
3535 			}
3536 			if (!getRuleDotsPattern(nested, &ruleDots, &lastToken)) return 0;
3537 			if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3538 						&ruleOffset, newRule, noback, nofor, table))
3539 				ok = 0;
3540 			(*table)->compdotsPattern[ruleChars.chars[0]] = ruleOffset;
3541 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3542 			break;
3543 		}
3544 		case CTO_ExactDots:
3545 			if (!getRuleCharsText(nested, &ruleChars, &lastToken)) return 0;
3546 			if (ruleChars.chars[0] != '@') {
3547 				compileError(nested, "The operand must begin with an at sign (@)");
3548 				return 0;
3549 			}
3550 			for (k = 1; k < ruleChars.length; k++)
3551 				scratchPad.chars[k - 1] = ruleChars.chars[k];
3552 			scratchPad.length = ruleChars.length - 1;
3553 			if (!parseDots(nested, &ruleDots, &scratchPad)) return 0;
3554 			if (!addRule(nested, opcode, &ruleChars, &ruleDots, before, after,
3555 						newRuleOffset, newRule, noback, nofor, table))
3556 				ok = 0;
3557 			break;
3558 		case CTO_CapsNoCont: {
3559 			TranslationTableOffset ruleOffset;
3560 			ruleChars.length = 1;
3561 			ruleChars.chars[0] = 'a';
3562 			if (!addRule(nested, CTO_CapsNoContRule, &ruleChars, NULL, after, before,
3563 						&ruleOffset, newRule, noback, nofor, table))
3564 				ok = 0;
3565 			(*table)->capsNoCont = ruleOffset;
3566 			if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3567 			break;
3568 		}
3569 		case CTO_Replace:
3570 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3571 				if (lastToken)
3572 					ruleDots.length = ruleDots.chars[0] = 0;
3573 				else {
3574 					getRuleDotsText(nested, &ruleDots, &lastToken);
3575 					if (ruleDots.chars[0] == '#')
3576 						ruleDots.length = ruleDots.chars[0] = 0;
3577 					else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#')
3578 						memcpy(&ruleDots.chars[0], &ruleDots.chars[1],
3579 								ruleDots.length-- * CHARSIZE);
3580 				}
3581 			}
3582 			for (k = 0; k < ruleChars.length; k++)
3583 				addCharOrDots(nested, ruleChars.chars[k], 0, table);
3584 			for (k = 0; k < ruleDots.length; k++)
3585 				addCharOrDots(nested, ruleDots.chars[k], 0, table);
3586 			if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3587 						newRuleOffset, newRule, noback, nofor, table))
3588 				ok = 0;
3589 			break;
3590 		case CTO_Correct:
3591 			(*table)->corrections = 1;
3592 			goto doPass;
3593 		case CTO_Pass2:
3594 			if ((*table)->numPasses < 2) (*table)->numPasses = 2;
3595 			goto doPass;
3596 		case CTO_Pass3:
3597 			if ((*table)->numPasses < 3) (*table)->numPasses = 3;
3598 			goto doPass;
3599 		case CTO_Pass4:
3600 			if ((*table)->numPasses < 4) (*table)->numPasses = 4;
3601 		doPass:
3602 		case CTO_Context:
3603 			if (!(nofor || noback)) {
3604 				compileError(nested, "%s or %s must be specified.",
3605 						_lou_findOpcodeName(CTO_NoFor), _lou_findOpcodeName(CTO_NoBack));
3606 				ok = 0;
3607 				break;
3608 			}
3609 			if (!compilePassOpcode(
3610 						nested, opcode, newRuleOffset, newRule, noback, nofor, table))
3611 				ok = 0;
3612 			break;
3613 		case CTO_Contraction:
3614 		case CTO_NoCont:
3615 		case CTO_CompBrl:
3616 		case CTO_Literal:
3617 			if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3618 				for (k = 0; k < ruleChars.length; k++) {
3619 					c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3620 					if (!c || !c->definitionRule) {
3621 						compileError(nested, "Character %s is not defined",
3622 								_lou_showString(&ruleChars.chars[k], 1, 0));
3623 						return 0;
3624 					}
3625 				}
3626 				if (!addRule(nested, opcode, &ruleChars, NULL, after, before,
3627 							newRuleOffset, newRule, noback, nofor, table))
3628 					ok = 0;
3629 			}
3630 			break;
3631 		case CTO_MultInd: {
3632 			int t;
3633 			ruleChars.length = 0;
3634 			if (getToken(nested, &token, "multiple braille indicators", &lastToken) &&
3635 					parseDots(nested, &cells, &token)) {
3636 				while ((t = getToken(nested, &token, "multind opcodes", &lastToken))) {
3637 					opcode = getOpcode(nested, &token);
3638 					if (opcode >= CTO_CapsLetter && opcode < CTO_MultInd)
3639 						ruleChars.chars[ruleChars.length++] = (widechar)opcode;
3640 					else {
3641 						compileError(nested, "Not a braille indicator opcode.");
3642 						ok = 0;
3643 					}
3644 					if (t == 2) break;
3645 				}
3646 			} else
3647 				ok = 0;
3648 			if (!addRule(nested, CTO_MultInd, &ruleChars, &cells, after, before,
3649 						newRuleOffset, newRule, noback, nofor, table))
3650 				ok = 0;
3651 			break;
3652 		}
3653 
3654 		case CTO_Class: {
3655 			CharsString characters;
3656 			const CharacterClass *class;
3657 			if (!(*table)->characterClasses) {
3658 				if (!allocateCharacterClasses(*table)) ok = 0;
3659 			}
3660 			if (getToken(nested, &token, "character class name", &lastToken)) {
3661 				class = findCharacterClass(&token, *table);
3662 				if (!class)
3663 					// no class with that name: create one
3664 					class = addCharacterClass(
3665 							nested, &token.chars[0], token.length, *table);
3666 				if (class) {
3667 					// there is a class with that name or a new class was successfully
3668 					// created
3669 					if (getCharacters(nested, &characters, &lastToken)) {
3670 						int index;
3671 						for (index = 0; index < characters.length; ++index) {
3672 							TranslationTableRule *defRule;
3673 							// get the character from the table and add the new class to
3674 							// its attributes if the character is not defined yet, define
3675 							// it
3676 							TranslationTableCharacter *character = addCharOrDots(
3677 									nested, characters.chars[index], 0, table);
3678 							character->attributes |= class->attribute;
3679 							// also add the attribute to the associated dots (if any)
3680 							if (character->definitionRule) {
3681 								defRule = (TranslationTableRule *)&(*table)
3682 												  ->ruleArea[character->definitionRule];
3683 								if (defRule->dotslen == 1) {
3684 									character = compile_findCharOrDots(
3685 											defRule->charsdots[defRule->charslen], 1,
3686 											*table);
3687 									if (character)
3688 										character->attributes |= class->attribute;
3689 								}
3690 							}
3691 						}
3692 					}
3693 				}
3694 			}
3695 			break;
3696 		}
3697 
3698 			{
3699 				TranslationTableCharacterAttributes *attributes;
3700 				const CharacterClass *class;
3701 			case CTO_After:
3702 				attributes = &after;
3703 				goto doClass;
3704 			case CTO_Before:
3705 				attributes = &before;
3706 			doClass:
3707 				if (!(*table)->characterClasses) {
3708 					if (!allocateCharacterClasses(*table)) ok = 0;
3709 				}
3710 				if (getCharacterClass(nested, &class, *table, &lastToken)) {
3711 					*attributes |= class->attribute;
3712 					goto doOpcode;
3713 				}
3714 				break;
3715 			}
3716 
3717 		case CTO_EmpMatchBefore:
3718 			before |= CTC_EmpMatch;
3719 			goto doOpcode;
3720 		case CTO_EmpMatchAfter:
3721 			after |= CTC_EmpMatch;
3722 			goto doOpcode;
3723 
3724 		case CTO_SwapCc:
3725 		case CTO_SwapCd:
3726 		case CTO_SwapDd:
3727 			if (!compileSwap(nested, opcode, &lastToken, newRuleOffset, newRule, noback,
3728 						nofor, table))
3729 				ok = 0;
3730 			break;
3731 		case CTO_Hyphen:
3732 		case CTO_DecPoint:
3733 			//	case CTO_Apostrophe:
3734 			//	case CTO_Initial:
3735 			if (getRuleCharsText(nested, &ruleChars, &lastToken))
3736 				if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
3737 					if (ruleChars.length != 1 || ruleDots.length < 1) {
3738 						compileError(nested,
3739 								"One Unicode character and at least one cell are "
3740 								"required.");
3741 						ok = 0;
3742 					}
3743 					if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3744 								newRuleOffset, newRule, noback, nofor, table))
3745 						ok = 0;
3746 					// if (opcode == CTO_DecPoint)
3747 					// {
3748 					//   TranslationTableCharacter *c =
3749 					//   compile_findCharOrDots(ruleChars.chars[0], 0);
3750 					//   if(c)
3751 					//     c->attributes |= CTC_NumericMode;
3752 					// }
3753 				}
3754 			break;
3755 		default:
3756 			compileError(nested, "unimplemented opcode.");
3757 			ok = 0;
3758 			break;
3759 		}
3760 	}
3761 
3762 	if (patterns != NULL) free(patterns);
3763 
3764 	return ok;
3765 }
3766 
3767 int EXPORT_CALL
lou_readCharFromFile(const char * fileName,int * mode)3768 lou_readCharFromFile(const char *fileName, int *mode) {
3769 	/* Read a character from a file, whether big-endian, little-endian or
3770 	 * ASCII8 */
3771 	int ch;
3772 	static FileInfo nested;
3773 	if (fileName == NULL) return 0;
3774 	if (*mode == 1) {
3775 		*mode = 0;
3776 		nested.fileName = fileName;
3777 		nested.encoding = noEncoding;
3778 		nested.status = 0;
3779 		nested.lineNumber = 0;
3780 		if (!(nested.in = fopen(nested.fileName, "r"))) {
3781 			_lou_logMessage(LOU_LOG_ERROR, "Cannot open file '%s'", nested.fileName);
3782 			*mode = 1;
3783 			return EOF;
3784 		}
3785 	}
3786 	if (nested.in == NULL) {
3787 		*mode = 1;
3788 		return EOF;
3789 	}
3790 	ch = getAChar(&nested);
3791 	if (ch == EOF) {
3792 		fclose(nested.in);
3793 		nested.in = NULL;
3794 		*mode = 1;
3795 	}
3796 	return ch;
3797 }
3798 
3799 static int
compileString(const char * inString,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)3800 compileString(const char *inString, TranslationTableHeader **table,
3801 		DisplayTableHeader **displayTable) {
3802 	/* This function can be used to make changes to tables on the fly. */
3803 	int k;
3804 	FileInfo nested;
3805 	if (inString == NULL) return 0;
3806 	memset(&nested, 0, sizeof(nested));
3807 	nested.fileName = inString;
3808 	nested.encoding = noEncoding;
3809 	nested.lineNumber = 1;
3810 	nested.status = 0;
3811 	nested.linepos = 0;
3812 	for (k = 0; inString[k]; k++) nested.line[k] = inString[k];
3813 	nested.line[k] = 0;
3814 	nested.linelen = k;
3815 	return compileRule(&nested, NULL, NULL, table, displayTable);
3816 }
3817 
3818 static int
setDefaults(TranslationTableHeader * table)3819 setDefaults(TranslationTableHeader *table) {
3820 	if (!table->emphRules[emph1Rule][lenPhraseOffset])
3821 		table->emphRules[emph1Rule][lenPhraseOffset] = 4;
3822 	if (!table->emphRules[emph2Rule][lenPhraseOffset])
3823 		table->emphRules[emph2Rule][lenPhraseOffset] = 4;
3824 	if (!table->emphRules[emph3Rule][lenPhraseOffset])
3825 		table->emphRules[emph3Rule][lenPhraseOffset] = 4;
3826 	if (table->numPasses == 0) table->numPasses = 1;
3827 	return 1;
3828 }
3829 
3830 /* =============== *
3831  * TABLE RESOLVING *
3832  * =============== *
3833  *
3834  * A table resolver is a function that resolves a `tableList` path against a
3835  * `base` path, and returns the resolved table(s) as a list of absolute file
3836  * paths.
3837  *
3838  * The function must have the following signature:
3839  *
3840  *     char ** (const char * tableList, const char * base)
3841  *
 * In general, `tableList` is a path in the broad sense. The default
 * implementation accepts only *file* paths. But another implementation could
 * for instance handle URIs. `base` is always a file path however.
 *
 * The idea is to give other programs that use liblouis the ability to define
 * their own table resolver (in C, Java, Python, etc.) when the default
 * resolver is not satisfactory. (see also lou_registerTableResolver)
3849  *
3850  */
3851 
3852 /**
3853  * Resolve a single (sub)table.
3854  *
3855  * Tries to resolve `table` against `base` if base is an absolute path. If
3856  * that fails, searches `searchPath`.
3857  *
3858  */
3859 static char *
resolveSubtable(const char * table,const char * base,const char * searchPath)3860 resolveSubtable(const char *table, const char *base, const char *searchPath) {
3861 	char *tableFile;
3862 	static struct stat info;
3863 
3864 	if (table == NULL || table[0] == '\0') return NULL;
3865 	tableFile = (char *)malloc(MAXSTRING * sizeof(char) * 2);
3866 
3867 	//
3868 	// First try to resolve against base
3869 	//
3870 	if (base) {
3871 		int k;
3872 		strcpy(tableFile, base);
3873 		k = (int)strlen(tableFile);
3874 		while (k >= 0 && tableFile[k] != '/' && tableFile[k] != '\\') k--;
3875 		tableFile[++k] = '\0';
3876 		strcat(tableFile, table);
3877 		if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3878 			_lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3879 			return tableFile;
3880 		}
3881 	}
3882 
3883 	//
3884 	// It could be an absolute path, or a path relative to the current working
3885 	// directory
3886 	//
3887 	strcpy(tableFile, table);
3888 	if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3889 		_lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3890 		return tableFile;
3891 	}
3892 
3893 	//
3894 	// Then search `LOUIS_TABLEPATH`, `dataPath` and `programPath`
3895 	//
3896 	if (searchPath[0] != '\0') {
3897 		char *dir;
3898 		int last;
3899 		char *cp;
3900 		char *searchPath_copy = strdup(searchPath);
3901 		for (dir = searchPath_copy;; dir = cp + 1) {
3902 			for (cp = dir; *cp != '\0' && *cp != ','; cp++)
3903 				;
3904 			last = (*cp == '\0');
3905 			*cp = '\0';
3906 			if (dir == cp) dir = ".";
3907 			sprintf(tableFile, "%s%c%s", dir, DIR_SEP, table);
3908 			if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3909 				_lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3910 				free(searchPath_copy);
3911 				return tableFile;
3912 			}
3913 			if (last) break;
3914 			sprintf(tableFile, "%s%c%s%c%s%c%s", dir, DIR_SEP, "liblouis", DIR_SEP,
3915 					"tables", DIR_SEP, table);
3916 			if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3917 				_lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3918 				free(searchPath_copy);
3919 				return tableFile;
3920 			}
3921 			if (last) break;
3922 		}
3923 		free(searchPath_copy);
3924 	}
3925 	free(tableFile);
3926 	return NULL;
3927 }
3928 
3929 char *EXPORT_CALL
_lou_getTablePath(void)3930 _lou_getTablePath(void) {
3931 	char searchPath[MAXSTRING];
3932 	char *path;
3933 	char *cp;
3934 	int envset = 0;
3935 	cp = searchPath;
3936 	path = getenv("LOUIS_TABLEPATH");
3937 	if (path != NULL && path[0] != '\0') {
3938 		envset = 1;
3939 		cp += sprintf(cp, ",%s", path);
3940 	}
3941 	path = lou_getDataPath();
3942 	if (path != NULL && path[0] != '\0')
3943 		cp += sprintf(cp, ",%s%c%s%c%s", path, DIR_SEP, "liblouis", DIR_SEP, "tables");
3944 	if (!envset) {
3945 #ifdef _WIN32
3946 		path = lou_getProgramPath();
3947 		if (path != NULL) {
3948 			if (path[0] != '\0')
3949 				cp += sprintf(cp, ",%s%s", path, "\\share\\liblouis\\tables");
3950 			free(path);
3951 		}
3952 #else
3953 		cp += sprintf(cp, ",%s", TABLESDIR);
3954 #endif
3955 	}
3956 	if (searchPath[0] != '\0')
3957 		return strdup(&searchPath[1]);
3958 	else
3959 		return strdup(".");
3960 }
3961 
3962 /**
3963  * The default table resolver
3964  *
3965  * Tries to resolve tableList against base. The search path is set to
3966  * `LOUIS_TABLEPATH`, `dataPath` and `programPath` (in that order).
3967  *
3968  * @param table A file path, may be absolute or relative. May be a list of
3969  *              tables separated by comma's. In that case, the first table
3970  *              is used as the base for the other subtables.
3971  * @param base A file path or directory path, or NULL.
3972  * @return The file paths of the resolved subtables, or NULL if the table
3973  *         could not be resolved.
3974  *
3975  */
3976 char **EXPORT_CALL
_lou_defaultTableResolver(const char * tableList,const char * base)3977 _lou_defaultTableResolver(const char *tableList, const char *base) {
3978 	char *searchPath;
3979 	char **tableFiles;
3980 	char *subTable;
3981 	char *tableList_copy;
3982 	char *cp;
3983 	int last;
3984 	int k;
3985 
3986 	/* Set up search path */
3987 	searchPath = _lou_getTablePath();
3988 
3989 	/* Count number of subtables in table list */
3990 	k = 0;
3991 	for (cp = (char *)tableList; *cp != '\0'; cp++)
3992 		if (*cp == ',') k++;
3993 	tableFiles = (char **)calloc(k + 2, sizeof(char *));
3994 	if (!tableFiles) _lou_outOfMemory();
3995 
3996 	/* Resolve subtables */
3997 	k = 0;
3998 	tableList_copy = strdup(tableList);
3999 	for (subTable = tableList_copy;; subTable = cp + 1) {
4000 		for (cp = subTable; *cp != '\0' && *cp != ','; cp++)
4001 			;
4002 		last = (*cp == '\0');
4003 		*cp = '\0';
4004 		if (!(tableFiles[k++] = resolveSubtable(subTable, base, searchPath))) {
4005 			char *path;
4006 			_lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", subTable);
4007 			path = getenv("LOUIS_TABLEPATH");
4008 			if (path != NULL && path[0] != '\0')
4009 				_lou_logMessage(LOU_LOG_ERROR, "LOUIS_TABLEPATH=%s", path);
4010 			free(searchPath);
4011 			free(tableList_copy);
4012 			free_tablefiles(tableFiles);
4013 			return NULL;
4014 		}
4015 		if (k == 1) base = subTable;
4016 		if (last) break;
4017 	}
4018 	free(searchPath);
4019 	free(tableList_copy);
4020 	tableFiles[k] = NULL;
4021 	return tableFiles;
4022 }
4023 
/* The table resolver currently in use. It is initialised to the default
 * resolver and can be replaced with lou_registerTableResolver. */
static char **(EXPORT_CALL *tableResolver)(
		const char *tableList, const char *base) = &_lou_defaultTableResolver;
4026 
/**
 * Make a deep copy of a NULL-terminated array of strings.
 *
 * @param array A NULL-terminated array of strings, or NULL.
 * @return A newly allocated, NULL-terminated array holding newly allocated
 *         copies of the strings (the caller frees both the strings and the
 *         array), or NULL if `array` is NULL or memory could not be
 *         allocated. No partial copy is ever returned.
 */
static char **
copyStringArray(char **array) {
	size_t len = 0;
	size_t i;
	char **copy;
	if (!array) return NULL;
	while (array[len]) len++;
	copy = malloc((len + 1) * sizeof(*copy));
	if (!copy) return NULL;
	for (i = 0; i < len; i++) {
		copy[i] = strdup(array[i]);
		if (!copy[i]) {
			/* A NULL entry would silently cut the list short, so undo the
			 * copies made so far and report the failure instead. */
			while (i > 0) free(copy[--i]);
			free(copy);
			return NULL;
		}
	}
	copy[len] = NULL;
	return copy;
}
4042 
4043 char **EXPORT_CALL
_lou_resolveTable(const char * tableList,const char * base)4044 _lou_resolveTable(const char *tableList, const char *base) {
4045 	char **tableFiles = (*tableResolver)(tableList, base);
4046 	char **result = copyStringArray(tableFiles);
4047 	if (tableResolver == &_lou_defaultTableResolver) free_tablefiles(tableFiles);
4048 	return result;
4049 }
4050 
4051 /**
4052  * Register a new table resolver. Overrides the default resolver.
4053  *
4054  * @param resolver The new resolver as a function pointer.
4055  *
4056  */
4057 void EXPORT_CALL
lou_registerTableResolver(char ** (EXPORT_CALL * resolver)(const char * tableList,const char * base))4058 lou_registerTableResolver(
4059 		char **(EXPORT_CALL *resolver)(const char *tableList, const char *base)) {
4060 	tableResolver = resolver;
4061 }
4062 
/* Number of compileFile calls since compileTable last reset the counter */
static int fileCount = 0;
4064 
4065 /**
4066  * Compile a single file
4067  *
4068  */
4069 static int
compileFile(const char * fileName,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)4070 compileFile(const char *fileName, TranslationTableHeader **table,
4071 		DisplayTableHeader **displayTable) {
4072 	FileInfo nested;
4073 	fileCount++;
4074 	nested.fileName = fileName;
4075 	nested.encoding = noEncoding;
4076 	nested.status = 0;
4077 	nested.lineNumber = 0;
4078 	if ((nested.in = fopen(nested.fileName, "rb"))) {
4079 		while (_lou_getALine(&nested))
4080 			compileRule(&nested, NULL, NULL, table, displayTable);
4081 		fclose(nested.in);
4082 		return 1;
4083 	} else
4084 		_lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", nested.fileName);
4085 	errorCount++;
4086 	return 0;
4087 }
4088 
/**
 * Free a NULL-terminated char** array together with all of its strings.
 * Does nothing when `tables` is NULL.
 */
static void
free_tablefiles(char **tables) {
	size_t i;
	if (tables == NULL) return;
	for (i = 0; tables[i] != NULL; i++) {
		free(tables[i]);
	}
	free(tables);
}
4099 
4100 /**
4101  * Implement include opcode
4102  *
4103  */
4104 static int
includeFile(FileInfo * nested,CharsString * includedFile,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)4105 includeFile(FileInfo *nested, CharsString *includedFile, TranslationTableHeader **table,
4106 		DisplayTableHeader **displayTable) {
4107 	int k;
4108 	char includeThis[MAXSTRING];
4109 	char **tableFiles;
4110 	int rv;
4111 	for (k = 0; k < includedFile->length; k++)
4112 		includeThis[k] = (char)includedFile->chars[k];
4113 	if (k >= MAXSTRING) {
4114 		compileError(nested, "Include statement too long: 'include %s'", includeThis);
4115 		return 0;
4116 	}
4117 	includeThis[k] = 0;
4118 	tableFiles = _lou_resolveTable(includeThis, nested->fileName);
4119 	if (tableFiles == NULL) {
4120 		errorCount++;
4121 		return 0;
4122 	}
4123 	if (tableFiles[1] != NULL) {
4124 		free_tablefiles(tableFiles);
4125 		compileError(nested,
4126 				"Table list not supported in include statement: 'include %s'",
4127 				includeThis);
4128 		return 0;
4129 	}
4130 	rv = compileFile(*tableFiles, table, displayTable);
4131 	free_tablefiles(tableFiles);
4132 	return rv;
4133 }
4134 
4135 /**
4136  * Compile source tables into a table in memory
4137  *
4138  */
4139 static int
compileTable(const char * tableList,const char * displayTableList,TranslationTableHeader ** translationTable,DisplayTableHeader ** displayTable)4140 compileTable(const char *tableList, const char *displayTableList,
4141 		TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) {
4142 	char **tableFiles;
4143 	char **subTable;
4144 	if (translationTable && !tableList) return 0;
4145 	if (displayTable && !displayTableList) return 0;
4146 	if (!translationTable && !displayTable) return 0;
4147 	if (translationTable) *translationTable = NULL;
4148 	if (displayTable) *displayTable = NULL;
4149 	errorCount = warningCount = fileCount = 0;
4150 	if (!opcodeLengths[0]) {
4151 		TranslationTableOpcode opcode;
4152 		for (opcode = 0; opcode < CTO_None; opcode++)
4153 			opcodeLengths[opcode] = (short)strlen(opcodeNames[opcode]);
4154 	}
4155 	if (translationTable) allocateTranslationTable(NULL, translationTable);
4156 	if (displayTable) allocateDisplayTable(NULL, displayTable);
4157 
4158 	if (translationTable) {
4159 		(*translationTable)->emphClasses[0] = NULL;
4160 		(*translationTable)->characterClasses = NULL;
4161 		(*translationTable)->ruleNames = NULL;
4162 	}
4163 
4164 	/* Compile things that are necesary for the proper operation of
4165 	 * liblouis or liblouisxml or liblouisutdml */
4166 	/* TODO: These definitions seem to be necessary for proper functioning of
4167 	   liblouisutdml. Find a way to satisfy those requirements without hard coding
4168 	   some characters in every table notably behind the users back */
4169 	compileString("space \\xffff 123456789abcdef LOU_ENDSEGMENT", translationTable,
4170 			displayTable);
4171 
4172 	if (displayTable && translationTable && strcmp(tableList, displayTableList) == 0) {
4173 		/* Compile the display and translation tables in one go */
4174 
4175 		/* Compile all subtables in the list */
4176 		if (!(tableFiles = _lou_resolveTable(tableList, NULL))) {
4177 			errorCount++;
4178 			goto cleanup;
4179 		}
4180 		for (subTable = tableFiles; *subTable; subTable++)
4181 			if (!compileFile(*subTable, translationTable, displayTable)) goto cleanup;
4182 	} else {
4183 		/* Compile the display and translation tables separately */
4184 
4185 		if (displayTable) {
4186 			if (!(tableFiles = _lou_resolveTable(displayTableList, NULL))) {
4187 				errorCount++;
4188 				goto cleanup;
4189 			}
4190 			for (subTable = tableFiles; *subTable; subTable++)
4191 				if (!compileFile(*subTable, NULL, displayTable)) goto cleanup;
4192 			free_tablefiles(tableFiles);
4193 			tableFiles = NULL;
4194 		}
4195 		if (translationTable) {
4196 			if (!(tableFiles = _lou_resolveTable(tableList, NULL))) {
4197 				errorCount++;
4198 				goto cleanup;
4199 			}
4200 			for (subTable = tableFiles; *subTable; subTable++)
4201 				if (!compileFile(*subTable, translationTable, NULL)) goto cleanup;
4202 		}
4203 	}
4204 
4205 /* Clean up after compiling files */
4206 cleanup:
4207 	free_tablefiles(tableFiles);
4208 	if (warningCount) _lou_logMessage(LOU_LOG_WARN, "%d warnings issued", warningCount);
4209 	if (!errorCount) {
4210 		if (translationTable) setDefaults(*translationTable);
4211 		return 1;
4212 	} else {
4213 		_lou_logMessage(LOU_LOG_ERROR, "%d errors found.", errorCount);
4214 		if (translationTable) {
4215 			if (*translationTable) free(*translationTable);
4216 			*translationTable = NULL;
4217 		}
4218 		if (displayTable) {
4219 			if (*displayTable) free(*displayTable);
4220 			*displayTable = NULL;
4221 		}
4222 		return 0;
4223 	}
4224 }
4225 
4226 /* Return the emphasis classes declared in tableList. */
4227 char const **EXPORT_CALL
lou_getEmphClasses(const char * tableList)4228 lou_getEmphClasses(const char *tableList) {
4229 	const char *names[MAX_EMPH_CLASSES + 1];
4230 	unsigned int count = 0;
4231 	const TranslationTableHeader *table = _lou_getTranslationTable(tableList);
4232 	if (!table) return NULL;
4233 
4234 	while (count < MAX_EMPH_CLASSES) {
4235 		char const *name = table->emphClasses[count];
4236 		if (!name) break;
4237 		names[count++] = name;
4238 	}
4239 	names[count++] = NULL;
4240 
4241 	{
4242 		unsigned int size = count * sizeof(names[0]);
4243 		char const **result = malloc(size);
4244 		if (!result) return NULL;
4245 		/* The void* cast is necessary to stop MSVC from warning about
4246 		 * different 'const' qualifiers (C4090). */
4247 		memcpy((void *)result, names, size);
4248 		return result;
4249 	}
4250 }
4251 
/* Forward declaration: getTable is defined further down in this file but is
 * already called by the wrapper functions that follow. */
void
getTable(const char *tableList, const char *displayTableList,
		TranslationTableHeader **translationTable, DisplayTableHeader **displayTable);
4255 
4256 void EXPORT_CALL
_lou_getTable(const char * tableList,const char * displayTableList,const TranslationTableHeader ** translationTable,const DisplayTableHeader ** displayTable)4257 _lou_getTable(const char *tableList, const char *displayTableList,
4258 		const TranslationTableHeader **translationTable,
4259 		const DisplayTableHeader **displayTable) {
4260 	TranslationTableHeader *newTable;
4261 	DisplayTableHeader *newDisplayTable;
4262 	getTable(tableList, displayTableList, &newTable, &newDisplayTable);
4263 	*translationTable = newTable;
4264 	*displayTable = newDisplayTable;
4265 }
4266 
4267 /* Checks and loads tableList. */
4268 const void *EXPORT_CALL
lou_getTable(const char * tableList)4269 lou_getTable(const char *tableList) {
4270 	const TranslationTableHeader *table;
4271 	const DisplayTableHeader *displayTable;
4272 	_lou_getTable(tableList, tableList, &table, &displayTable);
4273 	if (!table || !displayTable) return NULL;
4274 	return table;
4275 }
4276 
4277 const TranslationTableHeader *EXPORT_CALL
_lou_getTranslationTable(const char * tableList)4278 _lou_getTranslationTable(const char *tableList) {
4279 	TranslationTableHeader *table;
4280 	getTable(tableList, NULL, &table, NULL);
4281 	return table;
4282 }
4283 
4284 const DisplayTableHeader *EXPORT_CALL
_lou_getDisplayTable(const char * tableList)4285 _lou_getDisplayTable(const char *tableList) {
4286 	DisplayTableHeader *table;
4287 	getTable(NULL, tableList, NULL, &table);
4288 	return table;
4289 }
4290 
4291 void
getTable(const char * translationTableList,const char * displayTableList,TranslationTableHeader ** translationTable,DisplayTableHeader ** displayTable)4292 getTable(const char *translationTableList, const char *displayTableList,
4293 		TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) {
4294 	/* Keep track of which tables have already been compiled */
4295 	int translationTableListLen, displayTableListLen = 0;
4296 	if (translationTableList == NULL || *translationTableList == 0)
4297 		translationTable = NULL;
4298 	if (displayTableList == NULL || *displayTableList == 0) displayTable = NULL;
4299 	/* See if translation table has already been compiled */
4300 	if (translationTable) {
4301 		translationTableListLen = (int)strlen(translationTableList);
4302 		*translationTable = NULL;
4303 		TranslationTableChainEntry *currentEntry = translationTableChain;
4304 		TranslationTableChainEntry *prevEntry = NULL;
4305 		while (currentEntry != NULL) {
4306 			if (translationTableListLen == currentEntry->tableListLength &&
4307 					(memcmp(&currentEntry->tableList[0], translationTableList,
4308 							translationTableListLen)) == 0) {
4309 				/* Move the table to the top of the table chain. */
4310 				if (prevEntry != NULL) {
4311 					prevEntry->next = currentEntry->next;
4312 					currentEntry->next = translationTableChain;
4313 					translationTableChain = currentEntry;
4314 				}
4315 				*translationTable = currentEntry->table;
4316 				break;
4317 			}
4318 			prevEntry = currentEntry;
4319 			currentEntry = currentEntry->next;
4320 		}
4321 	}
4322 	/* See if display table has already been compiled */
4323 	if (displayTable) {
4324 		displayTableListLen = (int)strlen(displayTableList);
4325 		*displayTable = NULL;
4326 		DisplayTableChainEntry *currentEntry = displayTableChain;
4327 		DisplayTableChainEntry *prevEntry = NULL;
4328 		while (currentEntry != NULL) {
4329 			if (displayTableListLen == currentEntry->tableListLength &&
4330 					(memcmp(&currentEntry->tableList[0], displayTableList,
4331 							displayTableListLen)) == 0) {
4332 				/* Move the table to the top of the table chain. */
4333 				if (prevEntry != NULL) {
4334 					prevEntry->next = currentEntry->next;
4335 					currentEntry->next = displayTableChain;
4336 					displayTableChain = currentEntry;
4337 				}
4338 				*displayTable = currentEntry->table;
4339 				break;
4340 			}
4341 			prevEntry = currentEntry;
4342 			currentEntry = currentEntry->next;
4343 		}
4344 	}
4345 	if ((translationTable && *translationTable == NULL) ||
4346 			(displayTable && *displayTable == NULL)) {
4347 		TranslationTableHeader *newTranslationTable = NULL;
4348 		DisplayTableHeader *newDisplayTable = NULL;
4349 		if (compileTable(translationTableList, displayTableList,
4350 					(translationTable && *translationTable == NULL) ? &newTranslationTable
4351 																	: NULL,
4352 					(displayTable && *displayTable == NULL) ? &newDisplayTable : NULL)) {
4353 			/* Add a new entry to the top of the table chain. */
4354 			if (newTranslationTable != NULL) {
4355 				int entrySize =
4356 						sizeof(TranslationTableChainEntry) + translationTableListLen;
4357 				TranslationTableChainEntry *newEntry = malloc(entrySize);
4358 				if (!newEntry) _lou_outOfMemory();
4359 				newEntry->next = translationTableChain;
4360 				newEntry->table = newTranslationTable;
4361 				newEntry->tableListLength = translationTableListLen;
4362 				memcpy(&newEntry->tableList[0], translationTableList,
4363 						translationTableListLen);
4364 				translationTableChain = newEntry;
4365 				*translationTable = newTranslationTable;
4366 			}
4367 			if (newDisplayTable != NULL) {
4368 				int entrySize = sizeof(DisplayTableChainEntry) + displayTableListLen;
4369 				DisplayTableChainEntry *newEntry = malloc(entrySize);
4370 				if (!newEntry) _lou_outOfMemory();
4371 				newEntry->next = displayTableChain;
4372 				newEntry->table = newDisplayTable;
4373 				newEntry->tableListLength = displayTableListLen;
4374 				memcpy(&newEntry->tableList[0], displayTableList, displayTableListLen);
4375 				displayTableChain = newEntry;
4376 				*displayTable = newDisplayTable;
4377 			}
4378 		} else {
4379 			_lou_logMessage(
4380 					LOU_LOG_ERROR, "%s could not be compiled", translationTableList);
4381 			return;
4382 		}
4383 	}
4384 }
4385 
4386 int EXPORT_CALL
lou_checkTable(const char * tableList)4387 lou_checkTable(const char *tableList) {
4388 	if (lou_getTable(tableList)) return 1;
4389 	return 0;
4390 }
4391 
4392 formtype EXPORT_CALL
lou_getTypeformForEmphClass(const char * tableList,const char * emphClass)4393 lou_getTypeformForEmphClass(const char *tableList, const char *emphClass) {
4394 	int i;
4395 	const TranslationTableHeader *table = _lou_getTranslationTable(tableList);
4396 	if (!table) return 0;
4397 	for (i = 0; table->emphClasses[i]; i++)
4398 		if (strcmp(emphClass, table->emphClasses[i]) == 0) return italic << i;
4399 	return 0;
4400 }
4401 
4402 static unsigned char *destSpacing = NULL;
4403 static int sizeDestSpacing = 0;
4404 static formtype *typebuf = NULL;
4405 static unsigned int *wordBuffer = NULL;
4406 static EmphasisInfo *emphasisBuffer = NULL;
4407 static int sizeTypebuf = 0;
4408 static widechar *passbuf[MAXPASSBUF] = { NULL };
4409 static int sizePassbuf[MAXPASSBUF] = { 0 };
4410 static int *posMapping1 = NULL;
4411 static int sizePosMapping1 = 0;
4412 static int *posMapping2 = NULL;
4413 static int sizePosMapping2 = 0;
4414 static int *posMapping3 = NULL;
4415 static int sizePosMapping3 = 0;
4416 void *EXPORT_CALL
_lou_allocMem(AllocBuf buffer,int index,int srcmax,int destmax)4417 _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax) {
4418 	if (srcmax < 1024) srcmax = 1024;
4419 	if (destmax < 1024) destmax = 1024;
4420 	switch (buffer) {
4421 	case alloc_typebuf:
4422 		if (destmax > sizeTypebuf) {
4423 			if (typebuf != NULL) free(typebuf);
4424 			// TODO: should this be srcmax?
4425 			typebuf = malloc((destmax + 4) * sizeof(formtype));
4426 			if (!typebuf) _lou_outOfMemory();
4427 			sizeTypebuf = destmax;
4428 		}
4429 		return typebuf;
4430 
4431 	case alloc_wordBuffer:
4432 
4433 		if (wordBuffer != NULL) free(wordBuffer);
4434 		wordBuffer = malloc((srcmax + 4) * sizeof(unsigned int));
4435 		if (!wordBuffer) _lou_outOfMemory();
4436 		return wordBuffer;
4437 
4438 	case alloc_emphasisBuffer:
4439 
4440 		if (emphasisBuffer != NULL) free(emphasisBuffer);
4441 		emphasisBuffer = malloc((srcmax + 4) * sizeof(EmphasisInfo));
4442 		if (!emphasisBuffer) _lou_outOfMemory();
4443 		return emphasisBuffer;
4444 
4445 	case alloc_destSpacing:
4446 		if (destmax > sizeDestSpacing) {
4447 			if (destSpacing != NULL) free(destSpacing);
4448 			destSpacing = malloc(destmax + 4);
4449 			if (!destSpacing) _lou_outOfMemory();
4450 			sizeDestSpacing = destmax;
4451 		}
4452 		return destSpacing;
4453 	case alloc_passbuf:
4454 		if (index < 0 || index >= MAXPASSBUF) {
4455 			_lou_logMessage(LOU_LOG_FATAL, "Index out of bounds: %d\n", index);
4456 			exit(3);
4457 		}
4458 		if (destmax > sizePassbuf[index]) {
4459 			if (passbuf[index] != NULL) free(passbuf[index]);
4460 			passbuf[index] = malloc((destmax + 4) * CHARSIZE);
4461 			if (!passbuf[index]) _lou_outOfMemory();
4462 			sizePassbuf[index] = destmax;
4463 		}
4464 		return passbuf[index];
4465 	case alloc_posMapping1: {
4466 		int mapSize;
4467 		if (srcmax >= destmax)
4468 			mapSize = srcmax;
4469 		else
4470 			mapSize = destmax;
4471 		if (mapSize > sizePosMapping1) {
4472 			if (posMapping1 != NULL) free(posMapping1);
4473 			posMapping1 = malloc((mapSize + 4) * sizeof(int));
4474 			if (!posMapping1) _lou_outOfMemory();
4475 			sizePosMapping1 = mapSize;
4476 		}
4477 	}
4478 		return posMapping1;
4479 	case alloc_posMapping2: {
4480 		int mapSize;
4481 		if (srcmax >= destmax)
4482 			mapSize = srcmax;
4483 		else
4484 			mapSize = destmax;
4485 		if (mapSize > sizePosMapping2) {
4486 			if (posMapping2 != NULL) free(posMapping2);
4487 			posMapping2 = malloc((mapSize + 4) * sizeof(int));
4488 			if (!posMapping2) _lou_outOfMemory();
4489 			sizePosMapping2 = mapSize;
4490 		}
4491 	}
4492 		return posMapping2;
4493 	case alloc_posMapping3: {
4494 		int mapSize;
4495 		if (srcmax >= destmax)
4496 			mapSize = srcmax;
4497 		else
4498 			mapSize = destmax;
4499 		if (mapSize > sizePosMapping3) {
4500 			if (posMapping3 != NULL) free(posMapping3);
4501 			posMapping3 = malloc((mapSize + 4) * sizeof(int));
4502 			if (!posMapping3) _lou_outOfMemory();
4503 			sizePosMapping3 = mapSize;
4504 		}
4505 	}
4506 		return posMapping3;
4507 	default:
4508 		return NULL;
4509 	}
4510 }
4511 
4512 void EXPORT_CALL
lou_free(void)4513 lou_free(void) {
4514 	TranslationTableChainEntry *currentEntry;
4515 	TranslationTableChainEntry *previousEntry;
4516 	lou_logEnd();
4517 	if (translationTableChain != NULL) {
4518 		currentEntry = translationTableChain;
4519 		while (currentEntry) {
4520 			int i;
4521 			TranslationTableHeader *t = (TranslationTableHeader *)currentEntry->table;
4522 			for (i = 0; t->emphClasses[i]; i++) free(t->emphClasses[i]);
4523 			if (t->characterClasses) deallocateCharacterClasses(t);
4524 			if (t->ruleNames) deallocateRuleNames(t);
4525 			free(t);
4526 			previousEntry = currentEntry;
4527 			currentEntry = currentEntry->next;
4528 			free(previousEntry);
4529 		}
4530 		translationTableChain = NULL;
4531 	}
4532 	if (typebuf != NULL) free(typebuf);
4533 	typebuf = NULL;
4534 	if (wordBuffer != NULL) free(wordBuffer);
4535 	wordBuffer = NULL;
4536 	if (emphasisBuffer != NULL) free(emphasisBuffer);
4537 	emphasisBuffer = NULL;
4538 	sizeTypebuf = 0;
4539 	if (destSpacing != NULL) free(destSpacing);
4540 	destSpacing = NULL;
4541 	sizeDestSpacing = 0;
4542 	{
4543 		int k;
4544 		for (k = 0; k < MAXPASSBUF; k++) {
4545 			if (passbuf[k] != NULL) free(passbuf[k]);
4546 			passbuf[k] = NULL;
4547 			sizePassbuf[k] = 0;
4548 		}
4549 	}
4550 	if (posMapping1 != NULL) free(posMapping1);
4551 	posMapping1 = NULL;
4552 	sizePosMapping1 = 0;
4553 	if (posMapping2 != NULL) free(posMapping2);
4554 	posMapping2 = NULL;
4555 	sizePosMapping2 = 0;
4556 	if (posMapping3 != NULL) free(posMapping3);
4557 	posMapping3 = NULL;
4558 	sizePosMapping3 = 0;
4559 	opcodeLengths[0] = 0;
4560 }
4561 
4562 const char *EXPORT_CALL
lou_version(void)4563 lou_version(void) {
4564 	static const char *version = PACKAGE_VERSION;
4565 	return version;
4566 }
4567 
4568 int EXPORT_CALL
lou_charSize(void)4569 lou_charSize(void) {
4570 	return CHARSIZE;
4571 }
4572 
4573 int EXPORT_CALL
lou_compileString(const char * tableList,const char * inString)4574 lou_compileString(const char *tableList, const char *inString) {
4575 	TranslationTableHeader *table;
4576 	DisplayTableHeader *displayTable;
4577 	getTable(tableList, tableList, &table, &displayTable);
4578 	if (!table) return 0;
4579 	if (!compileString(inString, &table, &displayTable)) return 0;
4580 	return 1;
4581 }
4582 
4583 int EXPORT_CALL
_lou_compileTranslationRule(const char * tableList,const char * inString)4584 _lou_compileTranslationRule(const char *tableList, const char *inString) {
4585 	TranslationTableHeader *table;
4586 	getTable(tableList, NULL, &table, NULL);
4587 	return compileString(inString, &table, NULL);
4588 }
4589 
4590 int EXPORT_CALL
_lou_compileDisplayRule(const char * tableList,const char * inString)4591 _lou_compileDisplayRule(const char *tableList, const char *inString) {
4592 	DisplayTableHeader *table;
4593 	getTable(NULL, tableList, NULL, &table);
4594 	return compileString(inString, NULL, &table);
4595 }
4596 
4597 /**
 * This procedure provides a target for calls that serve as breakpoints
4599  * for gdb.
4600  */
4601 // char *EXPORT_CALL
4602 // lou_getTablePaths (void)
4603 // {
4604 //   static char paths[MAXSTRING];
4605 //   static char scratchBuf[MAXSTRING];
4606 //   char *pathList;
4607 //   strcpy (paths, tablePath);
4608 //   strcat (paths, ",");
4609 //   pathList = getenv ("LOUIS_TABLEPATH");
4610 //   if (pathList)
4611 //     {
4612 //       strcat (paths, pathList);
4613 //       strcat (paths, ",");
4614 //     }
4615 //   pathList = getcwd (scratchBuf, MAXSTRING);
4616 //   if (pathList)
4617 //     {
4618 //       strcat (paths, pathList);
4619 //       strcat (paths, ",");
4620 //     }
4621 //   pathList = lou_getDataPath ();
4622 //   if (pathList)
4623 //     {
4624 //       strcat (paths, pathList);
4625 //       strcat (paths, ",");
4626 //     }
4627 // #ifdef _WIN32
4628 //   strcpy (paths, lou_getProgramPath ());
4629 //   strcat (paths, "\\share\\liblouss\\tables\\");
4630 // #else
4631 //   strcpy (paths, TABLESDIR);
4632 // #endif
4633 //   return paths;
4634 // }
4635