1 /* liblouis Braille Translation and Back-Translation Library
2
3 Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4 BRLTTY Team
5
6 Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7 Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8 Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9 Copyright (C) 2016 Davy Kager, Dedicon
10
11 This file is part of liblouis.
12
13 liblouis is free software: you can redistribute it and/or modify it
14 under the terms of the GNU Lesser General Public License as published
15 by the Free Software Foundation, either version 2.1 of the License, or
16 (at your option) any later version.
17
18 liblouis is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27 /**
28 * @file
29 * @brief Read and compile translation tables
30 */
31
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <stdarg.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39
40 #include "internal.h"
41 #include "config.h"
42
/* Character code that stands in for a double quote inside strings. */
#define QUOTESUB 28

/* Only present in DEBUG builds; needed to make debugging easier. */
#ifdef DEBUG
wchar_t wchar;
#endif
49
50 /* The following variables and functions make it possible to specify the
51 * path on which all tables for liblouis and all files for liblouisutdml,
52 * in their proper directories, will be found.
53 */
54
/* Currently configured data path, or NULL while none has been set. */
static char *dataPathPtr = NULL;
56
57 char *EXPORT_CALL
lou_setDataPath(const char * path)58 lou_setDataPath(const char *path) {
59 static char dataPath[MAXSTRING];
60 dataPathPtr = NULL;
61 if (path == NULL) return NULL;
62 strcpy(dataPath, path);
63 dataPathPtr = dataPath;
64 return dataPathPtr;
65 }
66
67 char *EXPORT_CALL
lou_getDataPath(void)68 lou_getDataPath(void) {
69 return dataPathPtr;
70 }
71
72 /* End of dataPath code. */
73
74 static int
eqasc2uni(const unsigned char * a,const widechar * b,const int len)75 eqasc2uni(const unsigned char *a, const widechar *b, const int len) {
76 int k;
77 for (k = 0; k < len; k++)
78 if ((widechar)a[k] != b[k]) return 0;
79 return 1;
80 }
81
82 typedef struct CharsString {
83 widechar length;
84 widechar chars[MAXSTRING];
85 } CharsString;
86
/* Running totals, incremented by compileError() and compileWarning(). */
static int errorCount = 0;
static int warningCount = 0;
89
90 typedef struct TranslationTableChainEntry {
91 struct TranslationTableChainEntry *next;
92 TranslationTableHeader *table;
93 int tableListLength;
94 char tableList[1];
95 } TranslationTableChainEntry;
96
97 static TranslationTableChainEntry *translationTableChain = NULL;
98
99 typedef struct DisplayTableChainEntry {
100 struct DisplayTableChainEntry *next;
101 DisplayTableHeader *table;
102 int tableListLength;
103 char tableList[1];
104 } DisplayTableChainEntry;
105
106 static DisplayTableChainEntry *displayTableChain = NULL;
107
/* Names of the predefined character classes, terminated by NULL. */
static const char *characterClassNames[] = {
	"space", "letter", "digit",
	"punctuation", "uppercase", "lowercase",
	"math", "sign", "litdigit",
	NULL,
};
121
122 static const char *opcodeNames[CTO_None] = {
123 "include",
124 "locale",
125 "undefined",
126 "capsletter",
127 "begcapsword",
128 "endcapsword",
129 "begcaps",
130 "endcaps",
131 "begcapsphrase",
132 "endcapsphrase",
133 "lencapsphrase",
134 "letsign",
135 "noletsignbefore",
136 "noletsign",
137 "noletsignafter",
138 "numsign",
139 "numericmodechars",
140 "midendnumericmodechars",
141 "numericnocontchars",
142 "seqdelimiter",
143 "seqbeforechars",
144 "seqafterchars",
145 "seqafterpattern",
146 "seqafterexpression",
147 "emphclass",
148 "emphletter",
149 "begemphword",
150 "endemphword",
151 "begemph",
152 "endemph",
153 "begemphphrase",
154 "endemphphrase",
155 "lenemphphrase",
156 "capsmodechars",
157 "emphmodechars",
158 "begcomp",
159 "compbegemph1",
160 "compendemph1",
161 "compbegemph2",
162 "compendemph2",
163 "compbegemph3",
164 "compendemph3",
165 "compcapsign",
166 "compbegcaps",
167 "compendcaps",
168 "endcomp",
169 "nocontractsign",
170 "multind",
171 "compdots",
172 "comp6",
173 "class",
174 "after",
175 "before",
176 "noback",
177 "nofor",
178 "empmatchbefore",
179 "empmatchafter",
180 "swapcc",
181 "swapcd",
182 "swapdd",
183 "space",
184 "digit",
185 "punctuation",
186 "math",
187 "sign",
188 "letter",
189 "uppercase",
190 "lowercase",
191 "grouping",
192 "uplow",
193 "litdigit",
194 "display",
195 "replace",
196 "context",
197 "correct",
198 "pass2",
199 "pass3",
200 "pass4",
201 "repeated",
202 "repword",
203 "capsnocont",
204 "always",
205 "exactdots",
206 "nocross",
207 "syllable",
208 "nocont",
209 "compbrl",
210 "literal",
211 "largesign",
212 "word",
213 "partword",
214 "joinnum",
215 "joinword",
216 "lowword",
217 "contraction",
218 "sufword",
219 "prfword",
220 "begword",
221 "begmidword",
222 "midword",
223 "midendword",
224 "endword",
225 "prepunc",
226 "postpunc",
227 "begnum",
228 "midnum",
229 "endnum",
230 "decpoint",
231 "hyphen",
232 // "apostrophe",
233 // "initial",
234 "nobreak",
235 "match",
236 "backmatch",
237 "attribute",
238 };
239
240 static short opcodeLengths[CTO_None] = { 0 };
241
242 static void
243 compileError(FileInfo *nested, const char *format, ...);
244
245 static void
246 free_tablefiles(char **tables);
247
248 static int
getAChar(FileInfo * nested)249 getAChar(FileInfo *nested) {
250 /* Read a big endian, little endian or ASCII 8 file and convert it to
251 * 16- or 32-bit unsigned integers */
252 int ch1 = 0, ch2 = 0;
253 widechar character;
254 if (nested->encoding == ascii8)
255 if (nested->status == 2) {
256 nested->status++;
257 return nested->checkencoding[1];
258 }
259 while ((ch1 = fgetc(nested->in)) != EOF) {
260 if (nested->status < 2) nested->checkencoding[nested->status] = ch1;
261 nested->status++;
262 if (nested->status == 2) {
263 if (nested->checkencoding[0] == 0xfe && nested->checkencoding[1] == 0xff)
264 nested->encoding = bigEndian;
265 else if (nested->checkencoding[0] == 0xff && nested->checkencoding[1] == 0xfe)
266 nested->encoding = littleEndian;
267 else if (nested->checkencoding[0] < 128 && nested->checkencoding[1] < 128) {
268 nested->encoding = ascii8;
269 return nested->checkencoding[0];
270 } else {
271 compileError(nested,
272 "encoding is neither big-endian, little-endian nor ASCII 8.");
273 ch1 = EOF;
274 break;
275 ;
276 }
277 continue;
278 }
279 switch (nested->encoding) {
280 case noEncoding:
281 break;
282 case ascii8:
283 return ch1;
284 break;
285 case bigEndian:
286 ch2 = fgetc(nested->in);
287 if (ch2 == EOF) break;
288 character = (widechar)(ch1 << 8) | ch2;
289 return (int)character;
290 break;
291 case littleEndian:
292 ch2 = fgetc(nested->in);
293 if (ch2 == EOF) break;
294 character = (widechar)(ch2 << 8) | ch1;
295 return (int)character;
296 break;
297 }
298 if (ch1 == EOF || ch2 == EOF) break;
299 }
300 return EOF;
301 }
302
303 int EXPORT_CALL
_lou_getALine(FileInfo * nested)304 _lou_getALine(FileInfo *nested) {
305 /* Read a line of widechar's from an input file */
306 int ch;
307 int pch = 0;
308 nested->linelen = 0;
309 while ((ch = getAChar(nested)) != EOF) {
310 if (ch == 13) continue;
311 if (pch == '\\' && ch == 10) {
312 nested->linelen--;
313 pch = ch;
314 continue;
315 }
316 if (ch == 10 || nested->linelen >= MAXSTRING - 1) break;
317 nested->line[nested->linelen++] = (widechar)ch;
318 pch = ch;
319 }
320 nested->line[nested->linelen] = 0;
321 nested->linepos = 0;
322 if (ch == EOF) return 0;
323 nested->lineNumber++;
324 return 1;
325 }
326
327 static inline int
atEndOfLine(FileInfo * nested)328 atEndOfLine(FileInfo *nested) {
329 return nested->linepos >= nested->linelen;
330 }
331
332 static inline int
atTokenDelimiter(FileInfo * nested)333 atTokenDelimiter(FileInfo *nested) {
334 return nested->line[nested->linepos] <= 32;
335 }
336
337 static int
getToken(FileInfo * nested,CharsString * result,const char * description,int * lastToken)338 getToken(FileInfo *nested, CharsString *result, const char *description, int *lastToken) {
339 /* Find the next string of contiguous non-whitespace characters. If this
340 * is the last token on the line, return 2 instead of 1. */
341 while (!atEndOfLine(nested) && atTokenDelimiter(nested)) nested->linepos++;
342 result->length = 0;
343 while (!atEndOfLine(nested) && !atTokenDelimiter(nested)) {
344 int maxlen = MAXSTRING;
345 if (result->length >= maxlen) {
346 compileError(nested, "more than %d characters (bytes)", maxlen);
347 return 0;
348 } else
349 result->chars[result->length++] = nested->line[nested->linepos++];
350 }
351 if (!result->length) {
352 /* Not enough tokens */
353 if (description) compileError(nested, "%s not specified.", description);
354 return 0;
355 }
356 result->chars[result->length] = 0;
357 while (!atEndOfLine(nested) && atTokenDelimiter(nested)) nested->linepos++;
358 return (*lastToken = atEndOfLine(nested)) ? 2 : 1;
359 }
360
361 static void
compileError(FileInfo * nested,const char * format,...)362 compileError(FileInfo *nested, const char *format, ...) {
363 #ifndef __SYMBIAN32__
364 char buffer[MAXSTRING];
365 va_list arguments;
366 va_start(arguments, format);
367 vsnprintf(buffer, sizeof(buffer), format, arguments);
368 va_end(arguments);
369 if (nested)
370 _lou_logMessage(LOU_LOG_ERROR, "%s:%d: error: %s", nested->fileName,
371 nested->lineNumber, buffer);
372 else
373 _lou_logMessage(LOU_LOG_ERROR, "error: %s", buffer);
374 errorCount++;
375 #endif
376 }
377
378 static void
compileWarning(FileInfo * nested,const char * format,...)379 compileWarning(FileInfo *nested, const char *format, ...) {
380 #ifndef __SYMBIAN32__
381 char buffer[MAXSTRING];
382 va_list arguments;
383 va_start(arguments, format);
384 vsnprintf(buffer, sizeof(buffer), format, arguments);
385 va_end(arguments);
386 if (nested)
387 _lou_logMessage(LOU_LOG_WARN, "%s:%d: warning: %s", nested->fileName,
388 nested->lineNumber, buffer);
389 else
390 _lou_logMessage(LOU_LOG_WARN, "warning: %s", buffer);
391 warningCount++;
392 #endif
393 }
394
395 static int
allocateSpaceInTranslationTable(FileInfo * nested,TranslationTableOffset * offset,int count,TranslationTableHeader ** table)396 allocateSpaceInTranslationTable(FileInfo *nested, TranslationTableOffset *offset,
397 int count, TranslationTableHeader **table) {
398 /* allocate memory for table and expand previously allocated memory if necessary */
399 int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
400 TranslationTableOffset newSize = (*table)->bytesUsed + spaceNeeded;
401 TranslationTableOffset size = (*table)->tableSize;
402 if (newSize > size) {
403 TranslationTableHeader *newTable;
404 newSize += (newSize / OFFSETSIZE);
405 newTable = realloc(*table, newSize);
406 if (!newTable) {
407 compileError(nested, "Not enough memory for translation table.");
408 _lou_outOfMemory();
409 }
410 memset(((unsigned char *)newTable) + size, 0, newSize - size);
411 /* update references to the old table */
412 {
413 TranslationTableChainEntry *entry;
414 for (entry = translationTableChain; entry != NULL; entry = entry->next)
415 if (entry->table == *table)
416 entry->table = (TranslationTableHeader *)newTable;
417 }
418 *table = (TranslationTableHeader *)newTable;
419 (*table)->tableSize = newSize;
420 }
421 if (offset != NULL) {
422 *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE;
423 (*table)->bytesUsed += spaceNeeded;
424 }
425 return 1;
426 }
427
428 static int
allocateSpaceInDisplayTable(FileInfo * nested,TranslationTableOffset * offset,int count,DisplayTableHeader ** table)429 allocateSpaceInDisplayTable(FileInfo *nested, TranslationTableOffset *offset, int count,
430 DisplayTableHeader **table) {
431 /* allocate memory for table and expand previously allocated memory if necessary */
432 int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
433 TranslationTableOffset newSize = (*table)->bytesUsed + spaceNeeded;
434 TranslationTableOffset size = (*table)->tableSize;
435 if (newSize > size) {
436 DisplayTableHeader *newTable;
437 newSize += (newSize / OFFSETSIZE);
438 newTable = realloc(*table, newSize);
439 if (!newTable) {
440 compileError(nested, "Not enough memory for display table.");
441 _lou_outOfMemory();
442 }
443 memset(((unsigned char *)newTable) + size, 0, newSize - size);
444 /* update references to the old table */
445 {
446 DisplayTableChainEntry *entry;
447 for (entry = displayTableChain; entry != NULL; entry = entry->next)
448 if (entry->table == *table) entry->table = (DisplayTableHeader *)newTable;
449 }
450 *table = (DisplayTableHeader *)newTable;
451 (*table)->tableSize = newSize;
452 }
453 if (offset != NULL) {
454 *offset = ((*table)->bytesUsed - sizeof(**table)) / OFFSETSIZE;
455 (*table)->bytesUsed += spaceNeeded;
456 }
457 return 1;
458 }
459
460 static int
allocateTranslationTable(FileInfo * nested,TranslationTableHeader ** table)461 allocateTranslationTable(FileInfo *nested, TranslationTableHeader **table) {
462 /* Allocate memory for the table and a guess on the number of rules */
463 const TranslationTableOffset startSize = 2 * sizeof(**table);
464 if (*table) return 1;
465 TranslationTableOffset bytesUsed =
466 sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */
467 if (!(*table = malloc(startSize))) {
468 compileError(nested, "Not enough memory");
469 if (*table != NULL) free(*table);
470 *table = NULL;
471 _lou_outOfMemory();
472 }
473 memset(*table, 0, startSize);
474 (*table)->tableSize = startSize;
475 (*table)->bytesUsed = bytesUsed;
476 return 1;
477 }
478
479 static int
allocateDisplayTable(FileInfo * nested,DisplayTableHeader ** table)480 allocateDisplayTable(FileInfo *nested, DisplayTableHeader **table) {
481 /* Allocate memory for the table and a guess on the number of rules */
482 const TranslationTableOffset startSize = 2 * sizeof(**table);
483 if (*table) return 1;
484 TranslationTableOffset bytesUsed =
485 sizeof(**table) + OFFSETSIZE; /* So no offset is ever zero */
486 if (!(*table = malloc(startSize))) {
487 compileError(nested, "Not enough memory");
488 if (*table != NULL) free(*table);
489 *table = NULL;
490 _lou_outOfMemory();
491 }
492 memset(*table, 0, startSize);
493 (*table)->tableSize = startSize;
494 (*table)->bytesUsed = bytesUsed;
495 return 1;
496 }
497
498 static TranslationTableCharacter *
compile_findCharOrDots(widechar c,int m,TranslationTableHeader * table)499 compile_findCharOrDots(widechar c, int m, TranslationTableHeader *table) {
500 /* Look up a character or dot pattern. If m is 0 look up a character,
501 * otherwise look up a dot pattern. Although the algorithms are almost
502 * identical, different tables are needed for characters and dots because
503 * of the possibility of conflicts. */
504 TranslationTableCharacter *character;
505 TranslationTableOffset bucket;
506 unsigned long int makeHash = _lou_charHash(c);
507 if (m == 0)
508 bucket = table->characters[makeHash];
509 else
510 bucket = table->dots[makeHash];
511 while (bucket) {
512 character = (TranslationTableCharacter *)&table->ruleArea[bucket];
513 if (character->realchar == c) return character;
514 bucket = character->next;
515 }
516 return NULL;
517 }
518
519 static TranslationTableCharacter *
addCharOrDots(FileInfo * nested,widechar c,int m,TranslationTableHeader ** table)520 addCharOrDots(FileInfo *nested, widechar c, int m, TranslationTableHeader **table) {
521 /* See if a character or dot pattern is in the appropriate table. If not,
522 * insert it. In either
523 * case, return a pointer to it. */
524 TranslationTableOffset bucket;
525 TranslationTableCharacter *character;
526 TranslationTableCharacter *oldchar;
527 TranslationTableOffset offset;
528 unsigned long int makeHash;
529 if ((character = compile_findCharOrDots(c, m, *table))) return character;
530 if (!allocateSpaceInTranslationTable(nested, &offset, sizeof(*character), table))
531 return NULL;
532 character = (TranslationTableCharacter *)&(*table)->ruleArea[offset];
533 memset(character, 0, sizeof(*character));
534 character->realchar = c;
535 makeHash = _lou_charHash(c);
536 if (m == 0)
537 bucket = (*table)->characters[makeHash];
538 else
539 bucket = (*table)->dots[makeHash];
540 if (!bucket) {
541 if (m == 0)
542 (*table)->characters[makeHash] = offset;
543 else
544 (*table)->dots[makeHash] = offset;
545 } else {
546 oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[bucket];
547 while (oldchar->next)
548 oldchar = (TranslationTableCharacter *)&(*table)->ruleArea[oldchar->next];
549 oldchar->next = offset;
550 }
551 return character;
552 }
553
554 static CharOrDots *
getCharOrDots(widechar c,int m,const DisplayTableHeader * table)555 getCharOrDots(widechar c, int m, const DisplayTableHeader *table) {
556 CharOrDots *cdPtr;
557 TranslationTableOffset bucket;
558 unsigned long int makeHash = _lou_charHash(c);
559 if (m == 0)
560 bucket = table->charToDots[makeHash];
561 else
562 bucket = table->dotsToChar[makeHash];
563 while (bucket) {
564 cdPtr = (CharOrDots *)&table->ruleArea[bucket];
565 if (cdPtr->lookFor == c) return cdPtr;
566 bucket = cdPtr->next;
567 }
568 return NULL;
569 }
570
571 widechar EXPORT_CALL
_lou_getDotsForChar(widechar c,const DisplayTableHeader * table)572 _lou_getDotsForChar(widechar c, const DisplayTableHeader *table) {
573 CharOrDots *cdPtr = getCharOrDots(c, 0, table);
574 if (cdPtr) return cdPtr->found;
575 return LOU_DOTS;
576 }
577
578 widechar EXPORT_CALL
_lou_getCharFromDots(widechar d,const DisplayTableHeader * table)579 _lou_getCharFromDots(widechar d, const DisplayTableHeader *table) {
580 CharOrDots *cdPtr = getCharOrDots(d, 1, table);
581 if (cdPtr) return cdPtr->found;
582 return '\0';
583 }
584
585 static int
putCharAndDots(FileInfo * nested,widechar c,widechar d,DisplayTableHeader ** table)586 putCharAndDots(FileInfo *nested, widechar c, widechar d, DisplayTableHeader **table) {
587 TranslationTableOffset bucket;
588 CharOrDots *cdPtr;
589 CharOrDots *oldcdPtr = NULL;
590 TranslationTableOffset offset;
591 unsigned long int makeHash;
592 if (!(cdPtr = getCharOrDots(c, 0, *table))) {
593 if (!allocateSpaceInDisplayTable(nested, &offset, sizeof(*cdPtr), table))
594 return 0;
595 cdPtr = (CharOrDots *)&(*table)->ruleArea[offset];
596 cdPtr->next = 0;
597 cdPtr->lookFor = c;
598 cdPtr->found = d;
599 makeHash = _lou_charHash(c);
600 bucket = (*table)->charToDots[makeHash];
601 if (!bucket)
602 (*table)->charToDots[makeHash] = offset;
603 else {
604 oldcdPtr = (CharOrDots *)&(*table)->ruleArea[bucket];
605 while (oldcdPtr->next)
606 oldcdPtr = (CharOrDots *)&(*table)->ruleArea[oldcdPtr->next];
607 oldcdPtr->next = offset;
608 }
609 }
610 if (!(cdPtr = getCharOrDots(d, 1, *table))) {
611 if (!allocateSpaceInDisplayTable(nested, &offset, sizeof(*cdPtr), table))
612 return 0;
613 cdPtr = (CharOrDots *)&(*table)->ruleArea[offset];
614 cdPtr->next = 0;
615 cdPtr->lookFor = d;
616 cdPtr->found = c;
617 makeHash = _lou_charHash(d);
618 bucket = (*table)->dotsToChar[makeHash];
619 if (!bucket)
620 (*table)->dotsToChar[makeHash] = offset;
621 else {
622 oldcdPtr = (CharOrDots *)&(*table)->ruleArea[bucket];
623 while (oldcdPtr->next)
624 oldcdPtr = (CharOrDots *)&(*table)->ruleArea[oldcdPtr->next];
625 oldcdPtr->next = offset;
626 }
627 }
628 return 1;
629 }
630
/* Name of a rule part for messages: "action" for a non-zero argument,
 * "test" for zero. */
static inline const char *
getPartName(int actionPart) {
	if (actionPart) return "action";
	return "test";
}
635
636 static int
passFindCharacters(FileInfo * nested,widechar * instructions,int end,widechar ** characters,int * length)637 passFindCharacters(FileInfo *nested, widechar *instructions, int end,
638 widechar **characters, int *length) {
639 int IC = 0;
640 int lookback = 0;
641
642 *characters = NULL;
643 *length = 0;
644
645 while (IC < end) {
646 widechar instruction = instructions[IC];
647
648 switch (instruction) {
649 case pass_string:
650 case pass_dots: {
651 int count = instructions[IC + 1];
652 IC += 2;
653 if (count > lookback) {
654 *characters = &instructions[IC + lookback];
655 *length = count - lookback;
656 return 1;
657 } else {
658 lookback -= count;
659 }
660 IC += count;
661 continue;
662 }
663
664 case pass_attributes:
665 IC += 5;
666 if (instructions[IC - 2] == instructions[IC - 1] &&
667 instructions[IC - 1] <= lookback) {
668 lookback -= instructions[IC - 1];
669 continue;
670 }
671 goto NO_CHARACTERS;
672
673 case pass_swap:
674 IC += 2;
675 /* fall through */
676
677 case pass_groupstart:
678 case pass_groupend:
679 case pass_groupreplace:
680 IC += 3;
681
682 NO_CHARACTERS : { return 1; }
683
684 case pass_eq:
685 case pass_lt:
686 case pass_gt:
687 case pass_lteq:
688 case pass_gteq:
689 IC += 3;
690 continue;
691
692 case pass_lookback:
693 lookback += instructions[IC + 1];
694 IC += 2;
695 continue;
696
697 case pass_not:
698 case pass_startReplace:
699 case pass_endReplace:
700 case pass_first:
701 case pass_last:
702 case pass_copy:
703 case pass_omit:
704 case pass_plus:
705 case pass_hyphen:
706 IC += 1;
707 continue;
708
709 case pass_endTest:
710 goto NO_CHARACTERS;
711
712 default:
713 compileError(nested, "unhandled test suboperand: \\x%02x", instruction);
714 return 0;
715 }
716 }
717 goto NO_CHARACTERS;
718 }
719
720 /* The following functions are called by addRule to handle various cases. */
721
722 static void
addForwardRuleWithSingleChar(FileInfo * nested,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader ** table)723 addForwardRuleWithSingleChar(FileInfo *nested, TranslationTableOffset newRuleOffset,
724 TranslationTableRule *newRule, TranslationTableHeader **table) {
725 /* direction = 0, newRule->charslen = 1 */
726 TranslationTableRule *currentRule;
727 TranslationTableOffset *currentOffsetPtr;
728 TranslationTableCharacter *character;
729 int m = 0;
730 if (newRule->opcode == CTO_CompDots || newRule->opcode == CTO_Comp6) return;
731 if (newRule->opcode >= CTO_Pass2 && newRule->opcode <= CTO_Pass4) m = 1;
732 // get the character from the table, or if the character is not defined yet, define it
733 // (without adding attributes)
734 character = addCharOrDots(nested, newRule->charsdots[0], m, table);
735 if (m != 1 && character->attributes & CTC_Letter &&
736 (newRule->opcode == CTO_WholeWord || newRule->opcode == CTO_LargeSign)) {
737 if ((*table)->noLetsignCount < LETSIGNSIZE)
738 (*table)->noLetsign[(*table)->noLetsignCount++] = newRule->charsdots[0];
739 }
740 // if the new rule is a character definition rule, set the main definition rule of
741 // this character to it
742 // (possibly overwriting previous definition rules)
743 // adding the attributes to the character has already been done elsewhere
744 if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
745 character->definitionRule = newRuleOffset;
746 // add the new rule to the list of rules associated with this character
747 // if the new rule is a character definition rule, it is inserted at the end of the
748 // list
749 // otherwise it is inserted before the first character definition rule
750 currentOffsetPtr = &character->otherRules;
751 while (*currentOffsetPtr) {
752 currentRule = (TranslationTableRule *)&(*table)->ruleArea[*currentOffsetPtr];
753 if (currentRule->charslen == 0) break;
754 if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
755 if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)) break;
756 currentOffsetPtr = ¤tRule->charsnext;
757 }
758 newRule->charsnext = *currentOffsetPtr;
759 *currentOffsetPtr = newRuleOffset;
760 }
761
762 static void
addForwardRuleWithMultipleChars(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)763 addForwardRuleWithMultipleChars(TranslationTableOffset newRuleOffset,
764 TranslationTableRule *newRule, TranslationTableHeader *table) {
765 /* direction = 0 newRule->charslen > 1 */
766 TranslationTableRule *currentRule = NULL;
767 TranslationTableOffset *currentOffsetPtr =
768 &table->forRules[_lou_stringHash(&newRule->charsdots[0], 0, NULL)];
769 while (*currentOffsetPtr) {
770 currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
771 if (newRule->charslen > currentRule->charslen) break;
772 if (newRule->charslen == currentRule->charslen)
773 if ((currentRule->opcode == CTO_Always) && (newRule->opcode != CTO_Always))
774 break;
775 currentOffsetPtr = ¤tRule->charsnext;
776 }
777 newRule->charsnext = *currentOffsetPtr;
778 *currentOffsetPtr = newRuleOffset;
779 }
780
781 static void
addBackwardRuleWithSingleCell(FileInfo * nested,widechar cell,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader ** table)782 addBackwardRuleWithSingleCell(FileInfo *nested, widechar cell,
783 TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
784 TranslationTableHeader **table) {
785 /* direction = 1, newRule->dotslen = 1 */
786 TranslationTableRule *currentRule;
787 TranslationTableOffset *currentOffsetPtr;
788 TranslationTableCharacter *dots;
789 if (newRule->opcode == CTO_SwapCc || newRule->opcode == CTO_Repeated)
790 return; /* too ambiguous */
791 // get the cell from the table, or if the cell is not defined yet, define it (without
792 // adding attributes)
793 dots = addCharOrDots(nested, cell, 1, table);
794 if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
795 dots->definitionRule = newRuleOffset;
796 currentOffsetPtr = &dots->otherRules;
797 while (*currentOffsetPtr) {
798 currentRule = (TranslationTableRule *)&(*table)->ruleArea[*currentOffsetPtr];
799 if (newRule->charslen > currentRule->charslen || currentRule->dotslen == 0) break;
800 if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
801 if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)) break;
802 currentOffsetPtr = ¤tRule->dotsnext;
803 }
804 newRule->dotsnext = *currentOffsetPtr;
805 *currentOffsetPtr = newRuleOffset;
806 }
807
808 static void
addBackwardRuleWithMultipleCells(widechar * cells,int count,TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)809 addBackwardRuleWithMultipleCells(widechar *cells, int count,
810 TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
811 TranslationTableHeader *table) {
812 /* direction = 1, newRule->dotslen > 1 */
813 TranslationTableRule *currentRule = NULL;
814 TranslationTableOffset *currentOffsetPtr =
815 &table->backRules[_lou_stringHash(cells, 0, NULL)];
816 if (newRule->opcode == CTO_SwapCc) return;
817 while (*currentOffsetPtr) {
818 int currentLength;
819 int newLength;
820 currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
821 currentLength = currentRule->dotslen + currentRule->charslen;
822 newLength = count + newRule->charslen;
823 if (newLength > currentLength) break;
824 if (currentLength == newLength)
825 if ((currentRule->opcode == CTO_Always) && (newRule->opcode != CTO_Always))
826 break;
827 currentOffsetPtr = ¤tRule->dotsnext;
828 }
829 newRule->dotsnext = *currentOffsetPtr;
830 *currentOffsetPtr = newRuleOffset;
831 }
832
833 static int
addForwardPassRule(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)834 addForwardPassRule(TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
835 TranslationTableHeader *table) {
836 TranslationTableOffset *currentOffsetPtr;
837 TranslationTableRule *currentRule;
838 switch (newRule->opcode) {
839 case CTO_Correct:
840 currentOffsetPtr = &table->forPassRules[0];
841 break;
842 case CTO_Context:
843 currentOffsetPtr = &table->forPassRules[1];
844 break;
845 case CTO_Pass2:
846 currentOffsetPtr = &table->forPassRules[2];
847 break;
848 case CTO_Pass3:
849 currentOffsetPtr = &table->forPassRules[3];
850 break;
851 case CTO_Pass4:
852 currentOffsetPtr = &table->forPassRules[4];
853 break;
854 default:
855 return 0;
856 }
857 while (*currentOffsetPtr) {
858 currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
859 if (newRule->charslen > currentRule->charslen) break;
860 currentOffsetPtr = ¤tRule->charsnext;
861 }
862 newRule->charsnext = *currentOffsetPtr;
863 *currentOffsetPtr = newRuleOffset;
864 return 1;
865 }
866
867 static int
addBackwardPassRule(TranslationTableOffset newRuleOffset,TranslationTableRule * newRule,TranslationTableHeader * table)868 addBackwardPassRule(TranslationTableOffset newRuleOffset, TranslationTableRule *newRule,
869 TranslationTableHeader *table) {
870 TranslationTableOffset *currentOffsetPtr;
871 TranslationTableRule *currentRule;
872 switch (newRule->opcode) {
873 case CTO_Correct:
874 currentOffsetPtr = &table->backPassRules[0];
875 break;
876 case CTO_Context:
877 currentOffsetPtr = &table->backPassRules[1];
878 break;
879 case CTO_Pass2:
880 currentOffsetPtr = &table->backPassRules[2];
881 break;
882 case CTO_Pass3:
883 currentOffsetPtr = &table->backPassRules[3];
884 break;
885 case CTO_Pass4:
886 currentOffsetPtr = &table->backPassRules[4];
887 break;
888 default:
889 return 0;
890 }
891 while (*currentOffsetPtr) {
892 currentRule = (TranslationTableRule *)&table->ruleArea[*currentOffsetPtr];
893 if (newRule->charslen > currentRule->charslen) break;
894 currentOffsetPtr = ¤tRule->dotsnext;
895 }
896 newRule->dotsnext = *currentOffsetPtr;
897 *currentOffsetPtr = newRuleOffset;
898 return 1;
899 }
900
901 static int
addRule(FileInfo * nested,TranslationTableOpcode opcode,CharsString * ruleChars,CharsString * ruleDots,TranslationTableCharacterAttributes after,TranslationTableCharacterAttributes before,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)902 addRule(FileInfo *nested, TranslationTableOpcode opcode, CharsString *ruleChars,
903 CharsString *ruleDots, TranslationTableCharacterAttributes after,
904 TranslationTableCharacterAttributes before, TranslationTableOffset *newRuleOffset,
905 TranslationTableRule **newRule, int noback, int nofor,
906 TranslationTableHeader **table) {
907 /* Add a rule to the table, using the hash function to find the start of
908 * chains and chaining both the chars and dots strings */
909 TranslationTableOffset ruleOffset;
910 int ruleSize = sizeof(TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE);
911 if (ruleChars) ruleSize += CHARSIZE * ruleChars->length;
912 if (ruleDots) ruleSize += CHARSIZE * ruleDots->length;
913 if (!allocateSpaceInTranslationTable(nested, &ruleOffset, ruleSize, table)) return 0;
914 TranslationTableRule *rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
915 if (newRule) *newRule = rule;
916 if (newRuleOffset) *newRuleOffset = ruleOffset;
917 rule->opcode = opcode;
918 rule->after = after;
919 rule->before = before;
920 if (ruleChars)
921 memcpy(&rule->charsdots[0], &ruleChars->chars[0],
922 CHARSIZE * (rule->charslen = ruleChars->length));
923 else
924 rule->charslen = 0;
925 if (ruleDots)
926 memcpy(&rule->charsdots[rule->charslen], &ruleDots->chars[0],
927 CHARSIZE * (rule->dotslen = ruleDots->length));
928 else
929 rule->dotslen = 0;
930
931 /* link new rule into table. */
932 if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd) return 1;
933 if (opcode >= CTO_Context && opcode <= CTO_Pass4)
934 if (!(opcode == CTO_Context && rule->charslen > 0)) {
935 if (!nofor)
936 if (!addForwardPassRule(ruleOffset, rule, *table)) return 0;
937 if (!noback)
938 if (!addBackwardPassRule(ruleOffset, rule, *table)) return 0;
939 return 1;
940 }
941 if (!nofor) {
942 if (rule->charslen == 1)
943 addForwardRuleWithSingleChar(nested, ruleOffset, rule, table);
944 else if (rule->charslen > 1)
945 addForwardRuleWithMultipleChars(ruleOffset, rule, *table);
946 }
947 if (!noback) {
948 widechar *cells;
949 int count;
950
951 if (rule->opcode == CTO_Context) {
952 cells = &rule->charsdots[0];
953 count = rule->charslen;
954 } else {
955 cells = &rule->charsdots[rule->charslen];
956 count = rule->dotslen;
957 }
958
959 if (count == 1)
960 addBackwardRuleWithSingleCell(nested, *cells, ruleOffset, rule, table);
961 else if (count > 1)
962 addBackwardRuleWithMultipleCells(cells, count, ruleOffset, rule, *table);
963 }
964 return 1;
965 }
966
967 static const CharacterClass *
findCharacterClass(const CharsString * name,const TranslationTableHeader * table)968 findCharacterClass(const CharsString *name, const TranslationTableHeader *table) {
969 /* Find a character class, whether predefined or user-defined */
970 const CharacterClass *class = table->characterClasses;
971 while (class) {
972 if ((name->length == class->length) &&
973 (memcmp(&name->chars[0], class->name, CHARSIZE * name->length) == 0))
974 return class;
975 class = class->next;
976 }
977 return NULL;
978 }
979
980 static CharacterClass *
addCharacterClass(FileInfo * nested,const widechar * name,int length,TranslationTableHeader * table)981 addCharacterClass(FileInfo *nested, const widechar *name, int length,
982 TranslationTableHeader *table) {
983 /* Define a character class, Whether predefined or user-defined */
984 CharacterClass **classes = &table->characterClasses;
985 ;
986 TranslationTableCharacterAttributes *nextAttribute =
987 &table->nextCharacterClassAttribute;
988 CharacterClass *class;
989 if (*nextAttribute) {
990 if (!(class = malloc(sizeof(*class) + CHARSIZE * (length - 1))))
991 _lou_outOfMemory();
992 else {
993 memset(class, 0, sizeof(*class));
994 memcpy(class->name, name, CHARSIZE * (class->length = length));
995 class->attribute = *nextAttribute;
996 if (*nextAttribute == CTC_Class4)
997 *nextAttribute = CTC_UserDefined0;
998 else if (*nextAttribute == CTC_UserDefined7)
999 *nextAttribute = CTC_Class13;
1000 else
1001 *nextAttribute <<= 1;
1002 class->next = *classes;
1003 *classes = class;
1004 return class;
1005 }
1006 }
1007 compileError(nested, "character class table overflow.");
1008 return NULL;
1009 }
1010
1011 static void
deallocateCharacterClasses(TranslationTableHeader * table)1012 deallocateCharacterClasses(TranslationTableHeader *table) {
1013 CharacterClass **classes = &table->characterClasses;
1014 while (*classes) {
1015 CharacterClass *class = *classes;
1016 *classes = (*classes)->next;
1017 if (class) free(class);
1018 }
1019 }
1020
1021 static int
allocateCharacterClasses(TranslationTableHeader * table)1022 allocateCharacterClasses(TranslationTableHeader *table) {
1023 /* Allocate memory for predifined character classes */
1024 int k = 0;
1025 table->characterClasses = NULL;
1026 table->nextCharacterClassAttribute = 1;
1027 while (characterClassNames[k]) {
1028 widechar wname[MAXSTRING];
1029 int length = (int)strlen(characterClassNames[k]);
1030 int kk;
1031 for (kk = 0; kk < length; kk++) wname[kk] = (widechar)characterClassNames[k][kk];
1032 if (!addCharacterClass(NULL, wname, length, table)) {
1033 deallocateCharacterClasses(table);
1034 return 0;
1035 }
1036 k++;
1037 }
1038 return 1;
1039 }
1040
1041 static TranslationTableOpcode
getOpcode(FileInfo * nested,const CharsString * token)1042 getOpcode(FileInfo *nested, const CharsString *token) {
1043 static TranslationTableOpcode lastOpcode = 0;
1044 TranslationTableOpcode opcode = lastOpcode;
1045
1046 do {
1047 if (token->length == opcodeLengths[opcode])
1048 if (eqasc2uni((unsigned char *)opcodeNames[opcode], &token->chars[0],
1049 token->length)) {
1050 lastOpcode = opcode;
1051 return opcode;
1052 }
1053 opcode++;
1054 if (opcode >= CTO_None) opcode = 0;
1055 } while (opcode != lastOpcode);
1056 compileError(nested, "opcode %s not defined.",
1057 _lou_showString(&token->chars[0], token->length, 0));
1058 return CTO_None;
1059 }
1060
1061 TranslationTableOpcode EXPORT_CALL
_lou_findOpcodeNumber(const char * toFind)1062 _lou_findOpcodeNumber(const char *toFind) {
1063 /* Used by tools such as lou_debug */
1064 static TranslationTableOpcode lastOpcode = 0;
1065 TranslationTableOpcode opcode = lastOpcode;
1066 int length = (int)strlen(toFind);
1067 do {
1068 if (length == opcodeLengths[opcode] &&
1069 strcasecmp(toFind, opcodeNames[opcode]) == 0) {
1070 lastOpcode = opcode;
1071 return opcode;
1072 }
1073 opcode++;
1074 if (opcode >= CTO_None) opcode = 0;
1075 } while (opcode != lastOpcode);
1076 return CTO_None;
1077 }
1078
1079 const char *EXPORT_CALL
_lou_findOpcodeName(TranslationTableOpcode opcode)1080 _lou_findOpcodeName(TranslationTableOpcode opcode) {
1081 static char scratchBuf[MAXSTRING];
1082 /* Used by tools such as lou_debug */
1083 if (opcode < 0 || opcode >= CTO_None) {
1084 sprintf(scratchBuf, "%u", opcode);
1085 return scratchBuf;
1086 }
1087 return opcodeNames[opcode];
1088 }
1089
1090 static widechar
hexValue(FileInfo * nested,const widechar * digits,int length)1091 hexValue(FileInfo *nested, const widechar *digits, int length) {
1092 int k;
1093 unsigned int binaryValue = 0;
1094 for (k = 0; k < length; k++) {
1095 unsigned int hexDigit = 0;
1096 if (digits[k] >= '0' && digits[k] <= '9')
1097 hexDigit = digits[k] - '0';
1098 else if (digits[k] >= 'a' && digits[k] <= 'f')
1099 hexDigit = digits[k] - 'a' + 10;
1100 else if (digits[k] >= 'A' && digits[k] <= 'F')
1101 hexDigit = digits[k] - 'A' + 10;
1102 else {
1103 compileError(nested, "invalid %d-digit hexadecimal number", length);
1104 return (widechar)0xffffffff;
1105 }
1106 binaryValue |= hexDigit << (4 * (length - 1 - k));
1107 }
1108 return (widechar)binaryValue;
1109 }
1110
1111 #define MAXBYTES 7
1112 static const unsigned int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC,
1113 0XFE };
1114
1115 static int
parseChars(FileInfo * nested,CharsString * result,CharsString * token)1116 parseChars(FileInfo *nested, CharsString *result, CharsString *token) {
1117 int in = 0;
1118 int out = 0;
1119 int lastOutSize = 0;
1120 int lastIn;
1121 unsigned int ch = 0;
1122 int numBytes = 0;
1123 unsigned int utf32 = 0;
1124 int k;
1125 while (in < token->length) {
1126 ch = token->chars[in++] & 0xff;
1127 if (ch < 128) {
1128 if (ch == '\\') { /* escape sequence */
1129 switch (ch = token->chars[in]) {
1130 case '\\':
1131 break;
1132 case 'e':
1133 ch = 0x1b;
1134 break;
1135 case 'f':
1136 ch = 12;
1137 break;
1138 case 'n':
1139 ch = 10;
1140 break;
1141 case 'r':
1142 ch = 13;
1143 break;
1144 case 's':
1145 ch = ' ';
1146 break;
1147 case 't':
1148 ch = 9;
1149 break;
1150 case 'v':
1151 ch = 11;
1152 break;
1153 case 'w':
1154 ch = LOU_ENDSEGMENT;
1155 break;
1156 case 34:
1157 ch = QUOTESUB;
1158 break;
1159 case 'X':
1160 compileWarning(nested, "\\Xhhhh (with a capital 'X') is deprecated.");
1161 case 'x':
1162 if (token->length - in > 4) {
1163 ch = hexValue(nested, &token->chars[in + 1], 4);
1164 in += 4;
1165 }
1166 break;
1167 case 'Y':
1168 compileWarning(
1169 nested, "\\Yhhhhh (with a capital 'Y') is deprecated.");
1170 case 'y':
1171 if (CHARSIZE == 2) {
1172 not32:
1173 compileError(nested,
1174 "liblouis has not been compiled for 32-bit Unicode");
1175 break;
1176 }
1177 if (token->length - in > 5) {
1178 ch = hexValue(nested, &token->chars[in + 1], 5);
1179 in += 5;
1180 }
1181 break;
1182 case 'Z':
1183 compileWarning(
1184 nested, "\\Zhhhhhhhh (with a capital 'Z') is deprecated.");
1185 case 'z':
1186 if (CHARSIZE == 2) goto not32;
1187 if (token->length - in > 8) {
1188 ch = hexValue(nested, &token->chars[in + 1], 8);
1189 in += 8;
1190 }
1191 break;
1192 default:
1193 compileError(nested, "invalid escape sequence '\\%c'", ch);
1194 break;
1195 }
1196 in++;
1197 }
1198 if (out >= MAXSTRING - 1) {
1199 compileError(nested, "Token too long");
1200 result->length = MAXSTRING - 1;
1201 return 1;
1202 }
1203 result->chars[out++] = (widechar)ch;
1204 continue;
1205 }
1206 lastOutSize = out;
1207 lastIn = in;
1208 for (numBytes = MAXBYTES - 1; numBytes > 0; numBytes--)
1209 if (ch >= first0Bit[numBytes]) break;
1210 utf32 = ch & (0XFF - first0Bit[numBytes]);
1211 for (k = 0; k < numBytes; k++) {
1212 if (in >= MAXSTRING - 1) break;
1213 if (out >= MAXSTRING - 1) {
1214 compileError(nested, "Token too long");
1215 result->length = lastOutSize;
1216 return 1;
1217 }
1218 if (token->chars[in] < 128 || (token->chars[in] & 0x0040)) {
1219 compileWarning(nested, "invalid UTF-8. Assuming Latin-1.");
1220 result->chars[out++] = token->chars[lastIn];
1221 in = lastIn + 1;
1222 continue;
1223 }
1224 utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f);
1225 }
1226 if (out >= MAXSTRING - 1) {
1227 compileError(nested, "Token too long");
1228 result->length = lastOutSize;
1229 return 1;
1230 }
1231 if (CHARSIZE == 2 && utf32 > 0xffff) utf32 = 0xffff;
1232 result->chars[out++] = (widechar)utf32;
1233 }
1234 result->length = out;
1235 return 1;
1236 }
1237
1238 int EXPORT_CALL
_lou_extParseChars(const char * inString,widechar * outString)1239 _lou_extParseChars(const char *inString, widechar *outString) {
1240 /* Parse external character strings */
1241 CharsString wideIn;
1242 CharsString result;
1243 int k;
1244 for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k];
1245 wideIn.chars[k] = 0;
1246 wideIn.length = k;
1247 parseChars(NULL, &result, &wideIn);
1248 if (errorCount) {
1249 errorCount = 0;
1250 return 0;
1251 }
1252 for (k = 0; k < result.length; k++) outString[k] = result.chars[k];
1253 return result.length;
1254 }
1255
1256 static int
parseDots(FileInfo * nested,CharsString * cells,const CharsString * token)1257 parseDots(FileInfo *nested, CharsString *cells, const CharsString *token) {
1258 /* get dot patterns */
1259 widechar cell = 0; /* assembly place for dots */
1260 int cellCount = 0;
1261 int index;
1262 int start = 0;
1263
1264 for (index = 0; index < token->length; index++) {
1265 int started = index != start;
1266 widechar character = token->chars[index];
1267 switch (character) { /* or dots to make up Braille cell */
1268 {
1269 int dot;
1270 case '1':
1271 dot = LOU_DOT_1;
1272 goto haveDot;
1273 case '2':
1274 dot = LOU_DOT_2;
1275 goto haveDot;
1276 case '3':
1277 dot = LOU_DOT_3;
1278 goto haveDot;
1279 case '4':
1280 dot = LOU_DOT_4;
1281 goto haveDot;
1282 case '5':
1283 dot = LOU_DOT_5;
1284 goto haveDot;
1285 case '6':
1286 dot = LOU_DOT_6;
1287 goto haveDot;
1288 case '7':
1289 dot = LOU_DOT_7;
1290 goto haveDot;
1291 case '8':
1292 dot = LOU_DOT_8;
1293 goto haveDot;
1294 case '9':
1295 dot = LOU_DOT_9;
1296 goto haveDot;
1297 case 'a':
1298 case 'A':
1299 dot = LOU_DOT_10;
1300 goto haveDot;
1301 case 'b':
1302 case 'B':
1303 dot = LOU_DOT_11;
1304 goto haveDot;
1305 case 'c':
1306 case 'C':
1307 dot = LOU_DOT_12;
1308 goto haveDot;
1309 case 'd':
1310 case 'D':
1311 dot = LOU_DOT_13;
1312 goto haveDot;
1313 case 'e':
1314 case 'E':
1315 dot = LOU_DOT_14;
1316 goto haveDot;
1317 case 'f':
1318 case 'F':
1319 dot = LOU_DOT_15;
1320 haveDot:
1321 if (started && !cell) goto invalid;
1322 if (cell & dot) {
1323 compileError(nested, "dot specified more than once.");
1324 return 0;
1325 }
1326 cell |= dot;
1327 break;
1328 }
1329 case '0': /* blank */
1330 if (started) goto invalid;
1331 break;
1332 case '-': /* got all dots for this cell */
1333 if (!started) {
1334 compileError(nested, "missing cell specification.");
1335 return 0;
1336 }
1337 cells->chars[cellCount++] = cell | LOU_DOTS;
1338 cell = 0;
1339 start = index + 1;
1340 break;
1341 default:
1342 invalid:
1343 compileError(
1344 nested, "invalid dot number %s.", _lou_showString(&character, 1, 0));
1345 return 0;
1346 }
1347 }
1348 if (index == start) {
1349 compileError(nested, "missing cell specification.");
1350 return 0;
1351 }
1352 cells->chars[cellCount++] = cell | LOU_DOTS; /* last cell */
1353 cells->length = cellCount;
1354 return 1;
1355 }
1356
1357 int EXPORT_CALL
_lou_extParseDots(const char * inString,widechar * outString)1358 _lou_extParseDots(const char *inString, widechar *outString) {
1359 /* Parse external dot patterns */
1360 CharsString wideIn;
1361 CharsString result;
1362 int k;
1363 for (k = 0; inString[k] && k < MAXSTRING - 1; k++) wideIn.chars[k] = inString[k];
1364 wideIn.chars[k] = 0;
1365 wideIn.length = k;
1366 parseDots(NULL, &result, &wideIn);
1367 if (errorCount) {
1368 errorCount = 0;
1369 return 0;
1370 }
1371 for (k = 0; k < result.length; k++) outString[k] = result.chars[k];
1372 outString[k] = 0;
1373 return result.length;
1374 }
1375
1376 static int
getCharacters(FileInfo * nested,CharsString * characters,int * lastToken)1377 getCharacters(FileInfo *nested, CharsString *characters, int *lastToken) {
1378 /* Get ruleChars string */
1379 CharsString token;
1380 if (getToken(nested, &token, "characters", lastToken))
1381 if (parseChars(nested, characters, &token)) return 1;
1382 return 0;
1383 }
1384
1385 static int
getRuleCharsText(FileInfo * nested,CharsString * ruleChars,int * lastToken)1386 getRuleCharsText(FileInfo *nested, CharsString *ruleChars, int *lastToken) {
1387 CharsString token;
1388 if (getToken(nested, &token, "Characters operand", lastToken))
1389 if (parseChars(nested, ruleChars, &token)) return 1;
1390 return 0;
1391 }
1392
1393 static int
getRuleDotsText(FileInfo * nested,CharsString * ruleDots,int * lastToken)1394 getRuleDotsText(FileInfo *nested, CharsString *ruleDots, int *lastToken) {
1395 CharsString token;
1396 if (getToken(nested, &token, "characters", lastToken))
1397 if (parseChars(nested, ruleDots, &token)) return 1;
1398 return 0;
1399 }
1400
1401 static int
getRuleDotsPattern(FileInfo * nested,CharsString * ruleDots,int * lastToken)1402 getRuleDotsPattern(FileInfo *nested, CharsString *ruleDots, int *lastToken) {
1403 /* Interpret the dets operand */
1404 CharsString token;
1405 if (getToken(nested, &token, "Dots operand", lastToken)) {
1406 if (token.length == 1 && token.chars[0] == '=') {
1407 ruleDots->length = 0;
1408 return 1;
1409 }
1410 if (parseDots(nested, ruleDots, &token)) return 1;
1411 }
1412 return 0;
1413 }
1414
1415 static int
getCharacterClass(FileInfo * nested,const CharacterClass ** class,const TranslationTableHeader * table,int * lastToken)1416 getCharacterClass(FileInfo *nested, const CharacterClass **class,
1417 const TranslationTableHeader *table, int *lastToken) {
1418 CharsString token;
1419 if (getToken(nested, &token, "character class name", lastToken)) {
1420 if ((*class = findCharacterClass(&token, table))) return 1;
1421 compileError(nested, "character class not defined.");
1422 }
1423 return 0;
1424 }
1425
1426 static int
1427 includeFile(FileInfo *nested, CharsString *includedFile, TranslationTableHeader **table,
1428 DisplayTableHeader **displayTable);
1429
1430 static TranslationTableOffset
findRuleName(const CharsString * name,const TranslationTableHeader * table)1431 findRuleName(const CharsString *name, const TranslationTableHeader *table) {
1432 const RuleName *nameRule = table->ruleNames;
1433 while (nameRule) {
1434 if ((name->length == nameRule->length) &&
1435 (memcmp(&name->chars[0], nameRule->name, CHARSIZE * name->length) == 0))
1436 return nameRule->ruleOffset;
1437 nameRule = nameRule->next;
1438 }
1439 return 0;
1440 }
1441
1442 static int
addRuleName(FileInfo * nested,CharsString * name,TranslationTableOffset newRuleOffset,TranslationTableHeader * table)1443 addRuleName(FileInfo *nested, CharsString *name, TranslationTableOffset newRuleOffset,
1444 TranslationTableHeader *table) {
1445 int k;
1446 RuleName *nameRule;
1447 if (!(nameRule = malloc(sizeof(*nameRule) + CHARSIZE * (name->length - 1)))) {
1448 compileError(nested, "not enough memory");
1449 _lou_outOfMemory();
1450 }
1451 memset(nameRule, 0, sizeof(*nameRule));
1452 // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z'
1453 for (k = 0; k < name->length; k++) {
1454 widechar c = name->chars[k];
1455 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
1456 nameRule->name[k] = c;
1457 else {
1458 compileError(nested, "a name may contain only letters");
1459 return 0;
1460 }
1461 }
1462 nameRule->length = name->length;
1463 nameRule->ruleOffset = newRuleOffset;
1464 nameRule->next = table->ruleNames;
1465 table->ruleNames = nameRule;
1466 return 1;
1467 }
1468
1469 static void
deallocateRuleNames(TranslationTableHeader * table)1470 deallocateRuleNames(TranslationTableHeader *table) {
1471 RuleName **ruleNames = &table->ruleNames;
1472 while (*ruleNames) {
1473 RuleName *nameRule = *ruleNames;
1474 *ruleNames = nameRule->next;
1475 if (nameRule) free(nameRule);
1476 }
1477 }
1478
1479 static int
compileSwapDots(FileInfo * nested,CharsString * source,CharsString * dest)1480 compileSwapDots(FileInfo *nested, CharsString *source, CharsString *dest) {
1481 int k = 0;
1482 int kk = 0;
1483 CharsString dotsSource;
1484 CharsString dotsDest;
1485 dest->length = 0;
1486 dotsSource.length = 0;
1487 while (k <= source->length) {
1488 if (source->chars[k] != ',' && k != source->length)
1489 dotsSource.chars[dotsSource.length++] = source->chars[k];
1490 else {
1491 if (!parseDots(nested, &dotsDest, &dotsSource)) return 0;
1492 dest->chars[dest->length++] = dotsDest.length + 1;
1493 for (kk = 0; kk < dotsDest.length; kk++)
1494 dest->chars[dest->length++] = dotsDest.chars[kk];
1495 dotsSource.length = 0;
1496 }
1497 k++;
1498 }
1499 return 1;
1500 }
1501
1502 static int
compileSwap(FileInfo * nested,TranslationTableOpcode opcode,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)1503 compileSwap(FileInfo *nested, TranslationTableOpcode opcode, int *lastToken,
1504 TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
1505 int nofor, TranslationTableHeader **table) {
1506 CharsString ruleChars;
1507 CharsString ruleDots;
1508 CharsString name;
1509 CharsString matches;
1510 CharsString replacements;
1511 TranslationTableOffset ruleOffset;
1512 if (!getToken(nested, &name, "name operand", lastToken)) return 0;
1513 if (!getToken(nested, &matches, "matches operand", lastToken)) return 0;
1514 if (!getToken(nested, &replacements, "replacements operand", lastToken)) return 0;
1515 if (opcode == CTO_SwapCc || opcode == CTO_SwapCd) {
1516 if (!parseChars(nested, &ruleChars, &matches)) return 0;
1517 } else {
1518 if (!compileSwapDots(nested, &matches, &ruleChars)) return 0;
1519 }
1520 if (opcode == CTO_SwapCc) {
1521 if (!parseChars(nested, &ruleDots, &replacements)) return 0;
1522 } else {
1523 if (!compileSwapDots(nested, &replacements, &ruleDots)) return 0;
1524 }
1525 if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, newRule,
1526 noback, nofor, table))
1527 return 0;
1528 if (!addRuleName(nested, &name, ruleOffset, *table)) return 0;
1529 if (newRuleOffset) *newRuleOffset = ruleOffset;
1530 return 1;
1531 }
1532
1533 static int
getNumber(widechar * source,widechar * dest)1534 getNumber(widechar *source, widechar *dest) {
1535 /* Convert a string of wide character digits to an integer */
1536 int k = 0;
1537 *dest = 0;
1538 while (source[k] >= '0' && source[k] <= '9') *dest = 10 * *dest + (source[k++] - '0');
1539 return k;
1540 }
1541
1542 /* Start of multipass compiler */
1543
1544 static int
passGetAttributes(CharsString * passLine,int * passLinepos,TranslationTableCharacterAttributes * passAttributes,FileInfo * passNested)1545 passGetAttributes(CharsString *passLine, int *passLinepos,
1546 TranslationTableCharacterAttributes *passAttributes, FileInfo *passNested) {
1547 int more = 1;
1548 *passAttributes = 0;
1549 while (more) {
1550 switch (passLine->chars[*passLinepos]) {
1551 case pass_any:
1552 *passAttributes = 0xffffffff;
1553 break;
1554 case pass_digit:
1555 *passAttributes |= CTC_Digit;
1556 break;
1557 case pass_litDigit:
1558 *passAttributes |= CTC_LitDigit;
1559 break;
1560 case pass_letter:
1561 *passAttributes |= CTC_Letter;
1562 break;
1563 case pass_math:
1564 *passAttributes |= CTC_Math;
1565 break;
1566 case pass_punctuation:
1567 *passAttributes |= CTC_Punctuation;
1568 break;
1569 case pass_sign:
1570 *passAttributes |= CTC_Sign;
1571 break;
1572 case pass_space:
1573 *passAttributes |= CTC_Space;
1574 break;
1575 case pass_uppercase:
1576 *passAttributes |= CTC_UpperCase;
1577 break;
1578 case pass_lowercase:
1579 *passAttributes |= CTC_LowerCase;
1580 break;
1581 case pass_class1:
1582 *passAttributes |= CTC_Class1;
1583 break;
1584 case pass_class2:
1585 *passAttributes |= CTC_Class2;
1586 break;
1587 case pass_class3:
1588 *passAttributes |= CTC_Class3;
1589 break;
1590 case pass_class4:
1591 *passAttributes |= CTC_Class4;
1592 break;
1593 default:
1594 more = 0;
1595 break;
1596 }
1597 if (more) (*passLinepos)++;
1598 }
1599 if (!*passAttributes) {
1600 compileError(passNested, "missing attribute");
1601 (*passLinepos)--;
1602 return 0;
1603 }
1604 return 1;
1605 }
1606
1607 static int
passGetDots(CharsString * passLine,int * passLinepos,CharsString * passHoldString,FileInfo * passNested)1608 passGetDots(CharsString *passLine, int *passLinepos, CharsString *passHoldString,
1609 FileInfo *passNested) {
1610 CharsString collectDots;
1611 collectDots.length = 0;
1612 while (*passLinepos < passLine->length &&
1613 (passLine->chars[*passLinepos] == '-' ||
1614 (passLine->chars[*passLinepos] >= '0' &&
1615 passLine->chars[*passLinepos] <= '9') ||
1616 ((passLine->chars[*passLinepos] | 32) >= 'a' &&
1617 (passLine->chars[*passLinepos] | 32) <= 'f')))
1618 collectDots.chars[collectDots.length++] = passLine->chars[(*passLinepos)++];
1619 if (!parseDots(passNested, passHoldString, &collectDots)) return 0;
1620 return 1;
1621 }
1622
1623 static int
passGetString(CharsString * passLine,int * passLinepos,CharsString * passHoldString,FileInfo * passNested)1624 passGetString(CharsString *passLine, int *passLinepos, CharsString *passHoldString,
1625 FileInfo *passNested) {
1626 passHoldString->length = 0;
1627 while (1) {
1628 if ((*passLinepos >= passLine->length) || !passLine->chars[*passLinepos]) {
1629 compileError(passNested, "unterminated string");
1630 return 0;
1631 }
1632 if (passLine->chars[*passLinepos] == 34) break;
1633 if (passLine->chars[*passLinepos] == QUOTESUB)
1634 passHoldString->chars[passHoldString->length++] = 34;
1635 else
1636 passHoldString->chars[passHoldString->length++] =
1637 passLine->chars[*passLinepos];
1638 (*passLinepos)++;
1639 }
1640 passHoldString->chars[passHoldString->length] = 0;
1641 (*passLinepos)++;
1642 return 1;
1643 }
1644
1645 static int
passGetNumber(CharsString * passLine,int * passLinepos,widechar * passHoldNumber)1646 passGetNumber(CharsString *passLine, int *passLinepos, widechar *passHoldNumber) {
1647 /* Convert a string of wide character digits to an integer */
1648 *passHoldNumber = 0;
1649 while ((*passLinepos < passLine->length) && (passLine->chars[*passLinepos] >= '0') &&
1650 (passLine->chars[*passLinepos] <= '9'))
1651 *passHoldNumber =
1652 10 * (*passHoldNumber) + (passLine->chars[(*passLinepos)++] - '0');
1653 return 1;
1654 }
1655
1656 static int
passGetVariableNumber(FileInfo * nested,CharsString * passLine,int * passLinepos,widechar * passHoldNumber)1657 passGetVariableNumber(FileInfo *nested, CharsString *passLine, int *passLinepos,
1658 widechar *passHoldNumber) {
1659 if (!passGetNumber(passLine, passLinepos, passHoldNumber)) return 0;
1660 if ((*passHoldNumber >= 0) && (*passHoldNumber < NUMVAR)) return 1;
1661 compileError(nested, "variable number out of range");
1662 return 0;
1663 }
1664
1665 static int
passGetName(CharsString * passLine,int * passLinepos,CharsString * passHoldString)1666 passGetName(CharsString *passLine, int *passLinepos, CharsString *passHoldString) {
1667 passHoldString->length = 0;
1668 // a name is a sequence of characters in the ranges 'a'..'z' and 'A'..'Z'
1669 do {
1670 widechar c = passLine->chars[*passLinepos];
1671 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
1672 passHoldString->chars[passHoldString->length++] = c;
1673 (*passLinepos)++;
1674 } else {
1675 break;
1676 }
1677 } while (*passLinepos < passLine->length);
1678 return 1;
1679 }
1680
1681 static inline int
wantsString(TranslationTableOpcode opcode,int actionPart,int nofor)1682 wantsString(TranslationTableOpcode opcode, int actionPart, int nofor) {
1683 if (opcode == CTO_Correct) return 1;
1684 if (opcode != CTO_Context) return 0;
1685 return !nofor == !actionPart;
1686 }
1687
1688 static int
verifyStringOrDots(FileInfo * nested,TranslationTableOpcode opcode,int isString,int actionPart,int nofor)1689 verifyStringOrDots(FileInfo *nested, TranslationTableOpcode opcode, int isString,
1690 int actionPart, int nofor) {
1691 if (!wantsString(opcode, actionPart, nofor) == !isString) return 1;
1692
1693 compileError(nested, "%s are not allowed in the %s part of a %s translation %s rule.",
1694 isString ? "strings" : "dots", getPartName(actionPart),
1695 nofor ? "backward" : "forward", _lou_findOpcodeName(opcode));
1696
1697 return 0;
1698 }
1699
1700 static int
compilePassOpcode(FileInfo * nested,TranslationTableOpcode opcode,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)1701 compilePassOpcode(FileInfo *nested, TranslationTableOpcode opcode,
1702 TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
1703 int nofor, TranslationTableHeader **table) {
1704 static CharsString passRuleChars;
1705 static CharsString passRuleDots;
1706 /* Compile the operands of a pass opcode */
1707 widechar passSubOp;
1708 const CharacterClass *class;
1709 TranslationTableOffset ruleOffset = 0;
1710 TranslationTableRule *rule = NULL;
1711 int k;
1712 int kk = 0;
1713 int endTest = 0;
1714 widechar *passInstructions = passRuleDots.chars;
1715 int passIC = 0; /* Instruction counter */
1716 passRuleChars.length = 0;
1717 FileInfo *passNested = nested;
1718 CharsString passHoldString;
1719 widechar passHoldNumber;
1720 CharsString passLine;
1721 int passLinepos = 0;
1722 TranslationTableCharacterAttributes passAttributes;
1723 passHoldString.length = 0;
1724 for (k = nested->linepos; k < nested->linelen; k++)
1725 passHoldString.chars[passHoldString.length++] = nested->line[k];
1726 #define SEPCHAR 0x0001
1727 for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32; k++)
1728 ;
1729 if (k < passHoldString.length)
1730 passHoldString.chars[k] = SEPCHAR;
1731 else {
1732 compileError(passNested, "Invalid multipass operands");
1733 return 0;
1734 }
1735 parseChars(passNested, &passLine, &passHoldString);
1736 /* Compile test part */
1737 for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++)
1738 ;
1739 endTest = k;
1740 passLine.chars[endTest] = pass_endTest;
1741 passLinepos = 0;
1742 while (passLinepos <= endTest) {
1743 if (passIC >= MAXSTRING) {
1744 compileError(passNested, "Test part in multipass operand too long");
1745 return 0;
1746 }
1747 switch ((passSubOp = passLine.chars[passLinepos])) {
1748 case pass_lookback:
1749 passInstructions[passIC++] = pass_lookback;
1750 passLinepos++;
1751 passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1752 if (passHoldNumber == 0) passHoldNumber = 1;
1753 passInstructions[passIC++] = passHoldNumber;
1754 break;
1755 case pass_not:
1756 passInstructions[passIC++] = pass_not;
1757 passLinepos++;
1758 break;
1759 case pass_first:
1760 passInstructions[passIC++] = pass_first;
1761 passLinepos++;
1762 break;
1763 case pass_last:
1764 passInstructions[passIC++] = pass_last;
1765 passLinepos++;
1766 break;
1767 case pass_search:
1768 passInstructions[passIC++] = pass_search;
1769 passLinepos++;
1770 break;
1771 case pass_string:
1772 if (!verifyStringOrDots(nested, opcode, 1, 0, nofor)) {
1773 return 0;
1774 }
1775 passLinepos++;
1776 passInstructions[passIC++] = pass_string;
1777 passGetString(&passLine, &passLinepos, &passHoldString, passNested);
1778 goto testDoCharsDots;
1779 case pass_dots:
1780 if (!verifyStringOrDots(nested, opcode, 0, 0, nofor)) {
1781 return 0;
1782 }
1783 passLinepos++;
1784 passInstructions[passIC++] = pass_dots;
1785 passGetDots(&passLine, &passLinepos, &passHoldString, passNested);
1786 testDoCharsDots:
1787 if (passHoldString.length == 0) return 0;
1788 if (passIC >= MAXSTRING) {
1789 compileError(passNested,
1790 "@ operand in test part of multipass operand too long");
1791 return 0;
1792 }
1793 passInstructions[passIC++] = passHoldString.length;
1794 for (kk = 0; kk < passHoldString.length; kk++) {
1795 if (passIC >= MAXSTRING) {
1796 compileError(passNested,
1797 "@ operand in test part of multipass operand too long");
1798 return 0;
1799 }
1800 passInstructions[passIC++] = passHoldString.chars[kk];
1801 }
1802 break;
1803 case pass_startReplace:
1804 passInstructions[passIC++] = pass_startReplace;
1805 passLinepos++;
1806 break;
1807 case pass_endReplace:
1808 passInstructions[passIC++] = pass_endReplace;
1809 passLinepos++;
1810 break;
1811 case pass_variable:
1812 passLinepos++;
1813 if (!passGetVariableNumber(nested, &passLine, &passLinepos, &passHoldNumber))
1814 return 0;
1815 switch (passLine.chars[passLinepos]) {
1816 case pass_eq:
1817 passInstructions[passIC++] = pass_eq;
1818 goto doComp;
1819 case pass_lt:
1820 if (passLine.chars[passLinepos + 1] == pass_eq) {
1821 passLinepos++;
1822 passInstructions[passIC++] = pass_lteq;
1823 } else
1824 passInstructions[passIC++] = pass_lt;
1825 goto doComp;
1826 case pass_gt:
1827 if (passLine.chars[passLinepos + 1] == pass_eq) {
1828 passLinepos++;
1829 passInstructions[passIC++] = pass_gteq;
1830 } else
1831 passInstructions[passIC++] = pass_gt;
1832 doComp:
1833 passInstructions[passIC++] = passHoldNumber;
1834 passLinepos++;
1835 passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1836 passInstructions[passIC++] = passHoldNumber;
1837 break;
1838 default:
1839 compileError(passNested, "incorrect comparison operator");
1840 return 0;
1841 }
1842 break;
1843 case pass_attributes:
1844 passLinepos++;
1845 if (!passGetAttributes(&passLine, &passLinepos, &passAttributes, passNested))
1846 return 0;
1847 insertAttributes:
1848 passInstructions[passIC++] = pass_attributes;
1849 passInstructions[passIC++] = passAttributes >> 16;
1850 passInstructions[passIC++] = passAttributes & 0xffff;
1851 getRange:
1852 if (passLine.chars[passLinepos] == pass_until) {
1853 passLinepos++;
1854 passInstructions[passIC++] = 1;
1855 passInstructions[passIC++] = 0xffff;
1856 break;
1857 }
1858 passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1859 if (passHoldNumber == 0) {
1860 passHoldNumber = passInstructions[passIC++] = 1;
1861 passInstructions[passIC++] = 1; /* This is not an error */
1862 break;
1863 }
1864 passInstructions[passIC++] = passHoldNumber;
1865 if (passLine.chars[passLinepos] != pass_hyphen) {
1866 passInstructions[passIC++] = passHoldNumber;
1867 break;
1868 }
1869 passLinepos++;
1870 passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1871 if (passHoldNumber == 0) {
1872 compileError(passNested, "invalid range");
1873 return 0;
1874 }
1875 passInstructions[passIC++] = passHoldNumber;
1876 break;
1877 case pass_groupstart:
1878 case pass_groupend:
1879 passLinepos++;
1880 passGetName(&passLine, &passLinepos, &passHoldString);
1881 ruleOffset = findRuleName(&passHoldString, *table);
1882 if (ruleOffset)
1883 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
1884 if (rule && rule->opcode == CTO_Grouping) {
1885 passInstructions[passIC++] = passSubOp;
1886 passInstructions[passIC++] = ruleOffset >> 16;
1887 passInstructions[passIC++] = ruleOffset & 0xffff;
1888 break;
1889 } else {
1890 compileError(passNested, "%s is not a grouping name",
1891 _lou_showString(
1892 &passHoldString.chars[0], passHoldString.length, 0));
1893 return 0;
1894 }
1895 break;
1896 case pass_swap:
1897 passLinepos++;
1898 passGetName(&passLine, &passLinepos, &passHoldString);
1899 if ((class = findCharacterClass(&passHoldString, *table))) {
1900 passAttributes = class->attribute;
1901 goto insertAttributes;
1902 }
1903 ruleOffset = findRuleName(&passHoldString, *table);
1904 if (ruleOffset)
1905 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
1906 if (rule &&
1907 (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd ||
1908 rule->opcode == CTO_SwapDd)) {
1909 passInstructions[passIC++] = pass_swap;
1910 passInstructions[passIC++] = ruleOffset >> 16;
1911 passInstructions[passIC++] = ruleOffset & 0xffff;
1912 goto getRange;
1913 }
1914 compileError(passNested, "%s is neither a class name nor a swap name.",
1915 _lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
1916 return 0;
1917 case pass_endTest:
1918 passInstructions[passIC++] = pass_endTest;
1919 passLinepos++;
1920 break;
1921 default:
1922 compileError(passNested, "incorrect operator '%c ' in test part",
1923 passLine.chars[passLinepos]);
1924 return 0;
1925 }
1926
1927 } /* Compile action part */
1928
1929 /* Compile action part */
1930 while (passLinepos < passLine.length && passLine.chars[passLinepos] <= 32)
1931 passLinepos++;
1932 while (passLinepos < passLine.length && passLine.chars[passLinepos] > 32) {
1933 if (passIC >= MAXSTRING) {
1934 compileError(passNested, "Action part in multipass operand too long");
1935 return 0;
1936 }
1937 switch ((passSubOp = passLine.chars[passLinepos])) {
1938 case pass_string:
1939 if (!verifyStringOrDots(nested, opcode, 1, 1, nofor)) {
1940 return 0;
1941 }
1942 passLinepos++;
1943 passInstructions[passIC++] = pass_string;
1944 passGetString(&passLine, &passLinepos, &passHoldString, passNested);
1945 goto actionDoCharsDots;
1946 case pass_dots:
1947 if (!verifyStringOrDots(nested, opcode, 0, 1, nofor)) {
1948 return 0;
1949 }
1950 passLinepos++;
1951 passGetDots(&passLine, &passLinepos, &passHoldString, passNested);
1952 passInstructions[passIC++] = pass_dots;
1953 actionDoCharsDots:
1954 if (passHoldString.length == 0) return 0;
1955 if (passIC >= MAXSTRING) {
1956 compileError(passNested,
1957 "@ operand in action part of multipass operand too long");
1958 return 0;
1959 }
1960 passInstructions[passIC++] = passHoldString.length;
1961 for (kk = 0; kk < passHoldString.length; kk++) {
1962 if (passIC >= MAXSTRING) {
1963 compileError(passNested,
1964 "@ operand in action part of multipass operand too long");
1965 return 0;
1966 }
1967 passInstructions[passIC++] = passHoldString.chars[kk];
1968 }
1969 break;
1970 case pass_variable:
1971 passLinepos++;
1972 if (!passGetVariableNumber(nested, &passLine, &passLinepos, &passHoldNumber))
1973 return 0;
1974 switch (passLine.chars[passLinepos]) {
1975 case pass_eq:
1976 passInstructions[passIC++] = pass_eq;
1977 passInstructions[passIC++] = passHoldNumber;
1978 passLinepos++;
1979 passGetNumber(&passLine, &passLinepos, &passHoldNumber);
1980 passInstructions[passIC++] = passHoldNumber;
1981 break;
1982 case pass_plus:
1983 case pass_hyphen:
1984 passInstructions[passIC++] = passLine.chars[passLinepos++];
1985 passInstructions[passIC++] = passHoldNumber;
1986 break;
1987 default:
1988 compileError(passNested, "incorrect variable operator in action part");
1989 return 0;
1990 }
1991 break;
1992 case pass_copy:
1993 passInstructions[passIC++] = pass_copy;
1994 passLinepos++;
1995 break;
1996 case pass_omit:
1997 passInstructions[passIC++] = pass_omit;
1998 passLinepos++;
1999 break;
2000 case pass_groupreplace:
2001 case pass_groupstart:
2002 case pass_groupend:
2003 passLinepos++;
2004 passGetName(&passLine, &passLinepos, &passHoldString);
2005 ruleOffset = findRuleName(&passHoldString, *table);
2006 if (ruleOffset)
2007 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2008 if (rule && rule->opcode == CTO_Grouping) {
2009 passInstructions[passIC++] = passSubOp;
2010 passInstructions[passIC++] = ruleOffset >> 16;
2011 passInstructions[passIC++] = ruleOffset & 0xffff;
2012 break;
2013 }
2014 compileError(passNested, "%s is not a grouping name",
2015 _lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
2016 return 0;
2017 case pass_swap:
2018 passLinepos++;
2019 passGetName(&passLine, &passLinepos, &passHoldString);
2020 ruleOffset = findRuleName(&passHoldString, *table);
2021 if (ruleOffset)
2022 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2023 if (rule &&
2024 (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd ||
2025 rule->opcode == CTO_SwapDd)) {
2026 passInstructions[passIC++] = pass_swap;
2027 passInstructions[passIC++] = ruleOffset >> 16;
2028 passInstructions[passIC++] = ruleOffset & 0xffff;
2029 break;
2030 }
2031 compileError(passNested, "%s is not a swap name.",
2032 _lou_showString(&passHoldString.chars[0], passHoldString.length, 0));
2033 return 0;
2034 break;
2035 default:
2036 compileError(passNested, "incorrect operator in action part");
2037 return 0;
2038 }
2039 }
2040
2041 /* Analyze and add rule */
2042 passRuleDots.length = passIC;
2043
2044 {
2045 widechar *characters;
2046 int length;
2047 int found = passFindCharacters(
2048 passNested, passInstructions, passRuleDots.length, &characters, &length);
2049
2050 if (!found) return 0;
2051
2052 if (characters) {
2053 for (k = 0; k < length; k += 1) passRuleChars.chars[k] = characters[k];
2054 passRuleChars.length = k;
2055 }
2056 }
2057
2058 if (!addRule(passNested, opcode, &passRuleChars, &passRuleDots, 0, 0, newRuleOffset,
2059 newRule, noback, nofor, table))
2060 return 0;
2061 return 1;
2062 }
2063
2064 /* End of multipass compiler */
2065
2066 static int
compileBrailleIndicator(FileInfo * nested,const char * ermsg,TranslationTableOpcode opcode,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table)2067 compileBrailleIndicator(FileInfo *nested, const char *ermsg,
2068 TranslationTableOpcode opcode, int *lastToken,
2069 TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
2070 int nofor, TranslationTableHeader **table) {
2071 CharsString token;
2072 CharsString cells;
2073 if (getToken(nested, &token, ermsg, lastToken))
2074 if (parseDots(nested, &cells, &token))
2075 if (!addRule(nested, opcode, NULL, &cells, 0, 0, newRuleOffset, newRule,
2076 noback, nofor, table))
2077 return 0;
2078 return 1;
2079 }
2080
2081 static int
compileNumber(FileInfo * nested,int * lastToken)2082 compileNumber(FileInfo *nested, int *lastToken) {
2083 CharsString token;
2084 widechar dest;
2085 if (!getToken(nested, &token, "number", lastToken)) return 0;
2086 getNumber(&token.chars[0], &dest);
2087 if (!(dest > 0)) {
2088 compileError(nested, "a nonzero positive number is required");
2089 return 0;
2090 }
2091 return dest;
2092 }
2093
2094 static int
compileGrouping(FileInfo * nested,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2095 compileGrouping(FileInfo *nested, int *lastToken, TranslationTableOffset *newRuleOffset,
2096 TranslationTableRule **newRule, int noback, int nofor,
2097 TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2098 int k;
2099 CharsString name;
2100 CharsString groupChars;
2101 CharsString groupDots;
2102 CharsString dotsParsed;
2103 if (!getToken(nested, &name, "name operand", lastToken)) return 0;
2104 if (!getRuleCharsText(nested, &groupChars, lastToken)) return 0;
2105 if (!getToken(nested, &groupDots, "dots operand", lastToken)) return 0;
2106 for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++)
2107 ;
2108 if (k == groupDots.length) {
2109 compileError(
2110 nested, "Dots operand must consist of two cells separated by a comma");
2111 return 0;
2112 }
2113 groupDots.chars[k] = '-';
2114 if (!parseDots(nested, &dotsParsed, &groupDots)) return 0;
2115 if (groupChars.length != 2 || dotsParsed.length != 2) {
2116 compileError(nested,
2117 "two Unicode characters and two cells separated by a comma are needed.");
2118 return 0;
2119 }
2120 if (table) {
2121 TranslationTableOffset ruleOffset;
2122 TranslationTableCharacter *charsDotsPtr;
2123 charsDotsPtr = addCharOrDots(nested, groupChars.chars[0], 0, table);
2124 charsDotsPtr->attributes |= CTC_Math;
2125 charsDotsPtr->uppercase = charsDotsPtr->realchar;
2126 charsDotsPtr->lowercase = charsDotsPtr->realchar;
2127 charsDotsPtr = addCharOrDots(nested, groupChars.chars[1], 0, table);
2128 charsDotsPtr->attributes |= CTC_Math;
2129 charsDotsPtr->uppercase = charsDotsPtr->realchar;
2130 charsDotsPtr->lowercase = charsDotsPtr->realchar;
2131 charsDotsPtr = addCharOrDots(nested, dotsParsed.chars[0], 1, table);
2132 charsDotsPtr->attributes |= CTC_Math;
2133 charsDotsPtr->uppercase = charsDotsPtr->realchar;
2134 charsDotsPtr->lowercase = charsDotsPtr->realchar;
2135 charsDotsPtr = addCharOrDots(nested, dotsParsed.chars[1], 1, table);
2136 charsDotsPtr->attributes |= CTC_Math;
2137 charsDotsPtr->uppercase = charsDotsPtr->realchar;
2138 charsDotsPtr->lowercase = charsDotsPtr->realchar;
2139 if (!addRule(nested, CTO_Grouping, &groupChars, &dotsParsed, 0, 0, &ruleOffset,
2140 newRule, noback, nofor, table))
2141 return 0;
2142 if (!addRuleName(nested, &name, ruleOffset, *table)) return 0;
2143 if (newRuleOffset) *newRuleOffset = ruleOffset;
2144 }
2145 if (displayTable) {
2146 putCharAndDots(nested, groupChars.chars[0], dotsParsed.chars[0], displayTable);
2147 putCharAndDots(nested, groupChars.chars[1], dotsParsed.chars[1], displayTable);
2148 }
2149 if (table) {
2150 widechar endChar;
2151 widechar endDots;
2152 endChar = groupChars.chars[1];
2153 endDots = dotsParsed.chars[1];
2154 groupChars.length = dotsParsed.length = 1;
2155 if (!addRule(nested, CTO_Math, &groupChars, &dotsParsed, 0, 0, newRuleOffset,
2156 newRule, noback, nofor, table))
2157 return 0;
2158 groupChars.chars[0] = endChar;
2159 dotsParsed.chars[0] = endDots;
2160 if (!addRule(nested, CTO_Math, &groupChars, &dotsParsed, 0, 0, newRuleOffset,
2161 newRule, noback, nofor, table))
2162 return 0;
2163 }
2164 return 1;
2165 }
2166
2167 static int
compileUplow(FileInfo * nested,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2168 compileUplow(FileInfo *nested, int *lastToken, TranslationTableOffset *newRuleOffset,
2169 TranslationTableRule **newRule, int noback, int nofor,
2170 TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2171 int k;
2172 TranslationTableCharacter *upperChar;
2173 TranslationTableCharacter *lowerChar;
2174 TranslationTableCharacter *upperCell = NULL;
2175 TranslationTableCharacter *lowerCell = NULL;
2176 CharsString ruleChars;
2177 CharsString ruleDots;
2178 CharsString upperDots;
2179 CharsString lowerDots;
2180 int haveLowerDots = 0;
2181 TranslationTableCharacterAttributes attr;
2182 if (!getRuleCharsText(nested, &ruleChars, lastToken)) return 0;
2183 if (!getToken(nested, &ruleDots, "dots operand", lastToken)) return 0;
2184 for (k = 0; k < ruleDots.length && ruleDots.chars[k] != ','; k++)
2185 ;
2186 if (k == ruleDots.length) {
2187 if (!parseDots(nested, &upperDots, &ruleDots)) return 0;
2188 lowerDots.length = upperDots.length;
2189 for (k = 0; k < upperDots.length; k++) lowerDots.chars[k] = upperDots.chars[k];
2190 lowerDots.chars[k] = 0;
2191 } else {
2192 haveLowerDots = ruleDots.length;
2193 ruleDots.length = k;
2194 if (!parseDots(nested, &upperDots, &ruleDots)) return 0;
2195 ruleDots.length = 0;
2196 k++;
2197 for (; k < haveLowerDots; k++)
2198 ruleDots.chars[ruleDots.length++] = ruleDots.chars[k];
2199 if (!parseDots(nested, &lowerDots, &ruleDots)) return 0;
2200 }
2201 if (ruleChars.length != 2 || upperDots.length < 1) {
2202 compileError(nested,
2203 "Exactly two Unicode characters and at least one cell are required.");
2204 return 0;
2205 }
2206 if (haveLowerDots && lowerDots.length < 1) {
2207 compileError(nested, "at least one cell is required after the comma.");
2208 return 0;
2209 }
2210 if (table) {
2211 upperChar = addCharOrDots(nested, ruleChars.chars[0], 0, table);
2212 upperChar->attributes |= CTC_Letter | CTC_UpperCase;
2213 upperChar->uppercase = ruleChars.chars[0];
2214 upperChar->lowercase = ruleChars.chars[1];
2215 lowerChar = addCharOrDots(nested, ruleChars.chars[1], 0, table);
2216 lowerChar->attributes |= CTC_Letter | CTC_LowerCase;
2217 lowerChar->uppercase = ruleChars.chars[0];
2218 lowerChar->lowercase = ruleChars.chars[1];
2219 for (k = 0; k < upperDots.length; k++)
2220 if (!compile_findCharOrDots(upperDots.chars[k], 1, *table)) {
2221 attr = CTC_Letter | CTC_UpperCase;
2222 upperCell = addCharOrDots(nested, upperDots.chars[k], 1, table);
2223 upperCell->attributes |= attr;
2224 upperCell->uppercase = upperCell->realchar;
2225 }
2226 if (haveLowerDots) {
2227 for (k = 0; k < lowerDots.length; k++)
2228 if (!compile_findCharOrDots(lowerDots.chars[k], 1, *table)) {
2229 attr = CTC_Letter | CTC_LowerCase;
2230 lowerCell = addCharOrDots(nested, lowerDots.chars[k], 1, table);
2231 if (lowerDots.length != 1) attr = CTC_Space;
2232 lowerCell->attributes |= attr;
2233 lowerCell->lowercase = lowerCell->realchar;
2234 }
2235 } else if (upperCell != NULL && upperDots.length == 1)
2236 upperCell->attributes |= CTC_LowerCase;
2237 if (upperCell != NULL) upperCell->lowercase = lowerDots.chars[0];
2238 if (lowerCell != NULL) lowerCell->uppercase = upperDots.chars[0];
2239 }
2240 if (displayTable) {
2241 if (lowerDots.length == 1)
2242 putCharAndDots(nested, ruleChars.chars[1], lowerDots.chars[0], displayTable);
2243 if (upperDots.length == 1)
2244 putCharAndDots(nested, ruleChars.chars[0], upperDots.chars[0], displayTable);
2245 }
2246 if (table) {
2247 ruleChars.length = 1;
2248 ruleChars.chars[2] = ruleChars.chars[0];
2249 ruleChars.chars[0] = ruleChars.chars[1];
2250 if (!addRule(nested, CTO_LowerCase, &ruleChars, &lowerDots, 0, 0, newRuleOffset,
2251 newRule, noback, nofor, table))
2252 return 0;
2253 ruleChars.chars[0] = ruleChars.chars[2];
2254 if (!addRule(nested, CTO_UpperCase, &ruleChars, &upperDots, 0, 0, newRuleOffset,
2255 newRule, noback, nofor, table))
2256 return 0;
2257 }
2258 return 1;
2259 }
2260
2261 /* Functions for compiling hyphenation tables */
2262
2263 typedef struct HyphenDict { /* hyphenation dictionary: finite state machine */
2264 int numStates;
2265 HyphenationState *states;
2266 } HyphenDict;
2267
2268 #define DEFAULTSTATE 0xffff
2269 #define HYPHENHASHSIZE 8191
2270
2271 typedef struct HyphenHashEntry {
2272 struct HyphenHashEntry *next;
2273 CharsString *key;
2274 int val;
2275 } HyphenHashEntry;
2276
2277 typedef struct HyphenHashTab {
2278 HyphenHashEntry *entries[HYPHENHASHSIZE];
2279 } HyphenHashTab;
2280
2281 /* a hash function from ASU - adapted from Gtk+ */
2282 static unsigned int
hyphenStringHash(const CharsString * s)2283 hyphenStringHash(const CharsString *s) {
2284 int k;
2285 unsigned int h = 0, g;
2286 for (k = 0; k < s->length; k++) {
2287 h = (h << 4) + s->chars[k];
2288 if ((g = h & 0xf0000000)) {
2289 h = h ^ (g >> 24);
2290 h = h ^ g;
2291 }
2292 }
2293 return h;
2294 }
2295
2296 static HyphenHashTab *
hyphenHashNew(void)2297 hyphenHashNew(void) {
2298 HyphenHashTab *hashTab;
2299 if (!(hashTab = malloc(sizeof(HyphenHashTab)))) _lou_outOfMemory();
2300 memset(hashTab, 0, sizeof(HyphenHashTab));
2301 return hashTab;
2302 }
2303
2304 static void
hyphenHashFree(HyphenHashTab * hashTab)2305 hyphenHashFree(HyphenHashTab *hashTab) {
2306 int i;
2307 HyphenHashEntry *e, *next;
2308 for (i = 0; i < HYPHENHASHSIZE; i++)
2309 for (e = hashTab->entries[i]; e; e = next) {
2310 next = e->next;
2311 free(e->key);
2312 free(e);
2313 }
2314 free(hashTab);
2315 }
2316
2317 /* assumes that key is not already present! */
2318 static void
hyphenHashInsert(HyphenHashTab * hashTab,const CharsString * key,int val)2319 hyphenHashInsert(HyphenHashTab *hashTab, const CharsString *key, int val) {
2320 int i, j;
2321 HyphenHashEntry *e;
2322 i = hyphenStringHash(key) % HYPHENHASHSIZE;
2323 if (!(e = malloc(sizeof(HyphenHashEntry)))) _lou_outOfMemory();
2324 e->next = hashTab->entries[i];
2325 e->key = malloc((key->length + 1) * CHARSIZE);
2326 if (!e->key) _lou_outOfMemory();
2327 e->key->length = key->length;
2328 for (j = 0; j < key->length; j++) e->key->chars[j] = key->chars[j];
2329 e->val = val;
2330 hashTab->entries[i] = e;
2331 }
2332
2333 /* return val if found, otherwise DEFAULTSTATE */
2334 static int
hyphenHashLookup(HyphenHashTab * hashTab,const CharsString * key)2335 hyphenHashLookup(HyphenHashTab *hashTab, const CharsString *key) {
2336 int i, j;
2337 HyphenHashEntry *e;
2338 if (key->length == 0) return 0;
2339 i = hyphenStringHash(key) % HYPHENHASHSIZE;
2340 for (e = hashTab->entries[i]; e; e = e->next) {
2341 if (key->length != e->key->length) continue;
2342 for (j = 0; j < key->length; j++)
2343 if (key->chars[j] != e->key->chars[j]) break;
2344 if (j == key->length) return e->val;
2345 }
2346 return DEFAULTSTATE;
2347 }
2348
2349 static int
hyphenGetNewState(HyphenDict * dict,HyphenHashTab * hashTab,const CharsString * string)2350 hyphenGetNewState(HyphenDict *dict, HyphenHashTab *hashTab, const CharsString *string) {
2351 hyphenHashInsert(hashTab, string, dict->numStates);
2352 /* predicate is true if dict->numStates is a power of two */
2353 if (!(dict->numStates & (dict->numStates - 1)))
2354 dict->states =
2355 realloc(dict->states, (dict->numStates << 1) * sizeof(HyphenationState));
2356 if (!dict->states) _lou_outOfMemory();
2357 dict->states[dict->numStates].hyphenPattern = 0;
2358 dict->states[dict->numStates].fallbackState = DEFAULTSTATE;
2359 dict->states[dict->numStates].numTrans = 0;
2360 dict->states[dict->numStates].trans.pointer = NULL;
2361 return dict->numStates++;
2362 }
2363
2364 /* add a transition from state1 to state2 through ch - assumes that the
2365 * transition does not already exist */
2366 static void
hyphenAddTrans(HyphenDict * dict,int state1,int state2,widechar ch)2367 hyphenAddTrans(HyphenDict *dict, int state1, int state2, widechar ch) {
2368 int numTrans;
2369 numTrans = dict->states[state1].numTrans;
2370 if (numTrans == 0)
2371 dict->states[state1].trans.pointer = malloc(sizeof(HyphenationTrans));
2372 else if (!(numTrans & (numTrans - 1)))
2373 dict->states[state1].trans.pointer = realloc(dict->states[state1].trans.pointer,
2374 (numTrans << 1) * sizeof(HyphenationTrans));
2375 dict->states[state1].trans.pointer[numTrans].ch = ch;
2376 dict->states[state1].trans.pointer[numTrans].newState = state2;
2377 dict->states[state1].numTrans++;
2378 }
2379
2380 static int
compileHyphenation(FileInfo * nested,CharsString * encoding,int * lastToken,TranslationTableHeader ** table)2381 compileHyphenation(FileInfo *nested, CharsString *encoding, int *lastToken,
2382 TranslationTableHeader **table) {
2383 CharsString hyph;
2384 HyphenationTrans *holdPointer;
2385 HyphenHashTab *hashTab;
2386 CharsString word;
2387 char pattern[MAXSTRING + 1];
2388 unsigned int stateNum = 0, lastState = 0;
2389 int i, j, k = encoding->length;
2390 widechar ch;
2391 int found;
2392 HyphenHashEntry *e;
2393 HyphenDict dict;
2394 TranslationTableOffset holdOffset;
2395 /* Set aside enough space for hyphenation states and transitions in
2396 * translation table. Must be done before anything else */
2397 allocateSpaceInTranslationTable(nested, NULL, 250000, table);
2398 hashTab = hyphenHashNew();
2399 dict.numStates = 1;
2400 dict.states = malloc(sizeof(HyphenationState));
2401 if (!dict.states) _lou_outOfMemory();
2402 dict.states[0].hyphenPattern = 0;
2403 dict.states[0].fallbackState = DEFAULTSTATE;
2404 dict.states[0].numTrans = 0;
2405 dict.states[0].trans.pointer = NULL;
2406 do {
2407 if (encoding->chars[0] == 'I') {
2408 if (!getToken(nested, &hyph, NULL, lastToken)) continue;
2409 } else {
2410 /* UTF-8 */
2411 if (!getToken(nested, &word, NULL, lastToken)) continue;
2412 parseChars(nested, &hyph, &word);
2413 }
2414 if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] == '%' ||
2415 hyph.chars[0] == '<')
2416 continue; /* comment */
2417 j = 0;
2418 pattern[j] = '0';
2419 for (i = 0; i < hyph.length; i++) {
2420 if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9')
2421 pattern[j] = (char)hyph.chars[i];
2422 else {
2423 word.chars[j] = hyph.chars[i];
2424 pattern[++j] = '0';
2425 }
2426 }
2427 word.chars[j] = 0;
2428 word.length = j;
2429 pattern[j + 1] = 0;
2430 for (i = 0; pattern[i] == '0'; i++)
2431 ;
2432 found = hyphenHashLookup(hashTab, &word);
2433 if (found != DEFAULTSTATE)
2434 stateNum = found;
2435 else
2436 stateNum = hyphenGetNewState(&dict, hashTab, &word);
2437 k = j + 2 - i;
2438 if (k > 0) {
2439 allocateSpaceInTranslationTable(
2440 nested, &dict.states[stateNum].hyphenPattern, k, table);
2441 memcpy(&(*table)->ruleArea[dict.states[stateNum].hyphenPattern], &pattern[i],
2442 k);
2443 }
2444 /* now, put in the prefix transitions */
2445 while (found == DEFAULTSTATE) {
2446 lastState = stateNum;
2447 ch = word.chars[word.length-- - 1];
2448 found = hyphenHashLookup(hashTab, &word);
2449 if (found != DEFAULTSTATE)
2450 stateNum = found;
2451 else
2452 stateNum = hyphenGetNewState(&dict, hashTab, &word);
2453 hyphenAddTrans(&dict, stateNum, lastState, ch);
2454 }
2455 } while (_lou_getALine(nested));
2456 /* put in the fallback states */
2457 for (i = 0; i < HYPHENHASHSIZE; i++) {
2458 for (e = hashTab->entries[i]; e; e = e->next) {
2459 for (j = 1; j <= e->key->length; j++) {
2460 word.length = 0;
2461 for (k = j; k < e->key->length; k++)
2462 word.chars[word.length++] = e->key->chars[k];
2463 stateNum = hyphenHashLookup(hashTab, &word);
2464 if (stateNum != DEFAULTSTATE) break;
2465 }
2466 if (e->val) dict.states[e->val].fallbackState = stateNum;
2467 }
2468 }
2469 hyphenHashFree(hashTab);
2470 /* Transfer hyphenation information to table */
2471 for (i = 0; i < dict.numStates; i++) {
2472 if (dict.states[i].numTrans == 0)
2473 dict.states[i].trans.offset = 0;
2474 else {
2475 holdPointer = dict.states[i].trans.pointer;
2476 allocateSpaceInTranslationTable(nested, &dict.states[i].trans.offset,
2477 dict.states[i].numTrans * sizeof(HyphenationTrans), table);
2478 memcpy(&(*table)->ruleArea[dict.states[i].trans.offset], holdPointer,
2479 dict.states[i].numTrans * sizeof(HyphenationTrans));
2480 free(holdPointer);
2481 }
2482 }
2483 allocateSpaceInTranslationTable(
2484 nested, &holdOffset, dict.numStates * sizeof(HyphenationState), table);
2485 (*table)->hyphenStatesArray = holdOffset;
2486 /* Prevents segmentation fault if table is reallocated */
2487 memcpy(&(*table)->ruleArea[(*table)->hyphenStatesArray], &dict.states[0],
2488 dict.numStates * sizeof(HyphenationState));
2489 free(dict.states);
2490 return 1;
2491 }
2492
2493 static int
compileCharDef(FileInfo * nested,TranslationTableOpcode opcode,TranslationTableCharacterAttributes attributes,int * lastToken,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,int noback,int nofor,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2494 compileCharDef(FileInfo *nested, TranslationTableOpcode opcode,
2495 TranslationTableCharacterAttributes attributes, int *lastToken,
2496 TranslationTableOffset *newRuleOffset, TranslationTableRule **newRule, int noback,
2497 int nofor, TranslationTableHeader **table, DisplayTableHeader **displayTable) {
2498 CharsString ruleChars;
2499 CharsString ruleDots;
2500 if (!getRuleCharsText(nested, &ruleChars, lastToken)) return 0;
2501 if (!getRuleDotsPattern(nested, &ruleDots, lastToken)) return 0;
2502 if (ruleChars.length != 1) {
2503 compileError(nested, "Exactly one character is required.");
2504 return 0;
2505 }
2506 if (ruleDots.length < 1) {
2507 compileError(nested, "At least one cell is required.");
2508 return 0;
2509 }
2510 if (table) {
2511 TranslationTableCharacter *character;
2512 TranslationTableCharacter *cell = NULL;
2513 int k;
2514 if (attributes & (CTC_UpperCase | CTC_LowerCase)) attributes |= CTC_Letter;
2515 character = addCharOrDots(nested, ruleChars.chars[0], 0, table);
2516 character->attributes |= attributes;
2517 character->uppercase = character->lowercase = character->realchar;
2518 for (k = ruleDots.length - 1; k >= 0; k -= 1) {
2519 cell = compile_findCharOrDots(ruleDots.chars[k], 1, *table);
2520 if (!cell) {
2521 cell = addCharOrDots(nested, ruleDots.chars[k], 1, table);
2522 cell->uppercase = cell->lowercase = cell->realchar;
2523 }
2524 }
2525 if (ruleDots.length == 1) cell->attributes |= attributes;
2526 }
2527 if (displayTable && ruleDots.length == 1)
2528 putCharAndDots(nested, ruleChars.chars[0], ruleDots.chars[0], displayTable);
2529 if (table)
2530 if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, newRuleOffset, newRule,
2531 noback, nofor, table))
2532 return 0;
2533 return 1;
2534 }
2535
2536 static int
compileBeforeAfter(FileInfo * nested,int * lastToken)2537 compileBeforeAfter(FileInfo *nested, int *lastToken) {
2538 /* 1=before, 2=after, 0=error */
2539 CharsString token;
2540 CharsString tmp;
2541 if (getToken(nested, &token, "last word before or after", lastToken))
2542 if (parseChars(nested, &tmp, &token)) {
2543 if (eqasc2uni((unsigned char *)"before", tmp.chars, 6)) return 1;
2544 if (eqasc2uni((unsigned char *)"after", tmp.chars, 5)) return 2;
2545 }
2546 return 0;
2547 }
2548
2549 static int
compileRule(FileInfo * nested,TranslationTableOffset * newRuleOffset,TranslationTableRule ** newRule,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)2550 compileRule(FileInfo *nested, TranslationTableOffset *newRuleOffset,
2551 TranslationTableRule **newRule, TranslationTableHeader **table,
2552 DisplayTableHeader **displayTable) {
2553 int lastToken = 0;
2554 int ok = 1;
2555 CharsString token;
2556 TranslationTableOpcode opcode;
2557 CharsString ruleChars;
2558 CharsString ruleDots;
2559 CharsString cells;
2560 CharsString scratchPad;
2561 CharsString emphClass;
2562 TranslationTableCharacterAttributes after = 0;
2563 TranslationTableCharacterAttributes before = 0;
2564 TranslationTableCharacter *c = NULL;
2565 widechar *patterns = NULL;
2566 int k, i;
2567 int noback, nofor;
2568 noback = nofor = 0;
2569 doOpcode:
2570 if (!getToken(nested, &token, NULL, &lastToken)) return 1; /* blank line */
2571 if (token.chars[0] == '#' || token.chars[0] == '<') return 1; /* comment */
2572 if (nested->lineNumber == 1 &&
2573 (eqasc2uni((unsigned char *)"ISO", token.chars, 3) ||
2574 eqasc2uni((unsigned char *)"UTF-8", token.chars, 5))) {
2575 if (table)
2576 compileHyphenation(nested, &token, &lastToken, table);
2577 else
2578 /* ignore the whole file */
2579 while (_lou_getALine(nested))
2580 ;
2581 return 1;
2582 }
2583 opcode = getOpcode(nested, &token);
2584 switch (opcode) {
2585 case CTO_IncludeFile: {
2586 CharsString includedFile;
2587 if (getToken(nested, &token, "include file name", &lastToken))
2588 if (parseChars(nested, &includedFile, &token))
2589 if (!includeFile(nested, &includedFile, table, displayTable)) ok = 0;
2590 break;
2591 }
2592 case CTO_NoBack:
2593 if (nofor) {
2594 compileError(nested, "%s already specified.", _lou_findOpcodeName(CTO_NoFor));
2595 ok = 0;
2596 break;
2597 }
2598 noback = 1;
2599 goto doOpcode;
2600 case CTO_NoFor:
2601 if (noback) {
2602 compileError(
2603 nested, "%s already specified.", _lou_findOpcodeName(CTO_NoBack));
2604 ok = 0;
2605 break;
2606 }
2607 nofor = 1;
2608 goto doOpcode;
2609 case CTO_Space:
2610 compileCharDef(nested, opcode, CTC_Space, &lastToken, newRuleOffset, newRule,
2611 noback, nofor, table, displayTable);
2612 break;
2613 case CTO_Digit:
2614 compileCharDef(nested, opcode, CTC_Digit, &lastToken, newRuleOffset, newRule,
2615 noback, nofor, table, displayTable);
2616 break;
2617 case CTO_LitDigit:
2618 compileCharDef(nested, opcode, CTC_LitDigit, &lastToken, newRuleOffset, newRule,
2619 noback, nofor, table, displayTable);
2620 break;
2621 case CTO_Punctuation:
2622 compileCharDef(nested, opcode, CTC_Punctuation, &lastToken, newRuleOffset,
2623 newRule, noback, nofor, table, displayTable);
2624 break;
2625 case CTO_Math:
2626 compileCharDef(nested, opcode, CTC_Math, &lastToken, newRuleOffset, newRule,
2627 noback, nofor, table, displayTable);
2628 break;
2629 case CTO_Sign:
2630 compileCharDef(nested, opcode, CTC_Sign, &lastToken, newRuleOffset, newRule,
2631 noback, nofor, table, displayTable);
2632 break;
2633 case CTO_Letter:
2634 compileCharDef(nested, opcode, CTC_Letter, &lastToken, newRuleOffset, newRule,
2635 noback, nofor, table, displayTable);
2636 break;
2637 case CTO_UpperCase:
2638 compileCharDef(nested, opcode, CTC_UpperCase, &lastToken, newRuleOffset, newRule,
2639 noback, nofor, table, displayTable);
2640 break;
2641 case CTO_LowerCase:
2642 compileCharDef(nested, opcode, CTC_LowerCase, &lastToken, newRuleOffset, newRule,
2643 noback, nofor, table, displayTable);
2644 break;
2645 case CTO_Grouping:
2646 ok = compileGrouping(nested, &lastToken, newRuleOffset, newRule, noback, nofor,
2647 table, displayTable);
2648 break;
2649 case CTO_UpLow:
2650 ok = compileUplow(nested, &lastToken, newRuleOffset, newRule, noback, nofor,
2651 table, displayTable);
2652 break;
2653 case CTO_Display:
2654 if (!displayTable) break;
2655 if (getRuleCharsText(nested, &ruleChars, &lastToken))
2656 if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
2657 if (ruleChars.length != 1 || ruleDots.length != 1) {
2658 compileError(
2659 nested, "Exactly one character and one cell are required.");
2660 ok = 0;
2661 }
2662 putCharAndDots(
2663 nested, ruleChars.chars[0], ruleDots.chars[0], displayTable);
2664 }
2665 break;
2666 /* now only opcodes follow that don't modify the display table */
2667 default:
2668 if (!table) break;
2669 switch (opcode) {
2670 case CTO_None:
2671 break;
2672 case CTO_Locale:
2673 compileWarning(nested,
2674 "The locale opcode is not implemented. Use the locale meta data "
2675 "instead.");
2676 break;
2677 case CTO_Undefined: {
2678 // not passing pointer because compileBrailleIndicator may reallocate table
2679 TranslationTableOffset ruleOffset = (*table)->undefined;
2680 ok = compileBrailleIndicator(nested, "undefined character opcode",
2681 CTO_Undefined, &lastToken, &ruleOffset, newRule, noback, nofor,
2682 table);
2683 (*table)->undefined = ruleOffset;
2684 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2685 break;
2686 }
2687 case CTO_Match: {
2688 TranslationTableRule *rule;
2689 TranslationTableOffset ruleOffset;
2690 CharsString ptn_before, ptn_after;
2691 TranslationTableOffset patternsOffset;
2692 int len, mrk;
2693
2694 size_t patternsByteSize = sizeof(*patterns) * 27720;
2695 patterns = (widechar *)malloc(patternsByteSize);
2696 if (!patterns) _lou_outOfMemory();
2697 memset(patterns, 0xffff, patternsByteSize);
2698
2699 noback = 1;
2700 getCharacters(nested, &ptn_before, &lastToken);
2701 getRuleCharsText(nested, &ruleChars, &lastToken);
2702 getCharacters(nested, &ptn_after, &lastToken);
2703 getRuleDotsPattern(nested, &ruleDots, &lastToken);
2704
2705 if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
2706 &ruleOffset, &rule, noback, nofor, table)) {
2707 ok = 0;
2708 break;
2709 }
2710 if (ptn_before.chars[0] == '-' && ptn_before.length == 1)
2711 len = _lou_pattern_compile(
2712 &ptn_before.chars[0], 0, &patterns[1], 13841, *table);
2713 else
2714 len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length,
2715 &patterns[1], 13841, *table);
2716 if (!len) {
2717 ok = 0;
2718 break;
2719 }
2720 mrk = patterns[0] = len + 1;
2721 _lou_pattern_reverse(&patterns[1]);
2722
2723 if (ptn_after.chars[0] == '-' && ptn_after.length == 1)
2724 len = _lou_pattern_compile(
2725 &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table);
2726 else
2727 len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length,
2728 &patterns[mrk], 13841, *table);
2729 if (!len) {
2730 ok = 0;
2731 break;
2732 }
2733 len += mrk;
2734
2735 if (!allocateSpaceInTranslationTable(
2736 nested, &patternsOffset, len * sizeof(widechar), table)) {
2737 ok = 0;
2738 break;
2739 }
2740
2741 /* realloc may have moved table, so make sure rule is still valid */
2742 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2743 memcpy(&(*table)->ruleArea[patternsOffset], patterns, len * sizeof(widechar));
2744 rule->patterns = patternsOffset;
2745
2746 if (newRule) *newRule = rule;
2747 if (newRuleOffset) *newRuleOffset = ruleOffset;
2748 break;
2749 }
2750
2751 case CTO_BackMatch: {
2752 TranslationTableRule *rule;
2753 TranslationTableOffset ruleOffset;
2754 CharsString ptn_before, ptn_after;
2755 TranslationTableOffset patternOffset;
2756 int len, mrk;
2757
2758 size_t patternsByteSize = sizeof(*patterns) * 27720;
2759 patterns = (widechar *)malloc(patternsByteSize);
2760 if (!patterns) _lou_outOfMemory();
2761 memset(patterns, 0xffff, patternsByteSize);
2762
2763 nofor = 1;
2764 getCharacters(nested, &ptn_before, &lastToken);
2765 getRuleCharsText(nested, &ruleChars, &lastToken);
2766 getCharacters(nested, &ptn_after, &lastToken);
2767 getRuleDotsPattern(nested, &ruleDots, &lastToken);
2768
2769 if (!addRule(nested, opcode, &ruleChars, &ruleDots, 0, 0, &ruleOffset, &rule,
2770 noback, nofor, table)) {
2771 ok = 0;
2772 break;
2773 }
2774 if (ptn_before.chars[0] == '-' && ptn_before.length == 1)
2775 len = _lou_pattern_compile(
2776 &ptn_before.chars[0], 0, &patterns[1], 13841, *table);
2777 else
2778 len = _lou_pattern_compile(&ptn_before.chars[0], ptn_before.length,
2779 &patterns[1], 13841, *table);
2780 if (!len) {
2781 ok = 0;
2782 break;
2783 }
2784 mrk = patterns[0] = len + 1;
2785 _lou_pattern_reverse(&patterns[1]);
2786
2787 if (ptn_after.chars[0] == '-' && ptn_after.length == 1)
2788 len = _lou_pattern_compile(
2789 &ptn_after.chars[0], 0, &patterns[mrk], 13841, *table);
2790 else
2791 len = _lou_pattern_compile(&ptn_after.chars[0], ptn_after.length,
2792 &patterns[mrk], 13841, *table);
2793 if (!len) {
2794 ok = 0;
2795 break;
2796 }
2797 len += mrk;
2798
2799 if (!allocateSpaceInTranslationTable(
2800 nested, &patternOffset, len * sizeof(widechar), table)) {
2801 ok = 0;
2802 break;
2803 }
2804
2805 /* realloc may have moved table, so make sure rule is still valid */
2806 rule = (TranslationTableRule *)&(*table)->ruleArea[ruleOffset];
2807
2808 memcpy(&(*table)->ruleArea[patternOffset], patterns, len * sizeof(widechar));
2809 rule->patterns = patternOffset;
2810
2811 if (newRule) *newRule = rule;
2812 if (newRuleOffset) *newRuleOffset = ruleOffset;
2813 break;
2814 }
2815
2816 case CTO_BegCapsPhrase: {
2817 // not passing pointer because compileBrailleIndicator may reallocate table
2818 TranslationTableOffset ruleOffset =
2819 (*table)->emphRules[capsRule][begPhraseOffset];
2820 ok = compileBrailleIndicator(nested, "first word capital sign",
2821 CTO_BegCapsPhraseRule, &lastToken, &ruleOffset, newRule, noback,
2822 nofor, table);
2823 (*table)->emphRules[capsRule][begPhraseOffset] = ruleOffset;
2824 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2825 break;
2826 }
2827 case CTO_EndCapsPhrase: {
2828 TranslationTableOffset ruleOffset;
2829 switch (compileBeforeAfter(nested, &lastToken)) {
2830 case 1: // before
2831 if ((*table)->emphRules[capsRule][endPhraseAfterOffset]) {
2832 compileError(nested, "Capital sign after last word already defined.");
2833 ok = 0;
2834 break;
2835 }
2836 // not passing pointer because compileBrailleIndicator may reallocate
2837 // table
2838 ruleOffset = (*table)->emphRules[capsRule][endPhraseBeforeOffset];
2839 ok = compileBrailleIndicator(nested, "capital sign before last word",
2840 CTO_EndCapsPhraseBeforeRule, &lastToken, &ruleOffset, newRule,
2841 noback, nofor, table);
2842 (*table)->emphRules[capsRule][endPhraseBeforeOffset] = ruleOffset;
2843 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2844 break;
2845 case 2: // after
2846 if ((*table)->emphRules[capsRule][endPhraseBeforeOffset]) {
2847 compileError(
2848 nested, "Capital sign before last word already defined.");
2849 ok = 0;
2850 break;
2851 }
2852 // not passing pointer because compileBrailleIndicator may reallocate
2853 // table
2854 ruleOffset = (*table)->emphRules[capsRule][endPhraseAfterOffset];
2855 ok = compileBrailleIndicator(nested, "capital sign after last word",
2856 CTO_EndCapsPhraseAfterRule, &lastToken, &ruleOffset, newRule,
2857 noback, nofor, table);
2858 (*table)->emphRules[capsRule][endPhraseAfterOffset] = ruleOffset;
2859 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2860 break;
2861 default: // error
2862 compileError(nested, "Invalid lastword indicator location.");
2863 ok = 0;
2864 break;
2865 }
2866 break;
2867 }
2868 case CTO_BegCaps: {
2869 // not passing pointer because compileBrailleIndicator may reallocate table
2870 TranslationTableOffset ruleOffset = (*table)->emphRules[capsRule][begOffset];
2871 ok = compileBrailleIndicator(nested, "first letter capital sign",
2872 CTO_BegCapsRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2873 table);
2874 (*table)->emphRules[capsRule][begOffset] = ruleOffset;
2875 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2876 break;
2877 }
2878 case CTO_EndCaps: {
2879 // not passing pointer because compileBrailleIndicator may reallocate table
2880 TranslationTableOffset ruleOffset = (*table)->emphRules[capsRule][endOffset];
2881 ok = compileBrailleIndicator(nested, "last letter capital sign",
2882 CTO_EndCapsRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2883 table);
2884 (*table)->emphRules[capsRule][endOffset] = ruleOffset;
2885 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2886 break;
2887 }
2888 case CTO_CapsLetter: {
2889 // not passing pointer because compileBrailleIndicator may reallocate table
2890 TranslationTableOffset ruleOffset =
2891 (*table)->emphRules[capsRule][letterOffset];
2892 ok = compileBrailleIndicator(nested, "single letter capital sign",
2893 CTO_CapsLetterRule, &lastToken, &ruleOffset, newRule, noback, nofor,
2894 table);
2895 (*table)->emphRules[capsRule][letterOffset] = ruleOffset;
2896 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2897 break;
2898 }
2899 case CTO_BegCapsWord: {
2900 // not passing pointer because compileBrailleIndicator may reallocate table
2901 TranslationTableOffset ruleOffset =
2902 (*table)->emphRules[capsRule][begWordOffset];
2903 ok = compileBrailleIndicator(nested, "capital word", CTO_BegCapsWordRule,
2904 &lastToken, &ruleOffset, newRule, noback, nofor, table);
2905 (*table)->emphRules[capsRule][begWordOffset] = ruleOffset;
2906 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2907 break;
2908 }
2909 case CTO_EndCapsWord: {
2910 // not passing pointer because compileBrailleIndicator may reallocate table
2911 TranslationTableOffset ruleOffset =
2912 (*table)->emphRules[capsRule][endWordOffset];
2913 ok = compileBrailleIndicator(nested, "capital word stop", CTO_EndCapsWordRule,
2914 &lastToken, &ruleOffset, newRule, noback, nofor, table);
2915 (*table)->emphRules[capsRule][endWordOffset] = ruleOffset;
2916 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
2917 break;
2918 }
2919 case CTO_LenCapsPhrase:
2920 ok = (*table)->emphRules[capsRule][lenPhraseOffset] =
2921 compileNumber(nested, &lastToken);
2922 break;
2923
2924 /* these 8 general purpose emphasis opcodes are compiled further down to more
2925 * specific internal opcodes:
2926 * - emphletter
2927 * - begemphword
2928 * - endemphword
2929 * - begemph
2930 * - endemph
2931 * - begemphphrase
2932 * - endemphphrase
2933 * - lenemphphrase
2934 */
2935 case CTO_EmphClass:
2936 if (getToken(nested, &token, "emphasis class", &lastToken))
2937 if (parseChars(nested, &emphClass, &token)) {
2938 char *s = malloc(sizeof(char) * (emphClass.length + 1));
2939 for (k = 0; k < emphClass.length; k++)
2940 s[k] = (char)emphClass.chars[k];
2941 s[k++] = '\0';
2942 for (i = 0; (*table)->emphClasses[i]; i++)
2943 if (strcmp(s, (*table)->emphClasses[i]) == 0) {
2944 _lou_logMessage(
2945 LOU_LOG_WARN, "Duplicate emphasis class: %s", s);
2946 warningCount++;
2947 free(s);
2948 return 1;
2949 }
2950 if (i < MAX_EMPH_CLASSES) {
2951 switch (i) {
2952 /* For backwards compatibility (i.e. because programs will assume
2953 * the first 3 typeform bits are `italic', `underline' and `bold')
2954 * we require that the first 3 emphclass definitions are (in that
2955 * order):
2956 *
2957 * emphclass italic
2958 * emphclass underline
2959 * emphclass bold
2960 *
2961 * While it would be possible to use the emphclass opcode only for
2962 * defining
2963 * _additional_ classes (not allowing for them to be called
2964 * italic, underline or bold), thereby reducing the amount of
2965 * boilerplate, we deliberately choose not to do that in order to
2966 * not give italic, underline and bold any special status. The
2967 * hope is that eventually all programs will use liblouis for
2968 * emphasis the recommended way (i.e. by looking up the supported
2969 * typeforms in
2970 * the documentation or API) so that we can drop this restriction.
2971 */
2972 case 0:
2973 if (strcmp(s, "italic") != 0) {
2974 _lou_logMessage(LOU_LOG_ERROR,
2975 "First emphasis class must be \"italic\" but got "
2976 "%s",
2977 s);
2978 errorCount++;
2979 free(s);
2980 return 0;
2981 }
2982 break;
2983 case 1:
2984 if (strcmp(s, "underline") != 0) {
2985 _lou_logMessage(LOU_LOG_ERROR,
2986 "Second emphasis class must be \"underline\" but "
2987 "got "
2988 "%s",
2989 s);
2990 errorCount++;
2991 free(s);
2992 return 0;
2993 }
2994 break;
2995 case 2:
2996 if (strcmp(s, "bold") != 0) {
2997 _lou_logMessage(LOU_LOG_ERROR,
2998 "Third emphasis class must be \"bold\" but got "
2999 "%s",
3000 s);
3001 errorCount++;
3002 free(s);
3003 return 0;
3004 }
3005 break;
3006 }
3007 (*table)->emphClasses[i] = s;
3008 (*table)->emphClasses[i + 1] = NULL;
3009 ok = 1;
3010 break;
3011 } else {
3012 _lou_logMessage(LOU_LOG_ERROR,
3013 "Max number of emphasis classes (%i) reached",
3014 MAX_EMPH_CLASSES);
3015 errorCount++;
3016 free(s);
3017 ok = 0;
3018 break;
3019 }
3020 }
3021 compileError(nested, "emphclass must be followed by a valid class name.");
3022 ok = 0;
3023 break;
3024 case CTO_EmphLetter:
3025 case CTO_BegEmphWord:
3026 case CTO_EndEmphWord:
3027 case CTO_BegEmph:
3028 case CTO_EndEmph:
3029 case CTO_BegEmphPhrase:
3030 case CTO_EndEmphPhrase:
3031 case CTO_LenEmphPhrase: {
3032 ok = 0;
3033 TranslationTableOffset ruleOffset = 0;
3034 if (getToken(nested, &token, "emphasis class", &lastToken))
3035 if (parseChars(nested, &emphClass, &token)) {
3036 char *s = malloc(sizeof(char) * (emphClass.length + 1));
3037 for (k = 0; k < emphClass.length; k++)
3038 s[k] = (char)emphClass.chars[k];
3039 s[k++] = '\0';
3040 for (i = 0; (*table)->emphClasses[i]; i++)
3041 if (strcmp(s, (*table)->emphClasses[i]) == 0) break;
3042 if (!(*table)->emphClasses[i]) {
3043 _lou_logMessage(
3044 LOU_LOG_ERROR, "Emphasis class %s not declared", s);
3045 errorCount++;
3046 free(s);
3047 break;
3048 }
3049 i++; // in table->emphRules the first index is used for caps
3050 if (opcode == CTO_EmphLetter) {
3051 // not passing pointer because compileBrailleIndicator may
3052 // reallocate table
3053 ruleOffset = (*table)->emphRules[i][letterOffset];
3054 ok = compileBrailleIndicator(nested, "single letter",
3055 CTO_Emph1LetterRule + letterOffset + (8 * i), &lastToken,
3056 &ruleOffset, newRule, noback, nofor, table);
3057 (*table)->emphRules[i][letterOffset] = ruleOffset;
3058 } else if (opcode == CTO_BegEmphWord) {
3059 // not passing pointer because compileBrailleIndicator may
3060 // reallocate table
3061 ruleOffset = (*table)->emphRules[i][begWordOffset];
3062 ok = compileBrailleIndicator(nested, "word",
3063 CTO_Emph1LetterRule + begWordOffset + (8 * i), &lastToken,
3064 &ruleOffset, newRule, noback, nofor, table);
3065 (*table)->emphRules[i][begWordOffset] = ruleOffset;
3066 } else if (opcode == CTO_EndEmphWord) {
3067 // not passing pointer because compileBrailleIndicator may
3068 // reallocate table
3069 ruleOffset = (*table)->emphRules[i][endWordOffset];
3070 ok = compileBrailleIndicator(nested, "word stop",
3071 CTO_Emph1LetterRule + endWordOffset + (8 * i), &lastToken,
3072 &ruleOffset, newRule, noback, nofor, table);
3073 (*table)->emphRules[i][endWordOffset] = ruleOffset;
3074 } else if (opcode == CTO_BegEmph) {
3075 /* fail if both begemph and any of begemphphrase or begemphword
3076 * are defined */
3077 if ((*table)->emphRules[i][begWordOffset] ||
3078 (*table)->emphRules[i][begPhraseOffset]) {
3079 compileError(nested,
3080 "Cannot define emphasis for both no context and word "
3081 "or "
3082 "phrase context, i.e. cannot have both begemph and "
3083 "begemphword or begemphphrase.");
3084 ok = 0;
3085 break;
3086 }
3087 // not passing pointer because compileBrailleIndicator may
3088 // reallocate table
3089 ruleOffset = (*table)->emphRules[i][begOffset];
3090 ok = compileBrailleIndicator(nested, "first letter",
3091 CTO_Emph1LetterRule + begOffset + (8 * i), &lastToken,
3092 &ruleOffset, newRule, noback, nofor, table);
3093 (*table)->emphRules[i][begOffset] = ruleOffset;
3094 } else if (opcode == CTO_EndEmph) {
3095 if ((*table)->emphRules[i][endWordOffset] ||
3096 (*table)->emphRules[i][endPhraseBeforeOffset] ||
3097 (*table)->emphRules[i][endPhraseAfterOffset]) {
3098 compileError(nested,
3099 "Cannot define emphasis for both no context and word "
3100 "or "
3101 "phrase context, i.e. cannot have both endemph and "
3102 "endemphword or endemphphrase.");
3103 ok = 0;
3104 break;
3105 }
3106 // not passing pointer because compileBrailleIndicator may
3107 // reallocate table
3108 ruleOffset = (*table)->emphRules[i][endOffset];
3109 ok = compileBrailleIndicator(nested, "last letter",
3110 CTO_Emph1LetterRule + endOffset + (8 * i), &lastToken,
3111 &ruleOffset, newRule, noback, nofor, table);
3112 (*table)->emphRules[i][endOffset] = ruleOffset;
3113 } else if (opcode == CTO_BegEmphPhrase) {
3114 // not passing pointer because compileBrailleIndicator may
3115 // reallocate table
3116 ruleOffset = (*table)->emphRules[i][begPhraseOffset];
3117 ok = compileBrailleIndicator(nested, "first word",
3118 CTO_Emph1LetterRule + begPhraseOffset + (8 * i),
3119 &lastToken, &ruleOffset, newRule, noback, nofor, table);
3120 (*table)->emphRules[i][begPhraseOffset] = ruleOffset;
3121 } else if (opcode == CTO_EndEmphPhrase)
3122 switch (compileBeforeAfter(nested, &lastToken)) {
3123 case 1: // before
3124 if ((*table)->emphRules[i][endPhraseAfterOffset]) {
3125 compileError(nested, "last word after already defined.");
3126 ok = 0;
3127 break;
3128 }
3129 // not passing pointer because compileBrailleIndicator may
3130 // reallocate table
3131 ruleOffset = (*table)->emphRules[i][endPhraseBeforeOffset];
3132 ok = compileBrailleIndicator(nested, "last word before",
3133 CTO_Emph1LetterRule + endPhraseBeforeOffset + (8 * i),
3134 &lastToken, &ruleOffset, newRule, noback, nofor,
3135 table);
3136 (*table)->emphRules[i][endPhraseBeforeOffset] = ruleOffset;
3137 break;
3138 case 2: // after
3139 if ((*table)->emphRules[i][endPhraseBeforeOffset]) {
3140 compileError(nested, "last word before already defined.");
3141 ok = 0;
3142 break;
3143 }
3144 // not passing pointer because compileBrailleIndicator may
3145 // reallocate table
3146 ruleOffset = (*table)->emphRules[i][endPhraseAfterOffset];
3147 ok = compileBrailleIndicator(nested, "last word after",
3148 CTO_Emph1LetterRule + endPhraseAfterOffset + (8 * i),
3149 &lastToken, &ruleOffset, newRule, noback, nofor,
3150 table);
3151 (*table)->emphRules[i][endPhraseAfterOffset] = ruleOffset;
3152 break;
3153 default: // error
3154 compileError(nested, "Invalid lastword indicator location.");
3155 ok = 0;
3156 break;
3157 }
3158 else if (opcode == CTO_LenEmphPhrase)
3159 ok = (*table)->emphRules[i][lenPhraseOffset] =
3160 compileNumber(nested, &lastToken);
3161 free(s);
3162 }
3163 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3164 break;
3165 }
3166 case CTO_LetterSign: {
3167 // not passing pointer because compileBrailleIndicator may reallocate table
3168 TranslationTableOffset ruleOffset = (*table)->letterSign;
3169 ok = compileBrailleIndicator(nested, "letter sign", CTO_LetterRule,
3170 &lastToken, &ruleOffset, newRule, noback, nofor, table);
3171 (*table)->letterSign = ruleOffset;
3172 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3173 break;
3174 }
3175 case CTO_NoLetsignBefore:
3176 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3177 if (((*table)->noLetsignBeforeCount + ruleChars.length) > LETSIGNSIZE) {
3178 compileError(nested, "More than %d characters", LETSIGNSIZE);
3179 ok = 0;
3180 break;
3181 }
3182 for (k = 0; k < ruleChars.length; k++)
3183 (*table)->noLetsignBefore[(*table)->noLetsignBeforeCount++] =
3184 ruleChars.chars[k];
3185 }
3186 break;
3187 case CTO_NoLetsign:
3188 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3189 if (((*table)->noLetsignCount + ruleChars.length) > LETSIGNSIZE) {
3190 compileError(nested, "More than %d characters", LETSIGNSIZE);
3191 ok = 0;
3192 break;
3193 }
3194 for (k = 0; k < ruleChars.length; k++)
3195 (*table)->noLetsign[(*table)->noLetsignCount++] = ruleChars.chars[k];
3196 }
3197 break;
3198 case CTO_NoLetsignAfter:
3199 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3200 if (((*table)->noLetsignAfterCount + ruleChars.length) > LETSIGNSIZE) {
3201 compileError(nested, "More than %d characters", LETSIGNSIZE);
3202 ok = 0;
3203 break;
3204 }
3205 for (k = 0; k < ruleChars.length; k++)
3206 (*table)->noLetsignAfter[(*table)->noLetsignAfterCount++] =
3207 ruleChars.chars[k];
3208 }
3209 break;
3210 case CTO_NumberSign: {
3211 // not passing pointer because compileBrailleIndicator may reallocate table
3212 TranslationTableOffset ruleOffset = (*table)->numberSign;
3213 ok = compileBrailleIndicator(nested, "number sign", CTO_NumberRule,
3214 &lastToken, &ruleOffset, newRule, noback, nofor, table);
3215 (*table)->numberSign = ruleOffset;
3216 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3217 break;
3218 }
3219 case CTO_Attribute:
3220
3221 c = NULL;
3222 ok = 1;
3223 if (!getToken(nested, &ruleChars, "attribute number", &lastToken)) {
3224 compileError(nested, "Expected attribute number.");
3225 ok = 0;
3226 break;
3227 }
3228
3229 k = -1;
3230 switch (ruleChars.chars[0]) {
3231 case '0':
3232 k = 0;
3233 break;
3234 case '1':
3235 k = 1;
3236 break;
3237 case '2':
3238 k = 2;
3239 break;
3240 case '3':
3241 k = 3;
3242 break;
3243 case '4':
3244 k = 4;
3245 break;
3246 case '5':
3247 k = 5;
3248 break;
3249 case '6':
3250 k = 6;
3251 break;
3252 case '7':
3253 k = 7;
3254 break;
3255 }
3256 if (k == -1) {
3257 compileError(nested, "Invalid attribute number.");
3258 ok = 0;
3259 break;
3260 }
3261
3262 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3263 for (i = 0; i < ruleChars.length; i++) {
3264 c = compile_findCharOrDots(ruleChars.chars[i], 0, *table);
3265 if (c)
3266 c->attributes |= (CTC_UserDefined0 << k);
3267 else {
3268 compileError(nested, "Attribute character undefined");
3269 ok = 0;
3270 break;
3271 }
3272 }
3273 }
3274 break;
3275
3276 case CTO_NumericModeChars:
3277
3278 c = NULL;
3279 ok = 1;
3280 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3281 for (k = 0; k < ruleChars.length; k++) {
3282 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3283 if (c)
3284 c->attributes |= CTC_NumericMode;
3285 else {
3286 compileError(nested, "Numeric mode character undefined");
3287 ok = 0;
3288 break;
3289 }
3290 }
3291 (*table)->usesNumericMode = 1;
3292 }
3293 break;
3294
3295 case CTO_MidEndNumericModeChars:
3296
3297 c = NULL;
3298 ok = 1;
3299 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3300 for (k = 0; k < ruleChars.length; k++) {
3301 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3302 if (c)
3303 c->attributes |= CTC_MidEndNumericMode;
3304 else {
3305 compileError(nested, "Midendnumeric mode character undefined");
3306 ok = 0;
3307 break;
3308 }
3309 }
3310 (*table)->usesNumericMode = 1;
3311 }
3312 break;
3313
3314 case CTO_NumericNoContractChars:
3315
3316 c = NULL;
3317 ok = 1;
3318 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3319 for (k = 0; k < ruleChars.length; k++) {
3320 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3321 if (c)
3322 c->attributes |= CTC_NumericNoContract;
3323 else {
3324 compileError(
3325 nested, "Numeric no contraction character undefined");
3326 ok = 0;
3327 break;
3328 }
3329 }
3330 (*table)->usesNumericMode = 1;
3331 }
3332 break;
3333
3334 case CTO_NoContractSign: {
3335 // not passing pointer because compileBrailleIndicator may reallocate table
3336 TranslationTableOffset ruleOffset = (*table)->noContractSign;
3337 ok = compileBrailleIndicator(nested, "no contractions sign",
3338 CTO_NoContractRule, &lastToken, &ruleOffset, newRule, noback, nofor,
3339 table);
3340 (*table)->noContractSign = ruleOffset;
3341 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3342 break;
3343 }
3344 case CTO_SeqDelimiter:
3345
3346 c = NULL;
3347 ok = 1;
3348 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3349 for (k = 0; k < ruleChars.length; k++) {
3350 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3351 if (c)
3352 c->attributes |= CTC_SeqDelimiter;
3353 else {
3354 compileError(nested, "Sequence delimiter character undefined");
3355 ok = 0;
3356 break;
3357 }
3358 }
3359 (*table)->usesSequences = 1;
3360 }
3361 break;
3362
3363 case CTO_SeqBeforeChars:
3364
3365 c = NULL;
3366 ok = 1;
3367 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3368 for (k = 0; k < ruleChars.length; k++) {
3369 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3370 if (c)
3371 c->attributes |= CTC_SeqBefore;
3372 else {
3373 compileError(nested, "Sequence before character undefined");
3374 ok = 0;
3375 break;
3376 }
3377 }
3378 }
3379 break;
3380
3381 case CTO_SeqAfterChars:
3382
3383 c = NULL;
3384 ok = 1;
3385 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3386 for (k = 0; k < ruleChars.length; k++) {
3387 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3388 if (c)
3389 c->attributes |= CTC_SeqAfter;
3390 else {
3391 compileError(nested, "Sequence after character undefined");
3392 ok = 0;
3393 break;
3394 }
3395 }
3396 }
3397 break;
3398
3399 case CTO_SeqAfterPattern:
3400
3401 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3402 if (((*table)->seqPatternsCount + ruleChars.length + 1) >
3403 SEQPATTERNSIZE) {
3404 compileError(nested, "More than %d characters", SEQPATTERNSIZE);
3405 ok = 0;
3406 break;
3407 }
3408 for (k = 0; k < ruleChars.length; k++)
3409 (*table)->seqPatterns[(*table)->seqPatternsCount++] =
3410 ruleChars.chars[k];
3411 (*table)->seqPatterns[(*table)->seqPatternsCount++] = 0;
3412 }
3413 break;
3414 case CTO_SeqAfterExpression:
3415
3416 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3417 for ((*table)->seqAfterExpressionLength = 0;
3418 (*table)->seqAfterExpressionLength < ruleChars.length;
3419 (*table)->seqAfterExpressionLength++)
3420 (*table)->seqAfterExpression[(*table)->seqAfterExpressionLength] =
3421 ruleChars.chars[(*table)->seqAfterExpressionLength];
3422 (*table)->seqAfterExpression[(*table)->seqAfterExpressionLength] = 0;
3423 }
3424 break;
3425
3426 case CTO_CapsModeChars:
3427
3428 c = NULL;
3429 ok = 1;
3430 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3431 for (k = 0; k < ruleChars.length; k++) {
3432 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3433 if (c)
3434 c->attributes |= CTC_CapsMode;
3435 else {
3436 compileError(nested, "Capital mode character undefined");
3437 ok = 0;
3438 break;
3439 }
3440 }
3441 }
3442 break;
3443
3444 case CTO_EmphModeChars:
3445
3446 c = NULL;
3447 ok = 1;
3448 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3449 for (k = 0; k < ruleChars.length; k++) {
3450 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3451 if (c)
3452 c->attributes |= CTC_EmphMode;
3453 else {
3454 compileError(nested, "Emphasis mode character undefined");
3455 ok = 0;
3456 break;
3457 }
3458 }
3459 }
3460 (*table)->usesEmphMode = 1;
3461 break;
3462
3463 case CTO_BegComp: {
3464 // not passing pointer because compileBrailleIndicator may reallocate table
3465 TranslationTableOffset ruleOffset = (*table)->begComp;
3466 ok = compileBrailleIndicator(nested, "begin computer braille",
3467 CTO_BegCompRule, &lastToken, &ruleOffset, newRule, noback, nofor,
3468 table);
3469 (*table)->begComp = ruleOffset;
3470 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3471 break;
3472 }
3473 case CTO_EndComp: {
3474 // not passing pointer because compileBrailleIndicator may reallocate table
3475 TranslationTableOffset ruleOffset = (*table)->endComp;
3476 ok = compileBrailleIndicator(nested, "end computer braslle", CTO_EndCompRule,
3477 &lastToken, &ruleOffset, newRule, noback, nofor, table);
3478 (*table)->endComp = ruleOffset;
3479 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3480 break;
3481 }
3482 case CTO_Syllable:
3483 (*table)->syllables = 1;
3484 case CTO_Always:
3485 case CTO_NoCross:
3486 case CTO_LargeSign:
3487 case CTO_WholeWord:
3488 case CTO_PartWord:
3489 case CTO_JoinNum:
3490 case CTO_JoinableWord:
3491 case CTO_LowWord:
3492 case CTO_SuffixableWord:
3493 case CTO_PrefixableWord:
3494 case CTO_BegWord:
3495 case CTO_BegMidWord:
3496 case CTO_MidWord:
3497 case CTO_MidEndWord:
3498 case CTO_EndWord:
3499 case CTO_PrePunc:
3500 case CTO_PostPunc:
3501 case CTO_BegNum:
3502 case CTO_MidNum:
3503 case CTO_EndNum:
3504 case CTO_Repeated:
3505 case CTO_RepWord:
3506 if (getRuleCharsText(nested, &ruleChars, &lastToken))
3507 if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
3508 if (ruleDots.length == 0) // `=`
3509 for (k = 0; k < ruleChars.length; k++) {
3510 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3511 if (!c || !c->definitionRule) {
3512 compileError(nested, "Character %s is not defined",
3513 _lou_showString(&ruleChars.chars[k], 1, 0));
3514 return 0;
3515 }
3516 }
3517 if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3518 newRuleOffset, newRule, noback, nofor, table))
3519 ok = 0;
3520 }
3521 // if (opcode == CTO_MidNum)
3522 // {
3523 // TranslationTableCharacter *c = compile_findCharOrDots(ruleChars.chars[0],
3524 // 0); if(c)
3525 // c->attributes |= CTC_NumericMode;
3526 // }
3527 break;
3528 case CTO_CompDots:
3529 case CTO_Comp6: {
3530 TranslationTableOffset ruleOffset;
3531 if (!getRuleCharsText(nested, &ruleChars, &lastToken)) return 0;
3532 if (ruleChars.length != 1 || ruleChars.chars[0] > 255) {
3533 compileError(nested, "first operand must be 1 character and < 256");
3534 return 0;
3535 }
3536 if (!getRuleDotsPattern(nested, &ruleDots, &lastToken)) return 0;
3537 if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3538 &ruleOffset, newRule, noback, nofor, table))
3539 ok = 0;
3540 (*table)->compdotsPattern[ruleChars.chars[0]] = ruleOffset;
3541 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3542 break;
3543 }
3544 case CTO_ExactDots:
3545 if (!getRuleCharsText(nested, &ruleChars, &lastToken)) return 0;
3546 if (ruleChars.chars[0] != '@') {
3547 compileError(nested, "The operand must begin with an at sign (@)");
3548 return 0;
3549 }
3550 for (k = 1; k < ruleChars.length; k++)
3551 scratchPad.chars[k - 1] = ruleChars.chars[k];
3552 scratchPad.length = ruleChars.length - 1;
3553 if (!parseDots(nested, &ruleDots, &scratchPad)) return 0;
3554 if (!addRule(nested, opcode, &ruleChars, &ruleDots, before, after,
3555 newRuleOffset, newRule, noback, nofor, table))
3556 ok = 0;
3557 break;
3558 case CTO_CapsNoCont: {
3559 TranslationTableOffset ruleOffset;
3560 ruleChars.length = 1;
3561 ruleChars.chars[0] = 'a';
3562 if (!addRule(nested, CTO_CapsNoContRule, &ruleChars, NULL, after, before,
3563 &ruleOffset, newRule, noback, nofor, table))
3564 ok = 0;
3565 (*table)->capsNoCont = ruleOffset;
3566 if (ok && newRuleOffset) *newRuleOffset = ruleOffset;
3567 break;
3568 }
3569 case CTO_Replace:
3570 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3571 if (lastToken)
3572 ruleDots.length = ruleDots.chars[0] = 0;
3573 else {
3574 getRuleDotsText(nested, &ruleDots, &lastToken);
3575 if (ruleDots.chars[0] == '#')
3576 ruleDots.length = ruleDots.chars[0] = 0;
3577 else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#')
3578 memcpy(&ruleDots.chars[0], &ruleDots.chars[1],
3579 ruleDots.length-- * CHARSIZE);
3580 }
3581 }
3582 for (k = 0; k < ruleChars.length; k++)
3583 addCharOrDots(nested, ruleChars.chars[k], 0, table);
3584 for (k = 0; k < ruleDots.length; k++)
3585 addCharOrDots(nested, ruleDots.chars[k], 0, table);
3586 if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3587 newRuleOffset, newRule, noback, nofor, table))
3588 ok = 0;
3589 break;
3590 case CTO_Correct:
3591 (*table)->corrections = 1;
3592 goto doPass;
3593 case CTO_Pass2:
3594 if ((*table)->numPasses < 2) (*table)->numPasses = 2;
3595 goto doPass;
3596 case CTO_Pass3:
3597 if ((*table)->numPasses < 3) (*table)->numPasses = 3;
3598 goto doPass;
3599 case CTO_Pass4:
3600 if ((*table)->numPasses < 4) (*table)->numPasses = 4;
3601 doPass:
3602 case CTO_Context:
3603 if (!(nofor || noback)) {
3604 compileError(nested, "%s or %s must be specified.",
3605 _lou_findOpcodeName(CTO_NoFor), _lou_findOpcodeName(CTO_NoBack));
3606 ok = 0;
3607 break;
3608 }
3609 if (!compilePassOpcode(
3610 nested, opcode, newRuleOffset, newRule, noback, nofor, table))
3611 ok = 0;
3612 break;
3613 case CTO_Contraction:
3614 case CTO_NoCont:
3615 case CTO_CompBrl:
3616 case CTO_Literal:
3617 if (getRuleCharsText(nested, &ruleChars, &lastToken)) {
3618 for (k = 0; k < ruleChars.length; k++) {
3619 c = compile_findCharOrDots(ruleChars.chars[k], 0, *table);
3620 if (!c || !c->definitionRule) {
3621 compileError(nested, "Character %s is not defined",
3622 _lou_showString(&ruleChars.chars[k], 1, 0));
3623 return 0;
3624 }
3625 }
3626 if (!addRule(nested, opcode, &ruleChars, NULL, after, before,
3627 newRuleOffset, newRule, noback, nofor, table))
3628 ok = 0;
3629 }
3630 break;
3631 case CTO_MultInd: {
3632 int t;
3633 ruleChars.length = 0;
3634 if (getToken(nested, &token, "multiple braille indicators", &lastToken) &&
3635 parseDots(nested, &cells, &token)) {
3636 while ((t = getToken(nested, &token, "multind opcodes", &lastToken))) {
3637 opcode = getOpcode(nested, &token);
3638 if (opcode >= CTO_CapsLetter && opcode < CTO_MultInd)
3639 ruleChars.chars[ruleChars.length++] = (widechar)opcode;
3640 else {
3641 compileError(nested, "Not a braille indicator opcode.");
3642 ok = 0;
3643 }
3644 if (t == 2) break;
3645 }
3646 } else
3647 ok = 0;
3648 if (!addRule(nested, CTO_MultInd, &ruleChars, &cells, after, before,
3649 newRuleOffset, newRule, noback, nofor, table))
3650 ok = 0;
3651 break;
3652 }
3653
3654 case CTO_Class: {
3655 CharsString characters;
3656 const CharacterClass *class;
3657 if (!(*table)->characterClasses) {
3658 if (!allocateCharacterClasses(*table)) ok = 0;
3659 }
3660 if (getToken(nested, &token, "character class name", &lastToken)) {
3661 class = findCharacterClass(&token, *table);
3662 if (!class)
3663 // no class with that name: create one
3664 class = addCharacterClass(
3665 nested, &token.chars[0], token.length, *table);
3666 if (class) {
3667 // there is a class with that name or a new class was successfully
3668 // created
3669 if (getCharacters(nested, &characters, &lastToken)) {
3670 int index;
3671 for (index = 0; index < characters.length; ++index) {
3672 TranslationTableRule *defRule;
3673 // get the character from the table and add the new class to
3674 // its attributes if the character is not defined yet, define
3675 // it
3676 TranslationTableCharacter *character = addCharOrDots(
3677 nested, characters.chars[index], 0, table);
3678 character->attributes |= class->attribute;
3679 // also add the attribute to the associated dots (if any)
3680 if (character->definitionRule) {
3681 defRule = (TranslationTableRule *)&(*table)
3682 ->ruleArea[character->definitionRule];
3683 if (defRule->dotslen == 1) {
3684 character = compile_findCharOrDots(
3685 defRule->charsdots[defRule->charslen], 1,
3686 *table);
3687 if (character)
3688 character->attributes |= class->attribute;
3689 }
3690 }
3691 }
3692 }
3693 }
3694 }
3695 break;
3696 }
3697
3698 {
3699 TranslationTableCharacterAttributes *attributes;
3700 const CharacterClass *class;
3701 case CTO_After:
3702 attributes = &after;
3703 goto doClass;
3704 case CTO_Before:
3705 attributes = &before;
3706 doClass:
3707 if (!(*table)->characterClasses) {
3708 if (!allocateCharacterClasses(*table)) ok = 0;
3709 }
3710 if (getCharacterClass(nested, &class, *table, &lastToken)) {
3711 *attributes |= class->attribute;
3712 goto doOpcode;
3713 }
3714 break;
3715 }
3716
3717 case CTO_EmpMatchBefore:
3718 before |= CTC_EmpMatch;
3719 goto doOpcode;
3720 case CTO_EmpMatchAfter:
3721 after |= CTC_EmpMatch;
3722 goto doOpcode;
3723
3724 case CTO_SwapCc:
3725 case CTO_SwapCd:
3726 case CTO_SwapDd:
3727 if (!compileSwap(nested, opcode, &lastToken, newRuleOffset, newRule, noback,
3728 nofor, table))
3729 ok = 0;
3730 break;
3731 case CTO_Hyphen:
3732 case CTO_DecPoint:
3733 // case CTO_Apostrophe:
3734 // case CTO_Initial:
3735 if (getRuleCharsText(nested, &ruleChars, &lastToken))
3736 if (getRuleDotsPattern(nested, &ruleDots, &lastToken)) {
3737 if (ruleChars.length != 1 || ruleDots.length < 1) {
3738 compileError(nested,
3739 "One Unicode character and at least one cell are "
3740 "required.");
3741 ok = 0;
3742 }
3743 if (!addRule(nested, opcode, &ruleChars, &ruleDots, after, before,
3744 newRuleOffset, newRule, noback, nofor, table))
3745 ok = 0;
3746 // if (opcode == CTO_DecPoint)
3747 // {
3748 // TranslationTableCharacter *c =
3749 // compile_findCharOrDots(ruleChars.chars[0], 0);
3750 // if(c)
3751 // c->attributes |= CTC_NumericMode;
3752 // }
3753 }
3754 break;
3755 default:
3756 compileError(nested, "unimplemented opcode.");
3757 ok = 0;
3758 break;
3759 }
3760 }
3761
3762 if (patterns != NULL) free(patterns);
3763
3764 return ok;
3765 }
3766
3767 int EXPORT_CALL
lou_readCharFromFile(const char * fileName,int * mode)3768 lou_readCharFromFile(const char *fileName, int *mode) {
3769 /* Read a character from a file, whether big-endian, little-endian or
3770 * ASCII8 */
3771 int ch;
3772 static FileInfo nested;
3773 if (fileName == NULL) return 0;
3774 if (*mode == 1) {
3775 *mode = 0;
3776 nested.fileName = fileName;
3777 nested.encoding = noEncoding;
3778 nested.status = 0;
3779 nested.lineNumber = 0;
3780 if (!(nested.in = fopen(nested.fileName, "r"))) {
3781 _lou_logMessage(LOU_LOG_ERROR, "Cannot open file '%s'", nested.fileName);
3782 *mode = 1;
3783 return EOF;
3784 }
3785 }
3786 if (nested.in == NULL) {
3787 *mode = 1;
3788 return EOF;
3789 }
3790 ch = getAChar(&nested);
3791 if (ch == EOF) {
3792 fclose(nested.in);
3793 nested.in = NULL;
3794 *mode = 1;
3795 }
3796 return ch;
3797 }
3798
3799 static int
compileString(const char * inString,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)3800 compileString(const char *inString, TranslationTableHeader **table,
3801 DisplayTableHeader **displayTable) {
3802 /* This function can be used to make changes to tables on the fly. */
3803 int k;
3804 FileInfo nested;
3805 if (inString == NULL) return 0;
3806 memset(&nested, 0, sizeof(nested));
3807 nested.fileName = inString;
3808 nested.encoding = noEncoding;
3809 nested.lineNumber = 1;
3810 nested.status = 0;
3811 nested.linepos = 0;
3812 for (k = 0; inString[k]; k++) nested.line[k] = inString[k];
3813 nested.line[k] = 0;
3814 nested.linelen = k;
3815 return compileRule(&nested, NULL, NULL, table, displayTable);
3816 }
3817
3818 static int
setDefaults(TranslationTableHeader * table)3819 setDefaults(TranslationTableHeader *table) {
3820 if (!table->emphRules[emph1Rule][lenPhraseOffset])
3821 table->emphRules[emph1Rule][lenPhraseOffset] = 4;
3822 if (!table->emphRules[emph2Rule][lenPhraseOffset])
3823 table->emphRules[emph2Rule][lenPhraseOffset] = 4;
3824 if (!table->emphRules[emph3Rule][lenPhraseOffset])
3825 table->emphRules[emph3Rule][lenPhraseOffset] = 4;
3826 if (table->numPasses == 0) table->numPasses = 1;
3827 return 1;
3828 }
3829
3830 /* =============== *
3831 * TABLE RESOLVING *
3832 * =============== *
3833 *
3834 * A table resolver is a function that resolves a `tableList` path against a
3835 * `base` path, and returns the resolved table(s) as a list of absolute file
3836 * paths.
3837 *
3838 * The function must have the following signature:
3839 *
3840 * char ** (const char * tableList, const char * base)
3841 *
 * In general, `tableList` is a path in the broad sense. The default
 * implementation accepts only *file* paths, but another implementation could,
 * for instance, handle URIs. `base`, however, is always a file path.
 *
 * The idea is to give other programs that use liblouis the ability to define
 * their own table resolver (in C, Java, Python, etc.) when the default
 * resolver is not satisfactory (see also lou_registerTableResolver).
3849 *
3850 */
3851
3852 /**
3853 * Resolve a single (sub)table.
3854 *
3855 * Tries to resolve `table` against `base` if base is an absolute path. If
3856 * that fails, searches `searchPath`.
3857 *
3858 */
3859 static char *
resolveSubtable(const char * table,const char * base,const char * searchPath)3860 resolveSubtable(const char *table, const char *base, const char *searchPath) {
3861 char *tableFile;
3862 static struct stat info;
3863
3864 if (table == NULL || table[0] == '\0') return NULL;
3865 tableFile = (char *)malloc(MAXSTRING * sizeof(char) * 2);
3866
3867 //
3868 // First try to resolve against base
3869 //
3870 if (base) {
3871 int k;
3872 strcpy(tableFile, base);
3873 k = (int)strlen(tableFile);
3874 while (k >= 0 && tableFile[k] != '/' && tableFile[k] != '\\') k--;
3875 tableFile[++k] = '\0';
3876 strcat(tableFile, table);
3877 if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3878 _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3879 return tableFile;
3880 }
3881 }
3882
3883 //
3884 // It could be an absolute path, or a path relative to the current working
3885 // directory
3886 //
3887 strcpy(tableFile, table);
3888 if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3889 _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3890 return tableFile;
3891 }
3892
3893 //
3894 // Then search `LOUIS_TABLEPATH`, `dataPath` and `programPath`
3895 //
3896 if (searchPath[0] != '\0') {
3897 char *dir;
3898 int last;
3899 char *cp;
3900 char *searchPath_copy = strdup(searchPath);
3901 for (dir = searchPath_copy;; dir = cp + 1) {
3902 for (cp = dir; *cp != '\0' && *cp != ','; cp++)
3903 ;
3904 last = (*cp == '\0');
3905 *cp = '\0';
3906 if (dir == cp) dir = ".";
3907 sprintf(tableFile, "%s%c%s", dir, DIR_SEP, table);
3908 if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3909 _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3910 free(searchPath_copy);
3911 return tableFile;
3912 }
3913 if (last) break;
3914 sprintf(tableFile, "%s%c%s%c%s%c%s", dir, DIR_SEP, "liblouis", DIR_SEP,
3915 "tables", DIR_SEP, table);
3916 if (stat(tableFile, &info) == 0 && !(info.st_mode & S_IFDIR)) {
3917 _lou_logMessage(LOU_LOG_DEBUG, "found table %s", tableFile);
3918 free(searchPath_copy);
3919 return tableFile;
3920 }
3921 if (last) break;
3922 }
3923 free(searchPath_copy);
3924 }
3925 free(tableFile);
3926 return NULL;
3927 }
3928
3929 char *EXPORT_CALL
_lou_getTablePath(void)3930 _lou_getTablePath(void) {
3931 char searchPath[MAXSTRING];
3932 char *path;
3933 char *cp;
3934 int envset = 0;
3935 cp = searchPath;
3936 path = getenv("LOUIS_TABLEPATH");
3937 if (path != NULL && path[0] != '\0') {
3938 envset = 1;
3939 cp += sprintf(cp, ",%s", path);
3940 }
3941 path = lou_getDataPath();
3942 if (path != NULL && path[0] != '\0')
3943 cp += sprintf(cp, ",%s%c%s%c%s", path, DIR_SEP, "liblouis", DIR_SEP, "tables");
3944 if (!envset) {
3945 #ifdef _WIN32
3946 path = lou_getProgramPath();
3947 if (path != NULL) {
3948 if (path[0] != '\0')
3949 cp += sprintf(cp, ",%s%s", path, "\\share\\liblouis\\tables");
3950 free(path);
3951 }
3952 #else
3953 cp += sprintf(cp, ",%s", TABLESDIR);
3954 #endif
3955 }
3956 if (searchPath[0] != '\0')
3957 return strdup(&searchPath[1]);
3958 else
3959 return strdup(".");
3960 }
3961
3962 /**
3963 * The default table resolver
3964 *
3965 * Tries to resolve tableList against base. The search path is set to
3966 * `LOUIS_TABLEPATH`, `dataPath` and `programPath` (in that order).
3967 *
3968 * @param table A file path, may be absolute or relative. May be a list of
3969 * tables separated by comma's. In that case, the first table
3970 * is used as the base for the other subtables.
3971 * @param base A file path or directory path, or NULL.
3972 * @return The file paths of the resolved subtables, or NULL if the table
3973 * could not be resolved.
3974 *
3975 */
3976 char **EXPORT_CALL
_lou_defaultTableResolver(const char * tableList,const char * base)3977 _lou_defaultTableResolver(const char *tableList, const char *base) {
3978 char *searchPath;
3979 char **tableFiles;
3980 char *subTable;
3981 char *tableList_copy;
3982 char *cp;
3983 int last;
3984 int k;
3985
3986 /* Set up search path */
3987 searchPath = _lou_getTablePath();
3988
3989 /* Count number of subtables in table list */
3990 k = 0;
3991 for (cp = (char *)tableList; *cp != '\0'; cp++)
3992 if (*cp == ',') k++;
3993 tableFiles = (char **)calloc(k + 2, sizeof(char *));
3994 if (!tableFiles) _lou_outOfMemory();
3995
3996 /* Resolve subtables */
3997 k = 0;
3998 tableList_copy = strdup(tableList);
3999 for (subTable = tableList_copy;; subTable = cp + 1) {
4000 for (cp = subTable; *cp != '\0' && *cp != ','; cp++)
4001 ;
4002 last = (*cp == '\0');
4003 *cp = '\0';
4004 if (!(tableFiles[k++] = resolveSubtable(subTable, base, searchPath))) {
4005 char *path;
4006 _lou_logMessage(LOU_LOG_ERROR, "Cannot resolve table '%s'", subTable);
4007 path = getenv("LOUIS_TABLEPATH");
4008 if (path != NULL && path[0] != '\0')
4009 _lou_logMessage(LOU_LOG_ERROR, "LOUIS_TABLEPATH=%s", path);
4010 free(searchPath);
4011 free(tableList_copy);
4012 free_tablefiles(tableFiles);
4013 return NULL;
4014 }
4015 if (k == 1) base = subTable;
4016 if (last) break;
4017 }
4018 free(searchPath);
4019 free(tableList_copy);
4020 tableFiles[k] = NULL;
4021 return tableFiles;
4022 }
4023
/* The table resolver currently in use. It starts out as the default resolver
 * and is replaced by lou_registerTableResolver(). */
static char **(EXPORT_CALL *tableResolver)(
		const char *tableList, const char *base) = &_lou_defaultTableResolver;
4026
/**
 * Make a deep copy of a NULL-terminated array of strings.
 *
 * @param array The array to copy, or NULL.
 * @return A newly allocated, NULL-terminated array of newly allocated
 *         strings, or NULL when `array` is NULL or memory runs out (in which
 *         case nothing stays allocated).
 */
static char **
copyStringArray(char **array) {
	size_t len = 0;
	size_t i;
	char **copy;
	if (!array) return NULL;
	while (array[len]) len++;
	copy = malloc((len + 1) * sizeof(*copy));
	if (!copy) return NULL;
	for (i = 0; i < len; i++) {
		const size_t size = strlen(array[i]) + 1;
		copy[i] = malloc(size);
		if (!copy[i]) {
			/* undo the partial copy so that the caller sees plain failure */
			while (i > 0) free(copy[--i]);
			free(copy);
			return NULL;
		}
		memcpy(copy[i], array[i], size);
	}
	copy[len] = NULL;
	return copy;
}
4042
4043 char **EXPORT_CALL
_lou_resolveTable(const char * tableList,const char * base)4044 _lou_resolveTable(const char *tableList, const char *base) {
4045 char **tableFiles = (*tableResolver)(tableList, base);
4046 char **result = copyStringArray(tableFiles);
4047 if (tableResolver == &_lou_defaultTableResolver) free_tablefiles(tableFiles);
4048 return result;
4049 }
4050
4051 /**
4052 * Register a new table resolver. Overrides the default resolver.
4053 *
4054 * @param resolver The new resolver as a function pointer.
4055 *
4056 */
4057 void EXPORT_CALL
lou_registerTableResolver(char ** (EXPORT_CALL * resolver)(const char * tableList,const char * base))4058 lou_registerTableResolver(
4059 char **(EXPORT_CALL *resolver)(const char *tableList, const char *base)) {
4060 tableResolver = resolver;
4061 }
4062
4063 static int fileCount = 0;
4064
4065 /**
4066 * Compile a single file
4067 *
4068 */
4069 static int
compileFile(const char * fileName,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)4070 compileFile(const char *fileName, TranslationTableHeader **table,
4071 DisplayTableHeader **displayTable) {
4072 FileInfo nested;
4073 fileCount++;
4074 nested.fileName = fileName;
4075 nested.encoding = noEncoding;
4076 nested.status = 0;
4077 nested.lineNumber = 0;
4078 if ((nested.in = fopen(nested.fileName, "rb"))) {
4079 while (_lou_getALine(&nested))
4080 compileRule(&nested, NULL, NULL, table, displayTable);
4081 fclose(nested.in);
4082 return 1;
4083 } else
4084 _lou_logMessage(LOU_LOG_ERROR, "Cannot open table '%s'", nested.fileName);
4085 errorCount++;
4086 return 0;
4087 }
4088
/**
 * Release a NULL-terminated array of strings: every string first, then the
 * array itself. A NULL array is ignored.
 */
static void
free_tablefiles(char **tables) {
	size_t i;
	if (tables == NULL) return;
	for (i = 0; tables[i] != NULL; i++) {
		free(tables[i]);
	}
	free(tables);
}
4099
4100 /**
4101 * Implement include opcode
4102 *
4103 */
4104 static int
includeFile(FileInfo * nested,CharsString * includedFile,TranslationTableHeader ** table,DisplayTableHeader ** displayTable)4105 includeFile(FileInfo *nested, CharsString *includedFile, TranslationTableHeader **table,
4106 DisplayTableHeader **displayTable) {
4107 int k;
4108 char includeThis[MAXSTRING];
4109 char **tableFiles;
4110 int rv;
4111 for (k = 0; k < includedFile->length; k++)
4112 includeThis[k] = (char)includedFile->chars[k];
4113 if (k >= MAXSTRING) {
4114 compileError(nested, "Include statement too long: 'include %s'", includeThis);
4115 return 0;
4116 }
4117 includeThis[k] = 0;
4118 tableFiles = _lou_resolveTable(includeThis, nested->fileName);
4119 if (tableFiles == NULL) {
4120 errorCount++;
4121 return 0;
4122 }
4123 if (tableFiles[1] != NULL) {
4124 free_tablefiles(tableFiles);
4125 compileError(nested,
4126 "Table list not supported in include statement: 'include %s'",
4127 includeThis);
4128 return 0;
4129 }
4130 rv = compileFile(*tableFiles, table, displayTable);
4131 free_tablefiles(tableFiles);
4132 return rv;
4133 }
4134
4135 /**
4136 * Compile source tables into a table in memory
4137 *
4138 */
4139 static int
compileTable(const char * tableList,const char * displayTableList,TranslationTableHeader ** translationTable,DisplayTableHeader ** displayTable)4140 compileTable(const char *tableList, const char *displayTableList,
4141 TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) {
4142 char **tableFiles;
4143 char **subTable;
4144 if (translationTable && !tableList) return 0;
4145 if (displayTable && !displayTableList) return 0;
4146 if (!translationTable && !displayTable) return 0;
4147 if (translationTable) *translationTable = NULL;
4148 if (displayTable) *displayTable = NULL;
4149 errorCount = warningCount = fileCount = 0;
4150 if (!opcodeLengths[0]) {
4151 TranslationTableOpcode opcode;
4152 for (opcode = 0; opcode < CTO_None; opcode++)
4153 opcodeLengths[opcode] = (short)strlen(opcodeNames[opcode]);
4154 }
4155 if (translationTable) allocateTranslationTable(NULL, translationTable);
4156 if (displayTable) allocateDisplayTable(NULL, displayTable);
4157
4158 if (translationTable) {
4159 (*translationTable)->emphClasses[0] = NULL;
4160 (*translationTable)->characterClasses = NULL;
4161 (*translationTable)->ruleNames = NULL;
4162 }
4163
4164 /* Compile things that are necesary for the proper operation of
4165 * liblouis or liblouisxml or liblouisutdml */
4166 /* TODO: These definitions seem to be necessary for proper functioning of
4167 liblouisutdml. Find a way to satisfy those requirements without hard coding
4168 some characters in every table notably behind the users back */
4169 compileString("space \\xffff 123456789abcdef LOU_ENDSEGMENT", translationTable,
4170 displayTable);
4171
4172 if (displayTable && translationTable && strcmp(tableList, displayTableList) == 0) {
4173 /* Compile the display and translation tables in one go */
4174
4175 /* Compile all subtables in the list */
4176 if (!(tableFiles = _lou_resolveTable(tableList, NULL))) {
4177 errorCount++;
4178 goto cleanup;
4179 }
4180 for (subTable = tableFiles; *subTable; subTable++)
4181 if (!compileFile(*subTable, translationTable, displayTable)) goto cleanup;
4182 } else {
4183 /* Compile the display and translation tables separately */
4184
4185 if (displayTable) {
4186 if (!(tableFiles = _lou_resolveTable(displayTableList, NULL))) {
4187 errorCount++;
4188 goto cleanup;
4189 }
4190 for (subTable = tableFiles; *subTable; subTable++)
4191 if (!compileFile(*subTable, NULL, displayTable)) goto cleanup;
4192 free_tablefiles(tableFiles);
4193 tableFiles = NULL;
4194 }
4195 if (translationTable) {
4196 if (!(tableFiles = _lou_resolveTable(tableList, NULL))) {
4197 errorCount++;
4198 goto cleanup;
4199 }
4200 for (subTable = tableFiles; *subTable; subTable++)
4201 if (!compileFile(*subTable, translationTable, NULL)) goto cleanup;
4202 }
4203 }
4204
4205 /* Clean up after compiling files */
4206 cleanup:
4207 free_tablefiles(tableFiles);
4208 if (warningCount) _lou_logMessage(LOU_LOG_WARN, "%d warnings issued", warningCount);
4209 if (!errorCount) {
4210 if (translationTable) setDefaults(*translationTable);
4211 return 1;
4212 } else {
4213 _lou_logMessage(LOU_LOG_ERROR, "%d errors found.", errorCount);
4214 if (translationTable) {
4215 if (*translationTable) free(*translationTable);
4216 *translationTable = NULL;
4217 }
4218 if (displayTable) {
4219 if (*displayTable) free(*displayTable);
4220 *displayTable = NULL;
4221 }
4222 return 0;
4223 }
4224 }
4225
4226 /* Return the emphasis classes declared in tableList. */
4227 char const **EXPORT_CALL
lou_getEmphClasses(const char * tableList)4228 lou_getEmphClasses(const char *tableList) {
4229 const char *names[MAX_EMPH_CLASSES + 1];
4230 unsigned int count = 0;
4231 const TranslationTableHeader *table = _lou_getTranslationTable(tableList);
4232 if (!table) return NULL;
4233
4234 while (count < MAX_EMPH_CLASSES) {
4235 char const *name = table->emphClasses[count];
4236 if (!name) break;
4237 names[count++] = name;
4238 }
4239 names[count++] = NULL;
4240
4241 {
4242 unsigned int size = count * sizeof(names[0]);
4243 char const **result = malloc(size);
4244 if (!result) return NULL;
4245 /* The void* cast is necessary to stop MSVC from warning about
4246 * different 'const' qualifiers (C4090). */
4247 memcpy((void *)result, names, size);
4248 return result;
4249 }
4250 }
4251
4252 void
4253 getTable(const char *tableList, const char *displayTableList,
4254 TranslationTableHeader **translationTable, DisplayTableHeader **displayTable);
4255
4256 void EXPORT_CALL
_lou_getTable(const char * tableList,const char * displayTableList,const TranslationTableHeader ** translationTable,const DisplayTableHeader ** displayTable)4257 _lou_getTable(const char *tableList, const char *displayTableList,
4258 const TranslationTableHeader **translationTable,
4259 const DisplayTableHeader **displayTable) {
4260 TranslationTableHeader *newTable;
4261 DisplayTableHeader *newDisplayTable;
4262 getTable(tableList, displayTableList, &newTable, &newDisplayTable);
4263 *translationTable = newTable;
4264 *displayTable = newDisplayTable;
4265 }
4266
4267 /* Checks and loads tableList. */
4268 const void *EXPORT_CALL
lou_getTable(const char * tableList)4269 lou_getTable(const char *tableList) {
4270 const TranslationTableHeader *table;
4271 const DisplayTableHeader *displayTable;
4272 _lou_getTable(tableList, tableList, &table, &displayTable);
4273 if (!table || !displayTable) return NULL;
4274 return table;
4275 }
4276
4277 const TranslationTableHeader *EXPORT_CALL
_lou_getTranslationTable(const char * tableList)4278 _lou_getTranslationTable(const char *tableList) {
4279 TranslationTableHeader *table;
4280 getTable(tableList, NULL, &table, NULL);
4281 return table;
4282 }
4283
4284 const DisplayTableHeader *EXPORT_CALL
_lou_getDisplayTable(const char * tableList)4285 _lou_getDisplayTable(const char *tableList) {
4286 DisplayTableHeader *table;
4287 getTable(NULL, tableList, NULL, &table);
4288 return table;
4289 }
4290
4291 void
getTable(const char * translationTableList,const char * displayTableList,TranslationTableHeader ** translationTable,DisplayTableHeader ** displayTable)4292 getTable(const char *translationTableList, const char *displayTableList,
4293 TranslationTableHeader **translationTable, DisplayTableHeader **displayTable) {
4294 /* Keep track of which tables have already been compiled */
4295 int translationTableListLen, displayTableListLen = 0;
4296 if (translationTableList == NULL || *translationTableList == 0)
4297 translationTable = NULL;
4298 if (displayTableList == NULL || *displayTableList == 0) displayTable = NULL;
4299 /* See if translation table has already been compiled */
4300 if (translationTable) {
4301 translationTableListLen = (int)strlen(translationTableList);
4302 *translationTable = NULL;
4303 TranslationTableChainEntry *currentEntry = translationTableChain;
4304 TranslationTableChainEntry *prevEntry = NULL;
4305 while (currentEntry != NULL) {
4306 if (translationTableListLen == currentEntry->tableListLength &&
4307 (memcmp(¤tEntry->tableList[0], translationTableList,
4308 translationTableListLen)) == 0) {
4309 /* Move the table to the top of the table chain. */
4310 if (prevEntry != NULL) {
4311 prevEntry->next = currentEntry->next;
4312 currentEntry->next = translationTableChain;
4313 translationTableChain = currentEntry;
4314 }
4315 *translationTable = currentEntry->table;
4316 break;
4317 }
4318 prevEntry = currentEntry;
4319 currentEntry = currentEntry->next;
4320 }
4321 }
4322 /* See if display table has already been compiled */
4323 if (displayTable) {
4324 displayTableListLen = (int)strlen(displayTableList);
4325 *displayTable = NULL;
4326 DisplayTableChainEntry *currentEntry = displayTableChain;
4327 DisplayTableChainEntry *prevEntry = NULL;
4328 while (currentEntry != NULL) {
4329 if (displayTableListLen == currentEntry->tableListLength &&
4330 (memcmp(¤tEntry->tableList[0], displayTableList,
4331 displayTableListLen)) == 0) {
4332 /* Move the table to the top of the table chain. */
4333 if (prevEntry != NULL) {
4334 prevEntry->next = currentEntry->next;
4335 currentEntry->next = displayTableChain;
4336 displayTableChain = currentEntry;
4337 }
4338 *displayTable = currentEntry->table;
4339 break;
4340 }
4341 prevEntry = currentEntry;
4342 currentEntry = currentEntry->next;
4343 }
4344 }
4345 if ((translationTable && *translationTable == NULL) ||
4346 (displayTable && *displayTable == NULL)) {
4347 TranslationTableHeader *newTranslationTable = NULL;
4348 DisplayTableHeader *newDisplayTable = NULL;
4349 if (compileTable(translationTableList, displayTableList,
4350 (translationTable && *translationTable == NULL) ? &newTranslationTable
4351 : NULL,
4352 (displayTable && *displayTable == NULL) ? &newDisplayTable : NULL)) {
4353 /* Add a new entry to the top of the table chain. */
4354 if (newTranslationTable != NULL) {
4355 int entrySize =
4356 sizeof(TranslationTableChainEntry) + translationTableListLen;
4357 TranslationTableChainEntry *newEntry = malloc(entrySize);
4358 if (!newEntry) _lou_outOfMemory();
4359 newEntry->next = translationTableChain;
4360 newEntry->table = newTranslationTable;
4361 newEntry->tableListLength = translationTableListLen;
4362 memcpy(&newEntry->tableList[0], translationTableList,
4363 translationTableListLen);
4364 translationTableChain = newEntry;
4365 *translationTable = newTranslationTable;
4366 }
4367 if (newDisplayTable != NULL) {
4368 int entrySize = sizeof(DisplayTableChainEntry) + displayTableListLen;
4369 DisplayTableChainEntry *newEntry = malloc(entrySize);
4370 if (!newEntry) _lou_outOfMemory();
4371 newEntry->next = displayTableChain;
4372 newEntry->table = newDisplayTable;
4373 newEntry->tableListLength = displayTableListLen;
4374 memcpy(&newEntry->tableList[0], displayTableList, displayTableListLen);
4375 displayTableChain = newEntry;
4376 *displayTable = newDisplayTable;
4377 }
4378 } else {
4379 _lou_logMessage(
4380 LOU_LOG_ERROR, "%s could not be compiled", translationTableList);
4381 return;
4382 }
4383 }
4384 }
4385
4386 int EXPORT_CALL
lou_checkTable(const char * tableList)4387 lou_checkTable(const char *tableList) {
4388 if (lou_getTable(tableList)) return 1;
4389 return 0;
4390 }
4391
4392 formtype EXPORT_CALL
lou_getTypeformForEmphClass(const char * tableList,const char * emphClass)4393 lou_getTypeformForEmphClass(const char *tableList, const char *emphClass) {
4394 int i;
4395 const TranslationTableHeader *table = _lou_getTranslationTable(tableList);
4396 if (!table) return 0;
4397 for (i = 0; table->emphClasses[i]; i++)
4398 if (strcmp(emphClass, table->emphClasses[i]) == 0) return italic << i;
4399 return 0;
4400 }
4401
4402 static unsigned char *destSpacing = NULL;
4403 static int sizeDestSpacing = 0;
4404 static formtype *typebuf = NULL;
4405 static unsigned int *wordBuffer = NULL;
4406 static EmphasisInfo *emphasisBuffer = NULL;
4407 static int sizeTypebuf = 0;
4408 static widechar *passbuf[MAXPASSBUF] = { NULL };
4409 static int sizePassbuf[MAXPASSBUF] = { 0 };
4410 static int *posMapping1 = NULL;
4411 static int sizePosMapping1 = 0;
4412 static int *posMapping2 = NULL;
4413 static int sizePosMapping2 = 0;
4414 static int *posMapping3 = NULL;
4415 static int sizePosMapping3 = 0;
4416 void *EXPORT_CALL
_lou_allocMem(AllocBuf buffer,int index,int srcmax,int destmax)4417 _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax) {
4418 if (srcmax < 1024) srcmax = 1024;
4419 if (destmax < 1024) destmax = 1024;
4420 switch (buffer) {
4421 case alloc_typebuf:
4422 if (destmax > sizeTypebuf) {
4423 if (typebuf != NULL) free(typebuf);
4424 // TODO: should this be srcmax?
4425 typebuf = malloc((destmax + 4) * sizeof(formtype));
4426 if (!typebuf) _lou_outOfMemory();
4427 sizeTypebuf = destmax;
4428 }
4429 return typebuf;
4430
4431 case alloc_wordBuffer:
4432
4433 if (wordBuffer != NULL) free(wordBuffer);
4434 wordBuffer = malloc((srcmax + 4) * sizeof(unsigned int));
4435 if (!wordBuffer) _lou_outOfMemory();
4436 return wordBuffer;
4437
4438 case alloc_emphasisBuffer:
4439
4440 if (emphasisBuffer != NULL) free(emphasisBuffer);
4441 emphasisBuffer = malloc((srcmax + 4) * sizeof(EmphasisInfo));
4442 if (!emphasisBuffer) _lou_outOfMemory();
4443 return emphasisBuffer;
4444
4445 case alloc_destSpacing:
4446 if (destmax > sizeDestSpacing) {
4447 if (destSpacing != NULL) free(destSpacing);
4448 destSpacing = malloc(destmax + 4);
4449 if (!destSpacing) _lou_outOfMemory();
4450 sizeDestSpacing = destmax;
4451 }
4452 return destSpacing;
4453 case alloc_passbuf:
4454 if (index < 0 || index >= MAXPASSBUF) {
4455 _lou_logMessage(LOU_LOG_FATAL, "Index out of bounds: %d\n", index);
4456 exit(3);
4457 }
4458 if (destmax > sizePassbuf[index]) {
4459 if (passbuf[index] != NULL) free(passbuf[index]);
4460 passbuf[index] = malloc((destmax + 4) * CHARSIZE);
4461 if (!passbuf[index]) _lou_outOfMemory();
4462 sizePassbuf[index] = destmax;
4463 }
4464 return passbuf[index];
4465 case alloc_posMapping1: {
4466 int mapSize;
4467 if (srcmax >= destmax)
4468 mapSize = srcmax;
4469 else
4470 mapSize = destmax;
4471 if (mapSize > sizePosMapping1) {
4472 if (posMapping1 != NULL) free(posMapping1);
4473 posMapping1 = malloc((mapSize + 4) * sizeof(int));
4474 if (!posMapping1) _lou_outOfMemory();
4475 sizePosMapping1 = mapSize;
4476 }
4477 }
4478 return posMapping1;
4479 case alloc_posMapping2: {
4480 int mapSize;
4481 if (srcmax >= destmax)
4482 mapSize = srcmax;
4483 else
4484 mapSize = destmax;
4485 if (mapSize > sizePosMapping2) {
4486 if (posMapping2 != NULL) free(posMapping2);
4487 posMapping2 = malloc((mapSize + 4) * sizeof(int));
4488 if (!posMapping2) _lou_outOfMemory();
4489 sizePosMapping2 = mapSize;
4490 }
4491 }
4492 return posMapping2;
4493 case alloc_posMapping3: {
4494 int mapSize;
4495 if (srcmax >= destmax)
4496 mapSize = srcmax;
4497 else
4498 mapSize = destmax;
4499 if (mapSize > sizePosMapping3) {
4500 if (posMapping3 != NULL) free(posMapping3);
4501 posMapping3 = malloc((mapSize + 4) * sizeof(int));
4502 if (!posMapping3) _lou_outOfMemory();
4503 sizePosMapping3 = mapSize;
4504 }
4505 }
4506 return posMapping3;
4507 default:
4508 return NULL;
4509 }
4510 }
4511
4512 void EXPORT_CALL
lou_free(void)4513 lou_free(void) {
4514 TranslationTableChainEntry *currentEntry;
4515 TranslationTableChainEntry *previousEntry;
4516 lou_logEnd();
4517 if (translationTableChain != NULL) {
4518 currentEntry = translationTableChain;
4519 while (currentEntry) {
4520 int i;
4521 TranslationTableHeader *t = (TranslationTableHeader *)currentEntry->table;
4522 for (i = 0; t->emphClasses[i]; i++) free(t->emphClasses[i]);
4523 if (t->characterClasses) deallocateCharacterClasses(t);
4524 if (t->ruleNames) deallocateRuleNames(t);
4525 free(t);
4526 previousEntry = currentEntry;
4527 currentEntry = currentEntry->next;
4528 free(previousEntry);
4529 }
4530 translationTableChain = NULL;
4531 }
4532 if (typebuf != NULL) free(typebuf);
4533 typebuf = NULL;
4534 if (wordBuffer != NULL) free(wordBuffer);
4535 wordBuffer = NULL;
4536 if (emphasisBuffer != NULL) free(emphasisBuffer);
4537 emphasisBuffer = NULL;
4538 sizeTypebuf = 0;
4539 if (destSpacing != NULL) free(destSpacing);
4540 destSpacing = NULL;
4541 sizeDestSpacing = 0;
4542 {
4543 int k;
4544 for (k = 0; k < MAXPASSBUF; k++) {
4545 if (passbuf[k] != NULL) free(passbuf[k]);
4546 passbuf[k] = NULL;
4547 sizePassbuf[k] = 0;
4548 }
4549 }
4550 if (posMapping1 != NULL) free(posMapping1);
4551 posMapping1 = NULL;
4552 sizePosMapping1 = 0;
4553 if (posMapping2 != NULL) free(posMapping2);
4554 posMapping2 = NULL;
4555 sizePosMapping2 = 0;
4556 if (posMapping3 != NULL) free(posMapping3);
4557 posMapping3 = NULL;
4558 sizePosMapping3 = 0;
4559 opcodeLengths[0] = 0;
4560 }
4561
4562 const char *EXPORT_CALL
lou_version(void)4563 lou_version(void) {
4564 static const char *version = PACKAGE_VERSION;
4565 return version;
4566 }
4567
4568 int EXPORT_CALL
lou_charSize(void)4569 lou_charSize(void) {
4570 return CHARSIZE;
4571 }
4572
4573 int EXPORT_CALL
lou_compileString(const char * tableList,const char * inString)4574 lou_compileString(const char *tableList, const char *inString) {
4575 TranslationTableHeader *table;
4576 DisplayTableHeader *displayTable;
4577 getTable(tableList, tableList, &table, &displayTable);
4578 if (!table) return 0;
4579 if (!compileString(inString, &table, &displayTable)) return 0;
4580 return 1;
4581 }
4582
4583 int EXPORT_CALL
_lou_compileTranslationRule(const char * tableList,const char * inString)4584 _lou_compileTranslationRule(const char *tableList, const char *inString) {
4585 TranslationTableHeader *table;
4586 getTable(tableList, NULL, &table, NULL);
4587 return compileString(inString, &table, NULL);
4588 }
4589
4590 int EXPORT_CALL
_lou_compileDisplayRule(const char * tableList,const char * inString)4591 _lou_compileDisplayRule(const char *tableList, const char *inString) {
4592 DisplayTableHeader *table;
4593 getTable(NULL, tableList, NULL, &table);
4594 return compileString(inString, NULL, &table);
4595 }
4596
/**
 * This procedure provides a target for calls that serve as breakpoints
 * for gdb.
 */
4601 // char *EXPORT_CALL
4602 // lou_getTablePaths (void)
4603 // {
4604 // static char paths[MAXSTRING];
4605 // static char scratchBuf[MAXSTRING];
4606 // char *pathList;
4607 // strcpy (paths, tablePath);
4608 // strcat (paths, ",");
4609 // pathList = getenv ("LOUIS_TABLEPATH");
4610 // if (pathList)
4611 // {
4612 // strcat (paths, pathList);
4613 // strcat (paths, ",");
4614 // }
4615 // pathList = getcwd (scratchBuf, MAXSTRING);
4616 // if (pathList)
4617 // {
4618 // strcat (paths, pathList);
4619 // strcat (paths, ",");
4620 // }
4621 // pathList = lou_getDataPath ();
4622 // if (pathList)
4623 // {
4624 // strcat (paths, pathList);
4625 // strcat (paths, ",");
4626 // }
4627 // #ifdef _WIN32
4628 // strcpy (paths, lou_getProgramPath ());
4629 // strcat (paths, "\\share\\liblouss\\tables\\");
4630 // #else
4631 // strcpy (paths, TABLESDIR);
4632 // #endif
4633 // return paths;
4634 // }
4635