1 /* liblouis Braille Translation and Back-Translation Library
2
3 Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4 BRLTTY Team
5
6 Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7 Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8 Copyright (C) 2016 Mike Gray, American Printing House for the Blind
9 Copyright (C) 2016 Davy Kager, Dedicon
10
11 This file is part of liblouis.
12
13 liblouis is free software: you can redistribute it and/or modify it
14 under the terms of the GNU Lesser General Public License as published
15 by the Free Software Foundation, either version 2.1 of the License, or
16 (at your option) any later version.
17
18 liblouis is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27 /**
28 * @file
29 * @brief Internal API of liblouis
30 */
31
32 #ifndef __LOUIS_H_
33 #define __LOUIS_H_
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif /* __cplusplus */
38
39 #include <stdio.h>
40 #include "liblouis.h"
41
/* Platform-dependent separator characters.
 *
 * Unlike Windows, Mingw can handle forward slashes as directory
 * separator, see http://mingw.org/wiki/Posix_path_conversion
 *
 * PATH_SEP is presumably the separator between entries of a search path
 * list and DIR_SEP the separator between path components -- confirm
 * against the users of these macros. */
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__MINGW64__)
#define PATH_SEP ';'
#define DIR_SEP '\\'
#else
#define PATH_SEP ':'
#define DIR_SEP '/'
#endif

/* MSVC spells the case-insensitive string comparison _stricmp. */
#ifdef _MSC_VER
#define strcasecmp _stricmp
#endif
55
/* Fixed sizes of various buffers and tables. */
#define NUMVAR 50
#define EMPHMODECHARSSIZE 256
#define NOEMPHCHARSSIZE 256
#define LETSIGNSIZE 256
// noletsignbefore and noletsignafter are hardly ever used and usually
// only with very few chars, so they only need a small array
#define LETSIGNBEFORESIZE 64
#define LETSIGNAFTERSIZE 64
#define SEQPATTERNSIZE 128
/* size in bytes of one widechar */
#define CHARSIZE sizeof(widechar)
/* number of widechars reserved in TranslationTableRule.charsdots */
#define DEFAULTRULESIZE 50
67
/* Pair associating an unsigned 64-bit (at least) integer key with a
 * single char value. */
typedef struct intCharTupple {
    unsigned long long key;
    char value;
} intCharTupple;
72
/* HASHNUM must be prime */
#define HASHNUM 1123

#define MAXPASS 4
#define MAXSTRING 2048
#define MAX_MACRO_VAR 100     // maximal number of variable substitutions a macro can contain
#define MAX_EMPH_CLASSES 10   // maximal number of emphasis classes
#define MAX_MODES 6           // maximal number of modes that can be handled
#define MAX_SOURCE_FILES 100  // maximal number of files a table can consist of
82
/* Type of the offset fields stored in the table structures. */
typedef unsigned int TranslationTableOffset;

/* Basic type for translation table data, which carries all alignment
 * constraints that fields contained in translation table may have.
 * Notably TranslationTableCharacterAttributes is unsigned long long, so we need
 * at least this big basic type. */
typedef unsigned long long TranslationTableData;
#define OFFSETSIZE sizeof(TranslationTableData)
91
/* Character attribute bits. Each enumerator is a single distinct bit so
 * that several of them can be combined in one value. */
typedef enum {
    /* The first 8 are the predefined character classes. They need to be listed first and
       in this order because of how allocateCharacterClasses works. */
    CTC_Space = 0x1,
    CTC_Letter = 0x2,
    CTC_Digit = 0x4,
    CTC_Punctuation = 0x8,
    CTC_UpperCase = 0x10,
    CTC_LowerCase = 0x20,
    CTC_Math = 0x40,
    CTC_Sign = 0x80,
    CTC_LitDigit = 0x100,
    CTC_CapsMode = 0x200,
    // bit 0x400 used to be taken by CTC_EmphMode
    CTC_NumericMode = 0x800,
    CTC_NumericNoContract = 0x1000,
    CTC_SeqDelimiter = 0x2000,
    CTC_SeqBefore = 0x4000,
    CTC_SeqAfter = 0x8000,
    /* The following 8 are reserved for %0 to %7 (in no particular order) */
    /* Be careful with changing these values (and also CTC_EndOfInput) because in
       pattern_compile_expression they are stored in an unsigned int after cutting off
       the 16 least significant bits. */
    CTC_UserDefined1 = 0x10000,
    CTC_UserDefined2 = 0x20000,
    CTC_UserDefined3 = 0x40000,
    CTC_UserDefined4 = 0x80000,
    CTC_UserDefined5 = 0x100000,
    CTC_UserDefined6 = 0x200000,
    CTC_UserDefined7 = 0x400000,
    CTC_UserDefined8 = 0x800000,
    CTC_EndOfInput = 0x1000000,  // only used by pattern matcher
    CTC_EmpMatch = 0x2000000,    // only used in TranslationTableRule->before and
                                 // TranslationTableRule->after
    CTC_MidEndNumericMode = 0x4000000,
    /* At least 37 more bits available in an unsigned long long (at least 64 bits). Used
       for custom attributes 9 to 45. These need to be the last values of the enum. */
    CTC_UserDefined9 = 0x8000000,
    CTC_UserDefined10 = 0x10000000,
    CTC_UserDefined11 = 0x20000000,
    CTC_UserDefined12 = 0x40000000,
} TranslationTableCharacterAttribute;
134
/* Codes used by the multipass opcodes. Most values are printable ASCII
 * characters; the values from 130 upwards are not.
 *
 * NOTE(review): some enumerators share a value: pass_startGroup and
 * pass_groupstart are both '{', pass_endGroup and pass_groupend are both
 * '}', and pass_mark and pass_repGroup are both 143. Whether this is
 * intentional cannot be told from this header -- confirm before relying
 * on these being distinct (e.g. as labels of one switch statement). */
typedef enum {
    pass_first = '`',
    pass_last = '~',
    pass_lookback = '_',
    pass_string = '\"',
    pass_dots = '@',
    pass_omit = '?',
    pass_startReplace = '[',
    pass_endReplace = ']',
    pass_startGroup = '{',
    pass_endGroup = '}',
    pass_variable = '#',
    pass_not = '!',
    pass_search = '/',
    pass_any = 'a',
    pass_digit = 'd',
    pass_litDigit = 'D',
    pass_letter = 'l',
    pass_math = 'm',
    pass_punctuation = 'p',
    pass_sign = 'S',
    pass_space = 's',
    pass_uppercase = 'U',
    pass_lowercase = 'u',
    pass_class1 = 'w',
    pass_class2 = 'x',
    pass_class3 = 'y',
    pass_class4 = 'z',
    pass_attributes = '$',
    pass_groupstart = '{',
    pass_groupend = '}',
    pass_groupreplace = ';',
    pass_swap = '%',
    pass_hyphen = '-',
    pass_until = '.',
    pass_eq = '=',
    pass_lt = '<',
    pass_gt = '>',
    pass_endTest = 32,
    pass_plus = '+',
    pass_copy = '*',
    pass_leftParen = '(',
    pass_rightParen = ')',
    pass_comma = ',',
    pass_lteq = 130,
    pass_gteq = 131,
    pass_invalidToken = 132,
    pass_noteq = 133,
    pass_and = 134,
    pass_or = 135,
    pass_nameFound = 136,
    pass_numberFound = 137,
    pass_boolean = 138,
    pass_class = 139,
    pass_define = 140,
    pass_emphasis = 141,
    pass_group = 142,
    pass_mark = 143,
    pass_repGroup = 143,
    pass_script = 144,
    pass_noMoreTokens = 145,
    pass_replace = 146,
    pass_if = 147,
    pass_then = 148,
    pass_all = 255
} pass_Codes;
201
/* Holds a combination of character attribute bits. */
typedef unsigned long long TranslationTableCharacterAttributes;
203
204 typedef struct {
205 TranslationTableOffset next;
206 widechar lookFor;
207 widechar found;
208 } CharDotsMapping;
209
210 typedef struct {
211 const char *sourceFile;
212 int sourceLine;
213 TranslationTableOffset next;
214 TranslationTableOffset definitionRule;
215 TranslationTableOffset otherRules;
216 TranslationTableCharacterAttributes attributes;
217 TranslationTableCharacterAttributes mode;
218 TranslationTableOffset compRule;
219 widechar value;
220 TranslationTableOffset basechar;
221 TranslationTableOffset linked;
222 } TranslationTableCharacter;
223
/* Op codes. The enumerators are numbered consecutively starting at 0;
 * several groups below must keep their relative order. */
typedef enum {
    CTO_IncludeFile,
    CTO_Locale, /* Deprecated, do not use */
    CTO_Undefined,
    /* Do not change the order of the following opcodes! */
    CTO_CapsLetter,
    CTO_BegCapsWord,
    CTO_EndCapsWord,
    CTO_BegCaps,
    CTO_EndCaps,
    CTO_BegCapsPhrase,
    CTO_EndCapsPhrase,
    CTO_LenCapsPhrase,
    CTO_ModeLetter,
    CTO_BegModeWord,
    CTO_EndModeWord,
    CTO_BegMode,
    CTO_EndMode,
    CTO_BegModePhrase,
    CTO_EndModePhrase,
    CTO_LenModePhrase,
    /* End of ordered opcodes */
    CTO_LetterSign,
    CTO_NoLetsignBefore,
    CTO_NoLetsign,
    CTO_NoLetsignAfter,
    CTO_NumberSign,
    CTO_NumericModeChars,
    CTO_MidEndNumericModeChars,
    CTO_NumericNoContractChars,
    CTO_SeqDelimiter,
    CTO_SeqBeforeChars,
    CTO_SeqAfterChars,
    CTO_SeqAfterPattern,
    CTO_SeqAfterExpression,
    CTO_EmphClass,

    /* Do not change the order of the following opcodes! */
    CTO_EmphLetter,
    CTO_BegEmphWord,
    CTO_EndEmphWord,
    CTO_BegEmph,
    CTO_EndEmph,
    CTO_BegEmphPhrase,
    CTO_EndEmphPhrase,
    CTO_LenEmphPhrase,
    /* End of ordered opcodes */

    CTO_CapsModeChars,
    CTO_EmphModeChars,
    CTO_NoEmphChars,
    CTO_BegComp,
    CTO_EndComp,
    CTO_NoContractSign,
    CTO_MultInd,
    CTO_CompDots,
    CTO_Comp6,
    CTO_Class,  /* define a character class */
    CTO_After,  /* only match if after character in class */
    CTO_Before, /* only match if before character in class */
    CTO_NoBack,
    CTO_NoFor,
    CTO_EmpMatchBefore,
    CTO_EmpMatchAfter,
    CTO_SwapCc,
    CTO_SwapCd,
    CTO_SwapDd,
    CTO_Space,
    CTO_Digit,
    CTO_Punctuation,
    CTO_Math,
    CTO_Sign,
    CTO_Letter,
    CTO_UpperCase,
    CTO_LowerCase,
    CTO_Grouping,
    CTO_UpLow,
    CTO_LitDigit,
    CTO_Display,
    CTO_Replace,
    CTO_Context,
    CTO_Correct,
    CTO_Pass2,
    CTO_Pass3,
    CTO_Pass4,
    CTO_Repeated,
    CTO_RepWord,
    CTO_RepEndWord,
    CTO_CapsNoCont,
    CTO_Always,
    CTO_ExactDots,
    CTO_NoCross,
    CTO_Syllable,
    CTO_NoCont,
    CTO_CompBrl,
    CTO_Literal,
    CTO_LargeSign,
    CTO_WholeWord,
    CTO_PartWord,
    CTO_JoinNum,
    CTO_JoinableWord,
    CTO_LowWord,
    CTO_Contraction,
    CTO_SuffixableWord, /** whole word or beginning of word */
    CTO_PrefixableWord, /** whole word or end of word */
    CTO_BegWord,        /** beginning of word only */
    CTO_BegMidWord,     /** beginning or middle of word */
    CTO_MidWord,        /** middle of word only */
    CTO_MidEndWord,     /** middle or end of word */
    CTO_EndWord,        /** end of word only */
    CTO_PrePunc,        /** punctuation in string at beginning of word */
    CTO_PostPunc,       /** punctuation in string at end of word */
    CTO_BegNum,         /** beginning of number */
    CTO_MidNum,         /** middle of number, e.g., decimal point */
    CTO_EndNum,         /** end of number */
    CTO_DecPoint,
    CTO_Hyphen,
    // CTO_Apostrophe,
    // CTO_Initial,
    CTO_NoBreak,
    CTO_Match,
    CTO_BackMatch,
    CTO_Attribute,
    CTO_Base,
    CTO_Macro,
    CTO_None,

    /* More internal opcodes */
    CTO_LetterRule,
    CTO_NumberRule,
    CTO_NoContractRule,

    /* Start of (16 x 8) internal opcodes values
     * Do not change the order of the following opcodes! */
    CTO_CapsLetterRule,
    CTO_BegCapsWordRule,
    CTO_EndCapsWordRule,
    CTO_BegCapsRule,
    CTO_EndCapsRule,
    CTO_BegCapsPhraseRule,
    CTO_EndCapsPhraseBeforeRule,
    CTO_EndCapsPhraseAfterRule,
    CTO_Mode2LetterRule,
    CTO_BegMode2WordRule,
    CTO_EndMode2WordRule,
    CTO_BegMode2Rule,
    CTO_EndMode2Rule,
    CTO_BegMode2PhraseRule,
    CTO_EndMode2PhraseBeforeRule,
    CTO_EndMode2PhraseAfterRule,
    CTO_Mode3LetterRule,
    CTO_BegMode3WordRule,
    CTO_EndMode3WordRule,
    CTO_BegMode3Rule,
    CTO_EndMode3Rule,
    CTO_BegMode3PhraseRule,
    CTO_EndMode3PhraseBeforeRule,
    CTO_EndMode3PhraseAfterRule,
    CTO_Mode4LetterRule,
    CTO_BegMode4WordRule,
    CTO_EndMode4WordRule,
    CTO_BegMode4Rule,
    CTO_EndMode4Rule,
    CTO_BegMode4PhraseRule,
    CTO_EndMode4PhraseBeforeRule,
    CTO_EndMode4PhraseAfterRule,
    CTO_Mode5LetterRule,
    CTO_BegMode5WordRule,
    CTO_EndMode5WordRule,
    CTO_BegMode5Rule,
    CTO_EndMode5Rule,
    CTO_BegMode5PhraseRule,
    CTO_EndMode5PhraseBeforeRule,
    CTO_EndMode5PhraseAfterRule,
    CTO_Mode6LetterRule,
    CTO_BegMode6WordRule,
    CTO_EndMode6WordRule,
    CTO_BegMode6Rule,
    CTO_EndMode6Rule,
    CTO_BegMode6PhraseRule,
    CTO_EndMode6PhraseBeforeRule,
    CTO_EndMode6PhraseAfterRule,
    CTO_Emph1LetterRule,
    CTO_BegEmph1WordRule,
    CTO_EndEmph1WordRule,
    CTO_BegEmph1Rule,
    CTO_EndEmph1Rule,
    CTO_BegEmph1PhraseRule,
    CTO_EndEmph1PhraseBeforeRule,
    CTO_EndEmph1PhraseAfterRule,
    CTO_Emph2LetterRule,
    CTO_BegEmph2WordRule,
    CTO_EndEmph2WordRule,
    CTO_BegEmph2Rule,
    CTO_EndEmph2Rule,
    CTO_BegEmph2PhraseRule,
    CTO_EndEmph2PhraseBeforeRule,
    CTO_EndEmph2PhraseAfterRule,
    CTO_Emph3LetterRule,
    CTO_BegEmph3WordRule,
    CTO_EndEmph3WordRule,
    CTO_BegEmph3Rule,
    CTO_EndEmph3Rule,
    CTO_BegEmph3PhraseRule,
    CTO_EndEmph3PhraseBeforeRule,
    CTO_EndEmph3PhraseAfterRule,
    CTO_Emph4LetterRule,
    CTO_BegEmph4WordRule,
    CTO_EndEmph4WordRule,
    CTO_BegEmph4Rule,
    CTO_EndEmph4Rule,
    CTO_BegEmph4PhraseRule,
    CTO_EndEmph4PhraseBeforeRule,
    CTO_EndEmph4PhraseAfterRule,
    CTO_Emph5LetterRule,
    CTO_BegEmph5WordRule,
    CTO_EndEmph5WordRule,
    CTO_BegEmph5Rule,
    CTO_EndEmph5Rule,
    CTO_BegEmph5PhraseRule,
    CTO_EndEmph5PhraseBeforeRule,
    CTO_EndEmph5PhraseAfterRule,
    CTO_Emph6LetterRule,
    CTO_BegEmph6WordRule,
    CTO_EndEmph6WordRule,
    CTO_BegEmph6Rule,
    CTO_EndEmph6Rule,
    CTO_BegEmph6PhraseRule,
    CTO_EndEmph6PhraseBeforeRule,
    CTO_EndEmph6PhraseAfterRule,
    CTO_Emph7LetterRule,
    CTO_BegEmph7WordRule,
    CTO_EndEmph7WordRule,
    CTO_BegEmph7Rule,
    CTO_EndEmph7Rule,
    CTO_BegEmph7PhraseRule,
    CTO_EndEmph7PhraseBeforeRule,
    CTO_EndEmph7PhraseAfterRule,
    CTO_Emph8LetterRule,
    CTO_BegEmph8WordRule,
    CTO_EndEmph8WordRule,
    CTO_BegEmph8Rule,
    CTO_EndEmph8Rule,
    CTO_BegEmph8PhraseRule,
    CTO_EndEmph8PhraseBeforeRule,
    CTO_EndEmph8PhraseAfterRule,
    CTO_Emph9LetterRule,
    CTO_BegEmph9WordRule,
    CTO_EndEmph9WordRule,
    CTO_BegEmph9Rule,
    CTO_EndEmph9Rule,
    CTO_BegEmph9PhraseRule,
    CTO_EndEmph9PhraseBeforeRule,
    CTO_EndEmph9PhraseAfterRule,
    CTO_Emph10LetterRule,
    CTO_BegEmph10WordRule,
    CTO_EndEmph10WordRule,
    CTO_BegEmph10Rule,
    CTO_EndEmph10Rule,
    CTO_BegEmph10PhraseRule,
    CTO_EndEmph10PhraseBeforeRule,
    CTO_EndEmph10PhraseAfterRule,
    /* End of ordered (16 x 8) internal opcodes */

    CTO_BegCompRule,
    CTO_EndCompRule,
    CTO_CapsNoContRule,
    CTO_All
} TranslationTableOpcode;
493
494 typedef struct {
495 const char *sourceFile;
496 int sourceLine;
497 TranslationTableOffset charsnext; /** next chars entry */
498 TranslationTableOffset dotsnext; /** next dots entry */
499 TranslationTableCharacterAttributes after; /** character types which must follow */
500 TranslationTableCharacterAttributes before; /** character types which must precede */
501 TranslationTableOffset patterns; /** before and after patterns */
502 TranslationTableOpcode opcode; /** rule for testing validity of replacement */
503 char nocross;
504 short charslen; /** length of string to be replaced */
505 short dotslen; /** length of replacement string */
506 widechar charsdots[DEFAULTRULESIZE]; /** find and replacement strings */
507 } TranslationTableRule;
508
509 typedef struct /* state transition */
510 {
511 widechar ch;
512 widechar newState;
513 } HyphenationTrans;
514
515 typedef union {
516 HyphenationTrans *pointer;
517 TranslationTableOffset offset;
518 } PointOff;
519
520 typedef struct /* one state */
521 {
522 PointOff trans;
523 TranslationTableOffset hyphenPattern;
524 widechar fallbackState;
525 widechar numTrans;
526 } HyphenationState;
527
528 typedef struct CharacterClass {
529 struct CharacterClass *next;
530 TranslationTableCharacterAttributes attribute;
531 widechar length;
532 widechar name[1];
533 } CharacterClass;
534
535 typedef struct RuleName {
536 struct RuleName *next;
537 TranslationTableOffset ruleOffset;
538 widechar length;
539 widechar name[1];
540 } RuleName;
541
542 typedef struct {
543 /* either typeform or mode should be set, not both */
544 formtype typeform; /* corresponding value in "typeforms" enum */
545 TranslationTableCharacterAttributes mode; /* corresponding character attribute */
546 unsigned int value; /* bit field that contains a single "1" */
547 unsigned short
548 rule; /* emphasis rules (index in emphRules, emphModeChars and noEmphChars) */
549 } EmphasisClass;
550
551 typedef struct {
552 TranslationTableOffset tableSize;
553 TranslationTableOffset bytesUsed;
554 TranslationTableOffset charToDots[HASHNUM];
555 TranslationTableOffset dotsToChar[HASHNUM];
556 TranslationTableData ruleArea[1]; /** Space for storing all rules and values */
557 } DisplayTableHeader;
558
559 /**
560 * Translation table header
561 */
562 typedef struct { /* translation table */
563
564 /* state needed during compilation */
565 TranslationTableOffset tableSize;
566 TranslationTableOffset bytesUsed;
567 CharacterClass *characterClasses;
568 TranslationTableCharacterAttributes nextCharacterClassAttribute;
569 TranslationTableCharacterAttributes nextNumberedCharacterClassAttribute;
570 RuleName *ruleNames;
571 TranslationTableCharacterAttributes
572 numberedAttributes[8]; /* attributes 0-7 used in match rules (could also be
573 stored in `characterClasses', but this is slightly
574 faster) */
575 int usesAttributeOrClass; /* 1 = attribute, 2 = class */
576 char *sourceFiles[MAX_SOURCE_FILES + 1];
577
578 /* needed for translation or other api functions */
579 int finalized;
580 int capsNoCont;
581 int numPasses;
582 int corrections;
583 int syllables;
584 int usesSequences;
585 int usesNumericMode;
586 int hasCapsModeChars;
587 TranslationTableOffset undefined;
588 TranslationTableOffset letterSign;
589 TranslationTableOffset numberSign;
590 TranslationTableOffset noContractSign;
591 widechar seqPatterns[SEQPATTERNSIZE];
592 char *emphClassNames[MAX_EMPH_CLASSES];
593 EmphasisClass emphClasses[MAX_EMPH_CLASSES];
594 EmphasisClass modes[MAX_MODES];
595 int seqPatternsCount;
596 widechar seqAfterExpression[SEQPATTERNSIZE];
597 int seqAfterExpressionLength;
598 TranslationTableOffset emphRules[MAX_EMPH_CLASSES + MAX_MODES]
599 [9]; /* 9 is the size of the EmphCodeOffset enum */
600 TranslationTableOffset begComp;
601 TranslationTableOffset endComp;
602 TranslationTableOffset hyphenStatesArray;
603 widechar noLetsignBefore[LETSIGNBEFORESIZE];
604 int noLetsignBeforeCount;
605 widechar noLetsign[LETSIGNSIZE];
606 int noLetsignCount;
607 widechar noLetsignAfter[LETSIGNAFTERSIZE];
608 int noLetsignAfterCount;
609 widechar emphModeChars[MAX_EMPH_CLASSES] /* does not include caps: capsmodechars are
610 * currently stored as character attributes
611 */
612 [EMPHMODECHARSSIZE + 1];
613 widechar noEmphChars[MAX_EMPH_CLASSES] /* does not include caps */
614 [NOEMPHCHARSSIZE + 1];
615 TranslationTableOffset characters[HASHNUM]; /** Character definitions */
616 TranslationTableOffset dots[HASHNUM]; /** Dot definitions */
617 TranslationTableOffset forPassRules[MAXPASS + 1];
618 TranslationTableOffset backPassRules[MAXPASS + 1];
619 TranslationTableOffset forRules[HASHNUM]; /** chains of forward rules */
620 TranslationTableOffset backRules[HASHNUM]; /** Chains of backward rules */
621 TranslationTableData ruleArea[1]; /** Space for storing all rules and values */
622 } TranslationTableHeader;
623
/* Identifies one of the internal buffers; passed as the first argument
 * of _lou_allocMem. Enumerators are numbered consecutively from 0. */
typedef enum {
    alloc_typebuf,
    alloc_wordBuffer,
    alloc_emphasisBuffer,
    alloc_destSpacing,
    alloc_passbuf,
    alloc_posMapping1,
    alloc_posMapping2,
    alloc_posMapping3
} AllocBuf;
634
/* presumably the number of alloc_passbuf buffers -- confirm against _lou_allocMem */
#define MAXPASSBUF 3
636
/* Column indices, 0 to 8. TranslationTableHeader.emphRules has a second
 * dimension of 9 to match the number of enumerators here. */
typedef enum {
    begPhraseOffset = 0,
    endPhraseBeforeOffset = 1,
    endPhraseAfterOffset = 2,
    begOffset = 3,
    endOffset = 4,
    letterOffset = 5,
    begWordOffset = 6,
    endWordOffset = 7,
    lenPhraseOffset = 8
} EmphCodeOffset;
648
/* Grouping the begin, end, word and symbol bits and using the type of
 * a single bit group for representing the emphasis classes allows us
 * to do simple bit operations. */

/* fields contain sums of EmphasisClass.value */
/* MAX_EMPH_CLASSES + MAX_MODES may not exceed 16 (each field is 16 bits wide) */
typedef struct {
    unsigned int begin : 16;
    unsigned int end : 16;
    unsigned int word : 16;
    unsigned int symbol : 16;
} EmphasisInfo;
661
/* Encoding of an input file; stored in FileInfo.encoding. */
typedef enum { noEncoding, bigEndian, littleEndian, ascii8 } EncodingType;
663
664 typedef struct {
665 const char *fileName;
666 const char *sourceFile;
667 FILE *in;
668 int lineNumber;
669 EncodingType encoding;
670 int status;
671 int linelen;
672 int linepos;
673 int checkencoding[2];
674 widechar line[MAXSTRING];
675 } FileInfo;
676
677 /* The following function definitions are hooks into
678 * compileTranslationTable.c. Some are used by other library modules.
679 * Others are used by tools like lou_allround.c and lou_debug.c. */
680
681 /**
682 * Comma separated list of directories to search for tables.
683 */
684 char *EXPORT_CALL
685 _lou_getTablePath(void);
686
687 /**
688 * Resolve tableList against base.
689 */
690 char **EXPORT_CALL
691 _lou_resolveTable(const char *tableList, const char *base);
692
693 /**
694 * The default table resolver
695 */
696 char **EXPORT_CALL
697 _lou_defaultTableResolver(const char *tableList, const char *base);
698
699 /**
700 * Return single-cell dot pattern corresponding to a character.
701 * TODO: move to commonTranslationFunctions.c
702 */
703 widechar EXPORT_CALL
704 _lou_getDotsForChar(widechar c, const DisplayTableHeader *table);
705
706 /**
707 * Return character corresponding to a single-cell dot pattern.
708 * TODO: move to commonTranslationFunctions.c
709 */
710 widechar EXPORT_CALL
711 _lou_getCharForDots(widechar d, const DisplayTableHeader *table);
712
713 void EXPORT_CALL
714 _lou_getTable(const char *tableList, const char *displayTableList,
715 const TranslationTableHeader **translationTable,
716 const DisplayTableHeader **displayTable);
717
718 const TranslationTableHeader *EXPORT_CALL
719 _lou_getTranslationTable(const char *tableList);
720
721 const DisplayTableHeader *EXPORT_CALL
722 _lou_getDisplayTable(const char *tableList);
723
724 int EXPORT_CALL
725 _lou_compileTranslationRule(const char *tableList, const char *inString);
726
727 int EXPORT_CALL
728 _lou_compileDisplayRule(const char *tableList, const char *inString);
729
730 /**
731 * Allocate memory for internal buffers
732 *
733 * Used by lou_translateString.c and lou_backTranslateString.c ONLY
734 * to allocate memory for internal buffers.
735 * TODO: move to utils.c
736 */
737 void *EXPORT_CALL
738 _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax);
739
740 /**
741 * Hash function for character strings
742 *
743 * @param lowercase Whether to convert the string to lowercase because
744 * making the hash of it.
745 */
746 unsigned long int EXPORT_CALL
747 _lou_stringHash(const widechar *c, int lowercase, const TranslationTableHeader *table);
748
749 /**
750 * Hash function for single characters
751 */
752 unsigned long int EXPORT_CALL
753 _lou_charHash(widechar c);
754
755 /**
756 * Return a string in the same format as the characters operand in opcodes
757 */
758 const char *EXPORT_CALL
759 _lou_showString(widechar const *chars, int length, int forceHex);
760
761 /**
762 * Print out dot numbers
763 *
764 * @return a string containing the dot numbers. The longest possible
765 * output is "\123456789ABCDEF0/"
766 */
767 const char *EXPORT_CALL
768 _lou_unknownDots(widechar dots);
769
770 /**
771 * Return a character string in the format of the dots operand
772 */
773 const char *EXPORT_CALL
774 _lou_showDots(widechar const *dots, int length);
775
776 /**
777 * Return a character string where the attributes are indicated
778 * by the attribute letters used in multipass opcodes
779 */
780 char *EXPORT_CALL
781 _lou_showAttributes(TranslationTableCharacterAttributes a);
782
783 /**
784 * Return number of the opcode
785 *
786 * @param toFind the opcodes
787 */
788 TranslationTableOpcode EXPORT_CALL
789 _lou_findOpcodeNumber(const char *tofind);
790
791 /**
792 * Return the name of the opcode associated with an opcode number
793 *
794 * @param opcode an opcode
795 */
796 const char *EXPORT_CALL
797 _lou_findOpcodeName(TranslationTableOpcode opcode);
798
799 /**
800 * Convert string to wide characters
801 *
802 * Takes a character string and produces a sequence of wide characters.
803 * Opposite of _lou_showString.
804 *
805 * @param inString the input string
806 * @param outString the output wide char sequence
807 * @return length of the widechar sequence.
808 */
809 int EXPORT_CALL
810 _lou_extParseChars(const char *inString, widechar *outString);
811
812 /**
813 * Convert string to wide characters containing dot patterns
814 *
815 * Takes a character string and produces a sequence of wide characters
816 * containing dot patterns. Opposite of _lou_showDots.
817 * @param inString the input string
818 * @param outString the output wide char sequence
819 * @return length of the widechar sequence.
820 */
821 int EXPORT_CALL
822 _lou_extParseDots(const char *inString, widechar *outString);
823
824 int EXPORT_CALL
825 _lou_translate(const char *tableList, const char *displayTableList, const widechar *inbuf,
826 int *inlen, widechar *outbuf, int *outlen, formtype *typeform, char *spacing,
827 int *outputPos, int *inputPos, int *cursorPos, int mode,
828 const TranslationTableRule **rules, int *rulesLen);
829
830 int EXPORT_CALL
831 _lou_backTranslate(const char *tableList, const char *displayTableList,
832 const widechar *inbuf, int *inlen, widechar *outbuf, int *outlen,
833 formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos,
834 int mode, const TranslationTableRule **rules, int *rulesLen);
835
836 void EXPORT_CALL
837 _lou_resetPassVariables(void);
838
839 int EXPORT_CALL
840 _lou_handlePassVariableTest(const widechar *instructions, int *IC, int *itsTrue);
841
842 int EXPORT_CALL
843 _lou_handlePassVariableAction(const widechar *instructions, int *IC);
844
845 int EXPORT_CALL
846 _lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
847 const int expr_max, TranslationTableHeader *table, const FileInfo *nested);
848
849 void EXPORT_CALL
850 _lou_pattern_reverse(widechar *expr_data);
851
852 int EXPORT_CALL
853 _lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
854 const int input_dir, const widechar *expr_data,
855 const TranslationTableHeader *table);
856
857 /**
858 * Read a line of widechar's from an input file
859 */
860 int EXPORT_CALL
861 _lou_getALine(FileInfo *info);
862
863 #ifdef DEBUG
864 /* Can be inserted in code to be used as a breakpoint in gdb */
865 void EXPORT_CALL
866 _lou_debugHook(void);
867 #endif
868
869 /**
870 * Print an out-of-memory message and exit
871 */
872 void EXPORT_CALL
873 _lou_outOfMemory(void);
874
875 /**
876 * Helper for logging a widechar buffer
877 */
878 void EXPORT_CALL
879 _lou_logWidecharBuf(logLevels level, const char *msg, const widechar *wbuf, int wlen);
880
881 void EXPORT_CALL
882 _lou_logMessage(logLevels level, const char *format, ...);
883
884 extern int translation_direction;
885
886 /**
887 * Return 1 if given translation mode is valid. Return 0 otherwise.
888 */
889 int EXPORT_CALL
890 _lou_isValidMode(int mode);
891
892 /**
893 * Return the default braille representation for a character.
894 */
895 widechar EXPORT_CALL
896 _lou_charToFallbackDots(widechar c);
897
898 static inline int
isASCII(widechar c)899 isASCII(widechar c) {
900 return (c >= 0X20) && (c < 0X7F);
901 }
902
903 #ifdef __cplusplus
904 }
905 #endif /* __cplusplus */
906
907 #endif /* __LOUIS_H_ */
908