/* liblouis Braille Translation and Back-Translation Library

   Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
   BRLTTY Team

   Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
   Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
   Copyright (C) 2016 Mike Gray, American Printing House for the Blind
   Copyright (C) 2016 Davy Kager, Dedicon

   This file is part of liblouis.

   liblouis is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation, either version 2.1 of the License, or
   (at your option) any later version.

   liblouis is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
*/
26 
/**
 * @file
 * @brief Internal API of liblouis
 */
31 
32 #ifndef __LOUIS_H_
33 #define __LOUIS_H_
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif /* __cplusplus */
38 
39 #include <stdio.h>
40 #include "liblouis.h"
41 
/* Platform-dependent separator characters.
 *
 * Unlike native Windows, MinGW can handle forward slashes as directory
 * separator (see http://mingw.org/wiki/Posix_path_conversion), so MinGW
 * builds take the same values as every non-Windows platform. */
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__MINGW64__)
#define PATH_SEP ';'
#define DIR_SEP '\\'
#else
#define PATH_SEP ':'
#define DIR_SEP '/'
#endif

/* With the Microsoft compiler, map strcasecmp onto _stricmp. */
#ifdef _MSC_VER
#define strcasecmp _stricmp
#endif
55 
/* Fixed capacities used by the table structures declared in this header. */
#define NUMVAR 50
#define EMPHMODECHARSSIZE 256 /* row length (minus 1) of emphModeChars */
#define NOEMPHCHARSSIZE 256   /* row length (minus 1) of noEmphChars */
#define LETSIGNSIZE 256		  /* capacity of noLetsign */
// noletsignbefore and noletsignafter are hardly ever used and usually
// only with very few chars, so they only need a small array
#define LETSIGNBEFORESIZE 64
#define LETSIGNAFTERSIZE 64
#define SEQPATTERNSIZE 128 /* capacity of seqPatterns and seqAfterExpression */
#define CHARSIZE sizeof(widechar)
#define DEFAULTRULESIZE 50 /* declared length of TranslationTableRule.charsdots */
67 
/* Pair made of an unsigned long long key and a single char value. */
typedef struct intCharTupple {
	unsigned long long key;
	char value;
} intCharTupple;
72 
/* Number of buckets of the hash tables in the table headers.
 * HASHNUM must be prime. */
#define HASHNUM 1123

#define MAXPASS 4
#define MAXSTRING 2048
#define MAX_MACRO_VAR 100  // maximal number of variable substitutions a macro can contain
#define MAX_EMPH_CLASSES 10   // maximal number of emphasis classes
#define MAX_MODES 6			  // maximal number of modes that can be handled
#define MAX_SOURCE_FILES 100  // maximal number of files a table can consist of
82 
/* Unsigned integer type of the offset/link fields used by the table
 * structures in this header. */
typedef unsigned int TranslationTableOffset;
84 
/* Basic type for translation table data, which carries all alignment
 * constraints that fields contained in a translation table may have.
 * Notably TranslationTableCharacterAttributes is unsigned long long, so we
 * need a basic type at least this big.
 *
 * OFFSETSIZE is the size in bytes of one such data unit. */
typedef unsigned long long TranslationTableData;
#define OFFSETSIZE sizeof(TranslationTableData)
91 
/**
 * Character attribute bits.
 *
 * Every enumerator has exactly one bit set, so several attributes can be
 * combined with bitwise OR.
 */
typedef enum {
	/* The first 8 are the predefined character classes. They need to be listed first and
	   in this order because of how allocateCharacterClasses works. */
	CTC_Space = 0x1,
	CTC_Letter = 0x2,
	CTC_Digit = 0x4,
	CTC_Punctuation = 0x8,
	CTC_UpperCase = 0x10,
	CTC_LowerCase = 0x20,
	CTC_Math = 0x40,
	CTC_Sign = 0x80,
	CTC_LitDigit = 0x100,
	CTC_CapsMode = 0x200,
	// bit 0x400 used to be taken by CTC_EmphMode
	CTC_NumericMode = 0x800,
	CTC_NumericNoContract = 0x1000,
	CTC_SeqDelimiter = 0x2000,
	CTC_SeqBefore = 0x4000,
	CTC_SeqAfter = 0x8000,
	/* The following 8 are reserved for %0 to %7 (in no particular order) */
	/* Be careful with changing these values (and also CTC_EndOfInput) because in
	   pattern_compile_expression they are stored in an unsigned int after cutting off
	   the 16 least significant bits. */
	CTC_UserDefined1 = 0x10000,
	CTC_UserDefined2 = 0x20000,
	CTC_UserDefined3 = 0x40000,
	CTC_UserDefined4 = 0x80000,
	CTC_UserDefined5 = 0x100000,
	CTC_UserDefined6 = 0x200000,
	CTC_UserDefined7 = 0x400000,
	CTC_UserDefined8 = 0x800000,
	CTC_EndOfInput = 0x1000000,  // only used by pattern matcher
	CTC_EmpMatch = 0x2000000,	// only used in TranslationTableRule->before and
								 // TranslationTableRule->after
	CTC_MidEndNumericMode = 0x4000000,
	/* At least 37 more bits are available in an unsigned long long (at least 64 bits).
	   Used for custom attributes 9 to 45. These need to be the last values of the
	   enum. */
	CTC_UserDefined9 = 0x8000000,
	CTC_UserDefined10 = 0x10000000,
	CTC_UserDefined11 = 0x20000000,
	CTC_UserDefined12 = 0x40000000,
} TranslationTableCharacterAttribute;
134 
/**
 * Codes used by the multipass opcodes.
 *
 * Most values are the ASCII code of a single character; values from 130
 * upwards do not correspond to a printable ASCII character.
 *
 * NOTE(review): some enumerators share a value: pass_startGroup and
 * pass_groupstart ('{'), pass_endGroup and pass_groupend ('}'), pass_mark
 * and pass_repGroup (143). The values are kept as they are; confirm that
 * the last pair is intended before relying on it.
 */
typedef enum {
	pass_first = '`',
	pass_last = '~',
	pass_lookback = '_',
	pass_string = '\"',
	pass_dots = '@',
	pass_omit = '?',
	pass_startReplace = '[',
	pass_endReplace = ']',
	pass_startGroup = '{',
	pass_endGroup = '}',
	pass_variable = '#',
	pass_not = '!',
	pass_search = '/',
	pass_any = 'a',
	pass_digit = 'd',
	pass_litDigit = 'D',
	pass_letter = 'l',
	pass_math = 'm',
	pass_punctuation = 'p',
	pass_sign = 'S',
	pass_space = 's',
	pass_uppercase = 'U',
	pass_lowercase = 'u',
	pass_class1 = 'w',
	pass_class2 = 'x',
	pass_class3 = 'y',
	pass_class4 = 'z',
	pass_attributes = '$',
	pass_groupstart = '{',
	pass_groupend = '}',
	pass_groupreplace = ';',
	pass_swap = '%',
	pass_hyphen = '-',
	pass_until = '.',
	pass_eq = '=',
	pass_lt = '<',
	pass_gt = '>',
	pass_endTest = 32,
	pass_plus = '+',
	pass_copy = '*',
	pass_leftParen = '(',
	pass_rightParen = ')',
	pass_comma = ',',
	pass_lteq = 130,
	pass_gteq = 131,
	pass_invalidToken = 132,
	pass_noteq = 133,
	pass_and = 134,
	pass_or = 135,
	pass_nameFound = 136,
	pass_numberFound = 137,
	pass_boolean = 138,
	pass_class = 139,
	pass_define = 140,
	pass_emphasis = 141,
	pass_group = 142,
	pass_mark = 143,
	pass_repGroup = 143,
	pass_script = 144,
	pass_noMoreTokens = 145,
	pass_replace = 146,
	pass_if = 147,
	pass_then = 148,
	pass_all = 255
} pass_Codes;
201 
/* Integer type wide enough (unsigned long long, at least 64 bits) to hold a
 * combination of character attribute bits. */
typedef unsigned long long TranslationTableCharacterAttributes;
203 
204 typedef struct {
205 	TranslationTableOffset next;
206 	widechar lookFor;
207 	widechar found;
208 } CharDotsMapping;
209 
210 typedef struct {
211 	const char *sourceFile;
212 	int sourceLine;
213 	TranslationTableOffset next;
214 	TranslationTableOffset definitionRule;
215 	TranslationTableOffset otherRules;
216 	TranslationTableCharacterAttributes attributes;
217 	TranslationTableCharacterAttributes mode;
218 	TranslationTableOffset compRule;
219 	widechar value;
220 	TranslationTableOffset basechar;
221 	TranslationTableOffset linked;
222 } TranslationTableCharacter;
223 
/**
 * Op codes of translation table rules.
 *
 * The enumerators take consecutive values starting at 0. Several runs are
 * marked as ordered: their members must keep their relative order.
 * CTO_All is the last enumerator.
 */
typedef enum { /* Op codes */
	CTO_IncludeFile,
	CTO_Locale, /* Deprecated, do not use */
	CTO_Undefined,
	/* Do not change the order of the following opcodes! */
	CTO_CapsLetter,
	CTO_BegCapsWord,
	CTO_EndCapsWord,
	CTO_BegCaps,
	CTO_EndCaps,
	CTO_BegCapsPhrase,
	CTO_EndCapsPhrase,
	CTO_LenCapsPhrase,
	CTO_ModeLetter,
	CTO_BegModeWord,
	CTO_EndModeWord,
	CTO_BegMode,
	CTO_EndMode,
	CTO_BegModePhrase,
	CTO_EndModePhrase,
	CTO_LenModePhrase,
	/* End of ordered opcodes */
	CTO_LetterSign,
	CTO_NoLetsignBefore,
	CTO_NoLetsign,
	CTO_NoLetsignAfter,
	CTO_NumberSign,
	CTO_NumericModeChars,
	CTO_MidEndNumericModeChars,
	CTO_NumericNoContractChars,
	CTO_SeqDelimiter,
	CTO_SeqBeforeChars,
	CTO_SeqAfterChars,
	CTO_SeqAfterPattern,
	CTO_SeqAfterExpression,
	CTO_EmphClass,

	/* Do not change the order of the following opcodes! */
	CTO_EmphLetter,
	CTO_BegEmphWord,
	CTO_EndEmphWord,
	CTO_BegEmph,
	CTO_EndEmph,
	CTO_BegEmphPhrase,
	CTO_EndEmphPhrase,
	CTO_LenEmphPhrase,
	/* End of ordered opcodes */

	CTO_CapsModeChars,
	CTO_EmphModeChars,
	CTO_NoEmphChars,
	CTO_BegComp,
	CTO_EndComp,
	CTO_NoContractSign,
	CTO_MultInd,
	CTO_CompDots,
	CTO_Comp6,
	CTO_Class,  /* define a character class */
	CTO_After,  /* only match if after character in class */
	CTO_Before, /* only match if before character in class */
	CTO_NoBack,
	CTO_NoFor,
	CTO_EmpMatchBefore,
	CTO_EmpMatchAfter,
	CTO_SwapCc,
	CTO_SwapCd,
	CTO_SwapDd,
	CTO_Space,
	CTO_Digit,
	CTO_Punctuation,
	CTO_Math,
	CTO_Sign,
	CTO_Letter,
	CTO_UpperCase,
	CTO_LowerCase,
	CTO_Grouping,
	CTO_UpLow,
	CTO_LitDigit,
	CTO_Display,
	CTO_Replace,
	CTO_Context,
	CTO_Correct,
	CTO_Pass2,
	CTO_Pass3,
	CTO_Pass4,
	CTO_Repeated,
	CTO_RepWord,
	CTO_RepEndWord,
	CTO_CapsNoCont,
	CTO_Always,
	CTO_ExactDots,
	CTO_NoCross,
	CTO_Syllable,
	CTO_NoCont,
	CTO_CompBrl,
	CTO_Literal,
	CTO_LargeSign,
	CTO_WholeWord,
	CTO_PartWord,
	CTO_JoinNum,
	CTO_JoinableWord,
	CTO_LowWord,
	CTO_Contraction,
	CTO_SuffixableWord, /** whole word or beginning of word */
	CTO_PrefixableWord, /** whole word or end of word */
	CTO_BegWord,		/** beginning of word only */
	CTO_BegMidWord,		/** beginning or middle of word */
	CTO_MidWord,		/** middle of word only */
	CTO_MidEndWord,		/** middle or end of word */
	CTO_EndWord,		/** end of word only */
	CTO_PrePunc,		/** punctuation in string at beginning of word */
	CTO_PostPunc,		/** punctuation in string at end of word */
	CTO_BegNum,			/** beginning of number */
	CTO_MidNum,			/** middle of number, e.g., decimal point */
	CTO_EndNum,			/** end of number */
	CTO_DecPoint,
	CTO_Hyphen,
	// CTO_Apostrophe,
	// CTO_Initial,
	CTO_NoBreak,
	CTO_Match,
	CTO_BackMatch,
	CTO_Attribute,
	CTO_Base,
	CTO_Macro,
	CTO_None,

	/* More internal opcodes */
	CTO_LetterRule,
	CTO_NumberRule,
	CTO_NoContractRule,

	/* Start of (16 x 8) internal opcodes values
	 * Do not change the order of the following opcodes! */
	CTO_CapsLetterRule,
	CTO_BegCapsWordRule,
	CTO_EndCapsWordRule,
	CTO_BegCapsRule,
	CTO_EndCapsRule,
	CTO_BegCapsPhraseRule,
	CTO_EndCapsPhraseBeforeRule,
	CTO_EndCapsPhraseAfterRule,
	CTO_Mode2LetterRule,
	CTO_BegMode2WordRule,
	CTO_EndMode2WordRule,
	CTO_BegMode2Rule,
	CTO_EndMode2Rule,
	CTO_BegMode2PhraseRule,
	CTO_EndMode2PhraseBeforeRule,
	CTO_EndMode2PhraseAfterRule,
	CTO_Mode3LetterRule,
	CTO_BegMode3WordRule,
	CTO_EndMode3WordRule,
	CTO_BegMode3Rule,
	CTO_EndMode3Rule,
	CTO_BegMode3PhraseRule,
	CTO_EndMode3PhraseBeforeRule,
	CTO_EndMode3PhraseAfterRule,
	CTO_Mode4LetterRule,
	CTO_BegMode4WordRule,
	CTO_EndMode4WordRule,
	CTO_BegMode4Rule,
	CTO_EndMode4Rule,
	CTO_BegMode4PhraseRule,
	CTO_EndMode4PhraseBeforeRule,
	CTO_EndMode4PhraseAfterRule,
	CTO_Mode5LetterRule,
	CTO_BegMode5WordRule,
	CTO_EndMode5WordRule,
	CTO_BegMode5Rule,
	CTO_EndMode5Rule,
	CTO_BegMode5PhraseRule,
	CTO_EndMode5PhraseBeforeRule,
	CTO_EndMode5PhraseAfterRule,
	CTO_Mode6LetterRule,
	CTO_BegMode6WordRule,
	CTO_EndMode6WordRule,
	CTO_BegMode6Rule,
	CTO_EndMode6Rule,
	CTO_BegMode6PhraseRule,
	CTO_EndMode6PhraseBeforeRule,
	CTO_EndMode6PhraseAfterRule,
	CTO_Emph1LetterRule,
	CTO_BegEmph1WordRule,
	CTO_EndEmph1WordRule,
	CTO_BegEmph1Rule,
	CTO_EndEmph1Rule,
	CTO_BegEmph1PhraseRule,
	CTO_EndEmph1PhraseBeforeRule,
	CTO_EndEmph1PhraseAfterRule,
	CTO_Emph2LetterRule,
	CTO_BegEmph2WordRule,
	CTO_EndEmph2WordRule,
	CTO_BegEmph2Rule,
	CTO_EndEmph2Rule,
	CTO_BegEmph2PhraseRule,
	CTO_EndEmph2PhraseBeforeRule,
	CTO_EndEmph2PhraseAfterRule,
	CTO_Emph3LetterRule,
	CTO_BegEmph3WordRule,
	CTO_EndEmph3WordRule,
	CTO_BegEmph3Rule,
	CTO_EndEmph3Rule,
	CTO_BegEmph3PhraseRule,
	CTO_EndEmph3PhraseBeforeRule,
	CTO_EndEmph3PhraseAfterRule,
	CTO_Emph4LetterRule,
	CTO_BegEmph4WordRule,
	CTO_EndEmph4WordRule,
	CTO_BegEmph4Rule,
	CTO_EndEmph4Rule,
	CTO_BegEmph4PhraseRule,
	CTO_EndEmph4PhraseBeforeRule,
	CTO_EndEmph4PhraseAfterRule,
	CTO_Emph5LetterRule,
	CTO_BegEmph5WordRule,
	CTO_EndEmph5WordRule,
	CTO_BegEmph5Rule,
	CTO_EndEmph5Rule,
	CTO_BegEmph5PhraseRule,
	CTO_EndEmph5PhraseBeforeRule,
	CTO_EndEmph5PhraseAfterRule,
	CTO_Emph6LetterRule,
	CTO_BegEmph6WordRule,
	CTO_EndEmph6WordRule,
	CTO_BegEmph6Rule,
	CTO_EndEmph6Rule,
	CTO_BegEmph6PhraseRule,
	CTO_EndEmph6PhraseBeforeRule,
	CTO_EndEmph6PhraseAfterRule,
	CTO_Emph7LetterRule,
	CTO_BegEmph7WordRule,
	CTO_EndEmph7WordRule,
	CTO_BegEmph7Rule,
	CTO_EndEmph7Rule,
	CTO_BegEmph7PhraseRule,
	CTO_EndEmph7PhraseBeforeRule,
	CTO_EndEmph7PhraseAfterRule,
	CTO_Emph8LetterRule,
	CTO_BegEmph8WordRule,
	CTO_EndEmph8WordRule,
	CTO_BegEmph8Rule,
	CTO_EndEmph8Rule,
	CTO_BegEmph8PhraseRule,
	CTO_EndEmph8PhraseBeforeRule,
	CTO_EndEmph8PhraseAfterRule,
	CTO_Emph9LetterRule,
	CTO_BegEmph9WordRule,
	CTO_EndEmph9WordRule,
	CTO_BegEmph9Rule,
	CTO_EndEmph9Rule,
	CTO_BegEmph9PhraseRule,
	CTO_EndEmph9PhraseBeforeRule,
	CTO_EndEmph9PhraseAfterRule,
	CTO_Emph10LetterRule,
	CTO_BegEmph10WordRule,
	CTO_EndEmph10WordRule,
	CTO_BegEmph10Rule,
	CTO_EndEmph10Rule,
	CTO_BegEmph10PhraseRule,
	CTO_EndEmph10PhraseBeforeRule,
	CTO_EndEmph10PhraseAfterRule,
	/* End of ordered (16 x 8) internal opcodes */

	CTO_BegCompRule,
	CTO_EndCompRule,
	CTO_CapsNoContRule,
	CTO_All
} TranslationTableOpcode;
493 
494 typedef struct {
495 	const char *sourceFile;
496 	int sourceLine;
497 	TranslationTableOffset charsnext;			/** next chars entry */
498 	TranslationTableOffset dotsnext;			/** next dots entry */
499 	TranslationTableCharacterAttributes after;  /** character types which must follow */
500 	TranslationTableCharacterAttributes before; /** character types which must precede */
501 	TranslationTableOffset patterns;			/** before and after patterns */
502 	TranslationTableOpcode opcode; /** rule for testing validity of replacement */
503 	char nocross;
504 	short charslen;						 /** length of string to be replaced */
505 	short dotslen;						 /** length of replacement string */
506 	widechar charsdots[DEFAULTRULESIZE]; /** find and replacement strings */
507 } TranslationTableRule;
508 
509 typedef struct /* state transition */
510 {
511 	widechar ch;
512 	widechar newState;
513 } HyphenationTrans;
514 
515 typedef union {
516 	HyphenationTrans *pointer;
517 	TranslationTableOffset offset;
518 } PointOff;
519 
520 typedef struct /* one state */
521 {
522 	PointOff trans;
523 	TranslationTableOffset hyphenPattern;
524 	widechar fallbackState;
525 	widechar numTrans;
526 } HyphenationState;
527 
528 typedef struct CharacterClass {
529 	struct CharacterClass *next;
530 	TranslationTableCharacterAttributes attribute;
531 	widechar length;
532 	widechar name[1];
533 } CharacterClass;
534 
535 typedef struct RuleName {
536 	struct RuleName *next;
537 	TranslationTableOffset ruleOffset;
538 	widechar length;
539 	widechar name[1];
540 } RuleName;
541 
542 typedef struct {
543 	/* either typeform or mode should be set, not both */
544 	formtype typeform; /* corresponding value in "typeforms" enum */
545 	TranslationTableCharacterAttributes mode; /* corresponding character attribute */
546 	unsigned int value;						  /* bit field that contains a single "1" */
547 	unsigned short
548 			rule; /* emphasis rules (index in emphRules, emphModeChars and noEmphChars) */
549 } EmphasisClass;
550 
551 typedef struct {
552 	TranslationTableOffset tableSize;
553 	TranslationTableOffset bytesUsed;
554 	TranslationTableOffset charToDots[HASHNUM];
555 	TranslationTableOffset dotsToChar[HASHNUM];
556 	TranslationTableData ruleArea[1]; /** Space for storing all rules and values */
557 } DisplayTableHeader;
558 
559 /**
560  * Translation table header
561  */
562 typedef struct { /* translation table */
563 
564 	/* state needed during compilation */
565 	TranslationTableOffset tableSize;
566 	TranslationTableOffset bytesUsed;
567 	CharacterClass *characterClasses;
568 	TranslationTableCharacterAttributes nextCharacterClassAttribute;
569 	TranslationTableCharacterAttributes nextNumberedCharacterClassAttribute;
570 	RuleName *ruleNames;
571 	TranslationTableCharacterAttributes
572 			numberedAttributes[8]; /* attributes 0-7 used in match rules (could also be
573 								   stored in `characterClasses', but this is slightly
574 								   faster) */
575 	int usesAttributeOrClass;	  /* 1 = attribute, 2 = class */
576 	char *sourceFiles[MAX_SOURCE_FILES + 1];
577 
578 	/* needed for translation or other api functions */
579 	int finalized;
580 	int capsNoCont;
581 	int numPasses;
582 	int corrections;
583 	int syllables;
584 	int usesSequences;
585 	int usesNumericMode;
586 	int hasCapsModeChars;
587 	TranslationTableOffset undefined;
588 	TranslationTableOffset letterSign;
589 	TranslationTableOffset numberSign;
590 	TranslationTableOffset noContractSign;
591 	widechar seqPatterns[SEQPATTERNSIZE];
592 	char *emphClassNames[MAX_EMPH_CLASSES];
593 	EmphasisClass emphClasses[MAX_EMPH_CLASSES];
594 	EmphasisClass modes[MAX_MODES];
595 	int seqPatternsCount;
596 	widechar seqAfterExpression[SEQPATTERNSIZE];
597 	int seqAfterExpressionLength;
598 	TranslationTableOffset emphRules[MAX_EMPH_CLASSES + MAX_MODES]
599 									[9]; /* 9 is the size of the EmphCodeOffset enum */
600 	TranslationTableOffset begComp;
601 	TranslationTableOffset endComp;
602 	TranslationTableOffset hyphenStatesArray;
603 	widechar noLetsignBefore[LETSIGNBEFORESIZE];
604 	int noLetsignBeforeCount;
605 	widechar noLetsign[LETSIGNSIZE];
606 	int noLetsignCount;
607 	widechar noLetsignAfter[LETSIGNAFTERSIZE];
608 	int noLetsignAfterCount;
609 	widechar emphModeChars[MAX_EMPH_CLASSES] /* does not include caps: capsmodechars are
610 											  * currently stored as character attributes
611 											  */
612 						  [EMPHMODECHARSSIZE + 1];
613 	widechar noEmphChars[MAX_EMPH_CLASSES] /* does not include caps */
614 						[NOEMPHCHARSSIZE + 1];
615 	TranslationTableOffset characters[HASHNUM]; /** Character definitions */
616 	TranslationTableOffset dots[HASHNUM];		/** Dot definitions */
617 	TranslationTableOffset forPassRules[MAXPASS + 1];
618 	TranslationTableOffset backPassRules[MAXPASS + 1];
619 	TranslationTableOffset forRules[HASHNUM];  /** chains of forward rules */
620 	TranslationTableOffset backRules[HASHNUM]; /** Chains of backward rules */
621 	TranslationTableData ruleArea[1]; /** Space for storing all rules and values */
622 } TranslationTableHeader;
623 
/* Identifies the internal buffer requested from _lou_allocMem.
 * The enumerators take consecutive values starting at 0. */
typedef enum {
	alloc_typebuf,
	alloc_wordBuffer,
	alloc_emphasisBuffer,
	alloc_destSpacing,
	alloc_passbuf,
	alloc_posMapping1,
	alloc_posMapping2,
	alloc_posMapping3
} AllocBuf;
634 
/* NOTE(review): presumably the number of alloc_passbuf buffers that can be
 * requested through the `index` argument of _lou_allocMem — confirm. */
#define MAXPASSBUF 3
636 
/* Column indices into the second dimension of
 * TranslationTableHeader.emphRules, which has 9 columns to match the 9
 * enumerators here. */
typedef enum {
	begPhraseOffset = 0,
	endPhraseBeforeOffset = 1,
	endPhraseAfterOffset = 2,
	begOffset = 3,
	endOffset = 4,
	letterOffset = 5,
	begWordOffset = 6,
	endWordOffset = 7,
	lenPhraseOffset = 8
} EmphCodeOffset;
648 
/* Grouping the begin, end, word and symbol bits and using the type of
 * a single bit group for representing the emphasis classes allows us
 * to do simple bit operations. */

/* Each field contains a sum of EmphasisClass.value bits. */
/* Every field is 16 bits wide, so MAX_EMPH_CLASSES + MAX_MODES may not
 * exceed 16. */
typedef struct {
	unsigned int begin : 16;
	unsigned int end : 16;
	unsigned int word : 16;
	unsigned int symbol : 16;
} EmphasisInfo;
661 
/* Character encoding recorded for an input file (see FileInfo.encoding). */
typedef enum { noEncoding, bigEndian, littleEndian, ascii8 } EncodingType;
663 
664 typedef struct {
665 	const char *fileName;
666 	const char *sourceFile;
667 	FILE *in;
668 	int lineNumber;
669 	EncodingType encoding;
670 	int status;
671 	int linelen;
672 	int linepos;
673 	int checkencoding[2];
674 	widechar line[MAXSTRING];
675 } FileInfo;
676 
677 /* The following function definitions are hooks into
678  * compileTranslationTable.c. Some are used by other library modules.
679  * Others are used by tools like lou_allround.c and lou_debug.c. */
680 
681 /**
682  * Comma separated list of directories to search for tables.
683  */
684 char *EXPORT_CALL
685 _lou_getTablePath(void);
686 
687 /**
688  * Resolve tableList against base.
689  */
690 char **EXPORT_CALL
691 _lou_resolveTable(const char *tableList, const char *base);
692 
693 /**
694  * The default table resolver
695  */
696 char **EXPORT_CALL
697 _lou_defaultTableResolver(const char *tableList, const char *base);
698 
699 /**
700  * Return single-cell dot pattern corresponding to a character.
701  * TODO: move to commonTranslationFunctions.c
702  */
703 widechar EXPORT_CALL
704 _lou_getDotsForChar(widechar c, const DisplayTableHeader *table);
705 
706 /**
707  * Return character corresponding to a single-cell dot pattern.
708  * TODO: move to commonTranslationFunctions.c
709  */
710 widechar EXPORT_CALL
711 _lou_getCharForDots(widechar d, const DisplayTableHeader *table);
712 
713 void EXPORT_CALL
714 _lou_getTable(const char *tableList, const char *displayTableList,
715 		const TranslationTableHeader **translationTable,
716 		const DisplayTableHeader **displayTable);
717 
718 const TranslationTableHeader *EXPORT_CALL
719 _lou_getTranslationTable(const char *tableList);
720 
721 const DisplayTableHeader *EXPORT_CALL
722 _lou_getDisplayTable(const char *tableList);
723 
724 int EXPORT_CALL
725 _lou_compileTranslationRule(const char *tableList, const char *inString);
726 
727 int EXPORT_CALL
728 _lou_compileDisplayRule(const char *tableList, const char *inString);
729 
730 /**
731  * Allocate memory for internal buffers
732  *
733  * Used by lou_translateString.c and lou_backTranslateString.c ONLY
734  * to allocate memory for internal buffers.
735  * TODO: move to utils.c
736  */
737 void *EXPORT_CALL
738 _lou_allocMem(AllocBuf buffer, int index, int srcmax, int destmax);
739 
740 /**
741  * Hash function for character strings
742  *
743  * @param lowercase Whether to convert the string to lowercase because
744  *                  making the hash of it.
745  */
746 unsigned long int EXPORT_CALL
747 _lou_stringHash(const widechar *c, int lowercase, const TranslationTableHeader *table);
748 
749 /**
750  * Hash function for single characters
751  */
752 unsigned long int EXPORT_CALL
753 _lou_charHash(widechar c);
754 
755 /**
756  * Return a string in the same format as the characters operand in opcodes
757  */
758 const char *EXPORT_CALL
759 _lou_showString(widechar const *chars, int length, int forceHex);
760 
761 /**
762  * Print out dot numbers
763  *
764  * @return a string containing the dot numbers. The longest possible
765  * output is "\123456789ABCDEF0/"
766  */
767 const char *EXPORT_CALL
768 _lou_unknownDots(widechar dots);
769 
770 /**
771  * Return a character string in the format of the dots operand
772  */
773 const char *EXPORT_CALL
774 _lou_showDots(widechar const *dots, int length);
775 
776 /**
777  * Return a character string where the attributes are indicated
778  * by the attribute letters used in multipass opcodes
779  */
780 char *EXPORT_CALL
781 _lou_showAttributes(TranslationTableCharacterAttributes a);
782 
783 /**
784  * Return number of the opcode
785  *
786  * @param toFind the opcodes
787  */
788 TranslationTableOpcode EXPORT_CALL
789 _lou_findOpcodeNumber(const char *tofind);
790 
791 /**
792  * Return the name of the opcode associated with an opcode number
793  *
794  * @param opcode an opcode
795  */
796 const char *EXPORT_CALL
797 _lou_findOpcodeName(TranslationTableOpcode opcode);
798 
799 /**
800  * Convert string to wide characters
801  *
802  * Takes a character string and produces a sequence of wide characters.
803  * Opposite of _lou_showString.
804  *
805  * @param inString the input string
806  * @param outString the output wide char sequence
807  * @return length of the widechar sequence.
808  */
809 int EXPORT_CALL
810 _lou_extParseChars(const char *inString, widechar *outString);
811 
812 /**
813  * Convert string to wide characters containing dot patterns
814  *
815  * Takes a character string and produces a sequence of wide characters
816  * containing dot patterns. Opposite of _lou_showDots.
817  * @param inString the input string
818  * @param outString the output wide char sequence
819  * @return length of the widechar sequence.
820  */
821 int EXPORT_CALL
822 _lou_extParseDots(const char *inString, widechar *outString);
823 
824 int EXPORT_CALL
825 _lou_translate(const char *tableList, const char *displayTableList, const widechar *inbuf,
826 		int *inlen, widechar *outbuf, int *outlen, formtype *typeform, char *spacing,
827 		int *outputPos, int *inputPos, int *cursorPos, int mode,
828 		const TranslationTableRule **rules, int *rulesLen);
829 
830 int EXPORT_CALL
831 _lou_backTranslate(const char *tableList, const char *displayTableList,
832 		const widechar *inbuf, int *inlen, widechar *outbuf, int *outlen,
833 		formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos,
834 		int mode, const TranslationTableRule **rules, int *rulesLen);
835 
836 void EXPORT_CALL
837 _lou_resetPassVariables(void);
838 
839 int EXPORT_CALL
840 _lou_handlePassVariableTest(const widechar *instructions, int *IC, int *itsTrue);
841 
842 int EXPORT_CALL
843 _lou_handlePassVariableAction(const widechar *instructions, int *IC);
844 
845 int EXPORT_CALL
846 _lou_pattern_compile(const widechar *input, const int input_max, widechar *expr_data,
847 		const int expr_max, TranslationTableHeader *table, const FileInfo *nested);
848 
849 void EXPORT_CALL
850 _lou_pattern_reverse(widechar *expr_data);
851 
852 int EXPORT_CALL
853 _lou_pattern_check(const widechar *input, const int input_start, const int input_minmax,
854 		const int input_dir, const widechar *expr_data,
855 		const TranslationTableHeader *table);
856 
857 /**
858  * Read a line of widechar's from an input file
859  */
860 int EXPORT_CALL
861 _lou_getALine(FileInfo *info);
862 
863 #ifdef DEBUG
864 /* Can be inserted in code to be used as a breakpoint in gdb */
865 void EXPORT_CALL
866 _lou_debugHook(void);
867 #endif
868 
869 /**
870  * Print an out-of-memory message and exit
871  */
872 void EXPORT_CALL
873 _lou_outOfMemory(void);
874 
875 /**
876  * Helper for logging a widechar buffer
877  */
878 void EXPORT_CALL
879 _lou_logWidecharBuf(logLevels level, const char *msg, const widechar *wbuf, int wlen);
880 
881 void EXPORT_CALL
882 _lou_logMessage(logLevels level, const char *format, ...);
883 
884 extern int translation_direction;
885 
886 /**
887  * Return 1 if given translation mode is valid. Return 0 otherwise.
888  */
889 int EXPORT_CALL
890 _lou_isValidMode(int mode);
891 
892 /**
893  * Return the default braille representation for a character.
894  */
895 widechar EXPORT_CALL
896 _lou_charToFallbackDots(widechar c);
897 
898 static inline int
isASCII(widechar c)899 isASCII(widechar c) {
900 	return (c >= 0X20) && (c < 0X7F);
901 }
902 
903 #ifdef __cplusplus
904 }
905 #endif /* __cplusplus */
906 
907 #endif /* __LOUIS_H_ */
908