1 /* liblouis Braille Translation and Back-Translation Library
2
3 Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by The
4 BRLTTY Team
5
6 Copyright (C) 2004, 2005, 2006 ViewPlus Technologies, Inc. www.viewplus.com
7 Copyright (C) 2004, 2005, 2006 JJB Software, Inc. www.jjb-software.com
8
9 This file is part of liblouis.
10
11 liblouis is free software: you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published
13 by the Free Software Foundation, either version 2.1 of the License, or
14 (at your option) any later version.
15
16 liblouis is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public
22 License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
23 */
24
25 /**
26 * @file
27 * @brief Translate from braille
28 */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "internal.h"
35
36 typedef struct {
37 int size;
38 widechar **buffers;
39 int *inUse;
40 widechar *(*alloc)(int index, int length);
41 void (*free)(widechar *);
42 } StringBufferPool;
43
44 static widechar *
allocStringBuffer(int index,int length)45 allocStringBuffer(int index, int length) {
46 return _lou_allocMem(alloc_passbuf, index, 0, length);
47 }
48
49 static const StringBufferPool *stringBufferPool = NULL;
50
51 static void
initStringBufferPool()52 initStringBufferPool() {
53 static widechar *stringBuffers[MAXPASSBUF] = { NULL };
54 static int stringBuffersInUse[MAXPASSBUF] = { 0 };
55 StringBufferPool *pool = malloc(sizeof(StringBufferPool));
56 pool->size = MAXPASSBUF;
57 pool->buffers = stringBuffers;
58 pool->inUse = stringBuffersInUse;
59 pool->alloc = &allocStringBuffer;
60 pool->free = NULL;
61 stringBufferPool = pool;
62 }
63
64 static int
getStringBuffer(int length)65 getStringBuffer(int length) {
66 int i;
67 for (i = 0; i < stringBufferPool->size; i++) {
68 if (!stringBufferPool->inUse[i]) {
69 stringBufferPool->buffers[i] = stringBufferPool->alloc(i, length);
70 stringBufferPool->inUse[i] = 1;
71 return i;
72 }
73 }
74 _lou_outOfMemory();
75 return -1;
76 }
77
78 static int
releaseStringBuffer(int idx)79 releaseStringBuffer(int idx) {
80 if (idx >= 0 && idx < stringBufferPool->size) {
81 int inUse = stringBufferPool->inUse[idx];
82 if (inUse && stringBufferPool->free)
83 stringBufferPool->free(stringBufferPool->buffers[idx]);
84 stringBufferPool->inUse[idx] = 0;
85 return inUse;
86 }
87 return 0;
88 }
89
90 typedef struct {
91 int bufferIndex;
92 const widechar *chars;
93 int length;
94 } InString;
95
96 typedef struct {
97 int bufferIndex;
98 widechar *chars;
99 int maxlength;
100 int length;
101 } OutString;
102
/**
 * Positions recorded when a pass rule is tested against the input.
 * NOTE(review): presumably startMatch <= startReplace <= endReplace <=
 * endMatch -- confirm against back_passDoTest().
 */
typedef struct {
	int startMatch;   /* where the matched region begins */
	int startReplace; /* where the region to be replaced begins */
	int endReplace;   /* where the region to be replaced ends */
	int endMatch;     /* where the matched region ends */
} PassRuleMatch;
109
110 static int
111 backTranslateString(const TranslationTableHeader *table,
112 const DisplayTableHeader *displayTable, int mode, int currentPass,
113 const InString *input, OutString *output, char *spacebuf, int *posMapping,
114 int *realInlen, int *cursorPosition, int *cursorStatus,
115 const TranslationTableRule **appliedRules, int *appliedRulesCount,
116 int maxAppliedRules);
117 static int
118 makeCorrections(const TranslationTableHeader *table,
119 const DisplayTableHeader *displayTable, int mode, int currentPass,
120 const InString *input, OutString *output, int *posMapping, int *realInlen,
121 int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
122 int *appliedRulesCount, int maxAppliedRules);
123 static int
124 translatePass(const TranslationTableHeader *table, const DisplayTableHeader *displayTable,
125 int mode, int currentPass, const InString *input, OutString *output,
126 int *posMapping, int *realInlen, int *cursorPosition, int *cursorStatus,
127 const TranslationTableRule **appliedRules, int *appliedRulesCount,
128 int maxAppliedRules);
129 static void
130 passSelectRule(const TranslationTableHeader *table, int pos, int currentPass,
131 const InString *input, TranslationTableOpcode *currentOpcode,
132 const TranslationTableRule **currentRule, const widechar **passInstructions,
133 int *passIC, PassRuleMatch *match);
134
135 int EXPORT_CALL
lou_backTranslateString(const char * tableList,const widechar * inbuf,int * inlen,widechar * outbuf,int * outlen,formtype * typeform,char * spacing,int modex)136 lou_backTranslateString(const char *tableList, const widechar *inbuf, int *inlen,
137 widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int modex) {
138 return lou_backTranslate(tableList, inbuf, inlen, outbuf, outlen, typeform, spacing,
139 NULL, NULL, NULL, modex);
140 }
141
142 int EXPORT_CALL
lou_backTranslate(const char * tableList,const widechar * inbuf,int * inlen,widechar * outbuf,int * outlen,formtype * typeform,char * spacing,int * outputPos,int * inputPos,int * cursorPos,int modex)143 lou_backTranslate(const char *tableList, const widechar *inbuf, int *inlen,
144 widechar *outbuf, int *outlen, formtype *typeform, char *spacing, int *outputPos,
145 int *inputPos, int *cursorPos, int modex) {
146 return _lou_backTranslate(tableList, tableList, inbuf, inlen, outbuf, outlen,
147 typeform, spacing, outputPos, inputPos, cursorPos, modex, NULL, NULL);
148 }
149
150 int EXPORT_CALL
_lou_backTranslate(const char * tableList,const char * displayTableList,const widechar * inbuf,int * inlen,widechar * outbuf,int * outlen,formtype * typeform,char * spacing,int * outputPos,int * inputPos,int * cursorPos,int mode,const TranslationTableRule ** rules,int * rulesLen)151 _lou_backTranslate(const char *tableList, const char *displayTableList,
152 const widechar *inbuf, int *inlen, widechar *outbuf, int *outlen,
153 formtype *typeform, char *spacing, int *outputPos, int *inputPos, int *cursorPos,
154 int mode, const TranslationTableRule **rules, int *rulesLen) {
155 const TranslationTableHeader *table;
156 const DisplayTableHeader *displayTable;
157 InString input;
158 OutString output;
159 unsigned char *typebuf = NULL;
160 char *spacebuf;
161 // posMapping contains position mapping info between the output of the current pass
162 // and the initial input. It is 1 longer than the input. The values are monotonically
163 // increasing and can range between -1 and the output length. At the end the position
164 // info is passed to the user as an inputPos and outputPos array. inputPos has the
165 // length of the final output and has values ranging from 0 to inlen-1. outputPos has
166 // the length of the initial input and has values ranging from 0 to outlen-1.
167 int *posMapping = NULL;
168 int *posMapping1;
169 int *posMapping2;
170 int *posMapping3;
171 int cursorPosition;
172 int cursorStatus;
173 const TranslationTableRule **appliedRules;
174 int maxAppliedRules;
175 int appliedRulesCount;
176 int k;
177 int goodTrans = 1;
178 int idx;
179 if (tableList == NULL || inbuf == NULL || inlen == NULL || outbuf == NULL ||
180 outlen == NULL)
181 return 0;
182 if (displayTableList == NULL) displayTableList = tableList;
183 _lou_getTable(tableList, displayTableList, &table, &displayTable);
184 if (table == NULL) return 0;
185
186 if (!_lou_isValidMode(mode))
187 _lou_logMessage(LOU_LOG_ERROR, "Invalid mode parameter: %d", mode);
188
189 if (!stringBufferPool) initStringBufferPool();
190 for (idx = 0; idx < stringBufferPool->size; idx++) releaseStringBuffer(idx);
191 {
192 widechar *passbuf1;
193 int srcmax;
194 k = 0;
195 while (k < *inlen && inbuf[k]) k++;
196 srcmax = k;
197 idx = getStringBuffer(srcmax);
198 passbuf1 = stringBufferPool->buffers[idx];
199 for (k = 0; k < srcmax; k++)
200 if ((mode & dotsIO))
201 passbuf1[k] = inbuf[k] | LOU_DOTS;
202 else
203 passbuf1[k] = _lou_getDotsForChar(inbuf[k], displayTable);
204 passbuf1[srcmax] = _lou_getDotsForChar(' ', displayTable);
205 input = (InString){ .chars = passbuf1, .length = srcmax, .bufferIndex = idx };
206 }
207 idx = getStringBuffer(*outlen);
208 output = (OutString){ .chars = stringBufferPool->buffers[idx],
209 .maxlength = *outlen,
210 .length = 0,
211 .bufferIndex = idx };
212 typebuf = (unsigned char *)typeform;
213 spacebuf = spacing;
214 if (outputPos != NULL)
215 for (k = 0; k < input.length; k++) outputPos[k] = -1;
216 if (cursorPos != NULL)
217 cursorPosition = *cursorPos;
218 else
219 cursorPosition = -1;
220 cursorStatus = 0;
221 if (typebuf != NULL) memset(typebuf, '0', *outlen);
222 if (spacebuf != NULL) memset(spacebuf, '*', *outlen);
223 if (!(posMapping1 = _lou_allocMem(alloc_posMapping1, 0, input.length, *outlen)))
224 return 0;
225 if (table->numPasses > 1 || table->corrections) {
226 if (!(posMapping2 = _lou_allocMem(alloc_posMapping2, 0, input.length, *outlen)))
227 return 0;
228 if (!(posMapping3 = _lou_allocMem(alloc_posMapping3, 0, input.length, *outlen)))
229 return 0;
230 }
231 appliedRulesCount = 0;
232 if (rules != NULL && rulesLen != NULL) {
233 appliedRules = rules;
234 maxAppliedRules = *rulesLen;
235 } else {
236 appliedRules = NULL;
237 maxAppliedRules = 0;
238 }
239
240 posMapping = posMapping1;
241 int currentPass = table->numPasses;
242 int lastPass = table->corrections ? 0 : 1;
243 int *passPosMapping = posMapping;
244 while (1) {
245 int realInlen;
246 switch (currentPass) {
247 case 1:
248 goodTrans = backTranslateString(table, displayTable, mode, currentPass,
249 &input, &output, spacebuf, passPosMapping, &realInlen,
250 &cursorPosition, &cursorStatus, appliedRules, &appliedRulesCount,
251 maxAppliedRules);
252 break;
253 case 0:
254 goodTrans = makeCorrections(table, displayTable, mode, currentPass, &input,
255 &output, passPosMapping, &realInlen, &cursorPosition, &cursorStatus,
256 appliedRules, &appliedRulesCount, maxAppliedRules);
257 break;
258 default:
259 goodTrans = translatePass(table, displayTable, mode, currentPass, &input,
260 &output, passPosMapping, &realInlen, &cursorPosition, &cursorStatus,
261 appliedRules, &appliedRulesCount, maxAppliedRules);
262 break;
263 }
264 passPosMapping[realInlen] = output.length;
265 if (passPosMapping == posMapping) {
266 passPosMapping = posMapping2;
267 } else {
268 int *prevPosMapping = posMapping3;
269 memcpy((int *)prevPosMapping, posMapping, (*inlen + 1) * sizeof(int));
270 for (k = 0; k <= *inlen; k++) {
271 if (prevPosMapping[k] > realInlen) {
272 *inlen = k;
273 posMapping[k] = output.length;
274 break;
275 } else if (prevPosMapping[k] < 0)
276 posMapping[k] = passPosMapping[0];
277 else
278 posMapping[k] = passPosMapping[prevPosMapping[k]];
279 }
280 }
281 currentPass--;
282 if (currentPass >= lastPass && goodTrans) {
283 releaseStringBuffer(input.bufferIndex);
284 input = (InString){ .chars = output.chars,
285 .length = output.length,
286 .bufferIndex = output.bufferIndex };
287 idx = getStringBuffer(*outlen);
288 output = (OutString){ .chars = stringBufferPool->buffers[idx],
289 .maxlength = *outlen,
290 .length = 0,
291 .bufferIndex = idx };
292 continue;
293 }
294 break;
295 }
296 if (goodTrans) {
297 for (k = 0; k < output.length; k++) outbuf[k] = output.chars[k];
298 *outlen = output.length;
299 if (inputPos != NULL) {
300 int inpos = -1;
301 int outpos = -1;
302 for (k = 0; k < *inlen; k++)
303 if (posMapping[k] > outpos) {
304 while (outpos < posMapping[k]) {
305 if (outpos >= 0 && outpos < *outlen)
306 inputPos[outpos] = inpos < 0 ? 0 : inpos;
307 outpos++;
308 }
309 inpos = k;
310 }
311 if (outpos < 0) outpos = 0;
312 while (outpos < *outlen) inputPos[outpos++] = inpos;
313 }
314 if (outputPos != NULL) {
315 for (k = 0; k < *inlen; k++)
316 if (posMapping[k] < 0)
317 outputPos[k] = 0;
318 else if (posMapping[k] > *outlen - 1)
319 outputPos[k] = *outlen - 1;
320 else
321 outputPos[k] = posMapping[k];
322 }
323 }
324 if (cursorPos != NULL && *cursorPos != -1) {
325 if (outputPos != NULL)
326 *cursorPos = outputPos[*cursorPos];
327 else
328 *cursorPos = cursorPosition;
329 }
330 if (rulesLen != NULL) *rulesLen = appliedRulesCount;
331 return goodTrans;
332 }
333
334 static TranslationTableCharacter *
back_findCharOrDots(widechar c,int m,const TranslationTableHeader * table)335 back_findCharOrDots(widechar c, int m, const TranslationTableHeader *table) {
336 /* Look up character or dot pattern in the appropriate
337 * table. */
338 static TranslationTableCharacter noChar = { 0, 0, 0, CTC_Space, 32, 32, 32 };
339 static TranslationTableCharacter noDots = { 0, 0, 0, CTC_Space, LOU_DOTS, LOU_DOTS,
340 LOU_DOTS };
341 TranslationTableCharacter *notFound;
342 TranslationTableCharacter *character;
343 TranslationTableOffset bucket;
344 unsigned long int makeHash = _lou_charHash(c);
345 if (m == 0) {
346 bucket = table->characters[makeHash];
347 notFound = &noChar;
348 } else {
349 bucket = table->dots[makeHash];
350 notFound = &noDots;
351 }
352 while (bucket) {
353 character = (TranslationTableCharacter *)&table->ruleArea[bucket];
354 if (character->realchar == c) return character;
355 bucket = character->next;
356 }
357 notFound->realchar = notFound->uppercase = notFound->lowercase = c;
358 return notFound;
359 }
360
361 static int
checkAttr(const widechar c,const TranslationTableCharacterAttributes a,int m,const TranslationTableHeader * table)362 checkAttr(const widechar c, const TranslationTableCharacterAttributes a, int m,
363 const TranslationTableHeader *table) {
364 static widechar prevc = 0;
365 static TranslationTableCharacterAttributes preva = 0;
366 if (c != prevc) {
367 preva = (back_findCharOrDots(c, m, table))->attributes;
368 prevc = c;
369 }
370 return ((preva & a) ? 1 : 0);
371 }
372
373 static int
compareDots(const widechar * address1,const widechar * address2,int count)374 compareDots(const widechar *address1, const widechar *address2, int count) {
375 int k;
376 if (!count) return 0;
377 for (k = 0; k < count; k++)
378 if (address1[k] != address2[k]) return 0;
379 return 1;
380 }
381
382 static void
back_setBefore(const TranslationTableHeader * table,OutString * output,TranslationTableCharacterAttributes * beforeAttributes)383 back_setBefore(const TranslationTableHeader *table, OutString *output,
384 TranslationTableCharacterAttributes *beforeAttributes) {
385 widechar before = (output->length == 0) ? ' ' : output->chars[output->length - 1];
386 *beforeAttributes = (back_findCharOrDots(before, 0, table))->attributes;
387 }
388
389 static void
back_setAfter(int length,const TranslationTableHeader * table,int pos,const InString * input,TranslationTableCharacterAttributes * afterAttributes)390 back_setAfter(int length, const TranslationTableHeader *table, int pos,
391 const InString *input, TranslationTableCharacterAttributes *afterAttributes) {
392 widechar after = (pos + length < input->length) ? input->chars[pos + length] : ' ';
393 *afterAttributes = (back_findCharOrDots(after, 1, table))->attributes;
394 }
395
396 static int
isBegWord(const TranslationTableHeader * table,OutString * output)397 isBegWord(const TranslationTableHeader *table, OutString *output) {
398 /* See if this is really the beginning of a word. Look at what has
399 * already been translated. */
400 int k;
401 if (output->length == 0) return 1;
402 for (k = output->length - 1; k >= 0; k--) {
403 const TranslationTableCharacter *ch =
404 back_findCharOrDots(output->chars[k], 0, table);
405 if (ch->attributes & CTC_Space) break;
406 if (ch->attributes & (CTC_Letter | CTC_Digit | CTC_Math | CTC_Sign)) return 0;
407 }
408 return 1;
409 }
410
411 static int
isEndWord(const TranslationTableHeader * table,int pos,int mode,const InString * input,int currentDotslen)412 isEndWord(const TranslationTableHeader *table, int pos, int mode, const InString *input,
413 int currentDotslen) {
414 if (mode & partialTrans) return 0;
415 /* See if this is really the end of a word. */
416 int k;
417 const TranslationTableCharacter *dots;
418 TranslationTableOffset testRuleOffset;
419 TranslationTableRule *testRule;
420 for (k = pos + currentDotslen; k < input->length; k++) {
421 int postpuncFound = 0;
422 int TranslationFound = 0;
423 dots = back_findCharOrDots(input->chars[k], 1, table);
424 testRuleOffset = dots->otherRules;
425 if (dots->attributes & CTC_Space) break;
426 if (dots->attributes & CTC_Letter) return 0;
427 while (testRuleOffset) {
428 testRule = (TranslationTableRule *)&table->ruleArea[testRuleOffset];
429 /* #360: Don't treat begword/midword as definite translations here
430 * because we don't know whether they apply yet. Subsequent
431 * input will allow us to determine whether the word continues.
432 */
433 if (testRule->charslen > 1 && testRule->opcode != CTO_BegWord &&
434 testRule->opcode != CTO_MidWord)
435 TranslationFound = 1;
436 if (testRule->opcode == CTO_PostPunc) postpuncFound = 1;
437 if (testRule->opcode == CTO_Hyphen) return 1;
438 testRuleOffset = testRule->dotsnext;
439 }
440 if (TranslationFound && !postpuncFound) return 0;
441 }
442 return 1;
443 }
444 static int
findBrailleIndicatorRule(TranslationTableOffset offset,const TranslationTableHeader * table,int * currentDotslen,TranslationTableOpcode * currentOpcode,const TranslationTableRule ** currentRule)445 findBrailleIndicatorRule(TranslationTableOffset offset,
446 const TranslationTableHeader *table, int *currentDotslen,
447 TranslationTableOpcode *currentOpcode, const TranslationTableRule **currentRule) {
448 if (!offset) return 0;
449 *currentRule = (TranslationTableRule *)&table->ruleArea[offset];
450 *currentOpcode = (*currentRule)->opcode;
451 *currentDotslen = (*currentRule)->dotslen;
452 return 1;
453 }
454
455 static int
handleMultind(const TranslationTableHeader * table,int * currentDotslen,TranslationTableOpcode * currentOpcode,const TranslationTableRule ** currentRule,int * doingMultind,const TranslationTableRule * multindRule)456 handleMultind(const TranslationTableHeader *table, int *currentDotslen,
457 TranslationTableOpcode *currentOpcode, const TranslationTableRule **currentRule,
458 int *doingMultind, const TranslationTableRule *multindRule) {
459 /* Handle multille braille indicators */
460 int found = 0;
461 if (!*doingMultind) return 0;
462 switch (multindRule->charsdots[multindRule->charslen - *doingMultind]) {
463 case CTO_CapsLetterRule: // FIXME: make sure this works
464 found = findBrailleIndicatorRule(table->emphRules[capsRule][letterOffset], table,
465 currentDotslen, currentOpcode, currentRule);
466 break;
467 // NOTE: following fixme is based on the names at the time of
468 // commit f22f91eb510cb4eef33dfb4950a297235dd2f9f1.
469 // FIXME: the next two opcodes were begcaps/endcaps,
470 // and they were aliased to opcodes capsword/capswordstop.
471 // However, the table attributes they use are
472 // table->beginCapitalSign and table->endCapitalSign.
473 // These are actually compiled with firstlettercaps/lastlettercaps.
474 // Which to use here?
475 case CTO_BegCapsWordRule:
476 found = findBrailleIndicatorRule(table->emphRules[capsRule][begWordOffset], table,
477 currentDotslen, currentOpcode, currentRule);
478 break;
479 case CTO_EndCapsWordRule:
480 found = findBrailleIndicatorRule(table->emphRules[capsRule][endWordOffset], table,
481 currentDotslen, currentOpcode, currentRule);
482 break;
483 case CTO_LetterSign:
484 found = findBrailleIndicatorRule(
485 table->letterSign, table, currentDotslen, currentOpcode, currentRule);
486 break;
487 case CTO_NoContractSign:
488 found = findBrailleIndicatorRule(
489 table->noContractSign, table, currentDotslen, currentOpcode, currentRule);
490 break;
491 case CTO_NumberSign:
492 found = findBrailleIndicatorRule(
493 table->numberSign, table, currentDotslen, currentOpcode, currentRule);
494 break;
495 case CTO_EndEmph1PhraseBeforeRule:
496 found = findBrailleIndicatorRule(
497 table->emphRules[emph1Rule][endPhraseBeforeOffset], table, currentDotslen,
498 currentOpcode, currentRule);
499 break;
500 case CTO_BegEmph1Rule:
501 found = findBrailleIndicatorRule(table->emphRules[emph1Rule][begOffset], table,
502 currentDotslen, currentOpcode, currentRule);
503 break;
504 case CTO_EndEmph1Rule:
505 found = findBrailleIndicatorRule(table->emphRules[emph1Rule][endOffset], table,
506 currentDotslen, currentOpcode, currentRule);
507 break;
508 case CTO_EndEmph2PhraseBeforeRule:
509 found = findBrailleIndicatorRule(
510 table->emphRules[emph2Rule][endPhraseBeforeOffset], table, currentDotslen,
511 currentOpcode, currentRule);
512 break;
513 case CTO_BegEmph2Rule:
514 found = findBrailleIndicatorRule(table->emphRules[emph2Rule][begOffset], table,
515 currentDotslen, currentOpcode, currentRule);
516 break;
517 case CTO_EndEmph2Rule:
518 found = findBrailleIndicatorRule(table->emphRules[emph2Rule][endOffset], table,
519 currentDotslen, currentOpcode, currentRule);
520 break;
521 case CTO_EndEmph3PhraseBeforeRule:
522 found = findBrailleIndicatorRule(
523 table->emphRules[emph3Rule][endPhraseBeforeOffset], table, currentDotslen,
524 currentOpcode, currentRule);
525 break;
526 case CTO_BegEmph3Rule:
527 found = findBrailleIndicatorRule(table->emphRules[emph3Rule][begOffset], table,
528 currentDotslen, currentOpcode, currentRule);
529 break;
530 case CTO_EndEmph3Rule:
531 found = findBrailleIndicatorRule(table->emphRules[emph3Rule][endOffset], table,
532 currentDotslen, currentOpcode, currentRule);
533 break;
534 case CTO_BegComp:
535 found = findBrailleIndicatorRule(
536 table->begComp, table, currentDotslen, currentOpcode, currentRule);
537 break;
538 case CTO_EndComp:
539 found = findBrailleIndicatorRule(
540 table->endComp, table, currentDotslen, currentOpcode, currentRule);
541 break;
542 default:
543 found = 0;
544 break;
545 }
546 (*doingMultind)--;
547 return found;
548 }
549
550 static int
551 back_passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
552 TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
553 const widechar **passInstructions, int *passIC, PassRuleMatch *match);
554 static int
555 back_passDoAction(const TranslationTableHeader *table,
556 const DisplayTableHeader *displayTable, int *pos, int mode, const InString *input,
557 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
558 int *nextUpper, int allUpper, int allUpperPhrase,
559 TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
560 const widechar *passInstructions, int passIC, PassRuleMatch match);
561
562 static int
findBackPassRule(const TranslationTableHeader * table,int pos,int currentPass,const InString * input,TranslationTableOpcode * currentOpcode,const TranslationTableRule ** currentRule,const widechar ** passInstructions,int * passIC,PassRuleMatch * match)563 findBackPassRule(const TranslationTableHeader *table, int pos, int currentPass,
564 const InString *input, TranslationTableOpcode *currentOpcode,
565 const TranslationTableRule **currentRule, const widechar **passInstructions,
566 int *passIC, PassRuleMatch *match) {
567 TranslationTableOffset ruleOffset;
568 ruleOffset = table->backPassRules[currentPass];
569
570 while (ruleOffset) {
571 *currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
572 *currentOpcode = (*currentRule)->opcode;
573
574 switch (*currentOpcode) {
575 case CTO_Correct:
576 if (currentPass != 0) goto NEXT_RULE;
577 break;
578 case CTO_Context:
579 if (currentPass != 1) goto NEXT_RULE;
580 break;
581 case CTO_Pass2:
582 if (currentPass != 2) goto NEXT_RULE;
583 break;
584 case CTO_Pass3:
585 if (currentPass != 3) goto NEXT_RULE;
586 break;
587 case CTO_Pass4:
588 if (currentPass != 4) goto NEXT_RULE;
589 break;
590 default:
591 goto NEXT_RULE;
592 }
593
594 if (back_passDoTest(table, pos, input, *currentOpcode, *currentRule,
595 passInstructions, passIC, match))
596 return 1;
597
598 NEXT_RULE:
599 ruleOffset = (*currentRule)->dotsnext;
600 }
601
602 return 0;
603 }
604
605 static void
back_selectRule(const TranslationTableHeader * table,int pos,int mode,const InString * input,OutString * output,int itsANumber,int itsALetter,int * currentDotslen,TranslationTableOpcode * currentOpcode,const TranslationTableRule ** currentRule,TranslationTableOpcode previousOpcode,int * doingMultind,const TranslationTableRule ** multindRule,TranslationTableCharacterAttributes beforeAttributes,const widechar ** passInstructions,int * passIC,PassRuleMatch * patternMatch)606 back_selectRule(const TranslationTableHeader *table, int pos, int mode,
607 const InString *input, OutString *output, int itsANumber, int itsALetter,
608 int *currentDotslen, TranslationTableOpcode *currentOpcode,
609 const TranslationTableRule **currentRule, TranslationTableOpcode previousOpcode,
610 int *doingMultind, const TranslationTableRule **multindRule,
611 TranslationTableCharacterAttributes beforeAttributes,
612 const widechar **passInstructions, int *passIC, PassRuleMatch *patternMatch) {
613 /* check for valid back-translations */
614 int length = input->length - pos;
615 TranslationTableOffset ruleOffset = 0;
616 static TranslationTableRule pseudoRule = { 0 };
617 unsigned long int makeHash = 0;
618 const TranslationTableCharacter *dots =
619 back_findCharOrDots(input->chars[pos], 1, table);
620 int tryThis;
621 if (handleMultind(table, currentDotslen, currentOpcode, currentRule, doingMultind,
622 *multindRule))
623 return;
624 for (tryThis = 0; tryThis < 3; tryThis++) {
625 switch (tryThis) {
626 case 0:
627 if (length < 2 || (itsANumber && (dots->attributes & CTC_LitDigit))) break;
628 /* Hash function optimized for backward translation */
629 makeHash = (unsigned long int)dots->realchar << 8;
630 makeHash += (unsigned long int)(back_findCharOrDots(
631 input->chars[pos + 1], 1, table))
632 ->realchar;
633 makeHash %= HASHNUM;
634 ruleOffset = table->backRules[makeHash];
635 break;
636 case 1:
637 if (!(length >= 1)) break;
638 length = 1;
639 ruleOffset = dots->otherRules;
640 break;
641 case 2: /* No rule found */
642 *currentRule = &pseudoRule;
643 *currentOpcode = pseudoRule.opcode = CTO_None;
644 *currentDotslen = pseudoRule.dotslen = 1;
645 pseudoRule.charsdots[0] = input->chars[pos];
646 pseudoRule.charslen = 0;
647 return;
648 break;
649 }
650 while (ruleOffset) {
651 const widechar *currentDots;
652 *currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
653 *currentOpcode = (*currentRule)->opcode;
654 if (*currentOpcode == CTO_Context) {
655 currentDots = &(*currentRule)->charsdots[0];
656 *currentDotslen = (*currentRule)->charslen;
657 } else {
658 currentDots = &(*currentRule)->charsdots[(*currentRule)->charslen];
659 *currentDotslen = (*currentRule)->dotslen;
660 }
661 if (((*currentDotslen <= length) &&
662 compareDots(&input->chars[pos], currentDots, *currentDotslen))) {
663 TranslationTableCharacterAttributes afterAttributes;
664 /* check this rule */
665 back_setAfter(*currentDotslen, table, pos, input, &afterAttributes);
666 if ((!((*currentRule)->after & ~CTC_EmpMatch) ||
667 (beforeAttributes & (*currentRule)->after)) &&
668 (!((*currentRule)->before & ~CTC_EmpMatch) ||
669 (afterAttributes & (*currentRule)->before))) {
670 switch (*currentOpcode) { /* check validity of this Translation */
671 case CTO_Context:
672 if (back_passDoTest(table, pos, input, *currentOpcode,
673 *currentRule, passInstructions, passIC, patternMatch))
674 return;
675 break;
676 case CTO_Space:
677 case CTO_Digit:
678 case CTO_Letter:
679 case CTO_UpperCase:
680 case CTO_LowerCase:
681 case CTO_Punctuation:
682 case CTO_Math:
683 case CTO_Sign:
684 case CTO_ExactDots:
685 case CTO_NoCross:
686 case CTO_Repeated:
687 case CTO_Replace:
688 case CTO_Hyphen:
689 return;
690 case CTO_LitDigit:
691 if (itsANumber) return;
692 break;
693 case CTO_CapsLetterRule:
694 case CTO_BegCapsRule:
695 case CTO_EndCapsRule:
696 case CTO_BegCapsWordRule:
697 case CTO_EndCapsWordRule:
698 case CTO_BegEmph1Rule:
699 case CTO_EndEmph1Rule:
700 case CTO_BegEmph2Rule:
701 case CTO_EndEmph2Rule:
702 case CTO_BegEmph3Rule:
703 case CTO_EndEmph3Rule:
704 case CTO_NumberRule:
705 case CTO_BegCompRule:
706 case CTO_EndCompRule:
707 return;
708 case CTO_LetterRule:
709 case CTO_NoContractRule:
710 // BF: This is just a heuristic test. During forward translation,
711 // the
712 // nocontractsign is inserted either when in numeric mode and the
713 // next
714 // character is not numeric (CTC_Digit | CTC_LitDigit |
715 // CTC_NumericMode | CTC_MidEndNumericMode),
716 // or when a "contraction" rule is matched and the characters are
717 // preceded and followed by space or punctuation (CTC_Space |
718 // CTC_Punctuation).
719 if (!(beforeAttributes & CTC_Letter) &&
720 (afterAttributes & (CTC_Letter | CTC_Sign)))
721 return;
722 break;
723 case CTO_MultInd:
724 *doingMultind = *currentDotslen;
725 *multindRule = *currentRule;
726 if (handleMultind(table, currentDotslen, currentOpcode,
727 currentRule, doingMultind, *multindRule))
728 return;
729 break;
730 case CTO_LargeSign:
731 return;
732 case CTO_WholeWord:
733 if (mode & partialTrans) break;
734 if (itsALetter || itsANumber) break;
735 case CTO_Contraction:
736 if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
737 ((afterAttributes & CTC_Space) ||
738 isEndWord(table, pos, mode, input,
739 *currentDotslen)))
740 return;
741 break;
742 case CTO_LowWord:
743 if (mode & partialTrans) break;
744 if ((beforeAttributes & CTC_Space) &&
745 (afterAttributes & CTC_Space) &&
746 (previousOpcode != CTO_JoinableWord))
747 return;
748 break;
749 case CTO_JoinNum:
750 case CTO_JoinableWord:
751 if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
752 (!(afterAttributes & CTC_Space) || mode & partialTrans))
753 return;
754 break;
755 case CTO_SuffixableWord:
756 if (beforeAttributes & (CTC_Space | CTC_Punctuation)) return;
757 break;
758 case CTO_PrefixableWord:
759 if ((beforeAttributes &
760 (CTC_Space | CTC_Letter | CTC_Punctuation)) &&
761 isEndWord(table, pos, mode, input, *currentDotslen))
762 return;
763 break;
764 case CTO_BegWord:
765 if ((beforeAttributes & (CTC_Space | CTC_Punctuation)) &&
766 (!isEndWord(table, pos, mode, input, *currentDotslen)))
767 return;
768 break;
769 case CTO_BegMidWord:
770 if ((beforeAttributes &
771 (CTC_Letter | CTC_Space | CTC_Punctuation)) &&
772 (!isEndWord(table, pos, mode, input, *currentDotslen)))
773 return;
774 break;
775 case CTO_PartWord:
776 if (!(beforeAttributes & CTC_LitDigit) &&
777 (beforeAttributes & CTC_Letter ||
778 !isEndWord(table, pos, mode, input,
779 *currentDotslen)))
780 return;
781 break;
782 case CTO_MidWord:
783 if (beforeAttributes & CTC_Letter &&
784 !isEndWord(table, pos, mode, input, *currentDotslen))
785 return;
786 break;
787 case CTO_MidEndWord:
788 if ((beforeAttributes & CTC_Letter)) return;
789 break;
790 case CTO_EndWord:
791 if ((beforeAttributes & CTC_Letter) &&
792 isEndWord(table, pos, mode, input, *currentDotslen))
793 return;
794 break;
795 case CTO_BegNum:
796 if (beforeAttributes & (CTC_Space | CTC_Punctuation) &&
797 (afterAttributes & (CTC_LitDigit | CTC_Sign)))
798 return;
799 break;
800 case CTO_MidNum:
801 if (beforeAttributes & CTC_Digit &&
802 afterAttributes & CTC_LitDigit)
803 return;
804 break;
805 case CTO_EndNum:
806 if (itsANumber && !(afterAttributes & CTC_LitDigit)) return;
807 break;
808 case CTO_DecPoint:
809 if (afterAttributes & (CTC_Digit | CTC_LitDigit)) return;
810 break;
811 case CTO_PrePunc:
812 if (isBegWord(table, output)) return;
813 break;
814
815 case CTO_PostPunc:
816 if (isEndWord(table, pos, mode, input, *currentDotslen)) return;
817 break;
818 case CTO_Always:
819 if ((beforeAttributes & CTC_LitDigit) &&
820 (afterAttributes & CTC_LitDigit) &&
821 (*currentRule)->charslen > 1)
822 break;
823 return;
824
825 case CTO_BackMatch: {
826 widechar *patterns, *pattern;
827
828 // if(dontContract || (mode & noContractions))
829 // break;
830 // if(checkEmphasisChange(0))
831 // break;
832
833 patterns = (widechar *)&table->ruleArea[(*currentRule)->patterns];
834
835 /* check before pattern */
836 pattern = &patterns[1];
837 if (!_lou_pattern_check(
838 input->chars, pos - 1, -1, -1, pattern, table))
839 break;
840
841 /* check after pattern */
842 pattern = &patterns[patterns[0]];
843 if (!_lou_pattern_check(input->chars,
844 pos + (*currentRule)->dotslen, input->length, 1,
845 pattern, table))
846 break;
847
848 return;
849 }
850 default:
851 break;
852 }
853 }
854 } /* Done with checking this rule */
855 ruleOffset = (*currentRule)->dotsnext;
856 }
857 }
858 }
859
860 static int
putchars(const widechar * chars,int count,const TranslationTableHeader * table,OutString * output,int * nextUpper,int allUpper,int allUpperPhrase)861 putchars(const widechar *chars, int count, const TranslationTableHeader *table,
862 OutString *output, int *nextUpper, int allUpper, int allUpperPhrase) {
863 int k = 0;
864 if (!count || (output->length + count) > output->maxlength) return 0;
865 if (*nextUpper) {
866 output->chars[(output->length)++] =
867 (back_findCharOrDots(chars[k++], 0, table))->uppercase;
868 *nextUpper = 0;
869 }
870 if (!allUpper && !allUpperPhrase) {
871 memcpy(&output->chars[output->length], &chars[k], CHARSIZE * (count - k));
872 output->length += count - k;
873 } else
874 for (; k < count; k++)
875 output->chars[(output->length)++] =
876 (back_findCharOrDots(chars[k], 0, table))->uppercase;
877 return 1;
878 }
879
880 static int
back_updatePositions(const widechar * outChars,int inLength,int outLength,const TranslationTableHeader * table,int pos,const InString * input,OutString * output,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase)881 back_updatePositions(const widechar *outChars, int inLength, int outLength,
882 const TranslationTableHeader *table, int pos, const InString *input,
883 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
884 int *nextUpper, int allUpper, int allUpperPhrase) {
885 int k;
886 if ((output->length + outLength) > output->maxlength ||
887 (pos + inLength) > input->length)
888 return 0;
889 if (!*cursorStatus && *cursorPosition >= pos && *cursorPosition < (pos + inLength)) {
890 *cursorPosition = output->length + outLength / 2;
891 *cursorStatus = 1;
892 }
893 for (k = 0; k < inLength; k++) posMapping[pos + k] = output->length;
894 return putchars(
895 outChars, outLength, table, output, nextUpper, allUpper, allUpperPhrase);
896 }
897
898 static int
undefinedDots(widechar dots,int mode,OutString * output,int pos,int * posMapping)899 undefinedDots(widechar dots, int mode, OutString *output, int pos, int *posMapping) {
900 posMapping[pos] = output->length;
901 if (mode & noUndefined) return 1;
902
903 /* Print out dot numbers */
904 const char *buffer = _lou_unknownDots(dots);
905 size_t buflen = strlen(buffer);
906 if ((output->length + buflen) > output->maxlength) return 0;
907
908 for (unsigned int k = 0; k < buflen; k += 1) {
909 output->chars[output->length++] = buffer[k];
910 }
911
912 return 1;
913 }
914
915 static int
putCharacter(widechar dots,const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int pos,int mode,const InString * input,OutString * output,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase)916 putCharacter(widechar dots, const TranslationTableHeader *table,
917 const DisplayTableHeader *displayTable, int pos, int mode, const InString *input,
918 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
919 int *nextUpper, int allUpper, int allUpperPhrase) {
920 /* Output character(s) corresponding to a Unicode braille Character */
921 TranslationTableOffset offset = (back_findCharOrDots(dots, 1, table))->definitionRule;
922 if (offset) {
923 widechar c;
924 const TranslationTableRule *rule =
925 (TranslationTableRule *)&table->ruleArea[offset];
926 if (rule->charslen)
927 return back_updatePositions(&rule->charsdots[0], rule->dotslen,
928 rule->charslen, table, pos, input, output, posMapping, cursorPosition,
929 cursorStatus, nextUpper, allUpper, allUpperPhrase);
930 c = _lou_getCharFromDots(dots, displayTable);
931 if (c == '\0') c = ' ';
932 return back_updatePositions(&c, 1, 1, table, pos, input, output, posMapping,
933 cursorPosition, cursorStatus, nextUpper, allUpper, allUpperPhrase);
934 }
935 return undefinedDots(dots, mode, output, pos, posMapping);
936 }
937
938 static int
putCharacters(const widechar * characters,int count,const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int pos,int mode,const InString * input,OutString * output,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase)939 putCharacters(const widechar *characters, int count, const TranslationTableHeader *table,
940 const DisplayTableHeader *displayTable, int pos, int mode, const InString *input,
941 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
942 int *nextUpper, int allUpper, int allUpperPhrase) {
943 int k;
944 for (k = 0; k < count; k++)
945 if (!putCharacter(characters[k], table, displayTable, pos, mode, input, output,
946 posMapping, cursorPosition, cursorStatus, nextUpper, allUpper,
947 allUpperPhrase))
948 return 0;
949 return 1;
950 }
951
952 static int
insertSpace(const TranslationTableHeader * table,int pos,const InString * input,OutString * output,char * spacebuf,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase)953 insertSpace(const TranslationTableHeader *table, int pos, const InString *input,
954 OutString *output, char *spacebuf, int *posMapping, int *cursorPosition,
955 int *cursorStatus, int *nextUpper, int allUpper, int allUpperPhrase) {
956 widechar c = ' ';
957 if (!back_updatePositions(&c, 1, 1, table, pos, input, output, posMapping,
958 cursorPosition, cursorStatus, nextUpper, allUpper, allUpperPhrase))
959 return 0;
960 if (spacebuf) spacebuf[output->length - 1] = '1';
961 return 1;
962 }
963
964 static int
compareChars(const widechar * address1,const widechar * address2,int count,int m,const TranslationTableHeader * table)965 compareChars(const widechar *address1, const widechar *address2, int count, int m,
966 const TranslationTableHeader *table) {
967 int k;
968 if (!count) return 0;
969 for (k = 0; k < count; k++)
970 if ((back_findCharOrDots(address1[k], m, table))->lowercase !=
971 (back_findCharOrDots(address2[k], m, table))->lowercase)
972 return 0;
973 return 1;
974 }
975
976 static int
makeCorrections(const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int mode,int currentPass,const InString * input,OutString * output,int * posMapping,int * realInlen,int * cursorPosition,int * cursorStatus,const TranslationTableRule ** appliedRules,int * appliedRulesCount,int maxAppliedRules)977 makeCorrections(const TranslationTableHeader *table,
978 const DisplayTableHeader *displayTable, int mode, int currentPass,
979 const InString *input, OutString *output, int *posMapping, int *realInlen,
980 int *cursorPosition, int *cursorStatus, const TranslationTableRule **appliedRules,
981 int *appliedRulesCount, int maxAppliedRules) {
982 int pos;
983 int nextUpper = 0;
984 int allUpper = 0;
985 int allUpperPhrase = 0;
986 if (!table->corrections) return 1;
987 pos = 0;
988 output->length = 0;
989 _lou_resetPassVariables();
990 while (pos < input->length) {
991 TranslationTableOpcode currentOpcode;
992 const TranslationTableRule *currentRule; /* pointer to current rule in table */
993 const widechar *passInstructions;
994 int passIC; /* Instruction counter */
995 PassRuleMatch patternMatch;
996 int length = input->length - pos;
997 const TranslationTableCharacter *character =
998 back_findCharOrDots(input->chars[pos], 0, table);
999 const TranslationTableCharacter *character2;
1000 int tryThis = 0;
1001 if (!findBackPassRule(table, pos, currentPass, input, ¤tOpcode,
1002 ¤tRule, &passInstructions, &passIC, &patternMatch))
1003 while (tryThis < 3) {
1004 TranslationTableOffset ruleOffset = 0;
1005 unsigned long int makeHash = 0;
1006 switch (tryThis) {
1007 case 0:
1008 if (!(length >= 2)) break;
1009 makeHash = (unsigned long int)character->lowercase << 8;
1010 character2 = back_findCharOrDots(input->chars[pos + 1], 0, table);
1011 makeHash += (unsigned long int)character2->lowercase;
1012 makeHash %= HASHNUM;
1013 ruleOffset = table->forRules[makeHash];
1014 break;
1015 case 1:
1016 if (!(length >= 1)) break;
1017 length = 1;
1018 ruleOffset = character->otherRules;
1019 break;
1020 case 2: /* No rule found */
1021 currentOpcode = CTO_Always;
1022 ruleOffset = 0;
1023 break;
1024 }
1025 while (ruleOffset) {
1026 currentRule = (TranslationTableRule *)&table->ruleArea[ruleOffset];
1027 currentOpcode = currentRule->opcode;
1028 int currentCharslen = currentRule->charslen;
1029 if (tryThis == 1 ||
1030 (currentCharslen <= length &&
1031 compareChars(¤tRule->charsdots[0],
1032 &input->chars[pos], currentCharslen, 0,
1033 table))) {
1034 if (currentOpcode == CTO_Correct &&
1035 back_passDoTest(table, pos, input, currentOpcode,
1036 currentRule, &passInstructions, &passIC,
1037 &patternMatch)) {
1038 tryThis = 4;
1039 break;
1040 }
1041 }
1042 ruleOffset = currentRule->dotsnext;
1043 }
1044 tryThis++;
1045 }
1046 switch (currentOpcode) {
1047 case CTO_Always:
1048 if (output->length >= output->maxlength) goto failure;
1049 posMapping[pos] = output->length;
1050 output->chars[(output->length)++] = input->chars[pos++];
1051 break;
1052 case CTO_Correct:
1053 if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
1054 appliedRules[(*appliedRulesCount)++] = currentRule;
1055 if (!back_passDoAction(table, displayTable, &pos, mode, input, output,
1056 posMapping, cursorPosition, cursorStatus, &nextUpper, allUpper,
1057 allUpperPhrase, currentOpcode, currentRule, passInstructions,
1058 passIC, patternMatch))
1059 goto failure;
1060 break;
1061 default:
1062 break;
1063 }
1064 }
1065 failure:
1066 *realInlen = pos;
1067 return 1;
1068 }
1069
1070 static int
backTranslateString(const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int mode,int currentPass,const InString * input,OutString * output,char * spacebuf,int * posMapping,int * realInlen,int * cursorPosition,int * cursorStatus,const TranslationTableRule ** appliedRules,int * appliedRulesCount,int maxAppliedRules)1071 backTranslateString(const TranslationTableHeader *table,
1072 const DisplayTableHeader *displayTable, int mode, int currentPass,
1073 const InString *input, OutString *output, char *spacebuf, int *posMapping,
1074 int *realInlen, int *cursorPosition, int *cursorStatus,
1075 const TranslationTableRule **appliedRules, int *appliedRulesCount,
1076 int maxAppliedRules) {
1077 int pos;
1078 int nextUpper;
1079 int allUpper;
1080 int allUpperPhrase;
1081 int itsANumber;
1082 int itsALetter;
1083 /* Back translation */
1084 int srcword = 0;
1085 int destword = 0; /* last word translated */
1086 TranslationTableOpcode previousOpcode;
1087 int doingMultind = 0;
1088 const TranslationTableRule *multindRule;
1089 _lou_resetPassVariables();
1090 translation_direction = 0;
1091 nextUpper = allUpper = allUpperPhrase = itsANumber = itsALetter = 0;
1092 previousOpcode = CTO_None;
1093 pos = output->length = 0;
1094 while (pos < input->length) {
1095 /* the main translation loop */
1096 int currentDotslen; /* length of current find string */
1097 TranslationTableOpcode currentOpcode;
1098 const TranslationTableRule *currentRule; /* pointer to current rule in table */
1099 TranslationTableCharacterAttributes beforeAttributes;
1100 const widechar *passInstructions;
1101 int passIC; /* Instruction counter */
1102 PassRuleMatch patternMatch;
1103 back_setBefore(table, output, &beforeAttributes);
1104 if ((allUpper == 1) && (beforeAttributes & CTC_UpperCase))
1105 // Capsword in progress
1106 allUpper = 2;
1107 else if ((allUpper == 2) && !(beforeAttributes & CTC_UpperCase) &&
1108 !(beforeAttributes & CTC_CapsMode))
1109 // terminate capsword
1110 allUpper = 0;
1111 if ((itsANumber == 2) && output->length > 0 &&
1112 !(beforeAttributes & CTC_LitDigit) &&
1113 !(beforeAttributes & CTC_NumericMode) &&
1114 !(beforeAttributes & CTC_MidEndNumericMode))
1115 itsANumber = 0;
1116 back_selectRule(table, pos, mode, input, output, itsANumber, itsALetter,
1117 ¤tDotslen, ¤tOpcode, ¤tRule, previousOpcode,
1118 &doingMultind, &multindRule, beforeAttributes, &passInstructions, &passIC,
1119 &patternMatch);
1120 if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
1121 appliedRules[(*appliedRulesCount)++] = currentRule;
1122 /* processing before replacement */
1123 switch (currentOpcode) {
1124 case CTO_LargeSign:
1125 if (previousOpcode == CTO_LargeSign)
1126 if (!insertSpace(table, pos, input, output, spacebuf, posMapping,
1127 cursorPosition, cursorStatus, &nextUpper, allUpper,
1128 allUpperPhrase))
1129 goto failure;
1130 break;
1131 case CTO_CapsLetterRule:
1132 nextUpper = 1;
1133 allUpper = 0;
1134 itsANumber = 0;
1135 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1136 continue;
1137 break;
1138 case CTO_BegCapsWordRule:
1139 allUpper = 1;
1140 itsANumber = 0;
1141 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1142 continue;
1143 break;
1144 case CTO_BegCapsRule:
1145 allUpperPhrase = 1;
1146 itsANumber = 0;
1147 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1148 continue;
1149 break;
1150 case CTO_EndCapsWordRule:
1151 allUpper = 0;
1152 itsANumber = 0;
1153 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1154 continue;
1155 break;
1156 case CTO_EndCapsRule:
1157 allUpperPhrase = 0;
1158 itsANumber = 0;
1159 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1160 continue;
1161 break;
1162 case CTO_LetterRule:
1163 case CTO_NoContractRule:
1164 itsALetter = 1;
1165 itsANumber = 0;
1166 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1167 continue;
1168 break;
1169 case CTO_NumberRule:
1170 itsANumber = 1; // Starting number
1171 allUpper = 0;
1172 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1173 continue;
1174 break;
1175 case CTO_LitDigit:
1176 itsANumber = 2; // In the middle of a number
1177 break;
1178 case CTO_BegCompRule:
1179 itsANumber = 0;
1180 case CTO_BegEmph1Rule:
1181 case CTO_BegEmph2Rule:
1182 case CTO_BegEmph3Rule:
1183 case CTO_EndEmph1Rule:
1184 case CTO_EndEmph2Rule:
1185 case CTO_EndEmph3Rule:
1186 case CTO_EndCompRule:
1187 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1188 continue;
1189 break;
1190
1191 default:
1192 break;
1193 }
1194
1195 /* replacement processing */
1196 switch (currentOpcode) {
1197 case CTO_Context:
1198 if (!back_passDoAction(table, displayTable, &pos, mode, input, output,
1199 posMapping, cursorPosition, cursorStatus, &nextUpper, allUpper,
1200 allUpperPhrase, currentOpcode, currentRule, passInstructions,
1201 passIC, patternMatch))
1202 return 0;
1203 break;
1204 case CTO_Replace:
1205 while (currentDotslen-- > 0) posMapping[pos++] = output->length;
1206 if (!putCharacters(¤tRule->charsdots[0], currentRule->charslen, table,
1207 displayTable, pos, mode, input, output, posMapping,
1208 cursorPosition, cursorStatus, &nextUpper, allUpper,
1209 allUpperPhrase))
1210 goto failure;
1211 break;
1212 case CTO_None:
1213 if (!undefinedDots(input->chars[pos], mode, output, pos, posMapping))
1214 goto failure;
1215 pos++;
1216 break;
1217 case CTO_BegNum:
1218 itsANumber = 1;
1219 goto insertChars;
1220 case CTO_EndNum:
1221 itsANumber = 0;
1222 goto insertChars;
1223 case CTO_Space:
1224 itsALetter = itsANumber = allUpper = nextUpper = 0;
1225 goto insertChars;
1226 default:
1227 insertChars:
1228 if (currentRule->charslen) {
1229 if (!back_updatePositions(¤tRule->charsdots[0],
1230 currentRule->dotslen, currentRule->charslen, table, pos,
1231 input, output, posMapping, cursorPosition, cursorStatus,
1232 &nextUpper, allUpper, allUpperPhrase))
1233 goto failure;
1234 pos += currentDotslen;
1235 } else {
1236 int srclim = pos + currentDotslen;
1237 while (1) {
1238 if (!putCharacter(input->chars[pos], table, displayTable, pos, mode,
1239 input, output, posMapping, cursorPosition, cursorStatus,
1240 &nextUpper, allUpper, allUpperPhrase))
1241 goto failure;
1242 if (++pos == srclim) break;
1243 }
1244 }
1245 }
1246
1247 /* processing after replacement */
1248 switch (currentOpcode) {
1249 case CTO_JoinNum:
1250 case CTO_JoinableWord:
1251 if (!insertSpace(table, pos, input, output, spacebuf, posMapping,
1252 cursorPosition, cursorStatus, &nextUpper, allUpper,
1253 allUpperPhrase))
1254 goto failure;
1255 break;
1256 default:
1257 passSelectRule(table, pos, currentPass, input, ¤tOpcode, ¤tRule,
1258 &passInstructions, &passIC, &patternMatch);
1259 if (currentOpcode == CTO_Context) {
1260 back_passDoAction(table, displayTable, &pos, mode, input, output,
1261 posMapping, cursorPosition, cursorStatus, &nextUpper, allUpper,
1262 allUpperPhrase, currentOpcode, currentRule, passInstructions,
1263 passIC, patternMatch);
1264 }
1265 break;
1266 }
1267 if (((pos > 0) && checkAttr(input->chars[pos - 1], CTC_Space, 1, table) &&
1268 (currentOpcode != CTO_JoinableWord))) {
1269 srcword = pos;
1270 destword = output->length;
1271 }
1272 if ((currentOpcode >= CTO_Always && currentOpcode <= CTO_None) ||
1273 (currentOpcode >= CTO_Digit && currentOpcode <= CTO_LitDigit))
1274 previousOpcode = currentOpcode;
1275 } /* end of translation loop */
1276 failure:
1277
1278 if (destword != 0 && pos < input->length &&
1279 !checkAttr(input->chars[pos], CTC_Space, 1, table)) {
1280 pos = srcword;
1281 output->length = destword;
1282 }
1283 if (pos < input->length) {
1284 while (checkAttr(input->chars[pos], CTC_Space, 1, table))
1285 if (++pos == input->length) break;
1286 }
1287 *realInlen = pos;
1288 return 1;
1289 } /* translation completed */
1290
1291 /* Multipass translation */
1292
1293 static int
matchCurrentInput(const InString * input,int pos,const widechar * passInstructions,int passIC)1294 matchCurrentInput(
1295 const InString *input, int pos, const widechar *passInstructions, int passIC) {
1296 int k;
1297 int kk = pos;
1298 for (k = passIC + 2; k < passIC + 2 + passInstructions[passIC + 1]; k++)
1299 if (passInstructions[k] != input->chars[kk++]) return 0;
1300 return 1;
1301 }
1302
1303 static int
back_swapTest(const TranslationTableHeader * table,const InString * input,int * pos,const widechar * passInstructions,int passIC)1304 back_swapTest(const TranslationTableHeader *table, const InString *input, int *pos,
1305 const widechar *passInstructions, int passIC) {
1306 int curLen;
1307 int curTest;
1308 int curSrc = *pos;
1309 TranslationTableOffset swapRuleOffset;
1310 TranslationTableRule *swapRule;
1311 swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
1312 swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
1313 for (curLen = 0; curLen < passInstructions[passIC] + 3; curLen++) {
1314 for (curTest = 0; curTest < swapRule->charslen; curTest++) {
1315 if (input->chars[curSrc] == swapRule->charsdots[curTest]) break;
1316 }
1317 if (curTest == swapRule->charslen) return 0;
1318 curSrc++;
1319 }
1320 if (passInstructions[passIC + 2] == passInstructions[passIC + 3]) {
1321 *pos = curSrc;
1322 return 1;
1323 }
1324 while (curLen < passInstructions[passIC + 4]) {
1325 for (curTest = 0; curTest < swapRule->charslen; curTest++) {
1326 if (input->chars[curSrc] != swapRule->charsdots[curTest]) break;
1327 }
1328 if (curTest < swapRule->charslen)
1329 if (curTest < swapRule->charslen) {
1330 *pos = curSrc;
1331 return 1;
1332 }
1333 curSrc++;
1334 curLen++;
1335 }
1336 *pos = curSrc;
1337 return 1;
1338 }
1339
1340 static int
back_swapReplace(int start,int end,const TranslationTableHeader * table,const InString * input,OutString * output,int * posMapping,const widechar * passInstructions,int passIC)1341 back_swapReplace(int start, int end, const TranslationTableHeader *table,
1342 const InString *input, OutString *output, int *posMapping,
1343 const widechar *passInstructions, int passIC) {
1344 TranslationTableOffset swapRuleOffset;
1345 TranslationTableRule *swapRule;
1346 widechar *replacements;
1347 int p;
1348 int lastPos = 0;
1349 int lastRep = 0;
1350 swapRuleOffset = (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
1351 swapRule = (TranslationTableRule *)&table->ruleArea[swapRuleOffset];
1352 replacements = &swapRule->charsdots[swapRule->charslen];
1353 for (p = start; p < end; p++) {
1354 int rep;
1355 int test;
1356 int k;
1357 for (test = 0; test < swapRule->charslen; test++)
1358 if (input->chars[p] == swapRule->charsdots[test]) break;
1359 if (test == swapRule->charslen) return p;
1360 if (test >= lastRep) {
1361 k = lastPos;
1362 rep = lastRep;
1363 } else {
1364 k = 0;
1365 rep = 0;
1366 }
1367 while (k < swapRule->dotslen) {
1368 if (rep == test) {
1369 int l = replacements[k] - 1;
1370 if (output->length + l >= output->maxlength) return 0;
1371 posMapping[p] = output->length;
1372 memcpy(&output->chars[output->length], &replacements[k + 1],
1373 l * CHARSIZE);
1374 output->length += l;
1375 lastPos = k;
1376 lastRep = rep;
1377 break;
1378 }
1379 rep++;
1380 k += replacements[k];
1381 }
1382 }
1383 return p;
1384 }
1385
1386 static int
back_passDoTest(const TranslationTableHeader * table,int pos,const InString * input,TranslationTableOpcode currentOpcode,const TranslationTableRule * currentRule,const widechar ** passInstructions,int * passIC,PassRuleMatch * match)1387 back_passDoTest(const TranslationTableHeader *table, int pos, const InString *input,
1388 TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
1389 const widechar **passInstructions, int *passIC, PassRuleMatch *match) {
1390 int k;
1391 int m;
1392 int not = 0;
1393 TranslationTableCharacterAttributes attributes;
1394 *passInstructions = ¤tRule->charsdots[currentRule->charslen];
1395 *passIC = 0;
1396 match->startMatch = match->endMatch = pos;
1397 match->startReplace = -1;
1398 if (currentOpcode == CTO_Correct)
1399 m = 0;
1400 else
1401 m = 1;
1402 while (*passIC < currentRule->dotslen) {
1403 int itsTrue = 1;
1404 if (pos > input->length) return 0;
1405 switch ((*passInstructions)[*passIC]) {
1406 case pass_first:
1407 if (pos != 0) itsTrue = 0;
1408 (*passIC)++;
1409 break;
1410 case pass_last:
1411 if (pos != input->length) itsTrue = 0;
1412 (*passIC)++;
1413 break;
1414 case pass_lookback:
1415 pos -= (*passInstructions)[*passIC + 1];
1416 if (pos < 0) {
1417 pos = 0;
1418 itsTrue = 0;
1419 }
1420 *passIC += 2;
1421 break;
1422 case pass_not:
1423 not = !not;
1424 (*passIC)++;
1425 continue;
1426 case pass_string:
1427 case pass_dots:
1428 itsTrue = matchCurrentInput(input, pos, *passInstructions, *passIC);
1429 pos += (*passInstructions)[*passIC + 1];
1430 *passIC += (*passInstructions)[*passIC + 1] + 2;
1431 break;
1432 case pass_startReplace:
1433 match->startReplace = pos;
1434 (*passIC)++;
1435 break;
1436 case pass_endReplace:
1437 match->endReplace = pos;
1438 (*passIC)++;
1439 break;
1440 case pass_attributes:
1441 attributes = ((*passInstructions)[*passIC + 1] << 16) |
1442 (*passInstructions)[*passIC + 2];
1443 for (k = 0; k < (*passInstructions)[*passIC + 3]; k++) {
1444 if (pos >= input->length) {
1445 itsTrue = 0;
1446 break;
1447 }
1448 if (!(back_findCharOrDots(input->chars[pos], m, table)->attributes &
1449 attributes)) {
1450 itsTrue = 0;
1451 break;
1452 }
1453 pos++;
1454 }
1455 if (itsTrue) {
1456 for (k = (*passInstructions)[*passIC + 3];
1457 k < (*passInstructions)[*passIC + 4] && pos < input->length;
1458 k++) {
1459 if (!(back_findCharOrDots(input->chars[pos], m, table)->attributes &
1460 attributes))
1461 break;
1462 pos++;
1463 }
1464 }
1465 *passIC += 5;
1466 break;
1467 case pass_swap:
1468 itsTrue = back_swapTest(table, input, &pos, *passInstructions, *passIC);
1469 *passIC += 5;
1470 break;
1471 case pass_endTest: {
1472 (*passIC)++;
1473 match->endMatch = pos;
1474 if (match->startReplace == -1) {
1475 match->startReplace = match->startMatch;
1476 match->endReplace = match->endMatch;
1477 }
1478 return 1;
1479 break;
1480 }
1481 default:
1482 if (_lou_handlePassVariableTest(*passInstructions, passIC, &itsTrue)) break;
1483 return 0;
1484 }
1485 if ((!not&&!itsTrue) || (not&&itsTrue)) return 0;
1486 not = 0;
1487 }
1488 return 1;
1489 }
1490
1491 static int
copyCharacters(int from,int to,const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int mode,const InString * input,OutString * output,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase,TranslationTableOpcode currentOpcode)1492 copyCharacters(int from, int to, const TranslationTableHeader *table,
1493 const DisplayTableHeader *displayTable, int mode, const InString *input,
1494 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
1495 int *nextUpper, int allUpper, int allUpperPhrase,
1496 TranslationTableOpcode currentOpcode) {
1497 if (currentOpcode == CTO_Context) {
1498 while (from < to) {
1499 if (!putCharacter(input->chars[from], table, displayTable, from, mode, input,
1500 output, posMapping, cursorPosition, cursorStatus, nextUpper,
1501 allUpper, allUpperPhrase))
1502 return 0;
1503 from++;
1504 }
1505 } else {
1506 if (to > from) {
1507 if ((output->length + to - from) > output->maxlength) return 0;
1508 while (to > from) {
1509 posMapping[from] = output->length;
1510 output->chars[output->length] = input->chars[from];
1511 output->length++;
1512 from++;
1513 }
1514 }
1515 }
1516
1517 return 1;
1518 }
1519
1520 static int
back_passDoAction(const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int * pos,int mode,const InString * input,OutString * output,int * posMapping,int * cursorPosition,int * cursorStatus,int * nextUpper,int allUpper,int allUpperPhrase,TranslationTableOpcode currentOpcode,const TranslationTableRule * currentRule,const widechar * passInstructions,int passIC,PassRuleMatch match)1521 back_passDoAction(const TranslationTableHeader *table,
1522 const DisplayTableHeader *displayTable, int *pos, int mode, const InString *input,
1523 OutString *output, int *posMapping, int *cursorPosition, int *cursorStatus,
1524 int *nextUpper, int allUpper, int allUpperPhrase,
1525 TranslationTableOpcode currentOpcode, const TranslationTableRule *currentRule,
1526 const widechar *passInstructions, int passIC, PassRuleMatch match) {
1527 int k;
1528 int destStartMatch = output->length;
1529 int destStartReplace;
1530 int newPos = match.endReplace;
1531
1532 if (!copyCharacters(match.startMatch, match.startReplace, table, displayTable, mode,
1533 input, output, posMapping, cursorPosition, cursorStatus, nextUpper,
1534 allUpper, allUpperPhrase, currentOpcode))
1535 return 0;
1536 destStartReplace = output->length;
1537
1538 for (k = match.startReplace; k < match.endReplace; k++)
1539 posMapping[k] = output->length;
1540 while (passIC < currentRule->dotslen) switch (passInstructions[passIC]) {
1541 case pass_string:
1542 case pass_dots:
1543 if ((output->length + passInstructions[passIC + 1]) > output->maxlength)
1544 return 0;
1545 memcpy(&output->chars[output->length], &passInstructions[passIC + 2],
1546 passInstructions[passIC + 1] * sizeof(*output->chars));
1547 output->length += passInstructions[passIC + 1];
1548 passIC += passInstructions[passIC + 1] + 2;
1549 break;
1550 case pass_swap:
1551 if (!back_swapReplace(match.startReplace, match.endReplace, table, input,
1552 output, posMapping, passInstructions, passIC))
1553 return 0;
1554 passIC += 3;
1555 break;
1556 case pass_omit:
1557 passIC++;
1558 break;
1559 case pass_copy: {
1560 int count = destStartReplace - destStartMatch;
1561 if (count > 0) {
1562 memmove(&output->chars[destStartMatch], &output->chars[destStartReplace],
1563 count * sizeof(*output->chars));
1564 output->length -= count;
1565 destStartReplace = destStartMatch;
1566 }
1567 }
1568
1569 if (!copyCharacters(match.startReplace, match.endReplace, table, displayTable,
1570 mode, input, output, posMapping, cursorPosition, cursorStatus,
1571 nextUpper, allUpper, allUpperPhrase, currentOpcode))
1572 return 0;
1573 newPos = match.endMatch;
1574 passIC++;
1575 break;
1576 default:
1577 if (_lou_handlePassVariableAction(passInstructions, &passIC)) break;
1578 return 0;
1579 }
1580 *pos = newPos;
1581 return 1;
1582 }
1583
1584 static void
passSelectRule(const TranslationTableHeader * table,int pos,int currentPass,const InString * input,TranslationTableOpcode * currentOpcode,const TranslationTableRule ** currentRule,const widechar ** passInstructions,int * passIC,PassRuleMatch * match)1585 passSelectRule(const TranslationTableHeader *table, int pos, int currentPass,
1586 const InString *input, TranslationTableOpcode *currentOpcode,
1587 const TranslationTableRule **currentRule, const widechar **passInstructions,
1588 int *passIC, PassRuleMatch *match) {
1589 if (!findBackPassRule(table, pos, currentPass, input, currentOpcode, currentRule,
1590 passInstructions, passIC, match)) {
1591 *currentOpcode = CTO_Always;
1592 }
1593 }
1594
1595 static int
translatePass(const TranslationTableHeader * table,const DisplayTableHeader * displayTable,int mode,int currentPass,const InString * input,OutString * output,int * posMapping,int * realInlen,int * cursorPosition,int * cursorStatus,const TranslationTableRule ** appliedRules,int * appliedRulesCount,int maxAppliedRules)1596 translatePass(const TranslationTableHeader *table, const DisplayTableHeader *displayTable,
1597 int mode, int currentPass, const InString *input, OutString *output,
1598 int *posMapping, int *realInlen, int *cursorPosition, int *cursorStatus,
1599 const TranslationTableRule **appliedRules, int *appliedRulesCount,
1600 int maxAppliedRules) {
1601 int pos;
1602 int nextUpper = 0;
1603 int allUpper = 0;
1604 int allUpperPhrase = 0;
1605 pos = output->length = 0;
1606 _lou_resetPassVariables();
1607 while (pos < input->length) { /* the main multipass translation loop */
1608 TranslationTableOpcode currentOpcode;
1609 const TranslationTableRule *currentRule; /* pointer to current rule in table */
1610 const widechar *passInstructions;
1611 int passIC; /* Instruction counter */
1612 PassRuleMatch patternMatch;
1613 passSelectRule(table, pos, currentPass, input, ¤tOpcode, ¤tRule,
1614 &passInstructions, &passIC, &patternMatch);
1615 switch (currentOpcode) {
1616 case CTO_Pass2:
1617 case CTO_Pass3:
1618 case CTO_Pass4:
1619 if (appliedRules != NULL && *appliedRulesCount < maxAppliedRules)
1620 appliedRules[(*appliedRulesCount)++] = currentRule;
1621 if (!back_passDoAction(table, displayTable, &pos, mode, input, output,
1622 posMapping, cursorPosition, cursorStatus, &nextUpper, allUpper,
1623 allUpperPhrase, currentOpcode, currentRule, passInstructions,
1624 passIC, patternMatch))
1625 goto failure;
1626 break;
1627 case CTO_Always:
1628 if ((output->length + 1) > output->maxlength) goto failure;
1629 posMapping[pos] = output->length;
1630 output->chars[(output->length)++] = input->chars[pos++];
1631 break;
1632 default:
1633 goto failure;
1634 }
1635 }
1636 failure:
1637 if (pos < input->length) {
1638 while (checkAttr(input->chars[pos], CTC_Space, 1, table))
1639 if (++pos == input->length) break;
1640 }
1641 *realInlen = pos;
1642 return 1;
1643 }
1644