1 /* liblouis Braille Translation and Back-Translation Library
2
3 Copyright (C) 2017 Bert Frees
4
5 This file is part of liblouis.
6
7 liblouis is free software: you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation, either version 2.1 of the License, or
10 (at your option) any later version.
11
12 liblouis is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <stdio.h>
22 #include <string.h>
23 #include <stdlib.h>
24 #include "internal.h"
25
/*
 * Tables used by every helper in this file. Both pointers are filled in by
 * loadTable() through _lou_getTable(); the helpers dereference them without a
 * NULL check.
 */
static const TranslationTableHeader *table;
static const DisplayTableHeader *displayTable;
28
29 extern void
loadTable(const char * tableList)30 loadTable(const char *tableList) {
31 _lou_getTable(tableList, tableList, &table, &displayTable);
32 }
33
34 extern int
hyphenationEnabled()35 hyphenationEnabled() {
36 return table->hyphenStatesArray;
37 }
38
39 extern int
isLetter(widechar c)40 isLetter(widechar c) {
41 static unsigned long int hash;
42 static TranslationTableOffset offset;
43 static TranslationTableCharacter *character;
44 hash = _lou_charHash(c);
45 offset = table->characters[hash];
46 while (offset) {
47 character = (TranslationTableCharacter *)&table->ruleArea[offset];
48 if (character->value == c) return character->attributes & CTC_Letter;
49 offset = character->next;
50 }
51 return 0;
52 }
53
54 extern widechar
toLowercase(widechar c)55 toLowercase(widechar c) {
56 static TranslationTableOffset offset;
57 static TranslationTableCharacter *character;
58 offset = table->characters[_lou_charHash(c)];
59 while (offset) {
60 character = (TranslationTableCharacter *)&table->ruleArea[offset];
61 if (character->value == c) {
62 if (character->mode & CTC_UpperCase) {
63 const TranslationTableCharacter *c = character;
64 if (c->basechar)
65 c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
66 while (1) {
67 if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
68 (character->mode & ~CTC_UpperCase))
69 return c->value;
70 if (!c->linked) break;
71 c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
72 }
73 }
74 return character->value;
75 }
76 offset = character->next;
77 }
78 return c;
79 }
80
81 extern void
toDotPattern(widechar * braille,char * pattern)82 toDotPattern(widechar *braille, char *pattern) {
83 int length;
84 widechar *dots;
85 int i;
86 for (length = 0; braille[length]; length++)
87 ;
88 dots = (widechar *)malloc((length + 1) * sizeof(widechar));
89 for (i = 0; i < length; i++) dots[i] = _lou_getDotsForChar(braille[i], displayTable);
90 strcpy(pattern, _lou_showDots(dots, length));
91 free(dots);
92 }
93
94 extern int
printRule(TranslationTableRule * rule,widechar * rule_string)95 printRule(TranslationTableRule *rule, widechar *rule_string) {
96 switch (rule->opcode) {
97 case CTO_Context:
98 case CTO_Correct:
99 case CTO_SwapCd:
100 case CTO_SwapDd:
101 case CTO_Pass2:
102 case CTO_Pass3:
103 case CTO_Pass4:
104 return 0;
105 default: {
106 int l = 0;
107 if (rule->nocross)
108 for (char *c = "nocross "; *c; c++) rule_string[l++] = *c;
109 const char *opcode = _lou_findOpcodeName(rule->opcode);
110 for (size_t k = 0; k < strlen(opcode); k++) rule_string[l++] = opcode[k];
111 rule_string[l++] = '\t';
112 for (int k = 0; k < rule->charslen; k++) rule_string[l++] = rule->charsdots[k];
113 rule_string[l++] = '\t';
114 for (int k = 0; k < rule->dotslen; k++) {
115 rule_string[l] = _lou_getCharForDots(
116 rule->charsdots[rule->charslen + k], displayTable);
117 if (rule_string[l] == '\0') {
118 // if a dot pattern can not be displayed, print an error message
119 char *message = (char *)malloc(50 * sizeof(char));
120 sprintf(message, "ERROR: provide a display rule for dots %s",
121 _lou_showDots(&rule->charsdots[rule->charslen + k], 1));
122 l = 0;
123 while (message[l]) {
124 rule_string[l] = message[l];
125 l++;
126 }
127 rule_string[l++] = '\0';
128 free(message);
129 return 1;
130 }
131 l++;
132 }
133 rule_string[l++] = '\0';
134 return 1;
135 }
136 }
137 }
138
/* Set DEBUG to a non-zero value to trace rule matching on stdout. */
#define DEBUG 0

#if !DEBUG
/* Tracing disabled: expands to nothing, arguments are not evaluated. */
#define debug(fmt, ...)
#else
/*
 * printf-style trace line followed by a newline. The line is indented by
 * `debug_indent`, which must be a variable in scope at the call site.
 */
#define debug(fmt, ...) \
	do { \
		printf("%*s" fmt "\n", debug_indent, "", ##__VA_ARGS__); \
	} while (0)
#endif
149
150 static int
find_matching_rules(widechar * text,int text_len,widechar * braille,int braille_len,char * data,int clear_data)151 find_matching_rules(widechar *text, int text_len, widechar *braille, int braille_len,
152 char *data, int clear_data) {
153 TranslationTableOffset offset;
154 TranslationTableRule *rule;
155 TranslationTableCharacter *character;
156 char *data_save;
157 int hash_len, k;
158 #if DEBUG
159 static int initial_text_len = 0;
160 int debug_indent = 0;
161 if (data[-1] == '^') {
162 initial_text_len = text_len;
163 for (k = 0; k < text_len; k++) printf("%c", text[k]);
164 printf(" <=> ");
165 for (k = 0; k < braille_len; k++) printf("%c", braille[k]);
166 printf("\n");
167 } else
168 debug_indent = initial_text_len - text_len;
169 #endif
170
171 /* finish */
172 if (text_len == 0 && braille_len == 0) {
173 data[-1] = '$';
174 return 1;
175 }
176
177 /* save data */
178 data_save = (char *)malloc(text_len * sizeof(char));
179 memcpy(data_save, data, text_len);
180
181 for (k = 0; k < text_len; k++)
182 if (data[k] == ')')
183 data[k] = '>';
184 else if (clear_data)
185 data[k] = '-';
186 debug("%s", data);
187
188 /* iterate over rules */
189 for (hash_len = 2; hash_len >= 1; hash_len--) {
190 offset = 0;
191 switch (hash_len) {
192 case 2:
193 if (text_len < 2) break;
194 offset = table->forRules[_lou_stringHash(text, 1, table)];
195 break;
196 case 1:
197 offset = table->characters[_lou_charHash(text[0])];
198 while (offset) {
199 character = (TranslationTableCharacter *)&table->ruleArea[offset];
200 if (character->value == text[0]) {
201 offset = character->otherRules;
202 break;
203 } else
204 offset = character->next;
205 }
206 }
207 while (offset) {
208 rule = (TranslationTableRule *)&table->ruleArea[offset];
209 #if DEBUG
210 widechar print_string[128];
211 printRule(rule, print_string);
212 printf("%*s=> ", debug_indent, "");
213 for (k = 0; print_string[k]; k++) printf("%c", print_string[k]);
214 printf("\n");
215 #endif
216
217 /* select rule */
218 if (rule->charslen == 0 || rule->dotslen == 0) goto next_rule;
219 if (rule->charslen > text_len) goto next_rule;
220 switch (rule->opcode) {
221 case CTO_WholeWord:
222 if (data[-1] == '^' && rule->charslen == text_len) break;
223 goto next_rule;
224 case CTO_SuffixableWord:
225 if (data[-1] == '^') break;
226 goto next_rule;
227 case CTO_PrefixableWord:
228 if (rule->charslen == text_len) break;
229 goto next_rule;
230 case CTO_BegWord:
231 if (data[-1] == '^' && rule->charslen < text_len) break;
232 goto next_rule;
233 case CTO_BegMidWord:
234 if (rule->charslen < text_len) break;
235 goto next_rule;
236 case CTO_MidWord:
237 if (data[-1] != '^' && rule->charslen < text_len) break;
238 goto next_rule;
239 case CTO_MidEndWord:
240 if (data[-1] != '^') break;
241 goto next_rule;
242 case CTO_EndWord:
243 if (data[-1] != '^' && rule->charslen == text_len) break;
244 goto next_rule;
245 case CTO_Letter:
246 case CTO_UpperCase:
247 case CTO_LowerCase:
248 case CTO_Punctuation:
249 case CTO_Always:
250 break;
251 default:
252 goto next_rule;
253 }
254 for (k = 0; k < rule->charslen; k++)
255 if (rule->charsdots[k] != text[k]) goto next_rule;
256 debug("** rule selected **");
257
258 /* inhibit rule */
259 if (rule->dotslen > braille_len ||
260 (rule->charslen == text_len && rule->dotslen < braille_len) ||
261 (rule->dotslen == braille_len && rule->charslen < text_len))
262 goto inhibit;
263 for (k = 0; k < rule->dotslen; k++)
264 if (_lou_getCharForDots(rule->charsdots[rule->charslen + k],
265 displayTable) != braille[k])
266 goto inhibit;
267
268 /* don't let this rule be inhibited by an earlier rule */
269 int inhibit_all = 0;
270 if (rule->nocross)
271 for (k = 0; k < rule->charslen - 1; k++)
272 if (data[k + 1] == '>') {
273 if (data[-1] == 'x')
274 inhibit_all = 1;
275 else
276 goto next_rule;
277 }
278
279 /* fill data */
280 if (rule->nocross)
281 ; // deferred: see success
282 else {
283 k = 0;
284 while (k < rule->charslen - 1) {
285 if (data[k + 1] == '>') {
286 data[k++] = '1';
287 memset(&data[k], '-', text_len - k);
288 } else
289 data[k++] = 'x';
290 }
291 }
292 if (data[rule->charslen] == '>' || data[rule->charslen] == ')') {
293 data[rule->charslen - 1] = '1';
294 memset(&data[rule->charslen], '-', text_len - rule->charslen);
295 } else
296 data[rule->charslen - 1] = 'x';
297 debug("%s", data);
298
299 /* recur */
300 if (find_matching_rules(&text[rule->charslen], text_len - rule->charslen,
301 &braille[rule->dotslen], braille_len - rule->dotslen,
302 &data[rule->charslen], inhibit_all))
303 goto success;
304
305 inhibit:
306 debug("** rule inhibited **");
307 if (rule->nocross) {
308 if (rule->charslen < 2) goto abort;
309 /* inhibited by earlier rule */
310 for (k = 0; k < rule->charslen - 1; k++)
311 if (data[k + 1] == '>' && data[-1] != 'x') goto next_rule;
312 data[rule->charslen - 1] = ')';
313 debug("%s", data);
314 goto next_rule;
315 } else {
316 goto abort;
317 }
318
319 success:
320 /* fill data (deferred) */
321 if (inhibit_all) data[-1] = '1';
322 if (rule->nocross) {
323 memset(data, '0', rule->charslen - 1);
324 debug("%s", data);
325 }
326 free(data_save);
327 return 1;
328
329 next_rule:
330 offset = rule->charsnext;
331 }
332 }
333
334 abort:
335 /* restore data */
336 memcpy(data, data_save, text_len);
337 free(data_save);
338 debug("** abort **");
339 return 0;
340 }
341
342 /*
343 * - begin with all -
344 * - set cursor position right before the word
345 * - put a ^
346 * - match rules
347 * - when a rule has been selected
348 * - if the braille does not match: try inhibiting the rule
349 * - if it's a nocross rule (longer than a single character)
350 * - if there's a > within or right after the rule and there's no x right before
351 * the rule
352 * - already inhibited
353 * - else: put a ) at the position right after the rule
354 * - else: abort this match
355 * - else (the braille does match)
356 * - if it's a nocross rule
357 * - if there's a > within or right after the rule
358 * - if there's a x at the position right before the rule
359 * - put a 1 at that position
360 * - reset all >
361 * - else
362 * - continue with next matched rule
363 * - put a 0 at each position within the rule
364 * - else
365 * - for each position within the rule
366 * - if there's a > at the next position
367 * - put a 1
368 * - reset all >
369 * - else put a x
370 * - move cursor to the position right after the rule
371 * - put a $ if we're at the end of the word
372 * - change all ) to >
373 * - else if there's a > or a ) at the next position
374 * - put a 1
375 * - reset all >
376 * - match rules at the new cursor position
377 * - if match was aborted
378 * - revert changes
379 * - try inhibiting the last rule
380 * - go back to the position before the rule
381 * - continue with next matched rule
382 * - else put a x
383 */
384 extern int
suggestChunks(widechar * text,widechar * braille,char * hyphen_string)385 suggestChunks(widechar *text, widechar *braille, char *hyphen_string) {
386 int text_len, braille_len;
387 for (text_len = 0; text[text_len]; text_len++)
388 ;
389 for (braille_len = 0; braille[braille_len]; braille_len++)
390 ;
391 if (text_len == 0 || braille_len == 0) return 0;
392 hyphen_string[0] = '^';
393 hyphen_string[text_len + 1] = '\0';
394 memset(&hyphen_string[1], '-', text_len);
395 return find_matching_rules(
396 text, text_len, braille, braille_len, &hyphen_string[1], 0);
397 }
398
399 extern void
findRelevantRules(widechar * text,widechar ** rules_str)400 findRelevantRules(widechar *text, widechar **rules_str) {
401 int text_len, rules_len;
402 TranslationTableOffset offset;
403 TranslationTableCharacter *character;
404 TranslationTableRule *rule;
405 TranslationTableRule **rules;
406 int hash_len, k, m, n;
407 for (text_len = 0; text[text_len]; text_len++)
408 ;
409 for (rules_len = 0; rules_str[rules_len]; rules_len++)
410 ;
411 rules = (TranslationTableRule **)malloc(
412 (rules_len + 1) * sizeof(TranslationTableRule *));
413 m = n = 0;
414 while (text[n]) {
415 for (hash_len = 2; hash_len >= 1; hash_len--) {
416 offset = 0;
417 switch (hash_len) {
418 case 2:
419 if (text_len - n < 2) break;
420 offset = table->forRules[_lou_stringHash(&text[n], 1, table)];
421 break;
422 case 1:
423 offset = table->characters[_lou_charHash(text[n])];
424 while (offset) {
425 character = (TranslationTableCharacter *)&table->ruleArea[offset];
426 if (character->value == text[0]) {
427 offset = character->otherRules;
428 break;
429 } else
430 offset = character->next;
431 }
432 }
433 while (offset) {
434 rule = (TranslationTableRule *)&table->ruleArea[offset];
435 switch (rule->opcode) {
436 case CTO_Always:
437 case CTO_WholeWord:
438 case CTO_SuffixableWord:
439 case CTO_PrefixableWord:
440 case CTO_BegWord:
441 case CTO_BegMidWord:
442 case CTO_MidWord:
443 case CTO_MidEndWord:
444 case CTO_EndWord:
445 break;
446 default:
447 goto next_rule;
448 }
449 if (rule->charslen == 0 || rule->dotslen == 0 ||
450 rule->charslen > text_len - n)
451 goto next_rule;
452 for (k = 0; k < rule->charslen; k++)
453 if (rule->charsdots[k] != text[n + k]) goto next_rule;
454 rules[m++] = rule;
455 if (m == rules_len) goto finish;
456 next_rule:
457 offset = rule->charsnext;
458 }
459 }
460 n++;
461 }
462 finish:
463 rules_str[m--] = NULL;
464 for (; m >= 0; m--) printRule(rules[m], rules_str[m]);
465 free(rules);
466 }
467