1 /* liblouis Braille Translation and Back-Translation Library
2 
3    Copyright (C) 2017 Bert Frees
4 
5    This file is part of liblouis.
6 
7    liblouis is free software: you can redistribute it and/or modify it
8    under the terms of the GNU Lesser General Public License as published
9    by the Free Software Foundation, either version 2.1 of the License, or
10    (at your option) any later version.
11 
12    liblouis is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Lesser General Public License for more details.
16 
17    You should have received a copy of the GNU Lesser General Public
18    License along with liblouis. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <stdio.h>
22 #include <string.h>
23 #include <stdlib.h>
24 #include "internal.h"
25 
/* Tables shared by every function in this file. Both pointers are filled in by
 * loadTable() and are dereferenced by the other functions without a NULL
 * check. */
static const TranslationTableHeader *table;
static const DisplayTableHeader *displayTable;
28 
/**
 * Load the table(s) named by `tableList` into this file's `table` and
 * `displayTable` globals.
 *
 * `tableList` is passed to _lou_getTable twice, as its first and as its second
 * argument (presumably the translation table list and the display table list;
 * confirm against the declaration of _lou_getTable).
 *
 * The function returns void, so the caller gets no indication of whether
 * loading succeeded.
 */
extern void
loadTable(const char *tableList) {
	_lou_getTable(tableList, tableList, &table, &displayTable);
}
33 
/**
 * Return the `hyphenStatesArray` field of the loaded table, converted to int.
 *
 * Presumably non-zero when the table defines hyphenation states; confirm
 * against the table header definition.
 *
 * `table` is dereferenced unconditionally, so loadTable() must have been called
 * first.
 */
extern int
hyphenationEnabled() {
	return table->hyphenStatesArray;
}
38 
39 extern int
isLetter(widechar c)40 isLetter(widechar c) {
41 	static unsigned long int hash;
42 	static TranslationTableOffset offset;
43 	static TranslationTableCharacter *character;
44 	hash = _lou_charHash(c);
45 	offset = table->characters[hash];
46 	while (offset) {
47 		character = (TranslationTableCharacter *)&table->ruleArea[offset];
48 		if (character->value == c) return character->attributes & CTC_Letter;
49 		offset = character->next;
50 	}
51 	return 0;
52 }
53 
54 extern widechar
toLowercase(widechar c)55 toLowercase(widechar c) {
56 	static TranslationTableOffset offset;
57 	static TranslationTableCharacter *character;
58 	offset = table->characters[_lou_charHash(c)];
59 	while (offset) {
60 		character = (TranslationTableCharacter *)&table->ruleArea[offset];
61 		if (character->value == c) {
62 			if (character->mode & CTC_UpperCase) {
63 				const TranslationTableCharacter *c = character;
64 				if (c->basechar)
65 					c = (TranslationTableCharacter *)&table->ruleArea[c->basechar];
66 				while (1) {
67 					if ((c->mode & (character->mode & ~CTC_UpperCase)) ==
68 							(character->mode & ~CTC_UpperCase))
69 						return c->value;
70 					if (!c->linked) break;
71 					c = (TranslationTableCharacter *)&table->ruleArea[c->linked];
72 				}
73 			}
74 			return character->value;
75 		}
76 		offset = character->next;
77 	}
78 	return c;
79 }
80 
81 extern void
toDotPattern(widechar * braille,char * pattern)82 toDotPattern(widechar *braille, char *pattern) {
83 	int length;
84 	widechar *dots;
85 	int i;
86 	for (length = 0; braille[length]; length++)
87 		;
88 	dots = (widechar *)malloc((length + 1) * sizeof(widechar));
89 	for (i = 0; i < length; i++) dots[i] = _lou_getDotsForChar(braille[i], displayTable);
90 	strcpy(pattern, _lou_showDots(dots, length));
91 	free(dots);
92 }
93 
94 extern int
printRule(TranslationTableRule * rule,widechar * rule_string)95 printRule(TranslationTableRule *rule, widechar *rule_string) {
96 	switch (rule->opcode) {
97 	case CTO_Context:
98 	case CTO_Correct:
99 	case CTO_SwapCd:
100 	case CTO_SwapDd:
101 	case CTO_Pass2:
102 	case CTO_Pass3:
103 	case CTO_Pass4:
104 		return 0;
105 	default: {
106 		int l = 0;
107 		if (rule->nocross)
108 			for (char *c = "nocross "; *c; c++) rule_string[l++] = *c;
109 		const char *opcode = _lou_findOpcodeName(rule->opcode);
110 		for (size_t k = 0; k < strlen(opcode); k++) rule_string[l++] = opcode[k];
111 		rule_string[l++] = '\t';
112 		for (int k = 0; k < rule->charslen; k++) rule_string[l++] = rule->charsdots[k];
113 		rule_string[l++] = '\t';
114 		for (int k = 0; k < rule->dotslen; k++) {
115 			rule_string[l] = _lou_getCharForDots(
116 					rule->charsdots[rule->charslen + k], displayTable);
117 			if (rule_string[l] == '\0') {
118 				// if a dot pattern can not be displayed, print an error message
119 				char *message = (char *)malloc(50 * sizeof(char));
120 				sprintf(message, "ERROR: provide a display rule for dots %s",
121 						_lou_showDots(&rule->charsdots[rule->charslen + k], 1));
122 				l = 0;
123 				while (message[l]) {
124 					rule_string[l] = message[l];
125 					l++;
126 				}
127 				rule_string[l++] = '\0';
128 				free(message);
129 				return 1;
130 			}
131 			l++;
132 		}
133 		rule_string[l++] = '\0';
134 		return 1;
135 	}
136 	}
137 }
138 
/* Set DEBUG to a non-zero value to make find_matching_rules trace its progress
 * with printf. */
#define DEBUG 0

#if DEBUG
/* debug(fmt, ...): print a printf-style message followed by a newline,
 * preceded by `debug_indent` spaces. A variable named `debug_indent` must be in
 * scope wherever the macro is used. */
#define debug(fmt, ...)                                                     \
	do {                                                                    \
		if (DEBUG) printf("%*s" fmt "\n", debug_indent, "", ##__VA_ARGS__); \
	} while (0)
#else
/* Tracing disabled: debug() expands to nothing. */
#define debug(fmt, ...)
#endif
149 
150 static int
find_matching_rules(widechar * text,int text_len,widechar * braille,int braille_len,char * data,int clear_data)151 find_matching_rules(widechar *text, int text_len, widechar *braille, int braille_len,
152 		char *data, int clear_data) {
153 	TranslationTableOffset offset;
154 	TranslationTableRule *rule;
155 	TranslationTableCharacter *character;
156 	char *data_save;
157 	int hash_len, k;
158 #if DEBUG
159 	static int initial_text_len = 0;
160 	int debug_indent = 0;
161 	if (data[-1] == '^') {
162 		initial_text_len = text_len;
163 		for (k = 0; k < text_len; k++) printf("%c", text[k]);
164 		printf(" <=> ");
165 		for (k = 0; k < braille_len; k++) printf("%c", braille[k]);
166 		printf("\n");
167 	} else
168 		debug_indent = initial_text_len - text_len;
169 #endif
170 
171 	/* finish */
172 	if (text_len == 0 && braille_len == 0) {
173 		data[-1] = '$';
174 		return 1;
175 	}
176 
177 	/* save data */
178 	data_save = (char *)malloc(text_len * sizeof(char));
179 	memcpy(data_save, data, text_len);
180 
181 	for (k = 0; k < text_len; k++)
182 		if (data[k] == ')')
183 			data[k] = '>';
184 		else if (clear_data)
185 			data[k] = '-';
186 	debug("%s", data);
187 
188 	/* iterate over rules */
189 	for (hash_len = 2; hash_len >= 1; hash_len--) {
190 		offset = 0;
191 		switch (hash_len) {
192 		case 2:
193 			if (text_len < 2) break;
194 			offset = table->forRules[_lou_stringHash(text, 1, table)];
195 			break;
196 		case 1:
197 			offset = table->characters[_lou_charHash(text[0])];
198 			while (offset) {
199 				character = (TranslationTableCharacter *)&table->ruleArea[offset];
200 				if (character->value == text[0]) {
201 					offset = character->otherRules;
202 					break;
203 				} else
204 					offset = character->next;
205 			}
206 		}
207 		while (offset) {
208 			rule = (TranslationTableRule *)&table->ruleArea[offset];
209 #if DEBUG
210 			widechar print_string[128];
211 			printRule(rule, print_string);
212 			printf("%*s=> ", debug_indent, "");
213 			for (k = 0; print_string[k]; k++) printf("%c", print_string[k]);
214 			printf("\n");
215 #endif
216 
217 			/* select rule */
218 			if (rule->charslen == 0 || rule->dotslen == 0) goto next_rule;
219 			if (rule->charslen > text_len) goto next_rule;
220 			switch (rule->opcode) {
221 			case CTO_WholeWord:
222 				if (data[-1] == '^' && rule->charslen == text_len) break;
223 				goto next_rule;
224 			case CTO_SuffixableWord:
225 				if (data[-1] == '^') break;
226 				goto next_rule;
227 			case CTO_PrefixableWord:
228 				if (rule->charslen == text_len) break;
229 				goto next_rule;
230 			case CTO_BegWord:
231 				if (data[-1] == '^' && rule->charslen < text_len) break;
232 				goto next_rule;
233 			case CTO_BegMidWord:
234 				if (rule->charslen < text_len) break;
235 				goto next_rule;
236 			case CTO_MidWord:
237 				if (data[-1] != '^' && rule->charslen < text_len) break;
238 				goto next_rule;
239 			case CTO_MidEndWord:
240 				if (data[-1] != '^') break;
241 				goto next_rule;
242 			case CTO_EndWord:
243 				if (data[-1] != '^' && rule->charslen == text_len) break;
244 				goto next_rule;
245 			case CTO_Letter:
246 			case CTO_UpperCase:
247 			case CTO_LowerCase:
248 			case CTO_Punctuation:
249 			case CTO_Always:
250 				break;
251 			default:
252 				goto next_rule;
253 			}
254 			for (k = 0; k < rule->charslen; k++)
255 				if (rule->charsdots[k] != text[k]) goto next_rule;
256 			debug("** rule selected **");
257 
258 			/* inhibit rule */
259 			if (rule->dotslen > braille_len ||
260 					(rule->charslen == text_len && rule->dotslen < braille_len) ||
261 					(rule->dotslen == braille_len && rule->charslen < text_len))
262 				goto inhibit;
263 			for (k = 0; k < rule->dotslen; k++)
264 				if (_lou_getCharForDots(rule->charsdots[rule->charslen + k],
265 							displayTable) != braille[k])
266 					goto inhibit;
267 
268 			/* don't let this rule be inhibited by an earlier rule */
269 			int inhibit_all = 0;
270 			if (rule->nocross)
271 				for (k = 0; k < rule->charslen - 1; k++)
272 					if (data[k + 1] == '>') {
273 						if (data[-1] == 'x')
274 							inhibit_all = 1;
275 						else
276 							goto next_rule;
277 					}
278 
279 			/* fill data */
280 			if (rule->nocross)
281 				;  // deferred: see success
282 			else {
283 				k = 0;
284 				while (k < rule->charslen - 1) {
285 					if (data[k + 1] == '>') {
286 						data[k++] = '1';
287 						memset(&data[k], '-', text_len - k);
288 					} else
289 						data[k++] = 'x';
290 				}
291 			}
292 			if (data[rule->charslen] == '>' || data[rule->charslen] == ')') {
293 				data[rule->charslen - 1] = '1';
294 				memset(&data[rule->charslen], '-', text_len - rule->charslen);
295 			} else
296 				data[rule->charslen - 1] = 'x';
297 			debug("%s", data);
298 
299 			/* recur */
300 			if (find_matching_rules(&text[rule->charslen], text_len - rule->charslen,
301 						&braille[rule->dotslen], braille_len - rule->dotslen,
302 						&data[rule->charslen], inhibit_all))
303 				goto success;
304 
305 		inhibit:
306 			debug("** rule inhibited **");
307 			if (rule->nocross) {
308 				if (rule->charslen < 2) goto abort;
309 				/* inhibited by earlier rule */
310 				for (k = 0; k < rule->charslen - 1; k++)
311 					if (data[k + 1] == '>' && data[-1] != 'x') goto next_rule;
312 				data[rule->charslen - 1] = ')';
313 				debug("%s", data);
314 				goto next_rule;
315 			} else {
316 				goto abort;
317 			}
318 
319 		success:
320 			/* fill data (deferred) */
321 			if (inhibit_all) data[-1] = '1';
322 			if (rule->nocross) {
323 				memset(data, '0', rule->charslen - 1);
324 				debug("%s", data);
325 			}
326 			free(data_save);
327 			return 1;
328 
329 		next_rule:
330 			offset = rule->charsnext;
331 		}
332 	}
333 
334 abort:
335 	/* restore data */
336 	memcpy(data, data_save, text_len);
337 	free(data_save);
338 	debug("** abort **");
339 	return 0;
340 }
341 
342 /*
343  * - begin with all -
344  * - set cursor position right before the word
345  * - put a ^
346  * - match rules
347  *   - when a rule has been selected
348  *     - if the braille does not match: try inhibiting the rule
349  *       - if it's a nocross rule (longer than a single character)
350  *         - if there's a > within or right after the rule and there's no x right before
351  *           the rule
352  *           - already inhibited
353  *         - else: put a ) at the position right after the rule
354  *       - else: abort this match
355  *     - else (the braille does match)
356  *       - if it's a nocross rule
357  *         - if there's a > within or right after the rule
358  *           - if there's a x at the position right before the rule
359  *             - put a 1 at that position
360  *             - reset all >
361  *           - else
362  *             - continue with next matched rule
363  *         - put a 0 at each position within the rule
364  *       - else
365  *         - for each position within the rule
366  *           - if there's a > at the next position
367  *             - put a 1
368  *             - reset all >
369  *           - else put a x
370  *       - move cursor to the position right after the rule
371  *       - put a $ if we're at the end of the word
372  *       - change all ) to >
373  *       - else if there's a > or a ) at the next position
374  *         - put a 1
375  *         - reset all >
376  *         - match rules at the new cursor position
377  *           - if match was aborted
378  *             - revert changes
379  *             - try inhibiting the last rule
380  *             - go back to the position before the rule
381  *             - continue with next matched rule
382  *       - else put a x
383  */
384 extern int
suggestChunks(widechar * text,widechar * braille,char * hyphen_string)385 suggestChunks(widechar *text, widechar *braille, char *hyphen_string) {
386 	int text_len, braille_len;
387 	for (text_len = 0; text[text_len]; text_len++)
388 		;
389 	for (braille_len = 0; braille[braille_len]; braille_len++)
390 		;
391 	if (text_len == 0 || braille_len == 0) return 0;
392 	hyphen_string[0] = '^';
393 	hyphen_string[text_len + 1] = '\0';
394 	memset(&hyphen_string[1], '-', text_len);
395 	return find_matching_rules(
396 			text, text_len, braille, braille_len, &hyphen_string[1], 0);
397 }
398 
399 extern void
findRelevantRules(widechar * text,widechar ** rules_str)400 findRelevantRules(widechar *text, widechar **rules_str) {
401 	int text_len, rules_len;
402 	TranslationTableOffset offset;
403 	TranslationTableCharacter *character;
404 	TranslationTableRule *rule;
405 	TranslationTableRule **rules;
406 	int hash_len, k, m, n;
407 	for (text_len = 0; text[text_len]; text_len++)
408 		;
409 	for (rules_len = 0; rules_str[rules_len]; rules_len++)
410 		;
411 	rules = (TranslationTableRule **)malloc(
412 			(rules_len + 1) * sizeof(TranslationTableRule *));
413 	m = n = 0;
414 	while (text[n]) {
415 		for (hash_len = 2; hash_len >= 1; hash_len--) {
416 			offset = 0;
417 			switch (hash_len) {
418 			case 2:
419 				if (text_len - n < 2) break;
420 				offset = table->forRules[_lou_stringHash(&text[n], 1, table)];
421 				break;
422 			case 1:
423 				offset = table->characters[_lou_charHash(text[n])];
424 				while (offset) {
425 					character = (TranslationTableCharacter *)&table->ruleArea[offset];
426 					if (character->value == text[0]) {
427 						offset = character->otherRules;
428 						break;
429 					} else
430 						offset = character->next;
431 				}
432 			}
433 			while (offset) {
434 				rule = (TranslationTableRule *)&table->ruleArea[offset];
435 				switch (rule->opcode) {
436 				case CTO_Always:
437 				case CTO_WholeWord:
438 				case CTO_SuffixableWord:
439 				case CTO_PrefixableWord:
440 				case CTO_BegWord:
441 				case CTO_BegMidWord:
442 				case CTO_MidWord:
443 				case CTO_MidEndWord:
444 				case CTO_EndWord:
445 					break;
446 				default:
447 					goto next_rule;
448 				}
449 				if (rule->charslen == 0 || rule->dotslen == 0 ||
450 						rule->charslen > text_len - n)
451 					goto next_rule;
452 				for (k = 0; k < rule->charslen; k++)
453 					if (rule->charsdots[k] != text[n + k]) goto next_rule;
454 				rules[m++] = rule;
455 				if (m == rules_len) goto finish;
456 			next_rule:
457 				offset = rule->charsnext;
458 			}
459 		}
460 		n++;
461 	}
462 finish:
463 	rules_str[m--] = NULL;
464 	for (; m >= 0; m--) printRule(rules[m], rules_str[m]);
465 	free(rules);
466 }
467