1 /* liblouis Braille Translation and Back-Translation Library
2 
3    Copyright (C) 2008 Eitan Isaacson <eitan@ascender.com>
4    Copyright (C) 2012 James Teh <jamie@nvaccess.org>
5    Copyright (C) 2012 Bert Frees <bertfrees@gmail.com>
6    Copyright (C) 2014 Mesar Hameed <mesar.hameed@gmail.com>
7    Copyright (C) 2015 Mike Gray <mgray@aph.org>
8    Copyright (C) 2010-2017 Swiss Library for the Blind, Visually Impaired and Print
9    Disabled
10    Copyright (C) 2016-2017 Davy Kager <mail@davykager.nl>
11 
12    Copying and distribution of this file, with or without modification,
13    are permitted in any medium without royalty provided the copyright
14    notice and this notice are preserved. This file is offered as-is,
15    without any warranty.
16 */
17 
18 /**
19  * @file
20  * @brief Test helper functions
21  */
22 
23 #include <config.h>
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include "liblouis.h"
29 #include "internal.h"
30 #include "brl_checks.h"
31 #include "unistr.h"
32 
/* Write `prefix` followed by the first `len` integers of `pos_list` to stderr
 * on a single line. Every value is followed by one space. */
static void
print_int_array(const char *prefix, int *pos_list, int len) {
	int idx = 0;

	fprintf(stderr, "%s ", prefix);
	while (idx < len) {
		fprintf(stderr, "%d ", pos_list[idx]);
		idx++;
	}
	fputs("\n", stderr);
}
40 
41 static void
print_typeform(const formtype * typeform,int len)42 print_typeform(const formtype *typeform, int len) {
43 	int i;
44 	fprintf(stderr, "Typeform:  ");
45 	for (i = 0; i < len; i++) fprintf(stderr, "%hi", typeform[i]);
46 	fprintf(stderr, "\n");
47 }
48 
49 static void
print_widechars(widechar * buffer,int length)50 print_widechars(widechar *buffer, int length) {
51 	uint8_t *result_buf;
52 	size_t result_len;
53 
54 #ifdef WIDECHARS_ARE_UCS4
55 	result_buf = u32_to_u8(buffer, length, NULL, &result_len);
56 #else
57 	result_buf = u16_to_u8(buffer, length, NULL, &result_len);
58 #endif
59 	fprintf(stderr, "%.*s", (int)result_len, result_buf);
60 	free(result_buf);
61 }
62 
63 /* direction, 0=forward, 1=backwards, 2=both directions. If diagnostics is 1 then
64  * print diagnostics in case where the translation is not as
65  * expected */
66 
67 int
check_base(const char * tableList,const char * input,const char * expected,optional_test_params in)68 check_base(const char *tableList, const char *input, const char *expected,
69 		optional_test_params in) {
70 
71 	int i, retval = 0;
72 	int direction = in.direction;
73 	const int *expected_inputPos = in.expected_inputPos;
74 	const int *expected_outputPos = in.expected_outputPos;
75 	if (in.direction < 0 || in.direction > 2) {
76 		fprintf(stderr, "Invalid direction.\n");
77 		return 1;
78 	}
79 	if (in.direction != 0 && in.typeform != NULL) {
80 		// Currently, in backward translation, nothing is done with the initial value of
81 		// the typeform argument, and on return it always contains all zeros, so it
82 		// doesn't make any sense to use typeforms in backward translation tests.
83 		fprintf(stderr, "typeforms only supported with testmode 'forward'\n");
84 		return 1;
85 	}
86 	if (in.direction == 2 && in.cursorPos >= 0) {
87 		fprintf(stderr, "cursorPos not supported with testmode 'bothDirections'\n");
88 		return 1;
89 	}
90 	if (in.direction == 2 && in.max_outlen >= 0) {
91 		fprintf(stderr, "maxOutputLength not supported with testmode 'bothDirections'\n");
92 		return 1;
93 	}
94 	if (in.real_inlen >= 0 && in.max_outlen < 0) {
95 		fprintf(stderr,
96 				"realInputLength not supported when maxOutputLength is not specified\n");
97 		return 1;
98 	}
99 	while (1) {
100 		widechar *inbuf, *outbuf, *expectedbuf;
101 		int inlen = strlen(input);
102 		int actualInlen;
103 		const int outlen_multiplier = 4 + sizeof(widechar) * 2;
104 		int outlen = inlen * outlen_multiplier;
105 		int expectedlen = strlen(expected);
106 		int funcStatus = 0;
107 		formtype *typeformbuf = NULL;
108 		int *inputPos = NULL;
109 		int *outputPos = NULL;
110 		int cursorPos = 0;
111 		inbuf = malloc(sizeof(widechar) * inlen);
112 		outbuf = malloc(sizeof(widechar) * outlen);
113 		expectedbuf = malloc(sizeof(widechar) * expectedlen);
114 		if (in.typeform != NULL) {
115 			typeformbuf = malloc(outlen * sizeof(formtype));
116 			memcpy(typeformbuf, in.typeform, inlen * sizeof(formtype));
117 		}
118 		if (in.cursorPos >= 0) {
119 			cursorPos = in.cursorPos;
120 		}
121 		if (in.max_outlen >= 0) {
122 			outlen = in.max_outlen;
123 		}
124 		inlen = _lou_extParseChars(input, inbuf);
125 		if (!inlen) {
126 			fprintf(stderr, "Cannot parse input string.\n");
127 			retval = 1;
128 			goto fail;
129 		}
130 		if (in.real_inlen > inlen) {
131 			fprintf(stderr,
132 					"expected realInputLength (%d) may not exceed total input length "
133 					"(%d)\n",
134 					in.real_inlen, inlen);
135 			return 1;
136 		}
137 		if (expected_inputPos) {
138 			inputPos = malloc(sizeof(int) * outlen);
139 		}
140 		if (expected_outputPos) {
141 			outputPos = malloc(sizeof(int) * inlen);
142 		}
143 		actualInlen = inlen;
144 		// Note that this loop is not strictly needed to make the current tests pass, but
145 		// in the general case it is needed because it is theoretically possible that we
146 		// provided a too short output buffer.
147 		for (int k = 1; k <= 3; k++) {
148 			if (direction == 1) {
149 				funcStatus = _lou_backTranslate(tableList, in.display_table, inbuf,
150 						&actualInlen, outbuf, &outlen, typeformbuf, NULL, outputPos,
151 						inputPos, &cursorPos, in.mode, NULL, NULL);
152 			} else {
153 				funcStatus = _lou_translate(tableList, in.display_table, inbuf,
154 						&actualInlen, outbuf, &outlen, typeformbuf, NULL, outputPos,
155 						inputPos, &cursorPos, in.mode, NULL, NULL);
156 			}
157 			if (!funcStatus) {
158 				fprintf(stderr, "Translation failed.\n");
159 				retval = 1;
160 				goto fail;
161 			}
162 			if (in.max_outlen >= 0 || inlen == actualInlen) {
163 				break;
164 			} else if (k < 3) {
165 				// Hm, something is not quite right. Try again with a larger outbuf
166 				free(outbuf);
167 				outlen = inlen * outlen_multiplier * (k + 1);
168 				outbuf = malloc(sizeof(widechar) * outlen);
169 				if (expected_inputPos) {
170 					free(inputPos);
171 					inputPos = malloc(sizeof(int) * outlen);
172 				}
173 				fprintf(stderr,
174 						"Warning: For %s: returned inlen (%d) differs from passed inlen "
175 						"(%d) "
176 						"using outbuf of size %d. Trying again with bigger outbuf "
177 						"(%d).\n",
178 						input, actualInlen, inlen, inlen * outlen_multiplier * k, outlen);
179 				actualInlen = inlen;
180 			}
181 		}
182 		expectedlen = _lou_extParseChars(expected, expectedbuf);
183 		for (i = 0; i < outlen && i < expectedlen && expectedbuf[i] == outbuf[i]; i++)
184 			;
185 		if (i < outlen || i < expectedlen) {
186 			retval = 1;
187 			if (in.diagnostics) {
188 				outbuf[outlen] = 0;
189 				fprintf(stderr, "Input:    '%s'\n", input);
190 				/* Print the original typeform not the typeformbuf, as the
191 				 * latter has been modified by the translation and contains some
192 				 * information about outbuf */
193 				if (in.typeform != NULL) print_typeform(in.typeform, inlen);
194 				if (in.cursorPos >= 0) fprintf(stderr, "Cursor:   %d\n", in.cursorPos);
195 				fprintf(stderr, "Expected: '%s' (length %d)\n", expected, expectedlen);
196 				fprintf(stderr, "Received: '");
197 				print_widechars(outbuf, outlen);
198 				fprintf(stderr, "' (length %d)\n", outlen);
199 
200 				uint8_t *expected_utf8;
201 				uint8_t *out_utf8;
202 				size_t expected_utf8_len;
203 				size_t out_utf8_len;
204 #ifdef WIDECHARS_ARE_UCS4
205 				expected_utf8 = u32_to_u8(&expectedbuf[i], 1, NULL, &expected_utf8_len);
206 				out_utf8 = u32_to_u8(&outbuf[i], 1, NULL, &out_utf8_len);
207 #else
208 				expected_utf8 = u16_to_u8(&expectedbuf[i], 1, NULL, &expected_utf8_len);
209 				out_utf8 = u16_to_u8(&outbuf[i], 1, NULL, &out_utf8_len);
210 #endif
211 
212 				if (i < outlen && i < expectedlen) {
213 					fprintf(stderr,
214 							"Diff:     Expected '%.*s' but received '%.*s' in index %d\n",
215 							(int)expected_utf8_len, expected_utf8, (int)out_utf8_len,
216 							out_utf8, i);
217 				} else if (i < expectedlen) {
218 					fprintf(stderr,
219 							"Diff:     Expected '%.*s' but received nothing in index "
220 							"%d\n",
221 							(int)expected_utf8_len, expected_utf8, i);
222 				} else {
223 					fprintf(stderr,
224 							"Diff:     Expected nothing but received '%.*s' in index "
225 							"%d\n",
226 							(int)out_utf8_len, out_utf8, i);
227 				}
228 				free(expected_utf8);
229 				free(out_utf8);
230 			}
231 		}
232 		if (expected_inputPos) {
233 			int error_printed = 0;
234 			for (i = 0; i < outlen; i++) {
235 				if (expected_inputPos[i] != inputPos[i]) {
236 					retval = 1;
237 					if (in.diagnostics) {
238 						if (!error_printed) {  // Print only once
239 							fprintf(stderr, "Input position failure:\n");
240 							error_printed = 1;
241 						}
242 						fprintf(stderr, "Expected %d, received %d in index %d\n",
243 								expected_inputPos[i], inputPos[i], i);
244 					}
245 				}
246 			}
247 		}
248 		if (expected_outputPos) {
249 			int error_printed = 0;
250 			for (i = 0; i < inlen; i++) {
251 				if (expected_outputPos[i] != outputPos[i]) {
252 					retval = 1;
253 					if (in.diagnostics) {
254 						if (!error_printed) {  // Print only once
255 							fprintf(stderr, "Output position failure:\n");
256 							error_printed = 1;
257 						}
258 						fprintf(stderr, "Expected %d, received %d in index %d\n",
259 								expected_outputPos[i], outputPos[i], i);
260 					}
261 				}
262 			}
263 		}
264 		if ((in.expected_cursorPos >= 0) && (cursorPos != in.expected_cursorPos)) {
265 			retval = 1;
266 			if (in.diagnostics) {
267 				fprintf(stderr, "Cursor position failure:\n");
268 				fprintf(stderr, "Initial:%d Expected:%d Actual:%d \n", in.cursorPos,
269 						in.expected_cursorPos, cursorPos);
270 			}
271 		}
272 		if (in.max_outlen < 0 && inlen != actualInlen) {
273 			retval = 1;
274 			if (in.diagnostics) {
275 				fprintf(stderr,
276 						"Unexpected error happened: input length is not the same before "
277 						"as "
278 						"after the translation:\n");
279 				fprintf(stderr, "Before: %d After: %d \n", inlen, actualInlen);
280 			}
281 		} else if (actualInlen > inlen) {
282 			retval = 1;
283 			if (in.diagnostics) {
284 				fprintf(stderr,
285 						"Unexpected error happened: returned input length (%d) exceeds "
286 						"total input length (%d)\n",
287 						actualInlen, inlen);
288 			}
289 		} else if (in.real_inlen >= 0 && in.real_inlen != actualInlen) {
290 			retval = 1;
291 			if (in.diagnostics) {
292 				fprintf(stderr, "Real input length failure:\n");
293 				fprintf(stderr, "Expected: %d, received: %d\n", in.real_inlen,
294 						actualInlen);
295 			}
296 		}
297 
298 	fail:
299 		free(inbuf);
300 		free(outbuf);
301 		free(expectedbuf);
302 		free(typeformbuf);
303 		free(inputPos);
304 		free(outputPos);
305 
306 		if (direction == 2) {
307 			const char *tmp = input;
308 			input = expected;
309 			expected = tmp;
310 			expected_inputPos = in.expected_outputPos;
311 			expected_outputPos = in.expected_inputPos;
312 			direction = 1;
313 			continue;
314 		} else {
315 			break;
316 		}
317 	}
318 
319 	return retval;
320 }
321 
322 /* Helper function to convert a typeform string of '0's, '1's, '2's etc.
323  * to the required format, which is an array of 0s, 1s, 2s, etc.
324  * For example, "0000011111000" is converted to {0,0,0,0,0,1,1,1,1,1,0,0,0}
325  * The caller is responsible for freeing the returned array. */
326 formtype *
convert_typeform(const char * typeform_string)327 convert_typeform(const char *typeform_string) {
328 	int len = strlen(typeform_string);
329 	formtype *typeform = malloc(len * sizeof(formtype));
330 	int i;
331 	for (i = 0; i < len; i++) typeform[i] = typeform_string[i] - '0';
332 	return typeform;
333 }
334 
335 void
update_typeform(const char * typeform_string,formtype * typeform,const typeforms kind)336 update_typeform(const char *typeform_string, formtype *typeform, const typeforms kind) {
337 	int len = strlen(typeform_string);
338 	int i;
339 	for (i = 0; i < len; i++)
340 		if (typeform_string[i] != ' ') typeform[i] |= kind;
341 }
342 
343 int
check_cursor_pos(const char * tableList,const char * str,const int * expected_pos)344 check_cursor_pos(const char *tableList, const char *str, const int *expected_pos) {
345 	widechar *inbuf;
346 	widechar *outbuf;
347 	int *inpos, *outpos;
348 	int inlen = strlen(str);
349 	int outlen = inlen;
350 	int cursor_pos;
351 	int i, retval = 0;
352 
353 	inbuf = malloc(sizeof(widechar) * inlen);
354 	outbuf = malloc(sizeof(widechar) * inlen);
355 	inpos = malloc(sizeof(int) * inlen);
356 	outpos = malloc(sizeof(int) * inlen);
357 	inlen = _lou_extParseChars(str, inbuf);
358 
359 	for (i = 0; i < inlen; i++) {
360 		cursor_pos = i;
361 		if (!lou_translate(tableList, inbuf, &inlen, outbuf, &outlen, NULL, NULL, NULL,
362 					NULL, &cursor_pos, compbrlAtCursor)) {
363 			fprintf(stderr, "Translation failed.\n");
364 			retval = 1;
365 			goto fail;
366 		}
367 		if (expected_pos[i] != cursor_pos) {
368 			if (!retval)  // Print only once
369 				fprintf(stderr, "Cursorpos failure:\n");
370 			fprintf(stderr,
371 					"string='%s' cursor=%d ('%c') expected=%d received=%d ('%c')\n", str,
372 					i, str[i], expected_pos[i], cursor_pos, (char)outbuf[cursor_pos]);
373 			retval = 1;
374 		}
375 	}
376 
377 fail:
378 	free(inbuf);
379 	free(outbuf);
380 	free(inpos);
381 	free(outpos);
382 	return retval;
383 }
384 
385 /** Check if a display table maps characters to the right dots.
386  *
387  * The dots are read as Unicode braille. Multiple input characters are
388  * allowed to map to the same dot pattern. Virtual dots in the actual
389  * output are discarded.
390  *
391  * @return 0 if the result is as expected and 1 otherwise.
392  */
393 int
check_display(const char * displayTableList,const char * input,const char * expected)394 check_display(const char *displayTableList, const char *input, const char *expected) {
395 	widechar *inbuf = NULL;
396 	widechar *outbuf = NULL;
397 	widechar *expectedbuf = NULL;
398 	int retval = 0;
399 	int inlen = strlen(input);
400 	inbuf = malloc(sizeof(widechar) * inlen);
401 	inlen = _lou_extParseChars(input, inbuf);
402 	if (!inlen) {
403 		fprintf(stderr, "Cannot parse input string.\n");
404 		retval = 1;
405 		goto fail;
406 	}
407 	int expectedlen = strlen(expected);
408 	expectedbuf = malloc(sizeof(widechar) * expectedlen);
409 	expectedlen = _lou_extParseChars(expected, expectedbuf);
410 	if (!expectedlen) {
411 		fprintf(stderr, "Cannot parse output string.\n");
412 		retval = 1;
413 		goto fail;
414 	}
415 	if (inlen != expectedlen) {
416 		fprintf(stderr, "Input and output string must be the same length.\n");
417 		retval = 1;
418 		goto fail;
419 	}
420 	for (int i = 0; i < expectedlen; i++) {
421 		if ((expectedbuf[i] & 0xff00) != LOU_ROW_BRAILLE) {
422 			fprintf(stderr, "Output string must be Unicode braille.\n");
423 			retval = 1;
424 			goto fail;
425 		}
426 	}
427 	outbuf = malloc(sizeof(widechar) * inlen);
428 	if (!lou_charToDots(displayTableList, inbuf, outbuf, inlen, ucBrl)) {
429 		// This should only happen if the table can not be compiled.
430 		// If the table does not have a display rule for a character
431 		// in the input, it will result in a blank dot pattern in the
432 		// output.
433 		fprintf(stderr, "Mapping to dots failed.\n");
434 		retval = 1;
435 		goto fail;
436 	}
437 	for (int i = 0; i < inlen; i++) {
438 		if (outbuf[i] != expectedbuf[i]) {
439 			retval = 1;
440 			fprintf(stderr, "Input:    '%s'\n", input);
441 			fprintf(stderr, "Expected: '%s'\n", expected);
442 			fprintf(stderr, "Received: '");
443 			print_widechars(outbuf, inlen);
444 			fprintf(stderr, "'\n");
445 			uint8_t *expected_utf8;
446 			uint8_t *out_utf8;
447 			size_t expected_utf8_len;
448 			size_t out_utf8_len;
449 #ifdef WIDECHARS_ARE_UCS4
450 			expected_utf8 = u32_to_u8(&expectedbuf[i], 1, NULL, &expected_utf8_len);
451 			out_utf8 = u32_to_u8(&outbuf[i], 1, NULL, &out_utf8_len);
452 #else
453 			expected_utf8 = u16_to_u8(&expectedbuf[i], 1, NULL, &expected_utf8_len);
454 			out_utf8 = u16_to_u8(&outbuf[i], 1, NULL, &out_utf8_len);
455 #endif
456 			expectedbuf[i] = (expectedbuf[i] & 0x00ff) | LOU_DOTS;
457 			outbuf[i] = (outbuf[i] & 0x00ff) | LOU_DOTS;
458 			fprintf(stderr, "Diff:     Expected '%.*s' (dots %s)", (int)expected_utf8_len,
459 					expected_utf8, _lou_showDots(&expectedbuf[i], 1));
460 			fprintf(stderr, " but received '%.*s' (dots %s) in index %d\n",
461 					(int)out_utf8_len, out_utf8, _lou_showDots(&outbuf[i], 1), i);
462 			free(expected_utf8);
463 			free(out_utf8);
464 			break;
465 		}
466 	}
467 fail:
468 	free(inbuf);
469 	free(outbuf);
470 	free(expectedbuf);
471 	return retval;
472 }
473 
474 /* Check if a string is hyphenated as expected, by passing the
475  * expected hyphenation position array.
476  *
477  * @return 0 if the hyphenation is as expected and 1 otherwise.
478  */
479 int
check_hyphenation_pos(const char * tableList,const char * str,const char * expected)480 check_hyphenation_pos(const char *tableList, const char *str, const char *expected) {
481 	widechar *inbuf;
482 	char *hyphens = NULL;
483 	int inlen = strlen(str);
484 	int retval = 0;
485 
486 	inbuf = malloc(sizeof(widechar) * inlen);
487 	inlen = _lou_extParseChars(str, inbuf);
488 	if (!inlen) {
489 		fprintf(stderr, "Cannot parse input string.\n");
490 		retval = 1;
491 		goto fail;
492 	}
493 	hyphens = calloc(inlen + 1, sizeof(char));
494 
495 	if (!lou_hyphenate(tableList, inbuf, inlen, hyphens, 0)) {
496 		fprintf(stderr, "Hyphenation failed.\n");
497 		retval = 1;
498 		goto fail;
499 	}
500 
501 	if (strcmp(expected, hyphens)) {
502 		fprintf(stderr, "Input:    '%s'\n", str);
503 		fprintf(stderr, "Expected: '%s'\n", expected);
504 		fprintf(stderr, "Received: '%s'\n", hyphens);
505 		retval = 1;
506 	}
507 
508 fail:
509 	free(inbuf);
510 	free(hyphens);
511 	return retval;
512 }
513 
514 /** Check if a string is hyphenated as expected.
515  *
516  * mode is '0' when input is text and '1' when input is braille
517  *
518  * @return 0 if the hyphenation is as expected and 1 otherwise.
519  */
520 int
check_hyphenation(const char * tableList,const char * str,const char * expected,int mode)521 check_hyphenation(
522 		const char *tableList, const char *str, const char *expected, int mode) {
523 	widechar *inbuf;
524 	widechar *hyphenatedbuf = NULL;
525 	uint8_t *hyphenated = NULL;
526 	char *hyphens = NULL;
527 	int inlen = strlen(str);
528 	size_t hyphenatedlen = inlen * 2;
529 	int retval = 0;
530 
531 	inbuf = malloc(sizeof(widechar) * inlen);
532 	inlen = _lou_extParseChars(str, inbuf);
533 	if (!inlen) {
534 		fprintf(stderr, "Cannot parse input string.\n");
535 		retval = 1;
536 		goto fail;
537 	}
538 	hyphens = calloc(inlen + 1, sizeof(char));
539 
540 	if (!lou_hyphenate(tableList, inbuf, inlen, hyphens, mode)) {
541 		fprintf(stderr, "Hyphenation failed.\n");
542 		retval = 1;
543 		goto fail;
544 	}
545 	if (hyphens[0] != '0') {
546 		fprintf(stderr, "Unexpected output from lou_hyphenate.\n");
547 		retval = 1;
548 		goto fail;
549 	}
550 
551 	hyphenatedbuf = malloc(sizeof(widechar) * hyphenatedlen);
552 	int i = 0;
553 	int j = 0;
554 	hyphenatedbuf[i++] = inbuf[j++];
555 	for (; j < inlen; j++) {
556 		if (hyphens[j] == '2')
557 			hyphenatedbuf[i++] = (widechar)'|';
558 		else if (hyphens[j] != '0')
559 			hyphenatedbuf[i++] = (widechar)'-';
560 		hyphenatedbuf[i++] = inbuf[j];
561 	}
562 
563 #ifdef WIDECHARS_ARE_UCS4
564 	hyphenated = u32_to_u8(hyphenatedbuf, i, NULL, &hyphenatedlen);
565 #else
566 	hyphenated = u16_to_u8(hyphenatedbuf, i, NULL, &hyphenatedlen);
567 #endif
568 
569 	if (!hyphenated) {
570 		fprintf(stderr, "Unexpected error during UTF-8 encoding\n");
571 		free(hyphenatedbuf);
572 		retval = 2;
573 		goto fail;
574 	}
575 
576 	if (strlen(expected) != hyphenatedlen ||
577 			strncmp(expected, (const char *)hyphenated, hyphenatedlen)) {
578 		fprintf(stderr, "Input:    '%s'\n", str);
579 		fprintf(stderr, "Expected: '%s'\n", expected);
580 		fprintf(stderr, "Received: '%.*s'\n", (int)hyphenatedlen, hyphenated);
581 		retval = 1;
582 	}
583 
584 	free(hyphenatedbuf);
585 	free(hyphenated);
586 
587 fail:
588 	free(inbuf);
589 	free(hyphens);
590 	return retval;
591 }
592