1 #include "unicode_config.h"
2 #include "courier-unicode.h"
3 #include <iostream>
4 #include <iterator>
5 #include <sstream>
6 #include <fstream>
7 #include <cstdint>
8 #include <iomanip>
9 #include <algorithm>
10 #include <unistd.h>
11
// Stream handle for debug output. main() opens it on /dev/null; the failure
// paths in character_test() close it and repoint it at stderr before
// re-running the failing calculation.
FILE *DEBUGDUMP;

// Defined before unicode_bidi.c is pulled in below. Presumably enables that
// file's debug tracing (which would write to DEBUGDUMP) -- confirm against
// unicode_bidi.c.
#define BIDI_DEBUG

extern "C" {
// The brace inside "#if 0" never reaches the compiler. Presumably it is only
// there to keep editors' brace matching/auto-indentation balanced.
#if 0
}
#endif

// The C implementation is included directly, so it is compiled as part of
// this translation unit, inside the extern "C" block.
#include "unicode_bidi.c"

}
24
latin_test()25 void latin_test()
26 {
27 for (char32_t c=32; c<256; c++)
28 {
29 std::u32string s;
30
31 s += c;
32
33 std::vector<unicode_bidi_level_t> levels={UNICODE_BIDI_LR};
34
35 auto new_string=unicode::bidi_embed(s, levels,
36 UNICODE_BIDI_LR);
37
38 if (new_string != s)
39 {
40 std::cerr << "Character " << (int)c
41 << " does not work." << std::endl;
42 exit(1);
43 }
44 }
45
46 std::u32string s;
47 std::vector<unicode_bidi_level_t> levels;
48
49 for (char32_t c=32; c<256; c++)
50 {
51 s += c;
52 levels.push_back(UNICODE_BIDI_LR);
53 }
54
55 auto new_string=unicode::bidi_embed(s, levels,
56 UNICODE_BIDI_LR);
57
58 if (new_string != s)
59 {
60 std::cerr << "iso-8859-1 string does not work."
61 << std::endl;
62 exit(1);
63 }
64 }
65
character_test()66 void character_test()
67 {
68 std::ifstream fp("BidiCharacterTest.txt");
69
70 if (!fp.is_open())
71 {
72 std::cerr << "Cannot open BidiCharacterTest.txt" << std::endl;
73 exit(1);
74 }
75
76 std::string buf;
77
78 size_t linenum=0;
79 size_t nextlogline=0;
80 std::string logmsg;
81
82 while (1)
83 {
84 buf.clear();
85
86 bool iseof=std::getline(fp, buf).eof() && buf.empty();
87
88 if (iseof || ++linenum >= nextlogline)
89 {
90 alarm(300);
91 std::cout << logmsg;
92
93 std::ostringstream o;
94
95 o << std::setw(6) << linenum << " lines processed... ";
96
97 logmsg=o.str();
98
99 std::cout << logmsg << std::flush;
100
101 std::fill(logmsg.begin(), logmsg.end(), '\b');
102
103 nextlogline += 20000;
104 }
105
106 if (iseof)
107 break;
108 auto p=buf.find('#');
109
110 if (p != buf.npos)
111 buf=buf.substr(0, p);
112
113 p=buf.find(';');
114
115 if (p == buf.npos)
116 continue;
117
118 std::istringstream chars{buf.substr(0, p)};
119
120 auto q=buf.find(';', ++p);
121
122 if (q == buf.npos)
123 {
124 std::cerr << "Cannot parse line " << linenum
125 << std::endl;
126 exit(2);
127 }
128
129 int direction;
130
131 if (!(std::istringstream{buf.substr(p, q-p)} >> direction))
132 {
133 std::cerr << "Cannot parse line " << linenum
134 << std::endl;
135 exit(3);
136 }
137
138 p=++q;
139 q=buf.find(';', p);
140
141 if (q == buf.npos)
142 {
143 std::cerr << "Cannot parse line " << linenum
144 << std::endl;
145 exit(4);
146 }
147
148 int paragraph_embedding_level;
149
150 if (!(std::istringstream{buf.substr(p, q-p)} >>
151 paragraph_embedding_level))
152 {
153 std::cerr << "Cannot parse line " << linenum
154 << std::endl;
155 exit(5);
156 }
157 p=++q;
158 q=buf.find(';', p);
159
160 if (q == buf.npos)
161 {
162 std::cerr << "Cannot parse line " << linenum
163 << std::endl;
164 exit(6);
165 }
166
167 std::vector<unicode_bidi_level_t> levels;
168
169 {
170 std::istringstream level_s{buf.substr(p, q-p)};
171
172 std::string s;
173
174 while (level_s >> s)
175 {
176 size_t l;
177
178 if (!(std::istringstream{s} >> l))
179 {
180 l=UNICODE_BIDI_SKIP;
181 }
182 levels.push_back(l);
183 }
184 }
185
186 std::vector<size_t> render_order;
187
188 {
189 size_t n;
190
191 std::istringstream order_i{buf.substr(++q)};
192
193 while (order_i >> n)
194 render_order.push_back(n);
195 }
196 std::u32string s;
197 uintmax_t c;
198
199 while (chars >> std::hex >> c)
200 s.push_back(c);
201
202 auto ret=direction == UNICODE_BIDI_LR ||
203 direction == UNICODE_BIDI_RL
204 ? unicode::bidi_calc(s, direction)
205 : unicode::bidi_calc(s);
206
207 if (std::get<1>(ret).direction != paragraph_embedding_level)
208 {
209 std::cerr << "Regression, line "
210 << linenum
211 << ": expected "
212 << paragraph_embedding_level
213 << " paragraph embedding level, got "
214 << (int)std::get<1>(ret).direction
215 << std::endl;
216 exit(1);
217 }
218
219 if (std::get<0>(ret) != levels)
220 {
221 fclose(DEBUGDUMP);
222 DEBUGDUMP=stderr;
223
224 (void)(direction == UNICODE_BIDI_LR ||
225 direction == UNICODE_BIDI_RL
226 ? unicode::bidi_calc(s, direction)
227 : unicode::bidi_calc(s));
228
229 std::cerr << "Regression, line "
230 << linenum
231 << ": embedding levels"
232 << std::endl
233 << " Expected:";
234
235 for (int l:levels)
236 {
237 std::cerr << " ";
238 if (l == UNICODE_BIDI_SKIP)
239 std::cerr << "x";
240 else
241 std::cerr << l;
242 }
243
244 std::cerr << std::endl
245 << " Actual:";
246
247 for (int l:std::get<0>(ret))
248 {
249 std::cerr << " ";
250 if (l == UNICODE_BIDI_SKIP)
251 std::cerr << "x";
252 else
253 std::cerr << l;
254 }
255 std::cerr << std::endl;
256 exit(1);
257 }
258
259 std::vector<size_t> actual_render_order;
260
261 size_t n=0;
262
263 std::generate_n(std::back_inserter(actual_render_order),
264 s.size(),
265 [&] { return n++; });
266
267 unicode::bidi_reorder
268 (s, levels,
269 [&]
270 (size_t index,
271 size_t n)
272 {
273 auto b=actual_render_order.begin();
274 std::reverse(b+index, b+index+n);
275 });
276
277 size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(),
278 s.size(), 0);
279
280 n=0;
281 unicode::bidi_cleanup
282 (s, levels,
283 [&]
284 (size_t i)
285 {
286 actual_render_order.erase
287 (actual_render_order.begin()+i-n);
288 ++n;
289 });
290
291 if (cleaned_size != s.size())
292 {
293 std::cerr << "Regression, line "
294 << linenum
295 << ": default cleaned size"
296 << std::endl
297 << " Expected size: " << cleaned_size
298 << ", actual size: " << s.size()
299 << std::endl;
300 exit(1);
301 }
302 if (render_order != actual_render_order)
303 {
304 std::cerr << "Regression, line "
305 << linenum
306 << ": render order"
307 << std::endl
308 << " Expected:";
309 for (auto n:render_order)
310 {
311 std::cerr << " " << n;
312 }
313 std::cerr << std::endl
314 << " Actual:";
315
316 for (auto n:actual_render_order)
317 {
318 std::cerr << " " << n;
319 }
320 std::cerr << std::endl;
321 exit(1);
322 }
323
324 unicode::bidi_cleanup(s, levels,
325 [](size_t) {},
326 UNICODE_BIDI_CLEANUP_CANONICAL);
327
328 auto dump_ls=
329 [&]
330 (const std::u32string &s,
331 const std::vector<unicode_bidi_level_t> &l)
332 {
333 for (size_t i=0; i<s.size(); ++i)
334 {
335 std::cerr << " " << std::hex
336 << std::setw(4)
337 << std::setfill('0')
338 << s[i] << "/"
339 << std::dec
340 << (int)l[i];
341 }
342 };
343
344 for (int pass=0; pass<4; pass++)
345 {
346 int paragraph=pass & 1;
347 int use_default=pass & 2;
348
349 for (size_t i=0; i<s.size(); ++i)
350 {
351 /* L1 */
352 switch (unicode_bidi_type(s[i])) {
353 case UNICODE_BIDI_TYPE_S:
354 case UNICODE_BIDI_TYPE_B:
355 levels.at(i)=paragraph;
356 default:
357 break;
358 }
359 }
360
361 auto logical_string=s;
362 auto logical_levels=levels;
363
364 unicode::bidi_logical_order(logical_string,
365 logical_levels,
366 paragraph,
367 []
368 (size_t, size_t) {},
369 0,
370 logical_string.size());
371
372 std::u32string new_string;
373
374 unicode::bidi_embed
375 (logical_string,
376 logical_levels,
377 paragraph,
378 [&]
379 (const char32_t *string,
380 size_t n,
381 bool is_part_of_string)
382 {
383 if ((std::less_equal<const char32_t *>
384 {}(logical_string.c_str(),
385 string) &&
386 std::less<const
387 char32_t *>
388 {}(string,
389 logical_string.c_str()
390 +logical_string.size()))
391 != is_part_of_string)
392 {
393 std::cerr <<
394 "bidi_embed passed in "
395 "wrong value for "
396 "is_part_of_string"
397 << std::endl;
398 exit(1);
399 }
400
401 new_string.insert
402 (new_string.end(),
403 string,
404 string+n);
405 });
406
407 auto save_string=new_string;
408
409 if (use_default)
410 {
411 auto marker=unicode::bidi_embed_paragraph_level
412 (new_string, paragraph);
413
414 if (marker)
415 new_string.insert(0, 1, marker);
416
417 ret=unicode::bidi_calc(new_string);
418 }
419 else
420 {
421 ret=unicode::bidi_calc(new_string, paragraph);
422 }
423
424 unicode::bidi_reorder(new_string, std::get<0>(ret));
425
426 cleaned_size=unicode_bidi_cleaned_size
427 (new_string.c_str(),
428 new_string.size(),
429 UNICODE_BIDI_CLEANUP_CANONICAL);
430
431 unicode::bidi_cleanup(new_string,
432 std::get<0>(ret),
433 []
434 (size_t)
435 {
436 },
437 UNICODE_BIDI_CLEANUP_CANONICAL);
438
439 if (cleaned_size != new_string.size())
440 {
441 std::cerr << "Regression, line "
442 << linenum
443 << ": canonoical cleaned size"
444 << std::endl
445 << " Expected size: "
446 << cleaned_size
447 << ", actual size: "
448 << new_string.size()
449 << std::endl;
450 exit(1);
451 }
452
453 /* New string is now back in logical order */
454
455 if (new_string == s && std::get<0>(ret) == levels)
456 continue;
457
458 fclose(DEBUGDUMP);
459 DEBUGDUMP=stderr;
460
461 std::cerr << "Regression, line "
462 << linenum
463 << ": embedding markers"
464 << std::endl
465 << " Paragraph embedding level: "
466 << paragraph;
467
468 if (use_default)
469 std::cerr << " (defaulted)";
470
471 std::cerr << std::endl
472 << "String (1):";
473
474 dump_ls(s, levels);
475
476 std::cerr << std::endl << "String (2):";
477
478 dump_ls(new_string, std::get<0>(ret));
479 std::cerr << std::endl;
480
481 std::cerr << "Embedding:";
482 dump_ls(logical_string, logical_levels);
483 std::cerr << std::endl;
484
485 unicode::bidi_embed(logical_string,
486 logical_levels,
487 paragraph);
488
489 std::cerr << std::endl
490 << "Embedded string:";
491
492 for (auto c:save_string)
493 {
494 std::cerr << " ";
495
496 switch (c) {
497 case UNICODE_LRM: std::cerr << "LRM"; break;
498 case UNICODE_RLM: std::cerr << "RLM"; break;
499 case UNICODE_ALM: std::cerr << "ALM"; break;
500 case UNICODE_RLI: std::cerr << "RLI"; break;
501 case UNICODE_LRI: std::cerr << "LRI"; break;
502 case UNICODE_RLO: std::cerr << "RLO"; break;
503 case UNICODE_LRO: std::cerr << "LRO"; break;
504 case UNICODE_PDF: std::cerr << "PDF"; break;
505 case UNICODE_PDI: std::cerr << "PDI"; break;
506 default:
507 std::cerr << std::hex << std::setw(4)
508 << std::setfill('0')
509 << c;
510 break;
511 }
512 }
513 std::cerr << std::dec << std::endl << std::flush;
514
515 ret=unicode::bidi_calc(save_string, paragraph);
516 unicode::bidi_reorder(save_string, std::get<0>(ret));
517 exit(1);
518 }
519 }
520 std::cout << std::endl;
521 }
522
exception_test()523 void exception_test()
524 {
525 std::u32string s{U"שלום"};
526
527 auto res=unicode::bidi_calc(s);
528
529 int thrown=0;
530 int caught=0;
531
532 try
533 {
534 unicode::bidi_reorder(s, std::get<0>(res),
535 [&]
536 (size_t, size_t)
537 {
538 ++thrown;
539 throw 42;
540 });
541 } catch(int n)
542 {
543 caught += n;
544 }
545
546 if (thrown != 1 || caught != 42)
547 {
548 std::cerr << "Exception handling failed"
549 << std::endl;
550 }
551 }
552
partial_reorder_cleanup()553 void partial_reorder_cleanup()
554 {
555 std::u32string s{U"שלום"};
556
557 auto res=unicode::bidi_calc(s);
558
559 unicode::bidi_reorder(s, std::get<0>(res));
560
561 unicode::bidi_cleanup(s, std::get<0>(res),
562 []
563 (size_t)
564 {
565 },
566 0,
567 0, 3);
568 }
569
null_character_test()570 void null_character_test()
571 {
572 std::u32string s{{0}};
573
574 auto res=unicode::bidi_calc(s);
575
576 unicode::bidi_reorder(s, std::get<0>(res));
577
578 unicode::bidi_cleanup(s, std::get<0>(res),
579 []
580 (size_t)
581 {
582 },
583 UNICODE_BIDI_CLEANUP_EXTRA,
584 0, 3);
585
586 s=U"";
587 res=unicode::bidi_calc(s, UNICODE_BIDI_RL);
588
589 if (std::get<1>(res).direction != UNICODE_BIDI_RL)
590 {
591 std::cerr << "Paragraph embedding level not honored"
592 << std::endl;
593 exit(1);
594 }
595 }
596
direction_test()597 void direction_test()
598 {
599 static const struct {
600 std::u32string str;
601 unicode_bidi_level_t direction;
602 int is_explicit;
603 bool needs_embed;
604 } tests[]={
605 {
606 U"Hello",
607 UNICODE_BIDI_LR,
608 1,
609 false,
610 },
611 {
612 U" ",
613 UNICODE_BIDI_LR,
614 0,
615 false,
616 },
617 {
618 U"",
619 UNICODE_BIDI_LR,
620 0,
621 false,
622 },
623 {
624 U"שלום",
625 UNICODE_BIDI_RL,
626 1,
627 false,
628 },
629 {
630 U"Helloש",
631 UNICODE_BIDI_LR,
632 1,
633 false,
634 },
635 {
636 U"Hello" + std::u32string{unicode::literals::LRO}
637 + U"ש",
638 UNICODE_BIDI_LR,
639 1,
640 true,
641 },
642 };
643
644 for (const auto &t:tests)
645 {
646 auto ret=unicode::bidi_get_direction(t.str);
647
648 if (ret.direction != t.direction ||
649 ret.is_explicit != t.is_explicit)
650 {
651 std::cerr << "direction_test failed\n";
652 exit(1);
653 }
654
655 std::u32string s=t.str;
656 auto levels=std::get<0>(unicode::bidi_calc(s, t.direction));
657 unicode::bidi_reorder(s, levels);
658 unicode::bidi_cleanup(s, levels);
659
660 if (unicode::bidi_needs_embed(s, levels, &t.direction)
661 != t.needs_embed)
662 {
663 std::cerr << "needs embed failed\n";
664 exit(1);
665 }
666 }
667 }
668
direction_test2()669 void direction_test2()
670 {
671 static const struct {
672 std::u32string str;
673 std::vector<unicode_bidi_level_t> directions;
674 unicode_bidi_level_t direction;
675 bool needs_embed;
676 } tests[]={
677 {
678 U"Hello world!",
679 {UNICODE_BIDI_LR,
680 UNICODE_BIDI_LR,
681 UNICODE_BIDI_LR,
682 UNICODE_BIDI_LR,
683 UNICODE_BIDI_LR,
684 UNICODE_BIDI_LR,
685 UNICODE_BIDI_LR,
686 UNICODE_BIDI_LR,
687 UNICODE_BIDI_LR,
688 UNICODE_BIDI_LR,
689 UNICODE_BIDI_LR,
690 UNICODE_BIDI_LR},
691 UNICODE_BIDI_LR,
692 false,
693 },
694 {
695 U"Hello world!",
696 {UNICODE_BIDI_RL,
697 UNICODE_BIDI_RL,
698 UNICODE_BIDI_RL,
699 UNICODE_BIDI_RL,
700 UNICODE_BIDI_RL,
701 UNICODE_BIDI_RL,
702 UNICODE_BIDI_RL,
703 UNICODE_BIDI_RL,
704 UNICODE_BIDI_RL,
705 UNICODE_BIDI_RL,
706 UNICODE_BIDI_RL,
707 UNICODE_BIDI_RL},
708 UNICODE_BIDI_LR,
709 true,
710 },
711 };
712
713 for (const auto &t:tests)
714 {
715 if (t.str.size() != t.directions.size())
716 {
717 std::cerr << "direction_test2 bad data\n";
718 exit(1);
719 }
720
721 if (unicode::bidi_needs_embed(t.str, t.directions, &t.direction)
722 != t.needs_embed)
723 {
724 std::cerr << "direction-test2 failed\n";
725 exit(1);
726 }
727 }
728 }
729
composition_test()730 void composition_test()
731 {
732 typedef std::tuple<unicode_bidi_level_t,
733 size_t, size_t, size_t,
734 size_t> results_t;
735
736 static const struct {
737 std::u32string str;
738 std::vector<unicode_bidi_level_t> levels;
739 std::vector<results_t> results;
740 } tests[] = {
741 // Test 1
742 {
743 U"a\u0303\u0303b\u0303\u0303c",
744 {0, 0, 0, 0, 0, 0, 0},
745 {
746 results_t{0, 0, 7, 1, 2},
747 results_t{0, 0, 7, 4, 2},
748 }
749 },
750 // Test 2
751 {
752 U"\u0303ab\u0303",
753 {0, 0, 0, 0},
754 {
755 results_t{0, 0, 4, 0, 1},
756 results_t{0, 0, 4, 3, 1},
757 }
758 },
759 // Test 3
760 {
761 U"a\u0303\u0303b",
762 {0, 0, 1, 1},
763 {
764 results_t{0, 0, 2, 1, 1},
765 results_t{1, 2, 2, 2, 1},
766 }
767 },
768 // Test 4
769 {
770 U"\u0303a\u0303a",
771 {0, 0, 0, 0},
772 {
773 results_t{0, 0, 4, 0, 1},
774 results_t{0, 0, 4, 2, 1},
775 }
776 },
777 };
778
779 int testnum=0;
780
781 for (const auto &t:tests)
782 {
783 ++testnum;
784
785 std::vector<std::tuple<unicode_bidi_level_t,
786 size_t, size_t, size_t, size_t>> actual;
787
788 auto copy=t.str;
789
790 unicode::bidi_combinings(copy, t.levels,
791 [&]
792 (unicode_bidi_level_t level,
793 size_t level_start,
794 size_t n_chars,
795 size_t comb_start,
796 size_t n_comb_chars)
797 {
798 actual.emplace_back
799 (level,
800 level_start,
801 n_chars,
802 comb_start,
803 n_comb_chars);
804
805 auto b=copy.begin()+comb_start;
806 auto e=b+n_comb_chars;
807
808 if (comb_start + n_comb_chars
809 < level_start + n_chars)
810 ++e;
811
812 while (b < e)
813 {
814 --e;
815 std::swap(*b, *e);
816 ++b;
817 }
818 });
819
820 if (actual != t.results)
821 {
822 std::cerr << "composition test " << testnum
823 << " failed\n";
824 exit(1);
825 }
826 }
827 }
main(int argc,char ** argv)828 int main(int argc, char **argv)
829 {
830 DEBUGDUMP=fopen("/dev/null", "w");
831 if (!DEBUGDUMP)
832 {
833 perror("/dev/null");
834 exit(1);
835 }
836 exception_test();
837 composition_test();
838 partial_reorder_cleanup();
839 null_character_test();
840 latin_test();
841 character_test();
842 direction_test();
843 direction_test2();
844 return 0;
845 }
846