1 #include "tests.h"
2 #include <ctype.h>
3 #include <wchar.h>
4 
my_unassigned(int c)5 static int my_unassigned(int c) {
6     int cat = utf8proc_get_property(c)->category;
7     return (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO);
8 }
9 
my_isprint(int c)10 static int my_isprint(int c) {
11     int cat = utf8proc_get_property(c)->category;
12     return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) ||
13            (c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd || c == 0x00ad) ||
14            (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO);
15 }
16 
main(int argc,char ** argv)17 int main(int argc, char **argv)
18 {
19     int c, error = 0, updates = 0;
20 
21     (void) argc; /* unused */
22     (void) argv; /* unused */
23 
24     /* some simple sanity tests of the character widths */
25     for (c = 0; c <= 0x110000; ++c) {
26         int cat = utf8proc_get_property(c)->category;
27         int w = utf8proc_charwidth(c);
28         if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) && w > 0) {
29             fprintf(stderr, "nonzero width %d for combining char %x\n", w, c);
30             error += 1;
31         }
32         if (w == 0 &&
33             ((cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) ||
34              (cat >= UTF8PROC_CATEGORY_ND && cat <= UTF8PROC_CATEGORY_SC) ||
35              (cat >= UTF8PROC_CATEGORY_SO && cat <= UTF8PROC_CATEGORY_ZS))) {
36             fprintf(stderr, "zero width for symbol-like char %x\n", c);
37             error += 1;
38         }
39         if (c <= 127 && ((!isprint(c) && w > 0) || (isprint(c) && wcwidth(c) != w))) {
40             fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n",
41             wcwidth(c), w,
42             isprint(c) ? "printable" : "non-printable", c);
43             error += 1;
44         }
45         if (!my_isprint(c) && w > 0) {
46             fprintf(stderr, "non-printing %x had width %d\n", c, w);
47             error += 1;
48         }
49         if (my_unassigned(c) && w != 1) {
50             fprintf(stderr, "unexpected width %d for unassigned char %x\n", w, c);
51             error += 1;
52         }
53     }
54     check(!error, "utf8proc_charwidth FAILED %d tests.", error);
55 
56     check(utf8proc_charwidth(0x00ad) == 1, "incorrect width for U+00AD (soft hyphen)");
57     check(utf8proc_charwidth(0xe000) == 1, "incorrect width for U+e000 (PUA)");
58 
59     /* print some other information by compariing with system wcwidth */
60     printf("Mismatches with system wcwidth (not necessarily errors):\n");
61     for (c = 0; c <= 0x110000; ++c) {
62         int w = utf8proc_charwidth(c);
63         int wc = wcwidth(c);
64         if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue;
65         /* lots of these errors for out-of-date system unicode tables */
66         if (wc == -1 && my_isprint(c) && !my_unassigned(c) && w > 0)
67             updates += 1;
68         if (wc == -1 && !my_isprint(c) && w > 0)
69             printf("  wcwidth(%x) = -1 for non-printable width-%d char\n", c, w);
70         if (wc >= 0 && wc != w)
71             printf("  wcwidth(%x) = %d != charwidth %d\n", c, wc, w);
72     }
73     printf("   ... (positive widths for %d chars unknown to wcwidth) ...\n", updates);
74     printf("Character-width tests SUCCEEDED.\n");
75 
76     return 0;
77 }
78