1 /*
2 ** Copyright 2011-2020 Double Precision, Inc.
3 ** See COPYING for distribution information.
4 **
5 */
6
7 #include "unicode_config.h"
8 #include "courier-unicode.h"
9 #include <unistd.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <stdlib.h>
13
/*
** Grapheme cluster break classes, as compared against each other by
** unicode_grapheme_break_next(). ANY is also the value that function
** passes to unicode_tab_lookup() as its final argument (presumably the
** class reported for characters without an explicit table entry).
*/
#define UNICODE_GRAPHEMEBREAK_ANY 0x00
#define UNICODE_GRAPHEMEBREAK_CR 0x01
#define UNICODE_GRAPHEMEBREAK_LF 0x02
#define UNICODE_GRAPHEMEBREAK_Control 0x03
#define UNICODE_GRAPHEMEBREAK_Extend 0x04
#define UNICODE_GRAPHEMEBREAK_Prepend 0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark 0x06
#define UNICODE_GRAPHEMEBREAK_L 0x07
#define UNICODE_GRAPHEMEBREAK_V 0x08
#define UNICODE_GRAPHEMEBREAK_T 0x09
#define UNICODE_GRAPHEMEBREAK_LV 0x0A
#define UNICODE_GRAPHEMEBREAK_LVT 0x0B
#define UNICODE_GRAPHEMEBREAK_Regional_Indicator 0x0C

#define UNICODE_GRAPHEMEBREAK_ZWJ 0x0D

/*
** Start-of-text marker: the prev_class that unicode_grapheme_break_init()
** gives to a new handle.
*/
#define UNICODE_GRAPHEMEBREAK_SOT 0xFF
31
32 #include "graphemebreaktab.h"
33
/*
** State carried from one unicode_grapheme_break_next() call to the next.
*/
struct unicode_grapheme_break_info_s {
	/* Class of the most recently seen character (or the SOT marker) */
	uint8_t prev_class;
	/*
	** Number of consecutive characters, ending with the most recent one,
	** that share prev_class. Only its parity is consulted, for the
	** regional indicator rule.
	*/
	unsigned prev_count;
};
38
unicode_grapheme_break_init()39 unicode_grapheme_break_info_t unicode_grapheme_break_init()
40 {
41 unicode_grapheme_break_info_t t=(unicode_grapheme_break_info_t)
42 calloc(1, sizeof(struct unicode_grapheme_break_info_s));
43
44 if (!t)
45 abort();
46
47 t->prev_class=UNICODE_GRAPHEMEBREAK_SOT;
48
49 return t;
50 }
51
/*
** Release a handle obtained from unicode_grapheme_break_init().
**
** The handle is passed straight to free(), so a null handle is accepted
** and ignored. The handle must not be used afterwards.
*/
void unicode_grapheme_break_deinit(unicode_grapheme_break_info_t t)
{
	free(t);
}
56
unicode_grapheme_break(char32_t a,char32_t b)57 int unicode_grapheme_break(char32_t a, char32_t b)
58 {
59 struct unicode_grapheme_break_info_s s;
60
61 memset((char *)&s, 0, sizeof(s));
62
63 (void)unicode_grapheme_break_next(&s, a);
64
65 return unicode_grapheme_break_next(&s, b);
66 }
67
/*
** Feed the next character of a text stream to the grapheme break scanner.
**
** t: scanner state. On entry it describes the previous character; on
**    return it describes b. It is updated on every call, whatever the
**    result.
** b: the next character in the stream.
**
** Returns 1 if there is a grapheme cluster boundary immediately before b,
** 0 if b continues the cluster of the previous character. The first
** character fed to a handle fresh from unicode_grapheme_break_init()
** always reports a boundary.
**
** The GBn tags in the comments are the rule numbers of UAX #29.
*/
int unicode_grapheme_break_next(unicode_grapheme_break_info_t t, char32_t b)
{
	uint8_t ac=t->prev_class;
	uint8_t bc=unicode_tab_lookup(b,
				      unicode_starting_indextab,
				      unicode_starting_pagetab,
				      sizeof(unicode_starting_indextab)/
				      sizeof(unicode_starting_indextab[0]),
				      unicode_rangetab,
				      sizeof(unicode_rangetab)/
				      sizeof(unicode_rangetab[0]),
				      unicode_classtab,
				      UNICODE_GRAPHEMEBREAK_ANY);

	/*
	** Maintain the length of the current run of same-class characters,
	** b included. This must happen before any of the early returns
	** below, so that the state is always up to date for the next call.
	*/
	if (ac != bc)
		t->prev_count=0;
	++t->prev_count;

	t->prev_class=bc;

	if (ac == UNICODE_GRAPHEMEBREAK_SOT)
		return 1; /* GB1, GB2 is implied */

	if (ac == UNICODE_GRAPHEMEBREAK_CR && bc == UNICODE_GRAPHEMEBREAK_LF)
		return 0; /* GB3 */

	switch (ac) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB4 */
	default:
		break;
	}

	switch (bc) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB5 */
	default:
		break;
	}

	if (ac == UNICODE_GRAPHEMEBREAK_L)
		switch (bc) {
		case UNICODE_GRAPHEMEBREAK_L:
		case UNICODE_GRAPHEMEBREAK_V:
		case UNICODE_GRAPHEMEBREAK_LV:
		case UNICODE_GRAPHEMEBREAK_LVT:
			return 0; /* GB6 */
		default:
			break;
		}

	if ((ac == UNICODE_GRAPHEMEBREAK_LV ||
	     ac == UNICODE_GRAPHEMEBREAK_V) &&
	    (bc == UNICODE_GRAPHEMEBREAK_V ||
	     bc == UNICODE_GRAPHEMEBREAK_T))
		return 0; /* GB7 */

	if ((ac == UNICODE_GRAPHEMEBREAK_LVT ||
	     ac == UNICODE_GRAPHEMEBREAK_T) &&
	    bc == UNICODE_GRAPHEMEBREAK_T)
		return 0; /* GB8 */

	if (bc == UNICODE_GRAPHEMEBREAK_Extend ||
	    bc == UNICODE_GRAPHEMEBREAK_ZWJ)
		return 0; /* GB9 */

	if (bc == UNICODE_GRAPHEMEBREAK_SpacingMark)
		return 0; /* GB9a */

	if (ac == UNICODE_GRAPHEMEBREAK_Prepend)
		return 0; /* GB9b */

	/*
	** GB11, approximated. The rule proper is
	** "ExtPict Extend* ZWJ x ExtPict", but there is no
	** Extended_Pictographic class among the classes this function
	** works with, so whatever follows a ZWJ is kept in its cluster.
	**
	** Only ZWJ is tested here. A preceding Extend must not suppress
	** the break on its own: no rule joins Extend to an arbitrary
	** following character, and doing so would glue the character after
	** a combining mark onto the previous cluster.
	*/
	if (ac == UNICODE_GRAPHEMEBREAK_ZWJ)
		return 0;

	/*
	** Regional indicators pair up from the start of their run: an even
	** run length means b completes a pair, an odd one means b starts
	** the next pair.
	*/
	if (ac == UNICODE_GRAPHEMEBREAK_Regional_Indicator &&
	    bc == UNICODE_GRAPHEMEBREAK_Regional_Indicator &&
	    (t->prev_count % 2) == 0)
		return 0; /* GB12, GB13 */

	return 1; /* GB999 */
}
154