1 /*
2 ** Copyright 2011-2020 Double Precision, Inc.
3 ** See COPYING for distribution information.
4 **
5 */
6 
7 #include	"unicode_config.h"
8 #include	"courier-unicode.h"
9 #include	<unistd.h>
10 #include	<stdint.h>
11 #include	<string.h>
12 #include	<stdlib.h>
13 
/*
** Grapheme cluster break classes. These are the values that get compared
** against the class looked up for each character, and against the class
** remembered from the previous character.
*/

/* Fallback class handed to the table lookup */
#define UNICODE_GRAPHEMEBREAK_ANY		0x00

/* Line terminators and controls (rules GB3, GB4, GB5) */
#define UNICODE_GRAPHEMEBREAK_CR		0x01
#define UNICODE_GRAPHEMEBREAK_LF		0x02
#define UNICODE_GRAPHEMEBREAK_Control		0x03

/* Characters that attach to a neighbour (rules GB9, GB9a, GB9b) */
#define UNICODE_GRAPHEMEBREAK_Extend		0x04
#define UNICODE_GRAPHEMEBREAK_Prepend		0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark	0x06

/* Hangul syllable pieces (rules GB6, GB7, GB8) */
#define UNICODE_GRAPHEMEBREAK_L			0x07
#define UNICODE_GRAPHEMEBREAK_V			0x08
#define UNICODE_GRAPHEMEBREAK_T			0x09
#define UNICODE_GRAPHEMEBREAK_LV		0x0A
#define UNICODE_GRAPHEMEBREAK_LVT		0x0B

/* Regional indicators pair up (rules GB12, GB13) */
#define UNICODE_GRAPHEMEBREAK_Regional_Indicator 0x0C

/* Zero width joiner */
#define UNICODE_GRAPHEMEBREAK_ZWJ		0x0D

/*
** Start of text. Stored as the "previous class" before any character
** has been processed.
*/
#define UNICODE_GRAPHEMEBREAK_SOT		0xFF
31 
32 #include "graphemebreaktab.h"
33 
/*
** State carried from one character to the next while scanning for
** grapheme cluster boundaries.
*/
struct unicode_grapheme_break_info_s {
	/*
	** Break class of the most recently seen character, or
	** UNICODE_GRAPHEMEBREAK_SOT if nothing has been seen yet.
	*/
	uint8_t prev_class;

	/*
	** Length of the current run of consecutive characters that all
	** share prev_class. Consulted for regional indicator pairing.
	*/
	unsigned int prev_count;
};
38 
unicode_grapheme_break_init()39 unicode_grapheme_break_info_t unicode_grapheme_break_init()
40 {
41 	unicode_grapheme_break_info_t t=(unicode_grapheme_break_info_t)
42 		calloc(1, sizeof(struct unicode_grapheme_break_info_s));
43 
44 	if (!t)
45 		abort();
46 
47 	t->prev_class=UNICODE_GRAPHEMEBREAK_SOT;
48 
49 	return t;
50 }
51 
/*
** Release a scanning state obtained from unicode_grapheme_break_init().
**
** The handle is handed straight to free(), so a null handle is
** accepted and ignored.
*/
void unicode_grapheme_break_deinit(unicode_grapheme_break_info_t t)
{
	free(t);
}
56 
unicode_grapheme_break(char32_t a,char32_t b)57 int unicode_grapheme_break(char32_t a, char32_t b)
58 {
59 	struct unicode_grapheme_break_info_s s;
60 
61 	memset((char *)&s, 0, sizeof(s));
62 
63 	(void)unicode_grapheme_break_next(&s, a);
64 
65 	return unicode_grapheme_break_next(&s, b);
66 }
67 
/*
** Feed the next character of a text stream to the grapheme break scanner.
**
** t - scanning state; its previous-class and run-length fields are
**     updated on every call.
** b - the next character.
**
** Returns 1 if there is a grapheme cluster boundary immediately before
** b, 0 if b continues the cluster of the previously fed character. The
** first character fed to a state created by unicode_grapheme_break_init()
** always returns 1.
**
** The rule labels in the comments refer to the grapheme cluster boundary
** rules of Unicode Standard Annex #29.
*/
int unicode_grapheme_break_next(unicode_grapheme_break_info_t t, char32_t b)
{
	uint8_t ac=t->prev_class;
	/*
	** NOTE(review): the final argument is presumably the class that
	** gets returned for characters absent from the tables -- confirm
	** against unicode_tab_lookup().
	*/
	uint8_t bc=unicode_tab_lookup(b,
				      unicode_starting_indextab,
				      unicode_starting_pagetab,
				      sizeof(unicode_starting_indextab)/
				      sizeof(unicode_starting_indextab[0]),
				      unicode_rangetab,
				      sizeof(unicode_rangetab)/
				      sizeof(unicode_rangetab[0]),
				      unicode_classtab,
				      UNICODE_GRAPHEMEBREAK_ANY);

	/*
	** prev_count is the length of the run of same-class characters
	** that ends with b. The state is updated before any rule is
	** evaluated, so every return path below leaves it consistent.
	*/
	if (ac != bc)
		t->prev_count=0;
	++t->prev_count;

	t->prev_class=bc;

	if (ac == UNICODE_GRAPHEMEBREAK_SOT)
		return 1; /* GB1, GB2 is implied */

	if (ac == UNICODE_GRAPHEMEBREAK_CR && bc == UNICODE_GRAPHEMEBREAK_LF)
		return 0; /* GB3 */

	switch (ac) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB4 */
	default:
		break;
	}

	switch (bc) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB5 */
	default:
		break;
	}

	if (ac == UNICODE_GRAPHEMEBREAK_L)
	{
		switch (bc) {
		case UNICODE_GRAPHEMEBREAK_L:
		case UNICODE_GRAPHEMEBREAK_V:
		case UNICODE_GRAPHEMEBREAK_LV:
		case UNICODE_GRAPHEMEBREAK_LVT:
			return 0; /* GB6 */
		default:
			break;
		}
	}

	if ((ac == UNICODE_GRAPHEMEBREAK_LV ||
	     ac == UNICODE_GRAPHEMEBREAK_V) &&
	    (bc == UNICODE_GRAPHEMEBREAK_V ||
	     bc == UNICODE_GRAPHEMEBREAK_T))
		return 0; /* GB7 */

	if ((ac == UNICODE_GRAPHEMEBREAK_LVT ||
	     ac == UNICODE_GRAPHEMEBREAK_T) &&
	    bc == UNICODE_GRAPHEMEBREAK_T)
		return 0; /* GB8 */

	if (bc == UNICODE_GRAPHEMEBREAK_Extend ||
	    bc == UNICODE_GRAPHEMEBREAK_ZWJ)
		return 0; /* GB9 */

	if (bc == UNICODE_GRAPHEMEBREAK_SpacingMark)
		return 0; /* GB9a */

	if (ac == UNICODE_GRAPHEMEBREAK_Prepend)
		return 0; /* GB9b */

	/*
	** GB11, approximated. The exact rule joins a ZWJ to the next
	** character only inside an emoji sequence (Extended_Pictographic
	** Extend* ZWJ x Extended_Pictographic). Neither that property nor
	** the history before the ZWJ is tracked here, so every ZWJ is
	** treated as joining.
	**
	** An Extend character on the left must NOT suppress the break:
	** Extend attaches to what precedes it (GB9, above) and never to
	** what follows it. Otherwise a combining mark would glue the next
	** base character onto the current cluster.
	*/
	if (ac == UNICODE_GRAPHEMEBREAK_ZWJ)
		return 0; /* GB11 (approximate) */

	/*
	** Regional indicators pair up two at a time. b completes a pair
	** when it is at an even position in the current run of regional
	** indicators, and starts a new pair (break) otherwise.
	*/
	if (ac == UNICODE_GRAPHEMEBREAK_Regional_Indicator &&
	    bc == UNICODE_GRAPHEMEBREAK_Regional_Indicator &&
	    (t->prev_count % 2) == 0)
		return 0; /* GB12, GB13 */

	return 1; /* GB999 */
}
154