1 /*======================================================================*\
2 | Functions to enquire character width
3 | as depending on terminal properties,
4 | varying with terminal kind and version,
5 | according to different Unicode versions and other features
6 \*======================================================================*/
7
8 #include "termprop.h"
9
10
11 /*======================================================================*\
12 | Table structure: tables of character ranges; lookup function
13 \*======================================================================*/
14
/* A closed range of character codes; both bounds are inclusive.
   Range tables are arrays of these, sorted in ascending order
   and non-overlapping.
 */
struct interval {
  unsigned long first;
  unsigned long last;
};

/* Look up a character code in a table of character ranges.
   ucs      character code to search for
   table    array of intervals, sorted ascending and non-overlapping
            (the binary search relies on this); may be a null pointer
   length   number of entries in table
   Returns a pointer to the table entry whose range contains ucs,
   or 0 if no entry matches, table is null, or length is not positive.
 */
static
struct interval *
lookup (ucs, table, length)
  unsigned long ucs;
  struct interval * table;
  int length;
{
  int min = 0;
  int mid;
  int max = length - 1;

  /* nothing to search: table not set up, or no entries */
  if (! table || length <= 0) {
    return 0;
  }

  /* first quick check for Latin-1 etc. characters */
  if (ucs < table [0].first) {
    return 0;
  }

  /* binary search */
  while (max >= min) {
    /* midpoint without forming min + max, which could overflow int */
    mid = min + (max - min) / 2;
    if (ucs > table [mid].last) {
      min = mid + 1;
    } else if (ucs < table [mid].first) {
      max = mid - 1;
    } else {
      return & table [mid];
    }
  }

  return 0;
}
50
51
52 /*======================================================================*\
53 | Table variables
54 | - to be assigned depending on detected Unicode data version
55 \*======================================================================*/
56
/* Currently selected range tables.
   All are null until term_setup_data () assigns the tables
   matching the detected data version.
 */
static struct interval * combining_table = 0;
static struct interval * spacing_combining_table = 0;
static struct interval * assigned_table = 0;

/* Number of entries in each of the selected tables; -1 while unset. */
static int combining_len = -1;
static int spacing_combining_len = -1;
static int assigned_len = -1;
63
/* Number of elements of an array object.
   Only meaningful for real arrays, not for pointers.
 */
#define arrlen(arr) (sizeof (arr) / sizeof ((arr) [0]))
65
66
67 /*======================================================================*\
68 | Actual data tables
69 | - some explicitly included here
70 | - some generated from Unicode data (#include "width.t")
71 \*======================================================================*/
72
73 /* Tables of combining characters, according to different versions
74 of Unicode or wcwidth.c, respectively,
75 arranged as sorted lists of non-overlapping intervals.
76 */
77
78 /**
79 Combining characters
80 ~ Unicode 3.0 with some exceptions
81 xterm < 157
82 */
83 static struct interval
84 combining_old [] = {
85 { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
86 { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
87 { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
88 { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
89 { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
90 { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
91 { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
92 { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
93 { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
94 { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
95 { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
96 { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
97 { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
98 { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
99 { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
100 { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
101 { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
102 { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
103 { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
104 { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
105 { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
106 { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
107 { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
108 { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
109 { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
110 { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
111 { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
112 { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
113 { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
114 { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
115 { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
116 };
117
118 /**
119 Combining characters
120 Unicode 3.0
121 wcwidth 2001-01-12
122 xterm 157...166
123 */
124 /*static struct interval combining_300 [] = { defined in width.t };*/
125
126 /**
127 Combining characters
128 Unicode 3.2
129 wcwidth.c 2002-05-08
130 xterm 167..179
131 */
132 /*static struct interval combining_320 [] = { defined in width.t };*/
133
134 /**
135 Combining characters
136 Unicode 4.0
137 wcwidth.c 2003-05-20
138 xterm 180...201
139 */
140 /*static struct interval combining_400 [] = { defined in width.t };*/
141
142 /**
143 Combining characters
144 Unicode 4.1
145 xterm 202...
146 */
147 /*static struct interval combining_410 [] = { defined in width.t };*/
148
149 /**
150 Combining characters
151 Unicode 5.0
152 */
153 /*static struct interval combining_500 [] = { defined in width.t };*/
154
155
156 /* Tables of specific width sets.
157 */
158
159 /**
160 Poderosa terminal emulator in Latin-1 mode.
161 */
162 static struct interval
163 wide_poderosa [] = {
164 { 0x00A7, 0x00A8 }, { 0x00B0, 0x00B1 },
165 { 0x00B4, 0x00B4 }, { 0x00B6, 0x00B6 }, { 0x00D7, 0x00D7 }, { 0x00F7, 0x00F7 }
166 };
167
168
169 /* Tables of ambiguous width characters, according to different versions
170 of Unicode or wcwidth.c, respectively,
171 arranged as sorted lists of non-overlapping intervals.
172 */
173
174 /**
175 Ambiguous width characters
176 Unicode 3.2 without Private Use range plus extended end/non-BMP ranges
177 wcwidth.c 2002-05-08 plus end/non-BMP ranges
178 xterm 167...179 (different version in earlier xterm releases was unused)
179 */
180 static struct interval
181 ambiguous_old [] = {
182 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
183 { 0x00AA, 0x00AA }, { 0x00AD, 0x00AE }, { 0x00B0, 0x00B4 },
184 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
185 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
186 { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
187 { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
188 { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
189 { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
190 { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
191 { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
192 { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
193 { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
194 { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
195 { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
196 { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
197 { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
198 { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
199 { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
200 { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
201 { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
202 { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
203 { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
204 { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
205 { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
206 { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
207 { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
208 { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
209 { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
210 { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
211 { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
212 { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
213 { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
214 { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
215 { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
216 { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
217 { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
218 { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
219 { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
220 { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
221 { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
222 { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x24FE },
223 { 0x2500, 0x254B }, { 0x2550, 0x2573 }, { 0x2580, 0x258F },
224 { 0x2592, 0x2595 }, { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 },
225 { 0x25B2, 0x25B3 }, { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD },
226 { 0x25C0, 0x25C1 }, { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB },
227 { 0x25CE, 0x25D1 }, { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF },
228 { 0x2605, 0x2606 }, { 0x2609, 0x2609 }, { 0x260E, 0x260F },
229 { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
230 { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
231 { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
232 { 0x273D, 0x273D }, { 0x2776, 0x277F },
233 { 0xFFFD, 0xFFFF }, { 0xF0000, 0x7FFFFFFF }
234 };
235
236 /**
237 Ambiguous width characters
238 Unicode 4.0 plus extended end/non-BMP ranges
239 wcwidth.c 2003-05-20 plus extended end/non-BMP ranges
240 wcwidth.c 2007-05-25 plus extended end/non-BMP ranges
241 xterm 180...
242 */
243 static struct interval
244 ambiguous_400 [] = {
245 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
246 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
247 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
248 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
249 { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
250 { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
251 { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
252 { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
253 { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
254 { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
255 { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
256 { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
257 { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
258 { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
259 { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
260 { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
261 { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
262 { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
263 { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
264 { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
265 { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
266 { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
267 { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
268 { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
269 { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
270 { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
271 { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
272 { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
273 { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
274 { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
275 { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
276 { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
277 { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
278 { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
279 { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
280 { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
281 { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
282 { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
283 { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
284 { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
285 { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
286 { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
287 { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
288 { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
289 { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
290 { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
291 { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
292 { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
293 { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
294 { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
295 { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
296 { 0xFFFD, 0xFFFF }, { 0xF0000, 0x7FFFFFFF }
297 };
298
299 static struct interval
300 ambiguous_520 [] = {
301 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
302 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
303 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
304 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
305 { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
306 { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
307 { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
308 { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
309 { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
310 { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
311 { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
312 { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
313 { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
314 { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
315 { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
316 { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
317 { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
318 { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
319 { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
320 { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
321 { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
322 { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
323 { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
324 { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
325 { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
326 { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
327 { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
328 { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
329 { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
330 { 0x2189, 0x2189 }, { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 },
331 { 0x21D2, 0x21D2 }, { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 },
332 { 0x2200, 0x2200 }, { 0x2202, 0x2203 }, { 0x2207, 0x2208 },
333 { 0x220B, 0x220B }, { 0x220F, 0x220F }, { 0x2211, 0x2211 },
334 { 0x2215, 0x2215 }, { 0x221A, 0x221A }, { 0x221D, 0x2220 },
335 { 0x2223, 0x2223 }, { 0x2225, 0x2225 }, { 0x2227, 0x222C },
336 { 0x222E, 0x222E }, { 0x2234, 0x2237 }, { 0x223C, 0x223D },
337 { 0x2248, 0x2248 }, { 0x224C, 0x224C }, { 0x2252, 0x2252 },
338 { 0x2260, 0x2261 }, { 0x2264, 0x2267 }, { 0x226A, 0x226B },
339 { 0x226E, 0x226F }, { 0x2282, 0x2283 }, { 0x2286, 0x2287 },
340 { 0x2295, 0x2295 }, { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 },
341 { 0x22BF, 0x22BF }, { 0x2312, 0x2312 }, { 0x2460, 0x24E9 },
342 { 0x24EB, 0x254B }, { 0x2550, 0x2573 }, { 0x2580, 0x258F },
343 { 0x2592, 0x2595 }, { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 },
344 { 0x25B2, 0x25B3 }, { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD },
345 { 0x25C0, 0x25C1 }, { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB },
346 { 0x25CE, 0x25D1 }, { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF },
347 { 0x2605, 0x2606 }, { 0x2609, 0x2609 }, { 0x260E, 0x260F },
348 { 0x2614, 0x2615 }, { 0x261C, 0x261C }, { 0x261E, 0x261E },
349 { 0x2640, 0x2640 }, { 0x2642, 0x2642 }, { 0x2660, 0x2661 },
350 { 0x2663, 0x2665 }, { 0x2667, 0x266A }, { 0x266C, 0x266D },
351 { 0x266F, 0x266F }, { 0x269E, 0x269F }, { 0x26BE, 0x26BF },
352 { 0x26C4, 0x26CD }, { 0x26CF, 0x26E1 }, { 0x26E3, 0x26E3 },
353 { 0x26E8, 0x26FF }, { 0x273D, 0x273D }, { 0x2757, 0x2757 },
354 { 0x2776, 0x277F }, { 0x2B55, 0x2B59 }, { 0x3248, 0x324F },
355 { 0xE000, 0xF8FF }, { 0xFFFD, 0xFFFD }, { 0x1F100, 0x1F10A },
356 { 0x1F110, 0x1F12D }, { 0x1F131, 0x1F131 }, { 0x1F13D, 0x1F13D },
357 { 0x1F13F, 0x1F13F }, { 0x1F142, 0x1F142 }, { 0x1F146, 0x1F146 },
358 { 0x1F14A, 0x1F14E }, { 0x1F157, 0x1F157 }, { 0x1F15F, 0x1F15F },
359 { 0x1F179, 0x1F179 }, { 0x1F17B, 0x1F17C }, { 0x1F17F, 0x1F17F },
360 { 0x1F18A, 0x1F18D }, { 0x1F190, 0x1F190 }, { 0xF0000, 0xFFFFD },
361 { 0x100000, 0x10FFFD }
362 };
363
364
365 #ifdef __TURBOC__
366 static struct interval
367 spacing_combining_300 [] = { {0, 0} };
368 static struct interval
369 assigned_300 [] = { {0, 0} };
370 #else
371 #include "width.t"
372 #endif
373
374
375 /*======================================================================*\
376 | Width enquiry functions; and table setup
377 \*======================================================================*/
378
379 static int configured_combining_data_version = -1;
380
381 static
382 void
term_setup_data()383 term_setup_data ()
384 {
385 #ifndef __TURBOC__
386 if (combining_data_version >= U700) {
387 combining_table = combining_700;
388 combining_len = arrlen (combining_700);
389 spacing_combining_table = spacing_combining_700;
390 spacing_combining_len = arrlen (spacing_combining_700);
391 assigned_table = assigned_700;
392 assigned_len = arrlen (assigned_700);
393 } else if (combining_data_version >= U630) {
394 combining_table = combining_630;
395 combining_len = arrlen (combining_630);
396 spacing_combining_table = spacing_combining_630;
397 spacing_combining_len = arrlen (spacing_combining_630);
398 assigned_table = assigned_630;
399 assigned_len = arrlen (assigned_630);
400 } else if (combining_data_version >= U620) {
401 combining_table = combining_620;
402 combining_len = arrlen (combining_620);
403 spacing_combining_table = spacing_combining_620;
404 spacing_combining_len = arrlen (spacing_combining_620);
405 assigned_table = assigned_620;
406 assigned_len = arrlen (assigned_620);
407 } else if (combining_data_version >= U600) {
408 combining_table = combining_600;
409 combining_len = arrlen (combining_600);
410 spacing_combining_table = spacing_combining_600;
411 spacing_combining_len = arrlen (spacing_combining_600);
412 assigned_table = assigned_600;
413 assigned_len = arrlen (assigned_600);
414 } else if (combining_data_version >= U520) {
415 combining_table = combining_520;
416 combining_len = arrlen (combining_520);
417 spacing_combining_table = spacing_combining_520;
418 spacing_combining_len = arrlen (spacing_combining_520);
419 assigned_table = assigned_520;
420 assigned_len = arrlen (assigned_520);
421 } else if (combining_data_version >= U510) {
422 combining_table = combining_510;
423 combining_len = arrlen (combining_510);
424 spacing_combining_table = spacing_combining_510;
425 spacing_combining_len = arrlen (spacing_combining_510);
426 assigned_table = assigned_510;
427 assigned_len = arrlen (assigned_510);
428 } else if (combining_data_version >= U500) {
429 combining_table = combining_500;
430 combining_len = arrlen (combining_500);
431 spacing_combining_table = spacing_combining_500;
432 spacing_combining_len = arrlen (spacing_combining_500);
433 assigned_table = assigned_500;
434 assigned_len = arrlen (assigned_500);
435 } else if (combining_data_version == U410) {
436 combining_table = combining_410;
437 combining_len = arrlen (combining_410);
438 spacing_combining_table = spacing_combining_410;
439 spacing_combining_len = arrlen (spacing_combining_410);
440 assigned_table = assigned_410;
441 assigned_len = arrlen (assigned_410);
442 } else if (combining_data_version == U400) {
443 combining_table = combining_400;
444 combining_len = arrlen (combining_400);
445 spacing_combining_table = spacing_combining_400;
446 spacing_combining_len = arrlen (spacing_combining_400);
447 assigned_table = assigned_400;
448 assigned_len = arrlen (assigned_400);
449 } else if (combining_data_version == U320) {
450 combining_table = combining_320;
451 combining_len = arrlen (combining_320);
452 spacing_combining_table = spacing_combining_320;
453 spacing_combining_len = arrlen (spacing_combining_320);
454 assigned_table = assigned_320;
455 assigned_len = arrlen (assigned_320);
456 } else if (combining_data_version == U300) {
457 combining_table = combining_300;
458 combining_len = arrlen (combining_300);
459 spacing_combining_table = spacing_combining_300;
460 spacing_combining_len = arrlen (spacing_combining_300);
461 assigned_table = assigned_300;
462 assigned_len = arrlen (assigned_300);
463 } else
464 #endif
465 {
466 combining_table = combining_old;
467 combining_len = arrlen (combining_old);
468 spacing_combining_table = spacing_combining_300;
469 spacing_combining_len = arrlen (spacing_combining_300);
470 assigned_table = assigned_300;
471 assigned_len = arrlen (assigned_300);
472 }
473 configured_combining_data_version = combining_data_version;
474 }
475
476 static
477 int
iscombining_listed(ucs)478 iscombining_listed (ucs)
479 unsigned long ucs;
480 {
481 if (configured_combining_data_version != combining_data_version) {
482 term_setup_data ();
483 }
484
485 if (printable_bidi_controls) { /* since xterm 230 */
486 /*
487 U+200E;LEFT-TO-RIGHT MARK
488 U+200F;RIGHT-TO-LEFT MARK
489 U+202A;LEFT-TO-RIGHT EMBEDDING
490 U+202B;RIGHT-TO-LEFT EMBEDDING
491 U+202C;POP DIRECTIONAL FORMATTING
492 U+202D;LEFT-TO-RIGHT OVERRIDE
493 U+202E;RIGHT-TO-LEFT OVERRIDE
494 */
495 if (ucs >= 0x200E && ucs <= 0x202E && (ucs <= 0x200F || ucs >= 0x202A)) {
496 return 0;
497 }
498 }
499
500 #ifdef wide_combining_not_combining
501 /* workaround for bug in MinTTY 0.4 beta */
502 if (ucs >= 0x302A && ucs <= 0x309A && (ucs <= 0x302F || ucs >= 0x3099)) {
503 return 0;
504 }
505 #endif
506
507 if (lookup (ucs, combining_table, combining_len)
508 || (spacing_combining && lookup (ucs, spacing_combining_table, spacing_combining_len))
509 ) {
510 if (ucs < 0x10000) {
511 return 1;
512 } else if (suppress_non_BMP) {
513 return 0;
514 } else if (ucs >= 0xE0000) {
515 return plane_14_combining;
516 } else {
517 return plane_1_combining;
518 }
519 } else {
520 return 0;
521 }
522 }
523
524 /* Check whether a Unicode code is a valid (assigned) Unicode character.
525 */
526 int
term_isassigned(ucs)527 term_isassigned (ucs)
528 unsigned long ucs;
529 {
530 if (configured_combining_data_version != combining_data_version) {
531 term_setup_data ();
532 }
533
534 if (lookup (ucs, assigned_table, assigned_len)) {
535 return 1;
536 } else {
537 return 0;
538 }
539 }
540
541 /* Check whether a Unicode character is a combining character, based on its
542 Unicode general category being Mn (Mark Nonspacing), Me (Mark Enclosing),
543 Cf (Other Format),
544 or (depending on terminal features) Mc (Mark Spacing Combining).
545 */
546 int
term_iscombining(ucs)547 term_iscombining (ucs)
548 unsigned long ucs;
549 {
550 if (unassigned_single_width) {
551 if (rxvt_version > 0) {
552 /* handle weird mapping of non-Unicode ranges */
553 if (ucs < 0x80000000) {
554 ucs &= 0x1FFFFF;
555 }
556 }
557 if (! bidi_screen) {
558 /* special case of rxvt, not mlterm */
559 if (ucs >= 0xF8F0 && ucs <= 0xF8FF) {
560 return 1;
561 }
562 }
563 return iscombining_listed (ucs) && term_isassigned (ucs);
564 } else if (ucs >= 0xD7B0 && ucs <= 0xD7FF) {
565 return hangul_jamo_extended;
566 } else if (konsole_version > 0 && ucs >= 0x200000) {
567 return 1;
568 } else {
569 return iscombining_listed (ucs);
570 }
571 }
572
/* Debug switches for term_iswide ():
   debug_width and debug_width_all are left undefined here;
   define them instead (e.g. by dropping the dont_ prefix)
   to trace selected or all non-ASCII characters, respectively.
 */
#define dont_debug_width
#define dont_debug_width_all

/* Trace macros, to be used as plain statements within a function that
   has the parameter ucs and, when debugging, the local flag _do_trace.
   They are wrapped so that each behaves as exactly one statement,
   also within unbraced if/else.
 */
#ifdef debug_width
#include <stdio.h>
/* switch on tracing for this call if ucs is the given character */
#define do_trace(c) do { if (ucs == (c)) _do_trace = 1; } while (0)
/* report a width decision (tag names the deciding rule) if tracing is on */
#define trace_width(tag, res) do { if (_do_trace) printf ("iswide (%04lX) [%s]: %d\n", ucs, (tag), (res)); } while (0)
#else
/* tracing disabled: expands to a no-op expression */
#define trace_width(tag, res) ((void) 0)
#endif
583
584 /* Check whether a Unicode character is a wide character, based on its
585 Unicode category being East Asian Wide (W) or East Asian FullWidth (F)
586 as defined in Unicode Technical Report #11, East Asian Ambiguous (A)
587 if the terminal is running in CJK compatibility mode (xterm -cjk_width).
588 Data taken from different versions of
589 http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
590 */
591 int
term_iswide(ucs)592 term_iswide (ucs)
593 unsigned long ucs;
594 {
595 #ifdef debug_width
596 int _do_trace = 0;
597 #ifdef debug_width_all
598 if (ucs >= 0x80) {
599 _do_trace = 1;
600 }
601 #endif
602
603 do_trace (0x4DC0);
604 do_trace (0xFE19);
605 do_trace (0x232A);
606 do_trace (0x3099);
607 do_trace (0x302A);
608 #endif
609
610 if (width_data_version == 0) {
611 trace_width ("none", 0);
612 return 0;
613 }
614
615 if (ucs >= 0x4DC0 && ucs <= 0x4DFF) {
616 trace_width ("hexagram", wide_Yijing_hexagrams);
617 return wide_Yijing_hexagrams;
618 }
619
620 if (unassigned_single_width) {
621 if (rxvt_version > 0) {
622 /* handle weird mapping of non-Unicode ranges */
623 if (ucs < 0x80000000) {
624 ucs &= 0x1FFFFF;
625 }
626 }
627 if (! term_isassigned (ucs)) {
628 trace_width ("unassigned_single_width", 0);
629 return 0;
630 }
631 }
632
633 if (cjk_currency_width == 2) {
634 if (ucs == 0xA2 || ucs == 0xA3 || ucs == 0xA5) {
635 return 1;
636 }
637 }
638
639 /* handle xterm -cjk_width */
640 if (cjk_width_data_version) {
641 if (utf8_screen || ucs >= 0x80 || cjk_wide_latin1) {
642 /* look up ambiguous character */
643
644 if (cjk_width_data_version >= U520) {
645 if (lookup (ucs, ambiguous_520, arrlen (ambiguous_520))) {
646 trace_width ("cjk 520", 1);
647 return 1;
648 }
649 } else if (cjk_width_data_version >= U400) {
650 if (lookup (ucs, ambiguous_400, arrlen (ambiguous_400))) {
651 trace_width ("cjk 400", 1);
652 return 1;
653 }
654 } else if (cjk_width_data_version >= U320beta) {
655 if (lookup (ucs, ambiguous_old, arrlen (ambiguous_old))) {
656 trace_width ("cjk 320", 1);
657 return 1;
658 }
659 }
660 }
661
662 if (cjk_width_data_version == U300beta
663 && (lookup (ucs, wide_poderosa, arrlen (wide_poderosa))
664 || ucs >= 0x100
665 )
666 )
667 {
668 trace_width ("wide poderosa", 1);
669 return 1;
670 }
671
672 /* Surrogates are also displayed wide by xterm -cjk_width */
673 if (ucs >= 0xD800 && ucs <= 0xDFFF) {
674 trace_width ("cjk surrogates", 1);
675 return 1;
676 }
677
678 /* Non-BMP are also displayed wide by xterm -cjk_width */
679 if (ucs >= 0x10000 && plane_2_double_width) {
680 trace_width ("cjk non-bmp", 1);
681 return 1;
682 }
683 }
684
685 /* first quick check for Latin-1 etc. characters */
686 if (ucs < 0x1100) {
687 trace_width ("low", 0);
688 return 0;
689 }
690
691 if (bidi_screen && mintty_version <= 0) {
692 /* handle mlterm deviations */
693 if (ucs == 0x2329 || ucs == 0x232A || /* angle brackets */
694 (ucs >= 0xA000 && ucs <= 0xA4C6) /* Yi */
695 ) {
696 trace_width ("bidi", 0);
697 return 0;
698 }
699 }
700
701 if (width_data_version <= U300) {
702 trace_width ("<=300", -1);
703 return
704 /* wide character ranges
705 Unicode 3.0
706 wcwidth 2001-01-12
707 xterm 157...166
708 */
709 (ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
710 (ucs >= 0x2e80 && ucs <= 0xa4cf
711 && (ucs & (unsigned long) ~0x0011) != 0x300a
712 && ucs != 0x303f) || /* CJK ... Yi */
713 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
714 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
715 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
716 (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
717 (ucs >= 0xffe0 && ucs <= 0xffe6)
718 || (ucs >= 0x20000 && ucs <= 0x2ffff && plane_2_double_width) /* missing before xterm 157 */
719 ;
720 } else {
721 trace_width (">300", -1);
722 return
723 /* wide character ranges
724 Unicode 3.2 - without 0x3???? range:
725 wcwidth 2002-05-08
726 xterm 167...179
727 Unicode 4.0 - including 0x3???? range:
728 wcwidth 2003-05-20
729 xterm 180...225
730 */
731 /* wide character ranges
732 Unicode 4.1
733 wcwidth 2007-05-25
734 xterm 226...
735 */
736 (ucs >= 0x1100 &&
737 (ucs <= 0x115f || /* Hangul Jamo init. consonants */
738 ucs == 0x2329 || ucs == 0x232a || /* angle brackets */
739 (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) || /* CJK ... Yi */
740 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
741 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
742 (width_data_version >= U410 && ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
743 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
744 (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
745 (ucs >= 0xffe0 && ucs <= 0xffe6) ||
746 (width_data_version >= U520 && ucs >= 0xA960 && ucs <= 0xA97F) || /* Hangul Jamo Extended-A */
747 (width_data_version >= U600 && ucs >= 0x1B000 && ucs <= 0x1B0FF) || /* Kana Supplement */
748 (width_data_version >= U520 && ucs >= 0x1F200 && ucs <= 0x1F2FF) || /* Enclosed Ideographic Supplement */
749 (plane_2_double_width && ucs >= 0x20000 && ucs <= 0x3ffff)
750 ));
751 }
752 }
753
754
755 /*======================================================================*\
756 | End
757 \*======================================================================*/
758