1 /*======================================================================*\
|	Functions to determine the display width of characters,
|	which depends on terminal properties:
|	it varies with the kind and version of the terminal,
|	the Unicode version it implements, and other features
6 \*======================================================================*/
7 
8 #include "termprop.h"
9 
10 
11 /*======================================================================*\
12 |	Table structure: tables of character ranges; lookup function
13 \*======================================================================*/
14 
/* A closed range [first, last] of character codes. */
struct interval {
    unsigned long first;
    unsigned long last;
};

/* Find the interval of a table that contains a character code.
   ucs:    character code to look for
   table:  intervals sorted in ascending order, not overlapping
   length: number of entries in table
   Returns a pointer to the matching table entry, or 0 if ucs is not
   covered, or if there is nothing to search (null table, or a length
   that is zero or negative, such as the -1 of a table not set up yet).
 */
static
struct interval *
lookup (ucs, table, length)
  unsigned long ucs;
  struct interval * table;
  int length;
{
  int min = 0;
  int mid;
  int max = length - 1;

  /* nothing to search; also protects the table [0] access below */
  if (! table || length <= 0) {
	return 0;
  }

  /* first quick check for Latin-1 etc. characters */
  if (ucs < table [0].first) {
	return 0;
  }

  /* binary search */
  while (max >= min) {
	/* written this way so that the sum cannot overflow a small int */
	mid = min + (max - min) / 2;
	if (ucs > table [mid].last) {
		min = mid + 1;
	} else if (ucs < table [mid].first) {
		max = mid - 1;
	} else {
		return & table [mid];
	}
  }

  return 0;
}
50 
51 
52 /*======================================================================*\
53 |	Table variables
54 |	- to be assigned depending on detected Unicode data version
55 \*======================================================================*/
56 
/* Currently selected tables; null until term_setup_data has run */
static struct interval
	* combining_table = 0,
	* spacing_combining_table = 0,
	* assigned_table = 0;

/* Number of entries of each selected table; -1 until set up */
static int
	combining_len = -1,
	spacing_combining_len = -1,
	assigned_len = -1;
63 
/* Number of elements of an array; arr must be an actual array object,
   not a pointer, as the result is computed with sizeof */
#define arrlen(arr)	(sizeof (arr) / sizeof ((arr) [0]))
65 
66 
67 /*======================================================================*\
68 |	Actual data tables
69 |	- some explicitly included here
70 |	- some generated from Unicode data (#include "width.t")
71 \*======================================================================*/
72 
73 /* Tables of combining characters, according to different versions
74    of Unicode or wcwidth.c, respectively,
75    arranged as sorted lists of non-overlapping intervals.
76  */
77 
/**
   Combining characters
   ~ Unicode 3.0 with some exceptions
   xterm < 157

   Selected by term_setup_data as the fallback when the detected
   combining data version matches none of the generated tables.
   The entries must stay sorted and non-overlapping
   because lookup performs a binary search on them.
 */
static struct interval
combining_old [] = {
  { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
  { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
  { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
  { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
  { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
  { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
  { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
  { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
  { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
  { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
  { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
  { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
  { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
  { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
  { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
  { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
  { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
  { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
  { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
  { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
  { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
  { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
  { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
  { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
  { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
  { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
  { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
  { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
  { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
  { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
  { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
};
117 
118 /**
119    Combining characters
120    Unicode 3.0
121    wcwidth 2001-01-12
122    xterm 157...166
123  */
124 /*static struct interval combining_300 [] = { defined in width.t };*/
125 
126 /**
127    Combining characters
128    Unicode 3.2
129    wcwidth.c 2002-05-08
   xterm 167...179
131  */
132 /*static struct interval combining_320 [] = { defined in width.t };*/
133 
134 /**
135    Combining characters
136    Unicode 4.0
137    wcwidth.c 2003-05-20
138    xterm 180...201
139  */
140 /*static struct interval combining_400 [] = { defined in width.t };*/
141 
142 /**
143    Combining characters
144    Unicode 4.1
145    xterm 202...
146  */
147 /*static struct interval combining_410 [] = { defined in width.t };*/
148 
149 /**
150    Combining characters
151    Unicode 5.0
152  */
153 /*static struct interval combining_500 [] = { defined in width.t };*/
154 
155 
156 /* Tables of specific width sets.
157  */
158 
/**
   Poderosa terminal emulator in Latin-1 mode.

   Latin-1 characters reported as wide by term_iswide
   when cjk_width_data_version is U300beta
   (in that mode, all characters from 0x100 are reported wide as well).
   The entries must stay sorted and non-overlapping
   because lookup performs a binary search on them.
 */
static struct interval
wide_poderosa [] = {
    { 0x00A7, 0x00A8 }, { 0x00B0, 0x00B1 },
    { 0x00B4, 0x00B4 }, { 0x00B6, 0x00B6 }, { 0x00D7, 0x00D7 }, { 0x00F7, 0x00F7 }
};
167 
168 
169 /* Tables of ambiguous width characters, according to different versions
170    of Unicode or wcwidth.c, respectively,
171    arranged as sorted lists of non-overlapping intervals.
172  */
173 
/**
   Ambiguous width characters
   Unicode 3.2 without Private Use range plus extended end/non-BMP ranges
   wcwidth.c 2002-05-08 plus end/non-BMP ranges
   xterm 167...179 (different version in earlier xterm releases was unused)

   Consulted by term_iswide in CJK width mode when cjk_width_data_version
   is at least U320beta but matches neither of the newer tables.
   The last entry extends to 0x7FFFFFFF, beyond the Unicode code space.
   The entries must stay sorted and non-overlapping
   because lookup performs a binary search on them.
 */
static struct interval
ambiguous_old [] = {
    { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
    { 0x00AA, 0x00AA }, { 0x00AD, 0x00AE }, { 0x00B0, 0x00B4 },
    { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
    { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
    { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
    { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
    { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
    { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
    { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
    { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
    { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
    { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
    { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
    { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
    { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
    { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
    { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
    { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
    { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
    { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
    { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
    { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
    { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
    { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
    { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
    { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
    { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
    { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
    { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
    { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
    { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
    { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
    { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
    { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x24FE },
    { 0x2500, 0x254B }, { 0x2550, 0x2573 }, { 0x2580, 0x258F },
    { 0x2592, 0x2595 }, { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 },
    { 0x25B2, 0x25B3 }, { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD },
    { 0x25C0, 0x25C1 }, { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB },
    { 0x25CE, 0x25D1 }, { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF },
    { 0x2605, 0x2606 }, { 0x2609, 0x2609 }, { 0x260E, 0x260F },
    { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
    { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
    { 0x273D, 0x273D }, { 0x2776, 0x277F },
    { 0xFFFD, 0xFFFF }, { 0xF0000, 0x7FFFFFFF }
};
235 
/**
   Ambiguous width characters
   Unicode 4.0 plus extended end/non-BMP ranges
   wcwidth.c 2003-05-20 plus extended end/non-BMP ranges
   wcwidth.c 2007-05-25 plus extended end/non-BMP ranges
   xterm 180...

   Consulted by term_iswide in CJK width mode when cjk_width_data_version
   is at least U400 but not U520 or later.
   Includes the Private Use range 0xE000...0xF8FF;
   the last entry extends to 0x7FFFFFFF, beyond the Unicode code space.
   The entries must stay sorted and non-overlapping
   because lookup performs a binary search on them.
 */
static struct interval
ambiguous_400 [] = {
    { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
    { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
    { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
    { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
    { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
    { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
    { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
    { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
    { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
    { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
    { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
    { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
    { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
    { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
    { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
    { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
    { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
    { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
    { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
    { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
    { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
    { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
    { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
    { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
    { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
    { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
    { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
    { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
    { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
    { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
    { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
    { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
    { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
    { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
    { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
    { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
    { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
    { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
    { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
    { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
    { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
    { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
    { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
    { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
    { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
    { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
    { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
    { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
    { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
    { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
    { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
    { 0xFFFD, 0xFFFF }, { 0xF0000, 0x7FFFFFFF }
};
298 
/**
   Ambiguous width characters
   Consulted by term_iswide in CJK width mode when cjk_width_data_version
   is at least U520.
   NOTE(review): presumably derived from the Unicode 5.2 East Asian Width
   data, as the name suggests - confirm against the generating source.

   Unlike the older ambiguous tables, this one lists only 0xFFFD
   (not 0xFFFD...0xFFFF) and ends at 0x10FFFD instead of 0x7FFFFFFF.
   The entries must stay sorted and non-overlapping
   because lookup performs a binary search on them.
 */
static struct interval
ambiguous_520 [] = {
  { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
  { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
  { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
  { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
  { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
  { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
  { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
  { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
  { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
  { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
  { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
  { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
  { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
  { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
  { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
  { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
  { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
  { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
  { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
  { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
  { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
  { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
  { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
  { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
  { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
  { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
  { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
  { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
  { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
  { 0x2189, 0x2189 }, { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 },
  { 0x21D2, 0x21D2 }, { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 },
  { 0x2200, 0x2200 }, { 0x2202, 0x2203 }, { 0x2207, 0x2208 },
  { 0x220B, 0x220B }, { 0x220F, 0x220F }, { 0x2211, 0x2211 },
  { 0x2215, 0x2215 }, { 0x221A, 0x221A }, { 0x221D, 0x2220 },
  { 0x2223, 0x2223 }, { 0x2225, 0x2225 }, { 0x2227, 0x222C },
  { 0x222E, 0x222E }, { 0x2234, 0x2237 }, { 0x223C, 0x223D },
  { 0x2248, 0x2248 }, { 0x224C, 0x224C }, { 0x2252, 0x2252 },
  { 0x2260, 0x2261 }, { 0x2264, 0x2267 }, { 0x226A, 0x226B },
  { 0x226E, 0x226F }, { 0x2282, 0x2283 }, { 0x2286, 0x2287 },
  { 0x2295, 0x2295 }, { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 },
  { 0x22BF, 0x22BF }, { 0x2312, 0x2312 }, { 0x2460, 0x24E9 },
  { 0x24EB, 0x254B }, { 0x2550, 0x2573 }, { 0x2580, 0x258F },
  { 0x2592, 0x2595 }, { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 },
  { 0x25B2, 0x25B3 }, { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD },
  { 0x25C0, 0x25C1 }, { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB },
  { 0x25CE, 0x25D1 }, { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF },
  { 0x2605, 0x2606 }, { 0x2609, 0x2609 }, { 0x260E, 0x260F },
  { 0x2614, 0x2615 }, { 0x261C, 0x261C }, { 0x261E, 0x261E },
  { 0x2640, 0x2640 }, { 0x2642, 0x2642 }, { 0x2660, 0x2661 },
  { 0x2663, 0x2665 }, { 0x2667, 0x266A }, { 0x266C, 0x266D },
  { 0x266F, 0x266F }, { 0x269E, 0x269F }, { 0x26BE, 0x26BF },
  { 0x26C4, 0x26CD }, { 0x26CF, 0x26E1 }, { 0x26E3, 0x26E3 },
  { 0x26E8, 0x26FF }, { 0x273D, 0x273D }, { 0x2757, 0x2757 },
  { 0x2776, 0x277F }, { 0x2B55, 0x2B59 }, { 0x3248, 0x324F },
  { 0xE000, 0xF8FF }, { 0xFFFD, 0xFFFD }, { 0x1F100, 0x1F10A },
  { 0x1F110, 0x1F12D }, { 0x1F131, 0x1F131 }, { 0x1F13D, 0x1F13D },
  { 0x1F13F, 0x1F13F }, { 0x1F142, 0x1F142 }, { 0x1F146, 0x1F146 },
  { 0x1F14A, 0x1F14E }, { 0x1F157, 0x1F157 }, { 0x1F15F, 0x1F15F },
  { 0x1F179, 0x1F179 }, { 0x1F17B, 0x1F17C }, { 0x1F17F, 0x1F17F },
  { 0x1F18A, 0x1F18D }, { 0x1F190, 0x1F190 }, { 0xF0000, 0xFFFFD },
  { 0x100000, 0x10FFFD }
};
363 
364 
/* Generated tables (combining_NNN, spacing_combining_NNN, assigned_NNN)
   come from width.t.
   With Turbo C, width.t is not included; only single-entry placeholders
   are defined for the two tables that the fallback branch of
   term_setup_data refers to. Note that the placeholder interval { 0, 0 }
   still makes lookup match character code 0.
   NOTE(review): presumably done to save memory with Turbo C - confirm.
 */
#ifdef __TURBOC__
static struct interval
spacing_combining_300 [] = { {0, 0} };
static struct interval
assigned_300 [] = { {0, 0} };
#else
#include "width.t"
#endif
373 
374 
375 /*======================================================================*\
376 |	Width enquiry functions; and table setup
377 \*======================================================================*/
378 
379 static int configured_combining_data_version = -1;
380 
381 static
382 void
term_setup_data()383 term_setup_data ()
384 {
385 #ifndef __TURBOC__
386   if (combining_data_version >= U700) {
387 	combining_table = combining_700;
388 	combining_len = arrlen (combining_700);
389 	spacing_combining_table = spacing_combining_700;
390 	spacing_combining_len = arrlen (spacing_combining_700);
391 	assigned_table = assigned_700;
392 	assigned_len = arrlen (assigned_700);
393   } else if (combining_data_version >= U630) {
394 	combining_table = combining_630;
395 	combining_len = arrlen (combining_630);
396 	spacing_combining_table = spacing_combining_630;
397 	spacing_combining_len = arrlen (spacing_combining_630);
398 	assigned_table = assigned_630;
399 	assigned_len = arrlen (assigned_630);
400   } else if (combining_data_version >= U620) {
401 	combining_table = combining_620;
402 	combining_len = arrlen (combining_620);
403 	spacing_combining_table = spacing_combining_620;
404 	spacing_combining_len = arrlen (spacing_combining_620);
405 	assigned_table = assigned_620;
406 	assigned_len = arrlen (assigned_620);
407   } else if (combining_data_version >= U600) {
408 	combining_table = combining_600;
409 	combining_len = arrlen (combining_600);
410 	spacing_combining_table = spacing_combining_600;
411 	spacing_combining_len = arrlen (spacing_combining_600);
412 	assigned_table = assigned_600;
413 	assigned_len = arrlen (assigned_600);
414   } else if (combining_data_version >= U520) {
415 	combining_table = combining_520;
416 	combining_len = arrlen (combining_520);
417 	spacing_combining_table = spacing_combining_520;
418 	spacing_combining_len = arrlen (spacing_combining_520);
419 	assigned_table = assigned_520;
420 	assigned_len = arrlen (assigned_520);
421   } else if (combining_data_version >= U510) {
422 	combining_table = combining_510;
423 	combining_len = arrlen (combining_510);
424 	spacing_combining_table = spacing_combining_510;
425 	spacing_combining_len = arrlen (spacing_combining_510);
426 	assigned_table = assigned_510;
427 	assigned_len = arrlen (assigned_510);
428   } else if (combining_data_version >= U500) {
429 	combining_table = combining_500;
430 	combining_len = arrlen (combining_500);
431 	spacing_combining_table = spacing_combining_500;
432 	spacing_combining_len = arrlen (spacing_combining_500);
433 	assigned_table = assigned_500;
434 	assigned_len = arrlen (assigned_500);
435   } else if (combining_data_version == U410) {
436 	combining_table = combining_410;
437 	combining_len = arrlen (combining_410);
438 	spacing_combining_table = spacing_combining_410;
439 	spacing_combining_len = arrlen (spacing_combining_410);
440 	assigned_table = assigned_410;
441 	assigned_len = arrlen (assigned_410);
442   } else if (combining_data_version == U400) {
443 	combining_table = combining_400;
444 	combining_len = arrlen (combining_400);
445 	spacing_combining_table = spacing_combining_400;
446 	spacing_combining_len = arrlen (spacing_combining_400);
447 	assigned_table = assigned_400;
448 	assigned_len = arrlen (assigned_400);
449   } else if (combining_data_version == U320) {
450 	combining_table = combining_320;
451 	combining_len = arrlen (combining_320);
452 	spacing_combining_table = spacing_combining_320;
453 	spacing_combining_len = arrlen (spacing_combining_320);
454 	assigned_table = assigned_320;
455 	assigned_len = arrlen (assigned_320);
456   } else if (combining_data_version == U300) {
457 	combining_table = combining_300;
458 	combining_len = arrlen (combining_300);
459 	spacing_combining_table = spacing_combining_300;
460 	spacing_combining_len = arrlen (spacing_combining_300);
461 	assigned_table = assigned_300;
462 	assigned_len = arrlen (assigned_300);
463   } else
464 #endif
465   {
466 	combining_table = combining_old;
467 	combining_len = arrlen (combining_old);
468 	spacing_combining_table = spacing_combining_300;
469 	spacing_combining_len = arrlen (spacing_combining_300);
470 	assigned_table = assigned_300;
471 	assigned_len = arrlen (assigned_300);
472   }
473   configured_combining_data_version = combining_data_version;
474 }
475 
476 static
477 int
iscombining_listed(ucs)478 iscombining_listed (ucs)
479   unsigned long ucs;
480 {
481   if (configured_combining_data_version != combining_data_version) {
482 	term_setup_data ();
483   }
484 
485   if (printable_bidi_controls) {	/* since xterm 230 */
486 	/*
487 	‎ U+200E;LEFT-TO-RIGHT MARK
488 	‏ U+200F;RIGHT-TO-LEFT MARK
489 	‪ U+202A;LEFT-TO-RIGHT EMBEDDING
490 	‫ U+202B;RIGHT-TO-LEFT EMBEDDING
491 	‬ U+202C;POP DIRECTIONAL FORMATTING
492 	‭ U+202D;LEFT-TO-RIGHT OVERRIDE
493 	‮ U+202E;RIGHT-TO-LEFT OVERRIDE
494 	*/
495 	if (ucs >= 0x200E && ucs <= 0x202E && (ucs <= 0x200F || ucs >= 0x202A)) {
496 		return 0;
497 	}
498   }
499 
500 #ifdef wide_combining_not_combining
501 	/* workaround for bug in MinTTY 0.4 beta */
502 	if (ucs >= 0x302A && ucs <= 0x309A && (ucs <= 0x302F || ucs >= 0x3099)) {
503 		return 0;
504 	}
505 #endif
506 
507   if (lookup (ucs, combining_table, combining_len)
508    || (spacing_combining && lookup (ucs, spacing_combining_table, spacing_combining_len))
509      ) {
510 	if (ucs < 0x10000) {
511 		return 1;
512 	} else if (suppress_non_BMP) {
513 		return 0;
514 	} else if (ucs >= 0xE0000) {
515 		return plane_14_combining;
516 	} else {
517 		return plane_1_combining;
518 	}
519   } else {
520 	return 0;
521   }
522 }
523 
524 /* Check whether a Unicode code is a valid (assigned) Unicode character.
525  */
526 int
term_isassigned(ucs)527 term_isassigned (ucs)
528   unsigned long ucs;
529 {
530   if (configured_combining_data_version != combining_data_version) {
531 	term_setup_data ();
532   }
533 
534   if (lookup (ucs, assigned_table, assigned_len)) {
535 	return 1;
536   } else {
537 	return 0;
538   }
539 }
540 
541 /* Check whether a Unicode character is a combining character, based on its
542    Unicode general category being Mn (Mark Nonspacing), Me (Mark Enclosing),
543    Cf (Other Format),
544    or (depending on terminal features) Mc (Mark Spacing Combining).
545  */
546 int
term_iscombining(ucs)547 term_iscombining (ucs)
548   unsigned long ucs;
549 {
550   if (unassigned_single_width) {
551 	if (rxvt_version > 0) {
552 		/* handle weird mapping of non-Unicode ranges */
553 		if (ucs < 0x80000000) {
554 			ucs &= 0x1FFFFF;
555 		}
556 	}
557 	if (! bidi_screen) {
558 		/* special case of rxvt, not mlterm */
559 		if (ucs >= 0xF8F0 && ucs <= 0xF8FF) {
560 			return 1;
561 		}
562 	}
563 	return iscombining_listed (ucs) && term_isassigned (ucs);
564   } else if (ucs >= 0xD7B0 && ucs <= 0xD7FF) {
565 	return hangul_jamo_extended;
566   } else if (konsole_version > 0 && ucs >= 0x200000) {
567 	return 1;
568   } else {
569 	return iscombining_listed (ucs);
570   }
571 }
572 
/* Debug tracing of width decisions.
   To enable tracing, rename dont_debug_width to debug_width;
   to trace all characters from 0x80 (rather than only those selected
   with do_trace), also rename dont_debug_width_all to debug_width_all.
 */
#define dont_debug_width
#define dont_debug_width_all

#ifdef debug_width
#include <stdio.h>
/* Both macros expect variables `unsigned long ucs` and `int _do_trace`
   in the scope where they are used.
   They are wrapped in do ... while (0) and parenthesise their arguments
   so that each use followed by a semicolon is exactly one statement,
   also within an unbraced if/else.
 */
/* enable tracing if the current character is c */
#define do_trace(c)	do { if (ucs == (c)) _do_trace = 1; } while (0)
/* report a decision (tag) and its result if tracing is enabled */
#define trace_width(tag, res)	do { if (_do_trace) printf ("iswide (%04lX) [%s]: %d\n", ucs, (tag), (res)); } while (0)
#else
#define trace_width(tag, res)
#endif
583 
584 /* Check whether a Unicode character is a wide character, based on its
585    Unicode category being East Asian Wide (W) or East Asian FullWidth (F)
586    as defined in Unicode Technical Report #11, East Asian Ambiguous (A)
587    if the terminal is running in CJK compatibility mode (xterm -cjk_width).
588    Data taken from different versions of
589    http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
590  */
591 int
term_iswide(ucs)592 term_iswide (ucs)
593   unsigned long ucs;
594 {
595 #ifdef debug_width
596   int _do_trace = 0;
597 #ifdef debug_width_all
598   if (ucs >= 0x80) {
599 	_do_trace = 1;
600   }
601 #endif
602 
603   do_trace (0x4DC0);
604   do_trace (0xFE19);
605   do_trace (0x232A);
606   do_trace (0x3099);
607   do_trace (0x302A);
608 #endif
609 
610   if (width_data_version == 0) {
611 	trace_width ("none", 0);
612 	return 0;
613   }
614 
615   if (ucs >= 0x4DC0 && ucs <= 0x4DFF) {
616 	trace_width ("hexagram", wide_Yijing_hexagrams);
617 	return wide_Yijing_hexagrams;
618   }
619 
620   if (unassigned_single_width) {
621 	if (rxvt_version > 0) {
622 		/* handle weird mapping of non-Unicode ranges */
623 		if (ucs < 0x80000000) {
624 			ucs &= 0x1FFFFF;
625 		}
626 	}
627 	if (! term_isassigned (ucs)) {
628 		trace_width ("unassigned_single_width", 0);
629 		return 0;
630 	}
631   }
632 
633   if (cjk_currency_width == 2) {
634 	if (ucs == 0xA2 || ucs == 0xA3 || ucs == 0xA5) {
635 		return 1;
636 	}
637   }
638 
639   /* handle xterm -cjk_width */
640   if (cjk_width_data_version) {
641     if (utf8_screen || ucs >= 0x80 || cjk_wide_latin1) {
642 	/* look up ambiguous character */
643 
644 	if (cjk_width_data_version >= U520) {
645 		if (lookup (ucs, ambiguous_520, arrlen (ambiguous_520))) {
646 			trace_width ("cjk 520", 1);
647 			return 1;
648 		}
649 	} else if (cjk_width_data_version >= U400) {
650 		if (lookup (ucs, ambiguous_400, arrlen (ambiguous_400))) {
651 			trace_width ("cjk 400", 1);
652 			return 1;
653 		}
654 	} else if (cjk_width_data_version >= U320beta) {
655 		if (lookup (ucs, ambiguous_old, arrlen (ambiguous_old))) {
656 			trace_width ("cjk 320", 1);
657 			return 1;
658 		}
659 	}
660     }
661 
662     if (cjk_width_data_version == U300beta
663         && (lookup (ucs, wide_poderosa, arrlen (wide_poderosa))
664             || ucs >= 0x100
665            )
666        )
667     {
668       trace_width ("wide poderosa", 1);
669       return 1;
670     }
671 
672     /* Surrogates are also displayed wide by xterm -cjk_width */
673     if (ucs >= 0xD800 && ucs <= 0xDFFF) {
674       trace_width ("cjk surrogates", 1);
675       return 1;
676     }
677 
678     /* Non-BMP are also displayed wide by xterm -cjk_width */
679     if (ucs >= 0x10000 && plane_2_double_width) {
680       trace_width ("cjk non-bmp", 1);
681       return 1;
682     }
683   }
684 
685   /* first quick check for Latin-1 etc. characters */
686   if (ucs < 0x1100) {
687       trace_width ("low", 0);
688       return 0;
689   }
690 
691   if (bidi_screen && mintty_version <= 0) {
692     /* handle mlterm deviations */
693     if (ucs == 0x2329 || ucs == 0x232A || /* angle brackets */
694         (ucs >= 0xA000 && ucs <= 0xA4C6) /* Yi */
695        ) {
696       trace_width ("bidi", 0);
697       return 0;
698     }
699   }
700 
701   if (width_data_version <= U300) {
702    trace_width ("<=300", -1);
703    return
704     /* wide character ranges
705        Unicode 3.0
706        wcwidth 2001-01-12
707        xterm 157...166
708      */
709     (ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
710     (ucs >= 0x2e80 && ucs <= 0xa4cf
711      && (ucs & (unsigned long) ~0x0011) != 0x300a
712      && ucs != 0x303f) ||                  /* CJK ... Yi */
713     (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
714     (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
715     (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
716     (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
717     (ucs >= 0xffe0 && ucs <= 0xffe6)
718     || (ucs >= 0x20000 && ucs <= 0x2ffff && plane_2_double_width) /* missing before xterm 157 */
719     ;
720   } else {
721    trace_width (">300", -1);
722    return
723     /* wide character ranges
724        Unicode 3.2 - without 0x3???? range:
725         wcwidth 2002-05-08
726         xterm 167...179
727        Unicode 4.0 - including 0x3???? range:
728         wcwidth 2003-05-20
729         xterm 180...225
730      */
731     /* wide character ranges
732        Unicode 4.1
733         wcwidth 2007-05-25
734         xterm 226...
735      */
736     (ucs >= 0x1100 &&
737      (ucs <= 0x115f ||                  /* Hangul Jamo init. consonants */
738       ucs == 0x2329 || ucs == 0x232a || /* angle brackets */
739       (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) || /* CJK ... Yi */
740       (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
741       (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
742       (width_data_version >= U410 && ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
743       (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
744       (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
745       (ucs >= 0xffe0 && ucs <= 0xffe6) ||
746       (width_data_version >= U520 && ucs >= 0xA960 && ucs <= 0xA97F) || /* Hangul Jamo Extended-A */
747       (width_data_version >= U600 && ucs >= 0x1B000 && ucs <= 0x1B0FF) || /* Kana Supplement */
748       (width_data_version >= U520 && ucs >= 0x1F200 && ucs <= 0x1F2FF) || /* Enclosed Ideographic Supplement */
749       (plane_2_double_width && ucs >= 0x20000 && ucs <= 0x3ffff)
750       ));
751   }
752 }
753 
754 
755 /*======================================================================*\
756 |	End
757 \*======================================================================*/
758