1 /* $XTermId: xutf8.c,v 1.18 2020/06/23 22:45:51 tom Exp $ */
2
3 /*
4 * Copyright 2002-2019,2020 by Thomas E. Dickey
5 * Copyright (c) 2001 by Juliusz Chroboczek
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include <xterm.h>
28
29 #include <X11/Xlib.h>
30 #include <X11/Xatom.h>
31 #include <X11/Xutil.h>
32 #include <X11/Xmu/Xmu.h>
33
34 #include <xutf8.h>
35
36 #ifndef X_HAVE_UTF8_STRING
37
38 #undef XA_UTF8_STRING
39 #define KEYSYM2UCS_INCLUDED
40
41 #include "keysym2ucs.c"
42
43 Atom
_xa_utf8_string(Display * dpy)44 _xa_utf8_string(Display *dpy)
45 {
46 static AtomPtr p = NULL;
47
48 if (p == NULL)
49 p = XmuMakeAtom("UTF8_STRING");
50
51 return XmuInternAtom(dpy, p);
52 }
53 #define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
54
/*
 * Number of bytes needed to encode the code point c as UTF-8.
 *
 * Returns 0 for a negative value, otherwise 1, 2, 3 or 4 depending on the
 * magnitude of c.  There is no upper range check: every value above 0xFFFF
 * is reported as 4.
 */
static int
utf8countBytes(int c)
{
    int count;

    if (c < 0)
        count = 0;
    else if (c < 0x80)
        count = 1;
    else if (c < 0x800)
        count = 2;
    else if (c < 0x10000)
        count = 3;
    else
        count = 4;

    return count;
}
70
/*
 * Encode the code point c as UTF-8 into dest and store the number of bytes
 * written in *len_return.
 *
 * dest must have room for up to 4 bytes; no terminating NUL is appended.
 * A negative c writes nothing to dest and reports a length of 0, so the
 * caller never reads an indeterminate length.
 * There is no upper range check: every value above 0xFFFF is written as a
 * 4-byte sequence built from its low 21 bits.
 */
static void
utf8insert(char *dest, int c, size_t *len_return)
{
    if (c < 0) {
        *len_return = 0;
        return;
    }

    if (c <= 0x7F) {
        dest[0] = (char) c;
        *len_return = 1;
    } else if (c <= 0x7FF) {
        dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
        dest[1] = (char) (0x80 | (c & 0x3F));
        *len_return = 2;
    } else if (c <= 0xFFFF) {
        dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
        dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[2] = (char) (0x80 | (c & 0x3F));
        *len_return = 3;
    } else {
        dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
        dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
        dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[3] = (char) (0x80 | (c & 0x3F));
        *len_return = 4;
    }
}
97
/*
 * Count the bytes needed for the UTF-8 form of the len bytes at s, where
 * each input byte stands for one character whose code point equals the byte
 * value.  Bytes with the high bit clear need one output byte, all others
 * need two.  NUL bytes are counted like any other byte; only len bounds the
 * scan.
 */
static size_t
l1countUtf8Bytes(char *s, size_t len)
{
    size_t total = 0;
    size_t i;

    for (i = 0; i < len; i++) {
        total += (s[i] & 0x80) ? 2 : 1;
    }
    return total;
}
112
/*
 * Convert the len bytes at s to UTF-8 in d, using each input byte's value
 * (0..255) as the code point passed to utf8insert().
 *
 * Nothing is appended after the last character, so the caller supplies any
 * terminator.  d must be large enough for the result; l1countUtf8Bytes()
 * gives the exact size for the same input.
 */
static void
l1utf8copy(char *d, char *s, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++) {
        size_t written;

        utf8insert(d, s[i] & 0xFF, &written);
        d += (int) written;
    }
}
124
125 static void
utf8l1strcpy(char * d,char * s)126 utf8l1strcpy(char *d, char *s)
127 {
128 #define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0)
129 while (*s) {
130 if ((*s & 0x80) == 0)
131 *d++ = *s++;
132 else if ((*s & 0x7C) == 0x40) {
133 if ((s[1] & 0x80) == 0) {
134 s++; /* incorrect UTF-8 */
135 continue;
136 } else if ((*s & 0x7C) == 0x40) {
137 *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));
138 s += 2;
139 } else {
140 *d++ = BAD_ASCII;
141 SKIP;
142 }
143 } else {
144 *d++ = BAD_ASCII;
145 SKIP;
146 }
147 }
148 *d = 0;
149 #undef SKIP
150 }
151
/*
 * Length, excluding the terminating NUL, of the single-byte string that
 * utf8l1strcpy() produces for the NUL-terminated UTF-8 string s.
 *
 * Keep this in sync with utf8l1strcpy!  The result is used to size the
 * buffer that utf8l1strcpy() fills, so both must treat every input byte the
 * same way:
 *  - a byte with the high bit clear counts as one;
 *  - a lead byte 0xC0..0xC3 followed by a byte with the high bit set counts
 *    as one, and both bytes are consumed;
 *  - a lead byte 0xC0..0xC3 followed by a byte with the high bit clear is
 *    dropped and not counted;
 *  - any other byte counts as one, and the continuation bytes (0x80..0xBF)
 *    after it are skipped.
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
        int lead = *s & 0xFF;

        if (lead < 0x80) {
            s++;
        } else if ((lead & 0xFC) == 0xC0) {
            if ((s[1] & 0x80) == 0) {
                s++;            /* incorrect UTF-8: dropped, not counted */
                continue;
            }
            s += 2;
        } else {
            do {
                s++;
            } while ((*s & 0xC0) == 0x80);
        }
        count++;
    }
    return count;
}
181
182 int
Xutf8TextPropertyToTextList(Display * dpy,const XTextProperty * tp,char *** list_return,int * count_return)183 Xutf8TextPropertyToTextList(Display *dpy,
184 const XTextProperty * tp,
185 char ***list_return,
186 int *count_return)
187 {
188 int utf8;
189 char **list;
190 int nelements;
191 char *cp;
192 char *start;
193 size_t i;
194 int j;
195 size_t datalen = tp->nitems;
196 size_t len;
197
198 if (tp->format != 8)
199 return XConverterNotFound;
200
201 if (tp->encoding == XA_STRING)
202 utf8 = 0;
203 else if (tp->encoding == XA_UTF8_STRING(dpy))
204 utf8 = 1;
205 else
206 return XConverterNotFound;
207
208 if (datalen == 0) {
209 *list_return = NULL;
210 *count_return = 0;
211 return 0;
212 }
213
214 nelements = 1;
215 for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
216 if (*cp == '\0')
217 nelements++;
218 }
219
220 list = TypeMallocN(char *, (unsigned) nelements);
221 if (!list)
222 return XNoMemory;
223
224 if (utf8)
225 len = datalen;
226 else
227 len = l1countUtf8Bytes((char *) tp->value, datalen);
228
229 start = malloc(len + 1);
230 if (!start) {
231 free(list);
232 return XNoMemory;
233 }
234
235 if (utf8)
236 memcpy(start, (char *) tp->value, datalen);
237 else
238 l1utf8copy(start, (char *) tp->value, datalen);
239 start[len] = '\0';
240
241 for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
242 if (*cp == '\0') {
243 list[j] = start;
244 start = (cp + 1);
245 j++;
246 }
247 }
248
249 list[j] = NULL;
250 *list_return = list;
251 *count_return = nelements;
252 return 0;
253 }
254
255 int
Xutf8TextListToTextProperty(Display * dpy,char ** list,int count,XICCEncodingStyle style,XTextProperty * text_prop)256 Xutf8TextListToTextProperty(Display *dpy,
257 char **list,
258 int count,
259 XICCEncodingStyle style,
260 XTextProperty * text_prop)
261 {
262 XTextProperty proto;
263 unsigned int nbytes;
264 int i;
265
266 if (style != XStringStyle &&
267 style != XCompoundTextStyle &&
268 style != XStdICCTextStyle &&
269 style != XUTF8StringStyle)
270 return XConverterNotFound;
271
272 if (style == XUTF8StringStyle) {
273 for (i = 0, nbytes = 0; i < count; i++) {
274 nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
275 }
276 } else {
277 for (i = 0, nbytes = 0; i < count; i++) {
278 nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
279 }
280 }
281
282 if (style == XCompoundTextStyle)
283 proto.encoding = XA_COMPOUND_TEXT(dpy);
284 else if (style == XUTF8StringStyle)
285 proto.encoding = XA_UTF8_STRING(dpy);
286 else
287 proto.encoding = XA_STRING;
288 proto.format = 8;
289 if (nbytes)
290 proto.nitems = nbytes - 1;
291 else
292 proto.nitems = 0;
293 proto.value = NULL;
294
295 if (nbytes > 0) {
296 char *buf = TypeMallocN(char, nbytes);
297 if (!buf)
298 return XNoMemory;
299
300 proto.value = (unsigned char *) buf;
301 for (i = 0; i < count; i++) {
302 char *arg = list[i];
303
304 if (arg) {
305 if (style == XUTF8StringStyle) {
306 strcpy(buf, arg);
307 } else {
308 utf8l1strcpy(buf, arg);
309 }
310 buf += (strlen(buf) + 1);
311 } else {
312 *buf++ = '\0';
313 }
314 }
315 } else {
316 proto.value = CastMalloc(unsigned char); /* easier for client */
317 if (!proto.value)
318 return XNoMemory;
319
320 proto.value[0] = '\0';
321 }
322
323 *text_prop = proto;
324 return 0;
325 }
326
327 int
Xutf8LookupString(XIC ic GCC_UNUSED,XKeyEvent * ev,char * buffer,int nbytes,KeySym * keysym_return,Status * status_return)328 Xutf8LookupString(XIC ic GCC_UNUSED,
329 XKeyEvent *ev,
330 char *buffer,
331 int nbytes,
332 KeySym * keysym_return,
333 Status * status_return)
334 {
335 int rc;
336 KeySym keysym;
337 int codepoint;
338 size_t len;
339
340 rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
341
342 if (rc > 0) {
343 codepoint = buffer[0] & 0xFF;
344 } else {
345 codepoint = keysym2ucs(keysym);
346 }
347
348 if (codepoint < 0) {
349 if (keysym == None) {
350 *status_return = XLookupNone;
351 } else {
352 *status_return = XLookupKeySym;
353 *keysym_return = keysym;
354 }
355 return 0;
356 }
357
358 if (nbytes < utf8countBytes(codepoint)) {
359 *status_return = XBufferOverflow;
360 return utf8countBytes(codepoint);
361 }
362
363 utf8insert(buffer, codepoint, &len);
364
365 if (keysym != None) {
366 *keysym_return = keysym;
367 *status_return = XLookupBoth;
368 } else {
369 *status_return = XLookupChars;
370 }
371 return (int) len;
372 }
373
374 #else /* X_HAVE_UTF8_STRING */
375 /* Silence the compiler */
/*
 * Does nothing.  It is the only definition in this file when
 * X_HAVE_UTF8_STRING is defined.
 */
void
xutf8_dummy(void)
{
    /* intentionally empty */
}
381 #endif
382