1 /* $XTermId: xutf8.c,v 1.18 2020/06/23 22:45:51 tom Exp $ */
2 
3 /*
4  * Copyright 2002-2019,2020 by Thomas E. Dickey
5  * Copyright (c) 2001 by Juliusz Chroboczek
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include <xterm.h>
28 
29 #include <X11/Xlib.h>
30 #include <X11/Xatom.h>
31 #include <X11/Xutil.h>
32 #include <X11/Xmu/Xmu.h>
33 
34 #include <xutf8.h>
35 
36 #ifndef X_HAVE_UTF8_STRING
37 
38 #undef XA_UTF8_STRING
39 #define KEYSYM2UCS_INCLUDED
40 
41 #include "keysym2ucs.c"
42 
43 Atom
_xa_utf8_string(Display * dpy)44 _xa_utf8_string(Display *dpy)
45 {
46     static AtomPtr p = NULL;
47 
48     if (p == NULL)
49 	p = XmuMakeAtom("UTF8_STRING");
50 
51     return XmuInternAtom(dpy, p);
52 }
53 #define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
54 
/*
 * Report how many bytes the UTF-8 encoding of code point 'c' occupies.
 *
 * Returns 0 for a negative value, otherwise 1..4.  Every value above
 * 0xFFFF is reported as needing four bytes.
 */
static int
utf8countBytes(int c)
{
    int result;

    if (c < 0)
	result = 0;
    else if (c < 0x80)
	result = 1;
    else if (c < 0x800)
	result = 2;
    else if (c < 0x10000)
	result = 3;
    else
	result = 4;

    return result;
}
70 
/*
 * Encode code point 'c' as UTF-8 into 'dest' and store the number of bytes
 * written in '*len_return'.
 *
 * 'dest' must have room for up to four bytes; no terminating NUL is added.
 * A negative 'c' writes nothing and reports a length of zero, so that the
 * caller never reads an unset length.  Values above 0x1FFFFF do not fit in
 * four bytes; their high bits are dropped by the mask on the lead byte.
 */
static void
utf8insert(char *dest, int c, size_t *len_return)
{
    if (c < 0) {
	*len_return = 0;
	return;
    }

    if (c <= 0x7F) {
	dest[0] = (char) c;
	*len_return = 1;
    } else if (c <= 0x7FF) {
	dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
	dest[1] = (char) (0x80 | (c & 0x3F));
	*len_return = 2;
    } else if (c <= 0xFFFF) {
	dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
	dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[2] = (char) (0x80 | (c & 0x3F));
	*len_return = 3;
    } else {
	dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
	dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
	dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[3] = (char) (0x80 | (c & 0x3F));
	*len_return = 4;
    }
}
97 
/*
 * Compute the number of bytes needed to hold the UTF-8 form of 'len' bytes
 * of Latin-1 text starting at 's'.
 *
 * Bytes with the high bit clear take one byte, all others take two.
 * Embedded NUL bytes are counted like any other 7-bit byte.
 */
static size_t
l1countUtf8Bytes(char *s, size_t len)
{
    size_t total = 0;
    size_t n;

    for (n = 0; n < len; n++) {
	total += ((s[n] & 0x80) != 0) ? 2 : 1;
    }
    return total;
}
112 
/*
 * Convert 'len' bytes of Latin-1 text at 's' to UTF-8, writing the result
 * to 'd'.
 *
 * Each source byte is treated as a code point in 0..255 and encoded with
 * utf8insert().  No terminating NUL is written; 'd' must be large enough
 * for the converted text (l1countUtf8Bytes() gives the required size).
 */
static void
l1utf8copy(char *d, char *s, size_t len)
{
    size_t remaining;

    for (remaining = len; remaining != 0; remaining--, s++) {
	size_t used;

	utf8insert(d, (*s) & 0xFF, &used);
	d += (int) used;
    }
}
124 
125 static void
utf8l1strcpy(char * d,char * s)126 utf8l1strcpy(char *d, char *s)
127 {
128 #define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0)
129     while (*s) {
130 	if ((*s & 0x80) == 0)
131 	    *d++ = *s++;
132 	else if ((*s & 0x7C) == 0x40) {
133 	    if ((s[1] & 0x80) == 0) {
134 		s++;		/* incorrect UTF-8 */
135 		continue;
136 	    } else if ((*s & 0x7C) == 0x40) {
137 		*d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));
138 		s += 2;
139 	    } else {
140 		*d++ = BAD_ASCII;
141 		SKIP;
142 	    }
143 	} else {
144 	    *d++ = BAD_ASCII;
145 	    SKIP;
146 	}
147     }
148     *d = 0;
149 #undef SKIP
150 }
151 
/*
 * Compute the length, excluding the terminating NUL, of the Latin-1 string
 * which utf8l1strcpy() produces for the NUL-terminated UTF-8 string 's'.
 *
 * Keep this in sync with utf8l1strcpy!
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* plain 7-bit character */
	    s++;
	    count++;
	} else if ((*s & 0x7C) != 0x40) {
	    /* counted as one replacement character; skip its tail bytes */
	    count++;
	    do {
		s++;
	    } while ((*s & 0xC0) == 0x80);
	} else if ((s[1] & 0x80) == 0) {
	    /* incorrect UTF-8: the lead byte is dropped, nothing counted */
	    s++;
	} else {
	    /* two-byte sequence decodes to one Latin-1 character */
	    s += 2;
	    count++;
	}
    }
    return count;
}
181 
182 int
Xutf8TextPropertyToTextList(Display * dpy,const XTextProperty * tp,char *** list_return,int * count_return)183 Xutf8TextPropertyToTextList(Display *dpy,
184 			    const XTextProperty * tp,
185 			    char ***list_return,
186 			    int *count_return)
187 {
188     int utf8;
189     char **list;
190     int nelements;
191     char *cp;
192     char *start;
193     size_t i;
194     int j;
195     size_t datalen = tp->nitems;
196     size_t len;
197 
198     if (tp->format != 8)
199 	return XConverterNotFound;
200 
201     if (tp->encoding == XA_STRING)
202 	utf8 = 0;
203     else if (tp->encoding == XA_UTF8_STRING(dpy))
204 	utf8 = 1;
205     else
206 	return XConverterNotFound;
207 
208     if (datalen == 0) {
209 	*list_return = NULL;
210 	*count_return = 0;
211 	return 0;
212     }
213 
214     nelements = 1;
215     for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
216 	if (*cp == '\0')
217 	    nelements++;
218     }
219 
220     list = TypeMallocN(char *, (unsigned) nelements);
221     if (!list)
222 	return XNoMemory;
223 
224     if (utf8)
225 	len = datalen;
226     else
227 	len = l1countUtf8Bytes((char *) tp->value, datalen);
228 
229     start = malloc(len + 1);
230     if (!start) {
231 	free(list);
232 	return XNoMemory;
233     }
234 
235     if (utf8)
236 	memcpy(start, (char *) tp->value, datalen);
237     else
238 	l1utf8copy(start, (char *) tp->value, datalen);
239     start[len] = '\0';
240 
241     for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
242 	if (*cp == '\0') {
243 	    list[j] = start;
244 	    start = (cp + 1);
245 	    j++;
246 	}
247     }
248 
249     list[j] = NULL;
250     *list_return = list;
251     *count_return = nelements;
252     return 0;
253 }
254 
255 int
Xutf8TextListToTextProperty(Display * dpy,char ** list,int count,XICCEncodingStyle style,XTextProperty * text_prop)256 Xutf8TextListToTextProperty(Display *dpy,
257 			    char **list,
258 			    int count,
259 			    XICCEncodingStyle style,
260 			    XTextProperty * text_prop)
261 {
262     XTextProperty proto;
263     unsigned int nbytes;
264     int i;
265 
266     if (style != XStringStyle &&
267 	style != XCompoundTextStyle &&
268 	style != XStdICCTextStyle &&
269 	style != XUTF8StringStyle)
270 	return XConverterNotFound;
271 
272     if (style == XUTF8StringStyle) {
273 	for (i = 0, nbytes = 0; i < count; i++) {
274 	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
275 	}
276     } else {
277 	for (i = 0, nbytes = 0; i < count; i++) {
278 	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
279 	}
280     }
281 
282     if (style == XCompoundTextStyle)
283 	proto.encoding = XA_COMPOUND_TEXT(dpy);
284     else if (style == XUTF8StringStyle)
285 	proto.encoding = XA_UTF8_STRING(dpy);
286     else
287 	proto.encoding = XA_STRING;
288     proto.format = 8;
289     if (nbytes)
290 	proto.nitems = nbytes - 1;
291     else
292 	proto.nitems = 0;
293     proto.value = NULL;
294 
295     if (nbytes > 0) {
296 	char *buf = TypeMallocN(char, nbytes);
297 	if (!buf)
298 	    return XNoMemory;
299 
300 	proto.value = (unsigned char *) buf;
301 	for (i = 0; i < count; i++) {
302 	    char *arg = list[i];
303 
304 	    if (arg) {
305 		if (style == XUTF8StringStyle) {
306 		    strcpy(buf, arg);
307 		} else {
308 		    utf8l1strcpy(buf, arg);
309 		}
310 		buf += (strlen(buf) + 1);
311 	    } else {
312 		*buf++ = '\0';
313 	    }
314 	}
315     } else {
316 	proto.value = CastMalloc(unsigned char);	/* easier for client */
317 	if (!proto.value)
318 	    return XNoMemory;
319 
320 	proto.value[0] = '\0';
321     }
322 
323     *text_prop = proto;
324     return 0;
325 }
326 
327 int
Xutf8LookupString(XIC ic GCC_UNUSED,XKeyEvent * ev,char * buffer,int nbytes,KeySym * keysym_return,Status * status_return)328 Xutf8LookupString(XIC ic GCC_UNUSED,
329 		  XKeyEvent *ev,
330 		  char *buffer,
331 		  int nbytes,
332 		  KeySym * keysym_return,
333 		  Status * status_return)
334 {
335     int rc;
336     KeySym keysym;
337     int codepoint;
338     size_t len;
339 
340     rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
341 
342     if (rc > 0) {
343 	codepoint = buffer[0] & 0xFF;
344     } else {
345 	codepoint = keysym2ucs(keysym);
346     }
347 
348     if (codepoint < 0) {
349 	if (keysym == None) {
350 	    *status_return = XLookupNone;
351 	} else {
352 	    *status_return = XLookupKeySym;
353 	    *keysym_return = keysym;
354 	}
355 	return 0;
356     }
357 
358     if (nbytes < utf8countBytes(codepoint)) {
359 	*status_return = XBufferOverflow;
360 	return utf8countBytes(codepoint);
361     }
362 
363     utf8insert(buffer, codepoint, &len);
364 
365     if (keysym != None) {
366 	*keysym_return = keysym;
367 	*status_return = XLookupBoth;
368     } else {
369 	*status_return = XLookupChars;
370     }
371     return (int) len;
372 }
373 
374 #else /* X_HAVE_UTF8_STRING */
375 /* Silence the compiler */
/*
 * Placeholder so that this file still defines something when the X
 * library provides the UTF-8 functions itself.  Does nothing.
 */
void
xutf8_dummy(void)
{
    /* intentionally empty */
}
381 #endif
382