1 /******************************************************************
2 
3               Copyright 1993 by SunSoft, Inc.
4               Copyright 1999-2000 by Bruno Haible
5 
6 Permission to use, copy, modify, distribute, and sell this software
7 and its documentation for any purpose is hereby granted without fee,
8 provided that the above copyright notice appear in all copies and
9 that both that copyright notice and this permission notice appear
10 in supporting documentation, and that the names of SunSoft, Inc. and
11 Bruno Haible not be used in advertising or publicity pertaining to
12 distribution of the software without specific, written prior
13 permission.  SunSoft, Inc. and Bruno Haible make no representations
14 about the suitability of this software for any purpose.  It is
15 provided "as is" without express or implied warranty.
16 
17 SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
18 TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
19 AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
20 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
22 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
23 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 
25 ******************************************************************/
26 
27 /*
28  * This file contains:
29  *
30  * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
31  *
32  *    Used for three purposes:
33  *      1. The UTF-8 locales, see below.
34  *      2. Unicode aware applications for which the use of 8-bit character
35  *         sets is an anachronism.
36  *      3. For conversion from keysym to locale encoding.
37  *
38  * II. Conversion files for an UTF-8 locale loader.
39  *     Supports: all locales with codeset UTF-8.
40  *     How: Provides converters for UTF-8.
41  *     Platforms: all systems.
42  *
43  * The loader itself is located in lcUTF8.c.
44  */
45 
46 /*
47  * The conversion from UTF-8 to CompoundText is realized in a very
48  * conservative way. Recall that CompoundText data is used for inter-client
49  * communication purposes. We distinguish three classes of clients:
50  * - Clients which accept only those pieces of CompoundText which belong to
51  *   the character set understood by the current locale.
52  *   (Example: clients which are linked to an older X11 library.)
53  * - Clients which accept CompoundText with multiple character sets and parse
54  *   it themselves.
55  *   (Example: emacs, xemacs.)
56  * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
57  *   functions for the conversion of CompoundText to their current locale's
58  *   multi-byte/wide-character format.
59  * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
60  * follows. For every character, it first tests whether the character is
61  * representable in the current locale's original (non-UTF-8) character set.
62  * If not, it goes through the list of predefined character sets for
63  * CompoundText and tests if the character is representable in that character
64  * set. If so, it encodes the character using its code within that character
65  * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since
66  * clients of the first and second kind ignore such encapsulated text,
67  * this encapsulation is kept to a minimum and terminated as early as possible.
68  *
69  * In a distant future, when clients of the first and second kind will have
70  * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
71  * without first going through the list of predefined character sets.
72  */
73 
74 #ifdef HAVE_CONFIG_H
75 #include <config.h>
76 #endif
77 #include <stdio.h>
78 #include "Xlibint.h"
79 #include "XlcPubI.h"
80 #include "XlcGeneric.h"
81 
82 static XlcConv
create_conv(XLCd lcd,XlcConvMethods methods)83 create_conv(
84     XLCd lcd,
85     XlcConvMethods methods)
86 {
87     XlcConv conv;
88 
89     conv = Xmalloc(sizeof(XlcConvRec));
90     if (conv == (XlcConv) NULL)
91 	return (XlcConv) NULL;
92 
93     conv->methods = methods;
94     conv->state = NULL;
95 
96     return conv;
97 }
98 
99 static void
close_converter(XlcConv conv)100 close_converter(
101     XlcConv conv)
102 {
103     Xfree(conv);
104 }
105 
/*
 * Replacement character for invalid multibyte sequence or wide character.
 * BAD_WCHAR (U+FFFD) is the substitute emitted as a Unicode value, e.g.
 * by cstoutf8(); BAD_CHAR is the substitute emitted as a single byte,
 * e.g. by utf8tostr().
 */
#define BAD_WCHAR ((ucs4_t) 0xfffd)
#define BAD_CHAR '?'
109 
110 /***************************************************************************/
111 /* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
112  *
113  * Note that this code works in any locale. We store Unicode values in
114  * `ucs4_t' variables, but don't pass them to the user.
115  *
116  * This code has to support all character sets that are used for CompoundText,
117  * nothing more, nothing less. See the table in lcCT.c.
118  * Since the conversion _to_ CompoundText is likely to need the tables for all
119  * character sets at once, we don't use dynamic loading (of tables or shared
120  * libraries through iconv()). Use a fixed set of tables instead.
121  *
122  * We use statically computed tables, not dynamically allocated arrays,
123  * because it's more memory efficient: Different processes using the same
124  * libX11 shared library share the "text" and read-only "data" sections.
125  */
126 
/* Type used throughout this file to hold one Unicode value. */
typedef unsigned int ucs4_t;
/*
 * Alias for XlcConv.
 * NOTE(review): presumably the name the included lcUniConv tables use for
 * their converter argument -- confirm against those headers.
 */
#define conv_t XlcConv
129 
130 typedef struct _Utf8ConvRec {
131     const char *name;
132     XrmQuark xrm_name;
133     int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
134     int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
135 } Utf8ConvRec, *Utf8Conv;
136 
137 /*
138  * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
139  * converts the byte sequence starting at s to a wide character. Up to n bytes
140  * are available at s. n is >= 1.
141  * Result is number of bytes consumed (if a wide character was read),
142  * or 0 if invalid, or -1 if n too small.
143  *
144  * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
145  * converts the wide character wc to the character set xxx, and stores the
146  * result beginning at r. Up to n bytes may be written at r. n is >= 1.
147  * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
148  */
149 
/*
 * Return codes of the conversion functions described above.
 * Note that RET_TOOFEW(0) and RET_TOOSMALL both evaluate to -1, and that
 * RET_ILSEQ is 0, so a successful conversion always returns a value >= 1.
 */
/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   -1
156 
157 /*
158  * The tables below are bijective. It would be possible to extend the
159  * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
160  * but *only* with characters not contained in any other table, and *only*
161  * when the current locale is not an UTF-8 locale.
162  */
163 
164 #include "lcUniConv/utf8.h"
165 #include "lcUniConv/ucs2be.h"
166 #ifdef notused
167 #include "lcUniConv/ascii.h"
168 #endif
169 #include "lcUniConv/iso8859_1.h"
170 #include "lcUniConv/iso8859_2.h"
171 #include "lcUniConv/iso8859_3.h"
172 #include "lcUniConv/iso8859_4.h"
173 #include "lcUniConv/iso8859_5.h"
174 #include "lcUniConv/iso8859_6.h"
175 #include "lcUniConv/iso8859_7.h"
176 #include "lcUniConv/iso8859_8.h"
177 #include "lcUniConv/iso8859_9.h"
178 #include "lcUniConv/iso8859_10.h"
179 #include "lcUniConv/iso8859_11.h"
180 #include "lcUniConv/iso8859_13.h"
181 #include "lcUniConv/iso8859_14.h"
182 #include "lcUniConv/iso8859_15.h"
183 #include "lcUniConv/iso8859_16.h"
184 #include "lcUniConv/iso8859_9e.h"
185 #include "lcUniConv/jisx0201.h"
186 #include "lcUniConv/tis620.h"
187 #include "lcUniConv/koi8_r.h"
188 #include "lcUniConv/koi8_u.h"
189 #include "lcUniConv/koi8_c.h"
190 #include "lcUniConv/armscii_8.h"
191 #include "lcUniConv/cp1133.h"
192 #include "lcUniConv/mulelao.h"
193 #include "lcUniConv/viscii.h"
194 #include "lcUniConv/tcvn.h"
195 #include "lcUniConv/georgian_academy.h"
196 #include "lcUniConv/georgian_ps.h"
197 #include "lcUniConv/cp1251.h"
198 #include "lcUniConv/cp1255.h"
199 #include "lcUniConv/cp1256.h"
200 #include "lcUniConv/tatar_cyr.h"
201 
/*
 * Pair of a table index and a 16-bit usage bitmask.
 * NOTE(review): not referenced directly in this file; presumably consumed
 * by the multi-byte charset tables included right after it -- confirm.
 */
typedef struct {
    unsigned short indx; /* index into big table */
    unsigned short used; /* bitmask of used entries */
} Summary16;
206 
207 #include "lcUniConv/gb2312.h"
208 #include "lcUniConv/jisx0208.h"
209 #include "lcUniConv/jisx0212.h"
210 #include "lcUniConv/ksc5601.h"
211 #include "lcUniConv/big5.h"
212 #include "lcUniConv/big5_emacs.h"
213 #include "lcUniConv/big5hkscs.h"
214 #include "lcUniConv/gbk.h"
215 
216 static Utf8ConvRec all_charsets[] = {
217     /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
218        (for lookup speed), once at the end (as a fallback).  */
219     { "ISO10646-1", NULLQUARK,
220 	utf8_mbtowc, utf8_wctomb
221     },
222 
223     { "ISO8859-1", NULLQUARK,
224 	iso8859_1_mbtowc, iso8859_1_wctomb
225     },
226     { "ISO8859-2", NULLQUARK,
227 	iso8859_2_mbtowc, iso8859_2_wctomb
228     },
229     { "ISO8859-3", NULLQUARK,
230 	iso8859_3_mbtowc, iso8859_3_wctomb
231     },
232     { "ISO8859-4", NULLQUARK,
233 	iso8859_4_mbtowc, iso8859_4_wctomb
234     },
235     { "ISO8859-5", NULLQUARK,
236 	iso8859_5_mbtowc, iso8859_5_wctomb
237     },
238     { "ISO8859-6", NULLQUARK,
239 	iso8859_6_mbtowc, iso8859_6_wctomb
240     },
241     { "ISO8859-7", NULLQUARK,
242 	iso8859_7_mbtowc, iso8859_7_wctomb
243     },
244     { "ISO8859-8", NULLQUARK,
245 	iso8859_8_mbtowc, iso8859_8_wctomb
246     },
247     { "ISO8859-9", NULLQUARK,
248 	iso8859_9_mbtowc, iso8859_9_wctomb
249     },
250     { "ISO8859-10", NULLQUARK,
251 	iso8859_10_mbtowc, iso8859_10_wctomb
252     },
253     { "ISO8859-11", NULLQUARK,
254 	iso8859_11_mbtowc, iso8859_11_wctomb
255     },
256     { "ISO8859-13", NULLQUARK,
257 	iso8859_13_mbtowc, iso8859_13_wctomb
258     },
259     { "ISO8859-14", NULLQUARK,
260 	iso8859_14_mbtowc, iso8859_14_wctomb
261     },
262     { "ISO8859-15", NULLQUARK,
263 	iso8859_15_mbtowc, iso8859_15_wctomb
264     },
265     { "ISO8859-16", NULLQUARK,
266 	iso8859_16_mbtowc, iso8859_16_wctomb
267     },
268     { "JISX0201.1976-0", NULLQUARK,
269 	jisx0201_mbtowc, jisx0201_wctomb
270     },
271     { "TIS620-0", NULLQUARK,
272 	tis620_mbtowc, tis620_wctomb
273     },
274     { "GB2312.1980-0", NULLQUARK,
275 	gb2312_mbtowc, gb2312_wctomb
276     },
277     { "JISX0208.1983-0", NULLQUARK,
278 	jisx0208_mbtowc, jisx0208_wctomb
279     },
280     { "JISX0208.1990-0", NULLQUARK,
281 	jisx0208_mbtowc, jisx0208_wctomb
282     },
283     { "JISX0212.1990-0", NULLQUARK,
284 	jisx0212_mbtowc, jisx0212_wctomb
285     },
286     { "KSC5601.1987-0", NULLQUARK,
287 	ksc5601_mbtowc, ksc5601_wctomb
288     },
289     { "KOI8-R", NULLQUARK,
290 	koi8_r_mbtowc, koi8_r_wctomb
291     },
292     { "KOI8-U", NULLQUARK,
293 	koi8_u_mbtowc, koi8_u_wctomb
294     },
295     { "KOI8-C", NULLQUARK,
296 	koi8_c_mbtowc, koi8_c_wctomb
297     },
298     { "TATAR-CYR", NULLQUARK,
299 	tatar_cyr_mbtowc, tatar_cyr_wctomb
300     },
301     { "ARMSCII-8", NULLQUARK,
302 	armscii_8_mbtowc, armscii_8_wctomb
303     },
304     { "IBM-CP1133", NULLQUARK,
305 	cp1133_mbtowc, cp1133_wctomb
306     },
307     { "MULELAO-1", NULLQUARK,
308 	mulelao_mbtowc, mulelao_wctomb
309     },
310     { "VISCII1.1-1", NULLQUARK,
311 	viscii_mbtowc, viscii_wctomb
312     },
313     { "TCVN-5712", NULLQUARK,
314 	tcvn_mbtowc, tcvn_wctomb
315     },
316     { "GEORGIAN-ACADEMY", NULLQUARK,
317 	georgian_academy_mbtowc, georgian_academy_wctomb
318     },
319     { "GEORGIAN-PS", NULLQUARK,
320 	georgian_ps_mbtowc, georgian_ps_wctomb
321     },
322     { "ISO8859-9E", NULLQUARK,
323 	iso8859_9e_mbtowc, iso8859_9e_wctomb
324     },
325     { "MICROSOFT-CP1251", NULLQUARK,
326 	cp1251_mbtowc, cp1251_wctomb
327     },
328     { "MICROSOFT-CP1255", NULLQUARK,
329 	cp1255_mbtowc, cp1255_wctomb
330     },
331     { "MICROSOFT-CP1256", NULLQUARK,
332 	cp1256_mbtowc, cp1256_wctomb
333     },
334     { "BIG5-0", NULLQUARK,
335 	big5_mbtowc, big5_wctomb
336     },
337     { "BIG5-E0", NULLQUARK,
338 	big5_0_mbtowc, big5_0_wctomb
339     },
340     { "BIG5-E1", NULLQUARK,
341 	big5_1_mbtowc, big5_1_wctomb
342     },
343     { "GBK-0", NULLQUARK,
344 	gbk_mbtowc, gbk_wctomb
345     },
346     { "BIG5HKSCS-0", NULLQUARK,
347 	big5hkscs_mbtowc, big5hkscs_wctomb
348     },
349 
350     /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
351        (for lookup speed), once at the end (as a fallback).  */
352     { "ISO10646-1", NULLQUARK,
353 	utf8_mbtowc, utf8_wctomb
354     },
355 
356     /* Encoding ISO10646-1 for fonts means UCS2-like encoding
357        so for conversion to FontCharSet we need this record */
358     { "ISO10646-1", NULLQUARK,
359 	ucs2be_mbtowc, ucs2be_wctomb
360     }
361 };
362 
/* Number of records in all_charsets[], including the final UCS-2 record. */
#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0]))
/* Number of records not counting the final UCS-2 record. */
#define all_charsets_count  (charsets_table_size - 1)
/* Index of the final (UCS-2, ucs2be_*) record in all_charsets[]. */
#define ucs2_conv_index     (charsets_table_size - 1)
366 
367 static void
init_all_charsets(void)368 init_all_charsets (void)
369 {
370     Utf8Conv convptr;
371     int i;
372 
373     for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
374 	convptr->xrm_name = XrmStringToQuark(convptr->name);
375 }
376 
/*
 * Run init_all_charsets() the first time it is needed.  "Not yet
 * initialized" is detected solely by the first record still carrying
 * NULLQUARK.  No locking is performed here.
 */
#define lazy_init_all_charsets()					\
    do {								\
	if (all_charsets[0].xrm_name == NULLQUARK)			\
	    init_all_charsets();					\
    } while (0)
382 
383 /* from XlcNCharSet to XlcNUtf8String */
384 
385 static int
cstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)386 cstoutf8(
387     XlcConv conv,
388     XPointer *from,
389     int *from_left,
390     XPointer *to,
391     int *to_left,
392     XPointer *args,
393     int num_args)
394 {
395     XlcCharSet charset;
396     const char *name;
397     Utf8Conv convptr;
398     int i;
399     unsigned char const *src;
400     unsigned char const *srcend;
401     unsigned char *dst;
402     unsigned char *dstend;
403     int unconv_num;
404 
405     if (from == NULL || *from == NULL)
406 	return 0;
407 
408     if (num_args < 1)
409 	return -1;
410 
411     charset = (XlcCharSet) args[0];
412     name = charset->encoding_name;
413     /* not charset->name because the latter has a ":GL"/":GR" suffix */
414 
415     for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
416 	if (!strcmp(convptr->name, name))
417 	    break;
418     if (i == 0)
419 	return -1;
420 
421     src = (unsigned char const *) *from;
422     srcend = src + *from_left;
423     dst = (unsigned char *) *to;
424     dstend = dst + *to_left;
425     unconv_num = 0;
426 
427     while (src < srcend) {
428 	ucs4_t wc;
429 	int consumed;
430 	int count;
431 
432 	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
433 	if (consumed == RET_ILSEQ)
434 	    return -1;
435 	if (consumed == RET_TOOFEW(0))
436 	    break;
437 
438 	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
439 	if (count == RET_TOOSMALL)
440 	    break;
441 	if (count == RET_ILSEQ) {
442 	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
443 	    if (count == RET_TOOSMALL)
444 		break;
445 	    unconv_num++;
446 	}
447 	src += consumed;
448 	dst += count;
449     }
450 
451     *from = (XPointer) src;
452     *from_left = srcend - src;
453     *to = (XPointer) dst;
454     *to_left = dstend - dst;
455 
456     return unconv_num;
457 }
458 
/*
 * Method table handed to create_conv() by open_cstoutf8().
 * NOTE(review): XlcConvMethodsRec is declared outside this file; the three
 * slots are presumably close / convert / reset -- confirm.
 */
static XlcConvMethodsRec methods_cstoutf8 = {
    close_converter,
    cstoutf8,
    NULL
};
464 
465 static XlcConv
open_cstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)466 open_cstoutf8(
467     XLCd from_lcd,
468     const char *from_type,
469     XLCd to_lcd,
470     const char *to_type)
471 {
472     lazy_init_all_charsets();
473     return create_conv(from_lcd, &methods_cstoutf8);
474 }
475 
476 /* from XlcNUtf8String to XlcNCharSet */
477 
478 static XlcConv
create_tocs_conv(XLCd lcd,XlcConvMethods methods)479 create_tocs_conv(
480     XLCd lcd,
481     XlcConvMethods methods)
482 {
483     XlcConv conv;
484     CodeSet *codeset_list;
485     int codeset_num;
486     int charset_num;
487     int i, j, k;
488     Utf8Conv *preferred;
489 
490     lazy_init_all_charsets();
491 
492     codeset_list = XLC_GENERIC(lcd, codeset_list);
493     codeset_num = XLC_GENERIC(lcd, codeset_num);
494 
495     charset_num = 0;
496     for (i = 0; i < codeset_num; i++)
497 	charset_num += codeset_list[i]->num_charsets;
498     if (charset_num > all_charsets_count-1)
499 	charset_num = all_charsets_count-1;
500 
501     conv = Xmalloc(sizeof(XlcConvRec)
502 			     + (charset_num + 1) * sizeof(Utf8Conv));
503     if (conv == (XlcConv) NULL)
504 	return (XlcConv) NULL;
505     preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
506 
507     /* Loop through all codesets mentioned in the locale. */
508     charset_num = 0;
509     for (i = 0; i < codeset_num; i++) {
510 	XlcCharSet *charsets = codeset_list[i]->charset_list;
511 	int num_charsets = codeset_list[i]->num_charsets;
512 	for (j = 0; j < num_charsets; j++) {
513 	    const char *name = charsets[j]->encoding_name;
514 	    /* If it wasn't already encountered... */
515 	    for (k = charset_num-1; k >= 0; k--)
516 		if (!strcmp(preferred[k]->name, name))
517 		    break;
518 	    if (k < 0) {
519 		/* Look it up in all_charsets[]. */
520 		for (k = 0; k < all_charsets_count-1; k++)
521 		    if (!strcmp(all_charsets[k].name, name)) {
522 			/* Add it to the preferred set. */
523 			preferred[charset_num++] = &all_charsets[k];
524 			break;
525 		    }
526 	    }
527 	}
528     }
529     preferred[charset_num] = (Utf8Conv) NULL;
530 
531     conv->methods = methods;
532     conv->state = (XPointer) preferred;
533 
534     return conv;
535 }
536 
537 static void
close_tocs_converter(XlcConv conv)538 close_tocs_converter(
539     XlcConv conv)
540 {
541     /* conv->state is allocated together with conv, free both at once.  */
542     Xfree(conv);
543 }
544 
545 /*
546  * Converts a Unicode character to an appropriate character set. The NULL
547  * terminated array of preferred character sets is passed as first argument.
548  * If successful, *charsetp is set to the character set that was used, and
549  * *sidep is set to the character set side (XlcGL or XlcGR).
550  */
551 static int
charset_wctocs(Utf8Conv * preferred,Utf8Conv * charsetp,XlcSide * sidep,XlcConv conv,unsigned char * r,ucs4_t wc,int n)552 charset_wctocs(
553     Utf8Conv *preferred,
554     Utf8Conv *charsetp,
555     XlcSide *sidep,
556     XlcConv conv,
557     unsigned char *r,
558     ucs4_t wc,
559     int n)
560 {
561     int count;
562     Utf8Conv convptr;
563     int i;
564 
565     for (; *preferred != (Utf8Conv) NULL; preferred++) {
566 	convptr = *preferred;
567 	count = convptr->wctocs(conv, r, wc, n);
568 	if (count == RET_TOOSMALL)
569 	    return RET_TOOSMALL;
570 	if (count != RET_ILSEQ) {
571 	    *charsetp = convptr;
572 	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
573 	    return count;
574 	}
575     }
576     for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
577 	count = convptr->wctocs(conv, r, wc, n);
578 	if (count == RET_TOOSMALL)
579 	    return RET_TOOSMALL;
580 	if (count != RET_ILSEQ) {
581 	    *charsetp = convptr;
582 	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
583 	    return count;
584 	}
585     }
586     return RET_ILSEQ;
587 }
588 
589 static int
utf8tocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)590 utf8tocs(
591     XlcConv conv,
592     XPointer *from,
593     int *from_left,
594     XPointer *to,
595     int *to_left,
596     XPointer *args,
597     int num_args)
598 {
599     Utf8Conv *preferred_charsets;
600     XlcCharSet last_charset = NULL;
601     unsigned char const *src;
602     unsigned char const *srcend;
603     unsigned char *dst;
604     unsigned char *dstend;
605     int unconv_num;
606 
607     if (from == NULL || *from == NULL)
608 	return 0;
609 
610     preferred_charsets = (Utf8Conv *) conv->state;
611     src = (unsigned char const *) *from;
612     srcend = src + *from_left;
613     dst = (unsigned char *) *to;
614     dstend = dst + *to_left;
615     unconv_num = 0;
616 
617     while (src < srcend && dst < dstend) {
618 	Utf8Conv chosen_charset = NULL;
619 	XlcSide chosen_side = XlcNONE;
620 	ucs4_t wc;
621 	int consumed;
622 	int count;
623 
624 	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
625 	if (consumed == RET_TOOFEW(0))
626 	    break;
627 	if (consumed == RET_ILSEQ) {
628 	    src++;
629 	    unconv_num++;
630 	    continue;
631 	}
632 
633 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
634 	if (count == RET_TOOSMALL)
635 	    break;
636 	if (count == RET_ILSEQ) {
637 	    src += consumed;
638 	    unconv_num++;
639 	    continue;
640 	}
641 
642 	if (last_charset == NULL) {
643 	    last_charset =
644 	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
645 	    if (last_charset == NULL) {
646 		src += consumed;
647 		unconv_num++;
648 		continue;
649 	    }
650 	} else {
651 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
652 	          && (last_charset->side == XlcGLGR
653 	              || last_charset->side == chosen_side)))
654 		break;
655 	}
656 	src += consumed;
657 	dst += count;
658     }
659 
660     if (last_charset == NULL)
661 	return -1;
662 
663     *from = (XPointer) src;
664     *from_left = srcend - src;
665     *to = (XPointer) dst;
666     *to_left = dstend - dst;
667 
668     if (num_args >= 1)
669 	*((XlcCharSet *)args[0]) = last_charset;
670 
671     return unconv_num;
672 }
673 
/*
 * Method table handed to create_tocs_conv() by open_utf8tocs().
 * NOTE(review): XlcConvMethodsRec is declared outside this file; the three
 * slots are presumably close / convert / reset -- confirm.
 */
static XlcConvMethodsRec methods_utf8tocs = {
    close_tocs_converter,
    utf8tocs,
    NULL
};
679 
680 static XlcConv
open_utf8tocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)681 open_utf8tocs(
682     XLCd from_lcd,
683     const char *from_type,
684     XLCd to_lcd,
685     const char *to_type)
686 {
687     return create_tocs_conv(from_lcd, &methods_utf8tocs);
688 }
689 
690 /* from XlcNUtf8String to XlcNChar */
691 
692 static int
utf8tocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)693 utf8tocs1(
694     XlcConv conv,
695     XPointer *from,
696     int *from_left,
697     XPointer *to,
698     int *to_left,
699     XPointer *args,
700     int num_args)
701 {
702     Utf8Conv *preferred_charsets;
703     XlcCharSet last_charset = NULL;
704     unsigned char const *src;
705     unsigned char const *srcend;
706     unsigned char *dst;
707     unsigned char *dstend;
708     int unconv_num;
709 
710     if (from == NULL || *from == NULL)
711 	return 0;
712 
713     preferred_charsets = (Utf8Conv *) conv->state;
714     src = (unsigned char const *) *from;
715     srcend = src + *from_left;
716     dst = (unsigned char *) *to;
717     dstend = dst + *to_left;
718     unconv_num = 0;
719 
720     while (src < srcend && dst < dstend) {
721 	Utf8Conv chosen_charset = NULL;
722 	XlcSide chosen_side = XlcNONE;
723 	ucs4_t wc;
724 	int consumed;
725 	int count;
726 
727 	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
728 	if (consumed == RET_TOOFEW(0))
729 	    break;
730 	if (consumed == RET_ILSEQ) {
731 	    src++;
732 	    unconv_num++;
733 	    continue;
734 	}
735 
736 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
737 	if (count == RET_TOOSMALL)
738 	    break;
739 	if (count == RET_ILSEQ) {
740 	    src += consumed;
741 	    unconv_num++;
742 	    continue;
743 	}
744 
745 	if (last_charset == NULL) {
746 	    last_charset =
747 	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
748 	    if (last_charset == NULL) {
749 		src += consumed;
750 		unconv_num++;
751 		continue;
752 	    }
753 	} else {
754 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
755 	          && (last_charset->side == XlcGLGR
756 	              || last_charset->side == chosen_side)))
757 		break;
758 	}
759 	src += consumed;
760 	dst += count;
761 	break;
762     }
763 
764     if (last_charset == NULL)
765 	return -1;
766 
767     *from = (XPointer) src;
768     *from_left = srcend - src;
769     *to = (XPointer) dst;
770     *to_left = dstend - dst;
771 
772     if (num_args >= 1)
773 	*((XlcCharSet *)args[0]) = last_charset;
774 
775     return unconv_num;
776 }
777 
/*
 * Method table handed to create_tocs_conv() by open_utf8tocs1().
 * NOTE(review): XlcConvMethodsRec is declared outside this file; the three
 * slots are presumably close / convert / reset -- confirm.
 */
static XlcConvMethodsRec methods_utf8tocs1 = {
    close_tocs_converter,
    utf8tocs1,
    NULL
};
783 
784 static XlcConv
open_utf8tocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)785 open_utf8tocs1(
786     XLCd from_lcd,
787     const char *from_type,
788     XLCd to_lcd,
789     const char *to_type)
790 {
791     return create_tocs_conv(from_lcd, &methods_utf8tocs1);
792 }
793 
794 /* from XlcNUtf8String to XlcNString */
795 
796 static int
utf8tostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)797 utf8tostr(
798     XlcConv conv,
799     XPointer *from,
800     int *from_left,
801     XPointer *to,
802     int *to_left,
803     XPointer *args,
804     int num_args)
805 {
806     unsigned char const *src;
807     unsigned char const *srcend;
808     unsigned char *dst;
809     unsigned char *dstend;
810     int unconv_num;
811 
812     if (from == NULL || *from == NULL)
813 	return 0;
814 
815     src = (unsigned char const *) *from;
816     srcend = src + *from_left;
817     dst = (unsigned char *) *to;
818     dstend = dst + *to_left;
819     unconv_num = 0;
820 
821     while (src < srcend) {
822 	unsigned char c;
823 	ucs4_t wc;
824 	int consumed;
825 
826 	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
827 	if (consumed == RET_TOOFEW(0))
828 	    break;
829 	if (dst == dstend)
830 	    break;
831 	if (consumed == RET_ILSEQ) {
832 	    consumed = 1;
833 	    c = BAD_CHAR;
834 	    unconv_num++;
835 	} else {
836 	    if ((wc & ~(ucs4_t)0xff) != 0) {
837 		c = BAD_CHAR;
838 		unconv_num++;
839 	    } else
840 		c = (unsigned char) wc;
841 	}
842 	*dst++ = c;
843 	src += consumed;
844     }
845 
846     *from = (XPointer) src;
847     *from_left = srcend - src;
848     *to = (XPointer) dst;
849     *to_left = dstend - dst;
850 
851     return unconv_num;
852 }
853 
/*
 * Method table handed to create_conv() by open_utf8tostr().
 * NOTE(review): XlcConvMethodsRec is declared outside this file; the three
 * slots are presumably close / convert / reset -- confirm.
 */
static XlcConvMethodsRec methods_utf8tostr = {
    close_converter,
    utf8tostr,
    NULL
};
859 
860 static XlcConv
open_utf8tostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)861 open_utf8tostr(
862     XLCd from_lcd,
863     const char *from_type,
864     XLCd to_lcd,
865     const char *to_type)
866 {
867     return create_conv(from_lcd, &methods_utf8tostr);
868 }
869 
870 /* from XlcNString to XlcNUtf8String */
871 
872 static int
strtoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)873 strtoutf8(
874     XlcConv conv,
875     XPointer *from,
876     int *from_left,
877     XPointer *to,
878     int *to_left,
879     XPointer *args,
880     int num_args)
881 {
882     unsigned char const *src;
883     unsigned char const *srcend;
884     unsigned char *dst;
885     unsigned char *dstend;
886 
887     if (from == NULL || *from == NULL)
888 	return 0;
889 
890     src = (unsigned char const *) *from;
891     srcend = src + *from_left;
892     dst = (unsigned char *) *to;
893     dstend = dst + *to_left;
894 
895     while (src < srcend) {
896 	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
897 	if (count == RET_TOOSMALL)
898 	    break;
899 	dst += count;
900 	src++;
901     }
902 
903     *from = (XPointer) src;
904     *from_left = srcend - src;
905     *to = (XPointer) dst;
906     *to_left = dstend - dst;
907 
908     return 0;
909 }
910 
/*
 * Method table handed to create_conv() by open_strtoutf8().
 * NOTE(review): XlcConvMethodsRec is declared outside this file; the three
 * slots are presumably close / convert / reset -- confirm.
 */
static XlcConvMethodsRec methods_strtoutf8 = {
    close_converter,
    strtoutf8,
    NULL
};
916 
917 static XlcConv
open_strtoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)918 open_strtoutf8(
919     XLCd from_lcd,
920     const char *from_type,
921     XLCd to_lcd,
922     const char *to_type)
923 {
924     return create_conv(from_lcd, &methods_strtoutf8);
925 }
926 
927 /* Support for the input methods. */
928 
929 XPointer
_Utf8GetConvByName(const char * name)930 _Utf8GetConvByName(
931     const char *name)
932 {
933     XrmQuark xrm_name;
934     Utf8Conv convptr;
935     int i;
936 
937     if (name == NULL)
938         return (XPointer) NULL;
939 
940     lazy_init_all_charsets();
941     xrm_name = XrmStringToQuark(name);
942 
943     for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
944 	if (convptr->xrm_name == xrm_name)
945 	    return (XPointer) convptr->wctocs;
946     return (XPointer) NULL;
947 }
948 
949 /* from XlcNUcsChar to XlcNChar, needed for input methods */
950 
951 static XlcConv
create_ucstocs_conv(XLCd lcd,XlcConvMethods methods)952 create_ucstocs_conv(
953     XLCd lcd,
954     XlcConvMethods methods)
955 {
956 
957     if (XLC_PUBLIC_PART(lcd)->codeset
958 	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
959 	XlcConv conv;
960 	Utf8Conv *preferred;
961 
962 	lazy_init_all_charsets();
963 
964 	conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
965 	if (conv == (XlcConv) NULL)
966 	    return (XlcConv) NULL;
967 	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
968 
969 	preferred[0] = &all_charsets[0]; /* ISO10646 */
970 	preferred[1] = (Utf8Conv) NULL;
971 
972 	conv->methods = methods;
973 	conv->state = (XPointer) preferred;
974 
975 	return conv;
976     } else {
977 	return create_tocs_conv(lcd, methods);
978     }
979 }
980 
981 static int
charset_wctocs_exactly(Utf8Conv * preferred,Utf8Conv * charsetp,XlcSide * sidep,XlcConv conv,unsigned char * r,ucs4_t wc,int n)982 charset_wctocs_exactly(
983     Utf8Conv *preferred,
984     Utf8Conv *charsetp,
985     XlcSide *sidep,
986     XlcConv conv,
987     unsigned char *r,
988     ucs4_t wc,
989     int n)
990 {
991     int count;
992     Utf8Conv convptr;
993 
994     for (; *preferred != (Utf8Conv) NULL; preferred++) {
995 	convptr = *preferred;
996 	count = convptr->wctocs(conv, r, wc, n);
997 	if (count == RET_TOOSMALL)
998 	    return RET_TOOSMALL;
999 	if (count != RET_ILSEQ) {
1000 	    *charsetp = convptr;
1001 	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
1002 	    return count;
1003 	}
1004     }
1005     return RET_ILSEQ;
1006 }
1007 
1008 static int
ucstocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1009 ucstocs1(
1010     XlcConv conv,
1011     XPointer *from,
1012     int *from_left,
1013     XPointer *to,
1014     int *to_left,
1015     XPointer *args,
1016     int num_args)
1017 {
1018     ucs4_t const *src;
1019     unsigned char *dst = (unsigned char *) *to;
1020     int unconv_num = 0;
1021     Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1022     Utf8Conv chosen_charset = NULL;
1023     XlcSide chosen_side = XlcNONE;
1024     XlcCharSet charset = NULL;
1025     int count;
1026 
1027     if (from == NULL || *from == NULL)
1028 	return 0;
1029 
1030     src = (ucs4_t const *) *from;
1031 
1032     count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1033                                    &chosen_side, conv, dst, *src, *to_left);
1034     if (count < 1) {
1035         unconv_num++;
1036         count = 0;
1037     } else {
1038         charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1039     }
1040     if (charset == NULL)
1041 	return -1;
1042 
1043     *from = (XPointer) ++src;
1044     (*from_left)--;
1045     *to = (XPointer) dst;
1046     *to_left -= count;
1047 
1048     if (num_args >= 1)
1049 	*((XlcCharSet *)args[0]) = charset;
1050 
1051     return unconv_num;
1052 }
1053 
1054 static XlcConvMethodsRec methods_ucstocs1 = {
1055     close_tocs_converter,
1056     ucstocs1,
1057     NULL
1058 };
1059 
1060 static XlcConv
open_ucstocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1061 open_ucstocs1(
1062     XLCd from_lcd,
1063     const char *from_type,
1064     XLCd to_lcd,
1065     const char *to_type)
1066 {
1067     return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1068 }
1069 
1070 /* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1071 
1072 static int
ucstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1073 ucstoutf8(
1074     XlcConv conv,
1075     XPointer *from,
1076     int *from_left,
1077     XPointer *to,
1078     int *to_left,
1079     XPointer *args,
1080     int num_args)
1081 {
1082     const ucs4_t *src;
1083     const ucs4_t *srcend;
1084     unsigned char *dst;
1085     unsigned char *dstend;
1086     int unconv_num;
1087 
1088     if (from == NULL || *from == NULL)
1089 	return 0;
1090 
1091     src = (const ucs4_t *) *from;
1092     srcend = src + *from_left;
1093     dst = (unsigned char *) *to;
1094     dstend = dst + *to_left;
1095     unconv_num = 0;
1096 
1097     while (src < srcend) {
1098 	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1099 	if (count == RET_TOOSMALL)
1100 	    break;
1101 	if (count == RET_ILSEQ)
1102 	    unconv_num++;
1103 	src++;
1104 	dst += count;
1105     }
1106 
1107     *from = (XPointer) src;
1108     *from_left = srcend - src;
1109     *to = (XPointer) dst;
1110     *to_left = dstend - dst;
1111 
1112     return unconv_num;
1113 }
1114 
1115 static XlcConvMethodsRec methods_ucstoutf8 = {
1116     close_converter,
1117     ucstoutf8,
1118     NULL
1119 };
1120 
1121 static XlcConv
open_ucstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1122 open_ucstoutf8(
1123     XLCd from_lcd,
1124     const char *from_type,
1125     XLCd to_lcd,
1126     const char *to_type)
1127 {
1128     return create_conv(from_lcd, &methods_ucstoutf8);
1129 }
1130 
1131 /* Registers UTF-8 converters for a non-UTF-8 locale. */
1132 void
_XlcAddUtf8Converters(XLCd lcd)1133 _XlcAddUtf8Converters(
1134     XLCd lcd)
1135 {
1136     _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
1137     _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
1138     _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
1139     _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
1140     _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
1141     _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
1142     _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
1143 }
1144 
1145 /***************************************************************************/
1146 /* Part II: UTF-8 locale loader conversion files
1147  *
1148  * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1149  */
1150 
1151 /* from XlcNMultiByte to XlcNWideChar */
1152 
1153 static int
utf8towcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1154 utf8towcs(
1155     XlcConv conv,
1156     XPointer *from,
1157     int *from_left,
1158     XPointer *to,
1159     int *to_left,
1160     XPointer *args,
1161     int num_args)
1162 {
1163     unsigned char const *src;
1164     unsigned char const *srcend;
1165     wchar_t *dst;
1166     wchar_t *dstend;
1167     int unconv_num;
1168 
1169     if (from == NULL || *from == NULL)
1170 	return 0;
1171 
1172     src = (unsigned char const *) *from;
1173     srcend = src + *from_left;
1174     dst = (wchar_t *) *to;
1175     dstend = dst + *to_left;
1176     unconv_num = 0;
1177 
1178     while (src < srcend && dst < dstend) {
1179 	ucs4_t wc;
1180 	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1181 	if (consumed == RET_TOOFEW(0))
1182 	    break;
1183 	if (consumed == RET_ILSEQ) {
1184 	    src++;
1185 	    *dst = BAD_WCHAR;
1186 	    unconv_num++;
1187 	} else {
1188 	    src += consumed;
1189 	    *dst = wc;
1190 	}
1191 	dst++;
1192     }
1193 
1194     *from = (XPointer) src;
1195     *from_left = srcend - src;
1196     *to = (XPointer) dst;
1197     *to_left = dstend - dst;
1198 
1199     return unconv_num;
1200 }
1201 
1202 static XlcConvMethodsRec methods_utf8towcs = {
1203     close_converter,
1204     utf8towcs,
1205     NULL
1206 };
1207 
1208 static XlcConv
open_utf8towcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1209 open_utf8towcs(
1210     XLCd from_lcd,
1211     const char *from_type,
1212     XLCd to_lcd,
1213     const char *to_type)
1214 {
1215     return create_conv(from_lcd, &methods_utf8towcs);
1216 }
1217 
1218 /* from XlcNWideChar to XlcNMultiByte */
1219 
1220 static int
wcstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1221 wcstoutf8(
1222     XlcConv conv,
1223     XPointer *from,
1224     int *from_left,
1225     XPointer *to,
1226     int *to_left,
1227     XPointer *args,
1228     int num_args)
1229 {
1230     wchar_t const *src;
1231     wchar_t const *srcend;
1232     unsigned char *dst;
1233     unsigned char *dstend;
1234     int unconv_num;
1235 
1236     if (from == NULL || *from == NULL)
1237 	return 0;
1238 
1239     src = (wchar_t const *) *from;
1240     srcend = src + *from_left;
1241     dst = (unsigned char *) *to;
1242     dstend = dst + *to_left;
1243     unconv_num = 0;
1244 
1245     while (src < srcend) {
1246 	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1247 	if (count == RET_TOOSMALL)
1248 	    break;
1249 	if (count == RET_ILSEQ) {
1250 	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1251 	    if (count == RET_TOOSMALL)
1252 		break;
1253 	    unconv_num++;
1254 	}
1255 	dst += count;
1256 	src++;
1257     }
1258 
1259     *from = (XPointer) src;
1260     *from_left = srcend - src;
1261     *to = (XPointer) dst;
1262     *to_left = dstend - dst;
1263 
1264     return unconv_num;
1265 }
1266 
1267 static XlcConvMethodsRec methods_wcstoutf8 = {
1268     close_converter,
1269     wcstoutf8,
1270     NULL
1271 };
1272 
1273 static XlcConv
open_wcstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1274 open_wcstoutf8(
1275     XLCd from_lcd,
1276     const char *from_type,
1277     XLCd to_lcd,
1278     const char *to_type)
1279 {
1280     return create_conv(from_lcd, &methods_wcstoutf8);
1281 }
1282 
1283 /* from XlcNString to XlcNWideChar */
1284 
1285 static int
our_strtowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1286 our_strtowcs(
1287     XlcConv conv,
1288     XPointer *from,
1289     int *from_left,
1290     XPointer *to,
1291     int *to_left,
1292     XPointer *args,
1293     int num_args)
1294 {
1295     unsigned char const *src;
1296     unsigned char const *srcend;
1297     wchar_t *dst;
1298     wchar_t *dstend;
1299 
1300     if (from == NULL || *from == NULL)
1301 	return 0;
1302 
1303     src = (unsigned char const *) *from;
1304     srcend = src + *from_left;
1305     dst = (wchar_t *) *to;
1306     dstend = dst + *to_left;
1307 
1308     while (src < srcend && dst < dstend)
1309 	*dst++ = (wchar_t) *src++;
1310 
1311     *from = (XPointer) src;
1312     *from_left = srcend - src;
1313     *to = (XPointer) dst;
1314     *to_left = dstend - dst;
1315 
1316     return 0;
1317 }
1318 
1319 static XlcConvMethodsRec methods_strtowcs = {
1320     close_converter,
1321     our_strtowcs,
1322     NULL
1323 };
1324 
1325 static XlcConv
open_strtowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1326 open_strtowcs(
1327     XLCd from_lcd,
1328     const char *from_type,
1329     XLCd to_lcd,
1330     const char *to_type)
1331 {
1332     return create_conv(from_lcd, &methods_strtowcs);
1333 }
1334 
1335 /* from XlcNWideChar to XlcNString */
1336 
1337 static int
our_wcstostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1338 our_wcstostr(
1339     XlcConv conv,
1340     XPointer *from,
1341     int *from_left,
1342     XPointer *to,
1343     int *to_left,
1344     XPointer *args,
1345     int num_args)
1346 {
1347     wchar_t const *src;
1348     wchar_t const *srcend;
1349     unsigned char *dst;
1350     unsigned char *dstend;
1351     int unconv_num;
1352 
1353     if (from == NULL || *from == NULL)
1354 	return 0;
1355 
1356     src = (wchar_t const *) *from;
1357     srcend = src + *from_left;
1358     dst = (unsigned char *) *to;
1359     dstend = dst + *to_left;
1360     unconv_num = 0;
1361 
1362     while (src < srcend && dst < dstend) {
1363 	unsigned int wc = *src++;
1364 	if (wc < 0x80)
1365 	    *dst = wc;
1366 	else {
1367 	    *dst = BAD_CHAR;
1368 	    unconv_num++;
1369 	}
1370 	dst++;
1371     }
1372 
1373     *from = (XPointer) src;
1374     *from_left = srcend - src;
1375     *to = (XPointer) dst;
1376     *to_left = dstend - dst;
1377 
1378     return unconv_num;
1379 }
1380 
1381 static XlcConvMethodsRec methods_wcstostr = {
1382     close_converter,
1383     our_wcstostr,
1384     NULL
1385 };
1386 
1387 static XlcConv
open_wcstostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1388 open_wcstostr(
1389     XLCd from_lcd,
1390     const char *from_type,
1391     XLCd to_lcd,
1392     const char *to_type)
1393 {
1394     return create_conv(from_lcd, &methods_wcstostr);
1395 }
1396 
1397 /* from XlcNCharSet to XlcNWideChar */
1398 
1399 static int
cstowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1400 cstowcs(
1401     XlcConv conv,
1402     XPointer *from,
1403     int *from_left,
1404     XPointer *to,
1405     int *to_left,
1406     XPointer *args,
1407     int num_args)
1408 {
1409     XlcCharSet charset;
1410     const char *name;
1411     Utf8Conv convptr;
1412     int i;
1413     unsigned char const *src;
1414     unsigned char const *srcend;
1415     wchar_t *dst;
1416     wchar_t *dstend;
1417     int unconv_num;
1418 
1419     if (from == NULL || *from == NULL)
1420 	return 0;
1421 
1422     if (num_args < 1)
1423 	return -1;
1424 
1425     charset = (XlcCharSet) args[0];
1426     name = charset->encoding_name;
1427     /* not charset->name because the latter has a ":GL"/":GR" suffix */
1428 
1429     for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1430 	if (!strcmp(convptr->name, name))
1431 	    break;
1432     if (i == 0)
1433 	return -1;
1434 
1435     src = (unsigned char const *) *from;
1436     srcend = src + *from_left;
1437     dst = (wchar_t *) *to;
1438     dstend = dst + *to_left;
1439     unconv_num = 0;
1440 
1441     while (src < srcend && dst < dstend) {
1442 	unsigned int wc;
1443 	int consumed;
1444 
1445 	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1446 	if (consumed == RET_ILSEQ)
1447 	    return -1;
1448 	if (consumed == RET_TOOFEW(0))
1449 	    break;
1450 
1451 	*dst++ = wc;
1452 	src += consumed;
1453     }
1454 
1455     *from = (XPointer) src;
1456     *from_left = srcend - src;
1457     *to = (XPointer) dst;
1458     *to_left = dstend - dst;
1459 
1460     return unconv_num;
1461 }
1462 
1463 static XlcConvMethodsRec methods_cstowcs = {
1464     close_converter,
1465     cstowcs,
1466     NULL
1467 };
1468 
1469 static XlcConv
open_cstowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1470 open_cstowcs(
1471     XLCd from_lcd,
1472     const char *from_type,
1473     XLCd to_lcd,
1474     const char *to_type)
1475 {
1476     lazy_init_all_charsets();
1477     return create_conv(from_lcd, &methods_cstowcs);
1478 }
1479 
1480 /* from XlcNWideChar to XlcNCharSet */
1481 
1482 static int
wcstocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1483 wcstocs(
1484     XlcConv conv,
1485     XPointer *from,
1486     int *from_left,
1487     XPointer *to,
1488     int *to_left,
1489     XPointer *args,
1490     int num_args)
1491 {
1492     Utf8Conv *preferred_charsets;
1493     XlcCharSet last_charset = NULL;
1494     wchar_t const *src;
1495     wchar_t const *srcend;
1496     unsigned char *dst;
1497     unsigned char *dstend;
1498     int unconv_num;
1499 
1500     if (from == NULL || *from == NULL)
1501 	return 0;
1502 
1503     preferred_charsets = (Utf8Conv *) conv->state;
1504     src = (wchar_t const *) *from;
1505     srcend = src + *from_left;
1506     dst = (unsigned char *) *to;
1507     dstend = dst + *to_left;
1508     unconv_num = 0;
1509 
1510     while (src < srcend && dst < dstend) {
1511 	Utf8Conv chosen_charset = NULL;
1512 	XlcSide chosen_side = XlcNONE;
1513 	wchar_t wc = *src;
1514 	int count;
1515 
1516 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1517 	if (count == RET_TOOSMALL)
1518 	    break;
1519 	if (count == RET_ILSEQ) {
1520 	    src++;
1521 	    unconv_num++;
1522 	    continue;
1523 	}
1524 
1525 	if (last_charset == NULL) {
1526 	    last_charset =
1527 	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1528 	    if (last_charset == NULL) {
1529 		src++;
1530 		unconv_num++;
1531 		continue;
1532 	    }
1533 	} else {
1534 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1535 	          && (last_charset->side == XlcGLGR
1536 	              || last_charset->side == chosen_side)))
1537 		break;
1538 	}
1539 	src++;
1540 	dst += count;
1541     }
1542 
1543     if (last_charset == NULL)
1544 	return -1;
1545 
1546     *from = (XPointer) src;
1547     *from_left = srcend - src;
1548     *to = (XPointer) dst;
1549     *to_left = dstend - dst;
1550 
1551     if (num_args >= 1)
1552 	*((XlcCharSet *)args[0]) = last_charset;
1553 
1554     return unconv_num;
1555 }
1556 
1557 static XlcConvMethodsRec methods_wcstocs = {
1558     close_tocs_converter,
1559     wcstocs,
1560     NULL
1561 };
1562 
1563 static XlcConv
open_wcstocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1564 open_wcstocs(
1565     XLCd from_lcd,
1566     const char *from_type,
1567     XLCd to_lcd,
1568     const char *to_type)
1569 {
1570     return create_tocs_conv(from_lcd, &methods_wcstocs);
1571 }
1572 
1573 /* from XlcNWideChar to XlcNChar */
1574 
1575 static int
wcstocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1576 wcstocs1(
1577     XlcConv conv,
1578     XPointer *from,
1579     int *from_left,
1580     XPointer *to,
1581     int *to_left,
1582     XPointer *args,
1583     int num_args)
1584 {
1585     Utf8Conv *preferred_charsets;
1586     XlcCharSet last_charset = NULL;
1587     wchar_t const *src;
1588     wchar_t const *srcend;
1589     unsigned char *dst;
1590     unsigned char *dstend;
1591     int unconv_num;
1592 
1593     if (from == NULL || *from == NULL)
1594 	return 0;
1595 
1596     preferred_charsets = (Utf8Conv *) conv->state;
1597     src = (wchar_t const *) *from;
1598     srcend = src + *from_left;
1599     dst = (unsigned char *) *to;
1600     dstend = dst + *to_left;
1601     unconv_num = 0;
1602 
1603     while (src < srcend && dst < dstend) {
1604 	Utf8Conv chosen_charset = NULL;
1605 	XlcSide chosen_side = XlcNONE;
1606 	wchar_t wc = *src;
1607 	int count;
1608 
1609 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1610 	if (count == RET_TOOSMALL)
1611 	    break;
1612 	if (count == RET_ILSEQ) {
1613 	    src++;
1614 	    unconv_num++;
1615 	    continue;
1616 	}
1617 
1618 	if (last_charset == NULL) {
1619 	    last_charset =
1620 	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1621 	    if (last_charset == NULL) {
1622 		src++;
1623 		unconv_num++;
1624 		continue;
1625 	    }
1626 	} else {
1627 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1628 	          && (last_charset->side == XlcGLGR
1629 	              || last_charset->side == chosen_side)))
1630 		break;
1631 	}
1632 	src++;
1633 	dst += count;
1634 	break;
1635     }
1636 
1637     if (last_charset == NULL)
1638 	return -1;
1639 
1640     *from = (XPointer) src;
1641     *from_left = srcend - src;
1642     *to = (XPointer) dst;
1643     *to_left = dstend - dst;
1644 
1645     if (num_args >= 1)
1646 	*((XlcCharSet *)args[0]) = last_charset;
1647 
1648     return unconv_num;
1649 }
1650 
1651 static XlcConvMethodsRec methods_wcstocs1 = {
1652     close_tocs_converter,
1653     wcstocs1,
1654     NULL
1655 };
1656 
1657 static XlcConv
open_wcstocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1658 open_wcstocs1(
1659     XLCd from_lcd,
1660     const char *from_type,
1661     XLCd to_lcd,
1662     const char *to_type)
1663 {
1664     return create_tocs_conv(from_lcd, &methods_wcstocs1);
1665 }
1666 
1667 /* trivial, no conversion */
1668 
1669 static int
identity(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1670 identity(
1671     XlcConv conv,
1672     XPointer *from,
1673     int *from_left,
1674     XPointer *to,
1675     int *to_left,
1676     XPointer *args,
1677     int num_args)
1678 {
1679     unsigned char const *src;
1680     unsigned char const *srcend;
1681     unsigned char *dst;
1682     unsigned char *dstend;
1683 
1684     if (from == NULL || *from == NULL)
1685 	return 0;
1686 
1687     src = (unsigned char const *) *from;
1688     srcend = src + *from_left;
1689     dst = (unsigned char *) *to;
1690     dstend = dst + *to_left;
1691 
1692     while (src < srcend && dst < dstend)
1693 	*dst++ = *src++;
1694 
1695     *from = (XPointer) src;
1696     *from_left = srcend - src;
1697     *to = (XPointer) dst;
1698     *to_left = dstend - dst;
1699 
1700     return 0;
1701 }
1702 
1703 static XlcConvMethodsRec methods_identity = {
1704     close_converter,
1705     identity,
1706     NULL
1707 };
1708 
1709 static XlcConv
open_identity(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1710 open_identity(
1711     XLCd from_lcd,
1712     const char *from_type,
1713     XLCd to_lcd,
1714     const char *to_type)
1715 {
1716     return create_conv(from_lcd, &methods_identity);
1717 }
1718 
1719 /* from MultiByte/WideChar to FontCharSet. */
1720 /* They really use converters to CharSet
1721  * but with different create_conv procedure. */
1722 
1723 static XlcConv
create_tofontcs_conv(XLCd lcd,XlcConvMethods methods)1724 create_tofontcs_conv(
1725     XLCd lcd,
1726     XlcConvMethods methods)
1727 {
1728     XlcConv conv;
1729     int i, num, k, count;
1730     char **value, buf[32];
1731     Utf8Conv *preferred;
1732 
1733     lazy_init_all_charsets();
1734 
1735     for (i = 0, num = 0;; i++) {
1736 	snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1737 	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1738 	if (count < 1) {
1739 	    snprintf(buf, sizeof(buf), "fs%d.charset", i);
1740 	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1741 	    if (count < 1)
1742 		break;
1743 	}
1744 	num += count;
1745     }
1746 
1747     conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1748     if (conv == (XlcConv) NULL)
1749 	return (XlcConv) NULL;
1750     preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1751 
1752     /* Loop through all fontsets mentioned in the locale. */
1753     for (i = 0, num = 0;; i++) {
1754         snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1755         _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1756         if (count < 1) {
1757             snprintf(buf, sizeof(buf), "fs%d.charset", i);
1758             _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1759             if (count < 1)
1760                 break;
1761         }
1762 	while (count-- > 0) {
1763 	    XlcCharSet charset = _XlcGetCharSet(*value++);
1764 	    const char *name;
1765 
1766 	    if (charset == (XlcCharSet) NULL)
1767 		continue;
1768 
1769 	    name = charset->encoding_name;
1770 	    /* If it wasn't already encountered... */
1771 	    for (k = num - 1; k >= 0; k--)
1772 		if (!strcmp(preferred[k]->name, name))
1773 		    break;
1774 	    if (k < 0) {
1775                 /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1776                 if (!strcmp("ISO10646-1", name)) {
1777                     preferred[num++] = &all_charsets[ucs2_conv_index];
1778                     continue;
1779                 }
1780 		/* Look it up in all_charsets[]. */
1781 		for (k = 0; k < all_charsets_count-1; k++)
1782 		    if (!strcmp(all_charsets[k].name, name)) {
1783 			/* Add it to the preferred set. */
1784 			preferred[num++] = &all_charsets[k];
1785 			break;
1786 		    }
1787 	    }
1788         }
1789     }
1790     preferred[num] = (Utf8Conv) NULL;
1791 
1792     conv->methods = methods;
1793     conv->state = (XPointer) preferred;
1794 
1795     return conv;
1796 }
1797 
1798 static XlcConv
open_wcstofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1799 open_wcstofcs(
1800     XLCd from_lcd,
1801     const char *from_type,
1802     XLCd to_lcd,
1803     const char *to_type)
1804 {
1805     return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1806 }
1807 
1808 static XlcConv
open_utf8tofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1809 open_utf8tofcs(
1810     XLCd from_lcd,
1811     const char *from_type,
1812     XLCd to_lcd,
1813     const char *to_type)
1814 {
1815     return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1816 }
1817 
1818 /* ========================== iconv Stuff ================================ */
1819 
1820 /* from XlcNCharSet to XlcNMultiByte */
1821 
1822 static int
iconv_cstombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1823 iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1824 	      XPointer *to, int *to_left, XPointer *args, int num_args)
1825 {
1826     XlcCharSet charset;
1827     char const *name;
1828     Utf8Conv convptr;
1829     int i;
1830     unsigned char const *src;
1831     unsigned char const *srcend;
1832     unsigned char *dst;
1833     unsigned char *dstend;
1834     int unconv_num;
1835 
1836     if (from == NULL || *from == NULL)
1837 	return 0;
1838 
1839     if (num_args < 1)
1840 	return -1;
1841 
1842     charset = (XlcCharSet) args[0];
1843     name = charset->encoding_name;
1844     /* not charset->name because the latter has a ":GL"/":GR" suffix */
1845 
1846     for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1847 	if (!strcmp(convptr->name, name))
1848 	    break;
1849     if (i == 0)
1850 	return -1;
1851 
1852     src = (unsigned char const *) *from;
1853     srcend = src + *from_left;
1854     dst = (unsigned char *) *to;
1855     dstend = dst + *to_left;
1856     unconv_num = 0;
1857 
1858     while (src < srcend) {
1859 	ucs4_t wc;
1860 	int consumed;
1861 	int count;
1862 
1863 	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1864 	if (consumed == RET_ILSEQ)
1865 	    return -1;
1866 	if (consumed == RET_TOOFEW(0))
1867 	    break;
1868 
1869     /* Use stdc iconv to convert widechar -> multibyte */
1870 
1871 	count = wctomb((char *)dst, wc);
1872 	if (count == 0)
1873 	    break;
1874 	if (count == -1) {
1875 	    count = wctomb((char *)dst, BAD_WCHAR);
1876 	    if (count == 0)
1877 		break;
1878 	    unconv_num++;
1879 	}
1880 	src += consumed;
1881 	dst += count;
1882     }
1883 
1884     *from = (XPointer) src;
1885     *from_left = srcend - src;
1886     *to = (XPointer) dst;
1887     *to_left = dstend - dst;
1888 
1889     return unconv_num;
1890 
1891 }
1892 
1893 static XlcConvMethodsRec iconv_cstombs_methods = {
1894     close_converter,
1895     iconv_cstombs,
1896     NULL
1897 };
1898 
1899 static XlcConv
open_iconv_cstombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1900 open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1901 {
1902     lazy_init_all_charsets();
1903     return create_conv(from_lcd, &iconv_cstombs_methods);
1904 }
1905 
1906 static int
iconv_mbstocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1907 iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1908 	      XPointer *to, int *to_left, XPointer *args, int num_args)
1909 {
1910     Utf8Conv *preferred_charsets;
1911     XlcCharSet last_charset = NULL;
1912     unsigned char const *src;
1913     unsigned char const *srcend;
1914     unsigned char *dst;
1915     unsigned char *dstend;
1916     int unconv_num;
1917 
1918     if (from == NULL || *from == NULL)
1919 	return 0;
1920 
1921     preferred_charsets = (Utf8Conv *) conv->state;
1922     src = (unsigned char const *) *from;
1923     srcend = src + *from_left;
1924     dst = (unsigned char *) *to;
1925     dstend = dst + *to_left;
1926     unconv_num = 0;
1927 
1928     while (src < srcend && dst < dstend) {
1929 	Utf8Conv chosen_charset = NULL;
1930 	XlcSide chosen_side = XlcNONE;
1931 	wchar_t wc;
1932 	int consumed;
1933 	int count;
1934 
1935     /* Uses stdc iconv to convert multibyte -> widechar */
1936 
1937 	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
1938 	if (consumed == 0)
1939 	    break;
1940 	if (consumed == -1) {
1941 	    src++;
1942 	    unconv_num++;
1943 	    continue;
1944 	}
1945 
1946 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1947 
1948 	if (count == RET_TOOSMALL)
1949 	    break;
1950 	if (count == RET_ILSEQ) {
1951 	    src += consumed;
1952 	    unconv_num++;
1953 	    continue;
1954 	}
1955 
1956 	if (last_charset == NULL) {
1957 	    last_charset =
1958 	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1959 	    if (last_charset == NULL) {
1960 		src += consumed;
1961 		unconv_num++;
1962 		continue;
1963 	    }
1964 	} else {
1965 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1966 	          && (last_charset->side == XlcGLGR
1967 	              || last_charset->side == chosen_side)))
1968 		break;
1969 	}
1970 	src += consumed;
1971 	dst += count;
1972     }
1973 
1974     if (last_charset == NULL)
1975 	return -1;
1976 
1977     *from = (XPointer) src;
1978     *from_left = srcend - src;
1979     *to = (XPointer) dst;
1980     *to_left = dstend - dst;
1981 
1982     if (num_args >= 1)
1983 	*((XlcCharSet *)args[0]) = last_charset;
1984 
1985     return unconv_num;
1986 }
1987 
1988 static XlcConvMethodsRec iconv_mbstocs_methods = {
1989     close_tocs_converter,
1990     iconv_mbstocs,
1991     NULL
1992 };
1993 
1994 static XlcConv
open_iconv_mbstocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1995 open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1996 {
1997     return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1998 }
1999 
2000 /* from XlcNMultiByte to XlcNChar */
2001 
2002 static int
iconv_mbtocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2003 iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
2004 	     XPointer *to, int *to_left, XPointer *args, int num_args)
2005 {
2006     Utf8Conv *preferred_charsets;
2007     XlcCharSet last_charset = NULL;
2008     unsigned char const *src;
2009     unsigned char const *srcend;
2010     unsigned char *dst;
2011     unsigned char *dstend;
2012     int unconv_num;
2013 
2014     if (from == NULL || *from == NULL)
2015 	return 0;
2016 
2017     preferred_charsets = (Utf8Conv *) conv->state;
2018     src = (unsigned char const *) *from;
2019     srcend = src + *from_left;
2020     dst = (unsigned char *) *to;
2021     dstend = dst + *to_left;
2022     unconv_num = 0;
2023 
2024     while (src < srcend && dst < dstend) {
2025 	Utf8Conv chosen_charset = NULL;
2026 	XlcSide chosen_side = XlcNONE;
2027 	wchar_t wc;
2028 	int consumed;
2029 	int count;
2030 
2031     /* Uses stdc iconv to convert multibyte -> widechar */
2032 
2033 	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2034 	if (consumed == 0)
2035 	    break;
2036 	if (consumed == -1) {
2037 	    src++;
2038 	    unconv_num++;
2039 	    continue;
2040 	}
2041 
2042 	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2043 	if (count == RET_TOOSMALL)
2044 	    break;
2045 	if (count == RET_ILSEQ) {
2046 	    src += consumed;
2047 	    unconv_num++;
2048 	    continue;
2049 	}
2050 
2051 	if (last_charset == NULL) {
2052 	    last_charset =
2053 		_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2054 	    if (last_charset == NULL) {
2055 		src += consumed;
2056 		unconv_num++;
2057 		continue;
2058 	    }
2059 	} else {
2060 	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2061 		  && (last_charset->side == XlcGLGR
2062 		      || last_charset->side == chosen_side)))
2063 		break;
2064 	}
2065 	src += consumed;
2066 	dst += count;
2067     }
2068 
2069     if (last_charset == NULL)
2070 	return -1;
2071 
2072     *from = (XPointer) src;
2073     *from_left = srcend - src;
2074     *to = (XPointer) dst;
2075     *to_left = dstend - dst;
2076 
2077     if (num_args >= 1)
2078 	*((XlcCharSet *)args[0]) = last_charset;
2079 
2080     return unconv_num;
2081 }
2082 
2083 static XlcConvMethodsRec iconv_mbtocs_methods = {
2084     close_tocs_converter,
2085     iconv_mbtocs,
2086     NULL
2087 };
2088 
2089 static XlcConv
open_iconv_mbtocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2090 open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2091 {
2092     return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2093 }
2094 
2095 /* from XlcNMultiByte to XlcNString */
2096 
2097 static int
iconv_mbstostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2098 iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2099 	       XPointer *to, int *to_left, XPointer *args, int num_args)
2100 {
2101     unsigned char const *src;
2102     unsigned char const *srcend;
2103     unsigned char *dst;
2104     unsigned char *dstend;
2105     int unconv_num;
2106 
2107     if (from == NULL || *from == NULL)
2108 	return 0;
2109 
2110     src = (unsigned char const *) *from;
2111     srcend = src + *from_left;
2112     dst = (unsigned char *) *to;
2113     dstend = dst + *to_left;
2114     unconv_num = 0;
2115 
2116     while (src < srcend) {
2117 	unsigned char c;
2118 	wchar_t wc;
2119 	int consumed;
2120 
2121     /* Uses stdc iconv to convert multibyte -> widechar */
2122 
2123 	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2124 	if (consumed == 0)
2125 	    break;
2126 	if (dst == dstend)
2127 	    break;
2128 	if (consumed == -1) {
2129 	    consumed = 1;
2130 	    c = BAD_CHAR;
2131 	    unconv_num++;
2132 	} else {
2133 	    if ((wc & ~(wchar_t)0xff) != 0) {
2134 		c = BAD_CHAR;
2135 		unconv_num++;
2136 	    } else
2137 		c = (unsigned char) wc;
2138 	}
2139 	*dst++ = c;
2140 	src += consumed;
2141     }
2142 
2143     *from = (XPointer) src;
2144     *from_left = srcend - src;
2145     *to = (XPointer) dst;
2146     *to_left = dstend - dst;
2147 
2148     return unconv_num;
2149 }
2150 
/*
 * Method table handed to create_conv() by open_iconv_mbstostr().
 * The slot meanings come from XlcConvMethodsRec, which is declared outside
 * this part of the file (presumably close / convert / reset -- confirm
 * against the declaration).
 */
static XlcConvMethodsRec iconv_mbstostr_methods = {
    close_converter,		/* first slot: converter teardown routine */
    iconv_mbstostr,		/* second slot: the conversion routine */
    NULL			/* third slot: not provided */
};
2156 
2157 static XlcConv
open_iconv_mbstostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2158 open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2159 {
2160     return create_conv(from_lcd, &iconv_mbstostr_methods);
2161 }
2162 
2163 /* from XlcNString to XlcNMultiByte */
2164 static int
iconv_strtombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2165 iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2166 	       XPointer *to, int *to_left, XPointer *args, int num_args)
2167 {
2168     unsigned char const *src;
2169     unsigned char const *srcend;
2170     unsigned char *dst;
2171     unsigned char *dstend;
2172 
2173     if (from == NULL || *from == NULL)
2174 	return 0;
2175 
2176     src = (unsigned char const *) *from;
2177     srcend = src + *from_left;
2178     dst = (unsigned char *) *to;
2179     dstend = dst + *to_left;
2180 
2181     while (src < srcend) {
2182 	int count = wctomb((char *)dst, *src);
2183 	if (count < 0)
2184 	    break;
2185 	dst += count;
2186 	src++;
2187     }
2188 
2189     *from = (XPointer) src;
2190     *from_left = srcend - src;
2191     *to = (XPointer) dst;
2192     *to_left = dstend - dst;
2193 
2194     return 0;
2195 }
2196 
/*
 * Method table handed to create_conv() by open_iconv_strtombs().
 * The slot meanings come from XlcConvMethodsRec, which is declared outside
 * this part of the file (presumably close / convert / reset -- confirm
 * against the declaration).
 */
static XlcConvMethodsRec iconv_strtombs_methods= {
    close_converter,		/* first slot: converter teardown routine */
    iconv_strtombs,		/* second slot: the conversion routine */
    NULL			/* third slot: not provided */
};
2202 
2203 static XlcConv
open_iconv_strtombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2204 open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2205 {
2206     return create_conv(from_lcd, &iconv_strtombs_methods);
2207 }
2208 
2209 /***************************************************************************/
/* Part II: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is the iconv-style locale encoding,
 * which the converters below handle through the C library's mbtowc() and
 * wctomb(), and that `wchar_t' is Unicode.
 */
2214 
2215 /* from XlcNMultiByte to XlcNWideChar */
2216 static int
iconv_mbstowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2217 iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2218 	       XPointer *to, int *to_left, XPointer *args,  int num_args)
2219 {
2220     char *src = *((char **) from);
2221     wchar_t *dst = *((wchar_t **) to);
2222     int src_left = *from_left;
2223     int dst_left = *to_left;
2224     int length, unconv_num = 0;
2225 
2226     while (src_left > 0 && dst_left > 0) {
2227 	length = mbtowc(dst, src, (size_t) src_left);
2228 
2229 	if (length > 0) {
2230 	    src += length;
2231 	    src_left -= length;
2232 	    if (dst)
2233 	        dst++;
2234 	    dst_left--;
2235 	} else if (length < 0) {
2236 	    src++;
2237 	    src_left--;
2238 	    unconv_num++;
2239         } else {
2240             /* null ? */
2241             src++;
2242             src_left--;
2243             if (dst)
2244                 *dst++ = L'\0';
2245             dst_left--;
2246         }
2247     }
2248 
2249     *from = (XPointer) src;
2250     if (dst)
2251 	*to = (XPointer) dst;
2252     *from_left = src_left;
2253     *to_left = dst_left;
2254 
2255     return unconv_num;
2256 }
2257 
/*
 * Method table handed to create_conv() by open_iconv_mbstowcs().
 * The slot meanings come from XlcConvMethodsRec, which is declared outside
 * this part of the file (presumably close / convert / reset -- confirm
 * against the declaration).
 */
static XlcConvMethodsRec iconv_mbstowcs_methods = {
    close_converter,		/* first slot: converter teardown routine */
    iconv_mbstowcs,		/* second slot: the conversion routine */
    NULL			/* third slot: not provided */
} ;
2263 
2264 static XlcConv
open_iconv_mbstowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2265 open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2266 {
2267     return create_conv(from_lcd, &iconv_mbstowcs_methods);
2268 }
2269 
2270 static int
iconv_wcstombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2271 iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2272 	       XPointer *to, int *to_left, XPointer *args, int num_args)
2273 {
2274     wchar_t *src = *((wchar_t **) from);
2275     char *dst = *((char **) to);
2276     int src_left = *from_left;
2277     int dst_left = *to_left;
2278     int length, unconv_num = 0;
2279 
2280     while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2281 	length = wctomb(dst, *src);		/* XXX */
2282 
2283         if (length > 0) {
2284 	    src++;
2285 	    src_left--;
2286 	    if (dst)
2287 		dst += length;
2288 	    dst_left -= length;
2289 	} else if (length < 0) {
2290 	    src++;
2291 	    src_left--;
2292 	    unconv_num++;
2293 	}
2294     }
2295 
2296     *from = (XPointer) src;
2297     if (dst)
2298       *to = (XPointer) dst;
2299     *from_left = src_left;
2300     *to_left = dst_left;
2301 
2302     return unconv_num;
2303 }
2304 
/*
 * Method table handed to create_conv() by open_iconv_wcstombs().
 * The slot meanings come from XlcConvMethodsRec, which is declared outside
 * this part of the file (presumably close / convert / reset -- confirm
 * against the declaration).
 */
static XlcConvMethodsRec iconv_wcstombs_methods = {
    close_converter,		/* first slot: converter teardown routine */
    iconv_wcstombs,		/* second slot: the conversion routine */
    NULL			/* third slot: not provided */
} ;
2310 
2311 static XlcConv
open_iconv_wcstombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2312 open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2313 {
2314     return create_conv(from_lcd, &iconv_wcstombs_methods);
2315 }
2316 
2317 static XlcConv
open_iconv_mbstofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2318 open_iconv_mbstofcs(
2319     XLCd from_lcd,
2320     const char *from_type,
2321     XLCd to_lcd,
2322     const char *to_type)
2323 {
2324     return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2325 }
2326 
/*
 * Registers UTF-8 converters for a UTF-8 locale.
 *
 * Every call below installs one converter opener on lcd through
 * _XlcSetConverter(); source and destination locale are both lcd, and the
 * two type names select the conversion direction.
 */

void
_XlcAddUtf8LocaleConverters(
    XLCd lcd)
{
    /* Register elementary converters. */

    /* multibyte (UTF-8) <-> wide char */
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);

    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
    /* wide char <-> string */
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);

    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);

    /* Register converters for XlcNCharSet. This implicitly provides
     * converters from and to XlcNCompoundText. */

    /* charset <-> multibyte, plus the XlcNChar variant */
    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);

    /* charset <-> wide char, plus the XlcNChar variant */
    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);

    /* string <-> multibyte; XlcNUtf8String <-> multibyte uses open_identity */
    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);

    /* Register converters for XlcNFontCharSet */
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
}
2362 
/*
 * Registers the converters for a GB18030 locale.
 *
 * Every call below installs one converter opener on lcd through
 * _XlcSetConverter(); source and destination locale are both lcd.  All
 * conversions that involve XlcNMultiByte use the open_iconv_* openers
 * defined in this file; the conversions between XlcNWideChar and
 * XlcNString / XlcNCharSet / XlcNChar / XlcNFontCharSet use the same
 * openers that _XlcAddUtf8LocaleConverters() registers.
 */
void
_XlcAddGB18030LocaleConverters(
    XLCd lcd)
{

    /* Register elementary converters. */
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);

    /* Register converters for XlcNCharSet. This implicitly provides
     * converters from and to XlcNCompoundText. */

    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
    /* string <-> multibyte */
    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);

    /* Register converters for XlcNFontCharSet */
    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);

    /* Wide-character conversions that do not involve XlcNMultiByte. */
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);

    /* Register converters for XlcNFontCharSet */
    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
}
2393