1 /******************************************************************
2
3 Copyright 1993 by SunSoft, Inc.
4 Copyright 1999-2000 by Bruno Haible
5
6 Permission to use, copy, modify, distribute, and sell this software
7 and its documentation for any purpose is hereby granted without fee,
8 provided that the above copyright notice appear in all copies and
9 that both that copyright notice and this permission notice appear
10 in supporting documentation, and that the names of SunSoft, Inc. and
11 Bruno Haible not be used in advertising or publicity pertaining to
12 distribution of the software without specific, written prior
13 permission. SunSoft, Inc. and Bruno Haible make no representations
14 about the suitability of this software for any purpose. It is
15 provided "as is" without express or implied warranty.
16
17 SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
18 TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
19 AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
20 FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
22 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
23 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24
25 ******************************************************************/
26
27 /*
28 * This file contains:
29 *
30 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
31 *
32 * Used for three purposes:
33 * 1. The UTF-8 locales, see below.
34 * 2. Unicode aware applications for which the use of 8-bit character
35 * sets is an anachronism.
36 * 3. For conversion from keysym to locale encoding.
37 *
38 * II. Conversion files for an UTF-8 locale loader.
39 * Supports: all locales with codeset UTF-8.
40 * How: Provides converters for UTF-8.
41 * Platforms: all systems.
42 *
43 * The loader itself is located in lcUTF8.c.
44 */
45
46 /*
47 * The conversion from UTF-8 to CompoundText is realized in a very
48 * conservative way. Recall that CompoundText data is used for inter-client
49 * communication purposes. We distinguish three classes of clients:
50 * - Clients which accept only those pieces of CompoundText which belong to
51 * the character set understood by the current locale.
52 * (Example: clients which are linked to an older X11 library.)
53 * - Clients which accept CompoundText with multiple character sets and parse
54 * it themselves.
55 * (Example: emacs, xemacs.)
56 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
57 * functions for the conversion of CompoundText to their current locale's
58 * multi-byte/wide-character format.
59 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
60 * follows. For every character, it first tests whether the character is
61 * representable in the current locale's original (non-UTF-8) character set.
62 * If not, it goes through the list of predefined character sets for
63 * CompoundText and tests if the character is representable in that character
64 * set. If so, it encodes the character using its code within that character
65 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since
66 * clients of the first and second kind ignore such encapsulated text,
67 * this encapsulation is kept to a minimum and terminated as early as possible.
68 *
69 * In a distant future, when clients of the first and second kind will have
70 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
71 * without first going through the list of predefined character sets.
72 */
73
74 #ifdef HAVE_CONFIG_H
75 #include <config.h>
76 #endif
77 #include <stdio.h>
78 #include "Xlibint.h"
79 #include "XlcPubI.h"
80 #include "XlcGeneric.h"
81
82 static XlcConv
create_conv(XLCd lcd,XlcConvMethods methods)83 create_conv(
84 XLCd lcd,
85 XlcConvMethods methods)
86 {
87 XlcConv conv;
88
89 conv = Xmalloc(sizeof(XlcConvRec));
90 if (conv == (XlcConv) NULL)
91 return (XlcConv) NULL;
92
93 conv->methods = methods;
94 conv->state = NULL;
95
96 return conv;
97 }
98
99 static void
close_converter(XlcConv conv)100 close_converter(
101 XlcConv conv)
102 {
103 Xfree(conv);
104 }
105
/* Substitutes written in place of an invalid multibyte sequence or an
   unrepresentable wide character. */
#define BAD_WCHAR ((ucs4_t) 0xfffd)   /* U+FFFD, used where Unicode is produced */
#define BAD_CHAR '?'                  /* used where single bytes are produced */
109
110 /***************************************************************************/
111 /* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
112 *
113 * Note that this code works in any locale. We store Unicode values in
114 * `ucs4_t' variables, but don't pass them to the user.
115 *
116 * This code has to support all character sets that are used for CompoundText,
117 * nothing more, nothing less. See the table in lcCT.c.
118 * Since the conversion _to_ CompoundText is likely to need the tables for all
119 * character sets at once, we don't use dynamic loading (of tables or shared
120 * libraries through iconv()). Use a fixed set of tables instead.
121 *
122 * We use statically computed tables, not dynamically allocated arrays,
123 * because it's more memory efficient: Different processes using the same
124 * libX11 shared library share the "text" and read-only "data" sections.
125 */
126
127 typedef unsigned int ucs4_t;
128 #define conv_t XlcConv
129
130 typedef struct _Utf8ConvRec {
131 const char *name;
132 XrmQuark xrm_name;
133 int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
134 int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
135 } Utf8ConvRec, *Utf8Conv;
136
137 /*
138 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
139 * converts the byte sequence starting at s to a wide character. Up to n bytes
140 * are available at s. n is >= 1.
141 * Result is number of bytes consumed (if a wide character was read),
142 * or 0 if invalid, or -1 if n too small.
143 *
144 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
145 * converts the wide character wc to the character set xxx, and stores the
146 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
147 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
148 */
149
/*
 * Result codes of the per-charset routines.
 * Note that RET_TOOFEW(0) and RET_TOOSMALL share the value -1.
 */
#define RET_ILSEQ 0             /* invalid (xxx_mbtowc, xxx_wctomb) */
#define RET_TOOFEW(n) (-1-(n))  /* only a shift sequence of n bytes was read (xxx_mbtowc) */
#define RET_TOOSMALL -1         /* output buffer is too small (xxx_wctomb, xxx_reset) */
156
157 /*
158 * The tables below are bijective. It would be possible to extend the
159 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
160 * but *only* with characters not contained in any other table, and *only*
161 * when the current locale is not an UTF-8 locale.
162 */
163
164 #include "lcUniConv/utf8.h"
165 #include "lcUniConv/ucs2be.h"
166 #ifdef notused
167 #include "lcUniConv/ascii.h"
168 #endif
169 #include "lcUniConv/iso8859_1.h"
170 #include "lcUniConv/iso8859_2.h"
171 #include "lcUniConv/iso8859_3.h"
172 #include "lcUniConv/iso8859_4.h"
173 #include "lcUniConv/iso8859_5.h"
174 #include "lcUniConv/iso8859_6.h"
175 #include "lcUniConv/iso8859_7.h"
176 #include "lcUniConv/iso8859_8.h"
177 #include "lcUniConv/iso8859_9.h"
178 #include "lcUniConv/iso8859_10.h"
179 #include "lcUniConv/iso8859_11.h"
180 #include "lcUniConv/iso8859_13.h"
181 #include "lcUniConv/iso8859_14.h"
182 #include "lcUniConv/iso8859_15.h"
183 #include "lcUniConv/iso8859_16.h"
184 #include "lcUniConv/iso8859_9e.h"
185 #include "lcUniConv/jisx0201.h"
186 #include "lcUniConv/tis620.h"
187 #include "lcUniConv/koi8_r.h"
188 #include "lcUniConv/koi8_u.h"
189 #include "lcUniConv/koi8_c.h"
190 #include "lcUniConv/armscii_8.h"
191 #include "lcUniConv/cp1133.h"
192 #include "lcUniConv/mulelao.h"
193 #include "lcUniConv/viscii.h"
194 #include "lcUniConv/tcvn.h"
195 #include "lcUniConv/georgian_academy.h"
196 #include "lcUniConv/georgian_ps.h"
197 #include "lcUniConv/cp1251.h"
198 #include "lcUniConv/cp1255.h"
199 #include "lcUniConv/cp1256.h"
200 #include "lcUniConv/tatar_cyr.h"
201
/*
 * Summary record for a group of table entries.
 * NOTE(review): presumably consumed by the multi-byte charset tables
 * included right after this definition -- confirm in lcUniConv/.
 */
typedef struct {
    unsigned short indx;    /* index into big table */
    unsigned short used;    /* bitmask of used entries */
} Summary16;
206
207 #include "lcUniConv/gb2312.h"
208 #include "lcUniConv/jisx0208.h"
209 #include "lcUniConv/jisx0212.h"
210 #include "lcUniConv/ksc5601.h"
211 #include "lcUniConv/big5.h"
212 #include "lcUniConv/big5_emacs.h"
213 #include "lcUniConv/big5hkscs.h"
214 #include "lcUniConv/gbk.h"
215
216 static Utf8ConvRec all_charsets[] = {
217 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
218 (for lookup speed), once at the end (as a fallback). */
219 { "ISO10646-1", NULLQUARK,
220 utf8_mbtowc, utf8_wctomb
221 },
222
223 { "ISO8859-1", NULLQUARK,
224 iso8859_1_mbtowc, iso8859_1_wctomb
225 },
226 { "ISO8859-2", NULLQUARK,
227 iso8859_2_mbtowc, iso8859_2_wctomb
228 },
229 { "ISO8859-3", NULLQUARK,
230 iso8859_3_mbtowc, iso8859_3_wctomb
231 },
232 { "ISO8859-4", NULLQUARK,
233 iso8859_4_mbtowc, iso8859_4_wctomb
234 },
235 { "ISO8859-5", NULLQUARK,
236 iso8859_5_mbtowc, iso8859_5_wctomb
237 },
238 { "ISO8859-6", NULLQUARK,
239 iso8859_6_mbtowc, iso8859_6_wctomb
240 },
241 { "ISO8859-7", NULLQUARK,
242 iso8859_7_mbtowc, iso8859_7_wctomb
243 },
244 { "ISO8859-8", NULLQUARK,
245 iso8859_8_mbtowc, iso8859_8_wctomb
246 },
247 { "ISO8859-9", NULLQUARK,
248 iso8859_9_mbtowc, iso8859_9_wctomb
249 },
250 { "ISO8859-10", NULLQUARK,
251 iso8859_10_mbtowc, iso8859_10_wctomb
252 },
253 { "ISO8859-11", NULLQUARK,
254 iso8859_11_mbtowc, iso8859_11_wctomb
255 },
256 { "ISO8859-13", NULLQUARK,
257 iso8859_13_mbtowc, iso8859_13_wctomb
258 },
259 { "ISO8859-14", NULLQUARK,
260 iso8859_14_mbtowc, iso8859_14_wctomb
261 },
262 { "ISO8859-15", NULLQUARK,
263 iso8859_15_mbtowc, iso8859_15_wctomb
264 },
265 { "ISO8859-16", NULLQUARK,
266 iso8859_16_mbtowc, iso8859_16_wctomb
267 },
268 { "JISX0201.1976-0", NULLQUARK,
269 jisx0201_mbtowc, jisx0201_wctomb
270 },
271 { "TIS620-0", NULLQUARK,
272 tis620_mbtowc, tis620_wctomb
273 },
274 { "GB2312.1980-0", NULLQUARK,
275 gb2312_mbtowc, gb2312_wctomb
276 },
277 { "JISX0208.1983-0", NULLQUARK,
278 jisx0208_mbtowc, jisx0208_wctomb
279 },
280 { "JISX0208.1990-0", NULLQUARK,
281 jisx0208_mbtowc, jisx0208_wctomb
282 },
283 { "JISX0212.1990-0", NULLQUARK,
284 jisx0212_mbtowc, jisx0212_wctomb
285 },
286 { "KSC5601.1987-0", NULLQUARK,
287 ksc5601_mbtowc, ksc5601_wctomb
288 },
289 { "KOI8-R", NULLQUARK,
290 koi8_r_mbtowc, koi8_r_wctomb
291 },
292 { "KOI8-U", NULLQUARK,
293 koi8_u_mbtowc, koi8_u_wctomb
294 },
295 { "KOI8-C", NULLQUARK,
296 koi8_c_mbtowc, koi8_c_wctomb
297 },
298 { "TATAR-CYR", NULLQUARK,
299 tatar_cyr_mbtowc, tatar_cyr_wctomb
300 },
301 { "ARMSCII-8", NULLQUARK,
302 armscii_8_mbtowc, armscii_8_wctomb
303 },
304 { "IBM-CP1133", NULLQUARK,
305 cp1133_mbtowc, cp1133_wctomb
306 },
307 { "MULELAO-1", NULLQUARK,
308 mulelao_mbtowc, mulelao_wctomb
309 },
310 { "VISCII1.1-1", NULLQUARK,
311 viscii_mbtowc, viscii_wctomb
312 },
313 { "TCVN-5712", NULLQUARK,
314 tcvn_mbtowc, tcvn_wctomb
315 },
316 { "GEORGIAN-ACADEMY", NULLQUARK,
317 georgian_academy_mbtowc, georgian_academy_wctomb
318 },
319 { "GEORGIAN-PS", NULLQUARK,
320 georgian_ps_mbtowc, georgian_ps_wctomb
321 },
322 { "ISO8859-9E", NULLQUARK,
323 iso8859_9e_mbtowc, iso8859_9e_wctomb
324 },
325 { "MICROSOFT-CP1251", NULLQUARK,
326 cp1251_mbtowc, cp1251_wctomb
327 },
328 { "MICROSOFT-CP1255", NULLQUARK,
329 cp1255_mbtowc, cp1255_wctomb
330 },
331 { "MICROSOFT-CP1256", NULLQUARK,
332 cp1256_mbtowc, cp1256_wctomb
333 },
334 { "BIG5-0", NULLQUARK,
335 big5_mbtowc, big5_wctomb
336 },
337 { "BIG5-E0", NULLQUARK,
338 big5_0_mbtowc, big5_0_wctomb
339 },
340 { "BIG5-E1", NULLQUARK,
341 big5_1_mbtowc, big5_1_wctomb
342 },
343 { "GBK-0", NULLQUARK,
344 gbk_mbtowc, gbk_wctomb
345 },
346 { "BIG5HKSCS-0", NULLQUARK,
347 big5hkscs_mbtowc, big5hkscs_wctomb
348 },
349
350 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
351 (for lookup speed), once at the end (as a fallback). */
352 { "ISO10646-1", NULLQUARK,
353 utf8_mbtowc, utf8_wctomb
354 },
355
356 /* Encoding ISO10646-1 for fonts means UCS2-like encoding
357 so for conversion to FontCharSet we need this record */
358 { "ISO10646-1", NULLQUARK,
359 ucs2be_mbtowc, ucs2be_wctomb
360 }
361 };
362
/* Total number of rows in all_charsets[], the trailing UCS-2 row included. */
#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0]))
/* Number of rows that precede the trailing UCS-2 row. */
#define all_charsets_count (charsets_table_size - 1)
/* Index of the trailing UCS-2 row. */
#define ucs2_conv_index (charsets_table_size - 1)
366
367 static void
init_all_charsets(void)368 init_all_charsets (void)
369 {
370 Utf8Conv convptr;
371 int i;
372
373 for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
374 convptr->xrm_name = XrmStringToQuark(convptr->name);
375 }
376
/*
 * Runs init_all_charsets() unless the first table row already carries a
 * quark, i.e. unless the table has been initialised before.
 */
#define lazy_init_all_charsets()                        \
    do {                                                \
        if (all_charsets[0].xrm_name == NULLQUARK) {    \
            init_all_charsets();                        \
        }                                               \
    } while (0)
382
383 /* from XlcNCharSet to XlcNUtf8String */
384
385 static int
cstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)386 cstoutf8(
387 XlcConv conv,
388 XPointer *from,
389 int *from_left,
390 XPointer *to,
391 int *to_left,
392 XPointer *args,
393 int num_args)
394 {
395 XlcCharSet charset;
396 const char *name;
397 Utf8Conv convptr;
398 int i;
399 unsigned char const *src;
400 unsigned char const *srcend;
401 unsigned char *dst;
402 unsigned char *dstend;
403 int unconv_num;
404
405 if (from == NULL || *from == NULL)
406 return 0;
407
408 if (num_args < 1)
409 return -1;
410
411 charset = (XlcCharSet) args[0];
412 name = charset->encoding_name;
413 /* not charset->name because the latter has a ":GL"/":GR" suffix */
414
415 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
416 if (!strcmp(convptr->name, name))
417 break;
418 if (i == 0)
419 return -1;
420
421 src = (unsigned char const *) *from;
422 srcend = src + *from_left;
423 dst = (unsigned char *) *to;
424 dstend = dst + *to_left;
425 unconv_num = 0;
426
427 while (src < srcend) {
428 ucs4_t wc;
429 int consumed;
430 int count;
431
432 consumed = convptr->cstowc(conv, &wc, src, srcend-src);
433 if (consumed == RET_ILSEQ)
434 return -1;
435 if (consumed == RET_TOOFEW(0))
436 break;
437
438 count = utf8_wctomb(NULL, dst, wc, dstend-dst);
439 if (count == RET_TOOSMALL)
440 break;
441 if (count == RET_ILSEQ) {
442 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
443 if (count == RET_TOOSMALL)
444 break;
445 unconv_num++;
446 }
447 src += consumed;
448 dst += count;
449 }
450
451 *from = (XPointer) src;
452 *from_left = srcend - src;
453 *to = (XPointer) dst;
454 *to_left = dstend - dst;
455
456 return unconv_num;
457 }
458
459 static XlcConvMethodsRec methods_cstoutf8 = {
460 close_converter,
461 cstoutf8,
462 NULL
463 };
464
465 static XlcConv
open_cstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)466 open_cstoutf8(
467 XLCd from_lcd,
468 const char *from_type,
469 XLCd to_lcd,
470 const char *to_type)
471 {
472 lazy_init_all_charsets();
473 return create_conv(from_lcd, &methods_cstoutf8);
474 }
475
476 /* from XlcNUtf8String to XlcNCharSet */
477
478 static XlcConv
create_tocs_conv(XLCd lcd,XlcConvMethods methods)479 create_tocs_conv(
480 XLCd lcd,
481 XlcConvMethods methods)
482 {
483 XlcConv conv;
484 CodeSet *codeset_list;
485 int codeset_num;
486 int charset_num;
487 int i, j, k;
488 Utf8Conv *preferred;
489
490 lazy_init_all_charsets();
491
492 codeset_list = XLC_GENERIC(lcd, codeset_list);
493 codeset_num = XLC_GENERIC(lcd, codeset_num);
494
495 charset_num = 0;
496 for (i = 0; i < codeset_num; i++)
497 charset_num += codeset_list[i]->num_charsets;
498 if (charset_num > all_charsets_count-1)
499 charset_num = all_charsets_count-1;
500
501 conv = Xmalloc(sizeof(XlcConvRec)
502 + (charset_num + 1) * sizeof(Utf8Conv));
503 if (conv == (XlcConv) NULL)
504 return (XlcConv) NULL;
505 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
506
507 /* Loop through all codesets mentioned in the locale. */
508 charset_num = 0;
509 for (i = 0; i < codeset_num; i++) {
510 XlcCharSet *charsets = codeset_list[i]->charset_list;
511 int num_charsets = codeset_list[i]->num_charsets;
512 for (j = 0; j < num_charsets; j++) {
513 const char *name = charsets[j]->encoding_name;
514 /* If it wasn't already encountered... */
515 for (k = charset_num-1; k >= 0; k--)
516 if (!strcmp(preferred[k]->name, name))
517 break;
518 if (k < 0) {
519 /* Look it up in all_charsets[]. */
520 for (k = 0; k < all_charsets_count-1; k++)
521 if (!strcmp(all_charsets[k].name, name)) {
522 /* Add it to the preferred set. */
523 preferred[charset_num++] = &all_charsets[k];
524 break;
525 }
526 }
527 }
528 }
529 preferred[charset_num] = (Utf8Conv) NULL;
530
531 conv->methods = methods;
532 conv->state = (XPointer) preferred;
533
534 return conv;
535 }
536
537 static void
close_tocs_converter(XlcConv conv)538 close_tocs_converter(
539 XlcConv conv)
540 {
541 /* conv->state is allocated together with conv, free both at once. */
542 Xfree(conv);
543 }
544
545 /*
546 * Converts a Unicode character to an appropriate character set. The NULL
547 * terminated array of preferred character sets is passed as first argument.
548 * If successful, *charsetp is set to the character set that was used, and
549 * *sidep is set to the character set side (XlcGL or XlcGR).
550 */
551 static int
charset_wctocs(Utf8Conv * preferred,Utf8Conv * charsetp,XlcSide * sidep,XlcConv conv,unsigned char * r,ucs4_t wc,int n)552 charset_wctocs(
553 Utf8Conv *preferred,
554 Utf8Conv *charsetp,
555 XlcSide *sidep,
556 XlcConv conv,
557 unsigned char *r,
558 ucs4_t wc,
559 int n)
560 {
561 int count;
562 Utf8Conv convptr;
563 int i;
564
565 for (; *preferred != (Utf8Conv) NULL; preferred++) {
566 convptr = *preferred;
567 count = convptr->wctocs(conv, r, wc, n);
568 if (count == RET_TOOSMALL)
569 return RET_TOOSMALL;
570 if (count != RET_ILSEQ) {
571 *charsetp = convptr;
572 *sidep = (*r < 0x80 ? XlcGL : XlcGR);
573 return count;
574 }
575 }
576 for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
577 count = convptr->wctocs(conv, r, wc, n);
578 if (count == RET_TOOSMALL)
579 return RET_TOOSMALL;
580 if (count != RET_ILSEQ) {
581 *charsetp = convptr;
582 *sidep = (*r < 0x80 ? XlcGL : XlcGR);
583 return count;
584 }
585 }
586 return RET_ILSEQ;
587 }
588
589 static int
utf8tocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)590 utf8tocs(
591 XlcConv conv,
592 XPointer *from,
593 int *from_left,
594 XPointer *to,
595 int *to_left,
596 XPointer *args,
597 int num_args)
598 {
599 Utf8Conv *preferred_charsets;
600 XlcCharSet last_charset = NULL;
601 unsigned char const *src;
602 unsigned char const *srcend;
603 unsigned char *dst;
604 unsigned char *dstend;
605 int unconv_num;
606
607 if (from == NULL || *from == NULL)
608 return 0;
609
610 preferred_charsets = (Utf8Conv *) conv->state;
611 src = (unsigned char const *) *from;
612 srcend = src + *from_left;
613 dst = (unsigned char *) *to;
614 dstend = dst + *to_left;
615 unconv_num = 0;
616
617 while (src < srcend && dst < dstend) {
618 Utf8Conv chosen_charset = NULL;
619 XlcSide chosen_side = XlcNONE;
620 ucs4_t wc;
621 int consumed;
622 int count;
623
624 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
625 if (consumed == RET_TOOFEW(0))
626 break;
627 if (consumed == RET_ILSEQ) {
628 src++;
629 unconv_num++;
630 continue;
631 }
632
633 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
634 if (count == RET_TOOSMALL)
635 break;
636 if (count == RET_ILSEQ) {
637 src += consumed;
638 unconv_num++;
639 continue;
640 }
641
642 if (last_charset == NULL) {
643 last_charset =
644 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
645 if (last_charset == NULL) {
646 src += consumed;
647 unconv_num++;
648 continue;
649 }
650 } else {
651 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
652 && (last_charset->side == XlcGLGR
653 || last_charset->side == chosen_side)))
654 break;
655 }
656 src += consumed;
657 dst += count;
658 }
659
660 if (last_charset == NULL)
661 return -1;
662
663 *from = (XPointer) src;
664 *from_left = srcend - src;
665 *to = (XPointer) dst;
666 *to_left = dstend - dst;
667
668 if (num_args >= 1)
669 *((XlcCharSet *)args[0]) = last_charset;
670
671 return unconv_num;
672 }
673
674 static XlcConvMethodsRec methods_utf8tocs = {
675 close_tocs_converter,
676 utf8tocs,
677 NULL
678 };
679
680 static XlcConv
open_utf8tocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)681 open_utf8tocs(
682 XLCd from_lcd,
683 const char *from_type,
684 XLCd to_lcd,
685 const char *to_type)
686 {
687 return create_tocs_conv(from_lcd, &methods_utf8tocs);
688 }
689
690 /* from XlcNUtf8String to XlcNChar */
691
692 static int
utf8tocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)693 utf8tocs1(
694 XlcConv conv,
695 XPointer *from,
696 int *from_left,
697 XPointer *to,
698 int *to_left,
699 XPointer *args,
700 int num_args)
701 {
702 Utf8Conv *preferred_charsets;
703 XlcCharSet last_charset = NULL;
704 unsigned char const *src;
705 unsigned char const *srcend;
706 unsigned char *dst;
707 unsigned char *dstend;
708 int unconv_num;
709
710 if (from == NULL || *from == NULL)
711 return 0;
712
713 preferred_charsets = (Utf8Conv *) conv->state;
714 src = (unsigned char const *) *from;
715 srcend = src + *from_left;
716 dst = (unsigned char *) *to;
717 dstend = dst + *to_left;
718 unconv_num = 0;
719
720 while (src < srcend && dst < dstend) {
721 Utf8Conv chosen_charset = NULL;
722 XlcSide chosen_side = XlcNONE;
723 ucs4_t wc;
724 int consumed;
725 int count;
726
727 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
728 if (consumed == RET_TOOFEW(0))
729 break;
730 if (consumed == RET_ILSEQ) {
731 src++;
732 unconv_num++;
733 continue;
734 }
735
736 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
737 if (count == RET_TOOSMALL)
738 break;
739 if (count == RET_ILSEQ) {
740 src += consumed;
741 unconv_num++;
742 continue;
743 }
744
745 if (last_charset == NULL) {
746 last_charset =
747 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
748 if (last_charset == NULL) {
749 src += consumed;
750 unconv_num++;
751 continue;
752 }
753 } else {
754 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
755 && (last_charset->side == XlcGLGR
756 || last_charset->side == chosen_side)))
757 break;
758 }
759 src += consumed;
760 dst += count;
761 break;
762 }
763
764 if (last_charset == NULL)
765 return -1;
766
767 *from = (XPointer) src;
768 *from_left = srcend - src;
769 *to = (XPointer) dst;
770 *to_left = dstend - dst;
771
772 if (num_args >= 1)
773 *((XlcCharSet *)args[0]) = last_charset;
774
775 return unconv_num;
776 }
777
778 static XlcConvMethodsRec methods_utf8tocs1 = {
779 close_tocs_converter,
780 utf8tocs1,
781 NULL
782 };
783
784 static XlcConv
open_utf8tocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)785 open_utf8tocs1(
786 XLCd from_lcd,
787 const char *from_type,
788 XLCd to_lcd,
789 const char *to_type)
790 {
791 return create_tocs_conv(from_lcd, &methods_utf8tocs1);
792 }
793
794 /* from XlcNUtf8String to XlcNString */
795
796 static int
utf8tostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)797 utf8tostr(
798 XlcConv conv,
799 XPointer *from,
800 int *from_left,
801 XPointer *to,
802 int *to_left,
803 XPointer *args,
804 int num_args)
805 {
806 unsigned char const *src;
807 unsigned char const *srcend;
808 unsigned char *dst;
809 unsigned char *dstend;
810 int unconv_num;
811
812 if (from == NULL || *from == NULL)
813 return 0;
814
815 src = (unsigned char const *) *from;
816 srcend = src + *from_left;
817 dst = (unsigned char *) *to;
818 dstend = dst + *to_left;
819 unconv_num = 0;
820
821 while (src < srcend) {
822 unsigned char c;
823 ucs4_t wc;
824 int consumed;
825
826 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
827 if (consumed == RET_TOOFEW(0))
828 break;
829 if (dst == dstend)
830 break;
831 if (consumed == RET_ILSEQ) {
832 consumed = 1;
833 c = BAD_CHAR;
834 unconv_num++;
835 } else {
836 if ((wc & ~(ucs4_t)0xff) != 0) {
837 c = BAD_CHAR;
838 unconv_num++;
839 } else
840 c = (unsigned char) wc;
841 }
842 *dst++ = c;
843 src += consumed;
844 }
845
846 *from = (XPointer) src;
847 *from_left = srcend - src;
848 *to = (XPointer) dst;
849 *to_left = dstend - dst;
850
851 return unconv_num;
852 }
853
854 static XlcConvMethodsRec methods_utf8tostr = {
855 close_converter,
856 utf8tostr,
857 NULL
858 };
859
860 static XlcConv
open_utf8tostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)861 open_utf8tostr(
862 XLCd from_lcd,
863 const char *from_type,
864 XLCd to_lcd,
865 const char *to_type)
866 {
867 return create_conv(from_lcd, &methods_utf8tostr);
868 }
869
870 /* from XlcNString to XlcNUtf8String */
871
872 static int
strtoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)873 strtoutf8(
874 XlcConv conv,
875 XPointer *from,
876 int *from_left,
877 XPointer *to,
878 int *to_left,
879 XPointer *args,
880 int num_args)
881 {
882 unsigned char const *src;
883 unsigned char const *srcend;
884 unsigned char *dst;
885 unsigned char *dstend;
886
887 if (from == NULL || *from == NULL)
888 return 0;
889
890 src = (unsigned char const *) *from;
891 srcend = src + *from_left;
892 dst = (unsigned char *) *to;
893 dstend = dst + *to_left;
894
895 while (src < srcend) {
896 int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
897 if (count == RET_TOOSMALL)
898 break;
899 dst += count;
900 src++;
901 }
902
903 *from = (XPointer) src;
904 *from_left = srcend - src;
905 *to = (XPointer) dst;
906 *to_left = dstend - dst;
907
908 return 0;
909 }
910
911 static XlcConvMethodsRec methods_strtoutf8 = {
912 close_converter,
913 strtoutf8,
914 NULL
915 };
916
917 static XlcConv
open_strtoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)918 open_strtoutf8(
919 XLCd from_lcd,
920 const char *from_type,
921 XLCd to_lcd,
922 const char *to_type)
923 {
924 return create_conv(from_lcd, &methods_strtoutf8);
925 }
926
927 /* Support for the input methods. */
928
929 XPointer
_Utf8GetConvByName(const char * name)930 _Utf8GetConvByName(
931 const char *name)
932 {
933 XrmQuark xrm_name;
934 Utf8Conv convptr;
935 int i;
936
937 if (name == NULL)
938 return (XPointer) NULL;
939
940 lazy_init_all_charsets();
941 xrm_name = XrmStringToQuark(name);
942
943 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
944 if (convptr->xrm_name == xrm_name)
945 return (XPointer) convptr->wctocs;
946 return (XPointer) NULL;
947 }
948
949 /* from XlcNUcsChar to XlcNChar, needed for input methods */
950
951 static XlcConv
create_ucstocs_conv(XLCd lcd,XlcConvMethods methods)952 create_ucstocs_conv(
953 XLCd lcd,
954 XlcConvMethods methods)
955 {
956
957 if (XLC_PUBLIC_PART(lcd)->codeset
958 && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
959 XlcConv conv;
960 Utf8Conv *preferred;
961
962 lazy_init_all_charsets();
963
964 conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
965 if (conv == (XlcConv) NULL)
966 return (XlcConv) NULL;
967 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
968
969 preferred[0] = &all_charsets[0]; /* ISO10646 */
970 preferred[1] = (Utf8Conv) NULL;
971
972 conv->methods = methods;
973 conv->state = (XPointer) preferred;
974
975 return conv;
976 } else {
977 return create_tocs_conv(lcd, methods);
978 }
979 }
980
981 static int
charset_wctocs_exactly(Utf8Conv * preferred,Utf8Conv * charsetp,XlcSide * sidep,XlcConv conv,unsigned char * r,ucs4_t wc,int n)982 charset_wctocs_exactly(
983 Utf8Conv *preferred,
984 Utf8Conv *charsetp,
985 XlcSide *sidep,
986 XlcConv conv,
987 unsigned char *r,
988 ucs4_t wc,
989 int n)
990 {
991 int count;
992 Utf8Conv convptr;
993
994 for (; *preferred != (Utf8Conv) NULL; preferred++) {
995 convptr = *preferred;
996 count = convptr->wctocs(conv, r, wc, n);
997 if (count == RET_TOOSMALL)
998 return RET_TOOSMALL;
999 if (count != RET_ILSEQ) {
1000 *charsetp = convptr;
1001 *sidep = (*r < 0x80 ? XlcGL : XlcGR);
1002 return count;
1003 }
1004 }
1005 return RET_ILSEQ;
1006 }
1007
1008 static int
ucstocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1009 ucstocs1(
1010 XlcConv conv,
1011 XPointer *from,
1012 int *from_left,
1013 XPointer *to,
1014 int *to_left,
1015 XPointer *args,
1016 int num_args)
1017 {
1018 ucs4_t const *src;
1019 unsigned char *dst = (unsigned char *) *to;
1020 int unconv_num = 0;
1021 Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1022 Utf8Conv chosen_charset = NULL;
1023 XlcSide chosen_side = XlcNONE;
1024 XlcCharSet charset = NULL;
1025 int count;
1026
1027 if (from == NULL || *from == NULL)
1028 return 0;
1029
1030 src = (ucs4_t const *) *from;
1031
1032 count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1033 &chosen_side, conv, dst, *src, *to_left);
1034 if (count < 1) {
1035 unconv_num++;
1036 count = 0;
1037 } else {
1038 charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1039 }
1040 if (charset == NULL)
1041 return -1;
1042
1043 *from = (XPointer) ++src;
1044 (*from_left)--;
1045 *to = (XPointer) dst;
1046 *to_left -= count;
1047
1048 if (num_args >= 1)
1049 *((XlcCharSet *)args[0]) = charset;
1050
1051 return unconv_num;
1052 }
1053
1054 static XlcConvMethodsRec methods_ucstocs1 = {
1055 close_tocs_converter,
1056 ucstocs1,
1057 NULL
1058 };
1059
1060 static XlcConv
open_ucstocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1061 open_ucstocs1(
1062 XLCd from_lcd,
1063 const char *from_type,
1064 XLCd to_lcd,
1065 const char *to_type)
1066 {
1067 return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1068 }
1069
1070 /* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1071
1072 static int
ucstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1073 ucstoutf8(
1074 XlcConv conv,
1075 XPointer *from,
1076 int *from_left,
1077 XPointer *to,
1078 int *to_left,
1079 XPointer *args,
1080 int num_args)
1081 {
1082 const ucs4_t *src;
1083 const ucs4_t *srcend;
1084 unsigned char *dst;
1085 unsigned char *dstend;
1086 int unconv_num;
1087
1088 if (from == NULL || *from == NULL)
1089 return 0;
1090
1091 src = (const ucs4_t *) *from;
1092 srcend = src + *from_left;
1093 dst = (unsigned char *) *to;
1094 dstend = dst + *to_left;
1095 unconv_num = 0;
1096
1097 while (src < srcend) {
1098 int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1099 if (count == RET_TOOSMALL)
1100 break;
1101 if (count == RET_ILSEQ)
1102 unconv_num++;
1103 src++;
1104 dst += count;
1105 }
1106
1107 *from = (XPointer) src;
1108 *from_left = srcend - src;
1109 *to = (XPointer) dst;
1110 *to_left = dstend - dst;
1111
1112 return unconv_num;
1113 }
1114
1115 static XlcConvMethodsRec methods_ucstoutf8 = {
1116 close_converter,
1117 ucstoutf8,
1118 NULL
1119 };
1120
1121 static XlcConv
open_ucstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1122 open_ucstoutf8(
1123 XLCd from_lcd,
1124 const char *from_type,
1125 XLCd to_lcd,
1126 const char *to_type)
1127 {
1128 return create_conv(from_lcd, &methods_ucstoutf8);
1129 }
1130
1131 /* Registers UTF-8 converters for a non-UTF-8 locale. */
1132 void
_XlcAddUtf8Converters(XLCd lcd)1133 _XlcAddUtf8Converters(
1134 XLCd lcd)
1135 {
1136 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
1137 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
1138 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
1139 _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
1140 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
1141 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1);
1142 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8);
1143 }
1144
1145 /***************************************************************************/
1146 /* Part II: UTF-8 locale loader conversion files
1147 *
1148 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1149 */
1150
1151 /* from XlcNMultiByte to XlcNWideChar */
1152
1153 static int
utf8towcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1154 utf8towcs(
1155 XlcConv conv,
1156 XPointer *from,
1157 int *from_left,
1158 XPointer *to,
1159 int *to_left,
1160 XPointer *args,
1161 int num_args)
1162 {
1163 unsigned char const *src;
1164 unsigned char const *srcend;
1165 wchar_t *dst;
1166 wchar_t *dstend;
1167 int unconv_num;
1168
1169 if (from == NULL || *from == NULL)
1170 return 0;
1171
1172 src = (unsigned char const *) *from;
1173 srcend = src + *from_left;
1174 dst = (wchar_t *) *to;
1175 dstend = dst + *to_left;
1176 unconv_num = 0;
1177
1178 while (src < srcend && dst < dstend) {
1179 ucs4_t wc;
1180 int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1181 if (consumed == RET_TOOFEW(0))
1182 break;
1183 if (consumed == RET_ILSEQ) {
1184 src++;
1185 *dst = BAD_WCHAR;
1186 unconv_num++;
1187 } else {
1188 src += consumed;
1189 *dst = wc;
1190 }
1191 dst++;
1192 }
1193
1194 *from = (XPointer) src;
1195 *from_left = srcend - src;
1196 *to = (XPointer) dst;
1197 *to_left = dstend - dst;
1198
1199 return unconv_num;
1200 }
1201
1202 static XlcConvMethodsRec methods_utf8towcs = {
1203 close_converter,
1204 utf8towcs,
1205 NULL
1206 };
1207
1208 static XlcConv
open_utf8towcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1209 open_utf8towcs(
1210 XLCd from_lcd,
1211 const char *from_type,
1212 XLCd to_lcd,
1213 const char *to_type)
1214 {
1215 return create_conv(from_lcd, &methods_utf8towcs);
1216 }
1217
1218 /* from XlcNWideChar to XlcNMultiByte */
1219
1220 static int
wcstoutf8(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1221 wcstoutf8(
1222 XlcConv conv,
1223 XPointer *from,
1224 int *from_left,
1225 XPointer *to,
1226 int *to_left,
1227 XPointer *args,
1228 int num_args)
1229 {
1230 wchar_t const *src;
1231 wchar_t const *srcend;
1232 unsigned char *dst;
1233 unsigned char *dstend;
1234 int unconv_num;
1235
1236 if (from == NULL || *from == NULL)
1237 return 0;
1238
1239 src = (wchar_t const *) *from;
1240 srcend = src + *from_left;
1241 dst = (unsigned char *) *to;
1242 dstend = dst + *to_left;
1243 unconv_num = 0;
1244
1245 while (src < srcend) {
1246 int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1247 if (count == RET_TOOSMALL)
1248 break;
1249 if (count == RET_ILSEQ) {
1250 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1251 if (count == RET_TOOSMALL)
1252 break;
1253 unconv_num++;
1254 }
1255 dst += count;
1256 src++;
1257 }
1258
1259 *from = (XPointer) src;
1260 *from_left = srcend - src;
1261 *to = (XPointer) dst;
1262 *to_left = dstend - dst;
1263
1264 return unconv_num;
1265 }
1266
1267 static XlcConvMethodsRec methods_wcstoutf8 = {
1268 close_converter,
1269 wcstoutf8,
1270 NULL
1271 };
1272
1273 static XlcConv
open_wcstoutf8(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1274 open_wcstoutf8(
1275 XLCd from_lcd,
1276 const char *from_type,
1277 XLCd to_lcd,
1278 const char *to_type)
1279 {
1280 return create_conv(from_lcd, &methods_wcstoutf8);
1281 }
1282
1283 /* from XlcNString to XlcNWideChar */
1284
1285 static int
our_strtowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1286 our_strtowcs(
1287 XlcConv conv,
1288 XPointer *from,
1289 int *from_left,
1290 XPointer *to,
1291 int *to_left,
1292 XPointer *args,
1293 int num_args)
1294 {
1295 unsigned char const *src;
1296 unsigned char const *srcend;
1297 wchar_t *dst;
1298 wchar_t *dstend;
1299
1300 if (from == NULL || *from == NULL)
1301 return 0;
1302
1303 src = (unsigned char const *) *from;
1304 srcend = src + *from_left;
1305 dst = (wchar_t *) *to;
1306 dstend = dst + *to_left;
1307
1308 while (src < srcend && dst < dstend)
1309 *dst++ = (wchar_t) *src++;
1310
1311 *from = (XPointer) src;
1312 *from_left = srcend - src;
1313 *to = (XPointer) dst;
1314 *to_left = dstend - dst;
1315
1316 return 0;
1317 }
1318
1319 static XlcConvMethodsRec methods_strtowcs = {
1320 close_converter,
1321 our_strtowcs,
1322 NULL
1323 };
1324
1325 static XlcConv
open_strtowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1326 open_strtowcs(
1327 XLCd from_lcd,
1328 const char *from_type,
1329 XLCd to_lcd,
1330 const char *to_type)
1331 {
1332 return create_conv(from_lcd, &methods_strtowcs);
1333 }
1334
1335 /* from XlcNWideChar to XlcNString */
1336
1337 static int
our_wcstostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1338 our_wcstostr(
1339 XlcConv conv,
1340 XPointer *from,
1341 int *from_left,
1342 XPointer *to,
1343 int *to_left,
1344 XPointer *args,
1345 int num_args)
1346 {
1347 wchar_t const *src;
1348 wchar_t const *srcend;
1349 unsigned char *dst;
1350 unsigned char *dstend;
1351 int unconv_num;
1352
1353 if (from == NULL || *from == NULL)
1354 return 0;
1355
1356 src = (wchar_t const *) *from;
1357 srcend = src + *from_left;
1358 dst = (unsigned char *) *to;
1359 dstend = dst + *to_left;
1360 unconv_num = 0;
1361
1362 while (src < srcend && dst < dstend) {
1363 unsigned int wc = *src++;
1364 if (wc < 0x80)
1365 *dst = wc;
1366 else {
1367 *dst = BAD_CHAR;
1368 unconv_num++;
1369 }
1370 dst++;
1371 }
1372
1373 *from = (XPointer) src;
1374 *from_left = srcend - src;
1375 *to = (XPointer) dst;
1376 *to_left = dstend - dst;
1377
1378 return unconv_num;
1379 }
1380
1381 static XlcConvMethodsRec methods_wcstostr = {
1382 close_converter,
1383 our_wcstostr,
1384 NULL
1385 };
1386
1387 static XlcConv
open_wcstostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1388 open_wcstostr(
1389 XLCd from_lcd,
1390 const char *from_type,
1391 XLCd to_lcd,
1392 const char *to_type)
1393 {
1394 return create_conv(from_lcd, &methods_wcstostr);
1395 }
1396
1397 /* from XlcNCharSet to XlcNWideChar */
1398
1399 static int
cstowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1400 cstowcs(
1401 XlcConv conv,
1402 XPointer *from,
1403 int *from_left,
1404 XPointer *to,
1405 int *to_left,
1406 XPointer *args,
1407 int num_args)
1408 {
1409 XlcCharSet charset;
1410 const char *name;
1411 Utf8Conv convptr;
1412 int i;
1413 unsigned char const *src;
1414 unsigned char const *srcend;
1415 wchar_t *dst;
1416 wchar_t *dstend;
1417 int unconv_num;
1418
1419 if (from == NULL || *from == NULL)
1420 return 0;
1421
1422 if (num_args < 1)
1423 return -1;
1424
1425 charset = (XlcCharSet) args[0];
1426 name = charset->encoding_name;
1427 /* not charset->name because the latter has a ":GL"/":GR" suffix */
1428
1429 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1430 if (!strcmp(convptr->name, name))
1431 break;
1432 if (i == 0)
1433 return -1;
1434
1435 src = (unsigned char const *) *from;
1436 srcend = src + *from_left;
1437 dst = (wchar_t *) *to;
1438 dstend = dst + *to_left;
1439 unconv_num = 0;
1440
1441 while (src < srcend && dst < dstend) {
1442 unsigned int wc;
1443 int consumed;
1444
1445 consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1446 if (consumed == RET_ILSEQ)
1447 return -1;
1448 if (consumed == RET_TOOFEW(0))
1449 break;
1450
1451 *dst++ = wc;
1452 src += consumed;
1453 }
1454
1455 *from = (XPointer) src;
1456 *from_left = srcend - src;
1457 *to = (XPointer) dst;
1458 *to_left = dstend - dst;
1459
1460 return unconv_num;
1461 }
1462
1463 static XlcConvMethodsRec methods_cstowcs = {
1464 close_converter,
1465 cstowcs,
1466 NULL
1467 };
1468
1469 static XlcConv
open_cstowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1470 open_cstowcs(
1471 XLCd from_lcd,
1472 const char *from_type,
1473 XLCd to_lcd,
1474 const char *to_type)
1475 {
1476 lazy_init_all_charsets();
1477 return create_conv(from_lcd, &methods_cstowcs);
1478 }
1479
1480 /* from XlcNWideChar to XlcNCharSet */
1481
1482 static int
wcstocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1483 wcstocs(
1484 XlcConv conv,
1485 XPointer *from,
1486 int *from_left,
1487 XPointer *to,
1488 int *to_left,
1489 XPointer *args,
1490 int num_args)
1491 {
1492 Utf8Conv *preferred_charsets;
1493 XlcCharSet last_charset = NULL;
1494 wchar_t const *src;
1495 wchar_t const *srcend;
1496 unsigned char *dst;
1497 unsigned char *dstend;
1498 int unconv_num;
1499
1500 if (from == NULL || *from == NULL)
1501 return 0;
1502
1503 preferred_charsets = (Utf8Conv *) conv->state;
1504 src = (wchar_t const *) *from;
1505 srcend = src + *from_left;
1506 dst = (unsigned char *) *to;
1507 dstend = dst + *to_left;
1508 unconv_num = 0;
1509
1510 while (src < srcend && dst < dstend) {
1511 Utf8Conv chosen_charset = NULL;
1512 XlcSide chosen_side = XlcNONE;
1513 wchar_t wc = *src;
1514 int count;
1515
1516 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1517 if (count == RET_TOOSMALL)
1518 break;
1519 if (count == RET_ILSEQ) {
1520 src++;
1521 unconv_num++;
1522 continue;
1523 }
1524
1525 if (last_charset == NULL) {
1526 last_charset =
1527 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1528 if (last_charset == NULL) {
1529 src++;
1530 unconv_num++;
1531 continue;
1532 }
1533 } else {
1534 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1535 && (last_charset->side == XlcGLGR
1536 || last_charset->side == chosen_side)))
1537 break;
1538 }
1539 src++;
1540 dst += count;
1541 }
1542
1543 if (last_charset == NULL)
1544 return -1;
1545
1546 *from = (XPointer) src;
1547 *from_left = srcend - src;
1548 *to = (XPointer) dst;
1549 *to_left = dstend - dst;
1550
1551 if (num_args >= 1)
1552 *((XlcCharSet *)args[0]) = last_charset;
1553
1554 return unconv_num;
1555 }
1556
1557 static XlcConvMethodsRec methods_wcstocs = {
1558 close_tocs_converter,
1559 wcstocs,
1560 NULL
1561 };
1562
1563 static XlcConv
open_wcstocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1564 open_wcstocs(
1565 XLCd from_lcd,
1566 const char *from_type,
1567 XLCd to_lcd,
1568 const char *to_type)
1569 {
1570 return create_tocs_conv(from_lcd, &methods_wcstocs);
1571 }
1572
1573 /* from XlcNWideChar to XlcNChar */
1574
1575 static int
wcstocs1(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1576 wcstocs1(
1577 XlcConv conv,
1578 XPointer *from,
1579 int *from_left,
1580 XPointer *to,
1581 int *to_left,
1582 XPointer *args,
1583 int num_args)
1584 {
1585 Utf8Conv *preferred_charsets;
1586 XlcCharSet last_charset = NULL;
1587 wchar_t const *src;
1588 wchar_t const *srcend;
1589 unsigned char *dst;
1590 unsigned char *dstend;
1591 int unconv_num;
1592
1593 if (from == NULL || *from == NULL)
1594 return 0;
1595
1596 preferred_charsets = (Utf8Conv *) conv->state;
1597 src = (wchar_t const *) *from;
1598 srcend = src + *from_left;
1599 dst = (unsigned char *) *to;
1600 dstend = dst + *to_left;
1601 unconv_num = 0;
1602
1603 while (src < srcend && dst < dstend) {
1604 Utf8Conv chosen_charset = NULL;
1605 XlcSide chosen_side = XlcNONE;
1606 wchar_t wc = *src;
1607 int count;
1608
1609 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1610 if (count == RET_TOOSMALL)
1611 break;
1612 if (count == RET_ILSEQ) {
1613 src++;
1614 unconv_num++;
1615 continue;
1616 }
1617
1618 if (last_charset == NULL) {
1619 last_charset =
1620 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1621 if (last_charset == NULL) {
1622 src++;
1623 unconv_num++;
1624 continue;
1625 }
1626 } else {
1627 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1628 && (last_charset->side == XlcGLGR
1629 || last_charset->side == chosen_side)))
1630 break;
1631 }
1632 src++;
1633 dst += count;
1634 break;
1635 }
1636
1637 if (last_charset == NULL)
1638 return -1;
1639
1640 *from = (XPointer) src;
1641 *from_left = srcend - src;
1642 *to = (XPointer) dst;
1643 *to_left = dstend - dst;
1644
1645 if (num_args >= 1)
1646 *((XlcCharSet *)args[0]) = last_charset;
1647
1648 return unconv_num;
1649 }
1650
1651 static XlcConvMethodsRec methods_wcstocs1 = {
1652 close_tocs_converter,
1653 wcstocs1,
1654 NULL
1655 };
1656
1657 static XlcConv
open_wcstocs1(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1658 open_wcstocs1(
1659 XLCd from_lcd,
1660 const char *from_type,
1661 XLCd to_lcd,
1662 const char *to_type)
1663 {
1664 return create_tocs_conv(from_lcd, &methods_wcstocs1);
1665 }
1666
1667 /* trivial, no conversion */
1668
1669 static int
identity(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1670 identity(
1671 XlcConv conv,
1672 XPointer *from,
1673 int *from_left,
1674 XPointer *to,
1675 int *to_left,
1676 XPointer *args,
1677 int num_args)
1678 {
1679 unsigned char const *src;
1680 unsigned char const *srcend;
1681 unsigned char *dst;
1682 unsigned char *dstend;
1683
1684 if (from == NULL || *from == NULL)
1685 return 0;
1686
1687 src = (unsigned char const *) *from;
1688 srcend = src + *from_left;
1689 dst = (unsigned char *) *to;
1690 dstend = dst + *to_left;
1691
1692 while (src < srcend && dst < dstend)
1693 *dst++ = *src++;
1694
1695 *from = (XPointer) src;
1696 *from_left = srcend - src;
1697 *to = (XPointer) dst;
1698 *to_left = dstend - dst;
1699
1700 return 0;
1701 }
1702
1703 static XlcConvMethodsRec methods_identity = {
1704 close_converter,
1705 identity,
1706 NULL
1707 };
1708
1709 static XlcConv
open_identity(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1710 open_identity(
1711 XLCd from_lcd,
1712 const char *from_type,
1713 XLCd to_lcd,
1714 const char *to_type)
1715 {
1716 return create_conv(from_lcd, &methods_identity);
1717 }
1718
1719 /* from MultiByte/WideChar to FontCharSet. */
1720 /* They really use converters to CharSet
1721 * but with different create_conv procedure. */
1722
1723 static XlcConv
create_tofontcs_conv(XLCd lcd,XlcConvMethods methods)1724 create_tofontcs_conv(
1725 XLCd lcd,
1726 XlcConvMethods methods)
1727 {
1728 XlcConv conv;
1729 int i, num, k, count;
1730 char **value, buf[32];
1731 Utf8Conv *preferred;
1732
1733 lazy_init_all_charsets();
1734
1735 for (i = 0, num = 0;; i++) {
1736 snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1737 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1738 if (count < 1) {
1739 snprintf(buf, sizeof(buf), "fs%d.charset", i);
1740 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1741 if (count < 1)
1742 break;
1743 }
1744 num += count;
1745 }
1746
1747 conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1748 if (conv == (XlcConv) NULL)
1749 return (XlcConv) NULL;
1750 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1751
1752 /* Loop through all fontsets mentioned in the locale. */
1753 for (i = 0, num = 0;; i++) {
1754 snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1755 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1756 if (count < 1) {
1757 snprintf(buf, sizeof(buf), "fs%d.charset", i);
1758 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1759 if (count < 1)
1760 break;
1761 }
1762 while (count-- > 0) {
1763 XlcCharSet charset = _XlcGetCharSet(*value++);
1764 const char *name;
1765
1766 if (charset == (XlcCharSet) NULL)
1767 continue;
1768
1769 name = charset->encoding_name;
1770 /* If it wasn't already encountered... */
1771 for (k = num - 1; k >= 0; k--)
1772 if (!strcmp(preferred[k]->name, name))
1773 break;
1774 if (k < 0) {
1775 /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1776 if (!strcmp("ISO10646-1", name)) {
1777 preferred[num++] = &all_charsets[ucs2_conv_index];
1778 continue;
1779 }
1780 /* Look it up in all_charsets[]. */
1781 for (k = 0; k < all_charsets_count-1; k++)
1782 if (!strcmp(all_charsets[k].name, name)) {
1783 /* Add it to the preferred set. */
1784 preferred[num++] = &all_charsets[k];
1785 break;
1786 }
1787 }
1788 }
1789 }
1790 preferred[num] = (Utf8Conv) NULL;
1791
1792 conv->methods = methods;
1793 conv->state = (XPointer) preferred;
1794
1795 return conv;
1796 }
1797
1798 static XlcConv
open_wcstofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1799 open_wcstofcs(
1800 XLCd from_lcd,
1801 const char *from_type,
1802 XLCd to_lcd,
1803 const char *to_type)
1804 {
1805 return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1806 }
1807
1808 static XlcConv
open_utf8tofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1809 open_utf8tofcs(
1810 XLCd from_lcd,
1811 const char *from_type,
1812 XLCd to_lcd,
1813 const char *to_type)
1814 {
1815 return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1816 }
1817
1818 /* ========================== iconv Stuff ================================ */
1819
1820 /* from XlcNCharSet to XlcNMultiByte */
1821
1822 static int
iconv_cstombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1823 iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1824 XPointer *to, int *to_left, XPointer *args, int num_args)
1825 {
1826 XlcCharSet charset;
1827 char const *name;
1828 Utf8Conv convptr;
1829 int i;
1830 unsigned char const *src;
1831 unsigned char const *srcend;
1832 unsigned char *dst;
1833 unsigned char *dstend;
1834 int unconv_num;
1835
1836 if (from == NULL || *from == NULL)
1837 return 0;
1838
1839 if (num_args < 1)
1840 return -1;
1841
1842 charset = (XlcCharSet) args[0];
1843 name = charset->encoding_name;
1844 /* not charset->name because the latter has a ":GL"/":GR" suffix */
1845
1846 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1847 if (!strcmp(convptr->name, name))
1848 break;
1849 if (i == 0)
1850 return -1;
1851
1852 src = (unsigned char const *) *from;
1853 srcend = src + *from_left;
1854 dst = (unsigned char *) *to;
1855 dstend = dst + *to_left;
1856 unconv_num = 0;
1857
1858 while (src < srcend) {
1859 ucs4_t wc;
1860 int consumed;
1861 int count;
1862
1863 consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1864 if (consumed == RET_ILSEQ)
1865 return -1;
1866 if (consumed == RET_TOOFEW(0))
1867 break;
1868
1869 /* Use stdc iconv to convert widechar -> multibyte */
1870
1871 count = wctomb((char *)dst, wc);
1872 if (count == 0)
1873 break;
1874 if (count == -1) {
1875 count = wctomb((char *)dst, BAD_WCHAR);
1876 if (count == 0)
1877 break;
1878 unconv_num++;
1879 }
1880 src += consumed;
1881 dst += count;
1882 }
1883
1884 *from = (XPointer) src;
1885 *from_left = srcend - src;
1886 *to = (XPointer) dst;
1887 *to_left = dstend - dst;
1888
1889 return unconv_num;
1890
1891 }
1892
1893 static XlcConvMethodsRec iconv_cstombs_methods = {
1894 close_converter,
1895 iconv_cstombs,
1896 NULL
1897 };
1898
1899 static XlcConv
open_iconv_cstombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1900 open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1901 {
1902 lazy_init_all_charsets();
1903 return create_conv(from_lcd, &iconv_cstombs_methods);
1904 }
1905
1906 static int
iconv_mbstocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)1907 iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1908 XPointer *to, int *to_left, XPointer *args, int num_args)
1909 {
1910 Utf8Conv *preferred_charsets;
1911 XlcCharSet last_charset = NULL;
1912 unsigned char const *src;
1913 unsigned char const *srcend;
1914 unsigned char *dst;
1915 unsigned char *dstend;
1916 int unconv_num;
1917
1918 if (from == NULL || *from == NULL)
1919 return 0;
1920
1921 preferred_charsets = (Utf8Conv *) conv->state;
1922 src = (unsigned char const *) *from;
1923 srcend = src + *from_left;
1924 dst = (unsigned char *) *to;
1925 dstend = dst + *to_left;
1926 unconv_num = 0;
1927
1928 while (src < srcend && dst < dstend) {
1929 Utf8Conv chosen_charset = NULL;
1930 XlcSide chosen_side = XlcNONE;
1931 wchar_t wc;
1932 int consumed;
1933 int count;
1934
1935 /* Uses stdc iconv to convert multibyte -> widechar */
1936
1937 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
1938 if (consumed == 0)
1939 break;
1940 if (consumed == -1) {
1941 src++;
1942 unconv_num++;
1943 continue;
1944 }
1945
1946 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1947
1948 if (count == RET_TOOSMALL)
1949 break;
1950 if (count == RET_ILSEQ) {
1951 src += consumed;
1952 unconv_num++;
1953 continue;
1954 }
1955
1956 if (last_charset == NULL) {
1957 last_charset =
1958 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1959 if (last_charset == NULL) {
1960 src += consumed;
1961 unconv_num++;
1962 continue;
1963 }
1964 } else {
1965 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1966 && (last_charset->side == XlcGLGR
1967 || last_charset->side == chosen_side)))
1968 break;
1969 }
1970 src += consumed;
1971 dst += count;
1972 }
1973
1974 if (last_charset == NULL)
1975 return -1;
1976
1977 *from = (XPointer) src;
1978 *from_left = srcend - src;
1979 *to = (XPointer) dst;
1980 *to_left = dstend - dst;
1981
1982 if (num_args >= 1)
1983 *((XlcCharSet *)args[0]) = last_charset;
1984
1985 return unconv_num;
1986 }
1987
1988 static XlcConvMethodsRec iconv_mbstocs_methods = {
1989 close_tocs_converter,
1990 iconv_mbstocs,
1991 NULL
1992 };
1993
1994 static XlcConv
open_iconv_mbstocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)1995 open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1996 {
1997 return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1998 }
1999
2000 /* from XlcNMultiByte to XlcNChar */
2001
2002 static int
iconv_mbtocs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2003 iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
2004 XPointer *to, int *to_left, XPointer *args, int num_args)
2005 {
2006 Utf8Conv *preferred_charsets;
2007 XlcCharSet last_charset = NULL;
2008 unsigned char const *src;
2009 unsigned char const *srcend;
2010 unsigned char *dst;
2011 unsigned char *dstend;
2012 int unconv_num;
2013
2014 if (from == NULL || *from == NULL)
2015 return 0;
2016
2017 preferred_charsets = (Utf8Conv *) conv->state;
2018 src = (unsigned char const *) *from;
2019 srcend = src + *from_left;
2020 dst = (unsigned char *) *to;
2021 dstend = dst + *to_left;
2022 unconv_num = 0;
2023
2024 while (src < srcend && dst < dstend) {
2025 Utf8Conv chosen_charset = NULL;
2026 XlcSide chosen_side = XlcNONE;
2027 wchar_t wc;
2028 int consumed;
2029 int count;
2030
2031 /* Uses stdc iconv to convert multibyte -> widechar */
2032
2033 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2034 if (consumed == 0)
2035 break;
2036 if (consumed == -1) {
2037 src++;
2038 unconv_num++;
2039 continue;
2040 }
2041
2042 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2043 if (count == RET_TOOSMALL)
2044 break;
2045 if (count == RET_ILSEQ) {
2046 src += consumed;
2047 unconv_num++;
2048 continue;
2049 }
2050
2051 if (last_charset == NULL) {
2052 last_charset =
2053 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2054 if (last_charset == NULL) {
2055 src += consumed;
2056 unconv_num++;
2057 continue;
2058 }
2059 } else {
2060 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2061 && (last_charset->side == XlcGLGR
2062 || last_charset->side == chosen_side)))
2063 break;
2064 }
2065 src += consumed;
2066 dst += count;
2067 }
2068
2069 if (last_charset == NULL)
2070 return -1;
2071
2072 *from = (XPointer) src;
2073 *from_left = srcend - src;
2074 *to = (XPointer) dst;
2075 *to_left = dstend - dst;
2076
2077 if (num_args >= 1)
2078 *((XlcCharSet *)args[0]) = last_charset;
2079
2080 return unconv_num;
2081 }
2082
2083 static XlcConvMethodsRec iconv_mbtocs_methods = {
2084 close_tocs_converter,
2085 iconv_mbtocs,
2086 NULL
2087 };
2088
2089 static XlcConv
open_iconv_mbtocs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2090 open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2091 {
2092 return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2093 }
2094
2095 /* from XlcNMultiByte to XlcNString */
2096
2097 static int
iconv_mbstostr(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2098 iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2099 XPointer *to, int *to_left, XPointer *args, int num_args)
2100 {
2101 unsigned char const *src;
2102 unsigned char const *srcend;
2103 unsigned char *dst;
2104 unsigned char *dstend;
2105 int unconv_num;
2106
2107 if (from == NULL || *from == NULL)
2108 return 0;
2109
2110 src = (unsigned char const *) *from;
2111 srcend = src + *from_left;
2112 dst = (unsigned char *) *to;
2113 dstend = dst + *to_left;
2114 unconv_num = 0;
2115
2116 while (src < srcend) {
2117 unsigned char c;
2118 wchar_t wc;
2119 int consumed;
2120
2121 /* Uses stdc iconv to convert multibyte -> widechar */
2122
2123 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2124 if (consumed == 0)
2125 break;
2126 if (dst == dstend)
2127 break;
2128 if (consumed == -1) {
2129 consumed = 1;
2130 c = BAD_CHAR;
2131 unconv_num++;
2132 } else {
2133 if ((wc & ~(wchar_t)0xff) != 0) {
2134 c = BAD_CHAR;
2135 unconv_num++;
2136 } else
2137 c = (unsigned char) wc;
2138 }
2139 *dst++ = c;
2140 src += consumed;
2141 }
2142
2143 *from = (XPointer) src;
2144 *from_left = srcend - src;
2145 *to = (XPointer) dst;
2146 *to_left = dstend - dst;
2147
2148 return unconv_num;
2149 }
2150
2151 static XlcConvMethodsRec iconv_mbstostr_methods = {
2152 close_converter,
2153 iconv_mbstostr,
2154 NULL
2155 };
2156
2157 static XlcConv
open_iconv_mbstostr(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2158 open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2159 {
2160 return create_conv(from_lcd, &iconv_mbstostr_methods);
2161 }
2162
2163 /* from XlcNString to XlcNMultiByte */
2164 static int
iconv_strtombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2165 iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2166 XPointer *to, int *to_left, XPointer *args, int num_args)
2167 {
2168 unsigned char const *src;
2169 unsigned char const *srcend;
2170 unsigned char *dst;
2171 unsigned char *dstend;
2172
2173 if (from == NULL || *from == NULL)
2174 return 0;
2175
2176 src = (unsigned char const *) *from;
2177 srcend = src + *from_left;
2178 dst = (unsigned char *) *to;
2179 dstend = dst + *to_left;
2180
2181 while (src < srcend) {
2182 int count = wctomb((char *)dst, *src);
2183 if (count < 0)
2184 break;
2185 dst += count;
2186 src++;
2187 }
2188
2189 *from = (XPointer) src;
2190 *from_left = srcend - src;
2191 *to = (XPointer) dst;
2192 *to_left = dstend - dst;
2193
2194 return 0;
2195 }
2196
2197 static XlcConvMethodsRec iconv_strtombs_methods= {
2198 close_converter,
2199 iconv_strtombs,
2200 NULL
2201 };
2202
2203 static XlcConv
open_iconv_strtombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2204 open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2205 {
2206 return create_conv(from_lcd, &iconv_strtombs_methods);
2207 }
2208
2209 /***************************************************************************/
/* Part III: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode.
 */
2214
2215 /* from XlcNMultiByte to XlcNWideChar */
2216 static int
iconv_mbstowcs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2217 iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2218 XPointer *to, int *to_left, XPointer *args, int num_args)
2219 {
2220 char *src = *((char **) from);
2221 wchar_t *dst = *((wchar_t **) to);
2222 int src_left = *from_left;
2223 int dst_left = *to_left;
2224 int length, unconv_num = 0;
2225
2226 while (src_left > 0 && dst_left > 0) {
2227 length = mbtowc(dst, src, (size_t) src_left);
2228
2229 if (length > 0) {
2230 src += length;
2231 src_left -= length;
2232 if (dst)
2233 dst++;
2234 dst_left--;
2235 } else if (length < 0) {
2236 src++;
2237 src_left--;
2238 unconv_num++;
2239 } else {
2240 /* null ? */
2241 src++;
2242 src_left--;
2243 if (dst)
2244 *dst++ = L'\0';
2245 dst_left--;
2246 }
2247 }
2248
2249 *from = (XPointer) src;
2250 if (dst)
2251 *to = (XPointer) dst;
2252 *from_left = src_left;
2253 *to_left = dst_left;
2254
2255 return unconv_num;
2256 }
2257
2258 static XlcConvMethodsRec iconv_mbstowcs_methods = {
2259 close_converter,
2260 iconv_mbstowcs,
2261 NULL
2262 } ;
2263
2264 static XlcConv
open_iconv_mbstowcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2265 open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2266 {
2267 return create_conv(from_lcd, &iconv_mbstowcs_methods);
2268 }
2269
2270 static int
iconv_wcstombs(XlcConv conv,XPointer * from,int * from_left,XPointer * to,int * to_left,XPointer * args,int num_args)2271 iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2272 XPointer *to, int *to_left, XPointer *args, int num_args)
2273 {
2274 wchar_t *src = *((wchar_t **) from);
2275 char *dst = *((char **) to);
2276 int src_left = *from_left;
2277 int dst_left = *to_left;
2278 int length, unconv_num = 0;
2279
2280 while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2281 length = wctomb(dst, *src); /* XXX */
2282
2283 if (length > 0) {
2284 src++;
2285 src_left--;
2286 if (dst)
2287 dst += length;
2288 dst_left -= length;
2289 } else if (length < 0) {
2290 src++;
2291 src_left--;
2292 unconv_num++;
2293 }
2294 }
2295
2296 *from = (XPointer) src;
2297 if (dst)
2298 *to = (XPointer) dst;
2299 *from_left = src_left;
2300 *to_left = dst_left;
2301
2302 return unconv_num;
2303 }
2304
2305 static XlcConvMethodsRec iconv_wcstombs_methods = {
2306 close_converter,
2307 iconv_wcstombs,
2308 NULL
2309 } ;
2310
2311 static XlcConv
open_iconv_wcstombs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2312 open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2313 {
2314 return create_conv(from_lcd, &iconv_wcstombs_methods);
2315 }
2316
2317 static XlcConv
open_iconv_mbstofcs(XLCd from_lcd,const char * from_type,XLCd to_lcd,const char * to_type)2318 open_iconv_mbstofcs(
2319 XLCd from_lcd,
2320 const char *from_type,
2321 XLCd to_lcd,
2322 const char *to_type)
2323 {
2324 return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2325 }
2326
2327 /* Registers UTF-8 converters for a UTF-8 locale. */
2328
2329 void
_XlcAddUtf8LocaleConverters(XLCd lcd)2330 _XlcAddUtf8LocaleConverters(
2331 XLCd lcd)
2332 {
2333 /* Register elementary converters. */
2334
2335 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
2336
2337 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
2338 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2339
2340 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2341
2342 /* Register converters for XlcNCharSet. This implicitly provides
2343 * converters from and to XlcNCompoundText. */
2344
2345 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
2346 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
2347 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
2348
2349 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2350 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2351 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2352
2353 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
2354 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
2355 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
2356 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
2357
2358 /* Register converters for XlcNFontCharSet */
2359 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
2360 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2361 }
2362
2363 void
_XlcAddGB18030LocaleConverters(XLCd lcd)2364 _XlcAddGB18030LocaleConverters(
2365 XLCd lcd)
2366 {
2367
2368 /* Register elementary converters. */
2369 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
2370 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
2371
2372 /* Register converters for XlcNCharSet. This implicitly provides
2373 * converters from and to XlcNCompoundText. */
2374
2375 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
2376 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
2377 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
2378 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
2379 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
2380
2381 /* Register converters for XlcNFontCharSet */
2382 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
2383
2384 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2385 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2386 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2387 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2388 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2389
2390 /* Register converters for XlcNFontCharSet */
2391 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2392 }
2393