1 /*
2
3 Copyright (c) 2003-2013 uim Project https://github.com/uim/uim
4
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16 3. Neither the name of authors nor the names of its contributors
17 may be used to endorse or promote products derived from this software
18 without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 SUCH DAMAGE.
31 */
32
33 // Locale dependent routines
34
35 #ifdef HAVE_CONFIG_H
36 # include <config.h>
37 #endif
38
39 #include <cerrno>
40 #include <clocale>
41 #include <cstdio>
42 #include <cstring>
43 #include <iconv.h>
44 #include <X11/Xlib.h>
45 #include <X11/Xutil.h>
46 #include "ximserver.h"
47 #include "util.h"
48 #include "uim/uim-util.h"
49 #ifndef __GNUC__
50 # ifdef HAVE_ALLOCA_H
51 # include <alloca.h>
52 # endif
53 #endif
54
55 // Return code if invalid. (utf8_mbtowc, utf8_wctomb)
56 #define RET_ILSEQ 0
57 // Return code if only a shift sequence of n bytes was read. (utf8_mbtowc)
58 #define RET_TOOFEW(n) (-1-(n))
59
60 // Cache of all available locales in working system
61 static char *all_locale_names;
62
// Maps the language of an input method to the locales it can be used with.
//
//   lang                  - language tag compared against the im's language
//   localename            - ':'-separated locale names without an encoding
//   supplemental_encoding - ':'-separated encoding names that are tried, in
//                           order, as "<locale>.<encoding>" when setlocale()
//                           rejects the bare locale name; NULL if none
//
// The entries are collected from the languages of m17n-lib, locale.dir in
// /usr/X11R6/lib/locale, and the languages of uim's input methods.
//
// Lookups scan from the top and stop at the first hit, so the order of the
// entries matters (see the "zh" entry).  The table is terminated by an
// all-NULL entry and is never modified at run time.
static const struct {
    const char *lang;
    const char *localename;
    const char *supplemental_encoding;
} locale_map[] = {
    {"af", "af_ZA", "ISO8859-1:UTF-8"},
    {"am", "am_ET", "UTF-8"},
    {"ar", "ar_AA:ar_BH:ar_DZ:ar_EG:ar_IQ:ar_JO:ar_KW:ar_LB:ar_LY:ar_MA:ar_OM:ar_QA:ar_SA:ar_SD:ar_SY:ar_TN:ar_YE", "ISO8859-6:UTF-8"},
    // {"as", "as", NULL},
    {"az", "az_AZ", "ISO8859-9E:UTF-8"},
    {"be", "be_BY", "CP1251:UTF-8"},
    {"bg", "bg_BG", "ISO8859-5:CP1251:KOI8-R:UTF-8"},
    {"bn", "bn_BD:bn_IN", "UTF-8"},
    // {"bo", "bo", NULL},
    {"br", "br_FR:br_FR@euro", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"ca", "ca_ES:ca_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"cs", "cs_CZ", "ISO8859-2:UTF-8"},
    {"cy", "cy_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"cz", "cz_CZ", "ISO8859-2"},
    {"da", "da_DK", "ISO8859-1:ISO8859-15:UTF-8"},
    {"de", "de_DE:de_DE@euro:de_AT:de_AT@euro:de_BE:de_BE@euro:de_CH:de_LI:de_LU:de_LU@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"el", "el_GR:el_GR@euro", "ISO8859-7:ISO8859-15:UTF-8"},
    {"en", "en_US:en_AU:en_BE:en_BZ:en_BW:en_CA:en_GB:en_HK:en_IE:en_IE@euro:en_IN:en_JM:en_NZ:en_PH:en_SG:en_TT:en_UK:en_ZA", "ISO8859-1:ISO8859-15:UTF-8"},
    {"eo", "eo_XX:eo_EO", "ISO8859-3"},
    {"es", "es_ES:es_ES@euro:es_AR:es_BO:es_CL:es_CO:es_CR:es_DO:es_EC:es_GT:es_HN:es_MX:es_NI:es_PA:es_PE:es_PR:es_PY:es_SV:es_US:es_UY:es_VE", "ISO8859-1:ISO8859-15:UTF-8"},
    {"et", "et_EE", "ISO8859-15:ISO8859-1:ISO8859-4:UTF-8"},
    {"eu", "eu_ES:eu_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"fa", "fa_IR", "UTF-8:ISIRI-3342"},
    {"fi", "fi_FI:fi_FI@euro", "ISO8859-15:ISO8859-1:UTF-8"},
    {"fo", "fo_FO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"fr", "fr_FR:fr_FR@euro:fr_BE:fr_BE@euro:fr_CA:fr_CH:fr_LU:fr_LU@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ga", "ga_IE:ga_IE@euro", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"gd", "gd_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"gl", "gl_ES:gl_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"gu", "gu", NULL},
    {"gv", "gv_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"he", "he_IL", "ISO8859-8:CP1255:UTF-8"},
    {"hi", "hi_IN", "ISCII-DEV:UTF-8"},
    {"hr", "hr_HR", "ISO8859-2:UTF-8"},
    {"hu", "hu_HU", "ISO8859-2:UTF-8"},
    {"hy", "hy_AM", NULL},
    {"id", "id_ID", NULL},
    {"is", "is_IS", "ISO8859-1:ISO8859-15:UTF-8"},
    {"it", "it_IT:it_IT@euro:it_CH", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ja", "ja_JP", "eucJP:EUC:SJIS:UTF-8"},
    {"ka", "ka_GE", "GEORGIAN-ACADEMY:GEORGIAN-PS:UTF-8"},
    // {"kk", "kk", NULL},
    {"kl", "kl_GL", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"kn", "kn", NULL},
    {"ko", "ko_KR", "eucKR:EUC:UTF-8"},
    {"kw", "kw_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"lo", "lo_LA", "MULELAO-1:IBM-CP1133:UTF-8"},
    {"lt", "lt_LT", "ISO8859-13:ISO8859-4:UTF-8"},
    {"lv", "lv_LV", "ISO8859-13:UTF-8"},
    {"mi", "mi_NZ", "ISO8859-1:ISO8859-5:ISO8859-13:UTF-8"},
    {"mk", "mk_MK", "ISO8859-5:CP1251:UTF-8"},
    // {"ml", "ml", NULL},
    {"ms", "ms_MY", "ISO8859-1:UTF-8"},
    {"mt", "mt_MT", "ISO8859-3:UTF-8"},
    {"nb", "nb_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"nl", "nl_NL:nl_NL@euro:nl_BE:nl_BE@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"nn", "nn_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"no", "no_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ny", "ny_NO", "ISO8859-1:ISO8859-15"},
    {"oc", "oc_FR", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"or", "or", NULL},
    // {"pa", "pa", NULL},
    {"pd", "pd_DE", "ISO8859-1:ISO8859-15"},
    {"ph", "ph_PH", "ISO8859-1"},
    {"pl", "pl_PL", "ISO8859-2:UTF-8"},
    {"pp", "pp_AN", "ISO8859-1"},
    {"pt", "pt_PT:pt_PT@euro:pt_BR", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ro", "ro_RO", "ISO8859-2:UTF-8"},
    {"ru", "ru_RU:ru_UA", "KOI8-R:ISO8859-5:CP1251:KOI8-U:UTF-8"},
    {"sh", "sh_YU", "ISO8859-2:UTF-8"},
    {"sk", "sk_SK", "ISO8859-2:UTF-8"},
    {"sl", "sl_SI", "ISO8859-2:UTF-8"},
    {"sp", "sp_YU", "ISO8859-5"},
    {"sq", "sq_AL", "ISO8859-2:UTF-8"},
    {"sr", "sr_YU:sr_YU@cyrillic:sr_SP", "ISO8859-2:ISO8859-5:CP1251:UTF-8"},
    {"sv", "sv_SE:sv_SE@euro:sv_FI:sv_FI@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ta", "ta_IN", "TSCII-0:UTF-8"},
    {"te", "te_IN", "UTF-8"},
    {"tg", "tg_TJ", "KOI8-C:KOI8-T:UTF-8"},
    {"th", "th_TH", "ISO8859-11:TIS620:UTF-8"},
    {"tl", "tl_PH", "ISO8859-1:UTF-8"},
    {"tr", "tr_TR", "ISO8859-9:UTF-8"},
    {"tt", "tt_RU", "TATAR-CYR:KOI8-C:UTF-8"},
    {"uk", "uk_UA", "KOI8-U:ISO8859-5:CP1251:UTF-8"},
    {"ur", "ur_PK", "CP1256:UTF-8"},
    {"vi", "vi_VN", "TCVN:VISCII:UTF-8"},
    {"wa", "wa_BE:wa_BE@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"yi", "yi_US", "CP1255:UTF-8"},
    {"zh_CN", "zh_CN", "gb2312:eucCN:gbk:UTF-8"},	// from uim-py and uim-pyunihan
    {"zh_TW:zh_HK", "zh_TW:zh_HK", "big5:eucTW:big5hkscs:UTF-8"},	// from uim-pinyin-big5
    // "zh" has no supplemental encodings (NULL), so it has to come after
    // the zh_CN and zh_TW:zh_HK entries: a first-match search by locale
    // name must find those entries, and their encodings, first.
    {"zh", "zh_CN:zh_TW:zh_HK", NULL},
    {NULL, NULL, NULL}
};
163
164
165 static char *
ustring_to_utf8_str(uString * s)166 ustring_to_utf8_str(uString *s)
167 {
168 uString::iterator i;
169 int l = 0, nbyte;
170 unsigned char utf8[6];
171 // count the length
172 for (i = s->begin(); i != s->end(); ++i) {
173 nbyte = utf8_wctomb(utf8, *i);
174 l += nbyte;
175 }
176 char *c = (char *)malloc(l + 1);
177 c[l] = 0;
178 l = 0;
179 for (i = s->begin(); i != s->end(); ++i) {
180 nbyte = utf8_wctomb(utf8, *i);
181 int j;
182 for (j = 0; j < nbyte; j++) {
183 c[l] = utf8[j];
184 l++;
185 }
186 }
187 return c;
188 }
189
// Out-of-line definition of the Locale destructor.  The body is empty:
// nothing is released here.
Locale::~Locale()
{
}
193
// Default implementation: always answers false.  UTF8_Locale in this file
// overrides it to return true.
bool
Locale::supportOverTheSpot()
{
    return false;
}
199
200 class UTF8_Locale : public Locale {
201 public:
202 UTF8_Locale(const char *encoding);
203 virtual ~UTF8_Locale();
204 virtual char *utf8_to_native_str(char *str);
uStringToCtext(uString * us)205 virtual char *uStringToCtext(uString *us) {
206 char *str = ustring_to_utf8_str(us);
207 XTextProperty prop;
208
209 if (!strcmp(mEncoding, "UTF-8")) {
210 XmbTextListToTextProperty(XimServer::gDpy, &str, 1,
211 XCompoundTextStyle, &prop);
212 free(str);
213 } else {
214 char *native_str;
215
216 native_str = utf8_to_native_str(str);
217 free(str);
218 if (!native_str)
219 return NULL;
220
221 XmbTextListToTextProperty(XimServer::gDpy, &native_str, 1,
222 XCompoundTextStyle, &prop);
223 free(native_str);
224 }
225 char *res = strdup((char *)prop.value);
226 XFree(prop.value);
227 return res;
228 }
supportOverTheSpot()229 virtual bool supportOverTheSpot() {
230 return true;
231 }
232 private:
233 char *mEncoding;
234 iconv_t m_iconv_cd;
235 };
236
UTF8_Locale(const char * encoding)237 UTF8_Locale::UTF8_Locale(const char *encoding)
238 {
239 mEncoding = strdup(encoding);
240 if (uim_iconv->is_convertible(encoding, "UTF-8"))
241 m_iconv_cd = (iconv_t)uim_iconv->create(encoding, "UTF-8");
242 else
243 m_iconv_cd = (iconv_t)-1;
244 }
245
~UTF8_Locale()246 UTF8_Locale::~UTF8_Locale()
247 {
248 free(mEncoding);
249 if (m_iconv_cd != (iconv_t)-1 && m_iconv_cd)
250 uim_iconv->release(m_iconv_cd);
251 }
252
utf8_to_native_str(char * utf8)253 char *UTF8_Locale::utf8_to_native_str(char *utf8)
254 {
255 char *str;
256
257 if (m_iconv_cd == (iconv_t)-1)
258 return NULL;
259
260 str = uim_iconv->convert(m_iconv_cd, utf8);
261
262 if (strlen(str) == 0) {
263 free(str);
264 return NULL;
265 }
266 return str;
267 }
268
269
270 static const char *
get_valid_locales(const char * locales)271 get_valid_locales(const char *locales)
272 {
273 char *valid_locales = NULL;
274 char *validated;
275 char *locale;
276 char *tmp, *tmpp;
277 int len = 0;
278
279 tmp = tmpp = strdup(locales);
280 char *orig_locale = strdup(setlocale(LC_CTYPE, NULL));
281
282 // locales is separated with ':'
283 while ((locale = strsep(&tmpp, ":")) != NULL) {
284 if (setlocale(LC_CTYPE, locale) != NULL) {
285 if (asprintf(&validated, "%s:", locale) == -1) {
286 free(validated);
287 continue;
288 }
289 len += static_cast<int>(strlen(validated));
290 if (valid_locales) {
291 valid_locales = (char *)realloc(valid_locales, len + 1);
292 strcat(valid_locales, validated);
293 } else
294 valid_locales = strdup(validated);
295
296 free(validated);
297 } else {
298 // retry with supplemental encodings
299 int i;
300 for (i = 0; locale_map[i].localename; i++) {
301 if (is_locale_included(locale_map[i].localename, locale))
302 break;
303 }
304 if (locale_map[i].supplemental_encoding) {
305 char *encs, *encsp, *encoding;
306 encs = encsp = strdup(locale_map[i].supplemental_encoding);
307
308 while ((encoding = strsep(&encsp, ":")) != NULL) {
309 char *test_locale = strdup(locale);
310 test_locale = (char *)realloc(test_locale, strlen(test_locale) + strlen(encoding) + 2);
311 strcat(test_locale, ".");
312 strcat(test_locale, encoding);
313
314 if (setlocale(LC_CTYPE, test_locale) != NULL) {
315 if (asprintf(&validated, "%s:", locale) == -1) {
316 free(validated);
317 continue;
318 }
319 len += static_cast<int>(strlen(validated));
320
321 if (valid_locales) {
322 valid_locales = (char *)realloc(valid_locales, len + 1);
323 strcat(valid_locales, validated);
324 } else
325 valid_locales = strdup(validated);
326
327 free(validated);
328 free(test_locale);
329 break;
330 } else
331 free(test_locale);
332 }
333 free(encs);
334 }
335 }
336 }
337 if (valid_locales)
338 valid_locales[len - 1] = '\0'; // remove trailing ':'
339 else
340 valid_locales = strdup(""); // There is no valid locale or im-lang is
341 // "". These im will be used with
342 // en_US.UTF-8.
343
344 setlocale(LC_CTYPE, orig_locale);
345 free(orig_locale);
346 free(tmp);
347
348 return valid_locales;
349 }
350
351 static const char *
all_locales(void)352 all_locales(void)
353 {
354 int i, len = 0;
355 char *locales = NULL, *tmp;
356 const char *valid_locales;
357
358 // check cache
359 if (all_locale_names)
360 return all_locale_names;
361
362 for (i = 0; locale_map[i].lang; i++) {
363 // exclude languages of which uim has its own version.
364 if (!strcmp(locale_map[i].lang, "zh"))
365 continue;
366
367 valid_locales = get_valid_locales(locale_map[i].localename);
368 if (!strcmp(valid_locales, "")) {
369 // There is no valid locale.
370 free((char *)valid_locales);
371 continue;
372 }
373
374 if (asprintf(&tmp, "%s:", valid_locales) == -1) {
375 free((char *)valid_locales);
376 free(tmp);
377 continue;
378 }
379 free((char *)valid_locales);
380
381 if (locales == NULL) {
382 len = static_cast<int>(strlen(tmp));
383 locales = strdup(tmp);
384 } else {
385 len += static_cast<int>(strlen(tmp));
386 locales = (char *)realloc(locales, len + 1);
387 strcat(locales, tmp);
388 }
389 free(tmp);
390 }
391 // remove trailing ":"
392 if (locales)
393 locales[len - 1] = '\0';
394
395 // assign result into the cache
396 all_locale_names = locales;
397
398 return locales;
399 }
400
401 const char *
compose_localenames_from_im_lang(const char * im_lang)402 compose_localenames_from_im_lang(const char *im_lang)
403 {
404 int i;
405 const char *name = NULL;
406
407 for (i = 0; locale_map[i].lang; i++) {
408 if (!strcmp(im_lang, locale_map[i].lang)) {
409 name = locale_map[i].localename;
410 break;
411 }
412 }
413
414 if (name == NULL) {
415 // No lang in locale_map.
416 if (!strcmp(im_lang, "*")) // im with lang "*" will be enabled for
417 // all locales
418 name = all_locales();
419 else if (!strcmp(im_lang, ""))
420 name = ""; // im with lang "" will be only enabled in UTF-8
421 // clients
422 else
423 name = "en_US"; // shouldn't happen
424 }
425
426 return name;
427 }
428
// Tells whether locale is exactly one of the names in the ':'-separated
// list locales.  Names are compared as a whole; prefixes do not match.
// Neither argument is modified and no memory is allocated.
bool
is_locale_included(const char *locales, const char *locale)
{
    const size_t wanted_len = strlen(locale);
    const char *entry = locales;

    for (;;) {
	const char *sep = strchr(entry, ':');
	const size_t entry_len = sep ? (size_t)(sep - entry) : strlen(entry);

	if (entry_len == wanted_len && memcmp(entry, locale, wanted_len) == 0)
	    return true;
	if (!sep)
	    return false;	// that was the last name in the list
	entry = sep + 1;
    }
}
452
453 char *
get_prefered_locale(const char * locales)454 get_prefered_locale(const char *locales)
455 {
456 char *valid_locales;
457 char *locale;
458 char *sep;
459
460 valid_locales = (char *)get_valid_locales(locales);
461 if (!strcmp(valid_locales, "")) {
462 // use en_US for im with lang "" and im without valid locale
463 free(valid_locales);
464 locale = strdup("en_US");
465 } else {
466 locale = valid_locales;
467 sep = strchr(locale, ':');
468 if (sep)
469 *sep = '\0';
470 }
471
472 return locale;
473 }
474
// Factory for Locale objects: returns a UTF8_Locale for the given encoding
// name, allocated with new.  Nothing in this file deletes it, so the
// caller presumably owns the object -- TODO confirm against the callers.
Locale *createLocale(const char *encoding)
{
    return new UTF8_Locale(encoding);
}
479
480 int
utf8_mbtowc(uchar * wc,const unsigned char * src,int src_len)481 utf8_mbtowc(uchar *wc, const unsigned char *src, int src_len)
482 {
483 if (!wc)
484 return 0;
485
486 unsigned char c = src[0];
487 if (c < 0x80) {
488 *wc = c;
489 return 1;
490 } else if (c < 0xc2) {
491 return RET_ILSEQ;
492 } else if (c < 0xe0) {
493 if (src_len < 2)
494 return RET_TOOFEW(0);
495 if (!((src[1] ^ 0x80) < 0x40))
496 return RET_ILSEQ;
497 *wc = ((uchar)(c & 0x1f) << 6) | (uchar)(src[1] ^ 0x80);
498 return 2;
499 } else if (c < 0xf0) {
500 if (src_len < 3)
501 return RET_TOOFEW(0);
502 if (!((src[1] ^ 0x80) < 0x40 &&
503 (src[2] ^ 0x80) < 0x40 &&
504 (c >= 0xe1 || src[1] >= 0xa0)))
505 return RET_ILSEQ;
506 *wc = ((uchar)(c & 0x0f) << 12) |
507 ((uchar)(src[1] ^ 0x80) << 6) |
508 (uchar)(src[2] ^ 0x80);
509 return 3;
510 } else if (c < 0xf8) {
511 if (src_len < 4)
512 return RET_TOOFEW(0);
513 if (!((src[1] ^ 0x80) < 0x40 &&
514 (src[2] ^ 0x80) < 0x40 &&
515 (src[3] ^ 0x80) < 0x40 &&
516 (c >= 0xf1 || src[1] >= 0x90)))
517 return RET_ILSEQ;
518 *wc = ((uchar)(c & 0x07) << 18) |
519 ((uchar)(src[1] ^ 0x80) << 12) |
520 ((uchar)(src[2] ^ 0x80) << 6) |
521 (uchar)(src[3] ^ 0x80);
522 return 4;
523 } else if (c < 0xfc) {
524 if (src_len < 5)
525 return RET_TOOFEW(0);
526 if (!((src[1] ^ 0x80) < 0x40 &&
527 (src[2] ^ 0x80) < 0x40 &&
528 (src[3] ^ 0x80) < 0x40 &&
529 (src[4] ^ 0x80) < 0x40 &&
530 (c >= 0xf9 || src[1] >= 0x88)))
531 return RET_ILSEQ;
532 *wc = ((uchar)(c & 0x03) << 24) |
533 ((uchar)(src[1] ^ 0x80) << 18) |
534 ((uchar)(src[2] ^ 0x80) << 12) |
535 ((uchar)(src[3] ^ 0x80) << 6) |
536 (uchar)(src[4] ^ 0x80);
537 return 5;
538 } else if (c < 0xfe) {
539 if (src_len < 6)
540 return RET_TOOFEW(0);
541 if (!((src[1] ^ 0x80) < 0x40 &&
542 (src[2] ^ 0x80) < 0x40 &&
543 (src[3] ^ 0x80) < 0x40 &&
544 (src[4] ^ 0x80) < 0x40 &&
545 (src[5] ^ 0x80) < 0x40 &&
546 (c >= 0xfd || src[1] >= 0x84)))
547 return RET_ILSEQ;
548 *wc = ((uchar)(c & 0x01) << 30) |
549 ((uchar)(src[1] ^ 0x80) << 24) |
550 ((uchar)(src[2] ^ 0x80) << 18) |
551 ((uchar)(src[3] ^ 0x80) << 12) |
552 ((uchar)(src[4] ^ 0x80) << 6) |
553 (uchar)(src[5] ^ 0x80);
554 return 6;
555 } else
556 return RET_ILSEQ;
557 }
558
559 int
utf8_wctomb(unsigned char * dest,uchar wc)560 utf8_wctomb(unsigned char *dest, uchar wc)
561 {
562 if (!dest)
563 return 0;
564
565 int count;
566 if (wc < 0x80)
567 count = 1;
568 else if (wc < 0x800)
569 count = 2;
570 else if (wc < 0x10000)
571 count = 3;
572 else if (wc < 0x200000)
573 count = 4;
574 else if (wc < 0x4000000)
575 count = 5;
576 else if (wc <= 0x7fffffff)
577 count = 6;
578 else
579 return RET_ILSEQ;
580 switch (count) { // note: falls through cases (no break)
581 case 6:
582 dest[5] = (unsigned char)(0x80 | (wc & 0x3f));
583 wc = wc >> 6; wc |= 0x4000000;
584 case 5:
585 dest[4] = (unsigned char)(0x80 | (wc & 0x3f));
586 wc = wc >> 6; wc |= 0x200000;
587 case 4:
588 dest[3] = (unsigned char)(0x80 | (wc & 0x3f));
589 wc = wc >> 6; wc |= 0x10000;
590 case 3:
591 dest[2] = (unsigned char)(0x80 | (wc & 0x3f));
592 wc = wc >> 6; wc |= 0x800;
593 case 2:
594 dest[1] = (unsigned char)(0x80 | (wc & 0x3f));
595 wc = wc >> 6; wc |= 0xc0;
596 case 1:
597 dest[0] = (unsigned char)wc;
598 }
599 return count;
600 }
601