1 /*
2 
3   Copyright (c) 2003-2013 uim Project https://github.com/uim/uim
4 
5   All rights reserved.
6 
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions
9   are met:
10 
11   1. Redistributions of source code must retain the above copyright
12      notice, this list of conditions and the following disclaimer.
13   2. Redistributions in binary form must reproduce the above copyright
14      notice, this list of conditions and the following disclaimer in the
15      documentation and/or other materials provided with the distribution.
16   3. Neither the name of authors nor the names of its contributors
17      may be used to endorse or promote products derived from this software
18      without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30   SUCH DAMAGE.
31 */
32 
33 // Locale dependent routines
34 
35 #ifdef HAVE_CONFIG_H
36 # include <config.h>
37 #endif
38 
39 #include <cerrno>
40 #include <clocale>
41 #include <cstdio>
42 #include <cstring>
43 #include <iconv.h>
44 #include <X11/Xlib.h>
45 #include <X11/Xutil.h>
46 #include "ximserver.h"
47 #include "util.h"
48 #include "uim/uim-util.h"
49 #ifndef __GNUC__
50 # ifdef HAVE_ALLOCA_H
51 #  include <alloca.h>
52 # endif
53 #endif
54 
// Status codes shared by utf8_mbtowc() and utf8_wctomb().
//
// RET_ILSEQ:     the byte sequence (or code point) is not valid.
#define RET_ILSEQ	(0)
// RET_TOOFEW(n): the input ended before the sequence was complete; always
//                negative.  utf8_mbtowc() returns RET_TOOFEW(0), i.e. -1.
#define RET_TOOFEW(n)	(-1 - (n))

// Cache of the colon-separated list of all locales available on the running
// system.  Stays NULL until all_locales() has produced a result.
static char *all_locale_names;
62 
// Language -> locale lookup table.
//
// The rows were collected from the languages known to m17n-lib, from
// locale.dir in /usr/X11R6/lib/locale, and from the languages declared by
// uim's input methods.
//
//   lang                   language tag of an input method
//   localename             colon-separated list of locale names for that
//                          language
//   supplemental_encoding  colon-separated list of encodings that are tried
//                          as "<locale>.<encoding>" when the bare locale name
//                          is rejected by setlocale(); NULL when there is
//                          nothing to try
//
// The table is read-only and is terminated by an all-NULL row.
static const struct {
    const char *lang;
    const char *localename;
    const char *supplemental_encoding;
} locale_map[] = {
    {"af", "af_ZA", "ISO8859-1:UTF-8"},
    {"am", "am_ET", "UTF-8"},
    {"ar", "ar_AA:ar_BH:ar_DZ:ar_EG:ar_IQ:ar_JO:ar_KW:ar_LB:ar_LY:ar_MA:ar_OM:ar_QA:ar_SA:ar_SD:ar_SY:ar_TN:ar_YE", "ISO8859-6:UTF-8"},
    // {"as", "as", NULL},
    {"az", "az_AZ", "ISO8859-9E:UTF-8"},
    {"be", "be_BY", "CP1251:UTF-8"},
    {"bg", "bg_BG", "ISO8859-5:CP1251:KOI8-R:UTF-8"},
    {"bn", "bn_BD:bn_IN", "UTF-8"},
    // {"bo", "bo", NULL},
    {"br", "br_FR:br_FR@euro", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"ca", "ca_ES:ca_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"cs", "cs_CZ", "ISO8859-2:UTF-8"},
    {"cy", "cy_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"cz", "cz_CZ", "ISO8859-2"},
    {"da", "da_DK", "ISO8859-1:ISO8859-15:UTF-8"},
    {"de", "de_DE:de_DE@euro:de_AT:de_AT@euro:de_BE:de_BE@euro:de_CH:de_LI:de_LU:de_LU@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"el", "el_GR:el_GR@euro", "ISO8859-7:ISO8859-15:UTF-8"},
    {"en", "en_US:en_AU:en_BE:en_BZ:en_BW:en_CA:en_GB:en_HK:en_IE:en_IE@euro:en_IN:en_JM:en_NZ:en_PH:en_SG:en_TT:en_UK:en_ZA", "ISO8859-1:ISO8859-15:UTF-8"},
    {"eo", "eo_XX:eo_EO", "ISO8859-3"},
    {"es", "es_ES:es_ES@euro:es_AR:es_BO:es_CL:es_CO:es_CR:es_DO:es_EC:es_GT:es_HN:es_MX:es_NI:es_PA:es_PE:es_PR:es_PY:es_SV:es_US:es_UY:es_VE", "ISO8859-1:ISO8859-15:UTF-8"},
    {"et", "et_EE", "ISO8859-15:ISO8859-1:ISO8859-4:UTF-8"},
    {"eu", "eu_ES:eu_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"fa", "fa_IR", "UTF-8:ISIRI-3342"},
    {"fi", "fi_FI:fi_FI@euro", "ISO8859-15:ISO8859-1:UTF-8"},
    {"fo", "fo_FO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"fr", "fr_FR:fr_FR@euro:fr_BE:fr_BE@euro:fr_CA:fr_CH:fr_LU:fr_LU@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ga", "ga_IE:ga_IE@euro", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"gd", "gd_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"gl", "gl_ES:gl_ES@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"gu", "gu", NULL},
    {"gv", "gv_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"he", "he_IL", "ISO8859-8:CP1255:UTF-8"},
    {"hi", "hi_IN", "ISCII-DEV:UTF-8"},
    {"hr", "hr_HR", "ISO8859-2:UTF-8"},
    {"hu", "hu_HU", "ISO8859-2:UTF-8"},
    {"hy", "hy_AM", NULL},
    {"id", "id_ID", NULL},
    {"is", "is_IS", "ISO8859-1:ISO8859-15:UTF-8"},
    {"it", "it_IT:it_IT@euro:it_CH", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ja", "ja_JP", "eucJP:EUC:SJIS:UTF-8"},
    {"ka", "ka_GE", "GEORGIAN-ACADEMY:GEORGIAN-PS:UTF-8"},
    // {"kk", "kk", NULL},
    {"kl", "kl_GL", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"kn", "kn", NULL},
    {"ko", "ko_KR", "eucKR:EUC:UTF-8"},
    {"kw", "kw_GB", "ISO8859-1:ISO8859-14:ISO8859-15:UTF-8"},
    {"lo", "lo_LA", "MULELAO-1:IBM-CP1133:UTF-8"},
    {"lt", "lt_LT", "ISO8859-13:ISO8859-4:UTF-8"},
    {"lv", "lv_LV", "ISO8859-13:UTF-8"},
    {"mi", "mi_NZ", "ISO8859-1:ISO8859-5:ISO8859-13:UTF-8"},
    {"mk", "mk_MK", "ISO8859-5:CP1251:UTF-8"},
    // {"ml", "ml", NULL},
    {"ms", "ms_MY", "ISO8859-1:UTF-8"},
    {"mt", "mt_MT", "ISO8859-3:UTF-8"},
    {"nb", "nb_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"nl", "nl_NL:nl_NL@euro:nl_BE:nl_BE@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"nn", "nn_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"no", "no_NO", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ny", "ny_NO", "ISO8859-1:ISO8859-15"},
    {"oc", "oc_FR", "ISO8859-1:ISO8859-15:UTF-8"},
    // {"or", "or", NULL},
    // {"pa", "pa", NULL},
    {"pd", "pd_DE", "ISO8859-1:ISO8859-15"},
    {"ph", "ph_PH", "ISO8859-1"},
    {"pl", "pl_PL", "ISO8859-2:UTF-8"},
    {"pp", "pp_AN", "ISO8859-1"},
    {"pt", "pt_PT:pt_PT@euro:pt_BR", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ro", "ro_RO", "ISO8859-2:UTF-8"},
    {"ru", "ru_RU:ru_UA", "KOI8-R:ISO8859-5:CP1251:KOI8-U:UTF-8"},
    {"sh", "sh_YU", "ISO8859-2:UTF-8"},
    {"sk", "sk_SK", "ISO8859-2:UTF-8"},
    {"sl", "sl_SI", "ISO8859-2:UTF-8"},
    {"sp", "sp_YU", "ISO8859-5"},
    {"sq", "sq_AL", "ISO8859-2:UTF-8"},
    {"sr", "sr_YU:sr_YU@cyrillic:sr_SP", "ISO8859-2:ISO8859-5:CP1251:UTF-8"},
    {"sv", "sv_SE:sv_SE@euro:sv_FI:sv_FI@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"ta", "ta_IN", "TSCII-0:UTF-8"},
    {"te", "te_IN", "UTF-8"},
    {"tg", "tg_TJ", "KOI8-C:KOI8-T:UTF-8"},
    {"th", "th_TH", "ISO8859-11:TIS620:UTF-8"},
    {"tl", "tl_PH", "ISO8859-1:UTF-8"},
    {"tr", "tr_TR", "ISO8859-9:UTF-8"},
    {"tt", "tt_RU", "TATAR-CYR:KOI8-C:UTF-8"},
    {"uk", "uk_UA", "KOI8-U:ISO8859-5:CP1251:UTF-8"},
    {"ur", "ur_PK", "CP1256:UTF-8"},
    {"vi", "vi_VN", "TCVN:VISCII:UTF-8"},
    {"wa", "wa_BE:wa_BE@euro", "ISO8859-1:ISO8859-15:UTF-8"},
    {"yi", "yi_US", "CP1255:UTF-8"},
    {"zh_CN", "zh_CN", "gb2312:eucCN:gbk:UTF-8"},	// from uim-py and uim-pyunihan
    {"zh_TW:zh_HK", "zh_TW:zh_HK", "big5:eucTW:big5hkscs:UTF-8"},	// from uim-pinyin-big5
    // The generic "zh" row must stay after the two rows above: lookups by
    // locale name take the first matching row, and this one carries no
    // supplemental encoding.
    {"zh", "zh_CN:zh_TW:zh_HK", NULL},
    {NULL, NULL, NULL}	// terminator
};
163 
164 
165 static char *
ustring_to_utf8_str(uString * s)166 ustring_to_utf8_str(uString *s)
167 {
168     uString::iterator i;
169     int l = 0, nbyte;
170     unsigned char utf8[6];
171     // count the length
172     for (i = s->begin(); i != s->end(); ++i) {
173 	nbyte = utf8_wctomb(utf8, *i);
174 	l += nbyte;
175     }
176     char *c = (char *)malloc(l + 1);
177     c[l] = 0;
178     l = 0;
179     for (i = s->begin(); i != s->end(); ++i) {
180 	nbyte = utf8_wctomb(utf8, *i);
181 	int j;
182 	for (j = 0; j < nbyte; j++) {
183 	    c[l] = utf8[j];
184 	    l++;
185 	}
186     }
187     return c;
188 }
189 
~Locale()190 Locale::~Locale()
191 {
192 }
193 
194 bool
supportOverTheSpot()195 Locale::supportOverTheSpot()
196 {
197     return false;
198 }
199 
200 class UTF8_Locale : public Locale {
201 public:
202     UTF8_Locale(const char *encoding);
203     virtual ~UTF8_Locale();
204     virtual char *utf8_to_native_str(char *str);
uStringToCtext(uString * us)205     virtual char *uStringToCtext(uString *us) {
206 	char *str = ustring_to_utf8_str(us);
207 	XTextProperty prop;
208 
209 	if (!strcmp(mEncoding, "UTF-8")) {
210 	    XmbTextListToTextProperty(XimServer::gDpy, &str, 1,
211 			    XCompoundTextStyle, &prop);
212 	    free(str);
213 	} else {
214 	    char *native_str;
215 
216 	    native_str = utf8_to_native_str(str);
217 	    free(str);
218 	    if (!native_str)
219 		return NULL;
220 
221 	    XmbTextListToTextProperty(XimServer::gDpy, &native_str, 1,
222 			    XCompoundTextStyle, &prop);
223 	    free(native_str);
224 	}
225 	char *res = strdup((char *)prop.value);
226 	XFree(prop.value);
227 	return res;
228     }
supportOverTheSpot()229     virtual bool supportOverTheSpot() {
230 	return true;
231     }
232 private:
233     char *mEncoding;
234     iconv_t m_iconv_cd;
235 };
236 
UTF8_Locale(const char * encoding)237 UTF8_Locale::UTF8_Locale(const char *encoding)
238 {
239     mEncoding = strdup(encoding);
240     if (uim_iconv->is_convertible(encoding, "UTF-8"))
241 	m_iconv_cd = (iconv_t)uim_iconv->create(encoding, "UTF-8");
242     else
243 	m_iconv_cd = (iconv_t)-1;
244 }
245 
~UTF8_Locale()246 UTF8_Locale::~UTF8_Locale()
247 {
248     free(mEncoding);
249     if (m_iconv_cd != (iconv_t)-1 && m_iconv_cd)
250         uim_iconv->release(m_iconv_cd);
251 }
252 
utf8_to_native_str(char * utf8)253 char *UTF8_Locale::utf8_to_native_str(char *utf8)
254 {
255     char *str;
256 
257     if (m_iconv_cd == (iconv_t)-1)
258 	return NULL;
259 
260     str = uim_iconv->convert(m_iconv_cd, utf8);
261 
262     if (strlen(str) == 0) {
263 	    free(str);
264 	    return NULL;
265     }
266     return str;
267 }
268 
269 
270 static const char *
get_valid_locales(const char * locales)271 get_valid_locales(const char *locales)
272 {
273     char *valid_locales = NULL;
274     char *validated;
275     char *locale;
276     char *tmp, *tmpp;
277     int len = 0;
278 
279     tmp = tmpp = strdup(locales);
280     char *orig_locale = strdup(setlocale(LC_CTYPE, NULL));
281 
282     // locales is separated with ':'
283     while ((locale = strsep(&tmpp, ":")) != NULL) {
284 	if (setlocale(LC_CTYPE, locale) != NULL) {
285 	    if (asprintf(&validated, "%s:", locale) == -1) {
286                 free(validated);
287                 continue;
288             }
289 	    len += static_cast<int>(strlen(validated));
290 	    if (valid_locales) {
291 		valid_locales = (char *)realloc(valid_locales, len + 1);
292 		strcat(valid_locales, validated);
293 	    } else
294 		valid_locales = strdup(validated);
295 
296 	    free(validated);
297 	} else {
298 	    // retry with supplemental encodings
299 	    int i;
300 	    for (i = 0; locale_map[i].localename; i++) {
301 		if (is_locale_included(locale_map[i].localename, locale))
302 		    break;
303 	    }
304 	    if (locale_map[i].supplemental_encoding) {
305 		char *encs, *encsp, *encoding;
306 		encs = encsp = strdup(locale_map[i].supplemental_encoding);
307 
308 		while ((encoding = strsep(&encsp, ":")) != NULL) {
309 		    char *test_locale = strdup(locale);
310 		    test_locale = (char *)realloc(test_locale, strlen(test_locale) + strlen(encoding) + 2);
311 		    strcat(test_locale, ".");
312 		    strcat(test_locale, encoding);
313 
314 		    if (setlocale(LC_CTYPE, test_locale) != NULL) {
315 			if (asprintf(&validated, "%s:", locale) == -1) {
316                             free(validated);
317                             continue;
318                         }
319 			len += static_cast<int>(strlen(validated));
320 
321 			if (valid_locales) {
322 			    valid_locales = (char *)realloc(valid_locales, len + 1);
323 			    strcat(valid_locales, validated);
324 			} else
325 			    valid_locales = strdup(validated);
326 
327 			free(validated);
328 			free(test_locale);
329 			break;
330 		    } else
331 			free(test_locale);
332 		}
333 		free(encs);
334 	    }
335 	}
336     }
337     if (valid_locales)
338 	valid_locales[len - 1] = '\0'; // remove trailing ':'
339     else
340 	valid_locales = strdup(""); // There is no valid locale or im-lang is
341     				    // "".  These im will be used with
342 				    // en_US.UTF-8.
343 
344     setlocale(LC_CTYPE, orig_locale);
345     free(orig_locale);
346     free(tmp);
347 
348     return valid_locales;
349 }
350 
351 static const char *
all_locales(void)352 all_locales(void)
353 {
354     int i, len = 0;
355     char *locales = NULL, *tmp;
356     const char *valid_locales;
357 
358     // check cache
359     if (all_locale_names)
360 	return all_locale_names;
361 
362     for (i = 0; locale_map[i].lang; i++) {
363 	// exclude languages of which uim has its own version.
364 	if (!strcmp(locale_map[i].lang, "zh"))
365 	    continue;
366 
367 	valid_locales = get_valid_locales(locale_map[i].localename);
368 	if (!strcmp(valid_locales, "")) {
369 	    // There is no valid locale.
370 	    free((char *)valid_locales);
371 	    continue;
372 	}
373 
374 	if (asprintf(&tmp, "%s:", valid_locales) == -1) {
375 	    free((char *)valid_locales);
376             free(tmp);
377             continue;
378         }
379 	free((char *)valid_locales);
380 
381 	if (locales == NULL) {
382 	    len = static_cast<int>(strlen(tmp));
383 	    locales = strdup(tmp);
384 	} else {
385 	    len += static_cast<int>(strlen(tmp));
386 	    locales = (char *)realloc(locales, len + 1);
387 	    strcat(locales, tmp);
388 	}
389 	free(tmp);
390     }
391     // remove trailing ":"
392     if (locales)
393 	locales[len - 1] = '\0';
394 
395     // assign result into the cache
396     all_locale_names = locales;
397 
398     return locales;
399 }
400 
401 const char *
compose_localenames_from_im_lang(const char * im_lang)402 compose_localenames_from_im_lang(const char *im_lang)
403 {
404     int i;
405     const char *name = NULL;
406 
407     for (i = 0; locale_map[i].lang; i++) {
408 	if (!strcmp(im_lang, locale_map[i].lang)) {
409 	    name = locale_map[i].localename;
410 	    break;
411 	}
412     }
413 
414     if (name == NULL) {
415 	// No lang in locale_map.
416 	if (!strcmp(im_lang, "*")) // im with lang "*" will be enabled for
417 				   // all locales
418 	    name = all_locales();
419 	else if (!strcmp(im_lang, ""))
420 	    name = "";	// im with lang "" will be only enabled in UTF-8
421 			// clients
422 	else
423 	    name = "en_US";	// shouldn't happen
424     }
425 
426     return name;
427 }
428 
// Tell whether `locale` is one of the entries of the colon-separated list
// `locales`.
//
// Entries are compared whole and case-sensitively: "en" does not match
// "en_US", and a `locale` that itself contains ':' never matches.  An empty
// `locale` matches an empty entry (so "" is included in "").
//
// The list is scanned in place; nothing is allocated.
bool
is_locale_included(const char *locales, const char *locale)
{
    const size_t want = strlen(locale);
    const char *entry = locales;

    for (;;) {
        const char *sep = strchr(entry, ':');
        const size_t entry_len = sep ? (size_t)(sep - entry) : strlen(entry);

        if (entry_len == want && memcmp(entry, locale, want) == 0)
            return true;

        if (!sep)
            return false;	// that was the last entry
        entry = sep + 1;
    }
}
452 
453 char *
get_prefered_locale(const char * locales)454 get_prefered_locale(const char *locales)
455 {
456     char *valid_locales;
457     char *locale;
458     char *sep;
459 
460     valid_locales = (char *)get_valid_locales(locales);
461     if (!strcmp(valid_locales, "")) {
462 	// use en_US for im with lang "" and im without valid locale
463 	free(valid_locales);
464 	locale = strdup("en_US");
465     } else {
466 	locale = valid_locales;
467 	sep = strchr(locale, ':');
468 	if (sep)
469 	    *sep = '\0';
470     }
471 
472     return locale;
473 }
474 
createLocale(const char * encoding)475 Locale *createLocale(const char *encoding)
476 {
477     return new UTF8_Locale(encoding);
478 }
479 
480 int
utf8_mbtowc(uchar * wc,const unsigned char * src,int src_len)481 utf8_mbtowc(uchar *wc, const unsigned char *src, int src_len)
482 {
483     if (!wc)
484 	return 0;
485 
486     unsigned char c = src[0];
487     if (c < 0x80) {
488 	*wc = c;
489 	return 1;
490     } else if (c < 0xc2) {
491 	return RET_ILSEQ;
492     } else if (c < 0xe0) {
493 	if (src_len < 2)
494 	    return RET_TOOFEW(0);
495 	if (!((src[1] ^ 0x80) < 0x40))
496 	    return RET_ILSEQ;
497 	*wc = ((uchar)(c & 0x1f) << 6) | (uchar)(src[1] ^ 0x80);
498 	return 2;
499     } else if (c < 0xf0) {
500 	if (src_len < 3)
501 	    return RET_TOOFEW(0);
502 	if (!((src[1] ^ 0x80) < 0x40 &&
503 	      (src[2] ^ 0x80) < 0x40 &&
504 	      (c >= 0xe1 || src[1] >= 0xa0)))
505 	    return RET_ILSEQ;
506 	*wc = ((uchar)(c & 0x0f) << 12) |
507 	      ((uchar)(src[1] ^ 0x80) << 6) |
508 	      (uchar)(src[2] ^ 0x80);
509 	return 3;
510     } else if (c < 0xf8) {
511 	if (src_len < 4)
512 	    return RET_TOOFEW(0);
513 	if (!((src[1] ^ 0x80) < 0x40 &&
514 	      (src[2] ^ 0x80) < 0x40 &&
515 	      (src[3] ^ 0x80) < 0x40 &&
516 	      (c >= 0xf1 || src[1] >= 0x90)))
517 	    return RET_ILSEQ;
518 	*wc = ((uchar)(c & 0x07) << 18) |
519 	      ((uchar)(src[1] ^ 0x80) << 12) |
520 	      ((uchar)(src[2] ^ 0x80) << 6) |
521 	      (uchar)(src[3] ^ 0x80);
522 	return 4;
523     } else if (c < 0xfc) {
524 	if (src_len < 5)
525 	    return RET_TOOFEW(0);
526 	if (!((src[1] ^ 0x80) < 0x40 &&
527 	      (src[2] ^ 0x80) < 0x40 &&
528 	      (src[3] ^ 0x80) < 0x40 &&
529 	      (src[4] ^ 0x80) < 0x40 &&
530 	      (c >= 0xf9 || src[1] >= 0x88)))
531 	    return RET_ILSEQ;
532 	*wc = ((uchar)(c & 0x03) << 24) |
533 	      ((uchar)(src[1] ^ 0x80) << 18) |
534 	      ((uchar)(src[2] ^ 0x80) << 12) |
535 	      ((uchar)(src[3] ^ 0x80) << 6) |
536 	      (uchar)(src[4] ^ 0x80);
537 	return 5;
538     } else if (c < 0xfe) {
539 	if (src_len < 6)
540 	    return RET_TOOFEW(0);
541 	if (!((src[1] ^ 0x80) < 0x40 &&
542 	      (src[2] ^ 0x80) < 0x40 &&
543 	      (src[3] ^ 0x80) < 0x40 &&
544 	      (src[4] ^ 0x80) < 0x40 &&
545 	      (src[5] ^ 0x80) < 0x40 &&
546 	      (c >= 0xfd || src[1] >= 0x84)))
547 	    return RET_ILSEQ;
548 	*wc = ((uchar)(c & 0x01) << 30) |
549 	      ((uchar)(src[1] ^ 0x80) << 24) |
550 	      ((uchar)(src[2] ^ 0x80) << 18) |
551 	      ((uchar)(src[3] ^ 0x80) << 12) |
552 	      ((uchar)(src[4] ^ 0x80) << 6) |
553 	      (uchar)(src[5] ^ 0x80);
554 	return 6;
555     } else
556 	return RET_ILSEQ;
557 }
558 
559 int
utf8_wctomb(unsigned char * dest,uchar wc)560 utf8_wctomb(unsigned char *dest, uchar wc)
561 {
562     if (!dest)
563 	return 0;
564 
565     int count;
566     if (wc < 0x80)
567 	count = 1;
568     else if (wc < 0x800)
569 	count = 2;
570     else if (wc < 0x10000)
571 	count = 3;
572     else if (wc < 0x200000)
573 	count = 4;
574     else if (wc < 0x4000000)
575 	count = 5;
576     else if (wc <= 0x7fffffff)
577 	count = 6;
578     else
579 	return RET_ILSEQ;
580     switch (count) { // note: falls through cases (no break)
581     case 6:
582 	dest[5] = (unsigned char)(0x80 | (wc & 0x3f));
583 	wc = wc >> 6; wc |= 0x4000000;
584     case 5:
585 	dest[4] = (unsigned char)(0x80 | (wc & 0x3f));
586 	wc = wc >> 6; wc |= 0x200000;
587     case 4:
588 	dest[3] = (unsigned char)(0x80 | (wc & 0x3f));
589 	wc = wc >> 6; wc |= 0x10000;
590     case 3:
591 	dest[2] = (unsigned char)(0x80 | (wc & 0x3f));
592 	wc = wc >> 6; wc |= 0x800;
593     case 2:
594 	dest[1] = (unsigned char)(0x80 | (wc & 0x3f));
595 	wc = wc >> 6; wc |= 0xc0;
596     case 1:
597 	dest[0] = (unsigned char)wc;
598     }
599     return count;
600 }
601