1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4
5 INDEX
6 setlocale
7 INDEX
8 localeconv
9 INDEX
10 _setlocale_r
11 INDEX
12 _localeconv_r
13
14 ANSI_SYNOPSIS
15 #include <locale.h>
16 char *setlocale(int <[category]>, const char *<[locale]>);
17 lconv *localeconv(void);
18
19 char *_setlocale_r(void *<[reent]>,
20 int <[category]>, const char *<[locale]>);
21 lconv *_localeconv_r(void *<[reent]>);
22
23 TRAD_SYNOPSIS
24 #include <locale.h>
25 char *setlocale(<[category]>, <[locale]>)
26 int <[category]>;
27 char *<[locale]>;
28
29 lconv *localeconv();
30
31 char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
32 char *<[reent]>;
33 int <[category]>;
34 char *<[locale]>;
35
36 lconv *_localeconv_r(<[reent]>);
37 char *<[reent]>;
38
39 DESCRIPTION
40 <<setlocale>> is the facility defined by ANSI C to condition the
41 execution environment for international collating and formatting
42 information; <<localeconv>> reports on the settings of the current
43 locale.
44
45 This is a minimal implementation, supporting only the required <<"POSIX">>
46 and <<"C">> values for <[locale]>; strings representing other locales are not
47 honored unless _MB_CAPABLE is defined.
48
49 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
50 the form
51
52 language[_TERRITORY][.charset][@@modifier]
53
54 <<"language">> is a two character string per ISO 639, or, if not available
55 for a given language, a three character string per ISO 639-3.
56 <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
57 <<"modifier">> see below.
58
59 Additionally to the POSIX specifier, the following extension is supported
60 for backward compatibility with older implementations using newlib:
61 <<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use charsets other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
66
67 The following charsets are recognized:
68 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
69 <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
70 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
71 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
72 1257, 1258].
73
74 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
75 are equivalent. Charset names with dashes can also be written without
76 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
77 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
78
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows Codepages.
Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
only newlib for Cygwin is built with full charset support by default.
83 Under Cygwin, this implementation additionally supports the charsets
84 <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>. Cygwin
85 does not support <<"JIS">>.
86
87 Cygwin additionally supports locales from the file
88 /usr/share/locale/locale.alias.
89
90 (<<"">> is also accepted; if given, the settings are read from the
91 corresponding LC_* environment variables and $LANG according to POSIX rules.)
92
93 This implementation also supports the modifier <<"cjknarrow">>, which
94 affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
95 from the "CJK Ambiguous Width" category of characters described at
96 http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
97 of 1 for singlebyte charsets and a width of 2 for multibyte charsets
98 other than UTF-8. For UTF-8, their width depends on the language specifier:
99 it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
100 and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
101 independent of charset and language.
102
103 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
104 pointer to the string representing the current locale. The acceptable
105 values for <[category]> are defined in `<<locale.h>>' as macros
106 beginning with <<"LC_">>.
107
108 <<localeconv>> returns a pointer to a structure (also defined in
109 `<<locale.h>>') describing the locale-specific conventions currently
110 in effect.
111
112 <<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
113 <<localeconv>> and <<setlocale>> respectively. The extra argument
114 <[reent]> is a pointer to a reentrancy structure.
115
116 RETURNS
117 A successful call to <<setlocale>> returns a pointer to a string
118 associated with the specified category for the new locale. The string
119 returned by <<setlocale>> is such that a subsequent call using that
120 string will restore that category (or all categories in case of LC_ALL),
121 to that state. The application shall not modify the string returned
122 which may be overwritten by a subsequent call to <<setlocale>>.
123 On error, <<setlocale>> returns <<NULL>>.
124
125 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
126 which describes the formatting and collating conventions in effect (in
127 this implementation, always those of the C locale).
128
129 PORTABILITY
130 ANSI C requires <<setlocale>>, but the only locale required across all
131 implementations is the C locale.
132
133 NOTES
134 There is no ISO-8859-12 codepage. It's also refused by this implementation.
135
136 No supporting OS subroutines are required.
137 */
138
139 /* Parts of this code are originally taken from FreeBSD. */
140 /*
141 * Copyright (c) 1996 - 2002 FreeBSD Project
142 * Copyright (c) 1991, 1993
143 * The Regents of the University of California. All rights reserved.
144 *
145 * This code is derived from software contributed to Berkeley by
146 * Paul Borman at Krystal Technologies.
147 *
148 * Redistribution and use in source and binary forms, with or without
149 * modification, are permitted provided that the following conditions
150 * are met:
151 * 1. Redistributions of source code must retain the above copyright
152 * notice, this list of conditions and the following disclaimer.
153 * 2. Redistributions in binary form must reproduce the above copyright
154 * notice, this list of conditions and the following disclaimer in the
155 * documentation and/or other materials provided with the distribution.
156 * 4. Neither the name of the University nor the names of its contributors
157 * may be used to endorse or promote products derived from this software
158 * without specific prior written permission.
159 *
160 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
161 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
162 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
163 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
164 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
165 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
166 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
167 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
168 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
169 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
170 * SUCH DAMAGE.
171 */
172
173 #include <newlib.h>
174 #include <errno.h>
175 #include <locale.h>
176 #include <string.h>
177 #include <limits.h>
178 #include <reent.h>
179 #include <stdlib.h>
180 #include <wchar.h>
181 #include "lmessages.h"
182 #include "lmonetary.h"
183 #include "lnumeric.h"
184 #include "lctype.h"
185 #include "timelocal.h"
186 #include "../stdlib/local.h"
187
188 #define _LC_LAST 7
189 #define ENCODING_LEN 31
190
191 #ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
192 int __EXPORT __mb_cur_max = 6;
193 #else
194 int __EXPORT __mb_cur_max = 1;
195 #endif
196
197 int __nlocale_changed = 0;
198 int __mlocale_changed = 0;
199 char *_PathLocale = NULL;
200
/* Backing store for the structure handed out by _localeconv_r().  The
   initializer is positional: the first ten members are strings ("."
   followed by nine empty strings) and the fourteen members after them
   all start out as CHAR_MAX. */
static struct lconv lconv =
{
  /* String-valued members. */
  ".",
  "", "", "",
  "", "", "",
  "", "", "",
  /* Character-valued members. */
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX
};
210
#ifdef _MB_CAPABLE
/*
 * Category names for getenv(), indexed by category number.  The table
 * only ever points at string literals and is never written, so both the
 * pointers and the characters they refer to are const.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};

/*
 * Default locale per POSIX.  Can be overridden on a per-target base.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE	"C"
#endif
/*
 * This variable can be changed by any outside mechanism.  This allows,
 * for instance, to load the default locale from a file.
 * __get_locale_env() falls back to it when no environment variable
 * supplies a locale.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Current locales for each category.  Slot 0 is unused by the code in
 * this file; slot 2 is the one initialized differently on Cygwin.
 */
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
    "C",
    "C",
#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
    "C.UTF-8",
#else
    "C",
#endif
    "C",
    "C",
    "C",
    "C",
};

/*
 * The locales we are going to try and load.  _setlocale_r() fills
 * new_categories and loadlocale() consumes it; saved_categories keeps
 * the previous values so a failed LC_ALL change can be rolled back.
 */
static char new_categories[_LC_LAST][ENCODING_LEN + 1];
static char saved_categories[_LC_LAST][ENCODING_LEN + 1];

/* Result buffer of currentlocale(): room for every category name plus a
   '/' separator each. */
static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
static char *currentlocale(void);
static char *loadlocale(struct _reent *, int);
static const char *__get_locale_env(struct _reent *, int);

#endif /* _MB_CAPABLE */
266
267 #ifdef __CYGWIN__
268 static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
269 #else
270 static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
271 #endif
272 static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
273 static int lc_ctype_cjk_lang = 0;
274
275 char *
276 _DEFUN(_setlocale_r, (p, category, locale),
277 struct _reent *p _AND
278 int category _AND
279 _CONST char *locale)
280 {
281 #ifndef _MB_CAPABLE
282 if (locale)
283 {
284 if (strcmp (locale, "POSIX") && strcmp (locale, "C")
285 && strcmp (locale, ""))
286 return NULL;
287 }
288 return "C";
289 #else /* !_MB_CAPABLE */
290 int i, j, len, saverr;
291 const char *env, *r;
292
293 if (category < LC_ALL || category >= _LC_LAST)
294 {
295 p->_errno = EINVAL;
296 return NULL;
297 }
298
299 if (locale == NULL)
300 return category != LC_ALL ? current_categories[category] : currentlocale();
301
302 /*
303 * Default to the current locale for everything.
304 */
305 for (i = 1; i < _LC_LAST; ++i)
306 strcpy (new_categories[i], current_categories[i]);
307
308 /*
309 * Now go fill up new_categories from the locale argument
310 */
311 if (!*locale)
312 {
313 if (category == LC_ALL)
314 {
315 for (i = 1; i < _LC_LAST; ++i)
316 {
317 env = __get_locale_env (p, i);
318 if (strlen (env) > ENCODING_LEN)
319 {
320 p->_errno = EINVAL;
321 return NULL;
322 }
323 strcpy (new_categories[i], env);
324 }
325 }
326 else
327 {
328 env = __get_locale_env (p, category);
329 if (strlen (env) > ENCODING_LEN)
330 {
331 p->_errno = EINVAL;
332 return NULL;
333 }
334 strcpy (new_categories[category], env);
335 }
336 }
337 else if (category != LC_ALL)
338 {
339 if (strlen (locale) > ENCODING_LEN)
340 {
341 p->_errno = EINVAL;
342 return NULL;
343 }
344 strcpy (new_categories[category], locale);
345 }
346 else
347 {
348 if ((r = strchr (locale, '/')) == NULL)
349 {
350 if (strlen (locale) > ENCODING_LEN)
351 {
352 p->_errno = EINVAL;
353 return NULL;
354 }
355 for (i = 1; i < _LC_LAST; ++i)
356 strcpy (new_categories[i], locale);
357 }
358 else
359 {
360 for (i = 1; r[1] == '/'; ++r)
361 ;
362 if (!r[1])
363 {
364 p->_errno = EINVAL;
365 return NULL; /* Hmm, just slashes... */
366 }
367 do
368 {
369 if (i == _LC_LAST)
370 break; /* Too many slashes... */
371 if ((len = r - locale) > ENCODING_LEN)
372 {
373 p->_errno = EINVAL;
374 return NULL;
375 }
376 strlcpy (new_categories[i], locale, len + 1);
377 i++;
378 while (*r == '/')
379 r++;
380 locale = r;
381 while (*r && *r != '/')
382 r++;
383 }
384 while (*locale);
385 while (i < _LC_LAST)
386 {
387 strcpy (new_categories[i], new_categories[i-1]);
388 i++;
389 }
390 }
391 }
392
393 if (category != LC_ALL)
394 return loadlocale (p, category);
395
396 for (i = 1; i < _LC_LAST; ++i)
397 {
398 strcpy (saved_categories[i], current_categories[i]);
399 if (loadlocale (p, i) == NULL)
400 {
401 saverr = p->_errno;
402 for (j = 1; j < i; j++)
403 {
404 strcpy (new_categories[j], saved_categories[j]);
405 if (loadlocale (p, j) == NULL)
406 {
407 strcpy (new_categories[j], "C");
408 loadlocale (p, j);
409 }
410 }
411 p->_errno = saverr;
412 return NULL;
413 }
414 }
415 return currentlocale ();
416 #endif /* !_MB_CAPABLE */
417 }
418
419 #ifdef _MB_CAPABLE
420 static char *
currentlocale()421 currentlocale()
422 {
423 int i;
424
425 (void)strcpy(current_locale_string, current_categories[1]);
426
427 for (i = 2; i < _LC_LAST; ++i)
428 if (strcmp(current_categories[1], current_categories[i])) {
429 for (i = 2; i < _LC_LAST; ++i) {
430 (void)strcat(current_locale_string, "/");
431 (void)strcat(current_locale_string,
432 current_categories[i]);
433 }
434 break;
435 }
436 return (current_locale_string);
437 }
438 #endif /* _MB_CAPABLE */
439
440 #ifdef _MB_CAPABLE
441 #ifdef __CYGWIN__
442 extern void __set_charset_from_locale (const char *locale, char *charset);
443 extern char *__set_locale_from_locale_alias (const char *, char *);
444 extern int __collate_load_locale (const char *, void *, const char *);
445 #endif /* __CYGWIN__ */
446
447 extern void __set_ctype (const char *charset);
448
449 static char *
loadlocale(struct _reent * p,int category)450 loadlocale(struct _reent *p, int category)
451 {
452 /* At this point a full-featured system would just load the locale
453 specific data from the locale files.
454 What we do here for now is to check the incoming string for correctness.
455 The string must be in one of the allowed locale strings, either
456 one in POSIX-style, or one in the old newlib style to maintain
457 backward compatibility. If the local string is correct, the charset
458 is extracted and stored in lc_ctype_charset or lc_message_charset
459 dependent on the cateogry. */
460 char *locale = NULL;
461 char charset[ENCODING_LEN + 1];
462 unsigned long val;
463 char *end, *c = NULL;
464 int mbc_max;
465 int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
466 int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
467 const char *, mbstate_t *);
468 int cjknarrow = 0;
469
470 /* Avoid doing everything twice if nothing has changed. */
471 if (!strcmp (new_categories[category], current_categories[category]))
472 return current_categories[category];
473
474 #ifdef __CYGWIN__
475 /* This additional code handles the case that the incoming locale string
476 is not valid. If so, it calls the function __set_locale_from_locale_alias,
477 which is only available on Cygwin right now. The function reads the
478 file /usr/share/locale/locale.alias. The file contains locale aliases
479 and their replacement locale. For instance, the alias "french" is
480 translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
481 "th_TH.TIS-620". If successful, the function returns with a pointer
482 to the second argument, which is a buffer in which the replacement locale
483 gets stored. Otherwise the function returns NULL. */
484 char tmp_locale[ENCODING_LEN + 1];
485 int ret = 0;
486
487 restart:
488 if (!locale)
489 locale = new_categories[category];
490 else if (locale != tmp_locale)
491 {
492 locale = __set_locale_from_locale_alias (locale, tmp_locale);
493 if (!locale)
494 return NULL;
495 }
496 # define FAIL goto restart
497 #else
498 locale = new_categories[category];
499 # define FAIL return NULL
500 #endif
501
502 /* "POSIX" is translated to "C", as on Linux. */
503 if (!strcmp (locale, "POSIX"))
504 strcpy (locale, "C");
505 if (!strcmp (locale, "C")) /* Default "C" locale */
506 strcpy (charset, "ASCII");
507 else if (locale[0] == 'C'
508 && (locale[1] == '-' /* Old newlib style */
509 || locale[1] == '.')) /* Extension for the C locale to allow
510 specifying different charsets while
511 sticking to the C locale in terms
512 of sort order, etc. Proposed in
513 the Debian project. */
514 {
515 char *chp;
516
517 c = locale + 2;
518 strcpy (charset, c);
519 if ((chp = strchr (charset, '@')))
520 /* Strip off modifier */
521 *chp = '\0';
522 c += strlen (charset);
523 }
524 else /* POSIX style */
525 {
526 c = locale;
527
528 /* Don't use ctype macros here, they might be localized. */
529 /* Language */
530 if (c[0] < 'a' || c[0] > 'z'
531 || c[1] < 'a' || c[1] > 'z')
532 FAIL;
533 c += 2;
534 /* Allow three character Language per ISO 639-3 */
535 if (c[0] >= 'a' && c[0] <= 'z')
536 ++c;
537 if (c[0] == '_')
538 {
539 /* Territory */
540 ++c;
541 if (c[0] < 'A' || c[0] > 'Z'
542 || c[1] < 'A' || c[1] > 'Z')
543 FAIL;
544 c += 2;
545 }
546 if (c[0] == '.')
547 {
548 /* Charset */
549 char *chp;
550
551 ++c;
552 strcpy (charset, c);
553 if ((chp = strchr (charset, '@')))
554 /* Strip off modifier */
555 *chp = '\0';
556 c += strlen (charset);
557 }
558 else if (c[0] == '\0' || c[0] == '@')
559 /* End of string or just a modifier */
560 #ifdef __CYGWIN__
561 /* The Cygwin-only function __set_charset_from_locale checks
562 for the default charset which is connected to the given locale.
563 The function uses Windows functions in turn so it can't be easily
564 adapted to other targets. However, if any other target provides
565 equivalent functionality, preferrably using the same function name
566 it would be sufficient to change the guarding #ifdef. */
567 __set_charset_from_locale (locale, charset);
568 #else
569 strcpy (charset, "ISO-8859-1");
570 #endif
571 else
572 /* Invalid string */
573 FAIL;
574 }
575 if (c && c[0] == '@')
576 {
577 /* Modifier */
578 /* Only one modifier is recognized right now. "cjknarrow" is used
579 to modify the behaviour of wcwidth() for East Asian languages.
580 For details see the comment at the end of this function. */
581 if (!strcmp (c + 1, "cjknarrow"))
582 cjknarrow = 1;
583 }
584 /* We only support this subset of charsets. */
585 switch (charset[0])
586 {
587 case 'U':
588 case 'u':
589 if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
590 FAIL;
591 strcpy (charset, "UTF-8");
592 mbc_max = 6;
593 l_wctomb = __utf8_wctomb;
594 l_mbtowc = __utf8_mbtowc;
595 break;
596 #ifndef __CYGWIN__
597 /* Cygwin does not support JIS at all. */
598 case 'J':
599 case 'j':
600 if (strcasecmp (charset, "JIS"))
601 FAIL;
602 strcpy (charset, "JIS");
603 mbc_max = 8;
604 l_wctomb = __jis_wctomb;
605 l_mbtowc = __jis_mbtowc;
606 break;
607 #endif /* !__CYGWIN__ */
608 case 'E':
609 case 'e':
610 if (strncasecmp (charset, "EUC", 3))
611 FAIL;
612 c = charset + 3;
613 if (*c == '-')
614 ++c;
615 if (!strcasecmp (c, "JP"))
616 {
617 strcpy (charset, "EUCJP");
618 mbc_max = 3;
619 l_wctomb = __eucjp_wctomb;
620 l_mbtowc = __eucjp_mbtowc;
621 }
622 #ifdef __CYGWIN__
623 /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
624 implementation requires Windows support. */
625 else if (!strcasecmp (c, "KR"))
626 {
627 strcpy (charset, "EUCKR");
628 mbc_max = 2;
629 l_wctomb = __kr_wctomb;
630 l_mbtowc = __kr_mbtowc;
631 }
632 else if (!strcasecmp (c, "CN"))
633 {
634 strcpy (charset, "EUCCN");
635 mbc_max = 2;
636 l_wctomb = __gbk_wctomb;
637 l_mbtowc = __gbk_mbtowc;
638 }
639 #endif /* __CYGWIN__ */
640 else
641 FAIL;
642 break;
643 case 'S':
644 case 's':
645 if (strcasecmp (charset, "SJIS"))
646 FAIL;
647 strcpy (charset, "SJIS");
648 mbc_max = 2;
649 l_wctomb = __sjis_wctomb;
650 l_mbtowc = __sjis_mbtowc;
651 break;
652 case 'I':
653 case 'i':
654 /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
655 ISO-8859-12. This code also recognizes the aliases without dashes. */
656 if (strncasecmp (charset, "ISO", 3))
657 FAIL;
658 c = charset + 3;
659 if (*c == '-')
660 ++c;
661 if (strncasecmp (c, "8859", 4))
662 FAIL;
663 c += 4;
664 if (*c == '-')
665 ++c;
666 val = _strtol_r (p, c, &end, 10);
667 if (val < 1 || val > 16 || val == 12 || *end)
668 FAIL;
669 strcpy (charset, "ISO-8859-");
670 c = charset + 9;
671 if (val > 10)
672 *c++ = '1';
673 *c++ = val % 10 + '0';
674 *c = '\0';
675 mbc_max = 1;
676 #ifdef _MB_EXTENDED_CHARSETS_ISO
677 l_wctomb = __iso_wctomb;
678 l_mbtowc = __iso_mbtowc;
679 #else /* !_MB_EXTENDED_CHARSETS_ISO */
680 l_wctomb = __ascii_wctomb;
681 l_mbtowc = __ascii_mbtowc;
682 #endif /* _MB_EXTENDED_CHARSETS_ISO */
683 break;
684 case 'C':
685 case 'c':
686 if (charset[1] != 'P' && charset[1] != 'p')
687 FAIL;
688 strncpy (charset, "CP", 2);
689 val = _strtol_r (p, charset + 2, &end, 10);
690 if (*end)
691 FAIL;
692 switch (val)
693 {
694 case 437:
695 case 720:
696 case 737:
697 case 775:
698 case 850:
699 case 852:
700 case 855:
701 case 857:
702 case 858:
703 case 862:
704 case 866:
705 case 874:
706 case 1125:
707 case 1250:
708 case 1251:
709 case 1252:
710 case 1253:
711 case 1254:
712 case 1255:
713 case 1256:
714 case 1257:
715 case 1258:
716 mbc_max = 1;
717 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
718 l_wctomb = __cp_wctomb;
719 l_mbtowc = __cp_mbtowc;
720 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
721 l_wctomb = __ascii_wctomb;
722 l_mbtowc = __ascii_mbtowc;
723 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
724 break;
725 case 932:
726 mbc_max = 2;
727 l_wctomb = __sjis_wctomb;
728 l_mbtowc = __sjis_mbtowc;
729 break;
730 default:
731 FAIL;
732 }
733 break;
734 case 'K':
735 case 'k':
736 /* KOI8-R, KOI8-U and the aliases without dash */
737 if (strncasecmp (charset, "KOI8", 4))
738 FAIL;
739 c = charset + 4;
740 if (*c == '-')
741 ++c;
742 if (*c == 'R' || *c == 'r')
743 strcpy (charset, "CP20866");
744 else if (*c == 'U' || *c == 'u')
745 strcpy (charset, "CP21866");
746 else
747 FAIL;
748 mbc_max = 1;
749 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
750 l_wctomb = __cp_wctomb;
751 l_mbtowc = __cp_mbtowc;
752 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
753 l_wctomb = __ascii_wctomb;
754 l_mbtowc = __ascii_mbtowc;
755 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
756 break;
757 case 'A':
758 case 'a':
759 if (strcasecmp (charset, "ASCII"))
760 FAIL;
761 strcpy (charset, "ASCII");
762 mbc_max = 1;
763 l_wctomb = __ascii_wctomb;
764 l_mbtowc = __ascii_mbtowc;
765 break;
766 case 'G':
767 case 'g':
768 #ifdef __CYGWIN__
769 /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
770 requires Windows support. */
771 if (!strcasecmp (charset, "GBK")
772 || !strcasecmp (charset, "GB2312"))
773 {
774 strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
775 mbc_max = 2;
776 l_wctomb = __gbk_wctomb;
777 l_mbtowc = __gbk_mbtowc;
778 }
779 else
780 #endif /* __CYGWIN__ */
781 /* GEORGIAN-PS and the alias without dash */
782 if (!strncasecmp (charset, "GEORGIAN", 8))
783 {
784 c = charset + 8;
785 if (*c == '-')
786 ++c;
787 if (strcasecmp (c, "PS"))
788 FAIL;
789 strcpy (charset, "CP101");
790 mbc_max = 1;
791 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
792 l_wctomb = __cp_wctomb;
793 l_mbtowc = __cp_mbtowc;
794 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
795 l_wctomb = __ascii_wctomb;
796 l_mbtowc = __ascii_mbtowc;
797 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
798 }
799 else
800 FAIL;
801 break;
802 case 'P':
803 case 'p':
804 /* PT154 */
805 if (strcasecmp (charset, "PT154"))
806 FAIL;
807 strcpy (charset, "CP102");
808 mbc_max = 1;
809 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
810 l_wctomb = __cp_wctomb;
811 l_mbtowc = __cp_mbtowc;
812 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
813 l_wctomb = __ascii_wctomb;
814 l_mbtowc = __ascii_mbtowc;
815 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
816 break;
817 case 'T':
818 case 't':
819 if (strncasecmp (charset, "TIS", 3))
820 FAIL;
821 c = charset + 3;
822 if (*c == '-')
823 ++c;
824 if (strcasecmp (c, "620"))
825 FAIL;
826 strcpy (charset, "CP874");
827 mbc_max = 1;
828 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
829 l_wctomb = __cp_wctomb;
830 l_mbtowc = __cp_mbtowc;
831 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
832 l_wctomb = __ascii_wctomb;
833 l_mbtowc = __ascii_mbtowc;
834 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
835 break;
836 #ifdef __CYGWIN__
837 /* Newlib does not provide Big5 and Cygwin's implementation
838 requires Windows support. */
839 case 'B':
840 case 'b':
841 if (strcasecmp (charset, "BIG5"))
842 FAIL;
843 strcpy (charset, "BIG5");
844 mbc_max = 2;
845 l_wctomb = __big5_wctomb;
846 l_mbtowc = __big5_mbtowc;
847 break;
848 #endif /* __CYGWIN__ */
849 default:
850 FAIL;
851 }
852 switch (category)
853 {
854 case LC_CTYPE:
855 strcpy (lc_ctype_charset, charset);
856 __mb_cur_max = mbc_max;
857 __wctomb = l_wctomb;
858 __mbtowc = l_mbtowc;
859 __set_ctype (charset);
860 /* Determine the width for the "CJK Ambiguous Width" category of
861 characters. This is used in wcwidth(). Assume single width for
862 single-byte charsets, and double width for multi-byte charsets
863 other than UTF-8. For UTF-8, use double width for the East Asian
864 languages ("ja", "ko", "zh"), and single width for everything else.
865 Single width can also be forced with the "@cjknarrow" modifier. */
866 lc_ctype_cjk_lang = !cjknarrow
867 && mbc_max > 1
868 && (charset[0] != 'U'
869 || strncmp (locale, "ja", 2) == 0
870 || strncmp (locale, "ko", 2) == 0
871 || strncmp (locale, "zh", 2) == 0);
872 #ifdef __HAVE_LOCALE_INFO__
873 ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
874 #endif /* __HAVE_LOCALE_INFO__ */
875 break;
876 case LC_MESSAGES:
877 strcpy (lc_message_charset, charset);
878 #ifdef __HAVE_LOCALE_INFO__
879 ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
880 if (!ret)
881 #endif /* __HAVE_LOCALE_INFO__ */
882 break;
883 #ifdef __HAVE_LOCALE_INFO__
884 #ifdef __CYGWIN__
885 /* Right now only Cygwin supports a __collate_load_locale function at all. */
886 case LC_COLLATE:
887 ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
888 break;
889 #endif
890 case LC_MONETARY:
891 ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
892 break;
893 case LC_NUMERIC:
894 ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
895 break;
896 case LC_TIME:
897 ret = __time_load_locale (locale, (void *) l_wctomb, charset);
898 break;
899 #endif /* __HAVE_LOCALE_INFO__ */
900 default:
901 break;
902 }
903 #ifdef __HAVE_LOCALE_INFO__
904 if (ret)
905 FAIL;
906 #endif /* __HAVE_LOCALE_INFO__ */
907 return strcpy(current_categories[category], new_categories[category]);
908 }
909
910 static const char *
__get_locale_env(struct _reent * p,int category)911 __get_locale_env(struct _reent *p, int category)
912 {
913 const char *env;
914
915 /* 1. check LC_ALL. */
916 env = _getenv_r (p, categories[0]);
917
918 /* 2. check LC_* */
919 if (env == NULL || !*env)
920 env = _getenv_r (p, categories[category]);
921
922 /* 3. check LANG */
923 if (env == NULL || !*env)
924 env = _getenv_r (p, "LANG");
925
926 /* 4. if none is set, fall to default locale */
927 if (env == NULL || !*env)
928 env = __default_locale;
929
930 return env;
931 }
932 #endif /* _MB_CAPABLE */
933
934 char *
_DEFUN_VOID(__locale_charset)935 _DEFUN_VOID(__locale_charset)
936 {
937 #if 0//def __HAVE_LOCALE_INFO__
938 return __get_current_ctype_locale ()->codeset;
939 #else
940 return lc_ctype_charset;
941 #endif
942 }
943
944 int
_DEFUN_VOID(__locale_mb_cur_max)945 _DEFUN_VOID(__locale_mb_cur_max)
946 {
947 #if 0//def __HAVE_LOCALE_INFO__
948 return __get_current_ctype_locale ()->mb_cur_max[0];
949 #else
950 return __mb_cur_max;
951 #endif
952 }
953
954
955 char *
_DEFUN_VOID(__locale_msgcharset)956 _DEFUN_VOID(__locale_msgcharset)
957 {
958 #ifdef __HAVE_LOCALE_INFO__
959 return (char *) __get_current_messages_locale ()->codeset;
960 #else
961 return lc_message_charset;
962 #endif
963 }
964
965 int
_DEFUN_VOID(__locale_cjk_lang)966 _DEFUN_VOID(__locale_cjk_lang)
967 {
968 return lc_ctype_cjk_lang;
969 }
970
971 struct lconv *
972 _DEFUN(_localeconv_r, (data),
973 struct _reent *data)
974 {
975 #ifdef __HAVE_LOCALE_INFO__
976 if (__nlocale_changed)
977 {
978 struct lc_numeric_T *n = __get_current_numeric_locale ();
979 lconv.decimal_point = (char *) n->decimal_point;
980 lconv.thousands_sep = (char *) n->thousands_sep;
981 lconv.grouping = (char *) n->grouping;
982 __nlocale_changed = 0;
983 }
984 if (__mlocale_changed)
985 {
986 struct lc_monetary_T *m = __get_current_monetary_locale ();
987 lconv.int_curr_symbol = (char *) m->int_curr_symbol;
988 lconv.currency_symbol = (char *) m->currency_symbol;
989 lconv.mon_decimal_point = (char *) m->mon_decimal_point;
990 lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
991 lconv.mon_grouping = (char *) m->mon_grouping;
992 lconv.positive_sign = (char *) m->positive_sign;
993 lconv.negative_sign = (char *) m->negative_sign;
994 lconv.int_frac_digits = m->int_frac_digits[0];
995 lconv.frac_digits = m->frac_digits[0];
996 lconv.p_cs_precedes = m->p_cs_precedes[0];
997 lconv.p_sep_by_space = m->p_sep_by_space[0];
998 lconv.n_cs_precedes = m->n_cs_precedes[0];
999 lconv.n_sep_by_space = m->n_sep_by_space[0];
1000 lconv.p_sign_posn = m->p_sign_posn[0];
1001 lconv.n_sign_posn = m->n_sign_posn[0];
1002 #ifdef __HAVE_LOCALE_INFO_EXTENDED__
1003 lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
1004 lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
1005 lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
1006 lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
1007 lconv.int_n_sign_posn = m->int_n_sign_posn[0];
1008 lconv.int_p_sign_posn = m->int_p_sign_posn[0];
1009 #else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1010 lconv.int_p_cs_precedes = m->p_cs_precedes[0];
1011 lconv.int_p_sep_by_space = m->p_sep_by_space[0];
1012 lconv.int_n_cs_precedes = m->n_cs_precedes[0];
1013 lconv.int_n_sep_by_space = m->n_sep_by_space[0];
1014 lconv.int_n_sign_posn = m->n_sign_posn[0];
1015 lconv.int_p_sign_posn = m->p_sign_posn[0];
1016 #endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1017 __mlocale_changed = 0;
1018 }
1019 #endif /* __HAVE_LOCALE_INFO__ */
1020 return (struct lconv *) &lconv;
1021 }
1022
1023 #ifndef _REENT_ONLY
1024
1025 #ifndef __CYGWIN__
1026 /* Cygwin provides its own version of setlocale to perform some more
1027 initialization work. It calls _setlocale_r, though. */
1028 char *
1029 _DEFUN(setlocale, (category, locale),
1030 int category _AND
1031 _CONST char *locale)
1032 {
1033 return _setlocale_r (_REENT, category, locale);
1034 }
1035 #endif /* __CYGWIN__ */
1036
1037 struct lconv *
_DEFUN_VOID(localeconv)1038 _DEFUN_VOID(localeconv)
1039 {
1040 return _localeconv_r (_REENT);
1041 }
1042
1043 #endif
1044