1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
4 
5 INDEX
6 	setlocale
7 INDEX
8 	localeconv
9 INDEX
10 	_setlocale_r
11 INDEX
12 	_localeconv_r
13 
14 ANSI_SYNOPSIS
15 	#include <locale.h>
16 	char *setlocale(int <[category]>, const char *<[locale]>);
17 	lconv *localeconv(void);
18 
19 	char *_setlocale_r(void *<[reent]>,
20                         int <[category]>, const char *<[locale]>);
21 	lconv *_localeconv_r(void *<[reent]>);
22 
23 TRAD_SYNOPSIS
24 	#include <locale.h>
25 	char *setlocale(<[category]>, <[locale]>)
26 	int <[category]>;
27 	char *<[locale]>;
28 
29 	lconv *localeconv();
30 
31 	char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
32 	char *<[reent]>;
33 	int <[category]>;
34 	char *<[locale]>;
35 
36 	lconv *_localeconv_r(<[reent]>);
37 	char *<[reent]>;
38 
39 DESCRIPTION
40 <<setlocale>> is the facility defined by ANSI C to condition the
41 execution environment for international collating and formatting
42 information; <<localeconv>> reports on the settings of the current
43 locale.
44 
45 This is a minimal implementation, supporting only the required <<"POSIX">>
46 and <<"C">> values for <[locale]>; strings representing other locales are not
47 honored unless _MB_CAPABLE is defined.
48 
49 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
50 the form
51 
52   language[_TERRITORY][.charset][@@modifier]
53 
54 <<"language">> is a two character string per ISO 639, or, if not available
55 for a given language, a three character string per ISO 639-3.
56 <<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
57 <<"modifier">> see below.
58 
In addition to the POSIX specifier, the following extension is supported
for backward compatibility with older implementations using newlib:
<<"C-charset">>.
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
specifying language-neutral locales that use charsets other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
66 
67 The following charsets are recognized:
68 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
69 <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
70 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
71 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
72 1257, 1258].
73 
74 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
75 are equivalent.  Charset names with dashes can also be written without
76 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
77 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
78 
Full support for all of the above charsets requires that newlib has been
built with multibyte support and support for all ISO and Windows codepages.
81 Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
82 only newlib for Cygwin is built with full charset support by default.
83 Under Cygwin, this implementation additionally supports the charsets
84 <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
85 does not support <<"JIS">>.
86 
87 Cygwin additionally supports locales from the file
88 /usr/share/locale/locale.alias.
89 
90 (<<"">> is also accepted; if given, the settings are read from the
91 corresponding LC_* environment variables and $LANG according to POSIX rules.)
92 
93 This implementation also supports the modifier <<"cjknarrow">>, which
94 affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
95 from the "CJK Ambiguous Width" category of characters described at
96 http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
97 of 1 for singlebyte charsets and a width of 2 for multibyte charsets
98 other than UTF-8. For UTF-8, their width depends on the language specifier:
99 it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
100 and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
101 independent of charset and language.
102 
103 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
104 pointer to the string representing the current locale.  The acceptable
105 values for <[category]> are defined in `<<locale.h>>' as macros
106 beginning with <<"LC_">>.
107 
108 <<localeconv>> returns a pointer to a structure (also defined in
109 `<<locale.h>>') describing the locale-specific conventions currently
110 in effect.
111 
112 <<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
113 <<localeconv>> and <<setlocale>> respectively.  The extra argument
114 <[reent]> is a pointer to a reentrancy structure.
115 
116 RETURNS
117 A successful call to <<setlocale>> returns a pointer to a string
118 associated with the specified category for the new locale.  The string
119 returned by <<setlocale>> is such that a subsequent call using that
120 string will restore that category (or all categories in case of LC_ALL),
121 to that state.  The application shall not modify the string returned
122 which may be overwritten by a subsequent call to <<setlocale>>.
123 On error, <<setlocale>> returns <<NULL>>.
124 
125 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
126 which describes the formatting and collating conventions in effect (in
127 this implementation, always those of the C locale).
128 
129 PORTABILITY
130 ANSI C requires <<setlocale>>, but the only locale required across all
131 implementations is the C locale.
132 
133 NOTES
There is no ISO-8859-12 codepage, so this implementation rejects it.
135 
136 No supporting OS subroutines are required.
137 */
138 
139 /* Parts of this code are originally taken from FreeBSD. */
140 /*
141  * Copyright (c) 1996 - 2002 FreeBSD Project
142  * Copyright (c) 1991, 1993
143  *      The Regents of the University of California.  All rights reserved.
144  *
145  * This code is derived from software contributed to Berkeley by
146  * Paul Borman at Krystal Technologies.
147  *
148  * Redistribution and use in source and binary forms, with or without
149  * modification, are permitted provided that the following conditions
150  * are met:
151  * 1. Redistributions of source code must retain the above copyright
152  *    notice, this list of conditions and the following disclaimer.
153  * 2. Redistributions in binary form must reproduce the above copyright
154  *    notice, this list of conditions and the following disclaimer in the
155  *    documentation and/or other materials provided with the distribution.
156  * 4. Neither the name of the University nor the names of its contributors
157  *    may be used to endorse or promote products derived from this software
158  *    without specific prior written permission.
159  *
160  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
161  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
162  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
163  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
164  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
165  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
166  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
167  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
168  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
169  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
170  * SUCH DAMAGE.
171  */
172 
173 #include <newlib.h>
174 #include <errno.h>
175 #include <locale.h>
176 #include <string.h>
177 #include <limits.h>
178 #include <reent.h>
179 #include <stdlib.h>
180 #include <wchar.h>
181 #include "lmessages.h"
182 #include "lmonetary.h"
183 #include "lnumeric.h"
184 #include "lctype.h"
185 #include "timelocal.h"
186 #include "../stdlib/local.h"
187 
188 #define _LC_LAST      7
189 #define ENCODING_LEN 31
190 
191 #ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
192 int __EXPORT __mb_cur_max = 6;
193 #else
194 int __EXPORT __mb_cur_max = 1;
195 #endif
196 
197 int __nlocale_changed = 0;
198 int __mlocale_changed = 0;
199 char *_PathLocale = NULL;
200 
/*
 * The single lconv object returned by _localeconv_r().  It starts out with
 * "." as its first string member, empty strings for the remaining nine
 * string members, and CHAR_MAX in each of the fourteen char members.
 */
static struct lconv lconv = {
  /* ten string members */
  ".",
  "", "", "",
  "", "", "",
  "", "", "",
  /* fourteen char members */
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX
};
210 
#ifdef _MB_CAPABLE
/*
 * Environment variable looked up for each category.  The array index is the
 * category number; slot 0 is LC_ALL.
 */
static char *categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};

/*
 * Locale used when the environment does not name one.  POSIX asks for "C";
 * a target may predefine DEFAULT_LOCALE to choose something else.
 */
#if !defined (DEFAULT_LOCALE)
# define DEFAULT_LOCALE	"C"
#endif

/*
 * Left non-static on purpose so that an outside mechanism can replace the
 * default, for instance after reading it from a file.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Locale string in effect for every category, indexed like categories[].
 */
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
    "C",			/* slot 0 (LC_ALL) */
    "C",			/* LC_COLLATE */
#ifndef __CYGWIN__
    "C",			/* LC_CTYPE */
#else /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
    "C.UTF-8",			/* LC_CTYPE */
#endif
    "C",			/* LC_MONETARY */
    "C",			/* LC_NUMERIC */
    "C",			/* LC_TIME */
    "C",			/* LC_MESSAGES */
};

/*
 * Scratch tables used by _setlocale_r(): the candidate locale for each
 * category, and a copy of the previous settings for rolling back a failed
 * LC_ALL change.
 */
static char new_categories[_LC_LAST][ENCODING_LEN + 1];
static char saved_categories[_LC_LAST][ENCODING_LEN + 1];

/*
 * Buffer in which currentlocale() assembles its result; sized for every
 * category string plus one '/' separator each.
 */
static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];

static char *currentlocale(void);
static char *loadlocale(struct _reent *, int);
static const char *__get_locale_env(struct _reent *, int);

#endif /* _MB_CAPABLE */
266 
267 #ifdef __CYGWIN__
268 static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
269 #else
270 static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
271 #endif
272 static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
273 static int lc_ctype_cjk_lang = 0;
274 
275 char *
276 _DEFUN(_setlocale_r, (p, category, locale),
277        struct _reent *p _AND
278        int category _AND
279        _CONST char *locale)
280 {
281 #ifndef _MB_CAPABLE
282   if (locale)
283     {
284       if (strcmp (locale, "POSIX") && strcmp (locale, "C")
285 	  && strcmp (locale, ""))
286         return NULL;
287     }
288   return "C";
289 #else /* !_MB_CAPABLE */
290   int i, j, len, saverr;
291   const char *env, *r;
292 
293   if (category < LC_ALL || category >= _LC_LAST)
294     {
295       p->_errno = EINVAL;
296       return NULL;
297     }
298 
299   if (locale == NULL)
300     return category != LC_ALL ? current_categories[category] : currentlocale();
301 
302   /*
303    * Default to the current locale for everything.
304    */
305   for (i = 1; i < _LC_LAST; ++i)
306     strcpy (new_categories[i], current_categories[i]);
307 
308   /*
309    * Now go fill up new_categories from the locale argument
310    */
311   if (!*locale)
312     {
313       if (category == LC_ALL)
314 	{
315 	  for (i = 1; i < _LC_LAST; ++i)
316 	    {
317 	      env = __get_locale_env (p, i);
318 	      if (strlen (env) > ENCODING_LEN)
319 		{
320 		  p->_errno = EINVAL;
321 		  return NULL;
322 		}
323 	      strcpy (new_categories[i], env);
324 	    }
325 	}
326       else
327 	{
328 	  env = __get_locale_env (p, category);
329 	  if (strlen (env) > ENCODING_LEN)
330 	    {
331 	      p->_errno = EINVAL;
332 	      return NULL;
333 	    }
334 	  strcpy (new_categories[category], env);
335 	}
336     }
337   else if (category != LC_ALL)
338     {
339       if (strlen (locale) > ENCODING_LEN)
340 	{
341 	  p->_errno = EINVAL;
342 	  return NULL;
343 	}
344       strcpy (new_categories[category], locale);
345     }
346   else
347     {
348       if ((r = strchr (locale, '/')) == NULL)
349 	{
350 	  if (strlen (locale) > ENCODING_LEN)
351 	    {
352 	      p->_errno = EINVAL;
353 	      return NULL;
354 	    }
355 	  for (i = 1; i < _LC_LAST; ++i)
356 	    strcpy (new_categories[i], locale);
357 	}
358       else
359 	{
360 	  for (i = 1; r[1] == '/'; ++r)
361 	    ;
362 	  if (!r[1])
363 	    {
364 	      p->_errno = EINVAL;
365 	      return NULL;  /* Hmm, just slashes... */
366 	    }
367 	  do
368 	    {
369 	      if (i == _LC_LAST)
370 		break;  /* Too many slashes... */
371 	      if ((len = r - locale) > ENCODING_LEN)
372 		{
373 		  p->_errno = EINVAL;
374 		  return NULL;
375 		}
376 	      strlcpy (new_categories[i], locale, len + 1);
377 	      i++;
378 	      while (*r == '/')
379 		r++;
380 	      locale = r;
381 	      while (*r && *r != '/')
382 		r++;
383 	    }
384 	  while (*locale);
385 	  while (i < _LC_LAST)
386 	    {
387 	      strcpy (new_categories[i], new_categories[i-1]);
388 	      i++;
389 	    }
390 	}
391     }
392 
393   if (category != LC_ALL)
394     return loadlocale (p, category);
395 
396   for (i = 1; i < _LC_LAST; ++i)
397     {
398       strcpy (saved_categories[i], current_categories[i]);
399       if (loadlocale (p, i) == NULL)
400 	{
401 	  saverr = p->_errno;
402 	  for (j = 1; j < i; j++)
403 	    {
404 	      strcpy (new_categories[j], saved_categories[j]);
405 	      if (loadlocale (p, j) == NULL)
406 		{
407 		  strcpy (new_categories[j], "C");
408 		  loadlocale (p, j);
409 		}
410 	    }
411 	  p->_errno = saverr;
412 	  return NULL;
413 	}
414     }
415   return currentlocale ();
416 #endif /* !_MB_CAPABLE */
417 }
418 
#ifdef _MB_CAPABLE
/*
 * Build the LC_ALL view of the current settings in current_locale_string
 * and return that buffer.  If categories 1 .. _LC_LAST-1 all carry the same
 * name, the result is just that name; otherwise it is every category's
 * name in index order, joined with '/'.
 */
static char *
currentlocale(void)
{
  int idx;
  int uniform = 1;

  strcpy (current_locale_string, current_categories[1]);

  /* Does any category differ from the first one? */
  for (idx = 2; idx < _LC_LAST && uniform; ++idx)
    if (strcmp (current_categories[1], current_categories[idx]) != 0)
      uniform = 0;

  if (!uniform)
    for (idx = 2; idx < _LC_LAST; ++idx)
      {
	strcat (current_locale_string, "/");
	strcat (current_locale_string, current_categories[idx]);
      }

  return current_locale_string;
}
#endif /* _MB_CAPABLE */
439 
440 #ifdef _MB_CAPABLE
#ifdef __CYGWIN__
/* Helpers that loadlocale() calls only in the Cygwin build. */
extern void __set_charset_from_locale (const char *locale, char *charset);
extern char *__set_locale_from_locale_alias (const char *locale,
					     char *new_locale);
extern int __collate_load_locale (const char *locale, void *f_mbtowc,
				  const char *charset);
#endif /* __CYGWIN__ */

/* Called by loadlocale() with the canonical charset when LC_CTYPE changes. */
extern void __set_ctype (const char *charset);
448 
449 static char *
loadlocale(struct _reent * p,int category)450 loadlocale(struct _reent *p, int category)
451 {
452   /* At this point a full-featured system would just load the locale
453      specific data from the locale files.
454      What we do here for now is to check the incoming string for correctness.
455      The string must be in one of the allowed locale strings, either
456      one in POSIX-style, or one in the old newlib style to maintain
457      backward compatibility.  If the local string is correct, the charset
458      is extracted and stored in lc_ctype_charset or lc_message_charset
459      dependent on the cateogry. */
460   char *locale = NULL;
461   char charset[ENCODING_LEN + 1];
462   unsigned long val;
463   char *end, *c = NULL;
464   int mbc_max;
465   int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
466   int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
467 		   const char *, mbstate_t *);
468   int cjknarrow = 0;
469 
470   /* Avoid doing everything twice if nothing has changed. */
471   if (!strcmp (new_categories[category], current_categories[category]))
472     return current_categories[category];
473 
474 #ifdef __CYGWIN__
475   /* This additional code handles the case that the incoming locale string
476      is not valid.  If so, it calls the function __set_locale_from_locale_alias,
477      which is only available on Cygwin right now.  The function reads the
478      file /usr/share/locale/locale.alias.  The file contains locale aliases
479      and their replacement locale.  For instance, the alias "french" is
480      translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
481      "th_TH.TIS-620".  If successful, the function returns with a pointer
482      to the second argument, which is a buffer in which the replacement locale
483      gets stored.  Otherwise the function returns NULL. */
484   char tmp_locale[ENCODING_LEN + 1];
485   int ret = 0;
486 
487 restart:
488   if (!locale)
489     locale = new_categories[category];
490   else if (locale != tmp_locale)
491     {
492       locale = __set_locale_from_locale_alias (locale, tmp_locale);
493       if (!locale)
494 	return NULL;
495     }
496 # define FAIL	goto restart
497 #else
498   locale = new_categories[category];
499 # define FAIL	return NULL
500 #endif
501 
502   /* "POSIX" is translated to "C", as on Linux. */
503   if (!strcmp (locale, "POSIX"))
504     strcpy (locale, "C");
505   if (!strcmp (locale, "C"))				/* Default "C" locale */
506     strcpy (charset, "ASCII");
507   else if (locale[0] == 'C'
508 	   && (locale[1] == '-'		/* Old newlib style */
509 	       || locale[1] == '.'))	/* Extension for the C locale to allow
510 					   specifying different charsets while
511 					   sticking to the C locale in terms
512 					   of sort order, etc.  Proposed in
513 					   the Debian project. */
514     {
515       char *chp;
516 
517       c = locale + 2;
518       strcpy (charset, c);
519       if ((chp = strchr (charset, '@')))
520         /* Strip off modifier */
521         *chp = '\0';
522       c += strlen (charset);
523     }
524   else							/* POSIX style */
525     {
526       c = locale;
527 
528       /* Don't use ctype macros here, they might be localized. */
529       /* Language */
530       if (c[0] < 'a' || c[0] > 'z'
531 	  || c[1] < 'a' || c[1] > 'z')
532 	FAIL;
533       c += 2;
534       /* Allow three character Language per ISO 639-3 */
535       if (c[0] >= 'a' && c[0] <= 'z')
536       	++c;
537       if (c[0] == '_')
538         {
539 	  /* Territory */
540 	  ++c;
541 	  if (c[0] < 'A' || c[0] > 'Z'
542 	      || c[1] < 'A' || c[1] > 'Z')
543 	    FAIL;
544 	  c += 2;
545 	}
546       if (c[0] == '.')
547 	{
548 	  /* Charset */
549 	  char *chp;
550 
551 	  ++c;
552 	  strcpy (charset, c);
553 	  if ((chp = strchr (charset, '@')))
554 	    /* Strip off modifier */
555 	    *chp = '\0';
556 	  c += strlen (charset);
557 	}
558       else if (c[0] == '\0' || c[0] == '@')
559 	/* End of string or just a modifier */
560 #ifdef __CYGWIN__
561 	/* The Cygwin-only function __set_charset_from_locale checks
562 	   for the default charset which is connected to the given locale.
563 	   The function uses Windows functions in turn so it can't be easily
564 	   adapted to other targets.  However, if any other target provides
565 	   equivalent functionality, preferrably using the same function name
566 	   it would be sufficient to change the guarding #ifdef. */
567 	__set_charset_from_locale (locale, charset);
568 #else
569 	strcpy (charset, "ISO-8859-1");
570 #endif
571       else
572 	/* Invalid string */
573       	FAIL;
574     }
575   if (c && c[0] == '@')
576     {
577       /* Modifier */
578       /* Only one modifier is recognized right now.  "cjknarrow" is used
579          to modify the behaviour of wcwidth() for East Asian languages.
580          For details see the comment at the end of this function. */
581       if (!strcmp (c + 1, "cjknarrow"))
582 	cjknarrow = 1;
583     }
584   /* We only support this subset of charsets. */
585   switch (charset[0])
586     {
587     case 'U':
588     case 'u':
589       if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
590 	FAIL;
591       strcpy (charset, "UTF-8");
592       mbc_max = 6;
593       l_wctomb = __utf8_wctomb;
594       l_mbtowc = __utf8_mbtowc;
595     break;
596 #ifndef __CYGWIN__
597     /* Cygwin does not support JIS at all. */
598     case 'J':
599     case 'j':
600       if (strcasecmp (charset, "JIS"))
601 	FAIL;
602       strcpy (charset, "JIS");
603       mbc_max = 8;
604       l_wctomb = __jis_wctomb;
605       l_mbtowc = __jis_mbtowc;
606     break;
607 #endif /* !__CYGWIN__ */
608     case 'E':
609     case 'e':
610       if (strncasecmp (charset, "EUC", 3))
611 	FAIL;
612       c = charset + 3;
613       if (*c == '-')
614 	++c;
615       if (!strcasecmp (c, "JP"))
616 	{
617 	  strcpy (charset, "EUCJP");
618 	  mbc_max = 3;
619 	  l_wctomb = __eucjp_wctomb;
620 	  l_mbtowc = __eucjp_mbtowc;
621 	}
622 #ifdef __CYGWIN__
623       /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
624       	 implementation requires Windows support. */
625       else if (!strcasecmp (c, "KR"))
626 	{
627 	  strcpy (charset, "EUCKR");
628 	  mbc_max = 2;
629 	  l_wctomb = __kr_wctomb;
630 	  l_mbtowc = __kr_mbtowc;
631 	}
632       else if (!strcasecmp (c, "CN"))
633 	{
634 	  strcpy (charset, "EUCCN");
635 	  mbc_max = 2;
636 	  l_wctomb = __gbk_wctomb;
637 	  l_mbtowc = __gbk_mbtowc;
638 	}
639 #endif /* __CYGWIN__ */
640       else
641 	FAIL;
642     break;
643     case 'S':
644     case 's':
645       if (strcasecmp (charset, "SJIS"))
646 	FAIL;
647       strcpy (charset, "SJIS");
648       mbc_max = 2;
649       l_wctomb = __sjis_wctomb;
650       l_mbtowc = __sjis_mbtowc;
651     break;
652     case 'I':
653     case 'i':
654       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
655          ISO-8859-12.  This code also recognizes the aliases without dashes. */
656       if (strncasecmp (charset, "ISO", 3))
657 	FAIL;
658       c = charset + 3;
659       if (*c == '-')
660 	++c;
661       if (strncasecmp (c, "8859", 4))
662 	FAIL;
663       c += 4;
664       if (*c == '-')
665 	++c;
666       val = _strtol_r (p, c, &end, 10);
667       if (val < 1 || val > 16 || val == 12 || *end)
668 	FAIL;
669       strcpy (charset, "ISO-8859-");
670       c = charset + 9;
671       if (val > 10)
672       	*c++ = '1';
673       *c++ = val % 10 + '0';
674       *c = '\0';
675       mbc_max = 1;
676 #ifdef _MB_EXTENDED_CHARSETS_ISO
677       l_wctomb = __iso_wctomb;
678       l_mbtowc = __iso_mbtowc;
679 #else /* !_MB_EXTENDED_CHARSETS_ISO */
680       l_wctomb = __ascii_wctomb;
681       l_mbtowc = __ascii_mbtowc;
682 #endif /* _MB_EXTENDED_CHARSETS_ISO */
683     break;
684     case 'C':
685     case 'c':
686       if (charset[1] != 'P' && charset[1] != 'p')
687 	FAIL;
688       strncpy (charset, "CP", 2);
689       val = _strtol_r (p, charset + 2, &end, 10);
690       if (*end)
691 	FAIL;
692       switch (val)
693 	{
694 	case 437:
695 	case 720:
696 	case 737:
697 	case 775:
698 	case 850:
699 	case 852:
700 	case 855:
701 	case 857:
702 	case 858:
703 	case 862:
704 	case 866:
705 	case 874:
706 	case 1125:
707 	case 1250:
708 	case 1251:
709 	case 1252:
710 	case 1253:
711 	case 1254:
712 	case 1255:
713 	case 1256:
714 	case 1257:
715 	case 1258:
716 	  mbc_max = 1;
717 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
718 	  l_wctomb = __cp_wctomb;
719 	  l_mbtowc = __cp_mbtowc;
720 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
721 	  l_wctomb = __ascii_wctomb;
722 	  l_mbtowc = __ascii_mbtowc;
723 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
724 	  break;
725 	case 932:
726 	  mbc_max = 2;
727 	  l_wctomb = __sjis_wctomb;
728 	  l_mbtowc = __sjis_mbtowc;
729 	  break;
730 	default:
731 	  FAIL;
732 	}
733     break;
734     case 'K':
735     case 'k':
736       /* KOI8-R, KOI8-U and the aliases without dash */
737       if (strncasecmp (charset, "KOI8", 4))
738 	FAIL;
739       c = charset + 4;
740       if (*c == '-')
741 	++c;
742       if (*c == 'R' || *c == 'r')
743 	strcpy (charset, "CP20866");
744       else if (*c == 'U' || *c == 'u')
745 	strcpy (charset, "CP21866");
746       else
747 	FAIL;
748       mbc_max = 1;
749 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
750       l_wctomb = __cp_wctomb;
751       l_mbtowc = __cp_mbtowc;
752 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
753       l_wctomb = __ascii_wctomb;
754       l_mbtowc = __ascii_mbtowc;
755 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
756       break;
757     case 'A':
758     case 'a':
759       if (strcasecmp (charset, "ASCII"))
760 	FAIL;
761       strcpy (charset, "ASCII");
762       mbc_max = 1;
763       l_wctomb = __ascii_wctomb;
764       l_mbtowc = __ascii_mbtowc;
765       break;
766     case 'G':
767     case 'g':
768 #ifdef __CYGWIN__
769       /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
770 	 requires Windows support. */
771       if (!strcasecmp (charset, "GBK")
772 	  || !strcasecmp (charset, "GB2312"))
773       	{
774 	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
775 	  mbc_max = 2;
776 	  l_wctomb = __gbk_wctomb;
777 	  l_mbtowc = __gbk_mbtowc;
778 	}
779       else
780 #endif /* __CYGWIN__ */
781       /* GEORGIAN-PS and the alias without dash */
782       if (!strncasecmp (charset, "GEORGIAN", 8))
783 	{
784 	  c = charset + 8;
785 	  if (*c == '-')
786 	    ++c;
787 	  if (strcasecmp (c, "PS"))
788 	    FAIL;
789 	  strcpy (charset, "CP101");
790 	  mbc_max = 1;
791 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
792 	  l_wctomb = __cp_wctomb;
793 	  l_mbtowc = __cp_mbtowc;
794 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
795 	  l_wctomb = __ascii_wctomb;
796 	  l_mbtowc = __ascii_mbtowc;
797 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
798 	}
799       else
800 	FAIL;
801       break;
802     case 'P':
803     case 'p':
804       /* PT154 */
805       if (strcasecmp (charset, "PT154"))
806 	FAIL;
807       strcpy (charset, "CP102");
808       mbc_max = 1;
809 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
810       l_wctomb = __cp_wctomb;
811       l_mbtowc = __cp_mbtowc;
812 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
813       l_wctomb = __ascii_wctomb;
814       l_mbtowc = __ascii_mbtowc;
815 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
816       break;
817     case 'T':
818     case 't':
819       if (strncasecmp (charset, "TIS", 3))
820       	FAIL;
821       c = charset + 3;
822       if (*c == '-')
823 	++c;
824       if (strcasecmp (c, "620"))
825       	FAIL;
826       strcpy (charset, "CP874");
827       mbc_max = 1;
828 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
829       l_wctomb = __cp_wctomb;
830       l_mbtowc = __cp_mbtowc;
831 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
832       l_wctomb = __ascii_wctomb;
833       l_mbtowc = __ascii_mbtowc;
834 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
835       break;
836 #ifdef __CYGWIN__
837     /* Newlib does not provide Big5 and Cygwin's implementation
838        requires Windows support. */
839     case 'B':
840     case 'b':
841       if (strcasecmp (charset, "BIG5"))
842       	FAIL;
843       strcpy (charset, "BIG5");
844       mbc_max = 2;
845       l_wctomb = __big5_wctomb;
846       l_mbtowc = __big5_mbtowc;
847       break;
848 #endif /* __CYGWIN__ */
849     default:
850       FAIL;
851     }
852   switch (category)
853     {
854     case LC_CTYPE:
855       strcpy (lc_ctype_charset, charset);
856       __mb_cur_max = mbc_max;
857       __wctomb = l_wctomb;
858       __mbtowc = l_mbtowc;
859       __set_ctype (charset);
860       /* Determine the width for the "CJK Ambiguous Width" category of
861          characters. This is used in wcwidth(). Assume single width for
862          single-byte charsets, and double width for multi-byte charsets
863          other than UTF-8. For UTF-8, use double width for the East Asian
864          languages ("ja", "ko", "zh"), and single width for everything else.
865          Single width can also be forced with the "@cjknarrow" modifier. */
866       lc_ctype_cjk_lang = !cjknarrow
867 			  && mbc_max > 1
868 			  && (charset[0] != 'U'
869 			      || strncmp (locale, "ja", 2) == 0
870 			      || strncmp (locale, "ko", 2) == 0
871 			      || strncmp (locale, "zh", 2) == 0);
872 #ifdef __HAVE_LOCALE_INFO__
873       ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
874 #endif /* __HAVE_LOCALE_INFO__ */
875       break;
876     case LC_MESSAGES:
877       strcpy (lc_message_charset, charset);
878 #ifdef __HAVE_LOCALE_INFO__
879       ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
880       if (!ret)
881 #endif /* __HAVE_LOCALE_INFO__ */
882       break;
883 #ifdef __HAVE_LOCALE_INFO__
884 #ifdef __CYGWIN__
885   /* Right now only Cygwin supports a __collate_load_locale function at all. */
886     case LC_COLLATE:
887       ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
888       break;
889 #endif
890     case LC_MONETARY:
891       ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
892       break;
893     case LC_NUMERIC:
894       ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
895       break;
896     case LC_TIME:
897       ret = __time_load_locale (locale, (void *) l_wctomb, charset);
898       break;
899 #endif /* __HAVE_LOCALE_INFO__ */
900     default:
901       break;
902     }
903 #ifdef __HAVE_LOCALE_INFO__
904   if (ret)
905     FAIL;
906 #endif /* __HAVE_LOCALE_INFO__ */
907   return strcpy(current_categories[category], new_categories[category]);
908 }
909 
910 static const char *
__get_locale_env(struct _reent * p,int category)911 __get_locale_env(struct _reent *p, int category)
912 {
913   const char *env;
914 
915   /* 1. check LC_ALL. */
916   env = _getenv_r (p, categories[0]);
917 
918   /* 2. check LC_* */
919   if (env == NULL || !*env)
920     env = _getenv_r (p, categories[category]);
921 
922   /* 3. check LANG */
923   if (env == NULL || !*env)
924     env = _getenv_r (p, "LANG");
925 
926   /* 4. if none is set, fall to default locale */
927   if (env == NULL || !*env)
928     env = __default_locale;
929 
930   return env;
931 }
932 #endif /* _MB_CAPABLE */
933 
934 char *
_DEFUN_VOID(__locale_charset)935 _DEFUN_VOID(__locale_charset)
936 {
937 #if 0//def __HAVE_LOCALE_INFO__
938   return __get_current_ctype_locale ()->codeset;
939 #else
940   return lc_ctype_charset;
941 #endif
942 }
943 
944 int
_DEFUN_VOID(__locale_mb_cur_max)945 _DEFUN_VOID(__locale_mb_cur_max)
946 {
947 #if 0//def __HAVE_LOCALE_INFO__
948   return __get_current_ctype_locale ()->mb_cur_max[0];
949 #else
950   return __mb_cur_max;
951 #endif
952 }
953 
954 
955 char *
_DEFUN_VOID(__locale_msgcharset)956 _DEFUN_VOID(__locale_msgcharset)
957 {
958 #ifdef __HAVE_LOCALE_INFO__
959   return (char *) __get_current_messages_locale ()->codeset;
960 #else
961   return lc_message_charset;
962 #endif
963 }
964 
965 int
_DEFUN_VOID(__locale_cjk_lang)966 _DEFUN_VOID(__locale_cjk_lang)
967 {
968   return lc_ctype_cjk_lang;
969 }
970 
971 struct lconv *
972 _DEFUN(_localeconv_r, (data),
973       struct _reent *data)
974 {
975 #ifdef __HAVE_LOCALE_INFO__
976   if (__nlocale_changed)
977     {
978       struct lc_numeric_T *n = __get_current_numeric_locale ();
979       lconv.decimal_point = (char *) n->decimal_point;
980       lconv.thousands_sep = (char *) n->thousands_sep;
981       lconv.grouping = (char *) n->grouping;
982       __nlocale_changed = 0;
983     }
984   if (__mlocale_changed)
985     {
986       struct lc_monetary_T *m = __get_current_monetary_locale ();
987       lconv.int_curr_symbol = (char *) m->int_curr_symbol;
988       lconv.currency_symbol = (char *) m->currency_symbol;
989       lconv.mon_decimal_point = (char *) m->mon_decimal_point;
990       lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
991       lconv.mon_grouping = (char *) m->mon_grouping;
992       lconv.positive_sign = (char *) m->positive_sign;
993       lconv.negative_sign = (char *) m->negative_sign;
994       lconv.int_frac_digits = m->int_frac_digits[0];
995       lconv.frac_digits = m->frac_digits[0];
996       lconv.p_cs_precedes = m->p_cs_precedes[0];
997       lconv.p_sep_by_space = m->p_sep_by_space[0];
998       lconv.n_cs_precedes = m->n_cs_precedes[0];
999       lconv.n_sep_by_space = m->n_sep_by_space[0];
1000       lconv.p_sign_posn = m->p_sign_posn[0];
1001       lconv.n_sign_posn = m->n_sign_posn[0];
1002 #ifdef __HAVE_LOCALE_INFO_EXTENDED__
1003       lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
1004       lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
1005       lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
1006       lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
1007       lconv.int_n_sign_posn = m->int_n_sign_posn[0];
1008       lconv.int_p_sign_posn = m->int_p_sign_posn[0];
1009 #else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1010       lconv.int_p_cs_precedes = m->p_cs_precedes[0];
1011       lconv.int_p_sep_by_space = m->p_sep_by_space[0];
1012       lconv.int_n_cs_precedes = m->n_cs_precedes[0];
1013       lconv.int_n_sep_by_space = m->n_sep_by_space[0];
1014       lconv.int_n_sign_posn = m->n_sign_posn[0];
1015       lconv.int_p_sign_posn = m->p_sign_posn[0];
1016 #endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1017       __mlocale_changed = 0;
1018     }
1019 #endif /* __HAVE_LOCALE_INFO__ */
1020   return (struct lconv *) &lconv;
1021 }
1022 
1023 #ifndef _REENT_ONLY
1024 
1025 #ifndef __CYGWIN__
1026 /* Cygwin provides its own version of setlocale to perform some more
1027    initialization work.  It calls _setlocale_r, though. */
1028 char *
1029 _DEFUN(setlocale, (category, locale),
1030        int category _AND
1031        _CONST char *locale)
1032 {
1033   return _setlocale_r (_REENT, category, locale);
1034 }
1035 #endif /* __CYGWIN__ */
1036 
1037 struct lconv *
_DEFUN_VOID(localeconv)1038 _DEFUN_VOID(localeconv)
1039 {
1040   return _localeconv_r (_REENT);
1041 }
1042 
1043 #endif
1044