1 /* nl_langinfo() replacement: query locale dependent information.
2 
3    Copyright (C) 2007-2021 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU Lesser General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #include <config.h>
19 
20 /* Specification.  */
21 #include <langinfo.h>
22 
23 #include <locale.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #if defined _WIN32 && ! defined __CYGWIN__
27 # define WIN32_LEAN_AND_MEAN  /* avoid including junk */
28 # include <windows.h>
29 # include <stdio.h>
30 #endif
31 
32 #if REPLACE_NL_LANGINFO && !NL_LANGINFO_MTSAFE
33 # if defined _WIN32 && !defined __CYGWIN__
34 
35 #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
36 #  include <windows.h>
37 
38 # elif HAVE_PTHREAD_API
39 
40 #  include <pthread.h>
41 #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
42 #   include <threads.h>
43 #   pragma weak thrd_exit
44 #   define c11_threads_in_use() (thrd_exit != NULL)
45 #  else
46 #   define c11_threads_in_use() 0
47 #  endif
48 
49 # elif HAVE_THREADS_H
50 
51 #  include <threads.h>
52 
53 # endif
54 #endif
55 
/* nl_langinfo() must be multithread-safe.  To achieve this without using
   thread-local storage:
     1. We use a specific static buffer for each possible argument, so that
        different threads can call nl_langinfo with different arguments
        without interfering.
     2. We use a simple strcpy or memcpy to fill this static buffer.  Filling it
        through, for example, strcpy + strcat would not be guaranteed to leave
        the buffer's contents intact if another thread is currently accessing
        it.  If necessary, the contents are first assembled in a stack-allocated
        buffer.  */
66 
67 #if !REPLACE_NL_LANGINFO || GNULIB_defined_CODESET
68 /* Return the codeset of the current locale, if this is easily deducible.
69    Otherwise, return "".  */
70 static char *
ctype_codeset(void)71 ctype_codeset (void)
72 {
73   static char result[2 + 10 + 1];
74   char buf[2 + 10 + 1];
75   char locale[SETLOCALE_NULL_MAX];
76   char *codeset;
77   size_t codesetlen;
78 
79   if (setlocale_null_r (LC_CTYPE, locale, sizeof (locale)))
80     locale[0] = '\0';
81 
82   codeset = buf;
83   codeset[0] = '\0';
84 
85   if (locale[0])
86     {
87       /* If the locale name contains an encoding after the dot, return it.  */
88       char *dot = strchr (locale, '.');
89 
90       if (dot)
91         {
92           /* Look for the possible @... trailer and remove it, if any.  */
93           char *codeset_start = dot + 1;
94           char const *modifier = strchr (codeset_start, '@');
95 
96           if (! modifier)
97             codeset = codeset_start;
98           else
99             {
100               codesetlen = modifier - codeset_start;
101               if (codesetlen < sizeof buf)
102                 {
103                   codeset = memcpy (buf, codeset_start, codesetlen);
104                   codeset[codesetlen] = '\0';
105                 }
106             }
107         }
108     }
109 
110 # if defined _WIN32 && ! defined __CYGWIN__
111   /* If setlocale is successful, it returns the number of the
112      codepage, as a string.  Otherwise, fall back on Windows API
113      GetACP, which returns the locale's codepage as a number (although
114      this doesn't change according to what the 'setlocale' call specified).
115      Either way, prepend "CP" to make it a valid codeset name.  */
116   codesetlen = strlen (codeset);
117   if (0 < codesetlen && codesetlen < sizeof buf - 2)
118     memmove (buf + 2, codeset, codesetlen + 1);
119   else
120     sprintf (buf + 2, "%u", GetACP ());
121   /* For a locale name such as "French_France.65001", in Windows 10,
122      setlocale now returns "French_France.utf8" instead.  */
123   if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
124     return (char *) "UTF-8";
125   else
126     {
127       memcpy (buf, "CP", 2);
128       strcpy (result, buf);
129       return result;
130     }
131 # else
132   strcpy (result, codeset);
133   return result;
134 #endif
135 }
136 #endif
137 
138 
139 #if REPLACE_NL_LANGINFO
140 
141 /* Override nl_langinfo with support for added nl_item values.  */
142 
143 # undef nl_langinfo
144 
145 /* Without locking, on Solaris 11.3, test-nl_langinfo-mt fails, with message
146    "thread5 disturbed by threadN!", even when threadN invokes only
147       nl_langinfo (CODESET);
148       nl_langinfo (CRNCYSTR);
149    Similarly on Solaris 10.  */
150 
151 # if !NL_LANGINFO_MTSAFE /* Solaris */
152 
153 #  define ITEMS (MAXSTRMSG + 1)
154 #  define MAX_RESULT_LEN 80
155 
156 static char *
nl_langinfo_unlocked(nl_item item)157 nl_langinfo_unlocked (nl_item item)
158 {
159   static char result[ITEMS][MAX_RESULT_LEN];
160 
161   /* The result of nl_langinfo is in storage that can be overwritten by
162      other calls to nl_langinfo.  */
163   char *tmp = nl_langinfo (item);
164   if (item >= 0 && item < ITEMS && tmp != NULL)
165     {
166       size_t tmp_len = strlen (tmp);
167       if (tmp_len < MAX_RESULT_LEN)
168         strcpy (result[item], tmp);
169       else
170         {
171           /* Produce a truncated result.  Oh well...  */
172           result[item][MAX_RESULT_LEN - 1] = '\0';
173           memcpy (result[item], tmp, MAX_RESULT_LEN - 1);
174         }
175       return result[item];
176     }
177   else
178     return tmp;
179 }
180 
181 /* Use a lock, so that no two threads can invoke nl_langinfo_unlocked
182    at the same time.  */
183 
184 /* Prohibit renaming this symbol.  */
185 #  undef gl_get_nl_langinfo_lock
186 
187 #  if defined _WIN32 && !defined __CYGWIN__
188 
189 extern __declspec(dllimport) CRITICAL_SECTION *gl_get_nl_langinfo_lock (void);
190 
191 static char *
nl_langinfo_with_lock(nl_item item)192 nl_langinfo_with_lock (nl_item item)
193 {
194   CRITICAL_SECTION *lock = gl_get_nl_langinfo_lock ();
195   char *ret;
196 
197   EnterCriticalSection (lock);
198   ret = nl_langinfo_unlocked (item);
199   LeaveCriticalSection (lock);
200 
201   return ret;
202 }
203 
204 #  elif HAVE_PTHREAD_API
205 
206 extern
207 #   if defined _WIN32 || defined __CYGWIN__
208   __declspec(dllimport)
209 #   endif
210   pthread_mutex_t *gl_get_nl_langinfo_lock (void);
211 
212 #   if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */
213 
214      /* Avoid the need to link with '-lpthread'.  */
215 #    pragma weak pthread_mutex_lock
216 #    pragma weak pthread_mutex_unlock
217 
218      /* Determine whether libpthread is in use.  */
219 #    pragma weak pthread_mutexattr_gettype
220      /* See the comments in lock.h.  */
221 #    define pthread_in_use() \
222        (pthread_mutexattr_gettype != NULL || c11_threads_in_use ())
223 
224 #   else
225 #    define pthread_in_use() 1
226 #   endif
227 
/* POSIX threads variant: run nl_langinfo_unlocked (ITEM) while holding the
   mutex supplied by gl_get_nl_langinfo_lock ().  When pthread_in_use ()
   is false, the mutex is skipped altogether.  A failure to lock or unlock
   the mutex aborts the program.  */
static char *
nl_langinfo_with_lock (nl_item item)
{
  pthread_mutex_t *mutex;
  char *value;

  if (!pthread_in_use ())
    return nl_langinfo_unlocked (item);

  mutex = gl_get_nl_langinfo_lock ();

  if (pthread_mutex_lock (mutex) != 0)
    abort ();
  value = nl_langinfo_unlocked (item);
  if (pthread_mutex_unlock (mutex) != 0)
    abort ();

  return value;
}
247 
248 #  elif HAVE_THREADS_H
249 
250 extern mtx_t *gl_get_nl_langinfo_lock (void);
251 
/* ISO C 11 threads variant: run nl_langinfo_unlocked (ITEM) while holding
   the mutex supplied by gl_get_nl_langinfo_lock ().  A failure to lock or
   unlock the mutex aborts the program.  */
static char *
nl_langinfo_with_lock (nl_item item)
{
  mtx_t *mutex = gl_get_nl_langinfo_lock ();
  char *value;

  if (mtx_lock (mutex) != thrd_success)
    abort ();

  value = nl_langinfo_unlocked (item);

  if (mtx_unlock (mutex) != thrd_success)
    abort ();

  return value;
}
266 
267 #  endif
268 
269 # else
270 
271 /* On other platforms, no lock is needed.  */
272 #  define nl_langinfo_with_lock nl_langinfo
273 
274 # endif
275 
/* Replacement for nl_langinfo that additionally understands the nl_item
   values which gnulib itself defines (guarded by the GNULIB_defined_*
   macros).  Such items are either answered with a constant string, or
   mapped to an item the system knows and forwarded.  Every other ITEM is
   forwarded unchanged through nl_langinfo_with_lock.  */
char *
rpl_nl_langinfo (nl_item item)
{
  switch (item)
    {
# if GNULIB_defined_CODESET
    case CODESET:
      return ctype_codeset ();
# endif

# if GNULIB_defined_T_FMT_AMPM
    case T_FMT_AMPM:
      return (char *) "%I:%M:%S %p";
# endif

# if GNULIB_defined_ALTMON
    case ALTMON_1:
    case ALTMON_2:
    case ALTMON_3:
    case ALTMON_4:
    case ALTMON_5:
    case ALTMON_6:
    case ALTMON_7:
    case ALTMON_8:
    case ALTMON_9:
    case ALTMON_10:
    case ALTMON_11:
    case ALTMON_12:
      /* We don't ship the appropriate localizations with gnulib.  Therefore,
         treat ALTMON_i like MON_i.  */
      return nl_langinfo_with_lock (item - ALTMON_1 + MON_1);
# endif

# if GNULIB_defined_ERA
    case ERA:
      /* The format is not standardized.  In glibc it is a sequence of strings
         of the form "direction:offset:start_date:end_date:era_name:era_format"
         with an empty string at the end.  */
      return (char *) "";

    case ERA_D_FMT:
      /* The %Ex conversion in strftime behaves like %x if the locale does not
         have an alternative time format.  */
      return nl_langinfo_with_lock (D_FMT);

    case ERA_D_T_FMT:
      /* The %Ec conversion in strftime behaves like %c if the locale does not
         have an alternative time format.  */
      return nl_langinfo_with_lock (D_T_FMT);

    case ERA_T_FMT:
      /* The %EX conversion in strftime behaves like %X if the locale does not
         have an alternative time format.  */
      return nl_langinfo_with_lock (T_FMT);

    case ALT_DIGITS:
      /* The format is not standardized.  In glibc it is a sequence of 10
         strings, appended in memory.  */
      return (char *) "\0\0\0\0\0\0\0\0\0\0";
# endif

# if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
    case YESEXPR:
      return (char *) "^[yY]";

    case NOEXPR:
      return (char *) "^[nN]";
# endif

    default:
      /* Not an item handled here; let the system answer it as is.  */
      break;
    }

  return nl_langinfo_with_lock (item);
}
344 
345 #else
346 
347 /* Provide nl_langinfo from scratch, either for native MS-Windows, or
348    for old Unix platforms without locales, such as Linux libc5 or
349    BeOS.  */
350 
351 # include <time.h>
352 
353 char *
nl_langinfo(nl_item item)354 nl_langinfo (nl_item item)
355 {
356   char buf[100];
357   struct tm tmm = { 0 };
358 
359   switch (item)
360     {
361     /* nl_langinfo items of the LC_CTYPE category */
362     case CODESET:
363       {
364         char *codeset = ctype_codeset ();
365         if (*codeset)
366           return codeset;
367       }
368 # ifdef __BEOS__
369       return (char *) "UTF-8";
370 # else
371       return (char *) "ISO-8859-1";
372 # endif
373     /* nl_langinfo items of the LC_NUMERIC category */
374     case RADIXCHAR:
375       return localeconv () ->decimal_point;
376     case THOUSEP:
377       return localeconv () ->thousands_sep;
378 # ifdef GROUPING
379     case GROUPING:
380       return localeconv () ->grouping;
381 # endif
382     /* nl_langinfo items of the LC_TIME category.
383        TODO: Really use the locale.  */
384     case D_T_FMT:
385     case ERA_D_T_FMT:
386       return (char *) "%a %b %e %H:%M:%S %Y";
387     case D_FMT:
388     case ERA_D_FMT:
389       return (char *) "%m/%d/%y";
390     case T_FMT:
391     case ERA_T_FMT:
392       return (char *) "%H:%M:%S";
393     case T_FMT_AMPM:
394       return (char *) "%I:%M:%S %p";
395     case AM_STR:
396       {
397         static char result[80];
398         if (!strftime (buf, sizeof result, "%p", &tmm))
399           return (char *) "AM";
400         strcpy (result, buf);
401         return result;
402       }
403     case PM_STR:
404       {
405         static char result[80];
406         tmm.tm_hour = 12;
407         if (!strftime (buf, sizeof result, "%p", &tmm))
408           return (char *) "PM";
409         strcpy (result, buf);
410         return result;
411       }
412     case DAY_1:
413     case DAY_2:
414     case DAY_3:
415     case DAY_4:
416     case DAY_5:
417     case DAY_6:
418     case DAY_7:
419       {
420         static char result[7][50];
421         static char const days[][sizeof "Wednesday"] = {
422           "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
423           "Friday", "Saturday"
424         };
425         tmm.tm_wday = item - DAY_1;
426         if (!strftime (buf, sizeof result[0], "%A", &tmm))
427           return (char *) days[item - DAY_1];
428         strcpy (result[item - DAY_1], buf);
429         return result[item - DAY_1];
430       }
431     case ABDAY_1:
432     case ABDAY_2:
433     case ABDAY_3:
434     case ABDAY_4:
435     case ABDAY_5:
436     case ABDAY_6:
437     case ABDAY_7:
438       {
439         static char result[7][30];
440         static char const abdays[][sizeof "Sun"] = {
441           "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
442         };
443         tmm.tm_wday = item - ABDAY_1;
444         if (!strftime (buf, sizeof result[0], "%a", &tmm))
445           return (char *) abdays[item - ABDAY_1];
446         strcpy (result[item - ABDAY_1], buf);
447         return result[item - ABDAY_1];
448       }
449     {
450       static char const months[][sizeof "September"] = {
451         "January", "February", "March", "April", "May", "June", "July",
452         "September", "October", "November", "December"
453       };
454       case MON_1:
455       case MON_2:
456       case MON_3:
457       case MON_4:
458       case MON_5:
459       case MON_6:
460       case MON_7:
461       case MON_8:
462       case MON_9:
463       case MON_10:
464       case MON_11:
465       case MON_12:
466         {
467           static char result[12][50];
468           tmm.tm_mon = item - MON_1;
469           if (!strftime (buf, sizeof result[0], "%B", &tmm))
470             return (char *) months[item - MON_1];
471           strcpy (result[item - MON_1], buf);
472           return result[item - MON_1];
473         }
474       case ALTMON_1:
475       case ALTMON_2:
476       case ALTMON_3:
477       case ALTMON_4:
478       case ALTMON_5:
479       case ALTMON_6:
480       case ALTMON_7:
481       case ALTMON_8:
482       case ALTMON_9:
483       case ALTMON_10:
484       case ALTMON_11:
485       case ALTMON_12:
486         {
487           static char result[12][50];
488           tmm.tm_mon = item - ALTMON_1;
489           /* The platforms without nl_langinfo() don't support strftime with
490              %OB.  We don't even need to try.  */
491           #if 0
492           if (!strftime (buf, sizeof result[0], "%OB", &tmm))
493           #endif
494             if (!strftime (buf, sizeof result[0], "%B", &tmm))
495               return (char *) months[item - ALTMON_1];
496           strcpy (result[item - ALTMON_1], buf);
497           return result[item - ALTMON_1];
498         }
499     }
500     case ABMON_1:
501     case ABMON_2:
502     case ABMON_3:
503     case ABMON_4:
504     case ABMON_5:
505     case ABMON_6:
506     case ABMON_7:
507     case ABMON_8:
508     case ABMON_9:
509     case ABMON_10:
510     case ABMON_11:
511     case ABMON_12:
512       {
513         static char result[12][30];
514         static char const abmonths[][sizeof "Jan"] = {
515           "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
516           "Sep", "Oct", "Nov", "Dec"
517         };
518         tmm.tm_mon = item - ABMON_1;
519         if (!strftime (buf, sizeof result[0], "%b", &tmm))
520           return (char *) abmonths[item - ABMON_1];
521         strcpy (result[item - ABMON_1], buf);
522         return result[item - ABMON_1];
523       }
524     case ERA:
525       return (char *) "";
526     case ALT_DIGITS:
527       return (char *) "\0\0\0\0\0\0\0\0\0\0";
528     /* nl_langinfo items of the LC_MONETARY category.  */
529     case CRNCYSTR:
530       return localeconv () ->currency_symbol;
531 # ifdef INT_CURR_SYMBOL
532     case INT_CURR_SYMBOL:
533       return localeconv () ->int_curr_symbol;
534     case MON_DECIMAL_POINT:
535       return localeconv () ->mon_decimal_point;
536     case MON_THOUSANDS_SEP:
537       return localeconv () ->mon_thousands_sep;
538     case MON_GROUPING:
539       return localeconv () ->mon_grouping;
540     case POSITIVE_SIGN:
541       return localeconv () ->positive_sign;
542     case NEGATIVE_SIGN:
543       return localeconv () ->negative_sign;
544     case FRAC_DIGITS:
545       return & localeconv () ->frac_digits;
546     case INT_FRAC_DIGITS:
547       return & localeconv () ->int_frac_digits;
548     case P_CS_PRECEDES:
549       return & localeconv () ->p_cs_precedes;
550     case N_CS_PRECEDES:
551       return & localeconv () ->n_cs_precedes;
552     case P_SEP_BY_SPACE:
553       return & localeconv () ->p_sep_by_space;
554     case N_SEP_BY_SPACE:
555       return & localeconv () ->n_sep_by_space;
556     case P_SIGN_POSN:
557       return & localeconv () ->p_sign_posn;
558     case N_SIGN_POSN:
559       return & localeconv () ->n_sign_posn;
560 # endif
561     /* nl_langinfo items of the LC_MESSAGES category
562        TODO: Really use the locale. */
563     case YESEXPR:
564       return (char *) "^[yY]";
565     case NOEXPR:
566       return (char *) "^[nN]";
567     default:
568       return (char *) "";
569     }
570 }
571 
572 #endif
573