1 /* Set the current locale.  -*- coding: utf-8 -*-
2    Copyright (C) 2009, 2011-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
18 
19 #include <config.h>
20 
21 /* Override setlocale() so that when the default locale is requested
22    (locale = ""), the environment variables LC_ALL, LC_*, and LANG are
23    considered.
24    Also include all the functionality from libintl's setlocale() override.  */
25 
26 /* Please keep this file in sync with
27    gettext/gettext-runtime/intl/setlocale.c !  */
28 
29 /* Specification.  */
30 #include <locale.h>
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #include "localename.h"
37 
38 #if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE
39 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES
40 #  include <CoreFoundation/CFLocale.h>
41 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
42 #  include <CoreFoundation/CFPreferences.h>
43 # endif
44 # include <CoreFoundation/CFPropertyList.h>
45 # include <CoreFoundation/CFArray.h>
46 # include <CoreFoundation/CFString.h>
47 extern void gl_locale_name_canonicalize (char *name);
48 #endif
49 
50 #if 1
51 
52 # undef setlocale
53 
54 /* Which of the replacements to activate?  */
55 # if NEED_SETLOCALE_IMPROVED
56 #  define setlocale_improved rpl_setlocale
57 # elif NEED_SETLOCALE_MTSAFE
58 #  define setlocale_mtsafe rpl_setlocale
59 # else
60 #  error "This file should only be compiled if NEED_SETLOCALE_IMPROVED || NEED_SETLOCALE_MTSAFE."
61 # endif
62 
63 /* Like setlocale, but guaranteed to be multithread-safe if LOCALE == NULL.  */
64 # if !SETLOCALE_NULL_ALL_MTSAFE || !SETLOCALE_NULL_ONE_MTSAFE /* i.e. if NEED_SETLOCALE_MTSAFE */
65 
#  if NEED_SETLOCALE_IMPROVED
static
#  endif
char *
setlocale_mtsafe (int category, const char *locale)
{
  /* A non-NULL LOCALE is a request to change the locale: hand it to
     setlocale unchanged.  */
  if (locale != NULL)
    return setlocale (category, locale);

  /* A NULL LOCALE is a pure query: answer it through setlocale_null
     instead of setlocale.  */
  return (char *) setlocale_null (category);
}
77 # else /* !NEED_SETLOCALE_MTSAFE */
78 
79 #  define setlocale_mtsafe setlocale
80 
81 # endif /* NEED_SETLOCALE_MTSAFE */
82 
83 # if NEED_SETLOCALE_IMPROVED
84 
/* Map the locale category CATEGORY to its symbolic name, for example
   LC_CTYPE to "LC_CTYPE".  Categories that are not listed below yield the
   placeholder "LC_XXX".  */
static const char *
category_to_name (int category)
{
  switch (category)
    {
    case LC_COLLATE:
      return "LC_COLLATE";
    case LC_CTYPE:
      return "LC_CTYPE";
    case LC_MONETARY:
      return "LC_MONETARY";
    case LC_NUMERIC:
      return "LC_NUMERIC";
    case LC_TIME:
      return "LC_TIME";
    case LC_MESSAGES:
      return "LC_MESSAGES";
    default:
      break;
    }

  /* If you have a better idea for a default value let me know.  */
  return "LC_XXX";
}
118 
119 #  if defined _WIN32 && ! defined __CYGWIN__
120 
121 /* The native Windows setlocale() function expects locale names of the form
122    "German" or "German_Germany" or "DEU", but not "de" or "de_DE".  We need
123    to convert the names from the form with ISO 639 language code and ISO 3166
124    country code to the form with English names or with three-letter identifier.
125    The three-letter identifiers known by a Windows XP SP2 or SP3 are:
126      AFK  Afrikaans_South Africa.1252
127      ARA  Arabic_Saudi Arabia.1256
128      ARB  Arabic_Lebanon.1256
129      ARE  Arabic_Egypt.1256
130      ARG  Arabic_Algeria.1256
131      ARH  Arabic_Bahrain.1256
132      ARI  Arabic_Iraq.1256
133      ARJ  Arabic_Jordan.1256
134      ARK  Arabic_Kuwait.1256
135      ARL  Arabic_Libya.1256
136      ARM  Arabic_Morocco.1256
137      ARO  Arabic_Oman.1256
138      ARQ  Arabic_Qatar.1256
139      ARS  Arabic_Syria.1256
140      ART  Arabic_Tunisia.1256
141      ARU  Arabic_U.A.E..1256
142      ARY  Arabic_Yemen.1256
143      AZE  Azeri (Latin)_Azerbaijan.1254
144      BEL  Belarusian_Belarus.1251
145      BGR  Bulgarian_Bulgaria.1251
146      BSB  Bosnian_Bosnia and Herzegovina.1250
147      BSC  Bosnian (Cyrillic)_Bosnia and Herzegovina.1250  (wrong encoding!)
148      CAT  Catalan_Spain.1252
149      CHH  Chinese_Hong Kong S.A.R..950
150      CHI  Chinese_Singapore.936
151      CHS  Chinese_People's Republic of China.936
152      CHT  Chinese_Taiwan.950
153      CSY  Czech_Czech Republic.1250
154      CYM  Welsh_United Kingdom.1252
155      DAN  Danish_Denmark.1252
156      DEA  German_Austria.1252
157      DEC  German_Liechtenstein.1252
158      DEL  German_Luxembourg.1252
159      DES  German_Switzerland.1252
160      DEU  German_Germany.1252
161      ELL  Greek_Greece.1253
162      ENA  English_Australia.1252
163      ENB  English_Caribbean.1252
164      ENC  English_Canada.1252
165      ENG  English_United Kingdom.1252
166      ENI  English_Ireland.1252
167      ENJ  English_Jamaica.1252
168      ENL  English_Belize.1252
169      ENP  English_Republic of the Philippines.1252
170      ENS  English_South Africa.1252
171      ENT  English_Trinidad and Tobago.1252
172      ENU  English_United States.1252
173      ENW  English_Zimbabwe.1252
174      ENZ  English_New Zealand.1252
175      ESA  Spanish_Panama.1252
176      ESB  Spanish_Bolivia.1252
177      ESC  Spanish_Costa Rica.1252
178      ESD  Spanish_Dominican Republic.1252
179      ESE  Spanish_El Salvador.1252
180      ESF  Spanish_Ecuador.1252
181      ESG  Spanish_Guatemala.1252
182      ESH  Spanish_Honduras.1252
183      ESI  Spanish_Nicaragua.1252
184      ESL  Spanish_Chile.1252
185      ESM  Spanish_Mexico.1252
186      ESN  Spanish_Spain.1252
187      ESO  Spanish_Colombia.1252
188      ESP  Spanish_Spain.1252
189      ESR  Spanish_Peru.1252
190      ESS  Spanish_Argentina.1252
191      ESU  Spanish_Puerto Rico.1252
192      ESV  Spanish_Venezuela.1252
193      ESY  Spanish_Uruguay.1252
194      ESZ  Spanish_Paraguay.1252
195      ETI  Estonian_Estonia.1257
196      EUQ  Basque_Spain.1252
197      FAR  Farsi_Iran.1256
198      FIN  Finnish_Finland.1252
199      FOS  Faroese_Faroe Islands.1252
200      FPO  Filipino_Philippines.1252
201      FRA  French_France.1252
202      FRB  French_Belgium.1252
203      FRC  French_Canada.1252
204      FRL  French_Luxembourg.1252
205      FRM  French_Principality of Monaco.1252
206      FRS  French_Switzerland.1252
207      FYN  Frisian_Netherlands.1252
208      GLC  Galician_Spain.1252
209      HEB  Hebrew_Israel.1255
210      HRB  Croatian_Bosnia and Herzegovina.1250
211      HRV  Croatian_Croatia.1250
212      HUN  Hungarian_Hungary.1250
213      IND  Indonesian_Indonesia.1252
214      IRE  Irish_Ireland.1252
215      ISL  Icelandic_Iceland.1252
216      ITA  Italian_Italy.1252
217      ITS  Italian_Switzerland.1252
218      IUK  Inuktitut (Latin)_Canada.1252
219      JPN  Japanese_Japan.932
220      KKZ  Kazakh_Kazakhstan.1251
221      KOR  Korean_Korea.949
222      KYR  Kyrgyz_Kyrgyzstan.1251
223      LBX  Luxembourgish_Luxembourg.1252
224      LTH  Lithuanian_Lithuania.1257
225      LVI  Latvian_Latvia.1257
226      MKI  FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251
227      MON  Mongolian_Mongolia.1251
228      MPD  Mapudungun_Chile.1252
229      MSB  Malay_Brunei Darussalam.1252
230      MSL  Malay_Malaysia.1252
231      MWK  Mohawk_Canada.1252
232      NLB  Dutch_Belgium.1252
233      NLD  Dutch_Netherlands.1252
234      NON  Norwegian-Nynorsk_Norway.1252
235      NOR  Norwegian (Bokmål)_Norway.1252
236      NSO  Northern Sotho_South Africa.1252
237      PLK  Polish_Poland.1250
238      PTB  Portuguese_Brazil.1252
239      PTG  Portuguese_Portugal.1252
240      QUB  Quechua_Bolivia.1252
241      QUE  Quechua_Ecuador.1252
242      QUP  Quechua_Peru.1252
243      RMC  Romansh_Switzerland.1252
244      ROM  Romanian_Romania.1250
245      RUS  Russian_Russia.1251
246      SKY  Slovak_Slovakia.1250
247      SLV  Slovenian_Slovenia.1250
248      SMA  Sami (Southern)_Norway.1252
249      SMB  Sami (Southern)_Sweden.1252
250      SME  Sami (Northern)_Norway.1252
251      SMF  Sami (Northern)_Sweden.1252
252      SMG  Sami (Northern)_Finland.1252
253      SMJ  Sami (Lule)_Norway.1252
254      SMK  Sami (Lule)_Sweden.1252
255      SMN  Sami (Inari)_Finland.1252
256      SMS  Sami (Skolt)_Finland.1252
257      SQI  Albanian_Albania.1250
258      SRB  Serbian (Cyrillic)_Serbia and Montenegro.1251
259      SRL  Serbian (Latin)_Serbia and Montenegro.1250
260      SRN  Serbian (Cyrillic)_Bosnia and Herzegovina.1251
261      SRS  Serbian (Latin)_Bosnia and Herzegovina.1250
262      SVE  Swedish_Sweden.1252
263      SVF  Swedish_Finland.1252
264      SWK  Swahili_Kenya.1252
265      THA  Thai_Thailand.874
266      TRK  Turkish_Turkey.1254
267      TSN  Tswana_South Africa.1252
268      TTT  Tatar_Russia.1251
269      UKR  Ukrainian_Ukraine.1251
270      URD  Urdu_Islamic Republic of Pakistan.1256
271      USA  English_United States.1252
272      UZB  Uzbek (Latin)_Uzbekistan.1254
273      VIT  Vietnamese_Viet Nam.1258
274      XHO  Xhosa_South Africa.1252
275      ZHH  Chinese_Hong Kong S.A.R..950
276      ZHI  Chinese_Singapore.936
277      ZHM  Chinese_Macau S.A.R..950
278      ZUL  Zulu_South Africa.1252
279  */
280 
281 /* Table from ISO 639 language code, optionally with country or script suffix,
282    to English name.
283    Keep in sync with the gl_locale_name_from_win32_LANGID function in
284    localename.c!  */
/* One row of a lookup table used by search() and setlocale_unixlike().  */
struct table_entry
{
  /* Lookup key.  search() compares it against the wanted string with
     strcmp.  */
  const char *code;
  /* Replacement text for CODE.  setlocale_unixlike passes it to setlocale,
     either alone or as part of a "language_country" string.  */
  const char *english;
};
/* Language codes and their replacement names.
   The entries must stay sorted by CODE in strcmp order, because search()
   performs a binary search on this table.  A code may appear more than
   once; setlocale_unixlike then tries the replacements in table order.  */
static const struct table_entry language_table[] =
  {
    { "af", "Afrikaans" },
    { "am", "Amharic" },
    { "ar", "Arabic" },
    { "arn", "Mapudungun" },
    { "as", "Assamese" },
    { "az@cyrillic", "Azeri (Cyrillic)" },
    { "az@latin", "Azeri (Latin)" },
    { "ba", "Bashkir" },
    { "be", "Belarusian" },
    { "ber", "Tamazight" },
    { "ber@arabic", "Tamazight (Arabic)" },
    { "ber@latin", "Tamazight (Latin)" },
    { "bg", "Bulgarian" },
    { "bin", "Edo" },
    { "bn", "Bengali" },
    { "bn_BD", "Bengali (Bangladesh)" },
    { "bn_IN", "Bengali (India)" },
    { "bnt", "Sutu" },
    { "bo", "Tibetan" },
    { "br", "Breton" },
    { "bs", "BSB" }, /* "Bosnian (Latin)" */
    { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */
    { "ca", "Catalan" },
    { "chr", "Cherokee" },
    { "co", "Corsican" },
    { "cpe", "Hawaiian" },
    { "cs", "Czech" },
    { "cy", "Welsh" },
    { "da", "Danish" },
    { "de", "German" },
    { "dsb", "Lower Sorbian" },
    { "dv", "Divehi" },
    { "el", "Greek" },
    { "en", "English" },
    { "es", "Spanish" },
    { "et", "Estonian" },
    { "eu", "Basque" },
    { "fa", "Farsi" },
    { "ff", "Fulfulde" },
    { "fi", "Finnish" },
    { "fo", "Faroese" }, /* "Faeroese" does not work */
    { "fr", "French" },
    { "fy", "Frisian" },
    { "ga", "IRE" }, /* Gaelic (Ireland) */
    { "gd", "Gaelic (Scotland)" },
    { "gd", "Scottish Gaelic" },
    { "gl", "Galician" },
    { "gn", "Guarani" },
    { "gsw", "Alsatian" },
    { "gu", "Gujarati" },
    { "ha", "Hausa" },
    { "he", "Hebrew" },
    { "hi", "Hindi" },
    { "hr", "Croatian" },
    { "hsb", "Upper Sorbian" },
    { "hu", "Hungarian" },
    { "hy", "Armenian" },
    { "id", "Indonesian" },
    { "ig", "Igbo" },
    { "ii", "Yi" },
    { "is", "Icelandic" },
    { "it", "Italian" },
    { "iu", "IUK" }, /* Inuktitut */
    { "ja", "Japanese" },
    { "ka", "Georgian" },
    { "kk", "Kazakh" },
    { "kl", "Greenlandic" },
    { "km", "Cambodian" },
    { "km", "Khmer" },
    { "kn", "Kannada" },
    { "ko", "Korean" },
    { "kok", "Konkani" },
    { "kr", "Kanuri" },
    { "ks", "Kashmiri" },
    { "ks_IN", "Kashmiri_India" },
    { "ks_PK", "Kashmiri (Arabic)_Pakistan" },
    { "ky", "Kyrgyz" },
    { "la", "Latin" },
    { "lb", "Luxembourgish" },
    { "lo", "Lao" },
    { "lt", "Lithuanian" },
    { "lv", "Latvian" },
    { "mi", "Maori" },
    { "mk", "FYRO Macedonian" },
    { "mk", "Macedonian" },
    { "ml", "Malayalam" },
    { "mn", "Mongolian" },
    { "mni", "Manipuri" },
    { "moh", "Mohawk" },
    { "mr", "Marathi" },
    { "ms", "Malay" },
    { "mt", "Maltese" },
    { "my", "Burmese" },
    { "nb", "NOR" }, /* Norwegian Bokmål */
    { "ne", "Nepali" },
    { "nic", "Ibibio" },
    { "nl", "Dutch" },
    { "nn", "NON" }, /* Norwegian Nynorsk */
    { "no", "Norwegian" },
    { "nso", "Northern Sotho" },
    { "nso", "Sepedi" },
    { "oc", "Occitan" },
    { "om", "Oromo" },
    { "or", "Oriya" },
    { "pa", "Punjabi" },
    { "pap", "Papiamentu" },
    { "pl", "Polish" },
    { "prs", "Dari" },
    { "ps", "Pashto" },
    { "pt", "Portuguese" },
    { "qu", "Quechua" },
    { "qut", "K'iche'" },
    { "rm", "Romansh" },
    { "ro", "Romanian" },
    { "ru", "Russian" },
    { "rw", "Kinyarwanda" },
    { "sa", "Sanskrit" },
    { "sah", "Yakut" },
    { "sd", "Sindhi" },
    { "se", "Sami (Northern)" },
    { "se", "Northern Sami" },
    { "si", "Sinhalese" },
    { "sk", "Slovak" },
    { "sl", "Slovenian" },
    { "sma", "Sami (Southern)" },
    { "sma", "Southern Sami" },
    { "smj", "Sami (Lule)" },
    { "smj", "Lule Sami" },
    { "smn", "Sami (Inari)" },
    { "smn", "Inari Sami" },
    { "sms", "Sami (Skolt)" },
    { "sms", "Skolt Sami" },
    { "so", "Somali" },
    { "sq", "Albanian" },
    { "sr", "Serbian (Latin)" },
    { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */
    { "sv", "Swedish" },
    { "sw", "Swahili" },
    { "syr", "Syriac" },
    { "ta", "Tamil" },
    { "te", "Telugu" },
    { "tg", "Tajik" },
    { "th", "Thai" },
    { "ti", "Tigrinya" },
    { "tk", "Turkmen" },
    { "tl", "Filipino" },
    { "tn", "Tswana" },
    { "tr", "Turkish" },
    { "ts", "Tsonga" },
    { "tt", "Tatar" },
    { "ug", "Uighur" },
    { "uk", "Ukrainian" },
    { "ur", "Urdu" },
    { "uz", "Uzbek" },
    { "uz", "Uzbek (Latin)" },
    { "uz@cyrillic", "Uzbek (Cyrillic)" },
    { "ve", "Venda" },
    { "vi", "Vietnamese" },
    { "wen", "Sorbian" },
    { "wo", "Wolof" },
    { "xh", "Xhosa" },
    { "yi", "Yiddish" },
    { "yo", "Yoruba" },
    { "zh", "Chinese" },
    { "zu", "Zulu" }
  };
458 
459 /* Table from ISO 3166 country code to English name.
460    Keep in sync with the gl_locale_name_from_win32_LANGID function in
461    localename.c!  */
/* Country codes and their replacement names.
   The entries must stay sorted by CODE in strcmp order, because search()
   performs a binary search on this table.  A code may appear more than
   once; setlocale_unixlike then tries the replacements in table order.  */
static const struct table_entry country_table[] =
  {
    { "AE", "U.A.E." },
    { "AF", "Afghanistan" },
    { "AL", "Albania" },
    { "AM", "Armenia" },
    { "AN", "Netherlands Antilles" },
    { "AR", "Argentina" },
    { "AT", "Austria" },
    { "AU", "Australia" },
    { "AZ", "Azerbaijan" },
    { "BA", "Bosnia and Herzegovina" },
    { "BD", "Bangladesh" },
    { "BE", "Belgium" },
    { "BG", "Bulgaria" },
    { "BH", "Bahrain" },
    { "BN", "Brunei Darussalam" },
    { "BO", "Bolivia" },
    { "BR", "Brazil" },
    { "BT", "Bhutan" },
    { "BY", "Belarus" },
    { "BZ", "Belize" },
    { "CA", "Canada" },
    { "CG", "Congo" },
    { "CH", "Switzerland" },
    { "CI", "Cote d'Ivoire" },
    { "CL", "Chile" },
    { "CM", "Cameroon" },
    { "CN", "People's Republic of China" },
    { "CO", "Colombia" },
    { "CR", "Costa Rica" },
    { "CS", "Serbia and Montenegro" },
    { "CZ", "Czech Republic" },
    { "DE", "Germany" },
    { "DK", "Denmark" },
    { "DO", "Dominican Republic" },
    { "DZ", "Algeria" },
    { "EC", "Ecuador" },
    { "EE", "Estonia" },
    { "EG", "Egypt" },
    { "ER", "Eritrea" },
    { "ES", "Spain" },
    { "ET", "Ethiopia" },
    { "FI", "Finland" },
    { "FO", "Faroe Islands" },
    { "FR", "France" },
    { "GB", "United Kingdom" },
    { "GD", "Caribbean" },
    { "GE", "Georgia" },
    { "GL", "Greenland" },
    { "GR", "Greece" },
    { "GT", "Guatemala" },
    { "HK", "Hong Kong" },
    { "HK", "Hong Kong S.A.R." },
    { "HN", "Honduras" },
    { "HR", "Croatia" },
    { "HT", "Haiti" },
    { "HU", "Hungary" },
    { "ID", "Indonesia" },
    { "IE", "Ireland" },
    { "IL", "Israel" },
    { "IN", "India" },
    { "IQ", "Iraq" },
    { "IR", "Iran" },
    { "IS", "Iceland" },
    { "IT", "Italy" },
    { "JM", "Jamaica" },
    { "JO", "Jordan" },
    { "JP", "Japan" },
    { "KE", "Kenya" },
    { "KG", "Kyrgyzstan" },
    { "KH", "Cambodia" },
    { "KR", "South Korea" },
    { "KW", "Kuwait" },
    { "KZ", "Kazakhstan" },
    { "LA", "Laos" },
    { "LB", "Lebanon" },
    { "LI", "Liechtenstein" },
    { "LK", "Sri Lanka" },
    { "LT", "Lithuania" },
    { "LU", "Luxembourg" },
    { "LV", "Latvia" },
    { "LY", "Libya" },
    { "MA", "Morocco" },
    { "MC", "Principality of Monaco" },
    { "MD", "Moldava" },
    { "MD", "Moldova" },
    { "ME", "Montenegro" },
    { "MK", "Former Yugoslav Republic of Macedonia" },
    { "ML", "Mali" },
    { "MM", "Myanmar" },
    { "MN", "Mongolia" },
    { "MO", "Macau S.A.R." },
    { "MT", "Malta" },
    { "MV", "Maldives" },
    { "MX", "Mexico" },
    { "MY", "Malaysia" },
    { "NG", "Nigeria" },
    { "NI", "Nicaragua" },
    { "NL", "Netherlands" },
    { "NO", "Norway" },
    { "NP", "Nepal" },
    { "NZ", "New Zealand" },
    { "OM", "Oman" },
    { "PA", "Panama" },
    { "PE", "Peru" },
    { "PH", "Philippines" },
    { "PK", "Islamic Republic of Pakistan" },
    { "PL", "Poland" },
    { "PR", "Puerto Rico" },
    { "PT", "Portugal" },
    { "PY", "Paraguay" },
    { "QA", "Qatar" },
    { "RE", "Reunion" },
    { "RO", "Romania" },
    { "RS", "Serbia" },
    { "RU", "Russia" },
    { "RW", "Rwanda" },
    { "SA", "Saudi Arabia" },
    { "SE", "Sweden" },
    { "SG", "Singapore" },
    { "SI", "Slovenia" },
    { "SK", "Slovak" },
    { "SN", "Senegal" },
    { "SO", "Somalia" },
    { "SR", "Suriname" },
    { "SV", "El Salvador" },
    { "SY", "Syria" },
    { "TH", "Thailand" },
    { "TJ", "Tajikistan" },
    { "TM", "Turkmenistan" },
    { "TN", "Tunisia" },
    { "TR", "Turkey" },
    { "TT", "Trinidad and Tobago" },
    { "TW", "Taiwan" },
    { "TZ", "Tanzania" },
    { "UA", "Ukraine" },
    { "US", "United States" },
    { "UY", "Uruguay" },
    { "VA", "Vatican" },
    { "VE", "Venezuela" },
    { "VN", "Viet Nam" },
    { "YE", "Yemen" },
    { "ZA", "South Africa" },
    { "ZW", "Zimbabwe" }
  };
608 
609 /* Given a string STRING, find the set of indices i such that TABLE[i].code is
610    the given STRING.  It is a range [lo,hi-1].  */
611 typedef struct { size_t lo; size_t hi; } range_t;
612 static void
search(const struct table_entry * table,size_t table_size,const char * string,range_t * result)613 search (const struct table_entry *table, size_t table_size, const char *string,
614         range_t *result)
615 {
616   /* The table is sorted.  Perform a binary search.  */
617   size_t hi = table_size;
618   size_t lo = 0;
619   while (lo < hi)
620     {
621       /* Invariant:
622          for i < lo, strcmp (table[i].code, string) < 0,
623          for i >= hi, strcmp (table[i].code, string) > 0.  */
624       size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
625       int cmp = strcmp (table[mid].code, string);
626       if (cmp < 0)
627         lo = mid + 1;
628       else if (cmp > 0)
629         hi = mid;
630       else
631         {
632           /* Found an i with
633                strcmp (language_table[i].code, string) == 0.
634              Find the entire interval of such i.  */
635           {
636             size_t i;
637 
638             for (i = mid; i > lo; )
639               {
640                 i--;
641                 if (strcmp (table[i].code, string) < 0)
642                   {
643                     lo = i + 1;
644                     break;
645                   }
646               }
647           }
648           {
649             size_t i;
650 
651             for (i = mid + 1; i < hi; i++)
652               {
653                 if (strcmp (table[i].code, string) > 0)
654                   {
655                     hi = i;
656                     break;
657                   }
658               }
659           }
660           /* The set of i with
661                strcmp (language_table[i].code, string) == 0
662              is the interval [lo, hi-1].  */
663           break;
664         }
665     }
666   result->lo = lo;
667   result->hi = hi;
668 }
669 
670 /* Like setlocale, but accept also locale names in the form ll or ll_CC,
671    where ll is an ISO 639 language code and CC is an ISO 3166 country code.  */
672 static char *
setlocale_unixlike(int category,const char * locale)673 setlocale_unixlike (int category, const char *locale)
674 {
675   char *result;
676   char llCC_buf[64];
677   char ll_buf[64];
678   char CC_buf[64];
679 
680   /* The native Windows implementation of setlocale understands the special
681      locale name "C", but not "POSIX".  Therefore map "POSIX" to "C".  */
682   if (locale != NULL && strcmp (locale, "POSIX") == 0)
683     locale = "C";
684 
685   /* First, try setlocale with the original argument unchanged.  */
686   result = setlocale_mtsafe (category, locale);
687   if (result != NULL)
688     return result;
689 
690   /* Otherwise, assume the argument is in the form
691        language[_territory][.codeset][@modifier]
692      and try to map it using the tables.  */
693   if (strlen (locale) < sizeof (llCC_buf))
694     {
695       /* Second try: Remove the codeset part.  */
696       {
697         const char *p = locale;
698         char *q = llCC_buf;
699 
700         /* Copy the part before the dot.  */
701         for (; *p != '\0' && *p != '.'; p++, q++)
702           *q = *p;
703         if (*p == '.')
704           /* Skip the part up to the '@', if any.  */
705           for (; *p != '\0' && *p != '@'; p++)
706             ;
707         /* Copy the part starting with '@', if any.  */
708         for (; *p != '\0'; p++, q++)
709           *q = *p;
710         *q = '\0';
711       }
712       /* llCC_buf now contains
713            language[_territory][@modifier]
714        */
715       if (strcmp (llCC_buf, locale) != 0)
716         {
717           result = setlocale (category, llCC_buf);
718           if (result != NULL)
719             return result;
720         }
721       /* Look it up in language_table.  */
722       {
723         range_t range;
724         size_t i;
725 
726         search (language_table,
727                 sizeof (language_table) / sizeof (language_table[0]),
728                 llCC_buf,
729                 &range);
730 
731         for (i = range.lo; i < range.hi; i++)
732           {
733             /* Try the replacement in language_table[i].  */
734             result = setlocale (category, language_table[i].english);
735             if (result != NULL)
736               return result;
737           }
738       }
739       /* Split language[_territory][@modifier]
740          into  ll_buf = language[@modifier]
741          and   CC_buf = territory
742        */
743       {
744         const char *underscore = strchr (llCC_buf, '_');
745         if (underscore != NULL)
746           {
747             const char *territory_start = underscore + 1;
748             const char *territory_end = strchr (territory_start, '@');
749             if (territory_end == NULL)
750               territory_end = territory_start + strlen (territory_start);
751 
752             memcpy (ll_buf, llCC_buf, underscore - llCC_buf);
753             strcpy (ll_buf + (underscore - llCC_buf), territory_end);
754 
755             memcpy (CC_buf, territory_start, territory_end - territory_start);
756             CC_buf[territory_end - territory_start] = '\0';
757 
758             {
759               /* Look up ll_buf in language_table
760                  and CC_buf in country_table.  */
761               range_t language_range;
762 
763               search (language_table,
764                       sizeof (language_table) / sizeof (language_table[0]),
765                       ll_buf,
766                       &language_range);
767               if (language_range.lo < language_range.hi)
768                 {
769                   range_t country_range;
770 
771                   search (country_table,
772                           sizeof (country_table) / sizeof (country_table[0]),
773                           CC_buf,
774                           &country_range);
775                   if (country_range.lo < country_range.hi)
776                     {
777                       size_t i;
778                       size_t j;
779 
780                       for (i = language_range.lo; i < language_range.hi; i++)
781                         for (j = country_range.lo; j < country_range.hi; j++)
782                           {
783                             /* Concatenate the replacements.  */
784                             const char *part1 = language_table[i].english;
785                             size_t part1_len = strlen (part1);
786                             const char *part2 = country_table[j].english;
787                             size_t part2_len = strlen (part2) + 1;
788                             char buf[64+64];
789 
790                             if (!(part1_len + 1 + part2_len <= sizeof (buf)))
791                               abort ();
792                             memcpy (buf, part1, part1_len);
793                             buf[part1_len] = '_';
794                             memcpy (buf + part1_len + 1, part2, part2_len);
795 
796                             /* Try the concatenated replacements.  */
797                             result = setlocale (category, buf);
798                             if (result != NULL)
799                               return result;
800                           }
801                     }
802 
803                   /* Try omitting the country entirely.  This may set a locale
804                      corresponding to the wrong country, but is better than
805                      failing entirely.  */
806                   {
807                     size_t i;
808 
809                     for (i = language_range.lo; i < language_range.hi; i++)
810                       {
811                         /* Try only the language replacement.  */
812                         result =
813                           setlocale (category, language_table[i].english);
814                         if (result != NULL)
815                           return result;
816                       }
817                   }
818                 }
819             }
820           }
821       }
822     }
823 
824   /* Failed.  */
825   return NULL;
826 }
827 
828 #  elif defined __ANDROID__
829 
/* Like setlocale, but accept also the locale names "C" and "POSIX".
   If the underlying call fails for one of the categories listed below and
   LOCALE is NULL, "C" or "POSIX", the answer is "C" instead of NULL.  */
static char *
setlocale_unixlike (int category, const char *locale)
{
  char *answer = setlocale_mtsafe (category, locale);

  /* Whatever the underlying call accepted is passed through unchanged.  */
  if (answer != NULL)
    return answer;

  /* The fallback applies only to these categories.  */
  switch (category)
    {
    case LC_ALL:
    case LC_COLLATE:
    case LC_CTYPE:
    case LC_MESSAGES:
    case LC_MONETARY:
    case LC_NUMERIC:
    case LC_TIME:
    case LC_ADDRESS:
    case LC_MEASUREMENT:
    case LC_NAME:
    case LC_PAPER:
    case LC_TELEPHONE:
      break;
    default:
      return NULL;
    }

  if (locale != NULL
      && strcmp (locale, "C") != 0 && strcmp (locale, "POSIX") != 0)
    return NULL;

  return (char *) "C";
}
859 #   define setlocale setlocale_unixlike
860 
861 #  else
862 #   define setlocale_unixlike setlocale_mtsafe
863 #  endif
864 
865 #  if LC_MESSAGES == 1729
866 
/* The system does not store an LC_MESSAGES locale category.  Its current
   name is kept in this buffer instead; it starts out as "C".  */
static char lc_messages_name[64] = "C";

/* Like setlocale, but support also LC_MESSAGES.
   For LC_MESSAGES, a non-NULL LOCALE is stored in lc_messages_name, cut off
   after sizeof (lc_messages_name) - 1 bytes if it is longer, and the stored
   name is returned.  Every other category is forwarded to
   setlocale_unixlike.  */
static char *
setlocale_single (int category, const char *locale)
{
  if (category != LC_MESSAGES)
    return setlocale_unixlike (category, locale);

  if (locale != NULL)
    {
      size_t last = sizeof (lc_messages_name) - 1;

      /* strncpy does not add a terminator when LOCALE fills the whole
         field, so the final byte is always set to NUL explicitly.  */
      strncpy (lc_messages_name, locale, last);
      lc_messages_name[last] = '\0';
    }

  return lc_messages_name;
}
886 
887 #  else
888 #   define setlocale_single setlocale_unixlike
889 #  endif
890 
891 #  if defined __APPLE__ && defined __MACH__
892 
/* Mapping from language to main territory where that language is spoken.
   Each entry has the form "ll_CC" or "lll_CC".
   The entries must be kept strictly sorted by their language part (the
   text before the '_'), in the order defined by langcmp, because
   get_main_locale_with_same_language performs a binary search on this
   table.  An entry that is out of order makes lookups silently fail.  */
static char const locales_with_principal_territory[][6 + 1] =
  {
                /* Language     Main territory */
    "ace_ID",   /* Achinese     Indonesia */
    "af_ZA",    /* Afrikaans    South Africa */
    "ak_GH",    /* Akan         Ghana */
    "am_ET",    /* Amharic      Ethiopia */
    "an_ES",    /* Aragonese    Spain */
    "ang_GB",   /* Old English  Britain */
    "arn_CL",   /* Mapudungun   Chile */
    "as_IN",    /* Assamese     India */
    "ast_ES",   /* Asturian     Spain */
    "av_RU",    /* Avaric       Russia */
    "awa_IN",   /* Awadhi       India */
    "az_AZ",    /* Azerbaijani  Azerbaijan */
    "ban_ID",   /* Balinese     Indonesia */
    "be_BY",    /* Belarusian   Belarus */
    "bej_SD",   /* Beja         Sudan */
    "bem_ZM",   /* Bemba        Zambia */
    "bg_BG",    /* Bulgarian    Bulgaria */
    "bho_IN",   /* Bhojpuri     India */
    "bi_VU",    /* Bislama      Vanuatu */
    "bik_PH",   /* Bikol        Philippines */
    "bin_NG",   /* Bini         Nigeria */
    "bm_ML",    /* Bambara      Mali */
    "bn_IN",    /* Bengali      India */
    "bo_CN",    /* Tibetan      China */
    "br_FR",    /* Breton       France */
    "bs_BA",    /* Bosnian      Bosnia */
    "bug_ID",   /* Buginese     Indonesia */
    "ca_ES",    /* Catalan      Spain */
    "ce_RU",    /* Chechen      Russia */
    "ceb_PH",   /* Cebuano      Philippines */
    "co_FR",    /* Corsican     France */
    "cr_CA",    /* Cree         Canada */
    /* Don't put "crh_UZ" or "crh_UA" here.  That would be asking for fruitless
       political discussion.  */
    "cs_CZ",    /* Czech        Czech Republic */
    "csb_PL",   /* Kashubian    Poland */
    "cy_GB",    /* Welsh        Britain */
    "da_DK",    /* Danish       Denmark */
    "de_DE",    /* German       Germany */
    "din_SD",   /* Dinka        Sudan */
    "doi_IN",   /* Dogri        India */
    "dsb_DE",   /* Lower Sorbian        Germany */
    "dv_MV",    /* Divehi       Maldives */
    "dz_BT",    /* Dzongkha     Bhutan */
    "ee_GH",    /* Éwé          Ghana */
    "el_GR",    /* Greek        Greece */
    /* Don't put "en_GB" or "en_US" here.  That would be asking for fruitless
       political discussion.  */
    "es_ES",    /* Spanish      Spain */
    "et_EE",    /* Estonian     Estonia */
    "fa_IR",    /* Persian      Iran */
    "fi_FI",    /* Finnish      Finland */
    "fil_PH",   /* Filipino     Philippines */
    "fj_FJ",    /* Fijian       Fiji */
    "fo_FO",    /* Faroese      Faeroe Islands */
    "fon_BJ",   /* Fon          Benin */
    "fr_FR",    /* French       France */
    "fur_IT",   /* Friulian     Italy */
    "fy_NL",    /* Western Frisian      Netherlands */
    "ga_IE",    /* Irish        Ireland */
    "gd_GB",    /* Scottish Gaelic      Britain */
    "gon_IN",   /* Gondi        India */
    "gsw_CH",   /* Swiss German Switzerland */
    "gu_IN",    /* Gujarati     India */
    "he_IL",    /* Hebrew       Israel */
    "hi_IN",    /* Hindi        India */
    "hil_PH",   /* Hiligaynon   Philippines */
    "hr_HR",    /* Croatian     Croatia */
    "hsb_DE",   /* Upper Sorbian        Germany */
    "ht_HT",    /* Haitian      Haiti */
    "hu_HU",    /* Hungarian    Hungary */
    "hy_AM",    /* Armenian     Armenia */
    "id_ID",    /* Indonesian   Indonesia */
    "ig_NG",    /* Igbo         Nigeria */
    "ii_CN",    /* Sichuan Yi   China */
    "ilo_PH",   /* Iloko        Philippines */
    "is_IS",    /* Icelandic    Iceland */
    "it_IT",    /* Italian      Italy */
    "ja_JP",    /* Japanese     Japan */
    "jab_NG",   /* Hyam         Nigeria */
    "jv_ID",    /* Javanese     Indonesia */
    "ka_GE",    /* Georgian     Georgia */
    "kab_DZ",   /* Kabyle       Algeria */
    "kaj_NG",   /* Jju          Nigeria */
    "kam_KE",   /* Kamba        Kenya */
    "kcg_NG",   /* Tyap         Nigeria */
    "kdm_NG",   /* Kagoma       Nigeria */
    "kg_CD",    /* Kongo        Democratic Republic of Congo */
    "kk_KZ",    /* Kazakh       Kazakhstan */
    "kl_GL",    /* Kalaallisut  Greenland */
    "km_KH",    /* Central Khmer        Cambodia */
    "kmb_AO",   /* Kimbundu     Angola */
    "kn_IN",    /* Kannada      India */
    "ko_KR",    /* Korean       Korea (South) */
    "kok_IN",   /* Konkani      India */
    "kr_NG",    /* Kanuri       Nigeria */
    "kru_IN",   /* Kurukh       India */
    "ky_KG",    /* Kyrgyz       Kyrgyzstan */
    "lg_UG",    /* Ganda        Uganda */
    "li_BE",    /* Limburgish   Belgium */
    "lo_LA",    /* Laotian      Laos */
    "lt_LT",    /* Lithuanian   Lithuania */
    "lu_CD",    /* Luba-Katanga Democratic Republic of Congo */
    "lua_CD",   /* Luba-Lulua   Democratic Republic of Congo */
    "luo_KE",   /* Luo          Kenya */
    "lv_LV",    /* Latvian      Latvia */
    "mad_ID",   /* Madurese     Indonesia */
    "mag_IN",   /* Magahi       India */
    "mai_IN",   /* Maithili     India */
    "mak_ID",   /* Makasar      Indonesia */
    "man_ML",   /* Mandingo     Mali */
    "men_SL",   /* Mende        Sierra Leone */
    "mfe_MU",   /* Mauritian Creole     Mauritius */
    "mg_MG",    /* Malagasy     Madagascar */
    "mi_NZ",    /* Maori        New Zealand */
    "min_ID",   /* Minangkabau  Indonesia */
    "mk_MK",    /* Macedonian   North Macedonia */
    "ml_IN",    /* Malayalam    India */
    "mn_MN",    /* Mongolian    Mongolia */
    "mni_IN",   /* Manipuri     India */
    "mos_BF",   /* Mossi        Burkina Faso */
    "mr_IN",    /* Marathi      India */
    "ms_MY",    /* Malay        Malaysia */
    "mt_MT",    /* Maltese      Malta */
    "mwr_IN",   /* Marwari      India */
    "my_MM",    /* Burmese      Myanmar */
    "na_NR",    /* Nauru        Nauru */
    "nah_MX",   /* Nahuatl      Mexico */
    "nap_IT",   /* Neapolitan   Italy */
    "nb_NO",    /* Norwegian Bokmål    Norway */
    "nds_DE",   /* Low Saxon    Germany */
    "ne_NP",    /* Nepali       Nepal */
    "nl_NL",    /* Dutch        Netherlands */
    "nn_NO",    /* Norwegian Nynorsk    Norway */
    "no_NO",    /* Norwegian    Norway */
    "nr_ZA",    /* South Ndebele        South Africa */
    "nso_ZA",   /* Northern Sotho       South Africa */
    "ny_MW",    /* Chichewa     Malawi */
    "nym_TZ",   /* Nyamwezi     Tanzania */
    "nyn_UG",   /* Nyankole     Uganda */
    "oc_FR",    /* Occitan      France */
    "oj_CA",    /* Ojibwa       Canada */
    "or_IN",    /* Oriya        India */
    "pa_IN",    /* Punjabi      India */
    "pag_PH",   /* Pangasinan   Philippines */
    "pam_PH",   /* Pampanga     Philippines */
    "pap_AN",   /* Papiamento   Netherlands Antilles - this line can be removed in 2018 */
    "pbb_CO",   /* Páez         Colombia */
    "pl_PL",    /* Polish       Poland */
    "ps_AF",    /* Pashto       Afghanistan */
    "pt_PT",    /* Portuguese   Portugal */
    "raj_IN",   /* Rajasthani   India */
    "rm_CH",    /* Romansh      Switzerland */
    "rn_BI",    /* Kirundi      Burundi */
    "ro_RO",    /* Romanian     Romania */
    "ru_RU",    /* Russian      Russia */
    "rw_RW",    /* Kinyarwanda  Rwanda */
    "sa_IN",    /* Sanskrit     India */
    "sah_RU",   /* Yakut        Russia */
    "sas_ID",   /* Sasak        Indonesia */
    "sat_IN",   /* Santali      India */
    "sc_IT",    /* Sardinian    Italy */
    "scn_IT",   /* Sicilian     Italy */
    "sg_CF",    /* Sango        Central African Republic */
    "shn_MM",   /* Shan         Myanmar */
    "si_LK",    /* Sinhala      Sri Lanka */
    "sid_ET",   /* Sidamo       Ethiopia */
    "sk_SK",    /* Slovak       Slovakia */
    "sl_SI",    /* Slovenian    Slovenia */
    "sm_WS",    /* Samoan       Samoa */
    "smn_FI",   /* Inari Sami   Finland */
    "sms_FI",   /* Skolt Sami   Finland */
    "so_SO",    /* Somali       Somalia */
    "sq_AL",    /* Albanian     Albania */
    "sr_RS",    /* Serbian      Serbia */
    "srr_SN",   /* Serer        Senegal */
    "suk_TZ",   /* Sukuma       Tanzania */
    "sus_GN",   /* Susu         Guinea */
    "sv_SE",    /* Swedish      Sweden */
    "te_IN",    /* Telugu       India */
    "tem_SL",   /* Timne        Sierra Leone */
    "tet_ID",   /* Tetum        Indonesia */
    "tg_TJ",    /* Tajik        Tajikistan */
    "th_TH",    /* Thai         Thailand */
    "ti_ER",    /* Tigrinya     Eritrea */
    "tiv_NG",   /* Tiv          Nigeria */
    "tk_TM",    /* Turkmen      Turkmenistan */
    "tl_PH",    /* Tagalog      Philippines */
    "to_TO",    /* Tonga        Tonga */
    "tpi_PG",   /* Tok Pisin    Papua New Guinea */
    "tr_TR",    /* Turkish      Turkey */
    "tum_MW",   /* Tumbuka      Malawi */
    "ug_CN",    /* Uighur       China */
    "uk_UA",    /* Ukrainian    Ukraine */
    "umb_AO",   /* Umbundu      Angola */
    "ur_PK",    /* Urdu         Pakistan */
    "uz_UZ",    /* Uzbek        Uzbekistan */
    "ve_ZA",    /* Venda        South Africa */
    "vi_VN",    /* Vietnamese   Vietnam */
    "wa_BE",    /* Walloon      Belgium */
    "wal_ET",   /* Walamo       Ethiopia */
    "war_PH",   /* Waray        Philippines */
    "wen_DE",   /* Sorbian      Germany */
    "yao_MW",   /* Yao          Malawi */
    "zap_MX"    /* Zapotec      Mexico */
  };
1103 
/* Three-way comparison of the language parts of two locale names.
   The language part of a name is everything before its first '_', or the
   whole name if it contains no '_'.
   Return a negative value, zero, or a positive value if the language part
   of LOCALE1 sorts before, equal to, or after that of LOCALE2.  The bytes
   are compared with memcmp; when one language part is a proper prefix of
   the other, the shorter one sorts first.  */
static int
langcmp (const char *locale1, const char *locale2)
{
  const char *sep1 = strchr (locale1, '_');
  const char *sep2 = strchr (locale2, '_');
  size_t len1 = (sep1 != NULL ? (size_t) (sep1 - locale1) : strlen (locale1));
  size_t len2 = (sep2 != NULL ? (size_t) (sep2 - locale2) : strlen (locale2));
  size_t common = (len1 < len2 ? len1 : len2);
  int result = memcmp (locale1, locale2, common);

  /* Equal on the common prefix: the lengths decide.  */
  if (result == 0 && len1 != len2)
    result = (len1 < len2 ? -1 : 1);

  return result;
}
1142 
1143 /* Given a locale name, return the main locale with the same language,
1144    or NULL if not found.
1145    For example: "fr_DE" -> "fr_FR".  */
1146 static const char *
get_main_locale_with_same_language(const char * locale)1147 get_main_locale_with_same_language (const char *locale)
1148 {
1149 #   define table locales_with_principal_territory
1150   /* The table is sorted.  Perform a binary search.  */
1151   size_t hi = sizeof (table) / sizeof (table[0]);
1152   size_t lo = 0;
1153   while (lo < hi)
1154     {
1155       /* Invariant:
1156          for i < lo, langcmp (table[i], locale) < 0,
1157          for i >= hi, langcmp (table[i], locale) > 0.  */
1158       size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1159       int cmp = langcmp (table[mid], locale);
1160       if (cmp < 0)
1161         lo = mid + 1;
1162       else if (cmp > 0)
1163         hi = mid;
1164       else
1165         {
1166           /* Found an i with
1167                langcmp (language_table[i], locale) == 0.
1168              Verify that it is the only such i.  */
1169           if (mid > lo && langcmp (table[mid - 1], locale) >= 0)
1170             abort ();
1171           if (mid + 1 < hi && langcmp (table[mid + 1], locale) <= 0)
1172             abort ();
1173           return table[mid];
1174         }
1175     }
1176 #   undef table
1177   return NULL;
1178 }
1179 
/* Mapping from territory to main language that is spoken in that territory.
   Each entry has the form "ll_CC" or "lll_CC".  The entries are sorted by
   territory code (the part after the '_'); get_main_locale_with_same_territory
   performs a binary search on this table.  */
static char const locales_with_principal_language[][sizeof "lll_CC"] =
  {
    /* This is based on the set of existing locales in glibc, with duplicates
       removed, and on the Wikipedia pages named "Languages of <territory>".
       If in doubt, use the locale that exists in macOS.  For example, the only
       "*_IN" locale in macOS 10.13 is "hi_IN", so use that.  */
    /* A useful shell function for producing a line of this table is:
         func_line ()
         {
           # Usage: func_line ll_CC
           ll=`echo "$1" | sed -e 's|_.*||'`
           cc=`echo "$1" | sed -e 's|^.*_||'`
           llx=`sed -n -e "s|^${ll} ||p" < gettext-tools/doc/ISO_639`
           ccx=`expand gettext-tools/doc/ISO_3166 | sed -n -e "s|^${cc}  *||p"`
           echo "    \"$1\",    /$X* ${ccx}: ${llx} *$X/"
         }
     */
                /* Territory: Main language */
    "ca_AD",    /* Andorra: Catalan */
    "ar_AE",    /* United Arab Emirates: Arabic */
    "ps_AF",    /* Afghanistan: Pashto */
    "en_AG",    /* Antigua and Barbuda: English */
    "sq_AL",    /* Albania: Albanian */
    "hy_AM",    /* Armenia: Armenian */
    "pap_AN",   /* Netherlands Antilles: Papiamento - this line can be removed in 2018 */
    "pt_AO",    /* Angola: Portuguese */
    "es_AR",    /* Argentina: Spanish */
    "de_AT",    /* Austria: German */
    "en_AU",    /* Australia: English */
    /* Aruba has two official languages: "nl_AW", "pap_AW".  */
    "az_AZ",    /* Azerbaijan: Azerbaijani */
    "bs_BA",    /* Bosnia: Bosnian */
    "bn_BD",    /* Bangladesh: Bengali */
    "nl_BE",    /* Belgium: Dutch */
    "fr_BF",    /* Burkina Faso: French */
    "bg_BG",    /* Bulgaria: Bulgarian */
    "ar_BH",    /* Bahrain: Arabic */
    "rn_BI",    /* Burundi: Kirundi */
    "fr_BJ",    /* Benin: French */
    "es_BO",    /* Bolivia: Spanish */
    "pt_BR",    /* Brazil: Portuguese */
    "dz_BT",    /* Bhutan: Dzongkha */
    "en_BW",    /* Botswana: English */
    "be_BY",    /* Belarus: Belarusian */
    "en_CA",    /* Canada: English */
    "fr_CD",    /* Democratic Republic of Congo: French */
    "sg_CF",    /* Central African Republic: Sango */
    "de_CH",    /* Switzerland: German */
    "es_CL",    /* Chile: Spanish */
    "zh_CN",    /* China: Chinese */
    "es_CO",    /* Colombia: Spanish */
    "es_CR",    /* Costa Rica: Spanish */
    "es_CU",    /* Cuba: Spanish */
    /* Curaçao has three official languages: "nl_CW", "pap_CW", "en_CW".  */
    "el_CY",    /* Cyprus: Greek */
    "cs_CZ",    /* Czech Republic: Czech */
    "de_DE",    /* Germany: German */
    /* Djibouti has two official languages: "ar_DJ" and "fr_DJ".  */
    "da_DK",    /* Denmark: Danish */
    "es_DO",    /* Dominican Republic: Spanish */
    "ar_DZ",    /* Algeria: Arabic */
    "es_EC",    /* Ecuador: Spanish */
    "et_EE",    /* Estonia: Estonian */
    "ar_EG",    /* Egypt: Arabic */
    "ti_ER",    /* Eritrea: Tigrinya */
    "es_ES",    /* Spain: Spanish */
    "am_ET",    /* Ethiopia: Amharic */
    "fi_FI",    /* Finland: Finnish */
    /* Fiji has three official languages: "en_FJ", "fj_FJ", "hif_FJ".  */
    "fo_FO",    /* Faeroe Islands: Faroese */
    "fr_FR",    /* France: French */
    "en_GB",    /* Britain: English */
    "ka_GE",    /* Georgia: Georgian */
    "en_GH",    /* Ghana: English */
    "kl_GL",    /* Greenland: Kalaallisut */
    "fr_GN",    /* Guinea: French */
    "el_GR",    /* Greece: Greek */
    "es_GT",    /* Guatemala: Spanish */
    "zh_HK",    /* Hong Kong: Chinese */
    "es_HN",    /* Honduras: Spanish */
    "hr_HR",    /* Croatia: Croatian */
    "ht_HT",    /* Haiti: Haitian */
    "hu_HU",    /* Hungary: Hungarian */
    "id_ID",    /* Indonesia: Indonesian */
    "en_IE",    /* Ireland: English */
    "he_IL",    /* Israel: Hebrew */
    "hi_IN",    /* India: Hindi */
    "ar_IQ",    /* Iraq: Arabic */
    "fa_IR",    /* Iran: Persian */
    "is_IS",    /* Iceland: Icelandic */
    "it_IT",    /* Italy: Italian */
    "ar_JO",    /* Jordan: Arabic */
    "ja_JP",    /* Japan: Japanese */
    "sw_KE",    /* Kenya: Swahili */
    "ky_KG",    /* Kyrgyzstan: Kyrgyz */
    "km_KH",    /* Cambodia: Central Khmer */
    "ko_KR",    /* Korea (South): Korean */
    "ar_KW",    /* Kuwait: Arabic */
    "kk_KZ",    /* Kazakhstan: Kazakh */
    "lo_LA",    /* Laos: Laotian */
    "ar_LB",    /* Lebanon: Arabic */
    "de_LI",    /* Liechtenstein: German */
    "si_LK",    /* Sri Lanka: Sinhala */
    "lt_LT",    /* Lithuania: Lithuanian */
    /* Luxembourg has three official languages: "lb_LU", "fr_LU", "de_LU".  */
    "lv_LV",    /* Latvia: Latvian */
    "ar_LY",    /* Libya: Arabic */
    "ar_MA",    /* Morocco: Arabic */
    "sr_ME",    /* Montenegro: Serbian */
    "mg_MG",    /* Madagascar: Malagasy */
    "mk_MK",    /* North Macedonia: Macedonian */
    "fr_ML",    /* Mali: French */
    "my_MM",    /* Myanmar: Burmese */
    "mn_MN",    /* Mongolia: Mongolian */
    "mt_MT",    /* Malta: Maltese */
    "mfe_MU",   /* Mauritius: Mauritian Creole */
    "dv_MV",    /* Maldives: Divehi */
    "ny_MW",    /* Malawi: Chichewa */
    "es_MX",    /* Mexico: Spanish */
    "ms_MY",    /* Malaysia: Malay */
    "en_NG",    /* Nigeria: English */
    "es_NI",    /* Nicaragua: Spanish */
    "nl_NL",    /* Netherlands: Dutch */
    "no_NO",    /* Norway: Norwegian */
    "ne_NP",    /* Nepal: Nepali */
    "na_NR",    /* Nauru: Nauru */
    "niu_NU",   /* Niue: Niuean */
    "en_NZ",    /* New Zealand: English */
    "ar_OM",    /* Oman: Arabic */
    "es_PA",    /* Panama: Spanish */
    "es_PE",    /* Peru: Spanish */
    "tpi_PG",   /* Papua New Guinea: Tok Pisin */
    "fil_PH",   /* Philippines: Filipino */
    "pa_PK",    /* Pakistan: Punjabi */
    "pl_PL",    /* Poland: Polish */
    "es_PR",    /* Puerto Rico: Spanish */
    "pt_PT",    /* Portugal: Portuguese */
    "es_PY",    /* Paraguay: Spanish */
    "ar_QA",    /* Qatar: Arabic */
    "ro_RO",    /* Romania: Romanian */
    "sr_RS",    /* Serbia: Serbian */
    "ru_RU",    /* Russia: Russian */
    "rw_RW",    /* Rwanda: Kinyarwanda */
    "ar_SA",    /* Saudi Arabia: Arabic */
    "en_SC",    /* Seychelles: English */
    "ar_SD",    /* Sudan: Arabic */
    "sv_SE",    /* Sweden: Swedish */
    "en_SG",    /* Singapore: English */
    "sl_SI",    /* Slovenia: Slovenian */
    "sk_SK",    /* Slovakia: Slovak */
    "en_SL",    /* Sierra Leone: English */
    "fr_SN",    /* Senegal: French */
    "so_SO",    /* Somalia: Somali */
    "ar_SS",    /* South Sudan: Arabic */
    "es_SV",    /* El Salvador: Spanish */
    "ar_SY",    /* Syria: Arabic */
    "th_TH",    /* Thailand: Thai */
    "tg_TJ",    /* Tajikistan: Tajik */
    "tk_TM",    /* Turkmenistan: Turkmen */
    "ar_TN",    /* Tunisia: Arabic */
    "to_TO",    /* Tonga: Tonga */
    "tr_TR",    /* Turkey: Turkish */
    "zh_TW",    /* Taiwan: Chinese */
    "sw_TZ",    /* Tanzania: Swahili */
    "uk_UA",    /* Ukraine: Ukrainian */
    "lg_UG",    /* Uganda: Ganda */
    "en_US",    /* United States of America: English */
    "es_UY",    /* Uruguay: Spanish */
    "uz_UZ",    /* Uzbekistan: Uzbek */
    "es_VE",    /* Venezuela: Spanish */
    "vi_VN",    /* Vietnam: Vietnamese */
    "bi_VU",    /* Vanuatu: Bislama */
    "sm_WS",    /* Samoa: Samoan */
    "ar_YE",    /* Yemen: Arabic */
    "en_ZA",    /* South Africa: English */
    "en_ZM",    /* Zambia: English */
    "en_ZW"     /* Zimbabwe: English */
  };
1359 
/* Compare just the territory part of two locale names.
   The territory part of a name is everything after its last '_'.  A name
   that contains no '_' is treated as having an empty territory part, so
   that such a name never causes a null pointer to be dereferenced.
   Return a negative value, zero, or a positive value, like strcmp applied
   to the two territory parts.  */
static int
terrcmp (const char *locale1, const char *locale2)
{
  const char *sep1 = strrchr (locale1, '_');
  const char *sep2 = strrchr (locale2, '_');
  /* Computing 'strrchr (...) + 1' on a null result would be undefined
     behaviour; fall back to the empty string instead.  */
  const char *territory1 = (sep1 != NULL ? sep1 + 1 : "");
  const char *territory2 = (sep2 != NULL ? sep2 + 1 : "");

  return strcmp (territory1, territory2);
}
1369 
1370 /* Given a locale name, return the locale corresponding to the main language
1371    with the same territory, or NULL if not found.
1372    For example: "fr_DE" -> "de_DE".  */
1373 static const char *
get_main_locale_with_same_territory(const char * locale)1374 get_main_locale_with_same_territory (const char *locale)
1375 {
1376   if (strrchr (locale, '_') != NULL)
1377     {
1378 #   define table locales_with_principal_language
1379       /* The table is sorted.  Perform a binary search.  */
1380       size_t hi = sizeof (table) / sizeof (table[0]);
1381       size_t lo = 0;
1382       while (lo < hi)
1383         {
1384           /* Invariant:
1385              for i < lo, terrcmp (table[i], locale) < 0,
1386              for i >= hi, terrcmp (table[i], locale) > 0.  */
1387           size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1388           int cmp = terrcmp (table[mid], locale);
1389           if (cmp < 0)
1390             lo = mid + 1;
1391           else if (cmp > 0)
1392             hi = mid;
1393           else
1394             {
1395               /* Found an i with
1396                    terrcmp (language_table[i], locale) == 0.
1397                  Verify that it is the only such i.  */
1398               if (mid > lo && terrcmp (table[mid - 1], locale) >= 0)
1399                 abort ();
1400               if (mid + 1 < hi && terrcmp (table[mid + 1], locale) <= 0)
1401                 abort ();
1402               return table[mid];
1403             }
1404         }
1405 #   undef table
1406     }
1407   return NULL;
1408 }
1409 
1410 #  endif
1411 
1412 char *
setlocale_improved(int category,const char * locale)1413 setlocale_improved (int category, const char *locale)
1414 {
1415   if (locale != NULL && locale[0] == '\0')
1416     {
1417       /* A request to set the current locale to the default locale.  */
1418       if (category == LC_ALL)
1419         {
1420           /* Set LC_CTYPE first.  Then the other categories.  */
1421           static int const categories[] =
1422             {
1423               LC_CTYPE,
1424               LC_NUMERIC,
1425               LC_TIME,
1426               LC_COLLATE,
1427               LC_MONETARY,
1428               LC_MESSAGES
1429             };
1430           char *saved_locale;
1431           const char *base_name;
1432           unsigned int i;
1433 
1434           /* Back up the old locale, in case one of the steps fails.  */
1435           saved_locale = setlocale (LC_ALL, NULL);
1436           if (saved_locale == NULL)
1437             return NULL;
1438           saved_locale = strdup (saved_locale);
1439           if (saved_locale == NULL)
1440             return NULL;
1441 
1442           /* Set LC_CTYPE category.  Set all other categories (except possibly
1443              LC_MESSAGES) to the same value in the same call; this is likely to
1444              save calls.  */
1445           base_name =
1446             gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE));
1447           if (base_name == NULL)
1448             base_name = gl_locale_name_default ();
1449 
1450           if (setlocale_unixlike (LC_ALL, base_name) != NULL)
1451             {
1452               /* LC_CTYPE category already set.  */
1453               i = 1;
1454             }
1455           else
1456             {
1457               /* On Mac OS X, "UTF-8" is a valid locale name for LC_CTYPE but
1458                  not for LC_ALL.  Therefore this call may fail.  So, try
1459                  another base_name.  */
1460               base_name = "C";
1461               if (setlocale_unixlike (LC_ALL, base_name) == NULL)
1462                 goto fail;
1463               i = 0;
1464             }
1465 #  if defined _WIN32 && ! defined __CYGWIN__
1466           /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1467              LC_CTYPE category to an invalid value ("C") when it does not
1468              support the specified encoding.  Report a failure instead.  */
1469           if (strchr (base_name, '.') != NULL
1470               && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1471             goto fail;
1472 #  endif
1473 
1474           for (; i < sizeof (categories) / sizeof (categories[0]); i++)
1475             {
1476               int cat = categories[i];
1477               const char *name;
1478 
1479               name = gl_locale_name_environ (cat, category_to_name (cat));
1480               if (name == NULL)
1481                 name = gl_locale_name_default ();
1482 
1483               /* If name is the same as base_name, it has already been set
1484                  through the setlocale call before the loop.  */
1485               if (strcmp (name, base_name) != 0
1486 #  if LC_MESSAGES == 1729
1487                   || cat == LC_MESSAGES
1488 #  endif
1489                  )
1490                 if (setlocale_single (cat, name) == NULL)
1491 #  if defined __APPLE__ && defined __MACH__
1492                   {
1493                     /* On Mac OS X 10.13, some locales can be set through
1494                        System Preferences > Language & Region, that are not
1495                        supported by libc.  The system's setlocale() falls
1496                        back to "C" for these locale categories.  We can do
1497                        better, by trying an existing locale with the same
1498                        language or an existing locale with the same territory.
1499                        If we can't, print a warning, to limit user
1500                        expectations.  */
1501                     int warn = 0;
1502 
1503                     if (cat == LC_CTYPE)
1504                       warn = (setlocale_single (cat, "UTF-8") == NULL);
1505                     else if (cat == LC_MESSAGES)
1506                       {
1507 #   if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1508                         /* Take the primary language preference.  */
1509 #    if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1510                         CFArrayRef prefArray = CFLocaleCopyPreferredLanguages ();
1511 #    elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1512                         CFTypeRef preferences =
1513                           CFPreferencesCopyAppValue (CFSTR ("AppleLanguages"),
1514                                                      kCFPreferencesCurrentApplication);
1515                         if (preferences != NULL
1516                             && CFGetTypeID (preferences) == CFArrayGetTypeID ())
1517                           {
1518                             CFArrayRef prefArray = (CFArrayRef)preferences;
1519 #    endif
1520                             int n = CFArrayGetCount (prefArray);
1521                             if (n > 0)
1522                               {
1523                                 char buf[256];
1524                                 CFTypeRef element = CFArrayGetValueAtIndex (prefArray, 0);
1525                                 if (element != NULL
1526                                     && CFGetTypeID (element) == CFStringGetTypeID ()
1527                                     && CFStringGetCString ((CFStringRef)element,
1528                                                            buf, sizeof (buf),
1529                                                            kCFStringEncodingASCII))
1530                                   {
1531                                     /* Remove the country.
1532                                        E.g. "zh-Hans-DE" -> "zh-Hans".  */
1533                                     char *last_minus = strrchr (buf, '-');
1534                                     if (last_minus != NULL)
1535                                       *last_minus = '\0';
1536 
1537                                     /* Convert to Unix locale name.
1538                                        E.g. "zh-Hans" -> "zh_CN".  */
1539                                     gl_locale_name_canonicalize (buf);
1540 
1541                                     /* Try setlocale with this value.  */
1542                                     if (setlocale_single (cat, buf) == NULL)
1543                                       {
1544                                         const char *last_try =
1545                                           get_main_locale_with_same_language (buf);
1546 
1547                                         if (last_try == NULL
1548                                             || setlocale_single (cat, last_try) == NULL)
1549                                           warn = 1;
1550                                       }
1551                                   }
1552                               }
1553 #    if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1554                         CFRelease (prefArray);
1555 #    elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1556                           }
1557 #    endif
1558 #   else
1559                         const char *last_try =
1560                           get_main_locale_with_same_language (name);
1561 
1562                         if (last_try == NULL
1563                             || setlocale_single (cat, last_try) == NULL)
1564                           warn = 1;
1565 #   endif
1566                       }
1567                     else
1568                       {
1569                         /* For LC_NUMERIC, the application should use the locale
1570                            properties kCFLocaleDecimalSeparator,
1571                            kCFLocaleGroupingSeparator.
1572                            For LC_TIME, the application should use the locale
1573                            property kCFLocaleCalendarIdentifier.
1574                            For LC_COLLATE, the application should use the locale
1575                            properties kCFLocaleCollationIdentifier,
1576                            kCFLocaleCollatorIdentifier.
1577                            For LC_MONETARY, the application should use the locale
1578                            properties kCFLocaleCurrencySymbol,
1579                            kCFLocaleCurrencyCode.
1580                            But since most applications don't have macOS specific
1581                            code like this, try an existing locale with the same
1582                            territory.  */
1583                         const char *last_try =
1584                           get_main_locale_with_same_territory (name);
1585 
1586                         if (last_try == NULL
1587                             || setlocale_single (cat, last_try) == NULL)
1588                           warn = 1;
1589                       }
1590 
1591                     if (warn)
1592                       {
1593                         /* Warn only if the environment variable
1594                            SETLOCALE_VERBOSE is set.  Otherwise these warnings
1595                            are just annoyances, since normal users won't invoke
1596                            'localedef'.  */
1597                         const char *verbose = getenv ("SETLOCALE_VERBOSE");
1598                         if (verbose != NULL && verbose[0] != '\0')
1599                           fprintf (stderr,
1600                                    "Warning: Failed to set locale category %s to %s.\n",
1601                                    category_to_name (cat), name);
1602                       }
1603                   }
1604 #  else
1605                   goto fail;
1606 #  endif
1607             }
1608 
1609           /* All steps were successful.  */
1610           free (saved_locale);
1611           return setlocale (LC_ALL, NULL);
1612 
1613         fail:
1614           if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1615             setlocale (LC_ALL, saved_locale);
1616           free (saved_locale);
1617           return NULL;
1618         }
1619       else
1620         {
1621           const char *name =
1622             gl_locale_name_environ (category, category_to_name (category));
1623           if (name == NULL)
1624             name = gl_locale_name_default ();
1625 
1626           return setlocale_single (category, name);
1627         }
1628     }
1629   else
1630     {
1631 #  if defined _WIN32 && ! defined __CYGWIN__
1632       if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL)
1633         {
1634           char *saved_locale;
1635 
1636           /* Back up the old locale.  */
1637           saved_locale = setlocale (LC_ALL, NULL);
1638           if (saved_locale == NULL)
1639             return NULL;
1640           saved_locale = strdup (saved_locale);
1641           if (saved_locale == NULL)
1642             return NULL;
1643 
1644           if (setlocale_unixlike (LC_ALL, locale) == NULL)
1645             {
1646               free (saved_locale);
1647               return NULL;
1648             }
1649 
1650           /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1651              LC_CTYPE category to an invalid value ("C") when it does not
1652              support the specified encoding.  Report a failure instead.  */
1653           if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1654             {
1655               if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1656                 setlocale (LC_ALL, saved_locale);
1657               free (saved_locale);
1658               return NULL;
1659             }
1660 
1661           /* It was really successful.  */
1662           free (saved_locale);
1663           return setlocale (LC_ALL, NULL);
1664         }
1665       else
1666 #  endif
1667         return setlocale_single (category, locale);
1668     }
1669 }
1670 
1671 # endif /* NEED_SETLOCALE_IMPROVED */
1672 
1673 #endif
1674