1 /****************************************************************************
2 * *
3 * GNAT COMPILER COMPONENTS *
4 * *
5 * L O C A L E S *
6 * *
7 * C Implementation File *
8 * *
9 * Copyright (C) 2010-2019, Free Software Foundation, Inc. *
10 * *
11 * GNAT is free software; you can redistribute it and/or modify it under *
12 * terms of the GNU General Public License as published by the Free Soft- *
13 * ware Foundation; either version 3, or (at your option) any later ver- *
14 * sion. GNAT is distributed in the hope that it will be useful, but WITH- *
15 * OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *
16 * or FITNESS FOR A PARTICULAR PURPOSE. *
17 * *
18 * As a special exception under Section 7 of GPL version 3, you are granted *
19 * additional permissions described in the GCC Runtime Library Exception, *
20 * version 3.1, as published by the Free Software Foundation. *
21 * *
22 * You should have received a copy of the GNU General Public License and *
23 * a copy of the GCC Runtime Library Exception along with this program; *
24 * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see *
25 * <http://www.gnu.org/licenses/>. *
26 * *
27 * GNAT was originally developed by the GNAT team at New York University. *
28 * Extensive contributions were provided by Ada Core Technologies Inc. *
29 * *
30 ****************************************************************************/
31
32 /* This file provides OS-dependent support for the Ada.Locales package. */
33
34 #include <locale.h>
35 #include <ctype.h>
36 #include <stddef.h>
37
/* Buffer type through which c_get_language_code and c_get_country_code
   hand their result back to the caller.  */
typedef char char4 [4];
39
/* Table containing equivalences between ISO_639_1 codes and their ISO_639_3
   alpha-3 code plus their language name.

   The table is a flat array of string triples:

     { ISO-639-1 code, ISO-639-3 code, language name }

   The lookup functions in this file walk it with a stride of three and
   return on the first match, so every entry must supply all three strings
   and the order of the entries is significant.  */

static char* iso_639[] =
{
  "aa", "aar", "Afar",
  "ab", "abk", "Abkhazian",
  "ae", "ave", "Avestan",
  "af", "afr", "Afrikaans",
  "ak", "aka", "Akan",
  "am", "amh", "Amharic",
  "an", "arg", "Aragonese",
  "ar", "ara", "Arabic",
  "as", "asm", "Assamese",
  "av", "ava", "Avaric",
  "ay", "aym", "Aymara",
  "az", "aze", "Azerbaijani",

  "ba", "bak", "Bashkir",
  "be", "bel", "Belarusian",
  "bg", "bul", "Bulgarian",
  "bi", "bis", "Bislama",
  "bm", "bam", "Bambara",
  "bn", "ben", "Bengali",
  "bo", "bod", "Tibetan",
  "br", "bre", "Breton",
  "bs", "bos", "Bosnian",

  "ca", "cat", "Catalan",
  "ce", "che", "Chechen",
  "ch", "cha", "Chamorro",
  "co", "cos", "Corsican",
  "cr", "cre", "Cree",
  "cs", "ces", "Czech",
  "cu", "chu", "Church Slavic",
  "cv", "chv", "Chuvash",
  "cy", "cym", "Welsh",

  "da", "dan", "Danish",
  "de", "deu", "German",
  "dv", "div", "Divehi",
  "dz", "dzo", "Dzongkha",

  "ee", "ewe", "Ewe",
  "el", "ell", "Modern Greek",
  "en", "eng", "English",
  "eo", "epo", "Esperanto",
  "es", "spa", "Spanish",
  "et", "est", "Estonian",
  "eu", "eus", "Basque",

  "fa", "fas", "Persian",
  "ff", "ful", "Fulah",
  "fi", "fin", "Finnish",
  "fj", "fij", "Fijian",
  "fo", "fao", "Faroese",
  "fr", "fra", "French",
  "fy", "fry", "Western Frisian",

  "ga", "gle", "Irish",
  "gd", "gla", "Scottish Gaelic",
  "gl", "glg", "Galician",
  "gn", "grn", "Guarani",
  "gu", "guj", "Gujarati",
  "gv", "glv", "Manx",

  "ha", "hau", "Hausa",
  "he", "heb", "Hebrew",
  "hi", "hin", "Hindi",
  "ho", "hmo", "Hiri Motu",
  "hr", "hrv", "Croatian",
  "ht", "hat", "Haitian",
  "hu", "hun", "Hungarian",
  "hy", "hye", "Armenian",
  "hz", "her", "Herero",

  "ia", "ina", "Interlingua",
  "id", "ind", "Indonesian",
  "ie", "ile", "Interlingue",
  "ig", "ibo", "Igbo",
  "ii", "iii", "Sichuan Yi",
  "ik", "ipk", "Inupiaq",
  "io", "ido", "Ido",
  "is", "isl", "Icelandic",
  "it", "ita", "Italian",
  "iu", "iku", "Inuktitut",

  "ja", "jpn", "Japanese",
  "jv", "jav", "Javanese",

  "ka", "kat", "Georgian",
  "kg", "kon", "Kongo",
  "ki", "kik", "Kikuyu",
  "kj", "kua", "Kuanyama",
  "kk", "kaz", "Kazakh",
  "kl", "kal", "Kalaallisut",
  "km", "khm", "Central Khmer",
  "kn", "kan", "Kannada",
  "ko", "kor", "Korean",
  "kr", "kau", "Kanuri",
  "ks", "kas", "Kashmiri",
  "ku", "kur", "Kurdish",
  "kv", "kom", "Komi",
  "kw", "cor", "Cornish",
  "ky", "kir", "Kirghiz",

  "la", "lat", "Latin",
  "lb", "ltz", "Luxembourgish",
  "lg", "lug", "Ganda",
  "li", "lim", "Limburgan",
  "ln", "lin", "Lingala",
  "lo", "lao", "Lao",
  "lt", "lit", "Lithuanian",
  "lu", "lub", "Luba-Katanga",
  "lv", "lav", "Latvian",

  "mg", "mlg", "Malagasy",
  "mh", "mah", "Marshallese",
  "mi", "mri", "Maori",
  "mk", "mkd", "Macedonian",
  "ml", "mal", "Malayalam",
  "mn", "mon", "Mongolian",
  "mr", "mar", "Marathi",
  "ms", "msa", "Malay",
  "mt", "mlt", "Maltese",
  "my", "mya", "Burmese",

  "na", "nau", "Nauru",
  "nb", "nob", "Norwegian Bokmal",
  "nd", "nde", "North Ndebele",
  "ne", "nep", "Nepali",
  "ng", "ndo", "Ndonga",
  "nl", "nld", "Dutch",
  "nn", "nno", "Norwegian Nynorsk",
  "no", "nor", "Norwegian",
  "nr", "nbl", "South Ndebele",
  "nv", "nav", "Navajo",
  "ny", "nya", "Nyanja",

  "oc", "oci", "Occitan",
  "oj", "oji", "Ojibwa",
  "om", "orm", "Oromo",
  "or", "ori", "Oriya",
  "os", "oss", "Ossetian",

  "pa", "pan", "Panjabi",
  "pi", "pli", "Pali",
  "pl", "pol", "Polish",
  "ps", "pus", "Pushto",
  "pt", "por", "Portuguese",

  "qu", "que", "Quechua",

  "rm", "roh", "Romansh",
  "rn", "run", "Rundi",
  "ro", "ron", "Romanian",
  "ru", "rus", "Russian",
  "rw", "kin", "Kinyarwanda",

  "sa", "san", "Sanskrit",
  "sc", "srd", "Sardinian",
  "sd", "snd", "Sindhi",
  "se", "sme", "Northern Sami",
  "sg", "sag", "Sango",
  "sh", "hbs", "Serbo-Croatian",
  "si", "sin", "Sinhala",
  "sk", "slk", "Slovak",
  "sl", "slv", "Slovenian",
  "sm", "smo", "Samoan",
  "sn", "sna", "Shona",
  "so", "som", "Somali",
  "sq", "sqi", "Albanian",
  "sr", "srp", "Serbian",
  "ss", "ssw", "Swati",
  "st", "sot", "Southern Sotho",
  "su", "sun", "Sundanese",
  "sv", "swe", "Swedish",
  "sw", "swa", "Swahili",

  "ta", "tam", "Tamil",
  "te", "tel", "Telugu",
  "tg", "tgk", "Tajik",
  "th", "tha", "Thai",
  "ti", "tir", "Tigrinya",
  "tk", "tuk", "Turkmen",
  "tl", "tgl", "Tagalog",
  "tn", "tsn", "Tswana",
  "to", "ton", "Tonga",
  "tr", "tur", "Turkish",
  "ts", "tso", "Tsonga",
  "tt", "tat", "Tatar",
  "tw", "twi", "Twi",
  "ty", "tah", "Tahitian",

  "ug", "uig", "Uighur",
  "uk", "ukr", "Ukrainian",
  "ur", "urd", "Urdu",
  "uz", "uzb", "Uzbek",

  "ve", "ven", "Venda",
  "vi", "vie", "Vietnamese",
  "vo", "vol", "Volapuk",

  "wa", "wln", "Walloon",
  "wo", "wol", "Wolof",

  "xh", "xho", "Xhosa",

  "yi", "yid", "Yiddish",
  "yo", "yor", "Yoruba",

  "za", "zha", "Zhuang",
  "zh", "zho", "Chinese",
  "zu", "zul", "Zulu"
};
255
/* Table containing equivalences between ISO_3166 alpha-2 codes and country
   names. This table has several entries for codes that have several valid
   country names.

   The table is a flat array of string pairs:

     { ISO-3166 alpha-2 code, country name }

   The lookup function in this file walks it with a stride of two, compares
   names ignoring case and returns the code of the first match, so every
   entry must supply both strings.  */

static char* iso_3166[] =
{
  "AD", "Andorra",
  "AE", "United Arab Emirates",
  "AF", "Afghanistan",
  "AG", "Antigua and Barbuda",
  "AI", "Anguilla",
  "AL", "Albania",
  "AM", "Armenia",
  "AN", "Netherlands Antilles",
  "AO", "Angola",
  "AQ", "Antarctica",
  "AR", "Argentina",
  "AS", "American Samoa",
  "AT", "Austria",
  "AU", "Australia",
  "AW", "Aruba",
  "AX", "Aland Islands",
  "AZ", "Azerbaijan",

  "BA", "Bosnia and Herzegovina",
  "BB", "Barbados",
  "BD", "Bangladesh",
  "BE", "Belgium",
  "BF", "Burkina Faso",
  "BG", "Bulgaria",
  "BH", "Bahrain",
  "BI", "Burundi",
  "BJ", "Benin",
  "BL", "Saint Barthélemy",
  "BM", "Bermuda",
  "BN", "Brunei Darussalam",
  "BO", "Bolivia, Plurinational State of",
  "BQ", "Bonaire, Sint Eustatius and Saba",
  "BR", "Brazil",
  "BS", "Bahamas",
  "BT", "Bhutan",
  "BV", "Bouvet Island",
  "BW", "Botswana",
  "BY", "Belarus",
  "BZ", "Belize",

  "CA", "Canada",
  "CC", "Cocos (Keeling) Islands",
  "CD", "Congo, Democratic Republic of the",
  "CF", "Central African Republic",
  "CG", "Congo",
  "CH", "Switzerland",
  "CI", "Côte d'Ivoire",
  "CK", "Cook Islands",
  "CL", "Chile",
  "CM", "Cameroon",
  "CN", "China",
  "CN", "People’s Republic of China",
  /* Same name spelled with an ASCII apostrophe, so that a locale name
     using the plain character is recognized as well.  */
  "CN", "People's Republic of China",
  "CN", "PR China",
  "CN", "PR-China",
  "CO", "Colombia",
  "CR", "Costa Rica",
  "CS", "Czechoslovakia",
  "CU", "Cuba",
  "CV", "Cape Verde",
  "CW", "Curaçao",
  "CX", "Christmas Island",
  "CY", "Cyprus",
  "CZ", "Czech Republic",

  "DE", "Germany",
  "DJ", "Djibouti",
  "DK", "Denmark",
  "DM", "Dominica",
  "DO", "Dominican Republic",
  "DZ", "Algeria",

  "EC", "Ecuador",
  "EE", "Estonia",
  "EG", "Egypt",
  "EH", "Western Sahara",
  "ER", "Eritrea",
  "ES", "Spain",
  "ET", "Ethiopia",

  "FI", "Finland",
  "FJ", "Fiji",
  "FK", "Falkland Islands (Malvinas)",
  "FM", "Micronesia, Federated States of",
  "FO", "Faroe Islands",
  "FR", "France",

  "GA", "Gabon",
  "GB", "United Kingdom",
  "GB", "United-Kingdom",
  "GB", "England",
  "GB", "Britain",
  "GB", "Great Britain",
  "GD", "Grenada",
  "GE", "Georgia",
  "GF", "French Guiana",
  "GG", "Guernsey",
  "GH", "Ghana",
  "GI", "Gibraltar",
  "GL", "Greenland",
  "GM", "Gambia",
  "GN", "Guinea",
  "GP", "Guadeloupe",
  "GQ", "Equatorial Guinea",
  "GR", "Greece",
  "GS", "South Georgia and the South Sandwich Islands",
  "GT", "Guatemala",
  "GU", "Guam",
  "GW", "Guinea-Bissau",
  "GY", "Guyana",

  "HK", "Hong Kong",
  "HK", "Hong-Kong",
  "HM", "Heard Island and McDonald Islands",
  "HN", "Honduras",
  "HR", "Croatia",
  "HT", "Haiti",
  "HU", "Hungary",

  "ID", "Indonesia",
  "IE", "Ireland",
  "IL", "Israel",
  "IM", "Isle of Man",
  "IN", "India",
  "IO", "British Indian Ocean Territory",
  "IQ", "Iraq",
  "IR", "Iran",
  "IR", "Iran, Islamic Republic of",
  "IS", "Iceland",
  "IT", "Italy",

  "JE", "Jersey",
  "JM", "Jamaica",
  "JO", "Jordan",
  "JP", "Japan",

  "KE", "Kenya",
  "KG", "Kyrgyzstan",
  "KH", "Cambodia",
  "KI", "Kiribati",
  "KM", "Comoros",
  "KN", "Saint Kitts and Nevis",
  "KP", "Korea, Democratic People's Republic of",
  "KR", "Korea, Republic of",
  "KW", "Kuwait",
  "KY", "Cayman Islands",
  "KZ", "Kazakhstan",

  "LA", "Lao People's Democratic Republic",
  "LB", "Lebanon",
  "LC", "Saint Lucia",
  "LI", "Liechtenstein",
  "LK", "Sri Lanka",
  "LR", "Liberia",
  "LS", "Lesotho",
  "LT", "Lithuania",
  "LU", "Luxembourg",
  "LV", "Latvia",
  "LY", "Libya",

  "MA", "Morocco",
  "MC", "Monaco",
  "MD", "Moldova, Republic of",
  "ME", "Montenegro",
  "MF", "Saint Martin",
  "MG", "Madagascar",
  "MH", "Marshall Islands",
  "MK", "Macedonia",
  "ML", "Mali",
  "MM", "Myanmar",
  "MN", "Mongolia",
  "MO", "Macao",
  "MP", "Northern Mariana Islands",
  "MQ", "Martinique",
  "MR", "Mauritania",
  "MS", "Montserrat",
  "MT", "Malta",
  "MU", "Mauritius",
  "MV", "Maldives",
  "MW", "Malawi",
  "MX", "Mexico",
  "MY", "Malaysia",
  "MZ", "Mozambique",

  "NA", "Namibia",
  "NC", "New Caledonia",
  "NE", "Niger",
  "NF", "Norfolk Island",
  "NG", "Nigeria",
  "NI", "Nicaragua",
  "NL", "Netherlands",
  "NL", "Holland",
  "NO", "Norway",
  "NP", "Nepal",
  "NR", "Nauru",
  "NU", "Niue",
  "NZ", "New Zealand",
  "NZ", "New-Zealand",

  "OM", "Oman",

  "PA", "Panama",
  "PE", "Peru",
  "PF", "French Polynesia",
  "PG", "Papua New Guinea",
  "PH", "Philippines",
  "PK", "Pakistan",
  "PL", "Poland",
  "PM", "Saint Pierre and Miquelon",
  "PN", "Pitcairn",
  "PR", "Puerto Rico",
  "PS", "Palestine, State of",
  "PT", "Portugal",
  "PW", "Palau",
  "PY", "Paraguay",

  "QA", "Qatar",

  "RE", "Réunion",
  "RO", "Romania",
  "RS", "Serbia",
  "RU", "Russian Federation",
  "RW", "Rwanda",

  "SA", "Saudi Arabia",
  "SB", "Solomon Islands",
  "SC", "Seychelles",
  "SD", "Sudan",
  "SE", "Sweden",
  "SG", "Singapore",
  "SH", "Saint Helena, Ascension and Tristan da Cunha",
  "SI", "Slovenia",
  "SJ", "Svalbard and Jan Mayen",
  "SK", "Slovakia",
  "SL", "Sierra Leone",
  "SM", "San Marino",
  "SN", "Senegal",
  "SO", "Somalia",
  "SR", "Suriname",
  "SS", "South Sudan",
  "SV", "El Salvador",
  "SX", "Sint Maarten (Dutch part)",
  "SY", "Syrian Arab Republic",
  "SZ", "Swaziland",

  "TC", "Turks and Caicos Islands",
  "TD", "Chad",
  "TF", "French Southern Territories",
  "TG", "Togo",
  "TH", "Thailand",
  "TJ", "Tajikistan",
  "TK", "Tokelau",
  "TL", "Timor-Leste",
  "TM", "Turkmenistan",
  "TN", "Tunisia",
  "TO", "Tonga",
  "TP", "East Timor",
  "TR", "Turkey",
  "TT", "Trinidad and Tobago",
  "TV", "Tuvalu",
  "TW", "Taiwan",
  "TW", "Taiwan, Province of China",
  "TZ", "Tanzania",
  "TZ", "Tanzania, United Republic of",

  "UA", "Ukraine",
  "UG", "Uganda",
  "UM", "United States Minor Outlying Islands",
  "US", "United States",
  "US", "United States of America",
  "US", "United-States",
  "UY", "Uruguay",
  "UZ", "Uzbekistan",

  "VA", "Holy See (Vatican City State)",
  "VC", "Saint Vincent and the Grenadines",
  "VE", "Venezuela",
  "VE", "Venezuela, Bolivarian Republic of",
  "VG", "Virgin Islands, British",
  "VI", "Virgin Islands, U.S.",
  "VN", "Viet Nam",
  "VU", "Vanuatu",
  "WF", "Wallis and Futuna",
  "WS", "Samoa",

  "YE", "Yemen",
  "YT", "Mayotte",
  "YU", "Yugoslavia",

  "ZA", "South Africa",
  "ZM", "Zambia",
  "ZW", "Zimbabwe"
};
555
/* Utility function to perform case insensitive string comparison. Returns 1
   if both strings have the same length and match character by character
   once folded with tolower, and 0 otherwise. */

static int
str_case_equals (const char *s1, const char *s2) {
  /* The <ctype.h> functions are only defined for EOF and for values
     representable as unsigned char. Plain char may be signed and the
     country names include non-ASCII bytes, so convert before folding. */
  while (*s1 != '\0' && *s2 != '\0'
         && tolower((unsigned char) *s1) == tolower((unsigned char) *s2)) {
    s1++;
    s2++;
  }

  /* The strings are equal only if both ended at the same position */
  return (*s1 == '\0') && (*s2 == '\0');
}
568
/* Utility function that copies the first LENGTH characters of SOURCE into
   TARGET and then appends a null terminator. TARGET must therefore have
   room for LENGTH + 1 characters. */

static void
str_copy (char *target, char *source, int length) {
  int copied = 0;

  while (copied < length) {
    target[copied] = source[copied];
    copied++;
  }

  target[copied] = '\0';
}
580
/* Utility function that returns a pointer to the end of the part of the
   lc_all string that has to be processed. Required because in some targets
   (for example, AIX), the string returned by setlocale() has duplicates.

   When the string holds at least three space-separated words and the first
   two are identical, the result is the first space. In every other case it
   is the null terminator of the string. */

static char*
str_get_last_byte (char *lc_all) {
  char *first_gap;
  char *second_gap;
  char *end;
  int first_len;
  int second_len;
  int i;

  /* Locate the first space; without one the whole string is used */
  first_gap = lc_all;
  while (*first_gap != '\0' && *first_gap != ' ')
    first_gap++;

  if (*first_gap == '\0')
    return first_gap;

  /* Locate the second space; without one the whole string is used */
  second_gap = first_gap + 1;
  while (*second_gap != '\0' && *second_gap != ' ')
    second_gap++;

  if (*second_gap == '\0')
    return second_gap;

  /* Locate the null terminator of the string */
  end = second_gap;
  while (*end != '\0')
    end++;

  /* Words of different length cannot be duplicates */
  first_len = first_gap - lc_all;
  second_len = second_gap - first_gap - 1;

  if (first_len != second_len)
    return end;

  /* Same length: compare the two words character by character */
  for (i = 0; i < first_len; i++) {
    if (lc_all[i] != first_gap[i + 1])
      return end;
  }

  /* The second word repeats the first one: stop at the first space */
  return first_gap;
}
646
647 /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
648 returns the corresponding iso-639-3 code or NULL if not found. */
649
650 static char*
iso_639_1_to_639_3(char * iso_639_1_code)651 iso_639_1_to_639_3(char* iso_639_1_code) {
652 int len = sizeof(iso_639)/sizeof(iso_639[0]);
653 char **p = iso_639;
654 int j;
655
656 for (j=0; j < len/3; j++) {
657 char* s1 = iso_639_1_code;
658 char* s2 = *p;
659
660 if (s1[0]==s2[0] && s1[1]==s2[1]) {
661 p++;
662 return *p;
663 }
664
665 p = p + 3;
666 }
667
668 return NULL;
669 }
670
671 /* Utility function to search in the iso_639_1 table for a language name;
672 returns the corresponding iso-639-3 code or NULL if not found. */
673
674 static char*
language_name_to_639_3(char * name)675 language_name_to_639_3(char* name) {
676 int len = sizeof(iso_639)/sizeof(iso_639[0]);
677 char **p = iso_639;
678 int j;
679
680 p = p + 2;
681 for (j=0; j < len/3; j++) {
682 if (str_case_equals(name, *p)) {
683 p--;
684 return *p;
685 }
686
687 p = p + 3;
688 }
689
690 return NULL;
691 }
692
693 /* Utility function to search in the iso_3166 table for a country name;
694 returns the corresponding iso-3166 code or NULL if not found. */
695
696 static char*
country_name_to_3166(char * name)697 country_name_to_3166 (char* name) {
698 int len = sizeof(iso_3166)/sizeof(iso_3166[0]);
699 char **p = iso_3166;
700 int j;
701
702 p++;
703 for (j=0; j < len/2; j++) {
704 if (str_case_equals(name, *p)) {
705 p--;
706 return *p;
707 }
708
709 p = p + 2;
710 }
711
712 return NULL;
713 }
714
715 /*
716 c_get_language_code needs to fill in the Alpha-3 encoding of the
717 language code (3 lowercase letters). That should be "und" if the
718 language is unknown. [see Ada.Locales]
719 */
720 void
c_get_language_code(char4 p)721 c_get_language_code (char4 p) {
722 char* Saved_Locale = setlocale(LC_ALL, NULL);
723 char iso_639_3_code[] = "und"; /* Language Unknown */
724 char* lc_all;
725 char* result;
726
727 /* Get locales set in the environment */
728
729 setlocale(LC_ALL, "");
730 lc_all = setlocale(LC_ALL, NULL);
731
732 /* The string returned by setlocale has the following format:
733
734 language[_territory][.code-set][@modifier]
735
736 where language is an ISO 639 language code, territory is an ISO 3166
737 country code, and codeset is a character set or encoding identifier
738 like ISO-8859-1 or UTF-8.
739 */
740
741 if (lc_all != NULL) {
742 char* s = lc_all;
743 int lang_length = 0;
744
745 /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
746 code, or a language name) adding a string terminator */
747
748 while (*s != '_' && *s != '.' && *s != '@' && *s != '\0')
749 s++;
750
751 lang_length = s - lc_all;
752
753 /* Handle conversion of ISO-639-1 to ISO-639-3 */
754
755 if (lang_length == 2) {
756 char iso_639_1[3];
757 char* to_iso_639_3;
758
759 /* Duplicate the ISO-639-1 code adding the null terminator required to
760 search for the equivalent ISO-639-3 code; we cannot just append the
761 null terminator since the pointer may reference non-writable memory.
762 */
763
764 str_copy(iso_639_1, lc_all, lang_length);
765 to_iso_639_3 = iso_639_1_to_639_3(iso_639_1);
766
767 if (to_iso_639_3)
768 str_copy(iso_639_3_code, to_iso_639_3, 3);
769
770 /* Copy the ISO-639-3 code (adding a null terminator) */
771
772 } else if (lang_length == 3) {
773 str_copy(iso_639_3_code, lc_all, lang_length);
774
775 /* Handle conversion of language name to ISO-639-3 */
776
777 } else if (lang_length > 3) {
778 char name_copy[lang_length + 1];
779 char* to_iso_639_3;
780
781 /* Duplicate the ISO-639-1 code adding the null terminator required to
782 search for the equivalent ISO-639-3 code; we cannot just append the
783 null terminator since the pointer may reference non-writable memory.
784 */
785
786 str_copy(name_copy, lc_all, lang_length);
787 to_iso_639_3 = language_name_to_639_3(name_copy);
788
789 if (to_iso_639_3)
790 str_copy(iso_639_3_code, to_iso_639_3, 3);
791 }
792 }
793
794 /* Copy out the computed ISO_639_3 code */
795
796 result = iso_639_3_code;
797 for (; *result != '\0'; p++, result++)
798 *p = *result;
799
800 /* Restore the original locale settings */
801
802 setlocale(LC_ALL, Saved_Locale);
803
804 return;
805 }
806
807 /*
808 c_get_country_code needs to fill in the Alpha-2 encoding of the
809 country code (2 uppercase letters). That should be "ZZ" if the
810 country is unknown. [see Ada.Locales]
811 */
812 void
c_get_country_code(char4 p)813 c_get_country_code (char4 p) {
814 char* Saved_Locale = setlocale(LC_ALL, NULL);
815 char iso_3166_code[] = "ZZ"; /* Country Unknown */
816 char* lc_all;
817 char* result;
818
819 /* Get locales set in the environment */
820
821 setlocale(LC_ALL, "");
822 lc_all = setlocale(LC_ALL, NULL);
823
824 /* The string returned by setlocale has the following format:
825
826 language[_territory][.code-set][@modifier]
827
828 where language is an ISO 639 language code, territory is an ISO 3166
829 country code, and codeset is a character set or encoding identifier
830 like ISO-8859-1 or UTF-8.
831 */
832
833 if (lc_all != NULL) {
834 char* s1 = lc_all;
835 char* s2 = NULL;
836 char* last_byte = str_get_last_byte(lc_all);
837 int country_length = 0;
838
839 /* Search for the beginning of the country code */
840
841 s1 = lc_all;
842 while (*s1 != '_' && *s1 != '.' && *s1 != '@' && s1 != last_byte)
843 s1++;
844
845 if (*s1 == '_') {
846 s1++;
847 s2 = s1;
848
849 while (*s2 != '.' && *s2 != '@' && s2 != last_byte)
850 s2++;
851
852 country_length = s2 - s1;
853
854 if (country_length == 2) {
855 str_copy(iso_3166_code, s1, country_length);
856
857 /* setlocale returned us the country name */
858
859 } else if (country_length > 3) {
860 char name_copy[country_length + 1];
861 char* to_3166;
862
863 str_copy(name_copy, s1, country_length);
864 to_3166 = country_name_to_3166(name_copy);
865
866 if (to_3166)
867 str_copy(iso_3166_code, to_3166, 2);
868 }
869 }
870 }
871
872 /* Copy out the computed ISO_3166 code */
873
874 result = iso_3166_code;
875 for (; *result != '\0'; p++, result++)
876 *p = *result;
877
878 /* Restore the original locale settings */
879
880 setlocale(LC_ALL, Saved_Locale);
881
882 return;
883 }
884