1 // tinygettext - A gettext replacement that works directly on .po files
2 // Copyright (c) 2006 Ingo Ruhnke <grumbel@gmail.com>
3 //
4 // This software is provided 'as-is', without any express or implied
5 // warranty. In no event will the authors be held liable for any damages
6 // arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose,
9 // including commercial applications, and to alter it and redistribute it
10 // freely, subject to the following restrictions:
11 //
12 // 1. The origin of this software must not be misrepresented; you must not
13 //    claim that you wrote the original software. If you use this software
14 //    in a product, an acknowledgement in the product documentation would be
15 //    appreciated but is not required.
16 // 2. Altered source versions must be plainly marked as such, and must not be
17 //    misrepresented as being the original software.
18 // 3. This notice may not be removed or altered from any source distribution.
19 
#include "tinygettext/language.hpp"

#include <assert.h>
#include <ctype.h>
#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>
26 
27 namespace tinygettext {
28 
/**
 * One row of the built-in language table.
 *
 * All members are plain C strings so that rows can be written as
 * static aggregate initializers; the member order therefore has to
 * stay in sync with the column order used by those initializers.
 */
struct LanguageSpec {
  const char* language;        ///< Language code: "de", "en", ...
  const char* country;         ///< Country code: "BR", "DE", ..., can be 0
  const char* modifier;        ///< Modifier/variant: "Latn", "ije", "latin", ..., can be 0
  const char* name;            ///< Language name: "German", "English", "French", ...
  const char* name_localized;  ///< Language name in the specified language, can be 0
};
45 
46 /** Language Definitions */
47 //*{
48 static const LanguageSpec languages[] = {
49   { "aa", 0,    0, "Afar"                        , "ʿAfár af"                 },
50   { "af", 0,    0, "Afrikaans"                   , "Afrikaans"                },
51   { "af", "ZA", 0, "Afrikaans (South Africa)"    , 0                          },
52   { "am", 0,    0, "Amharic"                     , "ኣማርኛ"                    },
53   { "ar", 0,    0, "Arabic"                      , "العربية"                  },
54   { "ar", "AR", 0, "Arabic (Argentina)"          , 0                          },
55   { "ar", "OM", 0, "Arabic (Oman)"               , 0                          },
56   { "ar", "SA", 0, "Arabic (Saudi Arabia)"       , 0                          },
57   { "ar", "SY", 0, "Arabic (Syrian Arab Republic)", 0                         },
58   { "ar", "TN", 0, "Arabic (Tunisia)"            , 0                          },
59   { "as", 0,    0, "Assamese"                    , "অসমীয়া"                    },
60   { "ast",0,    0, "Asturian"                    , "Asturianu"                },
61   { "ay", 0,    0, "Aymara"                      , "aymar aru"                },
62   { "az", 0,    0, "Azerbaijani"                 , "Azərbaycanca"             },
63   { "az", "IR", 0, "Azerbaijani (Iran)"          , 0                          },
64   { "be", 0,    0, "Belarusian"                  , "Беларуская мова"          },
65   { "be", 0, "latin", "Belarusian"               , "Беларуская мова"          },
66   { "bg", 0,    0, "Bulgarian"                   , "български"                },
67   { "bg", "BG", 0, "Bulgarian (Bulgaria)"        , 0                          },
68   { "bn", 0,    0, "Bengali"                     , "বাংলা"                     },
69   { "bn", "BD", 0, "Bengali (Bangladesh)"        , 0                          },
70   { "bn", "IN", 0, "Bengali (India)"             , 0                          },
71   { "bo", 0,    0, "Tibetan"                     , "བོད་སྐད་"                     },
72   { "br", 0,    0, "Breton"                      , "brezhoneg"                },
73   { "bs", 0,    0, "Bosnian"                     , "Bosanski"                 },
74   { "bs", "BA", 0, "Bosnian (Bosnia/Herzegovina)", 0                          },
75   { "bs", "BS", 0, "Bosnian (Bahamas)"           , 0                          },
76   { "ca", "ES", "valencia", "Catalan (valencia)" , 0                          },
77   { "ca", "ES", 0, "Catalan (Spain)"             , 0                          },
78   { "ca", 0,    "valencia", "Catalan (valencia)" , 0                          },
79   { "ca", 0,    0, "Catalan"                     , 0                          },
80   { "cmn", 0,    0, "Mandarin"                   , 0                          },
81   { "co", 0,    0, "Corsican"                    , "corsu"                    },
82   { "cs", 0,    0, "Czech"                       , "Čeština"                  },
83   { "cs", "CZ", 0, "Czech (Czech Republic)"      , "Čeština (Česká Republika)"},
84   { "cy", 0,    0, "Welsh"                       , "Welsh"                    },
85   { "cy", "GB", 0, "Welsh (Great Britain)"       , "Welsh (Great Britain)"    },
86   { "cz", 0,    0, "Unknown language"            , "Unknown language"         },
87   { "da", 0,    0, "Danish"                      , "Dansk"                    },
88   { "da", "DK", 0, "Danish (Denmark)"            , "Dansk (Danmark)"          },
89   { "de", 0,    0, "German"                      , "Deutsch"                  },
90   { "de", "AT", 0, "German (Austria)"            , "Deutsch (Österreich)"     },
91   { "de", "CH", 0, "German (Switzerland)"        , "Deutsch (Schweiz)"        },
92   { "de", "DE", 0, "German (Germany)"            , "Deutsch (Deutschland)"    },
93   { "dk", 0,    0, "Unknown language"            , "Unknown language"         },
94   { "dz", 0,    0, "Dzongkha"                    , "རྫོང་ཁ"                      },
95   { "el", 0,    0, "Greek"                       , "ελληνικά"                 },
96   { "el", "GR", 0, "Greek (Greece)"              , 0                          },
97   { "en", 0,    0, "English"                     , "English"                  },
98   { "en", "AU", 0, "English (Australia)"         , "English (Australia)"      },
99   { "en", "CA", 0, "English (Canada)"            , "English (Canada)"         },
100   { "en", "GB", 0, "English (Great Britain)"     , "English (Great Britain)"  },
101   { "en", "US", 0, "English (United States)"     , "English (United States)"  },
102   { "en", "ZA", 0, "English (South Africa)"      , "English (South Africa)"   },
103   { "en", 0, "boldquot", "English"               , "English"                  },
104   { "en", 0, "quot", "English"                   , "English"                  },
105   { "en", "US", "piglatin", "English"            , "English"                  },
106   { "eo", 0,    0, "Esperanto"                   , "Esperanto"                },
107   { "es", 0,    0, "Spanish"                     , "Español"                  },
108   { "es", "AR", 0, "Spanish (Argentina)"         , 0                          },
109   { "es", "CL", 0, "Spanish (Chile)"             , 0                          },
110   { "es", "CO", 0, "Spanish (Colombia)"          , 0                          },
111   { "es", "CR", 0, "Spanish (Costa Rica)"        , 0                          },
112   { "es", "DO", 0, "Spanish (Dominican Republic)", 0                          },
113   { "es", "EC", 0, "Spanish (Ecuador)"           , 0                          },
114   { "es", "ES", 0, "Spanish (Spain)"             , 0                          },
115   { "es", "GT", 0, "Spanish (Guatemala)"         , 0                          },
116   { "es", "HN", 0, "Spanish (Honduras)"          , 0                          },
117   { "es", "LA", 0, "Spanish (Laos)"              , 0                          },
118   { "es", "MX", 0, "Spanish (Mexico)"            , 0                          },
119   { "es", "NI", 0, "Spanish (Nicaragua)"         , 0                          },
120   { "es", "PA", 0, "Spanish (Panama)"            , 0                          },
121   { "es", "PE", 0, "Spanish (Peru)"              , 0                          },
122   { "es", "PR", 0, "Spanish (Puerto Rico)"       , 0                          },
123   { "es", "SV", 0, "Spanish (El Salvador)"       , 0                          },
124   { "es", "UY", 0, "Spanish (Uruguay)"           , 0                          },
125   { "es", "VE", 0, "Spanish (Venezuela)"         , 0                          },
126   { "et", 0,    0, "Estonian"                    , "eesti keel"               },
127   { "et", "EE", 0, "Estonian (Estonia)"          , 0                          },
128   { "et", "ET", 0, "Estonian (Ethiopia)"         , 0                          },
129   { "eu", 0,    0, "Basque"                      , "euskara"                  },
130   { "eu", "ES", 0, "Basque (Spain)"              , 0                          },
131   { "fa", 0,    0, "Persian"                     , "فارسى"                    },
132   { "fa", "AF", 0, "Persian (Afghanistan)"       , 0                          },
133   { "fa", "IR", 0, "Persian (Iran)"              , 0                          },
134   { "fi", 0,    0, "Finnish"                     , "suomi"                    },
135   { "fi", "FI", 0, "Finnish (Finland)"           , 0                          },
136   { "fo", 0,    0, "Faroese"                     , "Føroyskt"                 },
137   { "fo", "FO", 0, "Faeroese (Faroe Islands)"    , 0                          },
138   { "fr", 0,    0, "French"                      , "Français"                 },
139   { "fr", "CA", 0, "French (Canada)"             , "Français (Canada)"        },
140   { "fr", "CH", 0, "French (Switzerland)"        , "Français (Suisse)"        },
141   { "fr", "FR", 0, "French (France)"             , "Français (France)"        },
142   { "fr", "LU", 0, "French (Luxembourg)"         , "Français (Luxembourg)"    },
143   { "fy", 0,    0, "Frisian"                     , "Frysk"                    },
144   { "ga", 0,    0, "Irish"                       , "Gaeilge"                  },
145   { "gd", 0,    0, "Gaelic Scots"                , "Gàidhlig"                 },
146   { "gl", 0,    0, "Galician"                    , "Galego"                   },
147   { "gl", "ES", 0, "Galician (Spain)"            , 0                          },
148   { "gn", 0,    0, "Guarani"                     , "Avañe'ẽ"                  },
149   { "gu", 0,    0, "Gujarati"                    , "ગુજરાતી"                    },
150   { "gv", 0,    0, "Manx"                        , "Gaelg"                    },
151   { "ha", 0,    0, "Hausa"                       , "حَوْسَ"                   },
152   { "he", 0,    0, "Hebrew"                      , "עברית"                     },
153   { "he", "IL", 0, "Hebrew (Israel)"             , 0                          },
154   { "hi", 0,    0, "Hindi"                       , "हिन्दी"                      },
155   { "hi", "IN", 0, "Hindi (India)"               , 0                          },
156   { "hr", 0,    0, "Croatian"                    , "Hrvatski"                 },
157   { "hr", "HR", 0, "Croatian (Croatia)"          , 0                          },
158   { "hu", 0,    0, "Hungarian"                   , "magyar"                   },
159   { "hu", "HU", 0, "Hungarian (Hungary)"         , 0                          },
160   { "hy", 0,    0, "Armenian"                    , "Հայերեն"                   },
161   { "ia", 0,    0, "Interlingua"                 , "Interlingua"              },
162   { "id", 0,    0, "Indonesian"                  , "Bahasa Indonesia"         },
163   { "id", "ID", 0, "Indonesian (Indonesia)"      , 0                          },
164   { "is", 0,    0, "Icelandic"                   , "Íslenska"                 },
165   { "is", "IS", 0, "Icelandic (Iceland)"         , 0                          },
166   { "it", 0,    0, "Italian"                     , "Italiano"                 },
167   { "it", "CH", 0, "Italian (Switzerland)"       , 0                          },
168   { "it", "IT", 0, "Italian (Italy)"             , 0                          },
169   { "iu", 0,    0, "Inuktitut"                   , "ᐃᓄᒃᑎᑐᑦ/inuktitut"         },
170   { "ja", 0,    0, "Japanese"                    , "日本語"                    },
171   { "ja", "JP", 0, "Japanese (Japan)"            , 0                          },
172   { "ka", 0,    0, "Georgian"                    , "ქართული"                  },
173   { "kk", 0,    0, "Kazakh"                      , "Қазақша"                  },
174   { "kl", 0,    0, "Kalaallisut"                 , "Kalaallisut"              },
175   { "km", 0,    0, "Khmer"                       , "ភាសាខ្មែរ"                   },
176   { "km", "KH", 0, "Khmer (Cambodia)"            , 0                          },
177   { "kn", 0,    0, "Kannada"                     , "ಕನ್ನಡ"                      },
178   { "ko", 0,    0, "Korean"                      , "한국어"                     },
179   { "ko", "KR", 0, "Korean (Korea)"              , 0                          },
180   { "ku", 0,    0, "Kurdish"                     , "Kurdî"                    },
181   { "kw", 0,    0, "Cornish"                     , "Kernowek"                 },
182   { "ky", 0,    0, "Kirghiz"                     , 0                          },
183   { "la", 0,    0, "Latin"                       , "Latina"                   },
184   { "lo", 0,    0, "Lao"                         , "ລາວ"                       },
185   { "lt", 0,    0, "Lithuanian"                  , "Lietuvių"                 },
186   { "lt", "LT", 0, "Lithuanian (Lithuania)"      , 0                          },
187   { "lv", 0,    0, "Latvian"                     , "Latviešu"                 },
188   { "lv", "LV", 0, "Latvian (Latvia)"            , 0                          },
189   { "jbo", 0,    0, "Lojban"                     , "La .lojban."              },
190   { "mg", 0,    0, "Malagasy"                    , "Malagasy"                 },
191   { "mi", 0,    0, "Maori"                       , "Māori"                    },
192   { "mk", 0,    0, "Macedonian"                  , "Македонски"               },
193   { "mk", "MK", 0, "Macedonian (Macedonia)"      , 0                          },
194   { "ml", 0,    0, "Malayalam"                   , "മലയാളം"                   },
195   { "mn", 0,    0, "Mongolian"                   , "Монгол"                   },
196   { "mr", 0,    0, "Marathi"                     , "मराठी"                      },
197   { "ms", 0,    0, "Malay"                       , "Bahasa Melayu"            },
198   { "ms", "MY", 0, "Malay (Malaysia)"            , 0                          },
199   { "mt", 0,    0, "Maltese"                     , "Malti"                    },
200   { "my", 0,    0, "Burmese"                     , "မြန်မာဘာသာ"                },
201   { "my", "MM", 0, "Burmese (Myanmar)"           , 0                          },
202   { "nb", 0,    0, "Norwegian Bokmal"            , 0                          },
203   { "nb", "NO", 0, "Norwegian Bokmål (Norway)"   , 0                          },
204   { "nds", 0,   0, "Low German"                  , 0                          },
205   { "ne", 0,    0, "Nepali"                      , 0                          },
206   { "nl", 0,    0, "Dutch"                       , "Nederlands"               },
207   { "nl", "BE", 0, "Dutch (Belgium)"             , 0                          },
208   { "nl", "NL", 0, "Dutch (Netherlands)"         , 0                          },
209   { "nn", 0,    0, "Norwegian Nynorsk"           , "Norsk nynorsk"            },
210   { "nn", "NO", 0, "Norwegian Nynorsk (Norway)"  , 0                          },
211   { "no", 0,    0, "Norwegian"                   , "Norsk bokmål"             },
212   { "no", "NO", 0, "Norwegian (Norway)"          , 0                          },
213   { "no", "NY", 0, "Norwegian (NY)"              , 0                          },
214   { "nr", 0,    0, "Ndebele, South"              , 0                          },
215   { "oc", 0,    0, "Occitan post 1500"           , "Occitan"                  },
216   { "om", 0,    0, "Oromo"                       , "Oromoo"                   },
217   { "or", 0,    0, "Oriya"                       , "ଓଡ଼ିଆ"                     },
218   { "pa", 0,    0, "Punjabi"                     , "ਪੰਜਾਬੀ"                     },
219   { "pl", 0,    0, "Polish"                      , "Polski"                   },
220   { "pl", "PL", 0, "Polish (Poland)"             , 0                          },
221   { "ps", 0,    0, "Pashto"                      , "پښتو"                     },
222   { "pt", 0,    0, "Portuguese"                  , "Português"                },
223   { "pt", "BR", 0, "Portuguese (Brazil)"         , 0                          },
224   { "pt", "PT", 0, "Portuguese (Portugal)"       , 0                          },
225   { "qu", 0,    0, "Quechua"                     , "Runa Simi"                },
226   { "rm", 0,    0, "Rhaeto-Romance"              , "Rumantsch"                },
227   { "ro", 0,    0, "Romanian"                    , "Română"                   },
228   { "ro", "RO", 0, "Romanian (Romania)"          , 0                          },
229   { "ru", 0,    0, "Russian"                     ,"Русский"                   },
230   { "ru", "RU", 0, "Russian (Russia"             , 0                          },
231   { "rw", 0,    0, "Kinyarwanda"                 , "Kinyarwanda"              },
232   { "sa", 0,    0, "Sanskrit"                    , 0                          },
233   { "sd", 0,    0, "Sindhi"                      , 0                          },
234   { "se", 0,    0, "Sami"                        , "Sámegiella"               },
235   { "se", "NO", 0, "Sami (Norway)"               , 0                          },
236   { "si", 0,    0, "Sinhalese"                   , 0                          },
237   { "sk", 0,    0, "Slovak"                      , "Slovenčina"               },
238   { "sk", "SK", 0, "Slovak (Slovakia)"           , 0                          },
239   { "sl", 0,    0, "Slovenian"                   , "Slovenščina"              },
240   { "sl", "SI", 0, "Slovenian (Slovenia)"        , 0                          },
241   { "sl", "SL", 0, "Slovenian (Sierra Leone)"    , 0                          },
242   { "sm", 0,    0, "Samoan"                      , 0                          },
243   { "so", 0,    0, "Somali"                      , 0                          },
244   { "sp", 0,    0, "Unknown language"            , 0                          },
245   { "sq", 0,    0, "Albanian"                    , "Shqip"                    },
246   { "sq", "AL", 0, "Albanian (Albania)"          , 0                          },
247   { "sr", 0,    0, "Serbian"                     , "Српски / srpski"          },
248   { "sr", "YU", 0, "Serbian (Yugoslavia)"        , 0                          },
249   { "sr", 0,"ije", "Serbian"                     , 0                          },
250   { "sr", 0, "latin", "Serbian"                  , 0                          },
251   { "sr", 0, "Latn",  "Serbian"                  , 0                          },
252   { "ss", 0,    0, "Swati"                       , 0                          },
253   { "st", 0,    0, "Sotho"                       , 0                          },
254   { "sv", 0,    0, "Swedish"                     , "Svenska"                  },
255   { "sv", "SE", 0, "Swedish (Sweden)"            , 0                          },
256   { "sv", "SV", 0, "Swedish (El Salvador)"       , 0                          },
257   { "sw", 0,    0, "Swahili"                     , 0                          },
258   { "ta", 0,    0, "Tamil"                       , 0                          },
259   { "te", 0,    0, "Telugu"                      , 0                          },
260   { "tg", 0,    0, "Tajik"                       , 0                          },
261   { "th", 0,    0, "Thai"                        , "ไทย"                      },
262   { "th", "TH", 0, "Thai (Thailand)"             , 0                          },
263   { "ti", 0,    0, "Tigrinya"                    , 0                          },
264   { "tk", 0,    0, "Turkmen"                     , 0                          },
265   { "tl", 0,    0, "Tagalog"                     , 0                          },
266   { "to", 0,    0, "Tonga"                       , 0                          },
267   { "tr", 0,    0, "Turkish"                     , "Türkçe"                   },
268   { "tr", "TR", 0, "Turkish (Turkey)"            , 0                          },
269   { "ts", 0,    0, "Tsonga"                      , 0                          },
270   { "tt", 0,    0, "Tatar"                       , 0                          },
271   { "ug", 0,    0, "Uighur"                      , 0                          },
272   { "uk", 0,    0, "Ukrainian"                   , "Українська"               },
273   { "uk", "UA", 0, "Ukrainian (Ukraine)"         , 0                          },
274   { "ur", 0,    0, "Urdu"                        , 0                          },
275   { "ur", "PK", 0, "Urdu (Pakistan)"             , 0                          },
276   { "uz", 0,    0, "Uzbek"                       , 0                          },
277   { "uz", 0, "cyrillic", "Uzbek"                 , 0                          },
278   { "vi", 0,    0, "Vietnamese"                  , "Tiếng Việt"               },
279   { "vi", "VN", 0, "Vietnamese (Vietnam)"        , 0                          },
280   { "wa", 0,    0, "Walloon"                     , 0                          },
281   { "wo", 0,    0, "Wolof"                       , 0                          },
282   { "xh", 0,    0, "Xhosa"                       , 0                          },
283   { "yi", 0,    0, "Yiddish"                     , "ייִדיש"                      },
284   { "yo", 0,    0, "Yoruba"                      , 0                          },
285   { "zh", 0,    0, "Chinese"                     , "中文"                      },
286   { "zh", "CN", 0, "Chinese (simplified)"        , 0                          },
287   { "zh", "HK", 0, "Chinese (Hong Kong)"         , 0                          },
288   { "zh", "TW", 0, "Chinese (traditional)"       , 0                          },
289   { "zu", 0,    0, "Zulu"                        , 0                          },
290   { NULL, 0,    0, NULL                          , 0                          }
291 };
292 //*}
293 
/**
 * Translate a human readable language alias into a locale string.
 *
 * The lookup is case-insensitive for ASCII letters: @p name is
 * lower-cased byte by byte before it is compared against the alias
 * table, so "German", "GERMAN" and "german" all resolve to
 * "de_DE.ISO-8859-1".
 *
 * @param name  Alias or locale string as supplied by the caller.
 * @return The locale string registered for the alias, or @p name
 *         itself (unchanged, original case) when it is not an alias.
 */
std::string
resolve_language_alias(const std::string& name)
{
  typedef std::unordered_map<std::string, std::string> Aliases;

  // FIXME: Many of those are not useful for us, since we leave
  // encoding to the app, not to the language, we could/should
  // also match against all language names, not just aliases from
  // locale.alias

  // Aliases taken from /etc/locale.alias.
  //
  // Built exactly once; a function-local static with an initializer is
  // initialized in a thread-safe way, unlike a fill-on-first-empty check.
  //
  // NOTE(review): the keys "ja_JP", "ja_JP.ujis", "ko_KR", "no_NO" and
  // "no_NO.ISO-8859-1" contain upper-case letters and therefore can
  // never equal the lower-cased lookup key below. They are kept as-is
  // to preserve behaviour (making them match would e.g. turn "no_NO"
  // into "nb_NO"); decide separately whether that is wanted.
  static const Aliases language_aliases = {
    { "bokmal",           "nb_NO.ISO-8859-1"  },
    { "bokmål",           "nb_NO.ISO-8859-1"  },
    { "catalan",          "ca_ES.ISO-8859-1"  },
    { "croatian",         "hr_HR.ISO-8859-2"  },
    { "czech",            "cs_CZ.ISO-8859-2"  },
    { "danish",           "da_DK.ISO-8859-1"  },
    { "dansk",            "da_DK.ISO-8859-1"  },
    { "deutsch",          "de_DE.ISO-8859-1"  },
    { "dutch",            "nl_NL.ISO-8859-1"  },
    { "eesti",            "et_EE.ISO-8859-1"  },
    { "estonian",         "et_EE.ISO-8859-1"  },
    { "finnish",          "fi_FI.ISO-8859-1"  },
    { "français",         "fr_FR.ISO-8859-1"  },
    { "french",           "fr_FR.ISO-8859-1"  },
    { "galego",           "gl_ES.ISO-8859-1"  },
    { "galician",         "gl_ES.ISO-8859-1"  },
    { "german",           "de_DE.ISO-8859-1"  },
    { "greek",            "el_GR.ISO-8859-7"  },
    { "hebrew",           "he_IL.ISO-8859-8"  },
    { "hrvatski",         "hr_HR.ISO-8859-2"  },
    { "hungarian",        "hu_HU.ISO-8859-2"  },
    { "icelandic",        "is_IS.ISO-8859-1"  },
    { "italian",          "it_IT.ISO-8859-1"  },
    { "japanese",         "ja_JP.eucJP"       },
    { "japanese.euc",     "ja_JP.eucJP"       },
    { "ja_JP",            "ja_JP.eucJP"       },
    { "ja_JP.ujis",       "ja_JP.eucJP"       },
    { "japanese.sjis",    "ja_JP.SJIS"        },
    { "korean",           "ko_KR.eucKR"       },
    { "korean.euc",       "ko_KR.eucKR"       },
    { "ko_KR",            "ko_KR.eucKR"       },
    { "lithuanian",       "lt_LT.ISO-8859-13" },
    { "no_NO",            "nb_NO.ISO-8859-1"  },
    { "no_NO.ISO-8859-1", "nb_NO.ISO-8859-1"  },
    { "norwegian",        "nb_NO.ISO-8859-1"  },
    { "nynorsk",          "nn_NO.ISO-8859-1"  },
    { "polish",           "pl_PL.ISO-8859-2"  },
    { "portuguese",       "pt_PT.ISO-8859-1"  },
    { "romanian",         "ro_RO.ISO-8859-2"  },
    { "russian",          "ru_RU.ISO-8859-5"  },
    { "slovak",           "sk_SK.ISO-8859-2"  },
    { "slovene",          "sl_SI.ISO-8859-2"  },
    { "slovenian",        "sl_SI.ISO-8859-2"  },
    { "spanish",          "es_ES.ISO-8859-1"  },
    { "swedish",          "sv_SE.ISO-8859-1"  },
    { "thai",             "th_TH.TIS-620"     },
    { "turkish",          "tr_TR.ISO-8859-9"  }
  };

  std::string name_lowercase(name);
  for(std::string::size_type i = 0; i < name_lowercase.size(); ++i)
  {
    // tolower() is only defined for EOF and values representable as
    // unsigned char; plain char may be negative for non-ASCII bytes
    // (e.g. the UTF-8 encoding of "ç"), so convert first.
    const unsigned char byte = static_cast<unsigned char>(name_lowercase[i]);
    name_lowercase[i] = static_cast<char>(tolower(byte));
  }

  const Aliases::const_iterator alias = language_aliases.find(name_lowercase);
  if (alias != language_aliases.end())
    return alias->second;

  // Not an alias: hand the input back untouched (not lower-cased)
  return name;
}
371 
372 Language
from_spec(const std::string & language,const std::string & country,const std::string & modifier)373 Language::from_spec(const std::string& language, const std::string& country, const std::string& modifier)
374 {
375   typedef std::unordered_map<std::string, std::vector<const LanguageSpec*> > LanguageSpecMap;
376   static LanguageSpecMap language_map;
377 
378   if (language_map.empty())
379   { // Init language_map
380     for(int i = 0; languages[i].language != NULL; ++i)
381       language_map[languages[i].language].push_back(&languages[i]);
382   }
383 
384   LanguageSpecMap::iterator i = language_map.find(language);
385   if (i != language_map.end())
386   {
387     std::vector<const LanguageSpec*>& lst = i->second;
388 
389     LanguageSpec tmpspec;
390     tmpspec.language = language.c_str();
391     tmpspec.country  = country.c_str();
392     tmpspec.modifier = modifier.c_str();
393     Language tmplang(&tmpspec);
394 
395     const LanguageSpec* best_match = 0;
396     int best_match_score = 0;
397     for(std::vector<const LanguageSpec*>::iterator j = lst.begin(); j != lst.end(); ++j)
398     { // Search for the language that best matches the given spec, value country more then modifier
399       int score = Language::match(Language(*j), tmplang);
400 
401       if (score > best_match_score)
402       {
403         best_match = *j;
404         best_match_score = score;
405       }
406     }
407     assert(best_match);
408     return Language(best_match);
409   }
410   else
411   {
412     return Language();
413   }
414 }
415 
416 Language
from_name(const std::string & spec_str)417 Language::from_name(const std::string& spec_str)
418 {
419   return from_env(resolve_language_alias(spec_str));
420 }
421 
422 Language
from_env(const std::string & env)423 Language::from_env(const std::string& env)
424 {
425   // Split LANGUAGE_COUNTRY.CODESET@MODIFIER into parts
426   std::string::size_type ln = env.find('_');
427   std::string::size_type dt = env.find('.');
428   std::string::size_type at = env.find('@');
429 
430   std::string language;
431   std::string country;
432   std::string codeset;
433   std::string modifier;
434 
435   //std::cout << ln << " " << dt << " " << at << std::endl;
436 
437   language = env.substr(0, std::min(std::min(ln, dt), at));
438 
439   if (ln != std::string::npos && ln+1 < env.size()) // _
440   {
441     country = env.substr(ln+1, (std::min(dt, at) == std::string::npos) ? std::string::npos : std::min(dt, at) - (ln+1));
442   }
443 
444   if (dt != std::string::npos && dt+1 < env.size()) // .
445   {
446     codeset = env.substr(dt+1, (at == std::string::npos) ? std::string::npos : (at - (dt+1)));
447   }
448 
449   if (at != std::string::npos && at+1 < env.size()) // @
450   {
451     modifier = env.substr(at+1);
452   }
453 
454   return from_spec(language, country, modifier);
455 }
456 
Language(const LanguageSpec * language_spec_)457 Language::Language(const LanguageSpec* language_spec_)
458   : language_spec(language_spec_)
459 {
460 }
461 
Language()462 Language::Language()
463   : language_spec(0)
464 {
465 }
466 
467 int
match(const Language & lhs,const Language & rhs)468 Language::match(const Language& lhs, const Language& rhs)
469 {
470   if (lhs.get_language() != rhs.get_language())
471   {
472     return 0;
473   }
474   else
475   {
476     static int match_tbl[3][3] = {
477       // modifier match, wildchard, miss
478       { 9, 8, 5 }, // country match
479       { 7, 6, 3 }, // country wildcard
480       { 4, 2, 1 }, // country miss
481     };
482 
483     int c;
484     if (lhs.get_country() == rhs.get_country())
485       c = 0;
486     else if (lhs.get_country().empty() || rhs.get_country().empty())
487       c = 1;
488     else
489       c = 2;
490 
491     int m;
492     if (lhs.get_modifier() == rhs.get_modifier())
493       m = 0;
494     else if (lhs.get_modifier().empty() || rhs.get_modifier().empty())
495       m = 1;
496     else
497       m = 2;
498 
499     return match_tbl[c][m];
500   }
501 }
502 
503 std::string
get_language() const504 Language::get_language() const
505 {
506   if (language_spec)
507     return language_spec->language;
508   else
509     return "";
510 }
511 
512 std::string
get_country() const513 Language::get_country()  const
514 {
515   if (language_spec && language_spec->country)
516     return language_spec->country;
517   else
518     return "";
519 }
520 
521 std::string
get_modifier() const522 Language::get_modifier() const
523 {
524   if (language_spec && language_spec->modifier)
525     return language_spec->modifier;
526   else
527     return "";
528 }
529 
530 std::string
get_name() const531 Language::get_name()  const
532 {
533   if (language_spec)
534     return language_spec->name;
535   else
536     return "";
537 }
538 
539 std::string
get_localized_name() const540 Language::get_localized_name() const
541 {
542   if(language_spec && language_spec->name_localized)
543     return language_spec->name_localized;
544   else
545     return this->get_name();
546 }
547 
548 std::string
str() const549 Language::str() const
550 {
551   if (language_spec)
552   {
553     std::string var;
554     var += language_spec->language;
555     if (language_spec->country)
556     {
557       var += "_";
558       var += language_spec->country;
559     }
560 
561     if (language_spec->modifier)
562     {
563       var += "@";
564       var += language_spec->modifier;
565     }
566     return var;
567   }
568   else
569   {
570     return "";
571   }
572 }
573 
574 bool
operator ==(const Language & rhs) const575 Language::operator==(const Language& rhs) const
576 {
577   return language_spec == rhs.language_spec;
578 }
579 
580 bool
operator !=(const Language & rhs) const581 Language::operator!=(const Language& rhs) const
582 {
583   return language_spec != rhs.language_spec;
584 }
585 
586 } // namespace tinygettext
587 
588 /* EOF */
589