1 /* GRAPHITE2 LICENSING 2 3 Copyright 2010, SIL International 4 All rights reserved. 5 6 This library is free software; you can redistribute it and/or modify 7 it under the terms of the GNU Lesser General Public License as published 8 by the Free Software Foundation; either version 2.1 of License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should also have received a copy of the GNU Lesser General Public 17 License along with this library in the file named "LICENSE". 18 If not, write to the Free Software Foundation, 51 Franklin Street, 19 Suite 500, Boston, MA 02110-1335, USA or visit their web page on the 20 internet at http://www.fsf.org/licenses/lgpl.html. 21 22 Alternatively, the contents of this file may be used under the terms of the 23 Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public 24 License, as published by the Free Software Foundation, either version 2 25 of the License or (at your option) any later version. 26 */ 27 #pragma once 28 #include <cstring> 29 #include <cassert> 30 31 #include "inc/Main.h" 32 33 34 namespace graphite2 { 35 36 struct IsoLangEntry 37 { 38 unsigned short mnLang; 39 char maLangStr[4]; 40 char maCountry[3]; 41 }; 42 43 // Windows Language ID, Locale ISO-639 language, country code as used in 44 // naming table of OpenType fonts 45 const IsoLangEntry LANG_ENTRIES[] = { 46 { 0x0401, "ar","SA" }, // Arabic Saudi Arabia 47 { 0x0402, "bg","BG" }, // Bulgarian Bulgaria 48 { 0x0403, "ca","ES" }, // Catalan Catalan 49 { 0x0404, "zh","TW" }, // Chinese Taiwan 50 { 0x0405, "cs","CZ" }, // Czech Czech Republic 51 { 0x0406, "da","DK" }, // Danish Denmark 52 { 0x0407, "de","DE" }, // German Germany 53 { 0x0408, "el","GR" }, // Greek Greece 54 { 0x0409, "en","US" }, // English United States 55 { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain 56 { 0x040B, "fi","FI" }, // Finnish Finland 57 { 0x040C, "fr","FR" }, // French France 58 { 0x040D, "he","IL" }, // Hebrew Israel 59 { 0x040E, "hu","HU" }, // Hungarian Hungary 60 { 0x040F, "is","IS" }, // Icelandic Iceland 61 { 0x0410, "it","IT" }, // Italian Italy 62 { 0x0411, "jp","JP" }, // Japanese Japan 63 { 0x0412, "ko","KR" }, // Korean Korea 64 { 0x0413, "nl","NL" }, // Dutch Netherlands 65 { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway 66 { 0x0415, "pl","PL" }, // Polish Poland 67 { 0x0416, "pt","BR" }, // Portuguese Brazil 68 { 0x0417, "rm","CH" }, // Romansh Switzerland 69 { 0x0418, "ro","RO" }, // Romanian Romania 70 { 0x0419, "ru","RU" }, // Russian Russia 71 { 0x041A, "hr","HR" }, // Croatian Croatia 72 { 0x041B, "sk","SK" }, // Slovak Slovakia 73 { 0x041C, "sq","AL" }, // Albanian Albania 74 { 0x041D, "sv","SE" }, // Swedish Sweden 75 { 0x041E, "th","TH" }, // Thai Thailand 76 { 0x041F, "tr","TR" }, // Turkish Turkey 77 { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan 78 { 0x0421, "id","ID" }, // Indonesian Indonesia 79 { 0x0422, "uk","UA" }, // Ukrainian Ukraine 80 { 0x0423, "be","BY" }, // Belarusian Belarus 81 { 0x0424, "sl","SI" }, // Slovenian Slovenia 82 { 0x0425, "et","EE" }, // Estonian Estonia 83 { 0x0426, "lv","LV" }, // Latvian Latvia 84 { 0x0427, "lt","LT" }, // Lithuanian Lithuania 85 { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan 86 { 0x042A, "vi","VN" }, // Vietnamese Vietnam 87 { 0x042B, "hy","AM" }, // Armenian Armenia 88 { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan 89 { 0x042D, "eu","" }, // Basque Basque 90 { 0x042E, "hsb","DE" }, // Upper Sorbian Germany 91 { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia 92 { 0x0432, "tn","ZA" }, // Setswana South Africa 93 { 0x0434, "xh","ZA" }, // isiXhosa South Africa 94 { 0x0435, "zu","ZA" }, // isiZulu South Africa 95 { 0x0436, "af","ZA" }, // Afrikaans South Africa 96 { 0x0437, "ka","GE" }, // Georgian Georgia 97 { 0x0438, "fo","FO" }, // Faroese Faroe Islands 98 { 0x0439, "hi","IN" }, // Hindi India 99 { 0x043A, "mt","MT" }, // Maltese Malta 100 { 0x043B, "se","NO" }, // Sami (Northern) Norway 101 { 0x043E, "ms","MY" }, // Malay Malaysia 102 { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan 103 { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan 104 { 0x0441, "sw","KE" }, // Kiswahili Kenya 105 { 0x0442, "tk","TM" }, // Turkmen Turkmenistan 106 { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan 107 { 0x0444, "tt","RU" }, // Tatar Russia 108 { 0x0445, "bn","IN" }, // Bengali India 109 { 0x0446, "pa","IN" }, // Punjabi India 110 { 0x0447, "gu","IN" }, // Gujarati India 111 { 0x0448, "or","IN" }, // Oriya India 112 { 0x0448, "wo","SN" }, // Wolof Senegal 113 { 0x0449, "ta","IN" }, // Tamil India 114 { 0x044A, "te","IN" }, // Telugu India 115 { 0x044B, "kn","IN" }, // Kannada India 116 { 0x044C, "ml","IN" }, // Malayalam India 117 { 0x044D, "as","IN" }, // Assamese India 118 { 0x044E, "mr","IN" }, // Marathi India 119 { 0x044F, "sa","IN" }, // Sanskrit India 120 { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia 121 { 0x0451, "bo","CN" }, // Tibetan PRC 122 { 0x0452, "cy","GB" }, // Welsh United Kingdom 123 { 0x0453, "km","KH" }, // Khmer Cambodia 124 { 0x0454, "lo","LA" }, // Lao Lao P.D.R. 125 { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore 126 { 0x0456, "gl","ES" }, // Galician Galician 127 { 0x0457, "kok","IN" }, // Konkani India 128 { 0x045A, "syr","TR" }, // Syriac Syria 129 { 0x045B, "si","LK" }, // Sinhala Sri Lanka 130 { 0x045D, "iu","CA" }, // Inuktitut Canada 131 { 0x045E, "am","ET" }, // Amharic Ethiopia 132 { 0x0461, "ne","NP" }, // Nepali Nepal 133 { 0x0462, "fy","NL" }, // Frisian Netherlands 134 { 0x0463, "ps","AF" }, // Pashto Afghanistan 135 { 0x0464, "fil","PH" }, // Filipino Philippines 136 { 0x0465, "dv","MV" }, // Divehi Maldives 137 { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria 138 { 0x046A, "yo","NG" }, // Yoruba Nigeria 139 { 0x046B, "qu","BO" }, // Quechua Bolivia 140 { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa 141 { 0x046D, "ba","RU" }, // Bashkir Russia 142 { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg 143 { 0x046F, "kl","GL" }, // Greenlandic Greenland 144 { 0x0470, "ig","NG" }, // Igbo Nigeria 145 { 0x0478, "ii","CN" }, // Yi PRC 146 { 0x047A, "arn","CL" }, // Mapudungun Chile 147 { 0x047C, "moh","CA" }, // Mohawk Mohawk 148 { 0x047E, "br","FR" }, // Breton France 149 { 0x0480, "ug","CN" }, // Uighur PRC 150 { 0x0481, "mi","NZ" }, // Maori New Zealand 151 { 0x0482, "oc","FR" }, // Occitan France 152 { 0x0483, "co","FR" }, // Corsican France 153 { 0x0484, "gsw","FR" }, // Alsatian France 154 { 0x0485, "sah","RU" }, // Yakut Russia 155 { 0x0486, "qut","GT" }, // K'iche Guatemala 156 { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda 157 { 0x048C, "gbz","AF" }, // Dari Afghanistan 158 { 0x0801, "ar","IQ" }, // Arabic Iraq 159 { 0x0804, "zn","CH" }, // Chinese People's Republic of China 160 { 0x0807, "de","CH" }, // German Switzerland 161 { 0x0809, "en","GB" }, // English United Kingdom 162 { 0x080A, "es","MX" }, // Spanish Mexico 163 { 0x080C, "fr","BE" }, // French Belgium 164 { 0x0810, "it","CH" }, // Italian Switzerland 165 { 0x0813, "nl","BE" }, // Dutch Belgium 166 { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway 167 { 0x0816, "pt","PT" }, // Portuguese Portugal 168 { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia 169 { 0x081D, "sv","FI" }, // Sweden Finland 170 { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan 171 { 0x082E, "dsb","DE" }, // Lower Sorbian Germany 172 { 0x083B, "se","SE" }, // Sami (Northern) Sweden 173 { 0x083C, "ga","IE" }, // Irish Ireland 174 { 0x083E, "ms","BN" }, // Malay Brunei Darussalam 175 { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan 176 { 0x0845, "bn","BD" }, // Bengali Bangladesh 177 { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China 178 { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada 179 { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria 180 { 0x086B, "es","EC" }, // Quechua Ecuador 181 { 0x0C01, "ar","EG" }, // Arabic Egypt 182 { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R. 183 { 0x0C07, "de","AT" }, // German Austria 184 { 0x0C09, "en","AU" }, // English Australia 185 { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain 186 { 0x0C0C, "fr","CA" }, // French Canada 187 { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia 188 { 0x0C3B, "se","FI" }, // Sami (Northern) Finland 189 { 0x0C6B, "qu","PE" }, // Quechua Peru 190 { 0x1001, "ar","LY" }, // Arabic Libya 191 { 0x1004, "zh","SG" }, // Chinese Singapore 192 { 0x1007, "de","LU" }, // German Luxembourg 193 { 0x1009, "en","CA" }, // English Canada 194 { 0x100A, "es","GT" }, // Spanish Guatemala 195 { 0x100C, "fr","CH" }, // French Switzerland 196 { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina 197 { 0x103B, "smj","NO" }, // Sami (Lule) Norway 198 { 0x1401, "ar","DZ" }, // Arabic Algeria 199 { 0x1404, "zh","MO" }, // Chinese Macao S.A.R. 200 { 0x1407, "de","LI" }, // German Liechtenstein 201 { 0x1409, "en","NZ" }, // English New Zealand 202 { 0x140A, "es","CR" }, // Spanish Costa Rica 203 { 0x140C, "fr","LU" }, // French Luxembourg 204 { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina 205 { 0x143B, "smj","SE" }, // Sami (Lule) Sweden 206 { 0x1801, "ar","MA" }, // Arabic Morocco 207 { 0x1809, "en","IE" }, // English Ireland 208 { 0x180A, "es","PA" }, // Spanish Panama 209 { 0x180C, "fr","MC" }, // French Principality of Monoco 210 { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina 211 { 0x183B, "sma","NO" }, // Sami (Southern) Norway 212 { 0x1C01, "ar","TN" }, // Arabic Tunisia 213 { 0x1C09, "en","ZA" }, // English South Africa 214 { 0x1C0A, "es","DO" }, // Spanish Dominican Republic 215 { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina 216 { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden 217 { 0x2001, "ar","OM" }, // Arabic Oman 218 { 0x2009, "en","JM" }, // English Jamaica 219 { 0x200A, "es","VE" }, // Spanish Venezuela 220 { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina 221 { 0x203B, "sms","FI" }, // Sami (Skolt) Finland 222 { 0x2401, "ar","YE" }, // Arabic Yemen 223 { 0x2409, "en","BS" }, // English Caribbean 224 { 0x240A, "es","CO" }, // Spanish Colombia 225 { 0x243B, "smn","FI" }, // Sami (Inari) Finland 226 { 0x2801, "ar","SY" }, // Arabic Syria 227 { 0x2809, "en","BZ" }, // English Belize 228 { 0x280A, "es","PE" }, // Spanish Peru 229 { 0x2C01, "ar","JO" }, // Arabic Jordan 230 { 0x2C09, "en","TT" }, // English Trinidad and Tobago 231 { 0x2C0A, "es","AR" }, // Spanish Argentina 232 { 0x3001, "ar","LB" }, // Arabic Lebanon 233 { 0x3009, "en","ZW" }, // English Zimbabwe 234 { 0x300A, "es","EC" }, // Spanish Ecuador 235 { 0x3401, "ar","KW" }, // Arabic Kuwait 236 { 0x3409, "en","PH" }, // English Republic of the Philippines 237 { 0x340A, "es","CL" }, // Spanish Chile 238 { 0x3801, "ar","AE" }, // Arabic U.A.E. 239 { 0x380A, "es","UY" }, // Spanish Uruguay 240 { 0x3C01, "ar","BH" }, // Arabic Bahrain 241 { 0x3C0A, "es","PY" }, // Spanish Paraguay 242 { 0x4001, "ar","QA" }, // Arabic Qatar 243 { 0x4009, "en","IN" }, // English India 244 { 0x400A, "es","BO" }, // Spanish Bolivia 245 { 0x4409, "en","MY" }, // English Malaysia 246 { 0x440A, "es","SV" }, // Spanish El Salvador 247 { 0x4809, "en","SG" }, // English Singapore 248 { 0x480A, "es","HN" }, // Spanish Honduras 249 { 0x4C0A, "es","NI" }, // Spanish Nicaragua 250 { 0x500A, "es","PR" }, // Spanish Puerto Rico 251 { 0x540A, "es","US" } // Spanish United States 252 }; 253 254 class Locale2Lang 255 { 256 Locale2Lang(const Locale2Lang &); 257 Locale2Lang & operator = (const Locale2Lang &); 258 259 public: Locale2Lang()260 Locale2Lang() : mSeedPosition(128) 261 { 262 memset((void*)mLangLookup, 0, sizeof(mLangLookup)); 263 // create a tri lookup on first 2 letters of language code 264 static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); 265 for (int i = 0; i < maxIndex; i++) 266 { 267 size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a'; 268 size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a'; 269 if (mLangLookup[a][b]) 270 { 271 const IsoLangEntry ** old = mLangLookup[a][b]; 272 int len = 1; 273 while (old[len]) len++; 274 len += 2; 275 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len); 276 if (!mLangLookup[a][b]) 277 { 278 mLangLookup[a][b] = old; 279 continue; 280 } 281 mLangLookup[a][b][--len] = NULL; 282 mLangLookup[a][b][--len] = &LANG_ENTRIES[i]; 283 while (--len >= 0) 284 { 285 assert(len >= 0); 286 mLangLookup[a][b][len] = old[len]; 287 } 288 free(old); 289 } 290 else 291 { 292 mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2); 293 if (!mLangLookup[a][b]) continue; 294 mLangLookup[a][b][1] = NULL; 295 mLangLookup[a][b][0] = &LANG_ENTRIES[i]; 296 } 297 } 298 while (2 * mSeedPosition < maxIndex) 299 mSeedPosition *= 2; 300 }; ~Locale2Lang()301 ~Locale2Lang() 302 { 303 for (int i = 0; i != 26; ++i) 304 for (int j = 0; j != 26; ++j) 305 free(mLangLookup[i][j]); 306 } getMsId(const char * locale)307 unsigned short getMsId(const char * locale) const 308 { 309 size_t length = strlen(locale); 310 size_t langLength = length; 311 const char * language = locale; 312 const char * script = NULL; 313 const char * region = NULL; 314 size_t regionLength = 0; 315 const char * dash = strchr(locale, '-'); 316 if (dash && (dash != locale)) 317 { 318 langLength = (dash - locale); 319 size_t nextPartLength = length - langLength - 1; 320 if (nextPartLength >= 2) 321 { 322 script = ++dash; 323 dash = strchr(dash, '-'); 324 if (dash) 325 { 326 nextPartLength = (dash - script); 327 region = ++dash; 328 } 329 if (nextPartLength == 2 && 330 (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) && 331 (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B)) 332 { 333 region = script; 334 regionLength = nextPartLength; 335 script = NULL; 336 } 337 else if (nextPartLength == 4) 338 { 339 if (dash) 340 { 341 dash = strchr(dash, '-'); 342 if (dash) 343 { 344 nextPartLength = (dash - region); 345 } 346 else 347 { 348 nextPartLength = langLength - (region - locale); 349 } 350 regionLength = nextPartLength; 351 } 352 } 353 } 354 } 355 size_t a = 'e' - 'a'; 356 size_t b = 'n' - 'a'; 357 unsigned short langId = 0; 358 int i = 0; 359 switch (langLength) 360 { 361 case 2: 362 { 363 a = language[0] - 'a'; 364 b = language[1] - 'a'; 365 if ((a < 26) && (b < 26) && mLangLookup[a][b]) 366 { 367 while (mLangLookup[a][b][i]) 368 { 369 if (mLangLookup[a][b][i]->maLangStr[2] != '\0') 370 { 371 ++i; 372 continue; 373 } 374 if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) 375 { 376 langId = mLangLookup[a][b][i]->mnLang; 377 break; 378 } 379 else if (langId == 0) 380 { 381 // possible fallback code 382 langId = mLangLookup[a][b][i]->mnLang; 383 } 384 ++i; 385 } 386 } 387 } 388 break; 389 case 3: 390 { 391 a = language[0] - 'a'; 392 b = language[1] - 'a'; 393 if (mLangLookup[a][b]) 394 { 395 while (mLangLookup[a][b][i]) 396 { 397 if (mLangLookup[a][b][i]->maLangStr[2] != language[2]) 398 { 399 ++i; 400 continue; 401 } 402 if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) 403 { 404 langId = mLangLookup[a][b][i]->mnLang; 405 break; 406 } 407 else if (langId == 0) 408 { 409 // possible fallback code 410 langId = mLangLookup[a][b][i]->mnLang; 411 } 412 ++i; 413 } 414 } 415 } 416 break; 417 default: 418 break; 419 } 420 if (langId == 0) langId = 0x409; 421 return langId; 422 } findEntryById(unsigned short langId)423 const IsoLangEntry * findEntryById(unsigned short langId) const 424 { 425 static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); 426 int window = mSeedPosition; 427 int guess = mSeedPosition - 1; 428 while (LANG_ENTRIES[guess].mnLang != langId) 429 { 430 window /= 2; 431 if (window == 0) return NULL; 432 guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window; 433 while (guess >= maxIndex) 434 { 435 window /= 2; 436 guess -= window; 437 assert(window); 438 } 439 } 440 return &LANG_ENTRIES[guess]; 441 } 442 443 CLASS_NEW_DELETE; 444 445 private: 446 const IsoLangEntry ** mLangLookup[26][26]; 447 int mSeedPosition; 448 }; 449 450 } // namespace graphite2 451