1 //
2 // SuperTuxKart - a fun racing game with go-kart
3 // Copyright (C) 2006,-2015 2007, 2008 Joerg Henrichs
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 3
8 // of the License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19
20 // Note: the irrlicht include is only here (and esp. before including
21 // translation.hpp, which contradicts our style rule) to avoid the
22 // warning message " 'swprintf' : macro redefinition"
23 // This happens if libintl.h is included before irrlicht.h (since
24 // both files redefine swprintf).
25
26 #include "utils/translation.hpp"
27
28 #include <algorithm>
29 #include <cassert>
30 #include <cerrno>
31 #include <clocale>
32 #include <cstdio>
33 #include <cstdlib>
34 #include <cstring>
35 #include <cwchar>
36 #include <fstream>
37 #include <iostream>
38 #include <unordered_map>
39 #include <unordered_set>
40
41 #include "config/user_config.hpp"
42 #include "io/file_manager.hpp"
43 #include "utils/constants.hpp"
44 #include "utils/file_utils.hpp"
45 #include "utils/log.hpp"
46 #include "utils/string_utils.hpp"
47
48 #ifdef MOBILE_STK
49 #include "SDL_locale.h"
50 #endif
51
52 // set to 1 to debug i18n
53 #define TRANSLATE_VERBOSE 0
54 // Define TEST_BIDI to force right-to-left style for all languages
55 //#define TEST_BIDI
56
57 Translations* translations = NULL;
58
59 #ifdef LINUX // m_debug
60 #define PACKAGE "supertuxkart"
61 #endif
62
63 #ifndef SERVER_ONLY
64 std::map<std::string, std::string> Translations::m_localized_name;
65 std::map<std::string, std::map<std::string, irr::core::stringw> >
66 Translations::m_localized_country_codes;
67 // ============================================================================
68 std::unordered_map<char32_t,
69 std::pair<std::unordered_set<std::u32string>, size_t> > g_thai_dict;
70 // ============================================================================
/** Tells whether a code point lies in the range U+0E00..U+0E7F (inclusive).
 *  \param c Code point to test.
 *  \return True if \a c is inside that range.
 */
constexpr bool isThaiCP(char32_t c)
{
    return !(c < 0x0e00 || c > 0x0e7f);
}   // isThaiCP
75
76 // ============================================================================
77
78 const bool REMOVE_BOM = false;
79 using namespace tinygettext;
80 /** The list of available languages; this is global so that it is cached (and remains
81 even if the translations object is deleted and re-created) */
82 typedef std::vector<std::string> LanguageList;
83 static LanguageList g_language_list;
84
85 // ============================================================================
86 // Note : this method is not static because 'g_language_list' is initialized
87 // the first time Translations is constructed (despite being a global)
getLanguageList() const88 const LanguageList* Translations::getLanguageList() const
89 {
90 return &g_language_list;
91 }
92 #endif
93
94 // ----------------------------------------------------------------------------
Translations()95 Translations::Translations() //: m_dictionary_manager("UTF-16")
96 {
97 #ifndef SERVER_ONLY
98 m_dictionary_manager.add_directory(
99 file_manager->getAsset(FileManager::TRANSLATION,""));
100
101 if (g_language_list.size() == 0)
102 {
103 std::set<Language> languages = m_dictionary_manager.get_languages();
104
105 // English is always there but may be not found on file system
106 g_language_list.push_back("en");
107
108 for (const Language& language : languages)
109 {
110 if (language.str() == "en")
111 continue;
112
113 g_language_list.push_back(language.str());
114 }
115 }
116
117 if (m_localized_name.empty())
118 {
119 const std::string file_name = file_manager->getAsset("localized_name.txt");
120 try
121 {
122 std::ifstream in(FileUtils::getPortableReadingPath(file_name));
123 if (!in.is_open())
124 {
125 Log::error("translation", "error: failure opening: '%s'.",
126 file_name.c_str());
127 }
128 else
129 {
130 for (std::string line; std::getline(in, line, ';'); )
131 {
132 line = StringUtils::removeWhitespaces(line);
133
134 if (line.empty())
135 continue;
136
137 std::size_t pos = line.find("=");
138
139 if (pos == std::string::npos)
140 continue;
141
142 std::string name = line.substr(0, pos);
143 std::string localized_name = line.substr(pos + 1);
144
145 if (name.empty() || localized_name.empty())
146 continue;
147
148 if (localized_name == "0")
149 {
150 localized_name =
151 tinygettext::Language::from_name(name).get_name();
152 }
153 m_localized_name[name] = localized_name;
154 }
155 }
156 }
157 catch(std::exception& e)
158 {
159 Log::error("translation", "error: failure extract localized name.");
160 Log::error("translation", "%s", e.what());
161 }
162 }
163
164 if (m_localized_country_codes.empty())
165 {
166 const std::string file_name = file_manager->getAsset("country_names.tsv");
167 try
168 {
169 std::ifstream in(FileUtils::getPortableReadingPath(file_name));
170 if (!in.is_open())
171 {
172 Log::error("translation", "error: failure opening: '%s'.",
173 file_name.c_str());
174 }
175 else
176 {
177 std::vector<std::string> header;
178 std::string line;
179 while (!StringUtils::safeGetline(in, line).eof())
180 {
181 std::vector<std::string> lists = StringUtils::split(line, '\t');
182 if (lists.size() < 2)
183 {
184 Log::error("translation", "Invaild list.");
185 break;
186 }
187 if (lists[0] == "country_code")
188 {
189 header = lists;
190 continue;
191 }
192 if (lists.size() != header.size())
193 {
194 Log::error("translation", "Different column size.");
195 break;
196 }
197 if (m_localized_country_codes.find(lists[0]) ==
198 m_localized_country_codes.end())
199 {
200 m_localized_country_codes[lists[0]] =
201 std::map<std::string, irr::core::stringw>();
202 }
203 for (unsigned i = 1; i < lists.size(); i++)
204 {
205 auto& ret = m_localized_country_codes.at(lists[0]);
206 ret[header[i]] = StringUtils::utf8ToWide(lists[i]);
207 }
208 }
209 }
210 }
211 catch (std::exception& e)
212 {
213 Log::error("translation", "error: failure extract localized country name.");
214 Log::error("translation", "%s", e.what());
215 }
216 }
217
218 if (g_thai_dict.empty())
219 {
220 const std::string file_name = file_manager->getAsset("thaidict.txt");
221 try
222 {
223 std::ifstream in(FileUtils::getPortableReadingPath(file_name));
224 if (!in.is_open())
225 {
226 Log::error("translation", "error: failure opening: '%s'.",
227 file_name.c_str());
228 }
229 else
230 {
231 std::string line;
232 while (!StringUtils::safeGetline(in, line).eof())
233 {
234 const std::u32string& u32line = StringUtils::utf8ToUtf32(line);
235 char32_t thai = u32line[0];
236 if (u32line.empty() || !isThaiCP(thai))
237 continue;
238 if (g_thai_dict.find(thai) == g_thai_dict.end())
239 {
240 g_thai_dict[thai] =
241 {
242 std::make_pair(
243 std::unordered_set<std::u32string>{u32line},
244 u32line.size())
245 };
246 continue;
247 }
248 auto& ret = g_thai_dict.at(thai);
249 ret.first.insert(u32line);
250 if (ret.second < u32line.size())
251 ret.second = u32line.size();
252 }
253 }
254 }
255 catch (std::exception& e)
256 {
257 Log::error("translation", "error: failure extract Thai dictionary.");
258 Log::error("translation", "%s", e.what());
259 }
260 }
261 // LC_ALL does not work, sscanf will then not always be able
262 // to scan for example: s=-1.1,-2.3,-3.3 correctly, which is
263 // used in driveline files.
264 #if defined(WIN32) && !defined(__CYGWIN__)
265 // Windows does not have LC_MESSAGES
266 setlocale(LC_CTYPE, "");
267 #else
268 setlocale(LC_MESSAGES, "");
269 #endif
270
271
272 /*
273 bindtextdomain (PACKAGE, file_manager->getTranslationDir().c_str());
274
275 if (sizeof(wchar_t) == 4)
276 {
277 if (IS_LITTLE_ENDIAN) bind_textdomain_codeset(PACKAGE, "UTF-32LE");
278 else bind_textdomain_codeset(PACKAGE, "UTF-32BE");
279 }
280 else if (sizeof(wchar_t) == 2)
281 {
282 bind_textdomain_codeset(PACKAGE, "UTF-16LE");
283 }
284 else
285 {
286 fprintf(stderr, "Your wchar_t is neither 2 byte-long nor 4. What now??\n");
287 exit(1);
288 }
289
290 textdomain (PACKAGE);
291 */
292
293 /*
294 const std::set<Language>& languages = m_dictionary_manager.get_languages();
295 Log::info("Translatings", "Number of languages: %d", languages.size());
296 for (std::set<Language>::const_iterator i = languages.begin();
297 i != languages.end(); ++i)
298 {
299 const Language& language = *i;
300 Log::info("Translatings", "Env: %s", language.str());
301 Log::info("Translatings", "Name: %s", language.get_name());
302 Log::info("Translatings", "Language: %s", language.get_language());
303 Log::info("Translatings", "Country: %s", language.get_country());
304 Log::info("Translatings", "Modifier: %s", language.get_modifier());
305 }
306 */
307
308 const char *p_language = getenv("LANGUAGE");
309
310 std::string language;
311
312 if(p_language)
313 {
314 language=p_language;
315 }
316 else
317 {
318 const char *p_lang = getenv("LANG");
319
320 if(p_lang)
321 language = p_lang;
322 else
323 {
324 #ifdef MOBILE_STK
325 SDL_Locale* locale = SDL_GetPreferredLocales();
326 if (locale)
327 {
328 // First locale only
329 for (int l = 0; locale[l].language != NULL; l++)
330 {
331 language = locale[l].language;
332 // Convert deprecated language code
333 if (language == "iw")
334 language = "he";
335 else if (language == "in")
336 language = "id";
337 else if (language == "ji")
338 language = "yi";
339 if (locale[l].country != NULL)
340 {
341 language += "-";
342 language += locale[l].country;
343 }
344 // iOS specific
345 if (language.find("zh-Hans") != std::string::npos)
346 language = "zh_CN";
347 else if (language.find("zh-Hant") != std::string::npos)
348 language = "zh_TW";
349 language = StringUtils::findAndReplace(language, "-", "_");
350 break;
351 }
352 SDL_free(locale);
353 }
354 #elif defined(WIN32)
355 // Thanks to the frogatto developer for this code snippet:
356 char c[1024];
357 GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME,
358 c, 1024);
359 Log::verbose("translation", "GetLocaleInfo langname returns '%s'.",
360 c);
361 if(c[0])
362 {
363 language = c;
364 GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME,
365 c, 1024);
366 Log::verbose("translation",
367 "GetLocaleInfo tryname returns '%s'.", c);
368 if(c[0]) language += std::string("_")+c;
369 } // if c[0]
370 #endif
371 } // neither LANGUAGE nor LANG defined
372
373 }
374
375 if (language != "")
376 {
377 auto ignore_country = [](const std::string& test_full_form)
378 {
379 // Use a country to test if the test_full_form is supported by
380 // localized name
381 auto it = m_localized_country_codes.find("HK");
382 if (it != m_localized_country_codes.end())
383 return it->second.find(test_full_form) == it->second.end();
384 return true;
385 };
386
387 Log::verbose("translation", "Env var LANGUAGE = '%s'.",
388 language.c_str());
389
390 // Hong Kong use tranditional chinese, not zh_CN which C > T
391 language = StringUtils::findAndReplace(language, "zh_HK", "zh_TW");
392
393 if (language.find(":") != std::string::npos)
394 {
395 std::vector<std::string> langs = StringUtils::split(language, ':');
396 Language l;
397
398 for (unsigned int curr=0; curr<langs.size(); curr++)
399 {
400 l = Language::from_env(langs[curr]);
401 if (l)
402 {
403 Log::verbose("translation", "Language '%s'.",
404 l.get_name().c_str());
405 m_dictionary = m_dictionary_manager.get_dictionary(l);
406 break;
407 }
408 }
409
410 m_current_language_name = l.get_name();
411 m_current_language_name_code = l.get_language();
412 m_current_language_tag = m_current_language_name_code;
413 if (!l.get_country().empty() && !ignore_country(
414 m_current_language_name_code + "-" + l.get_country()))
415 {
416 m_current_language_tag += "-";
417 m_current_language_tag += l.get_country();
418 }
419 if (!l)
420 {
421 m_dictionary = m_dictionary_manager.get_dictionary();
422 }
423 }
424 else
425 {
426 const Language& tgtLang = Language::from_env(language);
427 if (!tgtLang)
428 {
429 Log::warn("Translation", "Unsupported language '%s'", language.c_str());
430 UserConfigParams::m_language = "system";
431 m_current_language_name = "Default language";
432 m_current_language_name_code = "en";
433 m_current_language_tag = "en";
434 m_dictionary = m_dictionary_manager.get_dictionary();
435 }
436 else
437 {
438 m_current_language_name = tgtLang.get_name();
439 m_current_language_name_code = tgtLang.get_language();
440 m_current_language_tag = m_current_language_name_code;
441 if (!tgtLang.get_country().empty() && !ignore_country(
442 m_current_language_name_code + "-" + tgtLang.get_country()))
443 {
444 m_current_language_tag += "-";
445 m_current_language_tag += tgtLang.get_country();
446 }
447 Log::verbose("translation", "Language '%s'.", m_current_language_name.c_str());
448 m_dictionary = m_dictionary_manager.get_dictionary(tgtLang);
449 }
450 }
451 }
452 else
453 {
454 m_current_language_name = "Default language";
455 m_current_language_name_code = "en";
456 m_current_language_tag = m_current_language_name_code;
457 m_dictionary = m_dictionary_manager.get_dictionary();
458 }
459
460 #endif
461 } // Translations
462
463 // ----------------------------------------------------------------------------
~Translations()464 Translations::~Translations()
465 {
466 } // ~Translations
467
468 // ----------------------------------------------------------------------------
469 /**
470 * \param original Message to translate
471 * \param context Optional, can be set to differentiate 2 strings that are identical
472 * in English but could be different in other languages
473 */
w_gettext(const wchar_t * original,const char * context)474 irr::core::stringw Translations::w_gettext(const wchar_t* original, const char* context)
475 {
476 std::string in = StringUtils::wideToUtf8(original);
477 return w_gettext(in.c_str(), context);
478 } // w_gettext
479
480 // ----------------------------------------------------------------------------
481 /**
482 * \param original Message to translate
483 * \param context Optional, can be set to differentiate 2 strings that are identical
484 * in English but could be different in other languages
485 */
w_gettext(const char * original,const char * context)486 irr::core::stringw Translations::w_gettext(const char* original, const char* context)
487 {
488
489 #ifdef SERVER_ONLY
490 return L"";
491 #else
492
493 if (original[0] == '\0') return L"";
494
495 #if TRANSLATE_VERBOSE
496 Log::info("Translations", "Translating %s", original);
497 #endif
498
499 const std::string& original_t = (context == NULL ?
500 m_dictionary.translate(original) :
501 m_dictionary.translate_ctxt(context, original));
502 // print
503 //for (int n=0;; n+=4)
504 const irr::core::stringw wide = StringUtils::utf8ToWide(original_t);
505 const wchar_t* out_ptr = wide.c_str();
506 if (REMOVE_BOM) out_ptr++;
507
508 #if TRANSLATE_VERBOSE
509 std::wcout << L" translation : " << out_ptr << std::endl;
510 #endif
511
512 return wide;
513 #endif
514
515 } // w_gettext
516
517 // ----------------------------------------------------------------------------
518 /**
519 * \param singular Message to translate in singular form
520 * \param plural Message to translate in plural form (can be the same as the singular form)
521 * \param num Count used to obtain the correct plural form.
522 * \param context Optional, can be set to differentiate 2 strings that are identical
523 * in English but could be different in other languages
524 */
w_ngettext(const wchar_t * singular,const wchar_t * plural,int num,const char * context)525 irr::core::stringw Translations::w_ngettext(const wchar_t* singular, const wchar_t* plural, int num, const char* context)
526 {
527 std::string in = StringUtils::wideToUtf8(singular);
528 std::string in2 = StringUtils::wideToUtf8(plural);
529 return w_ngettext(in.c_str(), in2.c_str(), num, context);
530 } // w_ngettext
531
532 // ----------------------------------------------------------------------------
533 /**
534 * \param singular Message to translate in singular form
535 * \param plural Message to translate in plural form (can be the same as the singular form)
536 * \param num Count used to obtain the correct plural form.
537 * \param context Optional, can be set to differentiate 2 strings that are identical
538 * in English but could be different in other languages
539 */
w_ngettext(const char * singular,const char * plural,int num,const char * context)540 irr::core::stringw Translations::w_ngettext(const char* singular, const char* plural, int num, const char* context)
541 {
542 #ifdef SERVER_ONLY
543 return L"";
544
545 #else
546
547 const std::string& res = (context == NULL ?
548 m_dictionary.translate_plural(singular, plural, num) :
549 m_dictionary.translate_ctxt_plural(context, singular, plural, num));
550
551 const irr::core::stringw wide = StringUtils::utf8ToWide(res);
552 const wchar_t* out_ptr = wide.c_str();
553 if (REMOVE_BOM) out_ptr++;
554
555 #if TRANSLATE_VERBOSE
556 std::wcout << L" translation : " << out_ptr << std::endl;
557 #endif
558
559 return wide;
560 #endif
561
562 } // w_ngettext
563
564 // ----------------------------------------------------------------------------
565 #ifndef SERVER_ONLY
getCurrentAllChar()566 std::set<wchar_t> Translations::getCurrentAllChar()
567 {
568 return m_dictionary.get_all_used_chars();
569 } // getCurrentAllChar
570
571 // ----------------------------------------------------------------------------
getCurrentLanguageName()572 std::string Translations::getCurrentLanguageName()
573 {
574 return m_current_language_name;
575 //return m_dictionary_manager.get_language().get_name();
576 } // getCurrentLanguageName
577
578 // ----------------------------------------------------------------------------
getCurrentLanguageNameCode()579 std::string Translations::getCurrentLanguageNameCode()
580 {
581 return m_current_language_name_code;
582 } // getCurrentLanguageNameCode
583
584 // ----------------------------------------------------------------------------
getLocalizedName(const std::string & str) const585 const std::string& Translations::getLocalizedName(const std::string& str) const
586 {
587 std::map<std::string, std::string>::const_iterator n = m_localized_name.find(str);
588 assert (n != m_localized_name.end());
589 return n->second;
590 } // getLocalizedName
591
592 // ----------------------------------------------------------------------------
593 /* Convert 2-letter country code to localized readable name.
594 */
getLocalizedCountryName(const std::string & country_code) const595 irr::core::stringw Translations::getLocalizedCountryName(const std::string& country_code) const
596 {
597 auto it = m_localized_country_codes.find(country_code);
598 // If unknown 2 letter country just return the same
599 if (it == m_localized_country_codes.end())
600 return StringUtils::utf8ToWide(country_code);
601 auto name_itr = it->second.find(m_current_language_tag);
602 if (name_itr != it->second.end())
603 return name_itr->second;
604 // If there should be invalid language tag, use en (which always exists)
605 name_itr = it->second.find("en");
606 if (name_itr != it->second.end())
607 return name_itr->second;
608 // Fallback
609 return StringUtils::utf8ToWide(country_code);
610 } // getLocalizedCountryName
611
612 // ----------------------------------------------------------------------------
613 /* Insert breakmark to thai sentence according to thai word dictionary, which
614 * adds a mark in the begining of a thai vocabulary
615 */
insertThaiBreakMark(const std::u32string & thai,std::vector<bool> & breakable)616 void Translations::insertThaiBreakMark(const std::u32string& thai,
617 std::vector<bool>& breakable)
618 {
619 if (thai.size() < 3)
620 return;
621 for (size_t i = 0; i < thai.size();)
622 {
623 char32_t t = thai[i];
624 if (i >= thai.size() - 2 || !isThaiCP(t))
625 {
626 i++;
627 continue;
628 }
629 auto ret = g_thai_dict.find(t);
630 if (ret == g_thai_dict.end())
631 {
632 i++;
633 continue;
634 }
635 size_t checked_word = 1;
636 const size_t max_checking_word = ret->second.second;
637 for (size_t j = i + 1;; j++)
638 {
639 if (j - i > max_checking_word || j > thai.size())
640 break;
641 const std::u32string& ss = thai.substr(i, j - i);
642 if (ret->second.first.find(ss) != ret->second.first.end())
643 {
644 if (ss.size() > checked_word)
645 checked_word = ss.size();
646 if (i != 0)
647 breakable[i - 1] = true;
648 }
649 }
650 i += checked_word;
651 }
652 } // insertThaiBreakMark
653
654 #endif
655