1 //
2 //  SuperTuxKart - a fun racing game with go-kart
3 //  Copyright (C) 2006,-2015 2007, 2008 Joerg Henrichs
4 //
5 //  This program is free software; you can redistribute it and/or
6 //  modify it under the terms of the GNU General Public License
7 //  as published by the Free Software Foundation; either version 3
8 //  of the License, or (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program; if not, write to the Free Software
17 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18 
19 
20 // Note: the irrlicht include is only here (and esp. before including
21 //       translation.hpp, which contradicts our style rule) to avoid the
22 //        warning message  "  'swprintf' : macro redefinition"
23 //       This happens if libintl.h is included before irrlicht.h (since
24 //       both files redefine swprintf).
25 
26 #include "utils/translation.hpp"
27 
28 #include <algorithm>
29 #include <cassert>
30 #include <cerrno>
31 #include <clocale>
32 #include <cstdio>
33 #include <cstdlib>
34 #include <cstring>
35 #include <cwchar>
36 #include <fstream>
37 #include <iostream>
38 #include <unordered_map>
39 #include <unordered_set>
40 
41 #include "config/user_config.hpp"
42 #include "io/file_manager.hpp"
43 #include "utils/constants.hpp"
44 #include "utils/file_utils.hpp"
45 #include "utils/log.hpp"
46 #include "utils/string_utils.hpp"
47 
48 #ifdef MOBILE_STK
49 #include "SDL_locale.h"
50 #endif
51 
// set to 1 to debug i18n: w_gettext() then logs every message it is asked
// to translate, and w_gettext()/w_ngettext() print each translated result
// to std::wcout.
#define TRANSLATE_VERBOSE 0
// Define TEST_BIDI to force right-to-left style for all languages
// (NOTE(review): TEST_BIDI is not referenced in the part of the file
// visible here - confirm it is still used elsewhere.)
//#define TEST_BIDI

// Global pointer to the Translations object. It starts out as NULL; nothing
// in this file assigns it.
Translations* translations = NULL;

// PACKAGE is the gettext text domain name. In this file it is only mentioned
// by the commented-out bindtextdomain()/textdomain() code in the constructor.
#ifdef LINUX // m_debug
#define PACKAGE "supertuxkart"
#endif
62 
#ifndef SERVER_ONLY
// Maps the name found before the '=' of an entry in localized_name.txt to
// the localized name found after it. Filled once by the Translations
// constructor and shared by all instances.
std::map<std::string, std::string> Translations::m_localized_name;
// Maps a country code (first column of country_names.tsv) to a map from the
// column header of that file to the localized country name in that column.
// Filled once by the Translations constructor.
std::map<std::string, std::map<std::string, irr::core::stringw> >
    Translations::m_localized_country_codes;
// ============================================================================
// Thai word dictionary read from thaidict.txt, keyed by the first code point
// of a word. Each value holds the set of all words starting with that code
// point and the length (in code points) of the longest of those words.
std::unordered_map<char32_t,
    std::pair<std::unordered_set<std::u32string>, size_t> > g_thai_dict;
70 // ============================================================================
/** Tells whether a code point lies in the range U+0E00 - U+0E7F (inclusive),
 *  which is the range this file treats as Thai.
 *  \param c The UTF-32 code point to test.
 *  \return True if \a c is inside that range.
 */
constexpr bool isThaiCP(char32_t c)
{
    return !(c < 0x0e00 || c > 0x0e7f);
}   // isThaiCP
75 
76 // ============================================================================
77 
// When true, the pointer used for the TRANSLATE_VERBOSE debug output of
// w_gettext()/w_ngettext() skips the first character of the translated
// string. The string returned by those functions is not affected.
const bool REMOVE_BOM = false;
using namespace tinygettext;
/** The list of available languages; this is global so that it is cached (and remains
    even if the translations object is deleted and re-created). It is filled by
    the first Translations object that gets constructed. */
typedef std::vector<std::string> LanguageList;
static LanguageList g_language_list;
84 
85 // ============================================================================
86 // Note : this method is not static because 'g_language_list' is initialized
87 //        the first time Translations is constructed (despite being a global)
getLanguageList() const88 const LanguageList* Translations::getLanguageList() const
89 {
90     return &g_language_list;
91 }
92 #endif
93 
94 // ----------------------------------------------------------------------------
Translations()95 Translations::Translations() //: m_dictionary_manager("UTF-16")
96 {
97 #ifndef SERVER_ONLY
98     m_dictionary_manager.add_directory(
99                         file_manager->getAsset(FileManager::TRANSLATION,""));
100 
101     if (g_language_list.size() == 0)
102     {
103         std::set<Language> languages = m_dictionary_manager.get_languages();
104 
105         // English is always there but may be not found on file system
106         g_language_list.push_back("en");
107 
108         for (const Language& language : languages)
109         {
110             if (language.str() == "en")
111                 continue;
112 
113             g_language_list.push_back(language.str());
114         }
115     }
116 
117     if (m_localized_name.empty())
118     {
119         const std::string file_name = file_manager->getAsset("localized_name.txt");
120         try
121         {
122             std::ifstream in(FileUtils::getPortableReadingPath(file_name));
123             if (!in.is_open())
124             {
125                 Log::error("translation", "error: failure opening: '%s'.",
126                     file_name.c_str());
127             }
128             else
129             {
130                 for (std::string line; std::getline(in, line, ';'); )
131                 {
132                     line = StringUtils::removeWhitespaces(line);
133 
134                     if (line.empty())
135                         continue;
136 
137                     std::size_t pos = line.find("=");
138 
139                     if (pos == std::string::npos)
140                         continue;
141 
142                     std::string name = line.substr(0, pos);
143                     std::string localized_name = line.substr(pos + 1);
144 
145                     if (name.empty() || localized_name.empty())
146                         continue;
147 
148                     if (localized_name == "0")
149                     {
150                         localized_name =
151                             tinygettext::Language::from_name(name).get_name();
152                     }
153                     m_localized_name[name] = localized_name;
154                 }
155             }
156         }
157         catch(std::exception& e)
158         {
159             Log::error("translation", "error: failure extract localized name.");
160             Log::error("translation", "%s", e.what());
161         }
162     }
163 
164     if (m_localized_country_codes.empty())
165     {
166         const std::string file_name = file_manager->getAsset("country_names.tsv");
167         try
168         {
169             std::ifstream in(FileUtils::getPortableReadingPath(file_name));
170             if (!in.is_open())
171             {
172                 Log::error("translation", "error: failure opening: '%s'.",
173                     file_name.c_str());
174             }
175             else
176             {
177                 std::vector<std::string> header;
178                 std::string line;
179                 while (!StringUtils::safeGetline(in, line).eof())
180                 {
181                     std::vector<std::string> lists = StringUtils::split(line, '\t');
182                     if (lists.size() < 2)
183                     {
184                         Log::error("translation", "Invaild list.");
185                         break;
186                     }
187                     if (lists[0] == "country_code")
188                     {
189                         header = lists;
190                         continue;
191                     }
192                     if (lists.size() != header.size())
193                     {
194                         Log::error("translation", "Different column size.");
195                         break;
196                     }
197                     if (m_localized_country_codes.find(lists[0]) ==
198                         m_localized_country_codes.end())
199                     {
200                         m_localized_country_codes[lists[0]] =
201                         std::map<std::string, irr::core::stringw>();
202                     }
203                     for (unsigned i = 1; i < lists.size(); i++)
204                     {
205                         auto& ret = m_localized_country_codes.at(lists[0]);
206                         ret[header[i]] = StringUtils::utf8ToWide(lists[i]);
207                     }
208                 }
209             }
210         }
211         catch (std::exception& e)
212         {
213             Log::error("translation", "error: failure extract localized country name.");
214             Log::error("translation", "%s", e.what());
215         }
216     }
217 
218     if (g_thai_dict.empty())
219     {
220         const std::string file_name = file_manager->getAsset("thaidict.txt");
221         try
222         {
223             std::ifstream in(FileUtils::getPortableReadingPath(file_name));
224             if (!in.is_open())
225             {
226                 Log::error("translation", "error: failure opening: '%s'.",
227                     file_name.c_str());
228             }
229             else
230             {
231                 std::string line;
232                 while (!StringUtils::safeGetline(in, line).eof())
233                 {
234                     const std::u32string& u32line = StringUtils::utf8ToUtf32(line);
235                     char32_t thai = u32line[0];
236                     if (u32line.empty() || !isThaiCP(thai))
237                         continue;
238                     if (g_thai_dict.find(thai) == g_thai_dict.end())
239                     {
240                         g_thai_dict[thai] =
241                             {
242                                 std::make_pair(
243                                     std::unordered_set<std::u32string>{u32line},
244                                     u32line.size())
245                             };
246                         continue;
247                     }
248                     auto& ret = g_thai_dict.at(thai);
249                     ret.first.insert(u32line);
250                     if (ret.second < u32line.size())
251                         ret.second = u32line.size();
252                 }
253             }
254         }
255         catch (std::exception& e)
256         {
257             Log::error("translation", "error: failure extract Thai dictionary.");
258             Log::error("translation", "%s", e.what());
259         }
260     }
261     // LC_ALL does not work, sscanf will then not always be able
262     // to scan for example: s=-1.1,-2.3,-3.3 correctly, which is
263     // used in driveline files.
264 #if defined(WIN32) && !defined(__CYGWIN__)
265     // Windows does not have LC_MESSAGES
266     setlocale(LC_CTYPE,    "");
267 #else
268     setlocale(LC_MESSAGES, "");
269 #endif
270 
271 
272     /*
273     bindtextdomain (PACKAGE, file_manager->getTranslationDir().c_str());
274 
275     if (sizeof(wchar_t) == 4)
276     {
277         if (IS_LITTLE_ENDIAN) bind_textdomain_codeset(PACKAGE, "UTF-32LE");
278         else                  bind_textdomain_codeset(PACKAGE, "UTF-32BE");
279     }
280     else if (sizeof(wchar_t) == 2)
281     {
282         bind_textdomain_codeset(PACKAGE, "UTF-16LE");
283     }
284     else
285     {
286         fprintf(stderr, "Your wchar_t is neither 2 byte-long nor 4. What now??\n");
287         exit(1);
288     }
289 
290     textdomain (PACKAGE);
291     */
292 
293     /*
294     const std::set<Language>& languages = m_dictionary_manager.get_languages();
295     Log::info("Translatings", "Number of languages: %d", languages.size());
296     for (std::set<Language>::const_iterator i = languages.begin();
297                                             i != languages.end(); ++i)
298     {
299         const Language& language = *i;
300         Log::info("Translatings", "Env:       %s", language.str());
301         Log::info("Translatings", "Name:      %s", language.get_name());
302         Log::info("Translatings", "Language:  %s", language.get_language());
303         Log::info("Translatings", "Country:   %s", language.get_country());
304         Log::info("Translatings", "Modifier:  %s", language.get_modifier());
305     }
306     */
307 
308     const char *p_language = getenv("LANGUAGE");
309 
310     std::string language;
311 
312     if(p_language)
313     {
314         language=p_language;
315     }
316     else
317     {
318         const char *p_lang = getenv("LANG");
319 
320         if(p_lang)
321             language = p_lang;
322         else
323         {
324 #ifdef MOBILE_STK
325             SDL_Locale* locale = SDL_GetPreferredLocales();
326             if (locale)
327             {
328                 // First locale only
329                 for (int l = 0; locale[l].language != NULL; l++)
330                 {
331                     language = locale[l].language;
332                     // Convert deprecated language code
333                     if (language == "iw")
334                         language = "he";
335                     else if (language == "in")
336                         language = "id";
337                     else if (language == "ji")
338                         language = "yi";
339                     if (locale[l].country != NULL)
340                     {
341                         language += "-";
342                         language += locale[l].country;
343                     }
344                     // iOS specific
345                     if (language.find("zh-Hans") != std::string::npos)
346                         language = "zh_CN";
347                     else if (language.find("zh-Hant") != std::string::npos)
348                         language = "zh_TW";
349                     language = StringUtils::findAndReplace(language, "-", "_");
350                     break;
351                 }
352                 SDL_free(locale);
353             }
354 #elif defined(WIN32)
355             // Thanks to the frogatto developer for this code snippet:
356             char c[1024];
357             GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME,
358                            c, 1024);
359             Log::verbose("translation", "GetLocaleInfo langname returns '%s'.",
360                          c);
361             if(c[0])
362             {
363                 language = c;
364                 GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME,
365                                c, 1024);
366                 Log::verbose("translation",
367                              "GetLocaleInfo tryname returns '%s'.", c);
368                 if(c[0]) language += std::string("_")+c;
369             }   // if c[0]
370 #endif
371         }   // neither LANGUAGE nor LANG defined
372 
373     }
374 
375     if (language != "")
376     {
377         auto ignore_country = [](const std::string& test_full_form)
378         {
379             // Use a country to test if the test_full_form is supported by
380             // localized name
381             auto it = m_localized_country_codes.find("HK");
382             if (it != m_localized_country_codes.end())
383                 return it->second.find(test_full_form) == it->second.end();
384             return true;
385         };
386 
387         Log::verbose("translation", "Env var LANGUAGE = '%s'.",
388                      language.c_str());
389 
390         // Hong Kong use tranditional chinese, not zh_CN which C > T
391         language = StringUtils::findAndReplace(language, "zh_HK", "zh_TW");
392 
393         if (language.find(":") != std::string::npos)
394         {
395             std::vector<std::string> langs = StringUtils::split(language, ':');
396             Language l;
397 
398             for (unsigned int curr=0; curr<langs.size(); curr++)
399             {
400                 l = Language::from_env(langs[curr]);
401                 if (l)
402                 {
403                     Log::verbose("translation", "Language '%s'.",
404                                  l.get_name().c_str());
405                     m_dictionary = m_dictionary_manager.get_dictionary(l);
406                     break;
407                 }
408             }
409 
410             m_current_language_name = l.get_name();
411             m_current_language_name_code = l.get_language();
412             m_current_language_tag = m_current_language_name_code;
413             if (!l.get_country().empty() && !ignore_country(
414                 m_current_language_name_code + "-" + l.get_country()))
415             {
416                 m_current_language_tag += "-";
417                 m_current_language_tag += l.get_country();
418             }
419             if (!l)
420             {
421                 m_dictionary = m_dictionary_manager.get_dictionary();
422             }
423         }
424         else
425         {
426             const Language& tgtLang = Language::from_env(language);
427             if (!tgtLang)
428             {
429                 Log::warn("Translation", "Unsupported language '%s'", language.c_str());
430                 UserConfigParams::m_language = "system";
431                 m_current_language_name = "Default language";
432                 m_current_language_name_code = "en";
433                 m_current_language_tag = "en";
434                 m_dictionary = m_dictionary_manager.get_dictionary();
435             }
436             else
437             {
438                 m_current_language_name = tgtLang.get_name();
439                 m_current_language_name_code = tgtLang.get_language();
440                 m_current_language_tag = m_current_language_name_code;
441                 if (!tgtLang.get_country().empty() && !ignore_country(
442                     m_current_language_name_code + "-" + tgtLang.get_country()))
443                 {
444                     m_current_language_tag += "-";
445                     m_current_language_tag += tgtLang.get_country();
446                 }
447                 Log::verbose("translation", "Language '%s'.", m_current_language_name.c_str());
448                 m_dictionary = m_dictionary_manager.get_dictionary(tgtLang);
449             }
450         }
451     }
452     else
453     {
454         m_current_language_name = "Default language";
455         m_current_language_name_code = "en";
456         m_current_language_tag = m_current_language_name_code;
457         m_dictionary = m_dictionary_manager.get_dictionary();
458     }
459 
460 #endif
461 }   // Translations
462 
463 // ----------------------------------------------------------------------------
~Translations()464 Translations::~Translations()
465 {
466 }   // ~Translations
467 
468 // ----------------------------------------------------------------------------
469 /**
470  * \param original Message to translate
471  * \param context  Optional, can be set to differentiate 2 strings that are identical
472  *                 in English but could be different in other languages
473  */
w_gettext(const wchar_t * original,const char * context)474 irr::core::stringw Translations::w_gettext(const wchar_t* original, const char* context)
475 {
476     std::string in = StringUtils::wideToUtf8(original);
477     return w_gettext(in.c_str(), context);
478 }   // w_gettext
479 
480 // ----------------------------------------------------------------------------
481 /**
482  * \param original Message to translate
483  * \param context  Optional, can be set to differentiate 2 strings that are identical
484  *                 in English but could be different in other languages
485  */
w_gettext(const char * original,const char * context)486 irr::core::stringw Translations::w_gettext(const char* original, const char* context)
487 {
488 
489 #ifdef SERVER_ONLY
490     return L"";
491 #else
492 
493     if (original[0] == '\0') return L"";
494 
495 #if TRANSLATE_VERBOSE
496     Log::info("Translations", "Translating %s", original);
497 #endif
498 
499     const std::string& original_t = (context == NULL ?
500                                      m_dictionary.translate(original) :
501                                      m_dictionary.translate_ctxt(context, original));
502     // print
503     //for (int n=0;; n+=4)
504     const irr::core::stringw wide = StringUtils::utf8ToWide(original_t);
505     const wchar_t* out_ptr = wide.c_str();
506     if (REMOVE_BOM) out_ptr++;
507 
508 #if TRANSLATE_VERBOSE
509     std::wcout << L"  translation : " << out_ptr << std::endl;
510 #endif
511 
512     return wide;
513 #endif
514 
515 }   // w_gettext
516 
517 // ----------------------------------------------------------------------------
518 /**
519  * \param singular Message to translate in singular form
520  * \param plural   Message to translate in plural form (can be the same as the singular form)
521  * \param num      Count used to obtain the correct plural form.
522  * \param context  Optional, can be set to differentiate 2 strings that are identical
523  *                 in English but could be different in other languages
524  */
w_ngettext(const wchar_t * singular,const wchar_t * plural,int num,const char * context)525 irr::core::stringw Translations::w_ngettext(const wchar_t* singular, const wchar_t* plural, int num, const char* context)
526 {
527     std::string in = StringUtils::wideToUtf8(singular);
528     std::string in2 = StringUtils::wideToUtf8(plural);
529     return w_ngettext(in.c_str(), in2.c_str(), num, context);
530 }   // w_ngettext
531 
532 // ----------------------------------------------------------------------------
533 /**
534  * \param singular Message to translate in singular form
535  * \param plural   Message to translate in plural form (can be the same as the singular form)
536  * \param num      Count used to obtain the correct plural form.
537  * \param context  Optional, can be set to differentiate 2 strings that are identical
538  *                 in English but could be different in other languages
539  */
w_ngettext(const char * singular,const char * plural,int num,const char * context)540 irr::core::stringw Translations::w_ngettext(const char* singular, const char* plural, int num, const char* context)
541 {
542 #ifdef SERVER_ONLY
543     return L"";
544 
545 #else
546 
547     const std::string& res = (context == NULL ?
548                               m_dictionary.translate_plural(singular, plural, num) :
549                               m_dictionary.translate_ctxt_plural(context, singular, plural, num));
550 
551     const irr::core::stringw wide = StringUtils::utf8ToWide(res);
552     const wchar_t* out_ptr = wide.c_str();
553     if (REMOVE_BOM) out_ptr++;
554 
555 #if TRANSLATE_VERBOSE
556     std::wcout << L"  translation : " << out_ptr << std::endl;
557 #endif
558 
559     return wide;
560 #endif
561 
562 }   // w_ngettext
563 
564 // ----------------------------------------------------------------------------
565 #ifndef SERVER_ONLY
getCurrentAllChar()566 std::set<wchar_t> Translations::getCurrentAllChar()
567 {
568     return m_dictionary.get_all_used_chars();
569 }   // getCurrentAllChar
570 
571 // ----------------------------------------------------------------------------
getCurrentLanguageName()572 std::string Translations::getCurrentLanguageName()
573 {
574     return m_current_language_name;
575     //return m_dictionary_manager.get_language().get_name();
576 }   // getCurrentLanguageName
577 
578 // ----------------------------------------------------------------------------
getCurrentLanguageNameCode()579 std::string Translations::getCurrentLanguageNameCode()
580 {
581     return m_current_language_name_code;
582 }   // getCurrentLanguageNameCode
583 
584 // ----------------------------------------------------------------------------
getLocalizedName(const std::string & str) const585 const std::string& Translations::getLocalizedName(const std::string& str) const
586 {
587     std::map<std::string, std::string>::const_iterator n = m_localized_name.find(str);
588     assert (n != m_localized_name.end());
589     return n->second;
590 }   // getLocalizedName
591 
592 // ----------------------------------------------------------------------------
593 /* Convert 2-letter country code to localized readable name.
594  */
getLocalizedCountryName(const std::string & country_code) const595 irr::core::stringw Translations::getLocalizedCountryName(const std::string& country_code) const
596 {
597     auto it = m_localized_country_codes.find(country_code);
598     // If unknown 2 letter country just return the same
599     if (it == m_localized_country_codes.end())
600         return StringUtils::utf8ToWide(country_code);
601     auto name_itr = it->second.find(m_current_language_tag);
602     if (name_itr != it->second.end())
603         return name_itr->second;
604     // If there should be invalid language tag, use en (which always exists)
605     name_itr = it->second.find("en");
606     if (name_itr != it->second.end())
607         return name_itr->second;
608     // Fallback
609     return StringUtils::utf8ToWide(country_code);
610 }   // getLocalizedCountryName
611 
612 // ----------------------------------------------------------------------------
613 /* Insert breakmark to thai sentence according to thai word dictionary, which
614  * adds a mark in the begining of a thai vocabulary
615  */
insertThaiBreakMark(const std::u32string & thai,std::vector<bool> & breakable)616 void Translations::insertThaiBreakMark(const std::u32string& thai,
617                                        std::vector<bool>& breakable)
618 {
619     if (thai.size() < 3)
620         return;
621     for (size_t i = 0; i < thai.size();)
622     {
623         char32_t t = thai[i];
624         if (i >= thai.size() - 2 || !isThaiCP(t))
625         {
626             i++;
627             continue;
628         }
629         auto ret = g_thai_dict.find(t);
630         if (ret == g_thai_dict.end())
631         {
632             i++;
633             continue;
634         }
635         size_t checked_word = 1;
636         const size_t max_checking_word = ret->second.second;
637         for (size_t j = i + 1;; j++)
638         {
639             if (j - i > max_checking_word || j > thai.size())
640                 break;
641             const std::u32string& ss = thai.substr(i, j - i);
642             if (ret->second.first.find(ss) != ret->second.first.end())
643             {
644                 if (ss.size() > checked_word)
645                     checked_word = ss.size();
646                 if (i != 0)
647                     breakable[i - 1] = true;
648             }
649         }
650         i += checked_word;
651     }
652 }   // insertThaiBreakMark
653 
654 #endif
655