1 /*
2 * Copyright 2010-2014 OpenXcom Developers.
3 *
4 * This file is part of OpenXcom.
5 *
6 * OpenXcom is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * OpenXcom is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with OpenXcom. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "Language.h"
20 #include <algorithm>
21 #include <assert.h>
22 #include <locale>
23 #include <fstream>
24 #include <cassert>
25 #include "CrossPlatform.h"
26 #include "Logger.h"
27 #include "Exception.h"
28 #include "Options.h"
29 #include "LanguagePlurality.h"
30 #include "../Ruleset/ExtraStrings.h"
31 #include "../Interface/TextList.h"
32 #ifdef _WIN32
33 #ifndef NOMINMAX
34 #define NOMINMAX
35 #endif
36 #define WIN32_LEAN_AND_MEAN
37 #include <windows.h>
38 #endif
39
40 namespace OpenXcom
41 {
42
43 std::map<std::string, std::wstring> Language::_names;
44 std::vector<std::string> Language::_rtl, Language::_cjk;
45
46 /**
47 * Initializes an empty language file.
48 */
Language()49 Language::Language() : _id(""), _strings(), _handler(0), _direction(DIRECTION_LTR), _wrap(WRAP_WORDS)
50 {
51 // maps don't have initializers :(
52 if (_names.empty())
53 {
54 _names["en-US"] = utf8ToWstr("English (US)");
55 _names["en-GB"] = utf8ToWstr("English (UK)");
56 _names["bg-BG"] = utf8ToWstr("Български");
57 _names["cs-CZ"] = utf8ToWstr("Česky");
58 _names["da"] = utf8ToWstr("Dansk");
59 _names["de"] = utf8ToWstr("Deutsch");
60 _names["es"] = utf8ToWstr("Español (ES)");
61 _names["es-419"] = utf8ToWstr("Español (AL)");
62 _names["fr"] = utf8ToWstr("Français");
63 _names["fi"] = utf8ToWstr("Suomi");
64 _names["grk"] = utf8ToWstr("Ελληνικά");
65 _names["hu-HU"] = utf8ToWstr("Magyar");
66 _names["it"] = utf8ToWstr("Italiano");
67 _names["ja-JP"] = utf8ToWstr("日本語");
68 _names["ko"] = utf8ToWstr("한국어");
69 _names["nl"] = utf8ToWstr("Nederlands");
70 _names["no"] = utf8ToWstr("Norsk");
71 _names["pl-PL"] = utf8ToWstr("Polski");
72 _names["pt-BR"] = utf8ToWstr("Português (BR)");
73 _names["pt-PT"] = utf8ToWstr("Português (PT)");
74 _names["ro"] = utf8ToWstr("Română");
75 _names["ru"] = utf8ToWstr("Русский");
76 _names["sk-SK"] = utf8ToWstr("Slovenčina");
77 _names["sv"] = utf8ToWstr("Svenska");
78 _names["tr-TR"] = utf8ToWstr("Türkçe");
79 _names["uk"] = utf8ToWstr("Українська");
80 _names["zh-CN"] = utf8ToWstr("中文");
81 _names["zh-TW"] = utf8ToWstr("文言");
82 }
83 if (_rtl.empty())
84 {
85 _rtl.push_back("he");
86 }
87 if (_cjk.empty())
88 {
89 _cjk.push_back("ja-JP");
90 //_cjk.push_back("ko"); has spacing between words
91 _cjk.push_back("zh-CN");
92 _cjk.push_back("zh-TW");
93 }
94 }
95
96 /**
97 *
98 */
~Language()99 Language::~Language()
100 {
101 delete _handler;
102 }
103
104 /**
105 * Takes a wide-character string and converts it
106 * to a 8-bit string encoded in UTF-8.
107 * @note Adapted from http://stackoverflow.com/questions/148403/utf8-to-from-wide-char-conversion-in-stl
108 * @param src Wide-character string.
109 * @return UTF-8 string.
110 */
wstrToUtf8(const std::wstring & src)111 std::string Language::wstrToUtf8(const std::wstring& src)
112 {
113 if (src.empty())
114 return "";
115 #ifdef _WIN32
116 int size = WideCharToMultiByte(CP_UTF8, 0, &src[0], (int)src.size(), NULL, 0, NULL, NULL);
117 std::string str(size, 0);
118 WideCharToMultiByte(CP_UTF8, 0, &src[0], (int)src.size(), &str[0], size, NULL, NULL);
119 return str;
120 #else
121 std::string out;
122 unsigned int codepoint = 0;
123 for (std::wstring::const_iterator i = src.begin(); i != src.end(); ++i)
124 {
125 wchar_t ch = *i;
126 if (ch >= 0xd800 && ch <= 0xdbff)
127 codepoint = ((ch - 0xd800) << 10) + 0x10000;
128 else
129 {
130 if (ch >= 0xdc00 && ch <= 0xdfff)
131 codepoint |= ch - 0xdc00;
132 else
133 codepoint = ch;
134
135 if (codepoint <= 0x7f)
136 out.append(1, static_cast<char>(codepoint));
137 else if (codepoint <= 0x7ff)
138 {
139 out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
140 out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
141 }
142 else if (codepoint <= 0xffff)
143 {
144 out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
145 out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
146 out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
147 }
148 else
149 {
150 out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
151 out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
152 out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
153 out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
154 }
155 codepoint = 0;
156 }
157 }
158 return out;
159 #endif
160 }
161
162 /**
163 * Takes a wide-character string and converts it to an
164 * 8-bit string encoded in the current system codepage.
165 * @param src Wide-character string.
166 * @return Codepage string.
167 */
wstrToCp(const std::wstring & src)168 std::string Language::wstrToCp(const std::wstring& src)
169 {
170 if (src.empty())
171 return "";
172 #ifdef _WIN32
173 int size = WideCharToMultiByte(CP_ACP, 0, &src[0], (int)src.size(), NULL, 0, NULL, NULL);
174 std::string str(size, 0);
175 WideCharToMultiByte(CP_ACP, 0, &src[0], (int)src.size(), &str[0], size, NULL, NULL);
176 return str;
177 #else
178 const int MAX = 500;
179 char buffer[MAX];
180 setlocale(LC_ALL, "");
181 wcstombs(buffer, src.c_str(), MAX);
182 setlocale(LC_ALL, "C");
183 std::string str(buffer);
184 return str;
185 #endif
186 }
187
188 /**
189 * Takes a wide-character string and converts it to an
190 * 8-bit string with the filesystem encoding.
191 * @param src Wide-character string.
192 * @return Filesystem string.
193 */
wstrToFs(const std::wstring & src)194 std::string Language::wstrToFs(const std::wstring& src)
195 {
196 #ifdef _WIN32
197 return Language::wstrToCp(src);
198 #else
199 return Language::wstrToUtf8(src);
200 #endif
201 }
202
203 /**
204 * Takes an 8-bit string encoded in UTF-8 and converts it
205 * to a wide-character string.
206 * @note Adapted from http://stackoverflow.com/questions/148403/utf8-to-from-wide-char-conversion-in-stl
207 * @param src UTF-8 string.
208 * @return Wide-character string.
209 */
utf8ToWstr(const std::string & src)210 std::wstring Language::utf8ToWstr(const std::string& src)
211 {
212 if (src.empty())
213 return L"";
214 #ifdef _WIN32
215 int size = MultiByteToWideChar(CP_UTF8, 0, &src[0], (int)src.size(), NULL, 0);
216 std::wstring wstr(size, 0);
217 MultiByteToWideChar(CP_UTF8, 0, &src[0], (int)src.size(), &wstr[0], size);
218 return wstr;
219 #else
220 std::wstring out;
221 unsigned int codepoint = 0;
222 int following = 0;
223 for (std::string::const_iterator i = src.begin(); i != src.end(); ++i)
224 {
225 unsigned char ch = *i;
226 if (ch <= 0x7f)
227 {
228 codepoint = ch;
229 following = 0;
230 }
231 else if (ch <= 0xbf)
232 {
233 if (following > 0)
234 {
235 codepoint = (codepoint << 6) | (ch & 0x3f);
236 --following;
237 }
238 }
239 else if (ch <= 0xdf)
240 {
241 codepoint = ch & 0x1f;
242 following = 1;
243 }
244 else if (ch <= 0xef)
245 {
246 codepoint = ch & 0x0f;
247 following = 2;
248 }
249 else
250 {
251 codepoint = ch & 0x07;
252 following = 3;
253 }
254 if (following == 0)
255 {
256 if (codepoint > 0xffff)
257 {
258 out.append(1, static_cast<wchar_t>(0xd800 + (codepoint >> 10)));
259 out.append(1, static_cast<wchar_t>(0xdc00 + (codepoint & 0x03ff)));
260 }
261 else
262 out.append(1, static_cast<wchar_t>(codepoint));
263 codepoint = 0;
264 }
265 }
266 return out;
267 #endif
268 }
269
270 /**
271 * Takes an 8-bit string encoded in the current system codepage
272 * and converts it to a wide-character string.
273 * @param src Codepage string.
274 * @return Wide-character string.
275 */
cpToWstr(const std::string & src)276 std::wstring Language::cpToWstr(const std::string& src)
277 {
278 if (src.empty())
279 return L"";
280 #ifdef _WIN32
281 int size = MultiByteToWideChar(CP_ACP, 0, &src[0], (int)src.size(), NULL, 0);
282 std::wstring wstr(size, 0);
283 MultiByteToWideChar(CP_ACP, 0, &src[0], (int)src.size(), &wstr[0], size);
284 return wstr;
285 #else
286 const int MAX = 500;
287 wchar_t buffer[MAX + 1];
288 setlocale(LC_ALL, "");
289 size_t len = mbstowcs(buffer, src.c_str(), MAX);
290 setlocale(LC_ALL, "C");
291 if (len == (size_t)-1)
292 return L"?";
293 return std::wstring(buffer, len);
294 #endif
295 }
296
297 /**
298 * Takes an 8-bit string with the filesystem encoding
299 * and converts it to a wide-character string.
300 * @param src Filesystem string.
301 * @return Wide-character string.
302 */
fsToWstr(const std::string & src)303 std::wstring Language::fsToWstr(const std::string& src)
304 {
305 #ifdef _WIN32
306 return Language::cpToWstr(src);
307 #else
308 return Language::utf8ToWstr(src);
309 #endif
310 }
311
312 /**
313 * Replaces every instance of a substring.
314 * @param str The string to modify.
315 * @param find The substring to find.
316 * @param replace The substring to replace it with.
317 */
replace(std::string & str,const std::string & find,const std::string & replace)318 void Language::replace(std::string &str, const std::string &find, const std::string &replace)
319 {
320 for (size_t i = str.find(find); i != std::string::npos; i = str.find(find, i + replace.length()))
321 {
322 str.replace(i, find.length(), replace);
323 }
324 }
325
326 /**
327 * Replaces every instance of a substring.
328 * @param str The string to modify.
329 * @param find The substring to find.
330 * @param replace The substring to replace it with.
331 */
replace(std::wstring & str,const std::wstring & find,const std::wstring & replace)332 void Language::replace(std::wstring &str, const std::wstring &find, const std::wstring &replace)
333 {
334 for (size_t i = str.find(find); i != std::wstring::npos; i = str.find(find, i + replace.length()))
335 {
336 str.replace(i, find.length(), replace);
337 }
338 }
339
340 /**
341 * Gets all the languages found in the
342 * Data folder and returns their properties.
343 * @param files List of language filenames.
344 * @param names List of language human-readable names.
345 */
getList(std::vector<std::string> & files,std::vector<std::wstring> & names)346 void Language::getList(std::vector<std::string> &files, std::vector<std::wstring> &names)
347 {
348 files = CrossPlatform::getFolderContents(CrossPlatform::getDataFolder("Language/"), "yml");
349 names.clear();
350
351 for (std::vector<std::string>::iterator i = files.begin(); i != files.end(); ++i)
352 {
353 *i = CrossPlatform::noExt(*i);
354 std::wstring name;
355 std::map<std::string, std::wstring>::iterator lang = _names.find(*i);
356 if (lang != _names.end())
357 {
358 name = lang->second;
359 }
360 else
361 {
362 name = Language::fsToWstr(*i);
363 }
364 names.push_back(name);
365 }
366 }
367
368 /**
369 * Loads a language file in Ruby-on-Rails YAML format.
370 * Not that this has anything to do with Ruby, but since it's a
371 * widely-supported format and we already have YAML, it was convenient.
372 * @param filename Filename of the YAML file.
373 * @param extras Pointer to extra strings from ruleset.
374 */
load(const std::string & filename,ExtraStrings * extras)375 void Language::load(const std::string &filename, ExtraStrings *extras)
376 {
377 _strings.clear();
378
379 YAML::Node doc = YAML::LoadFile(filename);
380 _id = doc.begin()->first.as<std::string>();
381 YAML::Node lang = doc.begin()->second;
382 for (YAML::const_iterator i = lang.begin(); i != lang.end(); ++i)
383 {
384 // Regular strings
385 if (i->second.IsScalar())
386 {
387 _strings[i->first.as<std::string>()] = loadString(i->second.as<std::string>());
388 }
389 // Strings with plurality
390 else if (i->second.IsMap())
391 {
392 for (YAML::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
393 {
394 std::string s = i->first.as<std::string>() + "_" + j->first.as<std::string>();
395 _strings[s] = loadString(j->second.as<std::string>());
396 }
397 }
398 }
399 if (extras)
400 {
401 for (std::map<std::string, std::string>::const_iterator i = extras->getStrings()->begin(); i != extras->getStrings()->end(); ++i)
402 {
403 _strings[i->first] = loadString(i->second);
404 }
405 }
406 delete _handler;
407 _handler = LanguagePlurality::create(_id);
408 if (std::find(_rtl.begin(), _rtl.end(), _id) == _rtl.end())
409 {
410 _direction = DIRECTION_LTR;
411 }
412 else
413 {
414 _direction = DIRECTION_RTL;
415 }
416 if (std::find(_cjk.begin(), _cjk.end(), _id) == _cjk.end())
417 {
418 _wrap = WRAP_WORDS;
419 }
420 else
421 {
422 _wrap = WRAP_LETTERS;
423 }
424 }
425
426 /**
427 * Replaces all special string markers with the approriate characters
428 * and converts the string encoding.
429 * @param string Original UTF-8 string.
430 * @return New widechar string.
431 */
loadString(const std::string & string) const432 std::wstring Language::loadString(const std::string &string) const
433 {
434 std::string s = string;
435 replace(s, "{NEWLINE}", "\n");
436 replace(s, "{SMALLLINE}", "\x02");
437 replace(s, "{ALT}", "\x01");
438 return utf8ToWstr(s);
439 }
440
441 /**
442 * Returns the language's locale.
443 * @return IANA language tag.
444 */
getId() const445 std::string Language::getId() const
446 {
447 return _id;
448 }
449
450 /**
451 * Returns the language's name in its native language.
452 * @return Language name.
453 */
getName() const454 std::wstring Language::getName() const
455 {
456 return _names[_id];
457 }
458
459 /**
460 * Returns the localized text with the specified ID.
461 * If it's not found, just returns the ID.
462 * @param id ID of the string.
463 * @return String with the requested ID.
464 */
getString(const std::string & id) const465 const LocalizedText &Language::getString(const std::string &id) const
466 {
467 static LocalizedText hack(L"");
468 if (id.empty())
469 return hack;
470 std::map<std::string, LocalizedText>::const_iterator s = _strings.find(id);
471 if (s == _strings.end())
472 {
473 Log(LOG_WARNING) << id << " not found in " << Options::language;
474 hack = LocalizedText(utf8ToWstr(id));
475 return hack;
476 }
477 else
478 {
479 return s->second;
480 }
481 }
482
483 /**
484 * Returns the localized text with the specified ID, in the proper form for @a n.
485 * The substitution of @a n has already happened in the returned LocalizedText.
486 * If it's not found, just returns the ID.
487 * @param id ID of the string.
488 * @param n Number to use to decide the proper form.
489 * @return String with the requested ID.
490 */
getString(const std::string & id,unsigned n) const491 LocalizedText Language::getString(const std::string &id, unsigned n) const
492 {
493 assert(!id.empty());
494 std::map<std::string, LocalizedText>::const_iterator s = _strings.end();
495 if (0 == n)
496 {
497 // Try specialized form.
498 s = _strings.find(id + "_zero");
499 }
500 if (s == _strings.end())
501 {
502 // Try proper form by language
503 s = _strings.find(id + _handler->getSuffix(n));
504 }
505 if (s == _strings.end())
506 {
507 Log(LOG_WARNING) << id << " not found in " << Options::language;
508 return LocalizedText(utf8ToWstr(id));
509 }
510 std::wostringstream ss;
511 ss << n;
512 std::wstring marker(L"{N}"), val(ss.str()), txt(s->second);
513 replace(txt, marker, val);
514 return txt;
515 }
516
517 /**
518 * Returns the localized text with the specified ID, in the proper form for the gender.
519 * If it's not found, just returns the ID.
520 * @param id ID of the string.
521 * @param gender Current soldier gender.
522 * @return String with the requested ID.
523 */
getString(const std::string & id,SoldierGender gender) const524 const LocalizedText &Language::getString(const std::string &id, SoldierGender gender) const
525 {
526 std::string genderId;
527 if (gender == GENDER_MALE)
528 {
529 genderId = id + "_MALE";
530 }
531 else
532 {
533 genderId = id + "_FEMALE";
534 }
535 return getString(genderId);
536 }
537
538 /**
539 * Outputs all the language IDs and strings
540 * to an HTML table.
541 * @param filename HTML file.
542 */
toHtml(const std::string & filename) const543 void Language::toHtml(const std::string &filename) const
544 {
545 std::ofstream htmlFile (filename.c_str(), std::ios::out);
546 htmlFile << "<table border=\"1\" width=\"100%\">" << std::endl;
547 htmlFile << "<tr><th>ID String</th><th>English String</th></tr>" << std::endl;
548 for (std::map<std::string, LocalizedText>::const_iterator i = _strings.begin(); i != _strings.end(); ++i)
549 {
550 htmlFile << "<tr><td>" << i->first << "</td><td>";
551 std::string s = wstrToUtf8(i->second);
552 for (std::string::const_iterator j = s.begin(); j != s.end(); ++j)
553 {
554 if (*j == 2 || *j == '\n')
555 {
556 htmlFile << "<br />";
557 }
558 else
559 {
560 htmlFile << *j;
561 }
562 }
563 htmlFile << "</td></tr>" << std::endl;
564 }
565 htmlFile << "</table>" << std::endl;
566 htmlFile.close();
567 }
568
569 /**
570 * Returns the direction to use for rendering
571 * text in this language.
572 * @return Text direction.
573 */
getTextDirection() const574 TextDirection Language::getTextDirection() const
575 {
576 return _direction;
577 }
578
579 /**
580 * Returns the wrapping rules to use for rendering
581 * text in this language.
582 * @return Text wrapping.
583 */
getTextWrapping() const584 TextWrapping Language::getTextWrapping() const
585 {
586 return _wrap;
587 }
588
589 }
590
591 /** @page LanguageFiles Format of the language files.
592
593 Language files are formatted as YAML (.yml) containing UTF-8 (no BOM) text.
594 The first line in a language file is the language's identifier.
The rest of the file consists of key-value pairs. The key of each pair
596 contains the ID string (dictionary key), and the value contains the localized
597 text for the given key in quotes.
598
599 The localized text may contain the following special markers:
600 <table>
601 <tr>
602 <td><tt>{</tt><i>0, 1, 2, ...</i> <tt>}</tt></td>
603 <td>These markers will be replaced by programmer-supplied values before the
604 message is displayed.</td></tr>
605 <tr>
606 <td><tt>{ALT}</tt></td>
607 <td>The rest of the text will be in an alternate color. Using this again will
608 switch back to the primary color.</td></tr>
609 <tr>
610 <td><tt>{NEWLINE}</tt></td>
611 <td>It will be replaced with a line break in the game.</td></tr>
612 <tr>
613 <td><tt>{SMALLLINE}</tt></td>
614 <td>The rest of the text will be in a small font.</td></tr>
615 </table>
616
There is an additional marker sequence that should only appear in texts that
depend on a number. This marker <tt>{N}</tt> will be replaced by the actual
number used. The keys for texts that depend on numbers also have special
suffixes that depend on the language. For all languages, a suffix of
621 <tt>_zero</tt> is tried if the number is zero, before trying the actual key
622 according to the language rules. The rest of the suffixes depend on the language,
623 as described <a href="http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html">here</a>.
624
625 So, you would write (for English):
626 <pre>
627 STR_ENEMIES:
628 zero: "There are no enemies left."
629 one: "There is a single enemy left."
630 other: "There are {N} enemies left."
631 </pre>
632
633 */
634