1 // $Id: tinygettext.cpp,v 1.4 2004/11/25 13:15:56 matzebraun Exp $
2 //
3 // TinyGetText - A small flexible gettext() replacement
4 // Copyright (C) 2004 Ingo Ruhnke <grumbel@gmx.de>
5 //
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
20 #include <config.h>
21
22 #include <sys/types.h>
23 #include <fstream>
24 #include <iostream>
25 #include <algorithm>
26 #include <ctype.h>
27 #include <errno.h>
28
29 #include "SDL.h"
30
31 #include "TinyGetText.hpp"
32 #include "PhysfsStream/PhysfsStream.hpp"
33 #include "findlocale.hpp"
34 #undef getchar
35
36 //#define TRANSLATION_DEBUG
37
38 namespace TinyGetText {
39
40 /** Convert \a which is in \a from_charset to \a to_charset and return it */
convert(const std::string & text,const std::string & from_charset,const std::string & to_charset)41 std::string convert(const std::string& text,
42 const std::string& from_charset,
43 const std::string& to_charset)
44 {
45 if (from_charset == to_charset)
46 return text;
47
48 char *in = new char[text.length() + 1];
49 strcpy(in, text.c_str());
50 char *out = SDL_iconv_string(to_charset.c_str(), from_charset.c_str(), in, text.length() + 1);
51 delete[] in;
52 if(out == 0)
53 {
54 std::cerr << "Error: conversion from " << from_charset << " to " << to_charset << " failed" << std::endl;
55 return "";
56 }
57 std::string ret(out);
58 SDL_free(out);
59 return ret;
60 #if 0
61 iconv_t cd = SDL_iconv_open(to_charset.c_str(), from_charset.c_str());
62
63 size_t in_len = text.length();
64 size_t out_len = text.length()*3; // FIXME: cross fingers that this is enough
65
66 char* out_orig = new char[out_len];
67 char* in_orig = new char[in_len+1];
68 strcpy(in_orig, text.c_str());
69
70 char* out = out_orig;
71 ICONV_CONST char* in = in_orig;
72 size_t out_len_temp = out_len; // iconv is counting down the bytes it has
73 // written from this...
74
75 size_t retval = SDL_iconv(cd, &in, &in_len, &out, &out_len_temp);
76 out_len -= out_len_temp; // see above
77 if (retval == (size_t) -1)
78 {
79 std::cerr << strerror(errno) << std::endl;
80 std::cerr << "Error: conversion from " << from_charset
81 << " to " << to_charset << " went wrong: " << retval << std::endl;
82 return "";
83 }
84 SDL_iconv_close(cd);
85
86 std::string ret(out_orig, out_len);
87 delete[] out_orig;
88 delete[] in_orig;
89 return ret;
90 #endif
91 }
92
/** Return true if \a lhs ends with \a rhs. An empty \a rhs always matches. */
bool has_suffix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type tail_len = rhs.length();

  // A suffix can never be longer than the string it is a suffix of.
  return lhs.length() >= tail_len
      && lhs.compare(lhs.length() - tail_len, tail_len, rhs) == 0;
}
100
/** Return true if \a lhs starts with \a rhs. An empty \a rhs always matches. */
bool has_prefix(const std::string& lhs, const std::string rhs)
{
  const std::string::size_type head_len = rhs.length();

  // A prefix can never be longer than the string it is a prefix of.
  return lhs.length() >= head_len
      && lhs.compare(0, head_len, rhs) == 0;
}
108
// Plural-form selectors. Each maps a count n to the index of the plural
// form to use; the rules are the ones quoted next to the language
// definitions below.

/** Always form 0. */
int plural1(int )
{
  return 0;
}

/** Form 0 for exactly one, form 1 otherwise. */
int plural2_1(int n)
{
  return n == 1 ? 0 : 1;
}

/** Form 0 for counts up to and including one, form 1 above that. */
int plural2_2(int n)
{
  return n <= 1 ? 0 : 1;
}

/** Latvian rule. */
int plural3_lv(int n)
{
  if (n % 10 == 1 && n % 100 != 11)
    return 0;
  if (n != 0)
    return 1;
  return 2;
}

/** Irish rule. */
int plural3_ga(int n)
{
  if (n == 1)
    return 0;
  if (n == 2)
    return 1;
  return 2;
}

/** Lithuanian rule. */
int plural3_lt(int n)
{
  const int last_digit = n % 10;
  const int last_two = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Rule shared by Croatian, Czech, Russian and Ukrainian in this file. */
int plural3_1(int n)
{
  const int last_digit = n % 10;
  const int last_two = n % 100;

  if (last_digit == 1 && last_two != 11)
    return 0;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovak rule. */
int plural3_sk(int n)
{
  if (n == 1)
    return 0;
  if (n >= 2 && n <= 4)
    return 1;
  return 2;
}

/** Polish rule. */
int plural3_pl(int n)
{
  const int last_digit = n % 10;
  const int last_two = n % 100;

  if (n == 1)
    return 0;
  if (last_digit >= 2 && last_digit <= 4 && (last_two < 10 || last_two >= 20))
    return 1;
  return 2;
}

/** Slovenian rule; despite the name it selects between four forms (0..3). */
int plural3_sl(int n)
{
  const int last_two = n % 100;

  if (last_two == 1)
    return 0;
  if (last_two == 2)
    return 1;
  if (last_two == 3 || last_two == 4)
    return 2;
  return 3;
}
119
120 /** Language Definitions */
121 //*{
122 LanguageDef lang_hu("hu", "Hungarian", 1, plural1); // "nplurals=1; plural=0;"
123 LanguageDef lang_ja("ja", "Japanese", 1, plural1); // "nplurals=1; plural=0;"
124 LanguageDef lang_ko("ko", "Korean", 1, plural1); // "nplurals=1; plural=0;"
125 LanguageDef lang_tr("tr", "Turkish", 1, plural1); // "nplurals=1; plural=0;"
126 LanguageDef lang_da("da", "Danish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
127 LanguageDef lang_nl("nl", "Dutch", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
128 LanguageDef lang_en("en", "English", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
129 LanguageDef lang_fo("fo", "Faroese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
130 LanguageDef lang_de("de", "German", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
131 LanguageDef lang_nb("nb", "Norwegian Bokmal", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
132 LanguageDef lang_no("no", "Norwegian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
133 LanguageDef lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
134 LanguageDef lang_sv("sv", "Swedish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
135 LanguageDef lang_et("et", "Estonian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
136 LanguageDef lang_fi("fi", "Finnish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
137 LanguageDef lang_el("el", "Greek", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
138 LanguageDef lang_he("he", "Hebrew", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
139 LanguageDef lang_it("it", "Italian", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
140 LanguageDef lang_pt("pt", "Portuguese", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
141 LanguageDef lang_es("es", "Spanish", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
142 LanguageDef lang_eo("eo", "Esperanto", 2, plural2_1); // "nplurals=2; plural=(n != 1);"
143 LanguageDef lang_fr("fr", "French", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
144 LanguageDef lang_pt_BR("pt_BR", "Brazilian", 2, plural2_2); // "nplurals=2; plural=(n > 1);"
145 LanguageDef lang_lv("lv", "Latvian", 3, plural3_lv); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
146 LanguageDef lang_ga("ga", "Irish", 3, plural3_ga); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
147 LanguageDef lang_lt("lt", "Lithuanian", 3, plural3_lt); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
148 LanguageDef lang_hr("hr", "Croatian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
149 LanguageDef lang_cs("cs", "Czech", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
150 LanguageDef lang_ru("ru", "Russian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
151 LanguageDef lang_uk("uk", "Ukrainian", 3, plural3_1); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
152 LanguageDef lang_sk("sk", "Slovak", 3, plural3_sk); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
153 LanguageDef lang_pl("pl", "Polish", 3, plural3_pl); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
154 LanguageDef lang_sl("sl", "Slovenian", 3, plural3_sl); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
155 //*}
156
157 LanguageDef&
get_language_def(const std::string & name)158 get_language_def(const std::string& name)
159 {
160 if (name == "hu") return lang_hu;
161 else if (name == "ja") return lang_ja;
162 else if (name == "ko") return lang_ko;
163 else if (name == "tr") return lang_tr;
164 else if (name == "da") return lang_da;
165 else if (name == "nl") return lang_nl;
166 else if (name == "en") return lang_en;
167 else if (name == "fo") return lang_fo;
168 else if (name == "de") return lang_de;
169 else if (name == "nb") return lang_nb;
170 else if (name == "no") return lang_no;
171 else if (name == "nn") return lang_nn;
172 else if (name == "sv") return lang_sv;
173 else if (name == "et") return lang_et;
174 else if (name == "fi") return lang_fi;
175 else if (name == "el") return lang_el;
176 else if (name == "he") return lang_he;
177 else if (name == "it") return lang_it;
178 else if (name == "pt") return lang_pt;
179 else if (name == "es") return lang_es;
180 else if (name == "eo") return lang_eo;
181 else if (name == "fr") return lang_fr;
182 else if (name == "pt_BR") return lang_pt_BR;
183 else if (name == "lv") return lang_lv;
184 else if (name == "ga") return lang_ga;
185 else if (name == "lt") return lang_lt;
186 else if (name == "hr") return lang_hr;
187 else if (name == "cs") return lang_cs;
188 else if (name == "ru") return lang_ru;
189 else if (name == "uk") return lang_uk;
190 else if (name == "sk") return lang_sk;
191 else if (name == "pl") return lang_pl;
192 else if (name == "sl") return lang_sl;
193 else return lang_en;
194 }
195
DictionaryManager()196 DictionaryManager::DictionaryManager()
197 : current_dict(&empty_dict)
198 {
199 parseLocaleAliases();
200 // Environment variable LINCITY_LANG overrides language settings.
201 const char* lang = getenv( "LINCITY_LANG" );
202 if( lang ){
203 set_language( lang );
204 return;
205 }
206 // use findlocale to setup language
207 FL_Locale *locale;
208 FL_FindLocale( &locale, FL_MESSAGES );
209 if(locale->lang) {
210 if (locale->country) {
211 set_language( std::string(locale->lang)+"_"+std::string(locale->country) );
212 } else {
213 set_language( std::string(locale->lang) );
214 }
215 }
216 FL_FreeLocale( &locale );
217 }
218
219 void
parseLocaleAliases()220 DictionaryManager::parseLocaleAliases()
221 {
222 // try to parse language alias list
223 std::ifstream in("/usr/share/locale/locale.alias");
224
225 char c = ' ';
226 while(in.good() && !in.eof()) {
227 while(isspace(static_cast<unsigned char>(c)) && !in.eof())
228 in.get(c);
229
230 if(c == '#') { // skip comments
231 while(c != '\n' && !in.eof())
232 in.get(c);
233 continue;
234 }
235
236 std::string alias;
237 while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
238 alias += c;
239 in.get(c);
240 }
241 while(isspace(static_cast<unsigned char>(c)) && !in.eof())
242 in.get(c);
243 std::string language;
244 while(!isspace(static_cast<unsigned char>(c)) && !in.eof()) {
245 language += c;
246 in.get(c);
247 }
248
249 if(in.eof())
250 break;
251 set_language_alias(alias, language);
252 }
253 }
254
255 Dictionary&
get_dictionary(const std::string & spec)256 DictionaryManager::get_dictionary(const std::string& spec)
257 {
258
259 //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
260
261 std::string lang = get_language_from_spec(spec);
262
263 //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
264
265 Dictionaries::iterator i = dictionaries.find(get_language_from_spec(lang));
266 if (i != dictionaries.end())
267 {
268 return i->second;
269 }
270 else // Dictionary for languages lang isn't loaded, so we load it
271 {
272 //std::cout << "get_dictionary: " << lang << std::endl;
273 Dictionary& dict = dictionaries[lang];
274
275 dict.set_language(get_language_def(lang));
276 if(charset != "")
277 dict.set_charset(charset);
278
279 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
280 {
281 char** files = PHYSFS_enumerateFiles(p->c_str());
282 if(!files)
283 {
284 std::cerr << "Error: enumerateFiles() failed on " << *p << std::endl;
285 }
286 else
287 {
288 for(const char* const* filename = files;
289 *filename != 0; filename++) {
290
291 // check if filename matches requested language
292 std::string fname = std::string(*filename);
293 std::string load_from_file = "";
294 if(fname == lang + ".po") {
295 load_from_file = fname;
296 } else {
297 std::string::size_type s = lang.find("_");
298 if(s != std::string::npos) {
299 std::string lang_short = std::string(lang, 0, s);
300 if (fname == lang_short + ".po") {
301 load_from_file = lang_short;
302 }
303 }
304 }
305
306 // if it matched, load dictionary
307 if (load_from_file != "") {
308 //log_debug << "Loading dictionary for language \"" << lang << "\" from \"" << filename << "\"" << std::endl;
309 std::string pofile = *p + "/" + *filename;
310 try {
311 IFileStream in(pofile);
312 read_po_file(dict, in);
313 } catch(std::exception& e) {
314 std::cerr << "Error: Failure file opening: " << pofile << std::endl;
315 std::cerr << e.what() << "\n";
316 }
317 }
318 }
319 PHYSFS_freeList(files);
320 }
321 }
322
323 return dict;
324 }
325 }
326
327 std::set<std::string>
get_languages()328 DictionaryManager::get_languages()
329 {
330 std::set<std::string> languages;
331
332 for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
333 {
334 char** files = PHYSFS_enumerateFiles(p->c_str());
335 if (!files)
336 {
337 std::cerr << "Error: opendir() failed on " << *p << std::endl;
338 }
339 else
340 {
341 for(const char* const* file = files; *file != 0; file++) {
342 if(has_suffix(*file, ".po")) {
343 std::string filename = *file;
344 languages.insert(filename.substr(0, filename.length()-3));
345 }
346 }
347 PHYSFS_freeList(files);
348 }
349 }
350 return languages;
351 }
352
353 void
set_language(const std::string & lang)354 DictionaryManager::set_language(const std::string& lang)
355 {
356 //log_debug << "set_language \"" << lang << "\"" << std::endl;
357 language = get_language_from_spec(lang);
358 //log_debug << "==> \"" << language << "\"" << std::endl;
359 current_dict = & (get_dictionary(language));
360 }
361
362 const std::string&
get_language() const363 DictionaryManager::get_language() const
364 {
365 return language;
366 }
367
368 void
set_charset(const std::string & charset)369 DictionaryManager::set_charset(const std::string& charset)
370 {
371 dictionaries.clear(); // changing charset invalidates cache
372 this->charset = charset;
373 set_language(language);
374 }
375
376 void
set_language_alias(const std::string & alias,const std::string & language)377 DictionaryManager::set_language_alias(const std::string& alias,
378 const std::string& language)
379 {
380 language_aliases.insert(std::make_pair(alias, language));
381 }
382
383 std::string
get_language_from_spec(const std::string & spec)384 DictionaryManager::get_language_from_spec(const std::string& spec)
385 {
386 std::string lang = spec;
387 Aliases::iterator i = language_aliases.find(lang);
388 if(i != language_aliases.end()) {
389 lang = i->second;
390 }
391
392 std::string::size_type s = lang.find(".");
393 if(s != std::string::npos) {
394 lang = std::string(lang, 0, s);
395 }
396
397 s = lang.find("_");
398 if(s == std::string::npos) {
399 std::string lang_big = lang;
400 std::transform (lang_big.begin(), lang_big.end(), lang_big.begin(), toupper);
401 lang += "_" + lang_big;
402 }
403
404 return lang;
405
406 }
407
408 void
add_directory(const std::string & pathname)409 DictionaryManager::add_directory(const std::string& pathname)
410 {
411 dictionaries.clear(); // adding directories invalidates cache
412 search_path.push_back(pathname);
413 set_language(language);
414 }
415
416 //---------------------------------------------------------------------------
417
Dictionary(const LanguageDef & language_,const std::string & charset_)418 Dictionary::Dictionary(const LanguageDef& language_, const std::string& charset_)
419 : language(language_), charset(charset_)
420 {
421 }
422
Dictionary()423 Dictionary::Dictionary()
424 : language(lang_en)
425 {
426 }
427
428 std::string
get_charset() const429 Dictionary::get_charset() const
430 {
431 return charset;
432 }
433
434 void
set_charset(const std::string & charset_)435 Dictionary::set_charset(const std::string& charset_)
436 {
437 charset = charset_;
438 }
439
440 void
set_language(const LanguageDef & lang)441 Dictionary::set_language(const LanguageDef& lang)
442 {
443 language = lang;
444 }
445
446 std::string
translate(const std::string & msgid,const std::string & msgid2,int num)447 Dictionary::translate(const std::string& msgid, const std::string& msgid2, int num)
448 {
449 PluralEntries::iterator i = plural_entries.find(msgid);
450 std::map<int, std::string>& msgstrs = i->second;
451
452 if (i != plural_entries.end() && !msgstrs.empty())
453 {
454 int g = language.plural(num);
455 std::map<int, std::string>::iterator j = msgstrs.find(g);
456 if (j != msgstrs.end())
457 {
458 return j->second;
459 }
460 else
461 {
462 // Return the first translation, in case we can't translate the specific number
463 return msgstrs.begin()->second;
464 }
465 }
466 else
467 {
468 #ifdef TRANSLATION_DEBUG
469 std::cerr << "Warning: Couldn't translate: " << msgid << std::endl;
470 std::cerr << "Candidates: " << std::endl;
471 for (PluralEntries::iterator i = plural_entries.begin(); i != plural_entries.end(); ++i)
472 std::cout << "'" << i->first << "'" << std::endl;
473 #endif
474
475 if (plural2_1(num)) // default to english rules
476 return msgid2;
477 else
478 return msgid;
479 }
480 }
481
482 const char*
translate(const char * msgid)483 Dictionary::translate(const char* msgid)
484 {
485 Entries::iterator i = entries.find(msgid);
486 if (i != entries.end() && !i->second.empty())
487 {
488 return i->second.c_str();
489 }
490 else
491 {
492 #ifdef TRANSLATION_DBEUG
493 std::cout << "Error: Couldn't translate: " << msgid << std::endl;
494 #endif
495 return msgid;
496 }
497 }
498
499 std::string
translate(const std::string & msgid)500 Dictionary::translate(const std::string& msgid)
501 {
502 Entries::iterator i = entries.find(msgid);
503 if (i != entries.end() && !i->second.empty())
504 {
505 return i->second;
506 }
507 else
508 {
509 #ifdef TRANSLATION_DBEUG
510 std::cout << "Error: Couldn't translate: " << msgid << std::endl;
511 #endif
512 return msgid;
513 }
514 }
515
516 void
add_translation(const std::string & msgid,const std::string &,const std::map<int,std::string> & msgstrs)517 Dictionary::add_translation(const std::string& msgid, const std::string& ,
518 const std::map<int, std::string>& msgstrs)
519 {
520 // Do we need msgid2 for anything? its after all supplied to the
521 // translate call, so we just throw it away
522 plural_entries[msgid] = msgstrs;
523 }
524
525 void
add_translation(const std::string & msgid,const std::string & msgstr)526 Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
527 {
528 entries[msgid] = msgstr;
529 }
530
531 class POFileReader
532 {
533 private:
534 struct Token
535 {
536 std::string keyword;
537 std::string content;
538 };
539
540 Dictionary& dict;
541
542 std::string from_charset;
543 std::string to_charset;
544
545 std::string current_msgid;
546 std::string current_msgid_plural;
547 std::map<int, std::string> msgstr_plural;
548
549 int line_num;
550
551 enum { WANT_MSGID, WANT_MSGSTR, WANT_MSGSTR_PLURAL, WANT_MSGID_PLURAL } state;
552
553 public:
POFileReader(std::istream & in,Dictionary & dict_)554 POFileReader(std::istream& in, Dictionary& dict_)
555 : dict(dict_)
556 {
557 state = WANT_MSGID;
558 line_num = 0;
559 char c = in.get();
560 if(c == (char) 0xef) { // skip UTF-8 intro that some texteditors produce
561 in.get();
562 in.get();
563 } else {
564 in.unget();
565 }
566 tokenize_po(in);
567 }
568
parse_header(const std::string & header)569 void parse_header(const std::string& header)
570 {
571 // Seperate the header in lines
572 typedef std::vector<std::string> Lines;
573 Lines lines;
574
575 std::string::size_type start = 0;
576 for(std::string::size_type i = 0; i < header.length(); ++i)
577 {
578 if (header[i] == '\n')
579 {
580 lines.push_back(header.substr(start, i - start));
581 start = i+1;
582 }
583 }
584
585 for(Lines::iterator i = lines.begin(); i != lines.end(); ++i)
586 {
587 if (has_prefix(*i, "Content-Type: text/plain; charset=")) {
588 from_charset = i->substr(strlen("Content-Type: text/plain; charset="));
589 }
590 }
591
592 if (from_charset.empty() || from_charset == "CHARSET")
593 {
594 std::cerr << "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl;
595 from_charset = "ISO-8859-1";
596 }
597
598 to_charset = dict.get_charset();
599 if (to_charset.empty())
600 { // No charset requested from the dict, use utf-8
601 to_charset = "utf-8";
602 dict.set_charset(from_charset);
603 }
604 }
605
add_token(const Token & token)606 void add_token(const Token& token)
607 {
608 switch(state)
609 {
610 case WANT_MSGID:
611 if (token.keyword == "msgid")
612 {
613 current_msgid = token.content;
614 state = WANT_MSGID_PLURAL;
615 }
616 else if (token.keyword.empty())
617 {
618 //std::cerr << "Got EOF, everything looks ok." << std::endl;
619 }
620 else
621 {
622 std::cerr << "tinygettext: expected 'msgid' keyword, got " << token.keyword
623 << " at line " << line_num << std::endl;
624 }
625 break;
626
627 case WANT_MSGID_PLURAL:
628 if (token.keyword == "msgid_plural")
629 {
630 current_msgid_plural = token.content;
631 state = WANT_MSGSTR_PLURAL;
632 }
633 else
634 {
635 state = WANT_MSGSTR;
636 add_token(token);
637 }
638 break;
639
640 case WANT_MSGSTR:
641 if (token.keyword == "msgstr")
642 {
643 if (current_msgid == "")
644 { // .po Header is hidden in the msgid with the empty string
645 parse_header(token.content);
646 }
647 else
648 {
649 dict.add_translation(current_msgid, convert(token.content, from_charset, to_charset));
650 }
651 state = WANT_MSGID;
652 }
653 else
654 {
655 std::cerr << "tinygettext: expected 'msgstr' keyword, got " << token.keyword
656 << " at line " << line_num << std::endl;
657 }
658 break;
659
660 case WANT_MSGSTR_PLURAL:
661 if (has_prefix(token.keyword, "msgstr["))
662 {
663 int num;
664 if (sscanf(token.keyword.c_str(), "msgstr[%d]", &num) != 1)
665 {
666 std::cerr << "Error: Couldn't parse: " << token.keyword << std::endl;
667 }
668 else
669 {
670 msgstr_plural[num] = convert(token.content, from_charset, to_charset);
671 }
672 }
673 else
674 {
675 dict.add_translation(current_msgid, current_msgid_plural, msgstr_plural);
676
677 state = WANT_MSGID;
678 add_token(token);
679 }
680 break;
681 }
682 }
683
getchar(std::istream & in)684 inline int getchar(std::istream& in)
685 {
686 int c = in.get();
687 if (c == '\n')
688 line_num += 1;
689 return c;
690 }
691
tokenize_po(std::istream & in)692 void tokenize_po(std::istream& in)
693 {
694 enum State { READ_KEYWORD,
695 READ_CONTENT,
696 READ_CONTENT_IN_STRING,
697 SKIP_COMMENT };
698
699 State state = READ_KEYWORD;
700 int c;
701 Token token;
702
703 while((c = getchar(in)) != EOF)
704 {
705 //std::cout << "Lexing char: " << char(c) << " " << state << std::endl;
706 switch(state)
707 {
708 case READ_KEYWORD:
709 if (c == '#')
710 {
711 state = SKIP_COMMENT;
712 }
713 else if (c == '\n')
714 {
715 }
716 else
717 {
718 // Read a new token
719 token = Token();
720
721 do { // Read keyword
722 token.keyword += c;
723 } while((c = getchar(in)) != EOF && !isspace(static_cast<unsigned char>(c)));
724 in.unget();
725
726 state = READ_CONTENT;
727 }
728 break;
729
730 case READ_CONTENT:
731 while((c = getchar(in)) != EOF)
732 {
733 if (c == '"') {
734 // Found start of content
735 state = READ_CONTENT_IN_STRING;
736 break;
737 } else if (isspace(static_cast<unsigned char>(c))) {
738 // skip
739 } else { // Read something that may be a keyword
740 in.unget();
741 state = READ_KEYWORD;
742 add_token(token);
743 token = Token();
744 break;
745 }
746 }
747 break;
748
749 case READ_CONTENT_IN_STRING:
750 if (c == '\\') {
751 c = getchar(in);
752 if (c != EOF)
753 {
754 if (c == 'n') token.content += '\n';
755 else if (c == 't') token.content += '\t';
756 else if (c == 'r') token.content += '\r';
757 else if (c == '"') token.content += '"';
758 else if (c == '\\') token.content += '\\';
759 else
760 {
761 std::cout << "Unhandled escape character: " << char(c) << std::endl;
762 }
763 }
764 else
765 {
766 std::cout << "Unterminated string" << std::endl;
767 }
768 } else if (c == '"') { // Content string is terminated
769 state = READ_CONTENT;
770 } else {
771 token.content += c;
772 }
773 break;
774
775 case SKIP_COMMENT:
776 if (c == '\n')
777 state = READ_KEYWORD;
778 break;
779 }
780 }
781 add_token(token);
782 token = Token();
783 }
784 };
785
read_po_file(Dictionary & dict_,std::istream & in)786 void read_po_file(Dictionary& dict_, std::istream& in)
787 {
788 POFileReader reader(in, dict_);
789 }
790
791 } // namespace TinyGetText
792
793 /* EOF */
794