1
2 /******************************************************************************
3 * MODULE : text_language.cpp
4 * DESCRIPTION: natural textual languages
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12 #if defined(_WIN32) || defined(__WIN32__)
13 #include <locale.h>
14 #endif
15
16 #if !defined(__MINGW__) && !defined(__MINGW32__)
17 #include <langinfo.h>
18 #endif
19
20 #include "analyze.hpp"
21 #include "hyphenate.hpp"
22 #include "impl_language.hpp"
23 #include "sys_utils.hpp"
24
25 #ifdef QTTEXMACS
26 #include "Qt/qt_utilities.hpp"
27 #endif
28
29 /******************************************************************************
30 * Western text languages / 8 bit charset
31 ******************************************************************************/
32
33 struct text_language_rep: language_rep {
34 hashmap<string,string> patterns;
35 hashmap<string,string> hyphenations;
36
37 text_language_rep (string lan_name, string hyph_name);
38 text_property advance (tree t, int& pos);
39 array<int> get_hyphens (string s);
40 void hyphenate (string s, int after, string& left, string& right);
41 };
42
text_language_rep(string lan_name,string hyph_name)43 text_language_rep::text_language_rep (string lan_name, string hyph_name):
44 language_rep (lan_name), patterns ("?"), hyphenations ("?") {
45 load_hyphen_tables (hyph_name, patterns, hyphenations, true); }
46
47 text_property
advance(tree t,int & pos)48 text_language_rep::advance (tree t, int& pos) {
49 string s= t->label;
50 if (pos == N(s)) return &tp_normal_rep;
51
52 if (s[pos]==' ') {
53 pos++;
54 // while ((pos<N(s)) && (s[pos]==' ')) pos++;
55 if ((pos == N(s)) || (!is_punctuation (s[pos])))
56 return &tp_space_rep;
57 return &tp_nb_space_rep;
58 }
59
60 if (is_punctuation (s[pos])) {
61 while ((pos<N(s)) && is_punctuation (s[pos])) pos++;
62 if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
63 switch (s[pos-1]) {
64 case ',': case ':': case ';': case '`': case '\'':
65 return &tp_space_rep;
66 case '.': case '!': case '?':
67 return &tp_period_rep;
68 }
69 return &tp_space_rep;
70 }
71
72 if (s[pos]=='-') {
73 pos++;
74 while ((pos<N(s)) && (s[pos]=='-')) pos++;
75 return &tp_hyph_rep;
76 }
77
78 if (is_iso_alpha (s[pos])) {
79 while ((pos<N(s)) && is_iso_alpha (s[pos])) pos++;
80 return &tp_normal_rep;
81 }
82
83 if (is_numeric (s[pos])) { // can not be a '.'
84 while ((pos<N(s)) && is_numeric (s[pos])) pos++;
85 while (s[pos-1]=='.') pos--;
86 return &tp_normal_rep;
87 }
88
89 if (s[pos]=='<') {
90 while ((pos<N(s)) && (s[pos]!='>')) pos++;
91 if (pos<N(s)) pos++;
92 return &tp_normal_rep;
93 }
94
95 pos++;
96 return &tp_normal_rep;
97 }
98
99 array<int>
get_hyphens(string s)100 text_language_rep::get_hyphens (string s) {
101 return ::get_hyphens (s, patterns, hyphenations);
102 }
103
104 void
hyphenate(string s,int after,string & left,string & right)105 text_language_rep::hyphenate (
106 string s, int after, string& left, string& right)
107 {
108 array<int> penalty= get_hyphens (s);
109 std_hyphenate (s, after, left, right, penalty[after]);
110 }
111
112 /******************************************************************************
113 * French typography
114 ******************************************************************************/
115
116 struct french_language_rep: language_rep {
117 hashmap<string,string> patterns;
118 hashmap<string,string> hyphenations;
119
120 french_language_rep (string lan_name, string hyph_name);
121 text_property advance (tree t, int& pos);
122 array<int> get_hyphens (string s);
123 void hyphenate (string s, int after, string& left, string& right);
124 };
125
french_language_rep(string lan_name,string hyph_name)126 french_language_rep::french_language_rep (string lan_name, string hyph_name):
127 language_rep (lan_name), patterns ("?"), hyphenations ("?") {
128 load_hyphen_tables (hyph_name, patterns, hyphenations, true); }
129
130 inline bool
is_french_punctuation(register char c)131 is_french_punctuation (register char c) {
132 return is_punctuation (c) || (c=='\23') || (c=='\24');
133 }
134
135 text_property
advance(tree t,int & pos)136 french_language_rep::advance (tree t, int& pos) {
137 string s= t->label;
138 if (pos == N(s)) return &tp_normal_rep;
139
140 if (s[pos]==' ') {
141 pos++;
142 if (pos>1 && s[pos-2] == '\23')
143 return &tp_nb_thin_space_rep;
144 // while ((pos<N(s)) && (s[pos]==' ')) pos++;
145 if ((pos == N(s)) || (!is_french_punctuation (s[pos])))
146 return &tp_space_rep;
147 if (s[pos] == '\23')
148 return &tp_space_rep;
149 if (s[pos] == ':' || s[pos] == ';' ||
150 s[pos] == '!' || s[pos] == '?' || s[pos] == '\24')
151 return &tp_nb_thin_space_rep;
152 return &tp_nb_space_rep;
153 }
154
155 if (is_french_punctuation (s[pos])) {
156 while ((pos<N(s)) && is_french_punctuation (s[pos])) pos++;
157 if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
158 switch (s[pos-1]) {
159 case '\23':
160 return &tp_nb_thin_space_rep;
161 case '\24':
162 case ',': case ':': case ';': case '`': case '\'':
163 return &tp_space_rep;
164 case '.': case '!': case '?':
165 return &tp_period_rep;
166 }
167 return &tp_space_rep;
168 }
169
170 if (s[pos]=='-') {
171 pos++;
172 while ((pos<N(s)) && (s[pos]=='-')) pos++;
173 return &tp_hyph_rep;
174 }
175
176 if (is_iso_alpha (s[pos])) {
177 while ((pos<N(s)) && is_iso_alpha (s[pos])) pos++;
178 return &tp_normal_rep;
179 }
180
181 if (is_numeric (s[pos])) { // can not be a '.'
182 while ((pos<N(s)) && is_numeric (s[pos])) pos++;
183 while (s[pos-1]=='.') pos--;
184 return &tp_normal_rep;
185 }
186
187 if (s[pos]=='<') {
188 while ((pos<N(s)) && (s[pos]!='>')) pos++;
189 if (pos<N(s)) pos++;
190 return &tp_normal_rep;
191 }
192
193 pos++;
194 return &tp_normal_rep;
195 }
196
197 array<int>
get_hyphens(string s)198 french_language_rep::get_hyphens (string s) {
199 return ::get_hyphens (s, patterns, hyphenations);
200 }
201
202 void
hyphenate(string s,int after,string & left,string & right)203 french_language_rep::hyphenate (
204 string s, int after, string& left, string& right)
205 {
206 array<int> penalty= get_hyphens (s);
207 std_hyphenate (s, after, left, right, penalty[after]);
208 }
209
210 /******************************************************************************
211 * Eastern text languages / using UCS entities
212 ******************************************************************************/
213
214 struct ucs_text_language_rep: language_rep {
215 hashmap<string,string> patterns;
216 hashmap<string,string> hyphenations;
217
218 ucs_text_language_rep (string lan_name, string hyph_name);
219 text_property advance (tree t, int& pos);
220 array<int> get_hyphens (string s);
221 void hyphenate (string s, int after, string& left, string& right);
222 bool unicode;
223 };
224
ucs_text_language_rep(string lan_name,string hyph_name)225 ucs_text_language_rep::ucs_text_language_rep (string lan_name, string hyph_name):
226 language_rep (lan_name), patterns ("?"), hyphenations ("?")
227 { load_hyphen_tables (hyph_name, patterns, hyphenations, false); }
228
229 text_property
advance(tree t,int & pos)230 ucs_text_language_rep::advance (tree t, int& pos) {
231 //TODO: replace methods is_punctuation (), is_iso_alpha () and is_numeric (),
232 // by equivalents taking into account unicode entities.
233 string s= t->label;
234 if (pos == N(s)) return &tp_normal_rep;
235
236 if (s[pos]==' ') {
237 pos++;
238 // while ((pos<N(s)) && (s[pos]==' ')) pos++;
239 if ((pos == N(s)) || (!is_punctuation (s[pos])))
240 return &tp_space_rep;
241 return &tp_nb_space_rep;
242 }
243
244 if (is_punctuation (s[pos])) {
245 while ((pos<N(s)) && is_punctuation (s[pos])) pos++;
246 if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
247 switch (s[pos-1]) {
248 case ',': case ':': case ';': case '`': case '\'':
249 return &tp_space_rep;
250 case '.': case '!': case '?':
251 return &tp_period_rep;
252 }
253 return &tp_space_rep;
254 }
255
256 if (s[pos]=='-') {
257 pos++;
258 while ((pos<N(s)) && (s[pos]=='-')) pos++;
259 return &tp_hyph_rep;
260 }
261
262 if (is_iso_alpha (s[pos]) || (s[pos]=='<')) {
263 while ((pos<N(s)) && (is_iso_alpha (s[pos]) || (s[pos]=='<'))) {
264 if (s[pos]=='<') {
265 while ((pos<N(s)) && (s[pos]!='>')) pos++;
266 if (pos<N(s)) pos++;
267 }
268 else
269 pos++;
270 }
271 return &tp_normal_rep;
272 }
273
274 if (is_numeric (s[pos])) { // can not be a '.'
275 while ((pos<N(s)) && is_numeric (s[pos])) pos++;
276 while (s[pos-1]=='.') pos--;
277 return &tp_normal_rep;
278 }
279
280 pos++;
281 return &tp_normal_rep;
282 }
283
284 array<int>
get_hyphens(string s)285 ucs_text_language_rep::get_hyphens (string s) {
286 return ::get_hyphens (s, patterns, hyphenations, true);
287 }
288
289 void
hyphenate(string s,int after,string & left,string & right)290 ucs_text_language_rep::hyphenate (
291 string s, int after, string& left, string& right)
292 {
293 array<int> penalty= get_hyphens (s);
294 std_hyphenate (s, after, left, right, penalty[after], true);
295 }
296
297 /******************************************************************************
298 * Oriental languages
299 ******************************************************************************/
300
301 struct oriental_language_rep: language_rep {
302 hashmap<string,bool> punct;
303 oriental_language_rep (string lan_name);
304 text_property advance (tree t, int& pos);
305 array<int> get_hyphens (string s);
306 void hyphenate (string s, int after, string& left, string& right);
307 };
308
oriental_language_rep(string lan_name)309 oriental_language_rep::oriental_language_rep (string lan_name):
310 language_rep (lan_name), punct (false)
311 {
312 punct (".")= true;
313 punct (",")= true;
314 punct (":")= true;
315 punct (";")= true;
316 punct ("!")= true;
317 punct ("?")= true;
318 punct ("<#3000>")= true;
319 punct ("<#3001>")= true;
320 punct ("<#3002>")= true;
321 punct ("<#3003>")= true;
322 punct ("<#3004>")= true;
323 punct ("<#3005>")= true;
324 punct ("<#3006>")= true;
325 punct ("<#3007>")= true;
326 punct ("<#3008>")= true;
327 punct ("<#3009>")= true;
328 punct ("<#300a>")= true;
329 punct ("<#300b>")= true;
330 punct ("<#300c>")= true;
331 punct ("<#300d>")= true;
332 punct ("<#300e>")= true;
333 punct ("<#300f>")= true;
334 punct ("<#300A>")= true;
335 punct ("<#300B>")= true;
336 punct ("<#300C>")= true;
337 punct ("<#300D>")= true;
338 punct ("<#300E>")= true;
339 punct ("<#300F>")= true;
340 punct ("<#ff01>")= true;
341 punct ("<#ff0c>")= true;
342 punct ("<#ff0e>")= true;
343 punct ("<#ff1a>")= true;
344 punct ("<#ff1b>")= true;
345 punct ("<#ff1f>")= true;
346 punct ("<#FF01>")= true;
347 punct ("<#FF0C>")= true;
348 punct ("<#FF0E>")= true;
349 punct ("<#FF1A>")= true;
350 punct ("<#FF1B>")= true;
351 punct ("<#FF1F>")= true;
352 }
353
354 text_property
advance(tree t,int & pos)355 oriental_language_rep::advance (tree t, int& pos) {
356 string s= t->label;
357 if (pos == N(s)) return &tp_normal_rep;
358
359 if (s[pos] == ' ') {
360 pos++;
361 return &tp_space_rep;
362 }
363
364 if (pos < N(s) && !test (s, pos, "<#")) {
365 while (pos < N(s) && s[pos] != ' ' && !test (s, pos, "<#"))
366 tm_char_forwards (s, pos);
367 return &tp_cjk_no_break_rep;
368 }
369
370 int start= pos;
371 tm_char_forwards (s, pos);
372 string c= s (start, pos);
373 int next= pos;
374 tm_char_forwards (s, next);
375 string x= s (pos, next);
376
377 if (punct->contains (c)) {
378 if (punct->contains (x) || pos == N(s))
379 return &tp_cjk_no_break_period_rep;
380 else return &tp_cjk_period_rep;
381 }
382 else {
383 if (punct->contains (x) || pos == N(s))
384 return &tp_cjk_no_break_rep;
385 else return &tp_cjk_normal_rep;
386 }
387 }
388
389 array<int>
get_hyphens(string s)390 oriental_language_rep::get_hyphens (string s) {
391 int i;
392 array<int> penalty (N(s)+1);
393 for (i=0; i<N(penalty); i++) penalty[i]= HYPH_INVALID;
394 return penalty;
395 }
396
397 void
hyphenate(string s,int after,string & left,string & right)398 oriental_language_rep::hyphenate (
399 string s, int after, string& left, string& right)
400 {
401 left = s (0, after+1);
402 right= s (after+1, N(s));
403 }
404
405 /******************************************************************************
406 * Locales
407 ******************************************************************************/
408
409 string
windows_locale_to_language(string s)410 windows_locale_to_language (string s) {
411 if (s == "Bulgarian_Bulgaria.1251") return "bulgarian";
412 if (s == "Chinese_People's Republic of China.936")
413 return "chinese"; // for windows xp
414 if (s == "Chinese (Simplified)_People's Republic of China.936")
415 return "chinese"; // for windows 7
416 if (s == "Chinese_Taiwan.950")
417 return "taiwanese"; // for windows xp
418 if (s == "Chinese (Traditional)_Taiwan.950")
419 return "taiwanese"; // for windows 7
420 if (s == "Croatian_Croatia.1250") return "croatian";
421 if (s == "Czech_Czech Republic.1250") return "czech";
422 if (s == "Danish_Denmark.1252") return "danish";
423 if (s == "Dutch_Netherlands.1252") return "dutch";
424 if (s == "English_United States.1252") return "english";
425 if (s == "English_United Kingdom.1252") return "british";
426 if (s == "Finnish_Finland.1252") return "finnish";
427 if (s == "French_France.1252") return "french";
428 if (s == "German_Germany.1252") return "german";
429 if (s == "Greek_Greece.1253") return "greek";
430 if (s == "Hungarian_Hungary.1250") return "hungarian";
431 if (s == "Italian_Italy.1252") return "italian";
432 if (s == "Japanese_Japan.932") return "japanese";
433 if (s == "Korean_Korea.949") return "korean";
434 if (s == "Polish_Poland.1250") return "polish";
435 if (s == "Portuguese_Portugal.1252") return "portuguese";
436 if (s == "Romanian_Romania.1250") return "romanian";
437 if (s == "Russian_Russia.1251") return "russian";
438 if (s == "Slovenian_Slovenia.1250") return "slovene";
439 if (s == "Spanish_Spain.1252") return "spanish";
440 if (s == "Swedish_Sweden.1252") return "swedish";
441 if (s == "Ukrainian_Ukraine.1251") return "ukrainian";
442 return "english";
443 }
444
445 string
locale_to_language(string s)446 locale_to_language (string s) {
447 if (N(s) > 5) s= s (0, 5);
448 if (s == "en_GB") return "british";
449 if (s == "zh_TW") return "taiwanese";
450 if (N(s) > 2) s= s (0, 2);
451 if (s == "bg") return "bulgarian";
452 if (s == "zh") return "chinese";
453 if (s == "hr") return "croatian";
454 if (s == "cs") return "czech";
455 if (s == "da") return "danish";
456 if (s == "nl") return "dutch";
457 if (s == "en") return "english";
458 if (s == "fi") return "finnish";
459 if (s == "fr") return "french";
460 if (s == "de") return "german";
461 if (s == "gr") return "greek";
462 if (s == "hu") return "hungarian";
463 if (s == "it") return "italian";
464 if (s == "ja") return "japanese";
465 if (s == "ko") return "korean";
466 if (s == "pl") return "polish";
467 if (s == "pt") return "portuguese";
468 if (s == "ro") return "romanian";
469 if (s == "ru") return "russian";
470 if (s == "sl") return "slovene";
471 if (s == "es") return "spanish";
472 if (s == "sv") return "swedish";
473 if (s == "uk") return "ukrainian";
474 return "english";
475 }
476
477 string
language_to_locale(string s)478 language_to_locale (string s) {
479 if (s == "american") return "en_US";
480 if (s == "british") return "en_GB";
481 if (s == "bulgarian") return "bg_BG";
482 if (s == "chinese") return "zh_CN";
483 if (s == "croatian") return "hr_HR";
484 if (s == "czech") return "cs_CZ";
485 if (s == "danish") return "da_DK";
486 if (s == "dutch") return "nl_NL";
487 if (s == "english") return "en_US";
488 if (s == "finnish") return "fi_FI";
489 if (s == "french") return "fr_FR";
490 if (s == "german") return "de_DE";
491 if (s == "greek") return "gr_GR";
492 if (s == "hungarian") return "hu_HU";
493 if (s == "italian") return "it_IT";
494 if (s == "japanese") return "ja_JP";
495 if (s == "korean") return "ko_KR";
496 if (s == "polish") return "pl_PL";
497 if (s == "portuguese") return "pt_PT";
498 if (s == "romanian") return "ro_RO";
499 if (s == "russian") return "ru_RU";
500 if (s == "slovene") return "sl_SI";
501 if (s == "spanish") return "es_ES";
502 if (s == "swedish") return "sv_SV";
503 if (s == "taiwanese") return "zh_TW";
504 if (s == "ukrainian") return "uk_UA";
505 return "en_US";
506 }
507
508 string
language_to_local_ISO_charset(string s)509 language_to_local_ISO_charset (string s) {
510 if (s == "bulgarian") return "ISO-8859-5";
511 if (s == "chinese") return "";
512 if (s == "croatian") return "ISO-8859-2";
513 if (s == "czech") return "ISO-8859-2";
514 if (s == "greek") return "ISO-8859-7";
515 if (s == "hungarian") return "ISO-8859-2";
516 if (s == "japanese") return "";
517 if (s == "korean") return "";
518 if (s == "polish") return "ISO-8859-2";
519 if (s == "romanian") return "ISO-8859-2";
520 if (s == "russian") return "ISO-8859-5";
521 if (s == "slovene") return "ISO-8859-2";
522 if (s == "taiwanese") return "";
523 if (s == "ukrainian") return "ISO-8859-5";
524 return "ISO-8859-1";
525 }
526
527 string
get_locale_language()528 get_locale_language () {
529 #if defined(_WIN32) || defined(__WIN32__)
530 return windows_locale_to_language (setlocale (LC_ALL, ""));
531 #else
532 string env_lan= get_env ("LC_ALL");
533 if (env_lan != "") return locale_to_language (env_lan);
534 env_lan= get_env ("LC_MESSAGES");
535 if (env_lan != "") return locale_to_language (env_lan);
536 env_lan= get_env ("LANG");
537 if (env_lan != "") return locale_to_language (env_lan);
538 env_lan= get_env ("GDM_LANG");
539 if (env_lan != "") return locale_to_language (env_lan);
540 return "english";
541 #endif
542 }
543
544 string
get_locale_charset()545 get_locale_charset () {
546 #if defined(__MINGW__) || defined(__MINGW32__)
547 return ("UTF-8");
548 #else
549 return nl_langinfo (CODESET);
550 #endif
551 }
552
553 /******************************************************************************
554 * Getting a formatted date
555 ******************************************************************************/
556
557 #ifdef QTTEXMACS
558 string
get_date(string lan,string fm)559 get_date (string lan, string fm) {
560 return qt_get_date(lan, fm);
561 }
562
563 string
pretty_time(int t)564 pretty_time (int t) {
565 return qt_pretty_time (t);
566 }
567 #else
568
569 static bool
invalid_format(string s)570 invalid_format (string s) {
571 if (N(s) == 0) return true;
572 for (int i=0; i<N(s); i++)
573 if (!(is_alpha (s[i]) || is_numeric (s[i]) ||
574 s[i] == ' ' || s[i] == '%' || s[i] == '.' || s[i] == ',' ||
575 s[i] == '+' || s[i] == '-' || s[i] == ':'))
576 return true;
577 return false;
578 }
579
580 static string
simplify_date(string s)581 simplify_date (string s) {
582 int i, n=N(s);
583 string r;
584 for (i=0; i<n; i++)
585 if ((s[i]!='0') || ((N(r)>0) && is_digit(r[N(r)-1]))) r << s[i];
586 return r;
587 }
588
589 string
get_date(string lan,string fm)590 get_date (string lan, string fm) {
591 //#if defined(__MINGW__) || defined(__MINGW32__) || defined(OS_WIN32)
592 // return win32::get_date(lan, fm);
593 if (invalid_format (fm)) {
594 if ((lan == "british") || (lan == "english") || (lan == "american"))
595 fm= "%B %d, %Y";
596 else if (lan == "german")
597 fm= "%d. %B %Y";
598 else if (lan == "chinese" || lan == "japanese" ||
599 lan == "korean" || lan == "taiwanese")
600 {
601 string y= simplify_date (var_eval_system ("date +\"%Y\""));
602 string m= simplify_date (var_eval_system ("date +\"%m\""));
603 string d= simplify_date (var_eval_system ("date +\"%d\""));
604 if (lan == "japanese")
605 return y * "<#5e74>" * m * "<#6708>" * d * "<#65e5>";
606 if (lan == "korean")
607 return y * "<#b144> " * m * "<#c6d4> " * d * "<#c77c>";
608 return y * "," * m * "," * d;
609 }
610 else fm= "%d %B %Y";
611 }
612 lan= language_to_locale (lan);
613 string lvar= "LC_TIME";
614 if (get_env (lvar) == "") lvar= "LC_ALL";
615 if (get_env (lvar) == "") lvar= "LANG";
616 string old= get_env (lvar);
617 set_env (lvar, lan);
618 string date= simplify_date (var_eval_system ("date +\"" * fm * "\""));
619 if ((lan == "cz_CZ") || (lan == "hu_HU") || (lan == "pl_PL"))
620 date= il2_to_cork (date);
621 // if (lan == "ru_RU") date= iso_to_koi8 (date);
622 set_env (lvar, old);
623 return date;
624 }
625
626 string
pretty_time(int t)627 pretty_time (int t) {
628 return var_eval_system ("date -r " * as_string (t));
629 }
630 #endif
631
632 /******************************************************************************
633 * Main interface
634 ******************************************************************************/
635
// Shorthand for a pointer to constant characters.
typedef char const* const_char_ptr;
637
638 static language
make_ucs_text_language(string s,string h)639 make_ucs_text_language (string s, string h) {
640 return tm_new<ucs_text_language_rep> (s, h);
641 }
642
643 static language
make_text_language(string s,string h)644 make_text_language (string s, string h) {
645 return tm_new<text_language_rep> (s, h);
646 }
647
648 static language
make_french_language(string s,string h)649 make_french_language (string s, string h) {
650 return tm_new<french_language_rep> (s, h);
651 }
652
653 static language
make_oriental_language(string s)654 make_oriental_language (string s) {
655 return tm_new<oriental_language_rep> (s);
656 }
657
658 language
text_language(string s)659 text_language (string s) {
660 if (language::instances -> contains (s)) return language (s);
661 if (s == "american") return make_text_language (s, "us");
662 if (s == "british") return make_text_language (s, "ukenglish");
663 if (s == "bulgarian") return make_ucs_text_language (s, "bulgarian");
664 if (s == "chinese") return make_oriental_language (s);
665 if (s == "croatian") return make_text_language (s, "croatian");
666 if (s == "czech") return make_text_language (s, "czech");
667 if (s == "danish") return make_text_language (s, "danish");
668 if (s == "dutch") return make_text_language (s, "dutch");
669 if (s == "english") return make_text_language (s, "us");
670 if (s == "finnish") return make_text_language (s, "finnish");
671 if (s == "french") return make_french_language (s, "french");
672 if (s == "german") return make_text_language (s, "german");
673 if (s == "greek") return make_text_language (s, "greek");
674 if (s == "hungarian") return make_text_language (s, "hungarian");
675 if (s == "italian") return make_text_language (s, "italian");
676 if (s == "japanese") return make_oriental_language (s);
677 if (s == "korean") return make_oriental_language (s);
678 if (s == "polish") return make_text_language (s, "polish");
679 if (s == "portuguese") return make_text_language (s, "portuguese");
680 if (s == "romanian") return make_text_language (s, "romanian");
681 if (s == "russian") return make_ucs_text_language (s, "russian");
682 if (s == "slovene") return make_text_language (s, "slovene");
683 if (s == "spanish") return make_text_language (s, "spanish");
684 if (s == "swedish") return make_text_language (s, "swedish");
685 if (s == "taiwanese") return make_oriental_language (s);
686 if (s == "ukrainian") return make_ucs_text_language (s, "ukrainian");
687 if (s == "verbatim") return tm_new<verb_language_rep> ("verbatim");
688 failed_error << "The language was " << s << "\n";
689 FAILED ("unknown language");
690 return tm_new<verb_language_rep> ("verbatim");
691 }
692