1 
2 /******************************************************************************
3 * MODULE     : text_language.cpp
4 * DESCRIPTION: natural textual languages
5 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11 
12 #if defined(_WIN32) || defined(__WIN32__)
13 #include <locale.h>
14 #endif
15 
16 #if !defined(__MINGW__) && !defined(__MINGW32__)
17 #include <langinfo.h>
18 #endif
19 
20 #include "analyze.hpp"
21 #include "hyphenate.hpp"
22 #include "impl_language.hpp"
23 #include "sys_utils.hpp"
24 
25 #ifdef QTTEXMACS
26 #include "Qt/qt_utilities.hpp"
27 #endif
28 
29 /******************************************************************************
30 * Western text languages / 8 bit charset
31 ******************************************************************************/
32 
33 struct text_language_rep: language_rep {
34   hashmap<string,string> patterns;
35   hashmap<string,string> hyphenations;
36 
37   text_language_rep (string lan_name, string hyph_name);
38   text_property advance (tree t, int& pos);
39   array<int> get_hyphens (string s);
40   void hyphenate (string s, int after, string& left, string& right);
41 };
42 
text_language_rep(string lan_name,string hyph_name)43 text_language_rep::text_language_rep (string lan_name, string hyph_name):
44   language_rep (lan_name), patterns ("?"), hyphenations ("?") {
45     load_hyphen_tables (hyph_name, patterns, hyphenations, true); }
46 
47 text_property
advance(tree t,int & pos)48 text_language_rep::advance (tree t, int& pos) {
49   string s= t->label;
50   if (pos == N(s)) return &tp_normal_rep;
51 
52   if (s[pos]==' ') {
53     pos++;
54     // while ((pos<N(s)) && (s[pos]==' ')) pos++;
55     if ((pos == N(s)) || (!is_punctuation (s[pos])))
56       return &tp_space_rep;
57     return &tp_nb_space_rep;
58   }
59 
60   if (is_punctuation (s[pos])) {
61     while ((pos<N(s)) && is_punctuation (s[pos])) pos++;
62     if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
63     switch (s[pos-1]) {
64     case ',': case ':': case ';': case '`': case '\'':
65       return &tp_space_rep;
66     case '.': case '!': case '?':
67       return &tp_period_rep;
68     }
69     return &tp_space_rep;
70   }
71 
72   if (s[pos]=='-') {
73     pos++;
74     while ((pos<N(s)) && (s[pos]=='-')) pos++;
75     return &tp_hyph_rep;
76   }
77 
78   if (is_iso_alpha (s[pos])) {
79     while ((pos<N(s)) && is_iso_alpha (s[pos])) pos++;
80     return &tp_normal_rep;
81   }
82 
83   if (is_numeric (s[pos])) { // can not be a '.'
84     while ((pos<N(s)) && is_numeric (s[pos])) pos++;
85     while (s[pos-1]=='.') pos--;
86     return &tp_normal_rep;
87   }
88 
89   if (s[pos]=='<') {
90     while ((pos<N(s)) && (s[pos]!='>')) pos++;
91     if (pos<N(s)) pos++;
92     return &tp_normal_rep;
93   }
94 
95   pos++;
96   return &tp_normal_rep;
97 }
98 
99 array<int>
get_hyphens(string s)100 text_language_rep::get_hyphens (string s) {
101   return ::get_hyphens (s, patterns, hyphenations);
102 }
103 
104 void
hyphenate(string s,int after,string & left,string & right)105 text_language_rep::hyphenate (
106   string s, int after, string& left, string& right)
107 {
108   array<int> penalty= get_hyphens (s);
109   std_hyphenate (s, after, left, right, penalty[after]);
110 }
111 
112 /******************************************************************************
113 * French typography
114 ******************************************************************************/
115 
116 struct french_language_rep: language_rep {
117   hashmap<string,string> patterns;
118   hashmap<string,string> hyphenations;
119 
120   french_language_rep (string lan_name, string hyph_name);
121   text_property advance (tree t, int& pos);
122   array<int> get_hyphens (string s);
123   void hyphenate (string s, int after, string& left, string& right);
124 };
125 
french_language_rep(string lan_name,string hyph_name)126 french_language_rep::french_language_rep (string lan_name, string hyph_name):
127   language_rep (lan_name), patterns ("?"), hyphenations ("?") {
128     load_hyphen_tables (hyph_name, patterns, hyphenations, true); }
129 
130 inline bool
is_french_punctuation(register char c)131 is_french_punctuation (register char c) {
132   return is_punctuation (c) || (c=='\23') || (c=='\24');
133 }
134 
135 text_property
advance(tree t,int & pos)136 french_language_rep::advance (tree t, int& pos) {
137   string s= t->label;
138   if (pos == N(s)) return &tp_normal_rep;
139 
140   if (s[pos]==' ') {
141     pos++;
142     if (pos>1 && s[pos-2] == '\23')
143       return &tp_nb_thin_space_rep;
144     // while ((pos<N(s)) && (s[pos]==' ')) pos++;
145     if ((pos == N(s)) || (!is_french_punctuation (s[pos])))
146       return &tp_space_rep;
147     if (s[pos] == '\23')
148       return &tp_space_rep;
149     if (s[pos] == ':' || s[pos] == ';' ||
150         s[pos] == '!' || s[pos] == '?' || s[pos] == '\24')
151       return &tp_nb_thin_space_rep;
152     return &tp_nb_space_rep;
153   }
154 
155   if (is_french_punctuation (s[pos])) {
156     while ((pos<N(s)) && is_french_punctuation (s[pos])) pos++;
157     if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
158     switch (s[pos-1]) {
159     case '\23':
160       return &tp_nb_thin_space_rep;
161     case '\24':
162     case ',': case ':': case ';': case '`': case '\'':
163       return &tp_space_rep;
164     case '.': case '!': case '?':
165       return &tp_period_rep;
166     }
167     return &tp_space_rep;
168   }
169 
170   if (s[pos]=='-') {
171     pos++;
172     while ((pos<N(s)) && (s[pos]=='-')) pos++;
173     return &tp_hyph_rep;
174   }
175 
176   if (is_iso_alpha (s[pos])) {
177     while ((pos<N(s)) && is_iso_alpha (s[pos])) pos++;
178     return &tp_normal_rep;
179   }
180 
181   if (is_numeric (s[pos])) { // can not be a '.'
182     while ((pos<N(s)) && is_numeric (s[pos])) pos++;
183     while (s[pos-1]=='.') pos--;
184     return &tp_normal_rep;
185   }
186 
187   if (s[pos]=='<') {
188     while ((pos<N(s)) && (s[pos]!='>')) pos++;
189     if (pos<N(s)) pos++;
190     return &tp_normal_rep;
191   }
192 
193   pos++;
194   return &tp_normal_rep;
195 }
196 
197 array<int>
get_hyphens(string s)198 french_language_rep::get_hyphens (string s) {
199   return ::get_hyphens (s, patterns, hyphenations);
200 }
201 
202 void
hyphenate(string s,int after,string & left,string & right)203 french_language_rep::hyphenate (
204   string s, int after, string& left, string& right)
205 {
206   array<int> penalty= get_hyphens (s);
207   std_hyphenate (s, after, left, right, penalty[after]);
208 }
209 
210 /******************************************************************************
211 * Eastern text languages / using UCS entities
212 ******************************************************************************/
213 
214 struct ucs_text_language_rep: language_rep {
215   hashmap<string,string> patterns;
216   hashmap<string,string> hyphenations;
217 
218   ucs_text_language_rep (string lan_name, string hyph_name);
219   text_property advance (tree t, int& pos);
220   array<int> get_hyphens (string s);
221   void hyphenate (string s, int after, string& left, string& right);
222   bool unicode;
223 };
224 
ucs_text_language_rep(string lan_name,string hyph_name)225 ucs_text_language_rep::ucs_text_language_rep (string lan_name, string hyph_name):
226   language_rep (lan_name), patterns ("?"), hyphenations ("?")
227   { load_hyphen_tables (hyph_name, patterns, hyphenations, false); }
228 
229 text_property
advance(tree t,int & pos)230 ucs_text_language_rep::advance (tree t, int& pos) {
231   //TODO: replace methods is_punctuation (), is_iso_alpha () and is_numeric (),
232   //      by equivalents taking into account unicode entities.
233   string s= t->label;
234   if (pos == N(s)) return &tp_normal_rep;
235 
236   if (s[pos]==' ') {
237     pos++;
238     // while ((pos<N(s)) && (s[pos]==' ')) pos++;
239     if ((pos == N(s)) || (!is_punctuation (s[pos])))
240       return &tp_space_rep;
241     return &tp_nb_space_rep;
242   }
243 
244   if (is_punctuation (s[pos])) {
245     while ((pos<N(s)) && is_punctuation (s[pos])) pos++;
246     if ((pos==N(s)) || (s[pos]!=' ')) return &tp_normal_rep;
247     switch (s[pos-1]) {
248     case ',': case ':': case ';': case '`': case '\'':
249       return &tp_space_rep;
250     case '.': case '!': case '?':
251       return &tp_period_rep;
252     }
253     return &tp_space_rep;
254   }
255 
256   if (s[pos]=='-') {
257     pos++;
258     while ((pos<N(s)) && (s[pos]=='-')) pos++;
259     return &tp_hyph_rep;
260   }
261 
262   if (is_iso_alpha (s[pos]) || (s[pos]=='<')) {
263     while ((pos<N(s)) && (is_iso_alpha (s[pos]) || (s[pos]=='<'))) {
264       if (s[pos]=='<') {
265         while ((pos<N(s)) && (s[pos]!='>')) pos++;
266         if (pos<N(s)) pos++;
267       }
268       else
269         pos++;
270     }
271     return &tp_normal_rep;
272   }
273 
274   if (is_numeric (s[pos])) { // can not be a '.'
275     while ((pos<N(s)) && is_numeric (s[pos])) pos++;
276     while (s[pos-1]=='.') pos--;
277     return &tp_normal_rep;
278   }
279 
280   pos++;
281   return &tp_normal_rep;
282 }
283 
284 array<int>
get_hyphens(string s)285 ucs_text_language_rep::get_hyphens (string s) {
286   return ::get_hyphens (s, patterns, hyphenations, true);
287 }
288 
289 void
hyphenate(string s,int after,string & left,string & right)290 ucs_text_language_rep::hyphenate (
291   string s, int after, string& left, string& right)
292 {
293   array<int> penalty= get_hyphens (s);
294   std_hyphenate (s, after, left, right, penalty[after], true);
295 }
296 
297 /******************************************************************************
298 * Oriental languages
299 ******************************************************************************/
300 
301 struct oriental_language_rep: language_rep {
302   hashmap<string,bool> punct;
303   oriental_language_rep (string lan_name);
304   text_property advance (tree t, int& pos);
305   array<int> get_hyphens (string s);
306   void hyphenate (string s, int after, string& left, string& right);
307 };
308 
oriental_language_rep(string lan_name)309 oriental_language_rep::oriental_language_rep (string lan_name):
310   language_rep (lan_name), punct (false)
311 {
312   punct (".")= true;
313   punct (",")= true;
314   punct (":")= true;
315   punct (";")= true;
316   punct ("!")= true;
317   punct ("?")= true;
318   punct ("<#3000>")= true;
319   punct ("<#3001>")= true;
320   punct ("<#3002>")= true;
321   punct ("<#3003>")= true;
322   punct ("<#3004>")= true;
323   punct ("<#3005>")= true;
324   punct ("<#3006>")= true;
325   punct ("<#3007>")= true;
326   punct ("<#3008>")= true;
327   punct ("<#3009>")= true;
328   punct ("<#300a>")= true;
329   punct ("<#300b>")= true;
330   punct ("<#300c>")= true;
331   punct ("<#300d>")= true;
332   punct ("<#300e>")= true;
333   punct ("<#300f>")= true;
334   punct ("<#300A>")= true;
335   punct ("<#300B>")= true;
336   punct ("<#300C>")= true;
337   punct ("<#300D>")= true;
338   punct ("<#300E>")= true;
339   punct ("<#300F>")= true;
340   punct ("<#ff01>")= true;
341   punct ("<#ff0c>")= true;
342   punct ("<#ff0e>")= true;
343   punct ("<#ff1a>")= true;
344   punct ("<#ff1b>")= true;
345   punct ("<#ff1f>")= true;
346   punct ("<#FF01>")= true;
347   punct ("<#FF0C>")= true;
348   punct ("<#FF0E>")= true;
349   punct ("<#FF1A>")= true;
350   punct ("<#FF1B>")= true;
351   punct ("<#FF1F>")= true;
352 }
353 
354 text_property
advance(tree t,int & pos)355 oriental_language_rep::advance (tree t, int& pos) {
356   string s= t->label;
357   if (pos == N(s)) return &tp_normal_rep;
358 
359   if (s[pos] == ' ') {
360     pos++;
361     return &tp_space_rep;
362   }
363 
364   if (pos < N(s) && !test (s, pos, "<#")) {
365     while (pos < N(s) && s[pos] != ' ' && !test (s, pos, "<#"))
366       tm_char_forwards (s, pos);
367     return &tp_cjk_no_break_rep;
368   }
369 
370   int start= pos;
371   tm_char_forwards (s, pos);
372   string c= s (start, pos);
373   int next= pos;
374   tm_char_forwards (s, next);
375   string x= s (pos, next);
376 
377   if (punct->contains (c)) {
378     if (punct->contains (x) || pos == N(s))
379       return &tp_cjk_no_break_period_rep;
380     else return &tp_cjk_period_rep;
381   }
382   else {
383     if (punct->contains (x) || pos == N(s))
384       return &tp_cjk_no_break_rep;
385     else return &tp_cjk_normal_rep;
386   }
387 }
388 
389 array<int>
get_hyphens(string s)390 oriental_language_rep::get_hyphens (string s) {
391   int i;
392   array<int> penalty (N(s)+1);
393   for (i=0; i<N(penalty); i++) penalty[i]= HYPH_INVALID;
394   return penalty;
395 }
396 
397 void
hyphenate(string s,int after,string & left,string & right)398 oriental_language_rep::hyphenate (
399   string s, int after, string& left, string& right)
400 {
401   left = s (0, after+1);
402   right= s (after+1, N(s));
403 }
404 
405 /******************************************************************************
406 * Locales
407 ******************************************************************************/
408 
409 string
windows_locale_to_language(string s)410 windows_locale_to_language (string s) {
411   if (s == "Bulgarian_Bulgaria.1251") return "bulgarian";
412   if (s == "Chinese_People's Republic of China.936")
413     return "chinese"; // for windows xp
414   if (s == "Chinese (Simplified)_People's Republic of China.936")
415     return "chinese"; // for windows 7
416   if (s == "Chinese_Taiwan.950")
417     return "taiwanese"; // for windows xp
418   if (s == "Chinese (Traditional)_Taiwan.950")
419     return "taiwanese"; // for windows 7
420   if (s == "Croatian_Croatia.1250") return "croatian";
421   if (s == "Czech_Czech Republic.1250") return "czech";
422   if (s == "Danish_Denmark.1252") return "danish";
423   if (s == "Dutch_Netherlands.1252") return "dutch";
424   if (s == "English_United States.1252") return "english";
425   if (s == "English_United Kingdom.1252") return "british";
426   if (s == "Finnish_Finland.1252") return "finnish";
427   if (s == "French_France.1252") return "french";
428   if (s == "German_Germany.1252") return "german";
429   if (s == "Greek_Greece.1253") return "greek";
430   if (s == "Hungarian_Hungary.1250") return "hungarian";
431   if (s == "Italian_Italy.1252") return "italian";
432   if (s == "Japanese_Japan.932") return "japanese";
433   if (s == "Korean_Korea.949") return "korean";
434   if (s == "Polish_Poland.1250") return "polish";
435   if (s == "Portuguese_Portugal.1252") return "portuguese";
436   if (s == "Romanian_Romania.1250") return "romanian";
437   if (s == "Russian_Russia.1251") return "russian";
438   if (s == "Slovenian_Slovenia.1250") return "slovene";
439   if (s == "Spanish_Spain.1252") return "spanish";
440   if (s == "Swedish_Sweden.1252") return "swedish";
441   if (s == "Ukrainian_Ukraine.1251") return "ukrainian";
442   return "english";
443 }
444 
445 string
locale_to_language(string s)446 locale_to_language (string s) {
447   if (N(s) > 5) s= s (0, 5);
448   if (s == "en_GB") return "british";
449   if (s == "zh_TW") return "taiwanese";
450   if (N(s) > 2) s= s (0, 2);
451   if (s == "bg") return "bulgarian";
452   if (s == "zh") return "chinese";
453   if (s == "hr") return "croatian";
454   if (s == "cs") return "czech";
455   if (s == "da") return "danish";
456   if (s == "nl") return "dutch";
457   if (s == "en") return "english";
458   if (s == "fi") return "finnish";
459   if (s == "fr") return "french";
460   if (s == "de") return "german";
461   if (s == "gr") return "greek";
462   if (s == "hu") return "hungarian";
463   if (s == "it") return "italian";
464   if (s == "ja") return "japanese";
465   if (s == "ko") return "korean";
466   if (s == "pl") return "polish";
467   if (s == "pt") return "portuguese";
468   if (s == "ro") return "romanian";
469   if (s == "ru") return "russian";
470   if (s == "sl") return "slovene";
471   if (s == "es") return "spanish";
472   if (s == "sv") return "swedish";
473   if (s == "uk") return "ukrainian";
474   return "english";
475 }
476 
477 string
language_to_locale(string s)478 language_to_locale (string s) {
479   if (s == "american")   return "en_US";
480   if (s == "british")    return "en_GB";
481   if (s == "bulgarian")  return "bg_BG";
482   if (s == "chinese")    return "zh_CN";
483   if (s == "croatian")   return "hr_HR";
484   if (s == "czech")      return "cs_CZ";
485   if (s == "danish")     return "da_DK";
486   if (s == "dutch")      return "nl_NL";
487   if (s == "english")    return "en_US";
488   if (s == "finnish")    return "fi_FI";
489   if (s == "french")     return "fr_FR";
490   if (s == "german")     return "de_DE";
491   if (s == "greek")      return "gr_GR";
492   if (s == "hungarian")  return "hu_HU";
493   if (s == "italian")    return "it_IT";
494   if (s == "japanese")   return "ja_JP";
495   if (s == "korean")     return "ko_KR";
496   if (s == "polish")     return "pl_PL";
497   if (s == "portuguese") return "pt_PT";
498   if (s == "romanian")   return "ro_RO";
499   if (s == "russian")    return "ru_RU";
500   if (s == "slovene")    return "sl_SI";
501   if (s == "spanish")    return "es_ES";
502   if (s == "swedish")    return "sv_SV";
503   if (s == "taiwanese")  return "zh_TW";
504   if (s == "ukrainian")  return "uk_UA";
505   return "en_US";
506 }
507 
508 string
language_to_local_ISO_charset(string s)509 language_to_local_ISO_charset (string s) {
510   if (s == "bulgarian")  return "ISO-8859-5";
511   if (s == "chinese")    return "";
512   if (s == "croatian")   return "ISO-8859-2";
513   if (s == "czech")      return "ISO-8859-2";
514   if (s == "greek")      return "ISO-8859-7";
515   if (s == "hungarian")  return "ISO-8859-2";
516   if (s == "japanese")   return "";
517   if (s == "korean")     return "";
518   if (s == "polish")     return "ISO-8859-2";
519   if (s == "romanian")   return "ISO-8859-2";
520   if (s == "russian")    return "ISO-8859-5";
521   if (s == "slovene")    return "ISO-8859-2";
522   if (s == "taiwanese")  return "";
523   if (s == "ukrainian")  return "ISO-8859-5";
524   return "ISO-8859-1";
525 }
526 
527 string
get_locale_language()528 get_locale_language () {
529 #if defined(_WIN32) || defined(__WIN32__)
530   return windows_locale_to_language (setlocale (LC_ALL, ""));
531 #else
532   string env_lan= get_env ("LC_ALL");
533   if (env_lan != "") return locale_to_language (env_lan);
534   env_lan= get_env ("LC_MESSAGES");
535   if (env_lan != "") return locale_to_language (env_lan);
536   env_lan= get_env ("LANG");
537   if (env_lan != "") return locale_to_language (env_lan);
538   env_lan= get_env ("GDM_LANG");
539   if (env_lan != "") return locale_to_language (env_lan);
540   return "english";
541 #endif
542 }
543 
544 string
get_locale_charset()545 get_locale_charset () {
546 #if defined(__MINGW__) || defined(__MINGW32__)
547   return ("UTF-8");
548 #else
549   return nl_langinfo (CODESET);
550 #endif
551 }
552 
553 /******************************************************************************
554 * Getting a formatted date
555 ******************************************************************************/
556 
557 #ifdef QTTEXMACS
558 string
get_date(string lan,string fm)559 get_date (string lan, string fm) {
560   return qt_get_date(lan, fm);
561 }
562 
563 string
pretty_time(int t)564 pretty_time (int t) {
565   return qt_pretty_time (t);
566 }
567 #else
568 
569 static bool
invalid_format(string s)570 invalid_format (string s) {
571   if (N(s) == 0) return true;
572   for (int i=0; i<N(s); i++)
573     if (!(is_alpha (s[i]) || is_numeric (s[i]) ||
574 	  s[i] == ' ' || s[i] == '%' || s[i] == '.' || s[i] == ',' ||
575 	  s[i] == '+' || s[i] == '-' || s[i] == ':'))
576       return true;
577   return false;
578 }
579 
580 static string
simplify_date(string s)581 simplify_date (string s) {
582   int i, n=N(s);
583   string r;
584   for (i=0; i<n; i++)
585     if ((s[i]!='0') || ((N(r)>0) && is_digit(r[N(r)-1]))) r << s[i];
586   return r;
587 }
588 
589 string
get_date(string lan,string fm)590 get_date (string lan, string fm) {
591 //#if defined(__MINGW__) || defined(__MINGW32__) || defined(OS_WIN32)
592 //  return win32::get_date(lan, fm);
593   if (invalid_format (fm)) {
594     if ((lan == "british") || (lan == "english") || (lan == "american"))
595       fm= "%B %d, %Y";
596     else if (lan == "german")
597       fm= "%d. %B %Y";
598     else if (lan == "chinese" || lan == "japanese" ||
599 	     lan == "korean" || lan == "taiwanese")
600       {
601 	string y= simplify_date (var_eval_system ("date +\"%Y\""));
602 	string m= simplify_date (var_eval_system ("date +\"%m\""));
603 	string d= simplify_date (var_eval_system ("date +\"%d\""));
604 	if (lan == "japanese")
605 	  return y * "<#5e74>" * m * "<#6708>" * d * "<#65e5>";
606 	if (lan == "korean")
607 	  return y * "<#b144> " * m * "<#c6d4> " * d * "<#c77c>";
608 	return y * "," * m * "," * d;
609       }
610     else fm= "%d %B %Y";
611   }
612   lan= language_to_locale (lan);
613   string lvar= "LC_TIME";
614   if (get_env (lvar) == "") lvar= "LC_ALL";
615   if (get_env (lvar) == "") lvar= "LANG";
616   string old= get_env (lvar);
617   set_env (lvar, lan);
618   string date= simplify_date (var_eval_system ("date +\"" * fm * "\""));
619   if ((lan == "cz_CZ") || (lan == "hu_HU") || (lan == "pl_PL"))
620     date= il2_to_cork (date);
621   // if (lan == "ru_RU") date= iso_to_koi8 (date);
622   set_env (lvar, old);
623   return date;
624 }
625 
626 string
pretty_time(int t)627 pretty_time (int t) {
628   return var_eval_system ("date -r " * as_string (t));
629 }
630 #endif
631 
632 /******************************************************************************
633 * Main interface
634 ******************************************************************************/
635 
// shorthand for a pointer to constant characters
typedef char const* const_char_ptr;
637 
638 static language
make_ucs_text_language(string s,string h)639 make_ucs_text_language (string s, string h) {
640   return tm_new<ucs_text_language_rep> (s, h);
641 }
642 
643 static language
make_text_language(string s,string h)644 make_text_language (string s, string h) {
645   return tm_new<text_language_rep> (s, h);
646 }
647 
648 static language
make_french_language(string s,string h)649 make_french_language (string s, string h) {
650   return tm_new<french_language_rep> (s, h);
651 }
652 
653 static language
make_oriental_language(string s)654 make_oriental_language (string s) {
655   return tm_new<oriental_language_rep> (s);
656 }
657 
658 language
text_language(string s)659 text_language (string s) {
660   if (language::instances -> contains (s)) return language (s);
661   if (s == "american")   return make_text_language (s, "us");
662   if (s == "british")    return make_text_language (s, "ukenglish");
663   if (s == "bulgarian")  return make_ucs_text_language (s, "bulgarian");
664   if (s == "chinese")    return make_oriental_language (s);
665   if (s == "croatian")   return make_text_language (s, "croatian");
666   if (s == "czech")      return make_text_language (s, "czech");
667   if (s == "danish")     return make_text_language (s, "danish");
668   if (s == "dutch")      return make_text_language (s, "dutch");
669   if (s == "english")    return make_text_language (s, "us");
670   if (s == "finnish")    return make_text_language (s, "finnish");
671   if (s == "french")     return make_french_language (s, "french");
672   if (s == "german")     return make_text_language (s, "german");
673   if (s == "greek")      return make_text_language (s, "greek");
674   if (s == "hungarian")  return make_text_language (s, "hungarian");
675   if (s == "italian")    return make_text_language (s, "italian");
676   if (s == "japanese")   return make_oriental_language (s);
677   if (s == "korean")     return make_oriental_language (s);
678   if (s == "polish")     return make_text_language (s, "polish");
679   if (s == "portuguese") return make_text_language (s, "portuguese");
680   if (s == "romanian")   return make_text_language (s, "romanian");
681   if (s == "russian")    return make_ucs_text_language (s, "russian");
682   if (s == "slovene")    return make_text_language (s, "slovene");
683   if (s == "spanish")    return make_text_language (s, "spanish");
684   if (s == "swedish")    return make_text_language (s, "swedish");
685   if (s == "taiwanese")  return make_oriental_language (s);
686   if (s == "ukrainian")  return make_ucs_text_language (s, "ukrainian");
687   if (s == "verbatim")   return tm_new<verb_language_rep> ("verbatim");
688   failed_error << "The language was " << s << "\n";
689   FAILED ("unknown language");
690   return tm_new<verb_language_rep> ("verbatim");
691 }
692