1 
2 /******************************************************************************
3 * MODULE     : dictionary.cpp
4 * DESCRIPTION: used for translations and analyzing text
5 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11 
12 #include "dictionary.hpp"
13 #include "file.hpp"
14 #include "convert.hpp"
15 #include "converter.hpp"
16 #include "universal.hpp"
17 #include "drd_std.hpp"
18 #include "scheme.hpp"
19 
20 RESOURCE_CODE(dictionary);
21 
22 /******************************************************************************
23 * Dictionary initialization
24 ******************************************************************************/
25 
dictionary_rep(string from2,string to2)26 dictionary_rep::dictionary_rep (string from2, string to2):
27   rep<dictionary> (from2 * "-" * to2), table ("?"), from (from2), to (to2) {}
28 
29 void
load(url u)30 dictionary_rep::load (url u) {
31   if (is_none (u)) return;
32   if (is_or (u)) {
33     load (u[1]);
34     load (u[2]);
35     return;
36   }
37 
38   string s;
39   if (load_string (u, s, false)) return;
40   tree t= block_to_scheme_tree (s);
41   if (!is_tuple (t)) return;
42 
43   int i, n= N(t);
44   for (i=0; i<n; i++)
45     if (is_func (t[i], TUPLE, 2) &&
46         is_atomic (t[i][0]) && is_atomic (t[i][1]))
47     {
48       string l= t[i][0]->label; if (is_quoted (l)) l= scm_unquote (l);
49       string r= t[i][1]->label; if (is_quoted (r)) r= scm_unquote (r);
50       if (to == "chinese" ||  to == "japanese"  ||
51           to == "korean"  ||  to == "taiwanese" ||
52           to == "russian" ||  to == "ukrainian" || to == "bulgarian" ||
53           to == "german" || to == "greek")
54         r= utf8_to_cork (r);
55       table (l)= r;
56     }
57 }
58 
59 void
load(string fname)60 dictionary_rep::load (string fname) {
61   fname= fname * ".scm";
62   if (DEBUG_VERBOSE) debug_convert << "Loading " << fname << "\n";
63   url u= url ("$TEXMACS_DIC_PATH") * url_wildcard ("*" * fname);
64   load (expand (complete (u)));
65 }
66 
67 dictionary
load_dictionary(string from,string to)68 load_dictionary (string from, string to) {
69   string name= from * "-" * to;
70   if (dictionary::instances -> contains (name))
71     return dictionary (name);
72   dictionary dict= tm_new<dictionary_rep> (from, to);
73   if (from != to) dict->load (name);
74   return dict;
75 }
76 
77 /******************************************************************************
78 * Translation routines
79 ******************************************************************************/
80 
81 string
translate(string s,bool guess)82 dictionary_rep::translate (string s, bool guess) {
83   if (s == "" || from == to) return s;
84   //cout << "Translate <" << s << ">\n";
85   // Is s in dictionary?
86   if (table->contains (s) && table[s] != "")
87     return table[s];
88 
89   // Is lowercase version of s in dictionary?
90   string ls= locase_first (s);
91   if (table->contains (ls) && table[ls] != "")
92     return uni_upcase_first (table[ls]);
93 
94   // Attempt to split the string and translate its parts?
95   if (!guess) return s;
96 
97   // Remove trailing non iso_alpha characters
98   int i, n= N(s);
99   for (i=0; i<n; i++)
100     if (is_iso_alpha (s[i]))
101       break;
102   int start= i;
103   for (i=n; i>0; i--)
104     if (is_iso_alpha (s[i-1]))
105       break;
106   int end= i;
107   if (start >= n || end <= 0) return s;
108   if (start != 0 || end != n) {
109     ASSERT (start < end, "invalid situation");
110     string s1= translate (s (0, start));
111     string s2= translate (s (start, end));
112     string s3= translate (s (end, n));
113     if (to == "french") {
114       if (s3 == ":") s3= " :";
115       if (s3 == "!") s3= " !";
116       if (s3 == "?") s3= " ?";
117     }
118     return s1 * s2 * s3;
119   }
120 
121   // Break at last non iso_alpha character which is not a space
122   for (i=n; i>0; i--)
123     if (!is_iso_alpha (s[i-1]) && s[i-1] != ' ')
124       break;
125   if (i > 0) {
126     string s1= translate (s (0, i));
127     string s2= translate (s (i, n));
128     return s1 * s2;
129   }
130 
131   // No translation available
132   return s;
133 }
134 
135 /******************************************************************************
136 * Interface
137 ******************************************************************************/
138 
139 static string in_lan ("english");
140 static string out_lan ("english");
141 
set_input_language(string s)142 void set_input_language (string s) { in_lan= s; }
get_input_language()143 string get_input_language () { return in_lan; }
set_output_language(string s)144 void set_output_language (string s) { out_lan= s; }
get_output_language()145 string get_output_language () { return out_lan; }
146 
147 string
translate(string s,string from,string to)148 translate (string s, string from, string to) {
149   if (N(from)==0) return s;
150   dictionary dict= load_dictionary (from, to);
151   return dict->translate (s);
152 }
153 
154 string
translate(string s)155 translate (string s) {
156   return translate (s, "english", out_lan);
157 }
158 
159 string
translate(const char * s)160 translate (const char* s) {
161   return translate (string (s), "english", out_lan);
162 }
163 
164 void
force_load_dictionary(string from,string to)165 force_load_dictionary (string from, string to) {
166   string name= from * "-" * to;
167   if (dictionary::instances -> contains (name))
168     dictionary::instances -> reset (name);
169   load_dictionary (from, to);
170   notify_preference ("language");
171 }
172 
173 string
translate_as_is(string s,string from,string to)174 translate_as_is (string s, string from, string to) {
175   dictionary dict= load_dictionary (from, to);
176   return dict->translate (s, false);
177 }
178 
179 string
translate_as_is(string s)180 translate_as_is (string s) {
181   return translate_as_is (s, "english", out_lan);
182 }
183 
184 /******************************************************************************
185 * Translation of trees
186 ******************************************************************************/
187 
188 tree
translate_replace(tree t,string from,string to,int n=1)189 translate_replace (tree t, string from, string to, int n=1)
190 {
191   if (N(t) < 2) return t[0];
192 
193   string s= t[0]->label;
194   string arg= "%" * as_string (n);
195 
196   if (is_atomic (t[1])) {
197     s= replace (s, arg, translate (t[1]->label, from, to));
198     return translate_replace (concat (s) * t(2, N(t)), from, to, n+1);
199   }
200   else {
201     int l= search_forwards (arg, s);
202     if (l < 0) return t;
203     int r= l + N(arg);
204     tree r1= tree_translate (t[1], from, to);
205     tree r2= translate_replace (tuple (s (r, N(s))) * t(2, N(t)), from, to, n+1);
206     s= s(0, l);
207     if (is_atomic (r1)) {
208       if (is_atomic (r2)) return s * r1->label * r2->label;
209       else                return concat (s * r1->label, r2);
210     }
211     return concat (s, r1, r2);
212   }
213 }
214 
215 tree
tree_translate(tree t,string from,string to)216 tree_translate (tree t, string from, string to) {
217   //cout << "Translating " << t << " from " << from << " into " << to << "\n";
218   if (is_atomic (t))
219     return translate (t->label, from, to);
220   else if (is_compound (t, "replace")) {
221     if (!is_atomic (t[0])) {
222         //cout << "tree_translate() ERROR: first child should be a string\n";
223       return t;
224     }
225     t[0]->label= translate_as_is (t[0]->label, from, to);
226     return translate_replace (t, from, to);
227   }
228   else if (is_compound (t, "verbatim", 1))
229     return t[0];
230   else if (is_compound (t, "localize", 1))
231     return tree_translate (t[0], "english", out_lan);
232   else if (is_compound (t, "render-key", 1))
233     return compound ("render-key", tree_translate (t[0], from, to));
234   else {
235     tree r (t, N(t));
236     for (int i=0; i<N(t); i++)
237       if (!the_drd->is_accessible_child (t, i)) r[i]= t[i];
238       else r[i]= tree_translate (t[i], from, to);
239     return r;
240   }
241 }
242 
243 tree
tree_translate(tree t)244 tree_translate (tree t) {
245   return tree_translate (t, "english", out_lan);
246 }
247 
248 /******************************************************************************
249 * Translate and serialize
250 ******************************************************************************/
251 
252 static string
serialize(tree t)253 serialize (tree t) {
254   if (is_atomic (t))
255     return t->label;
256   else if (is_concat (t)) {
257     string s;
258     for (int i=0; i<N(t); i++) {
259       tree u= t[i];
260       while (is_concat (u) && N(u) > 0) u= u[0];
261       if (i > 0 && is_compound (u, "render-key"))
262 	if (!is_atomic (t[i-1]) || !ends (t[i-1]->label, " ")) {
263 	  if (use_macos_fonts () || gui_is_qt ()) s << "  ";
264 	  else s << " ";
265 	}
266       s << serialize (t[i]);
267     }
268     return s;
269   }
270   else if (is_compound (t, "render-key", 1))
271     return serialize (t[0]);
272   else if (is_func (t, WITH))
273     return serialize (t[N(t)-1]);
274   else if (is_compound (t, "math", 1))
275     return serialize (t[0]);
276   else if (is_compound (t, "op", 1)) {
277     t= t[0];
278     if (gui_is_qt ()) {
279       if (t == "<leftarrow>") return "Left";
280       if (t == "<rightarrow>") return "Right";
281       if (t == "<uparrow>") return "Up";
282       if (t == "<downarrow>") return "Down";
283     }
284     else {
285       if (t == "<leftarrow>") return "left";
286       if (t == "<rightarrow>") return "right";
287       if (t == "<uparrow>") return "up";
288       if (t == "<downarrow>") return "down";
289     }
290     return serialize (t);
291   }
292   else return "";
293 }
294 
295 string
translate(tree t,string from,string to)296 translate (tree t, string from, string to) {
297   return serialize (tree_translate (t, from, to));
298 }
299 
300 string
translate(tree t)301 translate (tree t) {
302   return serialize (tree_translate (t));
303 }
304