1
2 /******************************************************************************
3 * MODULE : dictionary.cpp
4 * DESCRIPTION: used for translations and analyzing text
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
11
12 #include "dictionary.hpp"
13 #include "file.hpp"
14 #include "convert.hpp"
15 #include "converter.hpp"
16 #include "universal.hpp"
17 #include "drd_std.hpp"
18 #include "scheme.hpp"
19
// NOTE(review): presumably emits the shared resource bookkeeping for
// `dictionary` (e.g. the `dictionary::instances` table used below) --
// confirm against the RESOURCE_CODE macro definition.
RESOURCE_CODE(dictionary);
21
22 /******************************************************************************
23 * Dictionary initialization
24 ******************************************************************************/
25
dictionary_rep(string from2,string to2)26 dictionary_rep::dictionary_rep (string from2, string to2):
27 rep<dictionary> (from2 * "-" * to2), table ("?"), from (from2), to (to2) {}
28
29 void
load(url u)30 dictionary_rep::load (url u) {
31 if (is_none (u)) return;
32 if (is_or (u)) {
33 load (u[1]);
34 load (u[2]);
35 return;
36 }
37
38 string s;
39 if (load_string (u, s, false)) return;
40 tree t= block_to_scheme_tree (s);
41 if (!is_tuple (t)) return;
42
43 int i, n= N(t);
44 for (i=0; i<n; i++)
45 if (is_func (t[i], TUPLE, 2) &&
46 is_atomic (t[i][0]) && is_atomic (t[i][1]))
47 {
48 string l= t[i][0]->label; if (is_quoted (l)) l= scm_unquote (l);
49 string r= t[i][1]->label; if (is_quoted (r)) r= scm_unquote (r);
50 if (to == "chinese" || to == "japanese" ||
51 to == "korean" || to == "taiwanese" ||
52 to == "russian" || to == "ukrainian" || to == "bulgarian" ||
53 to == "german" || to == "greek")
54 r= utf8_to_cork (r);
55 table (l)= r;
56 }
57 }
58
59 void
load(string fname)60 dictionary_rep::load (string fname) {
61 fname= fname * ".scm";
62 if (DEBUG_VERBOSE) debug_convert << "Loading " << fname << "\n";
63 url u= url ("$TEXMACS_DIC_PATH") * url_wildcard ("*" * fname);
64 load (expand (complete (u)));
65 }
66
67 dictionary
load_dictionary(string from,string to)68 load_dictionary (string from, string to) {
69 string name= from * "-" * to;
70 if (dictionary::instances -> contains (name))
71 return dictionary (name);
72 dictionary dict= tm_new<dictionary_rep> (from, to);
73 if (from != to) dict->load (name);
74 return dict;
75 }
76
77 /******************************************************************************
78 * Translation routines
79 ******************************************************************************/
80
81 string
translate(string s,bool guess)82 dictionary_rep::translate (string s, bool guess) {
83 if (s == "" || from == to) return s;
84 //cout << "Translate <" << s << ">\n";
85 // Is s in dictionary?
86 if (table->contains (s) && table[s] != "")
87 return table[s];
88
89 // Is lowercase version of s in dictionary?
90 string ls= locase_first (s);
91 if (table->contains (ls) && table[ls] != "")
92 return uni_upcase_first (table[ls]);
93
94 // Attempt to split the string and translate its parts?
95 if (!guess) return s;
96
97 // Remove trailing non iso_alpha characters
98 int i, n= N(s);
99 for (i=0; i<n; i++)
100 if (is_iso_alpha (s[i]))
101 break;
102 int start= i;
103 for (i=n; i>0; i--)
104 if (is_iso_alpha (s[i-1]))
105 break;
106 int end= i;
107 if (start >= n || end <= 0) return s;
108 if (start != 0 || end != n) {
109 ASSERT (start < end, "invalid situation");
110 string s1= translate (s (0, start));
111 string s2= translate (s (start, end));
112 string s3= translate (s (end, n));
113 if (to == "french") {
114 if (s3 == ":") s3= " :";
115 if (s3 == "!") s3= " !";
116 if (s3 == "?") s3= " ?";
117 }
118 return s1 * s2 * s3;
119 }
120
121 // Break at last non iso_alpha character which is not a space
122 for (i=n; i>0; i--)
123 if (!is_iso_alpha (s[i-1]) && s[i-1] != ' ')
124 break;
125 if (i > 0) {
126 string s1= translate (s (0, i));
127 string s2= translate (s (i, n));
128 return s1 * s2;
129 }
130
131 // No translation available
132 return s;
133 }
134
135 /******************************************************************************
136 * Interface
137 ******************************************************************************/
138
139 static string in_lan ("english");
140 static string out_lan ("english");
141
set_input_language(string s)142 void set_input_language (string s) { in_lan= s; }
get_input_language()143 string get_input_language () { return in_lan; }
set_output_language(string s)144 void set_output_language (string s) { out_lan= s; }
get_output_language()145 string get_output_language () { return out_lan; }
146
147 string
translate(string s,string from,string to)148 translate (string s, string from, string to) {
149 if (N(from)==0) return s;
150 dictionary dict= load_dictionary (from, to);
151 return dict->translate (s);
152 }
153
154 string
translate(string s)155 translate (string s) {
156 return translate (s, "english", out_lan);
157 }
158
159 string
translate(const char * s)160 translate (const char* s) {
161 return translate (string (s), "english", out_lan);
162 }
163
164 void
force_load_dictionary(string from,string to)165 force_load_dictionary (string from, string to) {
166 string name= from * "-" * to;
167 if (dictionary::instances -> contains (name))
168 dictionary::instances -> reset (name);
169 load_dictionary (from, to);
170 notify_preference ("language");
171 }
172
173 string
translate_as_is(string s,string from,string to)174 translate_as_is (string s, string from, string to) {
175 dictionary dict= load_dictionary (from, to);
176 return dict->translate (s, false);
177 }
178
179 string
translate_as_is(string s)180 translate_as_is (string s) {
181 return translate_as_is (s, "english", out_lan);
182 }
183
184 /******************************************************************************
185 * Translation of trees
186 ******************************************************************************/
187
188 tree
translate_replace(tree t,string from,string to,int n=1)189 translate_replace (tree t, string from, string to, int n=1)
190 {
191 if (N(t) < 2) return t[0];
192
193 string s= t[0]->label;
194 string arg= "%" * as_string (n);
195
196 if (is_atomic (t[1])) {
197 s= replace (s, arg, translate (t[1]->label, from, to));
198 return translate_replace (concat (s) * t(2, N(t)), from, to, n+1);
199 }
200 else {
201 int l= search_forwards (arg, s);
202 if (l < 0) return t;
203 int r= l + N(arg);
204 tree r1= tree_translate (t[1], from, to);
205 tree r2= translate_replace (tuple (s (r, N(s))) * t(2, N(t)), from, to, n+1);
206 s= s(0, l);
207 if (is_atomic (r1)) {
208 if (is_atomic (r2)) return s * r1->label * r2->label;
209 else return concat (s * r1->label, r2);
210 }
211 return concat (s, r1, r2);
212 }
213 }
214
215 tree
tree_translate(tree t,string from,string to)216 tree_translate (tree t, string from, string to) {
217 //cout << "Translating " << t << " from " << from << " into " << to << "\n";
218 if (is_atomic (t))
219 return translate (t->label, from, to);
220 else if (is_compound (t, "replace")) {
221 if (!is_atomic (t[0])) {
222 //cout << "tree_translate() ERROR: first child should be a string\n";
223 return t;
224 }
225 t[0]->label= translate_as_is (t[0]->label, from, to);
226 return translate_replace (t, from, to);
227 }
228 else if (is_compound (t, "verbatim", 1))
229 return t[0];
230 else if (is_compound (t, "localize", 1))
231 return tree_translate (t[0], "english", out_lan);
232 else if (is_compound (t, "render-key", 1))
233 return compound ("render-key", tree_translate (t[0], from, to));
234 else {
235 tree r (t, N(t));
236 for (int i=0; i<N(t); i++)
237 if (!the_drd->is_accessible_child (t, i)) r[i]= t[i];
238 else r[i]= tree_translate (t[i], from, to);
239 return r;
240 }
241 }
242
243 tree
tree_translate(tree t)244 tree_translate (tree t) {
245 return tree_translate (t, "english", out_lan);
246 }
247
248 /******************************************************************************
249 * Translate and serialize
250 ******************************************************************************/
251
252 static string
serialize(tree t)253 serialize (tree t) {
254 if (is_atomic (t))
255 return t->label;
256 else if (is_concat (t)) {
257 string s;
258 for (int i=0; i<N(t); i++) {
259 tree u= t[i];
260 while (is_concat (u) && N(u) > 0) u= u[0];
261 if (i > 0 && is_compound (u, "render-key"))
262 if (!is_atomic (t[i-1]) || !ends (t[i-1]->label, " ")) {
263 if (use_macos_fonts () || gui_is_qt ()) s << " ";
264 else s << " ";
265 }
266 s << serialize (t[i]);
267 }
268 return s;
269 }
270 else if (is_compound (t, "render-key", 1))
271 return serialize (t[0]);
272 else if (is_func (t, WITH))
273 return serialize (t[N(t)-1]);
274 else if (is_compound (t, "math", 1))
275 return serialize (t[0]);
276 else if (is_compound (t, "op", 1)) {
277 t= t[0];
278 if (gui_is_qt ()) {
279 if (t == "<leftarrow>") return "Left";
280 if (t == "<rightarrow>") return "Right";
281 if (t == "<uparrow>") return "Up";
282 if (t == "<downarrow>") return "Down";
283 }
284 else {
285 if (t == "<leftarrow>") return "left";
286 if (t == "<rightarrow>") return "right";
287 if (t == "<uparrow>") return "up";
288 if (t == "<downarrow>") return "down";
289 }
290 return serialize (t);
291 }
292 else return "";
293 }
294
295 string
translate(tree t,string from,string to)296 translate (tree t, string from, string to) {
297 return serialize (tree_translate (t, from, to));
298 }
299
300 string
translate(tree t)301 translate (tree t) {
302 return serialize (tree_translate (t));
303 }
304