1 #ifndef __VARIABLES_HPP__
2 #define __VARIABLES_HPP__
3
4 #include <ctype.h>
5 #include <vector>
6 #include <map>
7 #include <iostream>
8
9 using std::cout;
10 using std::cerr;
11 using std::endl;
12
13 #include "guiding.hpp"
14 #include "trie.hpp"
15 #include "matrix-ut.hpp"
16 #include "fast-sprintf.hpp"
17
18 #define MAX_VARIABLE_NAME 256
19
20 extern "C"
21 {
22 #include "api-structures.h" // for definition of Sentence
23 }
24
25 // #define SAT_DEBUG
26 // #define _VARS
27
28 #ifdef SAT_DEBUG
29 #define _VARS
30 #endif
31
32
33
34 #ifdef _VARS
35 extern std::ostream& var_defs_stream;
36 #endif
37
construct_link_label(const char * connector1,const char * connector2)38 static char* construct_link_label(const char* connector1, const char* connector2) {
39 char* result = (char*)xalloc((std::max(strlen(connector1), strlen(connector2)) + 1)*
40 sizeof(char));
41 char* presult = result;
42
43 /* Skip head-dependent indicator in each connector. */
44 if (islower(*connector1)) connector1++;
45 if (islower(*connector2)) connector2++;
46
47 while (*connector1 != '\0' && *connector2 != '\0') {
48 if (*connector1 == '*')
49 *presult++ = *connector2;
50 else if (*connector2 == '*')
51 *presult++ = *connector1;
52 else
53 *presult++ = std::max(*connector1, *connector2);
54
55 connector1++;
56 connector2++;
57 }
58 while(*connector1 != '\0')
59 *presult++ = *connector1++;
60 while(*connector2 != '\0')
61 *presult++ = *connector2++;
62 *presult = '\0';
63 return result;
64 }
65
66
67 ////////////////////////////////////////////////////////////////////////////
68 class Variables
69 {
70 public:
Variables(Sentence sent)71 Variables(Sentence sent)
72 : _link_variable_map(sent->length)
73 ,_linked_variable_map(sent->length, -1)
74 ,_link_cw_variable_map(sent->length)
75 ,_guiding(new CostDistanceGuiding(sent))
76 ,_var(0)
77 {
78 }
79
~Variables()80 ~Variables() {
81 std::vector<LinkVar*>::iterator i;
82 for (i = _link_variables.begin(); i != _link_variables.end(); i++) {
83 if ((*i) != 0) {
84 xfree((*i)->label, strlen((*i)->label));
85 delete *i;
86 }
87 }
88
89 for (size_t vi = 0; vi < _linked_variables.size(); vi++)
90 delete _linked_variables[vi];
91
92 delete _guiding;
93 }
94
95
96 /*
97 * General purpose variables specified by their names
98 */
99
var_exists(const char * name)100 bool var_exists(const char* name) {
101 try {
102 int num = _variable_trie.lookup(name);
103 return num != Trie<int>::NOT_FOUND;
104 } catch (const std::string& s) {
105 cout << s << endl;
106 exit(EXIT_FAILURE);
107 }
108 }
109
110 // If guiding params are unknown, they are set to default
string(const char * name)111 int string(const char* name)
112 {
113 int var;
114 if (!get_var_from_trie(name, var)) {
115 #ifdef _VARS
116 var_defs_stream << name << "\t" << var << endl;
117 #endif
118 _guiding->setStringParameters(var, name);
119 }
120 assert(var != -1, "Var == -1");
121 return var;
122 }
123
124 // If the cost is explicitly given, guiding params are calculated
125 // using the cost. Any params set earlier are overridden.
string_cost(const char * name,double cost)126 int string_cost(const char* name, double cost)
127 {
128 int var;
129 var = string(name);
130 _guiding->setStringParameters(var, name, cost);
131 assert(var != -1, "Var == -1");
132 return var;
133 }
134
135 /*
136 * Variables that specify that a part of word tag is satisfied
137 * without making any connections of the given direction.
138 */
139
140 // If guiding params are unknown, they are set to default
epsilon(char * v,char dir)141 int epsilon(char* v, char dir) {
142 char name[MAX_VARIABLE_NAME];
143 dir = (dir == '+') ? 'r' : 'l';
144 char* s = name;
145 *s++ = dir;
146 *s++ = 'e';
147 s = fast_sprintf(s, v);
148 int var;
149 if (!get_var_from_trie(name, var)) {
150 #ifdef _VARS
151 var_defs_stream << name << "\t" << var << endl;
152 #endif
153 _guiding->setEpsilonParameters(var);
154 }
155 assert(var != -1, "Var == -1");
156 return var;
157 }
158
159 /*
160 * linked(wi, wj)
161 * Variables that specify that two words are linked
162 */
163
164 // If guiding params are unknown, they are set to default
linked(int wi,int wj)165 int linked(int wi, int wj) {
166 assert(wi < wj, "Variables: linked should be ordered");
167 int var;
168 if (!get_linked_variable(wi, wj, var)) {
169 #ifdef _VARS
170 var_defs_stream << "linked_" << wi << "_" << wj << "\t" << var << endl;
171 #endif
172 add_linked_variable(wi, wj, var);
173 _guiding->setLinkedParameters(var, wi, wj);
174 }
175 assert(var != -1, "Var == -1");
176 return var;
177 }
178
179 /*
180 * link(wi, pi, wj, pj)
181 * Variables that specify that a direct link has been established
182 * between the connectors ci of the word_i at position i and
183 * cj of the word_j at position j
184 */
185
186 // If guiding params are unknown, they are set to default
link(int wi,int pi,const char * ci,Exp * ei,int wj,int pj,const char * cj,Exp * ej)187 int link(int wi, int pi, const char* ci, Exp* ei,
188 int wj, int pj, const char* cj, Exp* ej)
189 {
190 assert(wi < wj, "Variables: link should be ordered");
191 int var;
192 if (!get_link_variable(wi, pi, wj, pj, var)) {
193 #ifdef _VARS
194 var_defs_stream << "link_" << wi << "_" << pi << "_" << ci << "_"
195 << wj << "_" << pj << "_" << cj << "\t" << var << endl;
196 #endif
197 add_link_variable(wi, pi, ci, ei, wj, pj, cj, ej, var);
198 _guiding->setLinkParameters(var, wi, ci, wj, cj, _link_variables[var]->label);
199 }
200 assert(var != -1, "Var == -1");
201 return var;
202 }
203
204 // If the cost is specified, guiding params are calculated
205 // using the cost. Any guiding params that are set earlier are overridden
link_cost(int wi,int pi,const char * ci,Exp * ei,int wj,int pj,const char * cj,Exp * ej,double cost)206 int link_cost(int wi, int pi, const char* ci, Exp* ei,
207 int wj, int pj, const char* cj, Exp* ej,
208 double cost)
209 {
210 assert(wi < wj, "Variables: link should be ordered");
211 int var = link(wi, pi, ci, ei, wj, pj, cj, ej);
212 _guiding->setLinkParameters(var, wi, ci, wj, cj, link_variable(var)->label, cost);
213 assert(var != -1, "Var == -1");
214 return var;
215 }
216
217 /*
218 * link_cw(wi, wj, pj)
219 * Variables that specify that an indirect link has been established
220 * between the word i and connector cj of the word_j at position
221 * j.
222 */
223
224 // If guiding params for this variable are not set earlier, they are
225 // now set to default
link_cw(int wi,int wj,int pj,const char * cj)226 int link_cw(int wi, int wj, int pj, const char* cj) {
227 int var;
228 if (!get_link_cw_variable(wi, wj, pj, var)) {
229 #ifdef _VARS
230 var_defs_stream << "link_cw_"
231 << "(" << wj << "_" << pj << "_" << cj << ")_"
232 << wi
233 << "\t" << var << endl;
234 #endif
235 _guiding->setLinkCWParameters(var, wi, wj, cj);
236 }
237 assert(var != -1, "Var == -1");
238 return var;
239 }
240
241 #ifdef _CONNECTIVITY_
242 /* The following variables are deprecated */
243
244 // Variables that specify that words i and j are connected
con(int i,int j)245 int con(int i, int j) {
246 char name[MAX_VARIABLE_NAME];
247 char* s = name;
248 *s++ = 'c';
249 s = fast_sprintf(s, i);
250 *s++ = '_';
251 s = fast_sprintf(s, j);
252 int var;
253 if (!get_var_from_trie(name, var))
254 set_variable_sat_params(var, false);
255 return var;
256 }
257
258 // Auxiliary variables used for connectivity encoding
l_con(int i,int j,int k)259 int l_con(int i, int j, int k) {
260 int var;
261 if (!get_lcon_variable(i, j, k, var))
262 set_variable_sat_params(var, false);
263 return var;
264 }
265 #endif
266
267 /*
268 * link(wi, pi, wj, pj)
269 */
270 // Returns the indices of all link variables
link_variables() const271 const std::vector<int>& link_variables() const {
272 return _link_variables_indices;
273 }
274
275 // Returns the indices of all link_x_x_wj_pj variables
link_variables(int wj,int pj)276 const std::vector<int>& link_variables(int wj, int pj) {
277 std::pair<int, int> p(wj, pj);
278 return _link_variable_wp_map[p];
279 }
280
281 // Additional info about the link(wi, pi, wj, pj) variable
282 struct LinkVar
283 {
LinkVarVariables::LinkVar284 LinkVar(const std::string& _name, char* _label,
285 int _lw, int _lp, int _rw, int _rp,
286 const char* _lc, const char* _rc,
287 Exp* _le, Exp* _re)
288 : name(_name), label(_label),
289 left_word(_lw), right_word(_rw),
290 left_position(_lp), right_position(_rp),
291 left_connector(_lc), right_connector(_rc),
292 left_exp(_le), right_exp(_re)
293 {}
294
295 std::string name;
296 char* label;
297 int left_word;
298 int right_word;
299 int left_position;
300 int right_position;
301 const char* left_connector;
302 const char* right_connector;
303 Exp* left_exp;
304 Exp* right_exp;
305 };
306
307 // Returns additional info about the given link variable
link_variable(int var) const308 const LinkVar* link_variable(int var) const {
309 return _link_variables[var];
310 }
311
312 /*
313 * linked(wi, wj)
314 */
315 // Returns the indices of all linked variables
linked_variables() const316 const std::vector<int>& linked_variables() const {
317 return _linked_variables_indices;
318 }
319
320 // Additional info about the linked(i, j) variable
321 struct LinkedVar {
LinkedVarVariables::LinkedVar322 LinkedVar(int lw, int rw)
323 : left_word(lw), right_word(rw) {
324 }
325
326 int left_word;
327 int right_word;
328 };
329
330 // Returns additional info about the given linked variable
linked_variable(int var) const331 const LinkedVar* linked_variable(int var) const {
332 return _linked_variables[var];
333 }
334
335 /* Pass SAT search parameters to the MiniSAT solver */
setVariableParameters(Solver * solver)336 void setVariableParameters(Solver* solver) {
337 _guiding->passParametersToSolver(solver);
338 }
339
340 private:
341 /*
342 * Information about string variables
343 */
344
345 // What is the number of the variable with the given name?
346 Trie<int> _variable_trie;
347
348 /*
349 * Information about link(wi, pi, wj, pj) variables
350 */
351
352 // What is the number of the link(wi, pi, wj, pj) variable?
353 MatrixUpperTriangle< std::map<std::pair<int, int>, int> > _link_variable_map;
354
355
356 // What are the numbers of all link(wi, pi, wj, pj) variables?
357 std::vector<int> _link_variables_indices;
358
359 // What are the numbers of all link(x, x, wj, pj) variables?
360 std::map< std::pair<int, int>, std::vector<int> > _link_variable_wp_map;
361
362
363 // Additional info about the link(wi, pi, wj, pj) variable with the given number
364 std::vector<LinkVar*> _link_variables;
365
366 // Set this additional info
add_link_variable(int i,int pi,const char * ci,Exp * ei,int j,int pj,const char * cj,Exp * ej,size_t var)367 void add_link_variable(int i, int pi, const char* ci, Exp* ei,
368 int j, int pj, const char* cj, Exp* ej, size_t var)
369 {
370 /* The following variable is created but is never inserted to the trie,
371 and generating it has an observable performance impact.
372 The trie even doesn't have 'k'. */
373 #if 0
374 char name[MAX_VARIABLE_NAME];
375 char* s = name;
376 *s++ = 'l'; *s++ = 'i'; *s++ = 'n'; *s++ = 'k';
377 *s++ = '_';
378 s = fast_sprintf(s, i);
379 *s++ = '_';
380 s = fast_sprintf(s, pi);
381 *s++ = '_';
382 s = fast_sprintf(s, ci);
383 *s++ = '_';
384 s = fast_sprintf(s, j);
385 *s++ = '_';
386 s = fast_sprintf(s, pj);
387 *s++ = '_';
388 s = fast_sprintf(s, cj);
389 #endif
390 char* label = construct_link_label(ci, cj);
391
392 if (var >= _link_variables.size()) {
393 _link_variables.resize(var + 1, 0);
394 }
395 // The first argument was the redundant variable eliminated above
396 _link_variables[var] = new LinkVar("", label, i, pi, j, pj, ci, cj, ei, ej);
397 _link_variables_indices.push_back(var);
398 }
399
400 /*
401 * Information about linked(i, j) variables
402 */
403
404 // What is the number of the linked(i, j) variable?
405 MatrixUpperTriangle<int> _linked_variable_map;
406
407 // What are the numbers of all linked(i, j) variables?
408 std::vector<int> _linked_variables_indices;
409
410 // Additional info about the linked(i, j) variable
411 std::vector<LinkedVar*> _linked_variables;
412
413 // Set the additional info
add_linked_variable(int i,int j,size_t var)414 void add_linked_variable(int i, int j, size_t var) {
415 if (var >= _linked_variables.size()) {
416 _linked_variables.resize(var + 1, 0);
417 }
418 _linked_variables[var] = new LinkedVar(i, j);
419 _linked_variables_indices.push_back(var);
420 }
421
422 /*
423 * Information about the link_cw(w, wj, pj) variables
424 */
425 // What is the number of the link_cw(wi, wj, pj) variable?
426 Matrix< std::map<int, int> > _link_cw_variable_map;
427
428
429 #ifdef _CONNECTIVITY_
430 std::map<std::pair<std::pair<int, int>,int>, int> _lcon_variables;
431 #endif
432
433 /* SAT search parameters */
434 Guiding* _guiding;
435
436 /* Current free variable number */
437 size_t _var;
438
439 /* Get a variable number that has not been used before */
get_fresh_var(void)440 int get_fresh_var(void) {
441 return _var++;
442 }
443
444 /* Helper functions that retrieve variable numbers from appropriate
445 data structures. If the variable is not present, it is assigned a
446 fresh variable number, and false is returned. Otherwise, the number
447 is retrieved and true is returned. */
448
get_var_from_trie(const char * name,int & var)449 bool get_var_from_trie(const char* name, int& var) {
450 try {
451 int num = _variable_trie.lookup(name);
452 if (num != Trie<int>::NOT_FOUND) {
453 var = num;
454 return true;
455 }
456 else {
457 var = get_fresh_var();
458 _variable_trie.insert(name, var);
459 return false;
460 }
461 } catch (const std::string& s) {
462 cout << s << endl;
463 exit(EXIT_FAILURE);
464 }
465 }
466
467
get_2int_variable(int i,int j,int & var,Matrix<int> & mp)468 bool get_2int_variable(int i, int j, int& var,
469 Matrix<int>& mp) {
470 var = mp(i, j);
471 if (var == -1) {
472 var = get_fresh_var();
473 mp.set(i, j, var);
474 return false;
475 }
476 return true;
477 }
478
get_3int_variable(int i,int j,int pj,int & var,Matrix<std::map<int,int>> & mp)479 bool get_3int_variable(int i, int j, int pj, int& var,
480 Matrix< std::map<int, int> >& mp) {
481 std::map<int, int>& m = mp(i, j);
482 std::map<int, int>::iterator it = m.find(pj);
483 if (it == m.end()) {
484 var = get_fresh_var();
485 m[pj] = var;
486 return false;
487 } else {
488 var = it->second;
489 return true;
490 }
491 }
492
get_4int_variable(int i,int pi,int j,int pj,int & var,Matrix<std::map<std::pair<int,int>,int>> & mp)493 bool get_4int_variable(int i, int pi, int j, int pj, int& var,
494 Matrix< std::map<std::pair<int, int>, int> >& mp) {
495 std::map< std::pair<int, int>, int >& m = mp(i, j);
496 std::pair<int, int> p(pi, pj);
497 std::map< std::pair<int, int>, int >::iterator it = m.find(p);
498 if (it == m.end()) {
499 var = get_fresh_var();
500 m[p] = var;
501 return false;
502 } else {
503 var = it->second;
504 return true;
505 }
506 }
507
get_link_variable(int i,int pi,int j,int pj,int & var)508 bool get_link_variable(int i, int pi, int j, int pj, int& var) {
509 bool ret = get_4int_variable(i, pi, j, pj, var, _link_variable_map);
510 if (!ret) {
511 std::pair<int, int> p(j, pj);
512 _link_variable_wp_map[p].push_back(var);
513 }
514 return ret;
515 }
516
517
get_linked_variable(int i,int j,int & var)518 bool get_linked_variable(int i, int j, int& var) {
519 return get_2int_variable(i, j, var, _linked_variable_map);
520 }
521
get_link_cw_variable(int i,int j,int pj,int & var)522 bool get_link_cw_variable(int i, int j, int pj, int& var) {
523 return get_3int_variable(i, j, pj, var, _link_cw_variable_map);
524 }
525
526 #ifdef _CONNECTIVITY_
get_lcon_variable(int i,int j,int k,int & var)527 bool get_lcon_variable(int i, int j, int k, int& var) {
528 std::pair<std::pair<int, int>, int> p(std::pair<int, int>(i, j), k);
529 std::map<std::pair<std::pair<int, int>, int>, int>::iterator it = _lcon_variables.find(p);
530 if (it != _lcon_variables.end()) {
531 var = it->second;
532 return true;
533 } else {
534 var = get_fresh_var();
535 #ifdef _VARS
536 var_defs_stream << "lcon_" << i << "_" << j << "_" << k << "\t" << var << endl;
537 #endif
538 _lcon_variables[p] = var;
539 return false;
540 }
541 }
542 #endif
543
544 };
545
546 #endif
547