1 #ifndef __VARIABLES_HPP__
2 #define __VARIABLES_HPP__
3 
4 #include <ctype.h>
5 #include <vector>
6 #include <map>
7 #include <iostream>
8 
9 using std::cout;
10 using std::cerr;
11 using std::endl;
12 
13 #include "guiding.hpp"
14 #include "trie.hpp"
15 #include "matrix-ut.hpp"
16 #include "fast-sprintf.hpp"
17 
18 #define  MAX_VARIABLE_NAME 256
19 
20 extern "C"
21 {
22 #include "api-structures.h" // for definition of Sentence
23 }
24 
25 // #define SAT_DEBUG
26 // #define _VARS
27 
28 #ifdef SAT_DEBUG
29 #define _VARS
30 #endif
31 
32 
33 
34 #ifdef _VARS
35 extern std::ostream& var_defs_stream;
36 #endif
37 
construct_link_label(const char * connector1,const char * connector2)38 static char* construct_link_label(const char* connector1, const char* connector2) {
39   char* result = (char*)xalloc((std::max(strlen(connector1), strlen(connector2)) + 1)*
40                                sizeof(char));
41   char* presult = result;
42 
43   /* Skip head-dependent indicator in each connector. */
44   if (islower(*connector1)) connector1++;
45   if (islower(*connector2)) connector2++;
46 
47   while (*connector1 != '\0' && *connector2 != '\0') {
48     if (*connector1 == '*')
49       *presult++ = *connector2;
50     else if (*connector2 == '*')
51       *presult++ = *connector1;
52     else
53       *presult++ = std::max(*connector1, *connector2);
54 
55     connector1++;
56     connector2++;
57   }
58   while(*connector1 != '\0')
59     *presult++ = *connector1++;
60   while(*connector2 != '\0')
61     *presult++ = *connector2++;
62   *presult = '\0';
63   return result;
64 }
65 
66 
67 ////////////////////////////////////////////////////////////////////////////
68 class Variables
69 {
70 public:
Variables(Sentence sent)71   Variables(Sentence sent)
72     : _link_variable_map(sent->length)
73     ,_linked_variable_map(sent->length, -1)
74     ,_link_cw_variable_map(sent->length)
75     ,_guiding(new CostDistanceGuiding(sent))
76     ,_var(0)
77   {
78   }
79 
~Variables()80   ~Variables() {
81     std::vector<LinkVar*>::iterator i;
82     for (i = _link_variables.begin(); i != _link_variables.end(); i++) {
83       if ((*i) != 0) {
84         xfree((*i)->label, strlen((*i)->label));
85         delete *i;
86       }
87     }
88 
89     for (size_t vi = 0; vi < _linked_variables.size(); vi++)
90       delete _linked_variables[vi];
91 
92     delete _guiding;
93   }
94 
95 
96   /*
97    * General purpose variables specified by their names
98    */
99 
var_exists(const char * name)100   bool var_exists(const char* name) {
101     try {
102       int num = _variable_trie.lookup(name);
103       return num != Trie<int>::NOT_FOUND;
104       } catch (const std::string& s) {
105           cout << s << endl;
106           exit(EXIT_FAILURE);
107         }
108   }
109 
110   // If guiding params are unknown, they are set to default
string(const char * name)111   int string(const char* name)
112   {
113     int var;
114     if (!get_var_from_trie(name, var)) {
115 #ifdef _VARS
116       var_defs_stream << name << "\t" << var << endl;
117 #endif
118       _guiding->setStringParameters(var, name);
119     }
120     assert(var != -1, "Var == -1");
121     return var;
122   }
123 
124   // If the cost is explicitly given, guiding params are calculated
125   // using the cost. Any params set earlier are overridden.
string_cost(const char * name,double cost)126   int string_cost(const char* name, double cost)
127   {
128     int var;
129     var = string(name);
130     _guiding->setStringParameters(var, name, cost);
131     assert(var != -1, "Var == -1");
132     return var;
133   }
134 
135   /*
136    * Variables that specify that a part of word tag is satisfied
137    * without making any connections of the given direction.
138    */
139 
140   // If guiding params are unknown, they are set to default
epsilon(char * v,char dir)141   int epsilon(char* v, char dir) {
142     char name[MAX_VARIABLE_NAME];
143     dir = (dir == '+') ? 'r' : 'l';
144     char* s = name;
145     *s++ = dir;
146     *s++ = 'e';
147     s = fast_sprintf(s, v);
148     int var;
149     if (!get_var_from_trie(name, var)) {
150 #ifdef _VARS
151       var_defs_stream << name << "\t" << var << endl;
152 #endif
153       _guiding->setEpsilonParameters(var);
154     }
155     assert(var != -1, "Var == -1");
156     return var;
157   }
158 
159   /*
160    *             linked(wi, wj)
161    * Variables that specify that two words are linked
162    */
163 
164   // If guiding params are unknown, they are set to default
linked(int wi,int wj)165   int linked(int wi, int wj) {
166     assert(wi < wj, "Variables: linked should be ordered");
167     int var;
168     if (!get_linked_variable(wi, wj, var)) {
169 #ifdef _VARS
170       var_defs_stream << "linked_" << wi << "_" << wj << "\t" << var << endl;
171 #endif
172       add_linked_variable(wi, wj, var);
173       _guiding->setLinkedParameters(var, wi, wj);
174     }
175     assert(var != -1, "Var == -1");
176     return var;
177   }
178 
179   /*
180    *                  link(wi, pi, wj, pj)
181    * Variables that specify that a direct link has been established
182    * between the connectors ci of the word_i at position i and
183    * cj of the word_j at position j
184    */
185 
186   // If guiding params are unknown, they are set to default
link(int wi,int pi,const char * ci,Exp * ei,int wj,int pj,const char * cj,Exp * ej)187   int link(int wi, int pi, const char* ci, Exp* ei,
188            int wj, int pj, const char* cj, Exp* ej)
189   {
190     assert(wi < wj, "Variables: link should be ordered");
191     int var;
192     if (!get_link_variable(wi, pi, wj, pj, var)) {
193 #ifdef _VARS
194       var_defs_stream << "link_" << wi << "_" << pi << "_" << ci << "_"
195                       << wj << "_" << pj << "_" << cj << "\t" << var << endl;
196 #endif
197       add_link_variable(wi, pi, ci, ei, wj, pj, cj, ej, var);
198       _guiding->setLinkParameters(var, wi, ci, wj, cj, _link_variables[var]->label);
199     }
200     assert(var != -1, "Var == -1");
201     return var;
202   }
203 
204   // If the cost is specified, guiding params are calculated
205   // using the cost. Any guiding params that are set earlier are overridden
link_cost(int wi,int pi,const char * ci,Exp * ei,int wj,int pj,const char * cj,Exp * ej,double cost)206   int link_cost(int wi, int pi, const char* ci, Exp* ei,
207                 int wj, int pj, const char* cj, Exp* ej,
208                 double cost)
209   {
210     assert(wi < wj, "Variables: link should be ordered");
211     int var = link(wi, pi, ci, ei,  wj, pj, cj, ej);
212     _guiding->setLinkParameters(var, wi, ci, wj, cj, link_variable(var)->label, cost);
213     assert(var != -1, "Var == -1");
214     return var;
215   }
216 
217   /*
218    *                   link_cw(wi, wj, pj)
219    * Variables that specify that an indirect link has been established
220    * between the word i and connector cj of the word_j at position
221    * j.
222    */
223 
224   // If guiding params for this variable are not set earlier, they are
225   // now set to default
link_cw(int wi,int wj,int pj,const char * cj)226   int link_cw(int wi, int wj, int pj, const char* cj) {
227     int var;
228     if (!get_link_cw_variable(wi, wj, pj, var)) {
229 #ifdef _VARS
230       var_defs_stream << "link_cw_"
231                       << "(" << wj << "_" << pj << "_" << cj  << ")_"
232                       << wi
233                       << "\t" << var << endl;
234 #endif
235       _guiding->setLinkCWParameters(var, wi, wj, cj);
236     }
237     assert(var != -1, "Var == -1");
238     return var;
239   }
240 
241 #ifdef _CONNECTIVITY_
242   /* The following variables are deprecated */
243 
244   // Variables that specify that words i and j are connected
con(int i,int j)245   int con(int i, int j) {
246     char name[MAX_VARIABLE_NAME];
247     char* s = name;
248     *s++ = 'c';
249     s = fast_sprintf(s, i);
250     *s++ = '_';
251     s = fast_sprintf(s, j);
252     int var;
253     if (!get_var_from_trie(name, var))
254       set_variable_sat_params(var, false);
255     return var;
256   }
257 
258   // Auxiliary variables used for connectivity encoding
l_con(int i,int j,int k)259   int l_con(int i, int j, int k) {
260     int var;
261     if (!get_lcon_variable(i, j, k, var))
262       set_variable_sat_params(var, false);
263     return var;
264   }
265 #endif
266 
267   /*
268    *      link(wi, pi, wj, pj)
269    */
270   // Returns the indices of all link variables
link_variables() const271   const std::vector<int>& link_variables() const {
272     return _link_variables_indices;
273   }
274 
275   // Returns the indices of all link_x_x_wj_pj variables
link_variables(int wj,int pj)276   const std::vector<int>& link_variables(int wj, int pj) {
277     std::pair<int, int> p(wj, pj);
278     return _link_variable_wp_map[p];
279   }
280 
281   // Additional info about the link(wi, pi, wj, pj) variable
282   struct LinkVar
283   {
LinkVarVariables::LinkVar284     LinkVar(const std::string& _name, char* _label,
285             int _lw, int _lp, int _rw, int _rp,
286             const char* _lc, const char* _rc,
287             Exp* _le, Exp* _re)
288       : name(_name), label(_label),
289         left_word(_lw), right_word(_rw),
290         left_position(_lp), right_position(_rp),
291         left_connector(_lc), right_connector(_rc),
292         left_exp(_le), right_exp(_re)
293     {}
294 
295     std::string name;
296     char* label;
297     int left_word;
298     int right_word;
299     int left_position;
300     int right_position;
301     const char* left_connector;
302     const char* right_connector;
303     Exp* left_exp;
304     Exp* right_exp;
305   };
306 
307   // Returns additional info about the given link variable
link_variable(int var) const308   const LinkVar* link_variable(int var) const {
309     return _link_variables[var];
310   }
311 
312   /*
313    *       linked(wi, wj)
314    */
315   // Returns the indices of all linked variables
linked_variables() const316   const std::vector<int>& linked_variables() const {
317     return _linked_variables_indices;
318   }
319 
320   // Additional info about the linked(i, j) variable
321   struct LinkedVar {
LinkedVarVariables::LinkedVar322     LinkedVar(int lw, int rw)
323       : left_word(lw), right_word(rw) {
324     }
325 
326     int left_word;
327     int right_word;
328   };
329 
330   // Returns additional info about the given linked variable
linked_variable(int var) const331   const LinkedVar* linked_variable(int var) const {
332     return _linked_variables[var];
333   }
334 
335   /* Pass SAT search parameters to the MiniSAT solver */
setVariableParameters(Solver * solver)336   void setVariableParameters(Solver* solver) {
337     _guiding->passParametersToSolver(solver);
338   }
339 
340 private:
341   /*
342    * Information about string variables
343    */
344 
345   // What is the number of the variable with the given name?
346   Trie<int> _variable_trie;
347 
348   /*
349    * Information about link(wi, pi, wj, pj) variables
350    */
351 
352   // What is the number of the link(wi, pi, wj, pj) variable?
353   MatrixUpperTriangle< std::map<std::pair<int, int>, int> > _link_variable_map;
354 
355 
356   // What are the numbers of all link(wi, pi, wj, pj) variables?
357   std::vector<int>  _link_variables_indices;
358 
359   // What are the numbers of all link(x, x, wj, pj) variables?
360   std::map< std::pair<int, int>, std::vector<int> > _link_variable_wp_map;
361 
362 
363   // Additional info about the link(wi, pi, wj, pj) variable with the given number
364   std::vector<LinkVar*> _link_variables;
365 
366   // Set this additional info
add_link_variable(int i,int pi,const char * ci,Exp * ei,int j,int pj,const char * cj,Exp * ej,size_t var)367   void add_link_variable(int i, int pi, const char* ci, Exp* ei,
368                          int j, int pj, const char* cj, Exp* ej, size_t var)
369   {
370   /* The following variable is created but is never inserted to the trie,
371      and generating it has an observable performance impact.
372      The trie even doesn't have 'k'. */
373 #if 0
374     char name[MAX_VARIABLE_NAME];
375     char* s = name;
376     *s++ = 'l';    *s++ = 'i';     *s++ = 'n';     *s++ = 'k';
377     *s++ = '_';
378     s = fast_sprintf(s, i);
379     *s++ = '_';
380     s = fast_sprintf(s, pi);
381     *s++ = '_';
382     s = fast_sprintf(s, ci);
383     *s++ = '_';
384     s = fast_sprintf(s, j);
385     *s++ = '_';
386     s = fast_sprintf(s, pj);
387     *s++ = '_';
388     s = fast_sprintf(s, cj);
389 #endif
390     char* label = construct_link_label(ci, cj);
391 
392     if (var >= _link_variables.size()) {
393       _link_variables.resize(var + 1, 0);
394     }
395     // The first argument was the redundant variable eliminated above
396     _link_variables[var] = new LinkVar("", label, i, pi, j, pj, ci, cj, ei, ej);
397     _link_variables_indices.push_back(var);
398   }
399 
400   /*
401    * Information about linked(i, j) variables
402    */
403 
404   // What is the number of the linked(i, j) variable?
405   MatrixUpperTriangle<int> _linked_variable_map;
406 
407   // What are the numbers of all linked(i, j) variables?
408   std::vector<int>  _linked_variables_indices;
409 
410   // Additional info about the linked(i, j) variable
411   std::vector<LinkedVar*> _linked_variables;
412 
413   // Set the additional info
add_linked_variable(int i,int j,size_t var)414   void add_linked_variable(int i, int j, size_t var) {
415     if (var >= _linked_variables.size()) {
416       _linked_variables.resize(var + 1, 0);
417     }
418     _linked_variables[var] = new LinkedVar(i, j);
419     _linked_variables_indices.push_back(var);
420   }
421 
422   /*
423    *   Information about the link_cw(w, wj, pj) variables
424    */
425   // What is the number of the link_cw(wi, wj, pj) variable?
426   Matrix< std::map<int, int> > _link_cw_variable_map;
427 
428 
429 #ifdef _CONNECTIVITY_
430   std::map<std::pair<std::pair<int, int>,int>, int> _lcon_variables;
431 #endif
432 
433   /* SAT search parameters */
434   Guiding* _guiding;
435 
436   /* Current free variable number */
437   size_t _var;
438 
439   /* Get a variable number that has not been used before */
get_fresh_var(void)440   int get_fresh_var(void) {
441     return _var++;
442   }
443 
444   /* Helper functions that retrieve variable numbers from appropriate
445      data structures. If the variable is not present, it is assigned a
446      fresh variable number, and false is returned. Otherwise, the number
447      is retrieved and true is returned. */
448 
get_var_from_trie(const char * name,int & var)449   bool get_var_from_trie(const char* name, int& var) {
450     try {
451       int num = _variable_trie.lookup(name);
452       if (num != Trie<int>::NOT_FOUND) {
453         var = num;
454         return true;
455       }
456       else {
457         var = get_fresh_var();
458         _variable_trie.insert(name, var);
459         return false;
460       }
461     } catch (const std::string& s) {
462       cout << s << endl;
463       exit(EXIT_FAILURE);
464     }
465   }
466 
467 
get_2int_variable(int i,int j,int & var,Matrix<int> & mp)468   bool get_2int_variable(int i, int j, int& var,
469                          Matrix<int>& mp) {
470     var = mp(i, j);
471     if (var == -1) {
472       var = get_fresh_var();
473       mp.set(i, j, var);
474       return false;
475     }
476     return true;
477   }
478 
get_3int_variable(int i,int j,int pj,int & var,Matrix<std::map<int,int>> & mp)479   bool get_3int_variable(int i, int j, int pj, int& var,
480                          Matrix< std::map<int, int> >& mp) {
481     std::map<int, int>& m = mp(i, j);
482     std::map<int, int>::iterator it = m.find(pj);
483     if (it == m.end()) {
484       var = get_fresh_var();
485       m[pj] = var;
486       return false;
487     } else {
488       var = it->second;
489       return true;
490     }
491   }
492 
get_4int_variable(int i,int pi,int j,int pj,int & var,Matrix<std::map<std::pair<int,int>,int>> & mp)493   bool get_4int_variable(int i, int pi, int j, int pj, int& var,
494                          Matrix< std::map<std::pair<int, int>, int> >& mp) {
495     std::map< std::pair<int, int>, int >& m = mp(i, j);
496     std::pair<int, int> p(pi, pj);
497     std::map< std::pair<int, int>, int >::iterator it = m.find(p);
498     if (it == m.end()) {
499       var = get_fresh_var();
500       m[p] = var;
501       return false;
502     } else {
503       var = it->second;
504       return true;
505     }
506   }
507 
get_link_variable(int i,int pi,int j,int pj,int & var)508   bool get_link_variable(int i, int pi, int j, int pj, int& var) {
509     bool ret = get_4int_variable(i, pi, j, pj, var, _link_variable_map);
510     if (!ret) {
511       std::pair<int, int> p(j, pj);
512       _link_variable_wp_map[p].push_back(var);
513     }
514     return ret;
515   }
516 
517 
get_linked_variable(int i,int j,int & var)518   bool get_linked_variable(int i, int j, int& var) {
519     return get_2int_variable(i, j, var, _linked_variable_map);
520   }
521 
get_link_cw_variable(int i,int j,int pj,int & var)522   bool get_link_cw_variable(int i, int j, int pj, int& var) {
523     return get_3int_variable(i, j, pj, var, _link_cw_variable_map);
524   }
525 
526 #ifdef _CONNECTIVITY_
get_lcon_variable(int i,int j,int k,int & var)527   bool get_lcon_variable(int i, int j, int k, int& var) {
528     std::pair<std::pair<int, int>, int> p(std::pair<int, int>(i, j), k);
529     std::map<std::pair<std::pair<int, int>, int>, int>::iterator it = _lcon_variables.find(p);
530     if (it != _lcon_variables.end()) {
531       var = it->second;
532       return true;
533     } else {
534       var = get_fresh_var();
535 #ifdef _VARS
536       var_defs_stream << "lcon_" << i << "_" << j << "_" << k << "\t" << var << endl;
537 #endif
538       _lcon_variables[p] = var;
539       return false;
540     }
541   }
542 #endif
543 
544 };
545 
546 #endif
547