1 /**
2  * Encode link-grammar for solving with the SAT solver...
3  */
4 #include <cstdlib>
5 #include <cstdio>
6 #include <iostream>
7 #include <vector>
8 #include <algorithm>
9 #include <iterator>
10 using std::cout;
11 using std::cerr;
12 using std::endl;
13 using std::vector;
14 
15 extern "C" {
16 #include "sat-encoder.h"
17 }
18 #include "minisat/core/Solver.h"
19 #undef assert
20 
21 #include "sat-encoder.hpp"
22 #include "variables.hpp"
23 #include "word-tag.hpp"
24 #include "matrix-ut.hpp"
25 #include "clock.hpp"
26 #include "fast-sprintf.hpp"
27 
28 extern "C" {
29 #include "disjunct-utils.h"
30 #include "linkage/analyze-linkage.h" // for compute_link_names()
31 #include "linkage/linkage.h"
32 #include "linkage/sane.h"            // for sane_linkage_morphism()
33 #include "linkage/score.h"           // for linkage_score()
34 #include "parse/prune.h"             // for optional_gap_collapse()
35 #include "prepare/build-disjuncts.h" // for build_disjuncts_for_exp()
36 #include "post-process/post-process.h"
37 #include "post-process/pp-structures.h"
38 #include "tokenize/word-structures.h" // for Word_struct
39 #include "tokenize/tok-structures.h"  // got Gword internals
40 }
41 
42 // Macro DEBUG_print is used to dump to stdout information while debugging
43 #ifdef SAT_DEBUG
44 #define DEBUG_print(x) (cout << x << endl)
45 #else
46 #define DEBUG_print(x)
47 #endif
48 
49 #define D_SAT 5
50 
51 // Convert a NULL C string pointer, for printing a possibly NULL string
52 #define N(s) ((s) ? (s) : "(null)")
53 
54 /*-------------------------------------------------------------------------*
55  *               C N F   C O N V E R S I O N                               *
56  *-------------------------------------------------------------------------*/
generate_literal(Lit l)57 void SATEncoder::generate_literal(Lit l) {
58   vec<Lit> clause(1);
59   clause[0] = l;
60   add_clause(clause);
61 }
62 
generate_and_definition(Lit lhs,vec<Lit> & rhs)63 void SATEncoder::generate_and_definition(Lit lhs, vec<Lit>& rhs) {
64   vec<Lit> clause(2);
65   for (int i = 0; i < rhs.size(); i++) {
66     clause[0] = ~lhs;
67     clause[1] = rhs[i];
68     add_clause(clause);
69   }
70 
71   for (int i = 0; i < rhs.size(); i++) {
72     clause[0] = ~rhs[i];
73     clause[1] = lhs;
74     add_clause(clause);
75   }
76 }
generate_classical_and_definition(Lit lhs,vec<Lit> & rhs)77 void SATEncoder::generate_classical_and_definition(Lit lhs, vec<Lit>& rhs) {
78   {
79     vec<Lit> clause(2);
80     for (int i = 0; i < rhs.size(); i++) {
81       clause[0] = ~lhs;
82       clause[1] = rhs[i];
83       add_clause(clause);
84     }
85   }
86 
87   {
88     vec<Lit> clause(rhs.size() + 1);
89     for (int i = 0; i < rhs.size(); i++) {
90       clause[i] = ~rhs[i];
91     }
92     clause[rhs.size()] = lhs;
93     add_clause(clause);
94   }
95 }
96 
generate_or_definition(Lit lhs,vec<Lit> & rhs)97 void SATEncoder::generate_or_definition(Lit lhs, vec<Lit>& rhs) {
98   {
99     vec<Lit> clause(2);
100     for (int i = 0; i < rhs.size(); i++) {
101       clause[0] = lhs;
102       clause[1] = ~rhs[i];
103       add_clause(clause);
104     }
105   }
106 
107   {
108     vec<Lit> clause(rhs.size() + 1);
109     for (int i = 0; i < rhs.size(); i++) {
110       clause[i] = rhs[i];
111     }
112     clause[rhs.size()] = ~lhs;
113     add_clause(clause);
114   }
115 }
116 
117 #if 0
118 void SATEncoder::generate_conditional_lr_implication_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs) {
119   {
120     vec<Lit> clause(2);
121     for (int i = 0; i < rhs.size(); i++) {
122       clause[0] = lhs;
123       clause[1] = ~rhs[i];
124       add_clause(clause);
125     }
126   }
127 
128   {
129     vec<Lit> clause(rhs.size() + 2);
130     for (int i = 0; i < rhs.size(); i++) {
131       clause[i] = rhs[i];
132     }
133     clause[rhs.size()] = ~lhs;
134     clause[rhs.size()+1] = ~condition;
135     add_clause(clause);
136   }
137 }
138 #endif
139 
140 #if 0
141 void SATEncoder::generate_conditional_lr_implication_or_definition(Lit condition1, Lit condition2, Lit lhs, vec<Lit>& rhs) {
142   {
143     vec<Lit> clause(2);
144     for (int i = 0; i < rhs.size(); i++) {
145       clause[0] = lhs;
146       clause[1] = ~rhs[i];
147       add_clause(clause);
148     }
149   }
150 
151   {
152     vec<Lit> clause(rhs.size() + 3);
153     for (int i = 0; i < rhs.size(); i++) {
154       clause[i] = rhs[i];
155     }
156     clause[rhs.size()] = ~lhs;
157     clause[rhs.size()+1] = ~condition1;
158     clause[rhs.size()+2] = ~condition2;
159     add_clause(clause);
160   }
161 }
162 #endif
163 
generate_xor_conditions(vec<Lit> & vect)164 void SATEncoder::generate_xor_conditions(vec<Lit>& vect) {
165   vec<Lit> clause(2);
166   for (int i = 0; i < vect.size(); i++) {
167     for (int j = i + 1; j < vect.size(); j++) {
168       if (vect[i] == vect[j])
169         continue;
170       clause[0] = ~vect[i];
171       clause[1] = ~vect[j];
172       add_clause(clause);
173     }
174   }
175 }
176 
generate_and(vec<Lit> & vect)177 void SATEncoder::generate_and(vec<Lit>& vect) {
178   for (int i = 0; i < vect.size(); i++) {
179     generate_literal(vect[i]);
180   }
181 }
182 
183 
generate_or(vec<Lit> & vect)184 void SATEncoder::generate_or(vec<Lit>& vect) {
185   add_clause(vect);
186 }
187 
generate_equivalence_definition(Lit l1,Lit l2)188 void SATEncoder::generate_equivalence_definition(Lit l1, Lit l2) {
189   vec<Lit> clause(2);
190   {
191     clause[0] = ~l1;
192     clause[1] = l2;
193     add_clause(clause);
194   }
195   {
196     clause[0] = l1;
197     clause[1] = ~l2;
198     add_clause(clause);
199   }
200 }
201 
202 
203 /*-------------------------------------------------------------------------*
204  *                          E N C O D I N G                                *
205  *-------------------------------------------------------------------------*/
encode()206 void SATEncoder::encode() {
207     Clock clock;
208     generate_satisfaction_conditions();
209     clock.print_time(verbosity, "Generated satisfaction conditions");
210     generate_linked_definitions();
211     clock.print_time(verbosity, "Generated linked definitions");
212     generate_planarity_conditions();
213     clock.print_time(verbosity, "Generated planarity conditions");
214 
215 #ifdef _CONNECTIVITY_
216     generate_connectivity();
217     clock.print_time(verbosity, "Generated connectivity");
218 #endif
219 
220     generate_encoding_specific_clauses();
221     //clock.print_time(verbosity, "Generated encoding specific clauses");
222 
223     pp_prune();
224     clock.print_time(verbosity, "PP pruned");
225     power_prune();
226     clock.print_time(verbosity, "Power pruned");
227 
228     _variables->setVariableParameters(_solver);
229 }
230 
231 
232 
233 /*-------------------------------------------------------------------------*
234  *                         W O R D - T A G S                               *
235  *-------------------------------------------------------------------------*/
build_word_tags()236 void SATEncoder::build_word_tags()
237 {
238   char name[MAX_VARIABLE_NAME];
239   name[0] = 'w';
240 
241   for (size_t w = 0; w < _sent->length; w++) {
242     fast_sprintf(name+1, (int)w);
243     // The SAT encoding word variables are set to be equal to the word numbers.
244     Var var = _variables->string(name);
245     assert((Var)w == var);
246   }
247 
248   for (size_t w = 0; w < _sent->length; w++) {
249     _word_tags.push_back(WordTag(w, _variables, _sent, _opts));
250     int dfs_position = 0;
251 
252     if (_sent->word[w].x == NULL) continue;
253 
254     bool join = _sent->word[w].x->next != NULL;
255     Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
256 
257 #ifdef SAT_DEBUG
258     cout << "Word ." << w << ".: " << N(_sent->word[w].unsplit_word) << endl;
259     cout << lg_exp_stringify(exp);
260     //prt_exp_mem(exp, 0);
261     cout << endl;
262 #endif
263 
264     fast_sprintf(name+1, (int)w);
265     bool leading_right = true;
266     bool leading_left = true;
267     std::vector<int> eps_right, eps_left;
268 
269     _word_tags[w].insert_connectors(exp, dfs_position, leading_right,
270          leading_left, eps_right, eps_left, name, true, 0, NULL, _sent->word[w].x);
271   }
272 
273   for (size_t wl = 0; wl < _sent->length - 1; wl++) {
274     for (size_t wr = wl + 1; wr < _sent->length; wr++) {
275       _word_tags[wl].add_matches_with_word(_word_tags[wr]);
276     }
277   }
278 }
279 
280 #if 0
281 void SATEncoder::find_all_matches_between_words(size_t w1, size_t w2,
282                                                 std::vector<std::pair<const PositionConnector*, const PositionConnector*> >& matches) {
283   const std::vector<PositionConnector>& w1_right = _word_tags[w1].get_right_connectors();
284   std::vector<PositionConnector>::const_iterator i;
285   for (i = w1_right.begin(); i != w1_right.end(); i++) {
286     const std::vector<PositionConnector*>& w2_left_c = (*i).matches;
287     std::vector<PositionConnector*>::const_iterator j;
288     for (j = w2_left_c.begin(); j != w2_left_c.end(); j++) {
289       if ((*j)->word == w2) {
290         matches.push_back(std::pair<const PositionConnector*, const PositionConnector*>(&(*i), *j));
291       }
292     }
293   }
294 }
295 
296 bool SATEncoder::matches_in_interval(int wi, int pi, int l, int r) {
297   for (int w = l; w < r; w++) {
298     if (_word_tags[w].match_possible(wi, pi))
299       return true;
300   }
301   return false;
302 }
303 #endif
304 
305 /*-------------------------------------------------------------------------*
306  *                     S A T I S F A C T I O N                             *
307  *-------------------------------------------------------------------------*/
308 
generate_satisfaction_conditions()309 void SATEncoder::generate_satisfaction_conditions()
310 {
311   char name[MAX_VARIABLE_NAME] = "w";
312 
313   for (size_t w = 0; w < _sent->length; w++) {
314 
315 #ifdef SAT_DEBUG
316     cout << "Word " << w << ": " << N(_sent->word[w].unsplit_word);
317     if (_sent->word[w].optional) cout << " (optional)";
318     cout << endl;
319     cout << lg_exp_stringify(exp);
320     cout << endl;
321 #endif
322 
323     fast_sprintf(name+1, w);
324 
325     if (_sent->word[w].optional)
326       _variables->string(name);
327     else
328       determine_satisfaction(w, name);
329 
330     if (_sent->word[w].x == NULL) {
331       if (!_sent->word[w].optional) {
332         // Most probably everything got pruned. There will be no linkage.
333         lgdebug(+D_SAT, "Word%zu '%s': Null X_node\n", w, _sent->word[w].unsplit_word);
334         handle_null_expression(w);
335       }
336       continue; // No expression to handle.
337     }
338 
339     bool join = _sent->word[w].x->next != NULL;
340     Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
341 
342     int dfs_position = 0;
343     generate_satisfaction_for_expression(w, dfs_position, exp, name, 0);
344   }
345 }
346 
347 
generate_satisfaction_for_expression(int w,int & dfs_position,Exp * e,char * var,double parent_cost)348 void SATEncoder::generate_satisfaction_for_expression(int w, int& dfs_position, Exp* e,
349                                                       char* var, double parent_cost)
350 {
351   Exp *opd;
352   double total_cost = parent_cost + e->cost;
353 
354   if (e->type == CONNECTOR_type) {
355     dfs_position++;
356 
357     generate_satisfaction_for_connector(w, dfs_position, e, var);
358 
359     if (total_cost > _cost_cutoff) {
360       Lit lhs = Lit(_variables->string_cost(var, e->cost));
361       generate_literal(~lhs);
362     }
363   } else {
364     if (e->type == AND_type) {
365       if (e->operand_first == NULL) {
366         /* zeroary and */
367         _variables->string_cost(var, e->cost);
368         if (total_cost > _cost_cutoff) {
369           generate_literal(~Lit(_variables->string_cost(var, e->cost)));
370         }
371       } else if (e->operand_first->operand_next == NULL) {
372         /* unary and - skip */
373         generate_satisfaction_for_expression(w, dfs_position, e->operand_first, var, total_cost);
374       } else {
375         /* n-ary and */
376         int i;
377 
378         char new_var[MAX_VARIABLE_NAME];
379         char* last_new_var = new_var;
380         char* last_var = var;
381         while((*last_new_var = *last_var)) {
382           last_new_var++;
383           last_var++;
384         }
385 
386         vec<Lit> rhs;
387         for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
388           // sprintf(new_var, "%sc%d", var, i)
389           char* s = last_new_var;
390           *s++ = 'c';
391           fast_sprintf(s, i);
392           rhs.push(Lit(_variables->string(new_var)));
393         }
394 
395         Lit lhs = Lit(_variables->string_cost(var, e->cost));
396         generate_and_definition(lhs, rhs);
397 
398         /* Precedes */
399         int dfs_position_tmp = dfs_position;
400         for (opd = e->operand_first; opd->operand_next != NULL; opd = opd->operand_next) {
401           Exp e_tmp, *e_rhs = &e_tmp;
402 
403           if (opd->operand_next->operand_next == NULL) {
404             e_rhs = opd->operand_next;
405           }
406           else {
407             // A workaround for issue #932:
408             // Since the generated ordering conditions are not transitive
409             // (see the example below), chains w/length > 2 miss needed
410             // constrains.  So if the chain length > 2, refer to it as if
411             // it is a 2-component chain (1st: l->e; 2nd: AND of the rest).
412             //
413             // Problem example:
414             // Sentence: *I saw make some changes in the program
415             // In word 2 we get here this subexpression (among many others):
416             // e = (K+ & {[[@MV+]]} & (O*n+ or ())) // (& chain length = 3)
417             // The generated constrains are only: (Decoding:
418             // (word-number_connector-position_connector-string)_to-word-number)
419             // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_5_MV)_6
420             // Clause: -link_cw_(2_5_MV)_6 -link_cw_(2_6_O*n)_4
421             // (Reading: If MV connects from word 2 to word 6, then O*n
422             // cannot connect from word 2 to word 4).
423             // Clause: -link_cw_(2_5_MV)_6 -link_cw_(2_6_O*n)_5
424             // However, the following, which are generated when the
425             // chain length is limited to 2, are missing:
426             // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_6_O*n)_4
427             // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_6_O*n)_5
428             // Clearly, they cannot be concluded transitively.
429             e_rhs->type = AND_type;
430             e_rhs->operand_first = opd->operand_next;
431             e_rhs->cost = 0.0;
432           };
433           generate_conjunct_order_constraints(w, opd, e_rhs, dfs_position_tmp);
434         }
435 
436         /* Recurse */
437         for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
438           // sprintf(new_var, "%sc%d", var, i)
439           char* s = last_new_var;
440           *s++ = 'c';
441           fast_sprintf(s, i);
442 
443           /* if (i != 0) total_cost = 0; */ // This interferes with the cost cutoff
444           generate_satisfaction_for_expression(w, dfs_position, opd, new_var, total_cost);
445         }
446       }
447     } else if (e->type == OR_type) {
448       if (e->operand_first == NULL) {
449         /* zeroary or */
450         cerr << "Zeroary OR" << endl;
451         exit(EXIT_FAILURE);
452       } else if (e->operand_first->operand_next == NULL) {
453         /* unary or */
454         generate_satisfaction_for_expression(w, dfs_position, e->operand_first, var, total_cost);
455       } else {
456         /* n-ary or */
457         int i;
458 
459         char new_var[MAX_VARIABLE_NAME];
460         char* last_new_var = new_var;
461         char* last_var = var;
462         while ((*last_new_var = *last_var)) {
463           last_new_var++;
464           last_var++;
465         }
466 
467         vec<Lit> rhs;
468         for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
469           // sprintf(new_var, "%sc%d", var, i)
470           char* s = last_new_var;
471           *s++ = 'd';
472           fast_sprintf(s, i);
473           rhs.push(Lit(_variables->string(new_var)));
474         }
475 
476         Lit lhs = Lit(_variables->string_cost(var, e->cost));
477         generate_or_definition(lhs, rhs);
478         generate_xor_conditions(rhs);
479 
480         /* Recurse */
481         for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
482           char* s = last_new_var;
483           *s++ = 'd';
484           fast_sprintf(s, i);
485 
486           generate_satisfaction_for_expression(w, dfs_position, opd, new_var, total_cost);
487         }
488       }
489     }
490   }
491 }
492 
493 /**
494  * Join all alternatives using an OR_type node.
495  */
join_alternatives(int w)496 Exp* SATEncoder::join_alternatives(int w)
497 {
498   Exp* exp = Exp_create(_sent->Exp_pool);
499   exp->type = OR_type;
500   exp->cost = 0.0;
501 
502   Exp** opdp = &exp->operand_first;
503 
504   for (X_node* x = _sent->word[w].x; x != NULL; x = x->next)
505   {
506     *opdp = Exp_create_dup(_sent->Exp_pool, x->exp);
507     opdp = &(*opdp)->operand_next;
508   }
509   *opdp = NULL;
510 
511   return exp;
512 }
513 
generate_link_cw_ordinary_definition(size_t wi,int pi,Exp * e,size_t wj)514 void SATEncoder::generate_link_cw_ordinary_definition(size_t wi, int pi,
515                                                       Exp* e, size_t wj)
516 {
517   const char* Ci = e->condesc->string;
518   char dir = e->dir;
519   double cost = e->cost;
520   Lit lhs = Lit(_variables->link_cw(wj, wi, pi, Ci));
521   vec<Lit> rhs;
522 
523   // Collect matches (wi, pi) with word wj
524   std::vector<PositionConnector*>& matches = _word_tags[wi].get(pi)->matches;
525   std::vector<PositionConnector*>::const_iterator i;
526   for (i = matches.begin(); i != matches.end(); i++) {
527     /* TODO: PositionConnector->matches[wj] */
528     if ((*i)->word != wj)
529       continue;
530 
531     if (dir == '+') {
532       rhs.push(Lit(_variables->link_cost(wi, pi, Ci, e,
533                                          (*i)->word, (*i)->position,
534                                          connector_string(&(*i)->connector),
535                                          (*i)->exp,
536                                          cost + (*i)->cost)));
537     } else if (dir == '-'){
538       rhs.push(Lit(_variables->link((*i)->word, (*i)->position,
539                                     connector_string(&(*i)->connector),
540                                     (*i)->exp,
541                                     wi, pi, Ci, e)));
542     }
543   }
544 
545   // Generate clauses
546   DEBUG_print("--------- link_cw as ordinary down");
547   generate_or_definition(lhs, rhs);
548   generate_xor_conditions(rhs);
549   DEBUG_print("--------- end link_cw as ordinary down");
550 }
551 
552 /*--------------------------------------------------------------------------*
553  *               C O N J U N C T    O R D E R I N G                         *
554  *--------------------------------------------------------------------------*/
num_connectors(Exp * e)555 int SATEncoder::num_connectors(Exp* e)
556 {
557   if (e->type == CONNECTOR_type)
558     return 1;
559   else {
560     int num = 0;
561     for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
562       num += num_connectors(opd);
563     }
564     return num;
565   }
566 }
567 
empty_connectors(Exp * e,char dir)568 int SATEncoder::empty_connectors(Exp* e, char dir)
569 {
570   if (e->type == CONNECTOR_type) {
571     return e->dir != dir;
572   } else if (e->type == OR_type) {
573     for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
574       if (empty_connectors(opd, dir))
575         return true;
576     }
577     return false;
578   } else if (e->type == AND_type) {
579     for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
580       if (!empty_connectors(opd, dir))
581         return false;
582     }
583     return true;
584   } else
585     throw std::string("Unknown connector type");
586 }
587 
588 #if 0
589 int SATEncoder::non_empty_connectors(Exp* e, char dir)
590 {
591   if (e->type == CONNECTOR_type) {
592     return e->dir == dir;
593   } else if (e->type == OR_type) {
594     for (E_list* l = e->operand_first; l != NULL; l = l->next) {
595       if (non_empty_connectors(l->e, dir))
596         return true;
597     }
598     return false;
599   } else if (e->type == AND_type) {
600     for (E_list* l = e->operand_first; l != NULL; l = l->next) {
601       if (non_empty_connectors(l->e, dir))
602         return true;
603     }
604     return false;
605   } else
606     throw std::string("Unknown connector type");
607 }
608 #endif
609 
trailing_connectors_and_aux(int w,Exp * opd,char dir,int & dfs_position,std::vector<PositionConnector * > & connectors)610 bool SATEncoder::trailing_connectors_and_aux(int w, Exp *opd, char dir, int& dfs_position,
611                                              std::vector<PositionConnector*>& connectors)
612 {
613   if (opd == NULL) {
614     return true;
615   } else {
616     int dfs_position_in = dfs_position;
617     dfs_position += num_connectors(opd);
618     if (trailing_connectors_and_aux(w, opd->operand_next, dir, dfs_position, connectors)) {
619       trailing_connectors(w, opd, dir, dfs_position_in, connectors);
620     }
621     return empty_connectors(opd, dir);
622   }
623 }
624 
trailing_connectors(int w,Exp * exp,char dir,int & dfs_position,std::vector<PositionConnector * > & connectors)625 void SATEncoder::trailing_connectors(int w, Exp* exp, char dir, int& dfs_position,
626                                      std::vector<PositionConnector*>& connectors)
627 {
628   if (exp->type == CONNECTOR_type) {
629     dfs_position++;
630     if (exp->dir == dir) {
631       connectors.push_back(_word_tags[w].get(dfs_position));
632     }
633   } else if (exp->type == OR_type) {
634     for (Exp* opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
635       trailing_connectors(w, opd, dir, dfs_position, connectors);
636     }
637   } else if (exp->type == AND_type) {
638     trailing_connectors_and_aux(w, exp->operand_first, dir, dfs_position, connectors);
639   }
640 }
641 
642 #if 0
643 void SATEncoder::certainly_non_trailing(int w, Exp* exp, char dir, int& dfs_position,
644                                        std::vector<PositionConnector*>& connectors, bool has_right) {
645   if (exp->type == CONNECTOR_type) {
646     dfs_position++;
647     if (exp->dir == dir && has_right) {
648       connectors.push_back(_word_tags[w].get(dfs_position));
649     }
650   } else if (exp->type == OR_type) {
651     for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
652       certainly_non_trailing(w, l->e, dir, dfs_position, connectors, has_right);
653     }
654   } else if (exp->type == AND_type) {
655     if (has_right) {
656       for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
657         certainly_non_trailing(w, l->e, dir, dfs_position, connectors, true);
658       }
659     } else {
660       for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
661         has_right = false;
662         for (E_list* m = l->next; m != NULL; m = m->next) {
663           if (non_empty_connectors(m->e, dir) && !empty_connectors(m->e, dir)) {
664             has_right = true;
665             break;
666           }
667         }
668         certainly_non_trailing(w, l->e, dir, dfs_position, connectors, has_right);
669       }
670     }
671   }
672 }
673 #endif
674 
leading_connectors(int w,Exp * exp,char dir,int & dfs_position,vector<PositionConnector * > & connectors)675 void SATEncoder::leading_connectors(int w, Exp* exp, char dir, int& dfs_position, vector<PositionConnector*>& connectors) {
676   if (exp->type == CONNECTOR_type) {
677     dfs_position++;
678     if (exp->dir == dir) {
679       connectors.push_back(_word_tags[w].get(dfs_position));
680     }
681   } else if (exp->type == OR_type) {
682     for (Exp* opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
683       leading_connectors(w, opd, dir, dfs_position, connectors);
684     }
685   } else if (exp->type == AND_type) {
686     Exp* opd;
687     for (opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
688       leading_connectors(w, opd, dir, dfs_position, connectors);
689       if (!empty_connectors(opd, dir))
690         break;
691     }
692 
693     if (opd != NULL) {
694       for (opd = opd->operand_next; opd != NULL; opd = opd->operand_next)
695         dfs_position += num_connectors(opd);
696     }
697   }
698 }
699 
generate_conjunct_order_constraints(int w,Exp * e1,Exp * e2,int & dfs_position)700 void SATEncoder::generate_conjunct_order_constraints(int w, Exp *e1, Exp* e2, int& dfs_position) {
701   DEBUG_print("----- conjunct order constraints");
702   int dfs_position_e1 = dfs_position;
703   std::vector<PositionConnector*> last_right_in_e1, first_right_in_e2;
704   trailing_connectors(w, e1, '+', dfs_position_e1, last_right_in_e1);
705 
706   int dfs_position_e2 = dfs_position_e1;
707   leading_connectors(w, e2, '+', dfs_position_e2, first_right_in_e2);
708 
709   vec<Lit> clause(2);
710 
711   if (!last_right_in_e1.empty() && !first_right_in_e2.empty()) {
712     std::vector<PositionConnector*>::iterator i, j;
713     for (i = last_right_in_e1.begin(); i != last_right_in_e1.end(); i++) {
714       std::vector<PositionConnector*>& matches_e1 = (*i)->matches;
715 
716       for (j = first_right_in_e2.begin(); j != first_right_in_e2.end(); j++) {
717         std::vector<PositionConnector*>& matches_e2 = (*j)->matches;
718         std::vector<PositionConnector*>::const_iterator m1, m2;
719 
720         std::vector<int> mw1;
721         for (m1 = matches_e1.begin(); m1 != matches_e1.end(); m1++) {
722           if ((m1+1) == matches_e1.end() || (*m1)->word != (*(m1 + 1))->word)
723             mw1.push_back((*m1)->word);
724         }
725 
726         std::vector<int> mw2;
727         for (m2 = matches_e2.begin(); m2 != matches_e2.end(); m2++) {
728           if ((m2+1) == matches_e2.end() || (*m2)->word != (*(m2 + 1))->word)
729             mw2.push_back((*m2)->word);
730         }
731 
732         std::vector<int>::const_iterator mw1i, mw2i;
733         for (mw1i = mw1.begin(); mw1i != mw1.end(); mw1i++) {
734           for (mw2i = mw2.begin(); mw2i != mw2.end(); mw2i++) {
735             if (*mw1i >= *mw2i) {
736               clause[0] = ~Lit(_variables->link_cw(*mw1i, w, (*i)->position, connector_string(&(*i)->connector)));
737               clause[1] = ~Lit(_variables->link_cw(*mw2i, w, (*j)->position, connector_string(&(*j)->connector)));
738               add_clause(clause);
739             }
740           }
741         }
742       }
743     }
744   }
745 
746   DEBUG_print("----");
747   dfs_position_e1 = dfs_position;
748   std::vector<PositionConnector*> last_left_in_e1, first_left_in_e2;
749   trailing_connectors(w, e1, '-', dfs_position_e1, last_left_in_e1);
750 
751   dfs_position_e2 = dfs_position_e1;
752   leading_connectors(w, e2, '-', dfs_position_e2, first_left_in_e2);
753 
754   if (!last_left_in_e1.empty() && !first_left_in_e2.empty()) {
755     std::vector<PositionConnector*>::iterator i, j;
756     for (i = last_left_in_e1.begin(); i != last_left_in_e1.end(); i++) {
757       std::vector<PositionConnector*>& matches_e1 = (*i)->matches;
758 
759       for (j = first_left_in_e2.begin(); j != first_left_in_e2.end(); j++) {
760         std::vector<PositionConnector*>& matches_e2 = (*j)->matches;
761 
762         std::vector<PositionConnector*>::const_iterator m1, m2;
763 
764 
765         std::vector<int> mw1;
766         for (m1 = matches_e1.begin(); m1 != matches_e1.end(); m1++) {
767           if ((m1+1) == matches_e1.end() || (*m1)->word != (*(m1 + 1))->word)
768             mw1.push_back((*m1)->word);
769         }
770 
771         std::vector<int> mw2;
772         for (m2 = matches_e2.begin(); m2 != matches_e2.end(); m2++) {
773           if ((m2+1) == matches_e2.end() || (*m2)->word != (*(m2 + 1))->word)
774             mw2.push_back((*m2)->word);
775         }
776 
777         std::vector<int>::const_iterator mw1i, mw2i;
778         for (mw1i = mw1.begin(); mw1i != mw1.end(); mw1i++) {
779           for (mw2i = mw2.begin(); mw2i != mw2.end(); mw2i++) {
780             if (*mw1i <= *mw2i) {
781               clause[0] = ~Lit(_variables->link_cw(*mw1i, w, (*i)->position, connector_string(&(*i)->connector)));
782               clause[1] = ~Lit(_variables->link_cw(*mw2i, w, (*j)->position, connector_string(&(*j)->connector)));
783               add_clause(clause);
784             }
785           }
786         }
787       }
788     }
789   }
790 
791   dfs_position = dfs_position_e1;
792 
793   DEBUG_print("---end conjunct order constraints");
794 }
795 
796 
797 
798 /*--------------------------------------------------------------------------*
799  *                       C O N N E C T I V I T Y                            *
800  *--------------------------------------------------------------------------*/
801 
802 #ifdef _CONNECTIVITY_
generate_connectivity()803 void SATEncoder::generate_connectivity() {
804   for (int w = 1; w < _sent->length; w++) {
805     if (!_linkage_possible(0, w)) {
806       vec<Lit> clause;
807       clause.push(~Lit(_variables->con(w, 1)));
808       generate_and(clause);
809     }
810     else {
811       generate_equivalence_definition(Lit(_variables->con(w, 1)),
812                                       Lit(_variables->linked(0, w)));
813     }
814   }
815 
816   for (int d = 2; d < _sent->length; d++) {
817     for (int w = 1; w < _sent->length; w++) {
818       Lit lhs = Lit(_variables->con(w, d));
819 
820       vec<Lit> rhs;
821       rhs.push(Lit(_variables->con(w, d-1)));
822       for (int w1 = 1; w1 < _sent->length; w1++) {
823         if (w == w1)
824           continue;
825 
826         if (!_linkage_possible(std::min(w, w1), std::max(w, w1))) {
827           continue;
828         }
829         rhs.push(Lit(_variables->l_con(w, w1, d-1)));
830       }
831       generate_or_definition(lhs, rhs);
832 
833 
834       /* lc definitions*/
835       for (int w1 = 1; w1 < _sent->length; w1++) {
836         if (w == w1)
837           continue;
838 
839         int mi = std::min(w, w1), ma = std::max(w, w1);
840         if (!_linked_possible(mi, ma))
841           continue;
842 
843         Lit lhs = Lit(_variables->l_con(w, w1, d-1));
844         vec<Lit> rhs(2);
845         rhs[0] = Lit(_variables->linked(mi, ma));
846         rhs[1] = Lit(_variables->con(w1, d-1));
847         generate_classical_and_definition(lhs, rhs);
848       }
849     }
850   }
851 
852 
853   for (int w = 1; w < _sent->length; w++) {
854     generate_literal(Lit(_variables->con(w, _sent->length-1)));
855   }
856 }
857 #endif
858 
dfs(int node,const MatrixUpperTriangle<int> & graph,int component,std::vector<int> & components)859 void SATEncoder::dfs(int node, const MatrixUpperTriangle<int>& graph, int component, std::vector<int>& components) {
860   if (components[node] != -1)
861     return;
862 
863   components[node] = component;
864   for (size_t other_node = node + 1; other_node < components.size(); other_node++) {
865     if (graph(node, other_node))
866       dfs(other_node, graph, component, components);
867   }
868   for (int other_node = 0; other_node < node; other_node++) {
869     if (graph(other_node, node))
870       dfs(other_node, graph, component, components);
871   }
872 }
873 
874 /* Temporary debug (may be needed again for adding parsing with null-links).
875  * This allows to do "CXXFLAGS=-DCONNECTIVITY_DEBUG configure" or
876  * "make CXXFLAGS=-DCONNECTIVITY_DEBUG" . */
877 //#define CONNECTIVITY_DEBUG
878 #ifdef CONNECTIVITY_DEBUG
879 #undef CONNECTIVITY_DEBUG
880 #define CONNECTIVITY_DEBUG(x) x
debug_generate_disconnectivity_prohibiting(vector<int> components,std::vector<int> different_components)881 static void debug_generate_disconnectivity_prohibiting(vector<int> components,
882                                       std::vector<int> different_components) {
883 
884   printf("generate_disconnectivity_prohibiting: ");
885   for (auto c: components) cout << c << " ";
886   cout << endl;
887   for (auto c: different_components) cout << c << " ";
888   cout << endl;
889 }
890 #else
891 #define debug_generate_disconnectivity_prohibiting(x, y)
892 #define CONNECTIVITY_DEBUG(x)
893 #endif
894 
895 /*
896  * The idea here is to save issuing a priori a lot of clauses to enforce
897  * linkage connectivity (i.e. no islands), and instead do it when
898  * a solution finds a disconnected linkage. This works well for valid
899  * sentences because their solutions tend to be connected or mostly connected.
900  */
901 
902  /**
903   * Find the connectivity vector of the linkage.
904   * @param components (result) A connectivity vector whose components
905   * correspond to the linkage words.
906   * @return true iff the linkage is completely connected (ignoring
907   * optional words).
908   *
909   * Each segment in the linkage is numbered by the ordinal number of its
910   * start word. Each vector component is numbered according to the segment
911   * in which the corresponding word resides.
912   * For example, a linkage with 7 words, which consists of 3 segments
913   * (islands) of 2,2,3 words, will be represented as: 0 0 1 1 2 2 2.
914   *
915   * In order to support optional words (words that are allowed to have
916   * no connectivity), optional words which don't participate in the linkage
917   * are marked with -1 instead of their segment number, and are disregarded
918   * in the connectivity test.
919   */
connectivity_components(std::vector<int> & components)920 bool SATEncoder::connectivity_components(std::vector<int>& components) {
921   // get satisfied linked(wi, wj) variables
922   const std::vector<int>& linked_variables = _variables->linked_variables();
923   std::vector<int> satisfied_linked_variables;
924   for (std::vector<int>::const_iterator i = linked_variables.begin(); i != linked_variables.end(); i++) {
925     int var = *i;
926     if (_solver->model[var] == l_True) {
927       satisfied_linked_variables.push_back(var);
928     }
929   }
930 
931   // Words that are not in the linkage don't need to be connected.
932   // (For now these can be only be words marked as "optional").
933   std::vector<bool> is_linked_word(_sent->length, false);
934   for (size_t node = 0; node < _sent->length; node++) {
935     is_linked_word[node] = _solver->model[node] == l_True;
936   }
937 
938   // build the connectivity graph
939   MatrixUpperTriangle<int> graph(_sent->length, 0);
940   std::vector<int>::const_iterator i;
941   CONNECTIVITY_DEBUG(printf("connectivity_components words:\n"));
942   for (i = satisfied_linked_variables.begin(); i != satisfied_linked_variables.end(); i++) {
943     const int lv_l = _variables->linked_variable(*i)->left_word;
944     const int lv_r = _variables->linked_variable(*i)->right_word;
945 
946     if (!is_linked_word[lv_l] || !is_linked_word[lv_r]) continue;
947     graph.set(lv_l, lv_r, 1);
948     CONNECTIVITY_DEBUG(printf("L%d R%d\n", lv_l, lv_r));
949   }
950 
951   // determine the connectivity components
952   components.resize(_sent->length);
953   std::fill(components.begin(), components.end(), -1);
954   for (size_t node = 0; node < _sent->length; node++)
955     dfs(node, graph, node, components);
956 
957   CONNECTIVITY_DEBUG(printf("connectivity_components: "));
958   bool connected = true;
959   for (size_t node = 0; node < _sent->length; node++) {
960     CONNECTIVITY_DEBUG(
961       if (is_linked_word[node]) printf("%d ", components[node]);
962       else                      printf("[%d] ", components[node]);
963     )
964     if (is_linked_word[node]) {
965       if (components[node] != 0) {
966         connected = false;
967       }
968     } else {
969        components[node] = -1;
970     }
971   }
972 
973   CONNECTIVITY_DEBUG(printf(" connected=%d\n", connected));
974   return connected;
975 }
976 
977 #ifdef SAT_DEBUG
978 #define MVALUE(s, v) (s->model[v] == l_True?'T':(s->model[v] == l_False?'f':'u'))
pmodel(Solver * solver,int var)979 GNUC_UNUSED static void pmodel(Solver *solver, int var) {
980   printf("%c\n", MVALUE(solver, var));
981 }
982 
pmodel(Solver * solver,vec<Lit> & clause)983 GNUC_UNUSED static void pmodel(Solver *solver, vec<Lit> &clause) {
984   vector<int> t;
985   for (int i = 0; i < clause.size(); i++) {
986     int v = var(clause[i]);
987     printf("%d:%c ", v, MVALUE(solver, v));
988     if (solver->model[v] == l_True) t.push_back(v);
989   }
990   printf("\n");
991   if (t.size()) {
992     printf("l_True:");
993     for (auto i: t) printf(" %d", i);
994     printf("\n");
995   }
996 }
997 #endif
998 
999 /**
1000  * Generate clauses to enforce exactly the missing connectivity.
1001  * @param components A connectivity vector as computed by
1002  * connectivity_components().
1003  *
1004  * Iterate the list of possible links between words, and find links
1005  * between words that are found in different segments according to the
1006  * connectivity vector. For each two segments, issue a clause asserting
1007  * that at least one of the corresponding links must exist.
1008  *
1009  * In order to support optional words, optional words which don't have links
1010  * (marked as -1 in the connectivity vector) are ignored. A missing link
1011  * between words when one of them is optional (referred to as a
1012  * "conditional_link" below) is considered missing only if both words exist
1013  * in the linkage.
1014  */
generate_disconnectivity_prohibiting(std::vector<int> components)1015 void SATEncoder::generate_disconnectivity_prohibiting(std::vector<int> components) {
1016   // vector of unique components
1017   std::vector<int> different_components = components;
1018   std::sort(different_components.begin(), different_components.end());
1019   different_components.erase(std::unique(different_components.begin(), different_components.end()),
1020                              different_components.end());
1021   debug_generate_disconnectivity_prohibiting(components, different_components);
1022   // Each connected component must contain a branch going out of it
1023   std::vector<int>::const_iterator c;
1024   bool missing_word = false;
1025   if (*different_components.begin() == -1) {
1026     different_components.erase(different_components.begin());
1027     missing_word = true;
1028   }
1029   for (c = different_components.begin(); c != different_components.end(); c++) {
1030     vec<Lit> clause;
1031     const std::vector<int>& linked_variables = _variables->linked_variables();
1032     for (std::vector<int>::const_iterator i = linked_variables.begin(); i != linked_variables.end(); i++) {
1033       int var = *i;
1034       const Variables::LinkedVar* lv = _variables->linked_variable(var);
1035       if ((components[lv->left_word] == *c && components[lv->right_word] != *c) ||
1036           (components[lv->left_word] != *c && components[lv->right_word] == *c)) {
1037 
1038         CONNECTIVITY_DEBUG(printf(" %d(%d-%d)", var, lv->left_word, lv->right_word));
1039         bool optlw_exists = _sent->word[lv->left_word].optional &&
1040                             _solver->model[lv->left_word] == l_True;
1041         bool optrw_exists = _sent->word[lv->right_word].optional &&
1042                             _solver->model[lv->right_word] == l_True;
1043         if (optlw_exists || optrw_exists) {
1044           int conditional_link_var;
1045           bool conditional_link_var_exists;
1046 
1047           CONNECTIVITY_DEBUG(printf("R ")); // "replaced"
1048           char name[MAX_VARIABLE_NAME] = "0";
1049           sprintf(name+1, "%dw%dw%d", var, optlw_exists?lv->left_word:255,
1050                                            optrw_exists?lv->right_word:255);
1051           conditional_link_var_exists = _variables->var_exists(name);
1052           conditional_link_var = _variables->string(name);
1053 
1054           if (!conditional_link_var_exists) {
1055             Lit lhs = Lit(conditional_link_var);
1056             vec<Lit> rhs;
1057             rhs.push(Lit(var));
1058             if (optlw_exists) rhs.push(~Lit(lv->left_word));
1059             if (optrw_exists) rhs.push(~Lit(lv->right_word));
1060             generate_or_definition(lhs, rhs);
1061           }
1062 
1063           var = conditional_link_var; // Replace it by its conditional link var
1064         }
1065         clause.push(Lit(var)); // Implied link var is used if needed
1066       }
1067     }
1068 
1069     if (missing_word) {
1070       // The connectivity may be restored differently if a word reappears
1071       for (WordIdx w = 0; w < _sent->length; w++) {
1072         if (_solver->model[w] == l_False)
1073           clause.push(Lit(w));
1074       }
1075     }
1076     CONNECTIVITY_DEBUG(printf("\n"));
1077     _solver->addClause(clause);
1078 
1079     // Avoid issuing two identical clauses
1080     if (different_components.size() == 2)
1081       break;
1082   }
1083 }
1084 
1085 /*--------------------------------------------------------------------------*
1086  *                           P L A N A R I T Y                              *
1087  *--------------------------------------------------------------------------*/
generate_planarity_conditions()1088 void SATEncoder::generate_planarity_conditions()
1089 {
1090   vec<Lit> clause(2);
1091   for (size_t wi1 = 0; wi1 < _sent->length; wi1++) {
1092     for (size_t wj1 = wi1+1; wj1 < _sent->length; wj1++) {
1093       for (size_t wi2 = wj1+1; wi2 < _sent->length; wi2++) {
1094         if (!_linked_possible(wi1, wi2))
1095           continue;
1096         for (size_t wj2 = wi2+1; wj2 < _sent->length; wj2++) {
1097           if (!_linked_possible(wj1, wj2))
1098             continue;
1099           clause[0] = ~Lit(_variables->linked(wi1, wi2));
1100           clause[1] = ~Lit(_variables->linked(wj1, wj2));
1101           add_clause(clause);
1102         }
1103       }
1104     }
1105   }
1106 }
1107 
1108 /*--------------------------------------------------------------------------*
1109  *               P O W E R     P R U N I N G                                *
1110  *--------------------------------------------------------------------------*/
1111 
generate_epsilon_definitions()1112 void SATEncoder::generate_epsilon_definitions()
1113 {
1114   for (size_t w = 0; w < _sent->length; w++) {
1115     if (_sent->word[w].x == NULL) {
1116       continue;
1117     }
1118 
1119     bool join = _sent->word[w].x->next != NULL;
1120     Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
1121 
1122     char name[MAX_VARIABLE_NAME];
1123     // sprintf(name, "w%zu", w);
1124     name[0] = 'w';
1125     fast_sprintf(name+1, w);
1126 
1127     int dfs_position;
1128 
1129     dfs_position = 0;
1130     generate_epsilon_for_expression(w, dfs_position, exp, name, true, '+');
1131 
1132     dfs_position = 0;
1133     generate_epsilon_for_expression(w, dfs_position, exp, name, true, '-');
1134   }
1135 }
1136 
generate_epsilon_for_expression(int w,int & dfs_position,Exp * e,char * var,bool root,char dir)1137 bool SATEncoder::generate_epsilon_for_expression(int w, int& dfs_position, Exp* e,
1138                                                  char* var, bool root, char dir)
1139 {
1140   if (e->type == CONNECTOR_type) {
1141     dfs_position++;
1142     if (var != NULL) {
1143       if (e->dir == dir) {
1144         // generate_literal(-_variables->epsilon(name, var, e->dir));
1145         return false;
1146       } else {
1147         generate_equivalence_definition(Lit(_variables->epsilon(var, dir)),
1148                                         Lit(_variables->string(var)));
1149         return true;
1150       }
1151     }
1152   } else if (e->type == AND_type) {
1153     if (e->operand_first == NULL) {
1154       /* zeroary and */
1155       generate_equivalence_definition(Lit(_variables->string(var)),
1156                                       Lit(_variables->epsilon(var, dir)));
1157       return true;
1158     } else
1159       if (e->operand_first != NULL && e->operand_first->operand_next == NULL) {
1160         /* unary and - skip */
1161         return generate_epsilon_for_expression(w, dfs_position, e->operand_first, var, root, dir);
1162       } else {
1163         /* Recurse */
1164         Exp* opd;
1165         int i;
1166         bool eps = true;
1167 
1168         char new_var[MAX_VARIABLE_NAME];
1169         char* last_new_var = new_var;
1170         char* last_var = var;
1171         while ((*last_new_var = *last_var)) {
1172           last_new_var++;
1173           last_var++;
1174         }
1175 
1176 
1177         for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1178           // sprintf(new_var, "%sc%d", var, i)
1179           char* s = last_new_var;
1180           *s++ = 'c';
1181           fast_sprintf(s, i);
1182 
1183           if (!generate_epsilon_for_expression(w, dfs_position, opd, new_var, false, dir)) {
1184             eps = false;
1185             break;
1186           }
1187         }
1188 
1189         if (opd != NULL) {
1190           for (opd = opd->operand_next; opd != NULL; opd = opd->operand_next)
1191             dfs_position += num_connectors(opd);
1192         }
1193 
1194         if (!root && eps) {
1195           Lit lhs = Lit(_variables->epsilon(var, dir));
1196           vec<Lit> rhs;
1197         for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1198             // sprintf(new_var, "%sc%d", var, i)
1199             char* s = last_new_var;
1200             *s++ = 'c';
1201             fast_sprintf(s, i);
1202             rhs.push(Lit(_variables->epsilon(new_var, dir)));
1203           }
1204           generate_classical_and_definition(lhs, rhs);
1205         }
1206         return eps;
1207       }
1208   } else if (e->type == OR_type) {
1209     if (e->operand_first != NULL && e->operand_first->operand_next == NULL) {
1210       /* unary or - skip */
1211       return generate_epsilon_for_expression(w, dfs_position, e->operand_first, var, root, dir);
1212     } else {
1213       /* Recurse */
1214       Exp* opd;
1215       int i;
1216       bool eps = false;
1217 
1218       char new_var[MAX_VARIABLE_NAME];
1219       char* last_new_var = new_var;
1220       char* last_var = var;
1221       while ((*last_new_var = *last_var)) {
1222         last_new_var++;
1223         last_var++;
1224       }
1225 
1226       vec<Lit> rhs;
1227         for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1228         // sprintf(new_var, "%sc%d", var, i)
1229         char* s = last_new_var;
1230         *s++ = 'd';
1231         fast_sprintf(s, i);
1232 
1233         if (generate_epsilon_for_expression(w, dfs_position, opd, new_var, false, dir) && !root) {
1234           rhs.push(Lit(_variables->epsilon(new_var, dir)));
1235           eps = true;
1236         }
1237       }
1238 
1239       if (!root && eps) {
1240         Lit lhs = Lit(_variables->epsilon(var, dir));
1241         generate_or_definition(lhs, rhs);
1242       }
1243 
1244       return eps;
1245     }
1246   }
1247   return false;
1248 }
1249 
power_prune()1250 void SATEncoder::power_prune()
1251 {
1252   generate_epsilon_definitions();
1253 
1254   // on two non-adjacent words, a pair of connectors can be used only
1255   // if not [both of them are the deepest].
1256 
1257   for (size_t wl = 0; wl < _sent->length - 2; wl++) {
1258     const std::vector<PositionConnector>& rc = _word_tags[wl].get_right_connectors();
1259     std::vector<PositionConnector>::const_iterator rci;
1260     for (rci = rc.begin(); rci != rc.end(); rci++) {
1261       if (!(*rci).leading_right || (*rci).connector.multi)
1262         continue;
1263 
1264       const std::vector<PositionConnector*>& matches = rci->matches;
1265       for (std::vector<PositionConnector*>::const_iterator lci = matches.begin(); lci != matches.end(); lci++) {
1266         if (!(*lci)->leading_left || (*lci)->connector.multi || (*lci)->word <= wl + 2)
1267           continue;
1268         if (_opts->min_null_count == 0)
1269           if (optional_gap_collapse(_sent, wl, (*lci)->word)) continue;
1270 
1271         //        printf("LR: .%zu. .%d. %s\n", wl, rci->position, rci->connector.desc->string);
1272         //        printf("LL: .%zu. .%d. %s\n", (*lci)->word, (*lci)->position, (*lci)->connector.desc->string);
1273 
1274         vec<Lit> clause;
1275         for (std::vector<int>::const_iterator i = rci->eps_right.begin(); i != rci->eps_right.end(); i++) {
1276           clause.push(~Lit(*i));
1277         }
1278 
1279         for (std::vector<int>::const_iterator i = (*lci)->eps_left.begin(); i != (*lci)->eps_left.end(); i++) {
1280           clause.push(~Lit(*i));
1281         }
1282 
1283         add_additional_power_pruning_conditions(clause, wl, (*lci)->word);
1284 
1285         clause.push(~Lit(_variables->link(
1286                wl, rci->position, connector_string(&rci->connector), rci->exp,
1287                (*lci)->word, (*lci)->position, connector_string(&(*lci)->connector), (*lci)->exp)));
1288         add_clause(clause);
1289       }
1290     }
1291   }
1292 
1293 #if 0
1294   // on two adjacent words, a pair of connectors can be used only if
1295   // they're the deepest ones on their disjuncts
1296   for (size_t wl = 0; wl < _sent->length - 2; wl++) {
1297     const std::vector<PositionConnector>& rc = _word_tags[wl].get_right_connectors();
1298     std::vector<PositionConnector>::const_iterator rci;
1299     for (rci = rc.begin(); rci != rc.end(); rci++) {
1300       if (!(*rci).leading_right)
1301         continue;
1302 
1303       const std::vector<PositionConnector*>& matches = rci->matches;
1304       for (std::vector<PositionConnector*>::const_iterator lci = matches.begin(); lci != matches.end(); lci++) {
1305         if (!(*lci)->leading_left || (*lci)->word != wl + 1)
1306           continue;
1307         int link = _variables->link(wl, rci->position, rci->connector.desc->string, rci->exp,
1308                                     (*lci)->word, (*lci)->position, (*lci)->connector.desc->string, (*lci)->exp);
1309         vec<Lit> clause(2);
1310         clause.push(~Lit(link));
1311 
1312         for (std::vector<int>::const_iterator i = rci->eps_right.begin(); i != rci->eps_right.end(); i++) {
1313           clause.push(Lit(*i));
1314         }
1315 
1316         for (std::vector<int>::const_iterator i = (*lci)->eps_left.begin(); i != (*lci)->eps_left.end(); i++) {
1317           clause.push(Lit(*i));
1318         }
1319 
1320         add_clause(clause);
1321       }
1322     }
1323   }
1324 #endif
1325 
1326 
1327 #if 0
1328   // Two deep connectors cannot match (deep means notlast)
1329   std::vector<std::vector<PositionConnector*> > certainly_deep_left(_sent->length), certainly_deep_right(_sent->length);
1330   for (size_t w = 0; w < _sent->length - 2; w++) {
1331     if (_sent->word[w].x == NULL)
1332       continue;
1333 
1334     bool join = _sent->word[w].x->next != NULL;
1335     Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
1336 
1337     int dfs_position = 0;
1338     certainly_non_trailing(w, exp, '+', dfs_position, certainly_deep_right[w], false);
1339     dfs_position = 0;
1340     certainly_non_trailing(w, exp, '-', dfs_position, certainly_deep_left[w], false);
1341   }
1342 
1343   for (size_t w = 0; w < _sent->length; w++) {
1344     std::vector<PositionConnector*>::const_iterator i;
1345     for (i = certainly_deep_right[w].begin(); i != certainly_deep_right[w].end(); i++) {
1346       const std::vector<PositionConnector*>& matches = (*i)->matches;
1347       std::vector<PositionConnector*>::const_iterator j;
1348       for (j = matches.begin(); j != matches.end(); j++) {
1349         if (std::find(certainly_deep_left[(*j)->word].begin(), certainly_deep_left[(*j)->word].end(),
1350                       *j) != certainly_deep_left[(*j)->word].end()) {
1351           generate_literal(~Lit(_variables->link((*i)->word, (*i)->position, (*i)->connector.desc->string, (*i)->exp,
1352                                              (*j)->word, (*j)->position, (*j)->connector.desc->string, (*j)->exp)));
1353         }
1354       }
1355     }
1356   }
1357 #endif
1358 }
1359 
1360 /*--------------------------------------------------------------------------*
1361  *        P O S T P R O C E S S I N G   &  P P     P R U N I N G            *
1362  *--------------------------------------------------------------------------*/
1363 
1364 #if 0 // Needed only for the additional PP pruning (pp_pruning_1)
1365 /**
1366  * Returns false if the string s does not match anything in
1367  * the array. The array elements are post-processing symbols.
1368  */
1369 static bool string_in_list(const char * s, const char * a[])
1370 {
1371   for (size_t i = 0; a[i] != NULL; i++)
1372     if (post_process_match(a[i], s)) return true;
1373   return false;
1374 }
1375 #endif
1376 
1377 /* For debugging. */
1378 /*
1379 GNUC_UNUSED static void print_link_array(const char **s)
1380 {
1381   printf("|");
1382   do
1383   {
1384     printf("%s ", *s);
1385   } while (*++s);
1386   printf("|\n");
1387 }
1388 */
1389 
pp_prune()1390 void SATEncoder::pp_prune()
1391 {
1392   const std::vector<int>& link_variables = _variables->link_variables();
1393 
1394   if (_sent->dict->base_knowledge == NULL)
1395     return;
1396 
1397   if (!_opts->perform_pp_prune)
1398     return;
1399 
1400   pp_knowledge * knowledge;
1401   knowledge = _sent->dict->base_knowledge;
1402 
1403   for (size_t i=0; i<knowledge->n_contains_one_rules; i++)
1404   {
1405     pp_rule rule = knowledge->contains_one_rules[i];
1406     // const char * selector = rule.selector;         /* selector string for this rule */
1407     pp_linkset * link_set = rule.link_set;            /* the set of criterion links */
1408 
1409 
1410     vec<Lit> triggers;
1411     for (size_t vi = 0; vi < link_variables.size(); vi++) {
1412       const Variables::LinkVar* var = _variables->link_variable(link_variables[vi]);
1413       if (post_process_match(rule.selector, var->label)) {
1414         triggers.push(Lit(link_variables[vi]));
1415       }
1416     }
1417 
1418     if (triggers.size() == 0)
1419       continue;
1420 
1421     vec<Lit> criterions;
1422     for (size_t j = 0; j < link_variables.size(); j++) {
1423       pp_linkset_node *p;
1424       for (size_t hashval = 0; hashval < link_set->hash_table_size; hashval++) {
1425         for (p = link_set->hash_table[hashval]; p!=NULL; p=p->next) {
1426           const Variables::LinkVar* var = _variables->link_variable(link_variables[j]);
1427           if (post_process_match(p->str, var->label)) {
1428             criterions.push(Lit(link_variables[j]));
1429           }
1430         }
1431       }
1432     }
1433 
1434     DEBUG_print("---pp_pruning--");
1435     for (int k = 0; k < triggers.size(); k++) {
1436       vec<Lit> clause(criterions.size() + 1);
1437       for (int ci = 0; ci < criterions.size(); ci++)
1438         clause[ci] = criterions[ci];
1439       clause[criterions.size()] = (~triggers[k]);
1440       add_clause(clause);
1441     }
1442     DEBUG_print("---end pp_pruning--");
1443   }
1444 
1445   /* The following code has a bug that causes it to be too restrictive.
1446    * Instead of fixing it, it is disabled here, as it is to be replaced
1447    * soon by the pp-pruning code of the classic parser. */
1448 #if 0
1449   if (test_enabled("no-pp_pruning_1")) return; // For result comparison.
1450   /* Additional PP pruning.
1451    * Since the SAT parser defines constrains on links, it is possible
1452    * to encode all the postprocessing rules, in order to prune all the
1453    * solutions that violate the postprocessing rules.
1454    *
1455    * This may save much SAT-solver overhead, because solutions which are
1456    * not going to pass postprocessing will get eliminated, saving a very
1457    * high number of calls to the SAT-solver that are not yielding a valid
1458    * linkage.
1459    *
1460    * However, the encoding code has a non-negligible overhead, and also
1461    * it may add clauses that increase the overhead of the SAT-solver.
1462    *
1463    * FIXME:
1464    * A large part of the encoding overhead is because the need to
1465    * iterate a very large number of items in order to find the small
1466    * number of items that are needed. Hashing in advance can solve this
1467    * and similar problems.
1468    *
1469    * For now only a very limited pruning is tried.
1470    * For the CONTAINS_NONE_RULES, only the nearest link to the root-link
1471    * is checked.
1472    *
1473    * TODO:
1474    * BOUNDED_RULES pruning.
1475    * Domain encoding.
1476    */
1477 
1478   /*
1479    * CONTAINS_NONE_RULES
1480    * 1. Suppose no one of them refers to a URFL domain.
1481    * 2. Only the links of words accessible through a right link of
1482    * the root word are checked here. Even such a limited check saves CPU.
1483    *
1484    * Below, var_i and var_j are link variables, when var_i is the root link.
1485    * root_word---var_i---wr---var_j
1486    */
1487 
1488   //printf("n_contains_none_rules %zu\n", knowledge->n_contains_none_rules);
1489   //printf("link_variables.size() %zu\n", link_variables.size());
1490   for (size_t i=0; i<knowledge->n_contains_none_rules; i++)
1491   {
1492     pp_rule rule = knowledge->contains_none_rules[i];
1493     //printf("rule[%2zu]selector = %s; ", i, rule.selector);
1494     //print_link_array(rule.link_array);
1495 
1496     int root_link;
1497     int wr; // A word at the end of a right-link of the root word.
1498     for (size_t vi = 0; vi < link_variables.size(); vi++)
1499     {
1500       const Variables::LinkVar* var_i = _variables->link_variable(link_variables[vi]);
1501       if (!post_process_match(rule.selector, var_i->label)) continue;
1502 
1503       root_link = link_variables[vi];
1504       wr = var_i->right_word;
1505 
1506       //printf("Found rule[%zu].selector %s: %s %d\n", i, rule.selector, var_i->label, wr);
1507 
1508       vector<int> must_not_exist;
1509       for (size_t vj = 0; vj < link_variables.size(); vj++)
1510       {
1511         const Variables::LinkVar* var_j = _variables->link_variable(link_variables[vj]);
1512         if ((wr != var_j->left_word) && (wr != var_j->right_word)) continue;
1513         if (var_i == var_j) continue; // It points back to the root word.
1514 
1515         //printf("var_j->label %s %d %d\n", var_j->label, var_j->right_word, var_j->left_word);
1516         if (string_in_list(var_j->label, rule.link_array))
1517           must_not_exist.push_back(link_variables[vj]);
1518       }
1519 
1520       DEBUG_print("---pp_pruning 1--");
1521       for (auto c: must_not_exist)
1522       {
1523         vec<Lit> clause(2);
1524         clause[0] = ~Lit(root_link);
1525         clause[1] = ~Lit(c);
1526         add_clause(clause);
1527       }
1528       DEBUG_print("---end pp_pruning 1--");
1529     }
1530   }
1531 #endif // 0
1532 }
1533 
1534 /*--------------------------------------------------------------------------*
1535  *                         D E C O D I N G                                  *
1536  *--------------------------------------------------------------------------*/
1537 
1538 /**
1539  * Create the next linkage.
1540  * Return true iff the linkage can be created.
1541  */
create_linkage(Linkage linkage)1542 bool SATEncoder::create_linkage(Linkage linkage)
1543 {
1544   partial_init_linkage(_sent, linkage, _sent->length);
1545   if (!sat_extract_links(linkage)) return false;
1546   compute_link_names(linkage, _sent->string_set);
1547   return true;
1548 }
1549 
generate_linkage_prohibiting()1550 void SATEncoder::generate_linkage_prohibiting()
1551 {
1552   vec<Lit> clause;
1553   const std::vector<int>& link_variables = _variables->link_variables();
1554   for (std::vector<int>::const_iterator i = link_variables.begin(); i != link_variables.end(); i++) {
1555     int var = *i;
1556     if (_solver->model[var] == l_True) {
1557       clause.push(~Lit(var));
1558     } else if (_solver->model[var] == l_False) {
1559       clause.push(Lit(var));
1560     }
1561   }
1562   _solver->addClause(clause);
1563 }
1564 
get_next_linkage()1565 Linkage SATEncoder::get_next_linkage()
1566 {
1567   Linkage_s linkage;
1568   bool connected;
1569   bool display_linkage_disconnected = false;
1570 
1571 
1572   /* Loop until a good linkage is found.
1573    * Insane (mixed alternatives) linkages are always ignored.
1574    * Disconnected linkages are normally ignored, unless
1575    * !test=linkage-disconnected is used (and they are sane) */
1576   bool linkage_ok;
1577   do {
1578     if (!_solver->solve()) return NULL;
1579 
1580     std::vector<int> components;
1581     connected = connectivity_components(components);
1582 
1583     // Prohibit this solution so the next ones can be found
1584     if (!connected) {
1585       _num_lkg_disconnected++;
1586       generate_disconnectivity_prohibiting(components);
1587       display_linkage_disconnected = test_enabled("linkage-disconnected");
1588     } else {
1589       generate_linkage_prohibiting();
1590     }
1591 
1592     linkage_ok = false;
1593     if (connected || display_linkage_disconnected) {
1594       memset(&linkage, 0, sizeof(struct Linkage_s));
1595 
1596       linkage_ok = create_linkage(&linkage);
1597       if (linkage_ok)
1598         linkage_ok = sane_linkage_morphism(_sent, &linkage, _opts);
1599       if (!linkage_ok) {
1600           /* We cannot elegantly add this linkage to sent->linkges[] -
1601            * to be freed in sentence_delete(), since insane linkages
1602            * must be there with index > num_linkages_post_processed - so
1603            * they remain hidden, but num_linkages_post_processed is an
1604            * arbitrary number.  So we must free it here. */
1605           free_linkage_connectors_and_disjuncts(&linkage);
1606           free_linkage(&linkage);
1607           continue; // skip this linkage
1608       }
1609       remove_empty_words(&linkage);  /* Discard optional words. */
1610     }
1611 
1612     if (!connected) {
1613       if (display_linkage_disconnected) {
1614         cout << "Linkage DISCONNECTED" << endl;
1615       } else {
1616         lgdebug(+D_SAT, "Linkage DISCONNECTED (skipped)\n");
1617       }
1618     }
1619   } while (!linkage_ok);
1620 
1621   /* We cannot expand the linkage array on demand, since the API uses
1622    * linkage pointers, and they would become invalid if realloc() changes
1623    * the address of the memory block. So it is allocated in advance. */
1624   if (NULL == _sent->lnkages)
1625   {
1626     _sent->num_linkages_alloced = _opts->linkage_limit;
1627     size_t nbytes = _sent->num_linkages_alloced * sizeof(struct Linkage_s);
1628     _sent->lnkages = (Linkage) malloc(nbytes);
1629     _next_linkage_index = 0;
1630   }
1631   assert(_next_linkage_index<_sent->num_linkages_alloced, "_sent->lnkages ovl");
1632 
1633   Linkage lkg = &_sent->lnkages[_next_linkage_index];
1634   _next_linkage_index++;
1635   *lkg = linkage;  /* copy en-mass */
1636 
1637   /* The link-parser code checks the next linkage for num_violations
1638    * (to save calls to linkage_create()). Allow for that practice. */
1639   if (_next_linkage_index < _sent->num_linkages_alloced)
1640     lkg[1].lifo.N_violations = 0;
1641 
1642   // Perform the rest of the post-processing
1643   if (NULL != _sent->postprocessor) {
1644     do_post_process(_sent->postprocessor, lkg, false);
1645     if (NULL != _sent->postprocessor->violation) {
1646       _num_pp_violations++;
1647       lkg->lifo.N_violations++;
1648       lkg->lifo.pp_violation_msg = _sent->postprocessor->violation;
1649       lgdebug(+D_SAT, "Postprocessing error: %s\n", lkg->lifo.pp_violation_msg);
1650     } else {
1651       // XXX We cannot maintain num_valid_linkages, because it starts from
1652       // a high number. If we start it from 0, then on value 1 link-parser
1653       // would report "Unique linkage".
1654       //_sent->num_valid_linkages++;
1655     }
1656     post_process_free_data(&_sent->postprocessor->pp_data);
1657   }
1658   linkage_score(lkg, _opts);
1659 
1660   // if (NULL == _sent->postprocessor->violation && verbosity > 1)
1661   //   _solver->printStats();
1662   return lkg;
1663 }
1664 
1665 /*******************************************************************************
1666  *        SAT encoding for sentences that do not contain conjunction.          *
1667  *******************************************************************************/
1668 
generate_encoding_specific_clauses()1669 void SATEncoderConjunctionFreeSentences::generate_encoding_specific_clauses() {
1670 }
1671 
handle_null_expression(int w)1672 void SATEncoderConjunctionFreeSentences::handle_null_expression(int w) {
1673   // Formula is unsatisfiable
1674   vec<Lit> clause;
1675   add_clause(clause);
1676 }
1677 
determine_satisfaction(int w,char * name)1678 void SATEncoderConjunctionFreeSentences::determine_satisfaction(int w, char* name)
1679 {
1680   // All tags must be satisfied
1681   generate_literal(Lit(_variables->string(name)));
1682 }
1683 
generate_satisfaction_for_connector(int wi,int pi,Exp * e,char * var)1684 void SATEncoderConjunctionFreeSentences::generate_satisfaction_for_connector(
1685     int wi, int pi, Exp *e, char* var)
1686 {
1687   const char* Ci = e->condesc->string;
1688   char dir = e->dir;
1689   bool multi = e->multi;
1690   double cost = e->cost;
1691 
1692   Lit lhs = Lit(_variables->string_cost(var, cost));
1693 
1694 #ifdef SAT_DEBUG
1695   cout << "*** Connector: ." << wi << ". ." << pi << ". " << Ci << dir << endl;
1696 #endif
1697 
1698   // words that can potentially match (wi, pi)
1699   int low = (dir == '-') ? 0 : wi + 1;
1700   int hi  = (dir == '-') ? wi : _sent->length;
1701 
1702   std::vector<int> _w_;
1703   for (int wj = low; wj < hi; wj++) {
1704     // Eliminate words that cannot be connected to (wi, pi)
1705     if (!_word_tags[wj].match_possible(wi, pi))
1706       continue;
1707 
1708     // Now we know that there is a potential link between the
1709     // connector (wi, pi) and the word wj
1710     _w_.push_back(wj);
1711 
1712     generate_link_cw_ordinary_definition(wi, pi, e, wj);
1713   }
1714 
1715   vec<Lit> _link_cw_;
1716   for (size_t i = 0; i < _w_.size(); i++)
1717     _link_cw_.push(Lit(_variables->link_cw(_w_[i], wi, pi, Ci)));
1718   generate_or_definition(lhs, _link_cw_);
1719 
1720   DEBUG_print("--------- multi");
1721   if (!multi) {
1722     generate_xor_conditions(_link_cw_);
1723   }
1724   DEBUG_print("--------- end multi");
1725 
1726 #ifdef SAT_DEBUG
1727   cout << "*** End Connector: ." << wi << ". ." << pi << ". " <<  Ci << endl;
1728 #endif
1729 }
1730 
generate_linked_definitions()1731 void SATEncoderConjunctionFreeSentences::generate_linked_definitions()
1732 {
1733   _linked_possible.resize(_sent->length, 1);
1734   vector<vec<Lit>> linked_to_word(_sent->length);
1735 
1736   DEBUG_print("------- linked definitions");
1737   for (size_t w1 = 0; w1 < _sent->length; w1++) {
1738     vec<Lit> linked;
1739     for (size_t w2 = w1 + 1; w2 < _sent->length; w2++) {
1740       DEBUG_print("---------- ." << w1 << ". ." << w2 << ".");
1741       Lit lhs = Lit(_variables->linked(w1, w2));
1742 
1743       vec<Lit> rhs;
1744       const std::vector<PositionConnector>& w1_connectors = _word_tags[w1].get_right_connectors();
1745       std::vector<PositionConnector>::const_iterator c;
1746       for (c = w1_connectors.begin(); c != w1_connectors.end(); c++) {
1747         assert(c->word == w1, "Connector word must match");
1748         if (_word_tags[w2].match_possible(c->word, c->position)) {
1749           rhs.push(Lit(_variables->link_cw(w2, c->word, c->position, connector_string(&c->connector))));
1750         }
1751       }
1752 
1753       _linked_possible.set(w1, w2, rhs.size() > 0);
1754       generate_or_definition(lhs, rhs);
1755 
1756       /* Optional words that have no links should be "down", as a mark that
1757        * they are missing in the linkage.
1758        * Collect all possible word links, per word, to be used below. */
1759       if (rhs.size() > 0) {
1760         if (_sent->word[w1].optional) {
1761           linked_to_word[w1].push(Lit(_variables->linked(w1, w2)));
1762         }
1763         if (_sent->word[w2].optional) {
1764           linked_to_word[w2].push(Lit(_variables->linked(w1, w2)));
1765         }
1766       }
1767     }
1768 
1769     if (_sent->word[w1].optional) {
1770       /* The word should be connected to at least another word in order to be
1771        * in the linkage. */
1772       DEBUG_print("------------S not linked -> no word (w" << w1 << ")");
1773       generate_or_definition(Lit(w1), linked_to_word[w1]);
1774       DEBUG_print("------------E not linked -> no word");
1775     }
1776   }
1777   DEBUG_print("------- end linked definitions");
1778 }
1779 
PositionConnector2exp(const PositionConnector * pc)1780 Exp* SATEncoderConjunctionFreeSentences::PositionConnector2exp(const PositionConnector* pc)
1781 {
1782     Exp* e = Exp_create(_sent->Exp_pool);
1783     e->type = CONNECTOR_type;
1784     e->dir = pc->dir;
1785     e->multi = pc->connector.multi;
1786     e->condesc = (condesc_t *)pc->connector.desc; // FIXME - const
1787     e->cost = pc->cost;
1788 
1789     return e;
1790 }
1791 
1792 #define D_SEL 8
sat_extract_links(Linkage lkg)1793 bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg)
1794 {
1795   Disjunct *d;
1796   int current_link = 0;
1797 
1798   Exp **exp_word = (Exp **)alloca(_sent->length * sizeof(Exp *));
1799   memset(exp_word, 0, _sent->length * sizeof(Exp *));
1800   const X_node **xnode_word = (const X_node **)alloca(_sent->length * sizeof(X_node *));
1801   memset(xnode_word, 0, _sent->length * sizeof(X_node *));
1802 
1803   const std::vector<int>& link_variables = _variables->link_variables();
1804   std::vector<int>::const_iterator i;
1805   for (i = link_variables.begin(); i != link_variables.end(); i++) {
1806     if (_solver->model[*i] != l_True)
1807       continue;
1808 
1809     const Variables::LinkVar* var = _variables->link_variable(*i);
1810     if (_solver->model[_variables->linked(var->left_word, var->right_word)] != l_True)
1811       continue;
1812 
1813     check_link_size(lkg);
1814     Link& clink = lkg->link_array[current_link];
1815     current_link++;
1816     clink.lw = var->left_word;
1817     clink.rw = var->right_word;
1818 
1819     PositionConnector* lpc = _word_tags[var->left_word].get(var->left_position);
1820     PositionConnector* rpc = _word_tags[var->right_word].get(var->right_position);
1821 
1822     const X_node *left_xnode = lpc->word_xnode;
1823     const X_node *right_xnode = rpc->word_xnode;
1824 
1825     // Allocate memory for the connectors, because they should persist
1826     // beyond the lifetime of the sat-solver data structures.
1827     clink.lc = connector_new(NULL, NULL, NULL);
1828     clink.rc = connector_new(NULL, NULL, NULL);
1829 
1830     *clink.lc = lpc->connector;
1831     *clink.rc = rpc->connector;
1832 
1833     Exp* lcexp = PositionConnector2exp(lpc);
1834     Exp* rcexp = PositionConnector2exp(rpc);
1835     add_anded_exp(_sent, exp_word[var->left_word], lcexp);
1836     add_anded_exp(_sent, exp_word[var->right_word], rcexp);
1837 
1838     if (verbosity_level(D_SAT)) {
1839       //cout<< "Lexp[" <<left_xnode->word->subword <<"]: " << lg_exp_stringify(var->left_exp);
1840       cout<< "w"<<var->left_word<<" LCexp[" <<left_xnode->word->subword <<"]: ", lg_exp_stringify(lcexp);
1841       //cout<< "Rexp[" <<right_xnode->word->subword <<"]: ", lg_exp_stringify(var->right_exp);
1842       cout<< "w"<<var->right_word<<" RCexp[" <<right_xnode->word->subword <<"]: ", lg_exp_stringify(rcexp);
1843       cout<< "L+L: ", lg_exp_stringify(exp_word[var->left_word]);
1844       cout<< "R+R: ", lg_exp_stringify(exp_word[var->right_word]);
1845     }
1846 
1847     if (xnode_word[var->left_word] && xnode_word[var->left_word] != left_xnode) {
1848       lgdebug(+0, "Warning: Inconsistent X_node for word %d (%s and %s)\n",
1849               var->left_word, xnode_word[var->left_word]->string,
1850               left_xnode->string);
1851     }
1852     if (xnode_word[var->right_word] && xnode_word[var->right_word] != right_xnode) {
1853       lgdebug(+0, "Warning: Inconsistent X_node for word %d (%s and %s)\n",
1854               var->right_word, xnode_word[var->right_word]->string,
1855               right_xnode->string);
1856     }
1857 
1858     xnode_word[var->left_word] = left_xnode;
1859     xnode_word[var->right_word] = right_xnode;
1860   }
1861 
1862   lkg->num_links = current_link;
1863 
1864   // Now build the disjuncts.
1865   // This is needed so that compute_chosen_word works correctly.
1866   // Just in case there is no expression for a disjunct, a null one is used.
1867   for (WordIdx wi = 0; wi < _sent->length; wi++) {
1868     Exp *de = exp_word[wi];
1869 
1870     // Skip optional words
1871     if (xnode_word[wi] == NULL)
1872     {
1873       if (!_sent->word[wi].optional)
1874       {
1875         de = null_exp();
1876         prt_error("Error: Internal error: Non-optional word %zu has no linkage\n", wi);
1877       }
1878       continue;
1879     }
1880 
1881     if (de == NULL) {
1882       de = null_exp();
1883       prt_error("Error: Internal error: No expression for word %zu\n", wi);
1884     }
1885 
1886     double cost_cutoff;
1887 #if LIMIT_TOTAL_LINKAGE_COST // Undefined - incompatible to the classic parser.
1888     cost_cutoff = _opts->disjunct_cost;
1889 #else
1890     cost_cutoff = 1000.0;
1891 #endif // LIMIT_TOTAL_LINKAGE_COST
1892     d = build_disjuncts_for_exp(NULL, de, xnode_word[wi]->string,
1893                                 &xnode_word[wi]->word->gword_set_head,
1894                                 cost_cutoff, _opts);
1895 
1896     if (d == NULL)
1897     {
1898       lgdebug(+D_SEL, "Debug: Word %zu: Disjunct cost > cost_cutoff %.2f\n",
1899               wi, cost_cutoff);
1900 #ifdef DEBUG
1901       if (!test_enabled("SAT-cost"))
1902 #endif
1903       return false;
1904     }
1905 
1906     /* Recover cost of costly-nulls. */
1907     const vector<EmptyConnector>& ec = _word_tags[wi].get_empty_connectors();
1908     for (vector<EmptyConnector>::const_iterator j = ec.begin(); j < ec.end(); j++)
1909     {
1910       lgdebug(+D_SEL, "Debug: Word %zu: Costly-null var=%d, found=%d cost=%.2f\n",
1911               wi, j->ec_var, _solver->model[j->ec_var] == l_True, j->ec_cost);
1912       if (_solver->model[j->ec_var] == l_True)
1913         d->cost += j->ec_cost;
1914     }
1915 
1916     lkg->chosen_disjuncts[wi] = d;
1917 
1918 #if LIMIT_TOTAL_LINKAGE_COST
1919     if (d->cost > cost_cutoff)
1920     {
1921       lgdebug(+D_SEL, "Word %zu: Disjunct cost  %.2f > cost_cutoff %.2f\n",
1922               wi, d->cost, cost_cutoff);
1923 #ifdef DEBUG
1924       if (!test_enabled("SAT-cost"))
1925 #endif
1926       return false;
1927     }
1928 #endif // LIMIT_TOTAL_LINKAGE_COST
1929   }
1930 
1931   DEBUG_print("Total links: ." <<  lkg->num_links << "." << endl);
1932   return true;
1933 }
1934 #undef D_SEL
1935 
1936 /**
1937  * Main entry point into the SAT parser.
1938  * A note about panic mode:
1939  * - The MiniSAT support for timeout is not yet used (FIXME).
1940  * - Parsing with null links is not supported (FIXME).
1941  * So nothing particularly useful happens in a panic mode, and it is
1942  * left for the user to disable it.
1943  */
sat_parse(Sentence sent,Parse_Options opts)1944 extern "C" int sat_parse(Sentence sent, Parse_Options  opts)
1945 {
1946   if (opts->min_null_count > 0) {
1947     // The sat solver doesn't support (yet) parsing with nulls.
1948     // For now, just avoid the delay of a useless re-parsing.
1949     if (opts->verbosity >= 1)
1950       prt_error("Info: use-sat: Cannot parse with null links (yet).\n"
1951                 "              Set the \"null\" option to 0 to turn off "
1952                 "parsing with null links.\n");
1953     sent->num_valid_linkages = 0;
1954     sent->num_linkages_post_processed = 0;
1955     return 0;
1956   }
1957 
1958   SATEncoder* encoder = (SATEncoder*) sent->hook;
1959   if (encoder) {
1960     sat_free_linkages(sent, encoder->_next_linkage_index);
1961     delete encoder;
1962   }
1963 
1964   encoder = new SATEncoderConjunctionFreeSentences(sent, opts);
1965   sent->hook = encoder;
1966   encoder->encode();
1967 
1968   LinkageIdx linkage_limit = opts->linkage_limit;
1969   LinkageIdx k;
1970   Linkage lkg = NULL;
1971 
1972   /* Due to the nature of SAT solving, we cannot know in advance the
1973    * number of linkages. But in order to process batch files, we must
1974    * know if there is at least one valid linkage. We check up to
1975    * linkage_limit number of linkages (settable by !limit).
1976    * Normally the following loop terminates after one or a few
1977    * iterations, when a valid linkage is found or when no more linkages
1978    * are found.
1979    *
1980    * Note that trying to find here the first valid linkage doesn't add
1981    * overhead to an interactive user. It also doesn't add overhead to
1982    * batch processing, which needs anyway to find out if there is a
1983    * valid linkage in order to be any useful. */
1984   for (k = 0; k < linkage_limit; k++)
1985   {
1986     lkg = encoder->get_next_linkage();
1987     if (lkg == NULL || lkg->lifo.N_violations == 0) break;
1988   }
1989   encoder->print_stats();
1990 
1991   if (lkg == NULL || k == linkage_limit) {
1992     // We don't have a valid linkages among the first linkage_limit ones
1993     sent->num_valid_linkages = 0;
1994     sent->num_linkages_found = k;
1995     sent->num_linkages_post_processed = k;
1996     if (opts->max_null_count > 0) {
1997       // The sat solver doesn't support (yet) parsing with nulls.
1998       if (opts->verbosity >= 1)
1999       prt_error("Info: use-sat: Cannot parse with null links (yet).\n"
2000                 "              Set the \"null\" option to 0 to turn off "
2001                 "parsing with null links.\n");
2002     }
2003   } else {
2004     /* We found a valid linkage. However, we actually don't know yet the
2005      * number of linkages, and if we set them too low, the command-line
2006      * client will fail to display all of the possible linkages.  Work
2007      * around this by lying... return the maximum number of linkages we
2008      * are going to produce.
2009      * A NULL linkage will be returned by linkage_create() after the last
2010      * linkage is produced to signify that there are no more linkages. */
2011     sent->num_valid_linkages = linkage_limit;
2012     sent->num_linkages_post_processed = linkage_limit;
2013   }
2014 
2015   return 0;
2016 }
2017 
2018 /**
2019  * Get a SAT linkage.
2020  * Validate that k is not too big.
2021  * If the k'th linkage already exists, return it.
2022  * Else k is the next linkage index - get the next SAT solution.
2023  */
sat_create_linkage(LinkageIdx k,Sentence sent,Parse_Options opts)2024 extern "C" Linkage sat_create_linkage(LinkageIdx k, Sentence sent, Parse_Options  opts)
2025 {
2026   SATEncoder* encoder = (SATEncoder*) sent->hook;
2027   if (!encoder) return NULL;
2028 
2029   if (k >= opts->linkage_limit)                  // > allocated memory
2030   {
2031     encoder->print_stats();
2032     return NULL;
2033   }
2034 
2035   if(k > encoder->_next_linkage_index)           // skips unproduced linkages
2036   {
2037     prt_error("Error: Linkage index %zu is greater than the "
2038               "maximum expected one %zu\n", k, encoder->_next_linkage_index);
2039     return NULL;
2040   }
2041   if (k < encoder->_next_linkage_index)          // already produced
2042     return &encoder->_sent->lnkages[k];
2043 
2044   return encoder->get_next_linkage();            // exactly next to produce
2045 }
2046 
sat_sentence_delete(Sentence sent)2047 extern "C" void sat_sentence_delete(Sentence sent)
2048 {
2049   SATEncoder* encoder = (SATEncoder*) sent->hook;
2050   if (!encoder) return;
2051 
2052   encoder->print_stats();
2053 
2054   sat_free_linkages(sent, encoder->_next_linkage_index);
2055   delete encoder;
2056 }
2057