1 /**
2 * Encode link-grammar for solving with the SAT solver...
3 */
4 #include <cstdlib>
5 #include <cstdio>
6 #include <iostream>
7 #include <vector>
8 #include <algorithm>
9 #include <iterator>
10 using std::cout;
11 using std::cerr;
12 using std::endl;
13 using std::vector;
14
15 extern "C" {
16 #include "sat-encoder.h"
17 }
18 #include "minisat/core/Solver.h"
19 #undef assert
20
21 #include "sat-encoder.hpp"
22 #include "variables.hpp"
23 #include "word-tag.hpp"
24 #include "matrix-ut.hpp"
25 #include "clock.hpp"
26 #include "fast-sprintf.hpp"
27
28 extern "C" {
29 #include "disjunct-utils.h"
30 #include "linkage/analyze-linkage.h" // for compute_link_names()
31 #include "linkage/linkage.h"
32 #include "linkage/sane.h" // for sane_linkage_morphism()
33 #include "linkage/score.h" // for linkage_score()
34 #include "parse/prune.h" // for optional_gap_collapse()
35 #include "prepare/build-disjuncts.h" // for build_disjuncts_for_exp()
36 #include "post-process/post-process.h"
37 #include "post-process/pp-structures.h"
38 #include "tokenize/word-structures.h" // for Word_struct
39 #include "tokenize/tok-structures.h" // got Gword internals
40 }
41
42 // Macro DEBUG_print is used to dump to stdout information while debugging
43 #ifdef SAT_DEBUG
44 #define DEBUG_print(x) (cout << x << endl)
45 #else
46 #define DEBUG_print(x)
47 #endif
48
49 #define D_SAT 5
50
51 // Convert a NULL C string pointer, for printing a possibly NULL string
52 #define N(s) ((s) ? (s) : "(null)")
53
54 /*-------------------------------------------------------------------------*
55 * C N F C O N V E R S I O N *
56 *-------------------------------------------------------------------------*/
generate_literal(Lit l)57 void SATEncoder::generate_literal(Lit l) {
58 vec<Lit> clause(1);
59 clause[0] = l;
60 add_clause(clause);
61 }
62
generate_and_definition(Lit lhs,vec<Lit> & rhs)63 void SATEncoder::generate_and_definition(Lit lhs, vec<Lit>& rhs) {
64 vec<Lit> clause(2);
65 for (int i = 0; i < rhs.size(); i++) {
66 clause[0] = ~lhs;
67 clause[1] = rhs[i];
68 add_clause(clause);
69 }
70
71 for (int i = 0; i < rhs.size(); i++) {
72 clause[0] = ~rhs[i];
73 clause[1] = lhs;
74 add_clause(clause);
75 }
76 }
generate_classical_and_definition(Lit lhs,vec<Lit> & rhs)77 void SATEncoder::generate_classical_and_definition(Lit lhs, vec<Lit>& rhs) {
78 {
79 vec<Lit> clause(2);
80 for (int i = 0; i < rhs.size(); i++) {
81 clause[0] = ~lhs;
82 clause[1] = rhs[i];
83 add_clause(clause);
84 }
85 }
86
87 {
88 vec<Lit> clause(rhs.size() + 1);
89 for (int i = 0; i < rhs.size(); i++) {
90 clause[i] = ~rhs[i];
91 }
92 clause[rhs.size()] = lhs;
93 add_clause(clause);
94 }
95 }
96
generate_or_definition(Lit lhs,vec<Lit> & rhs)97 void SATEncoder::generate_or_definition(Lit lhs, vec<Lit>& rhs) {
98 {
99 vec<Lit> clause(2);
100 for (int i = 0; i < rhs.size(); i++) {
101 clause[0] = lhs;
102 clause[1] = ~rhs[i];
103 add_clause(clause);
104 }
105 }
106
107 {
108 vec<Lit> clause(rhs.size() + 1);
109 for (int i = 0; i < rhs.size(); i++) {
110 clause[i] = rhs[i];
111 }
112 clause[rhs.size()] = ~lhs;
113 add_clause(clause);
114 }
115 }
116
117 #if 0
118 void SATEncoder::generate_conditional_lr_implication_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs) {
119 {
120 vec<Lit> clause(2);
121 for (int i = 0; i < rhs.size(); i++) {
122 clause[0] = lhs;
123 clause[1] = ~rhs[i];
124 add_clause(clause);
125 }
126 }
127
128 {
129 vec<Lit> clause(rhs.size() + 2);
130 for (int i = 0; i < rhs.size(); i++) {
131 clause[i] = rhs[i];
132 }
133 clause[rhs.size()] = ~lhs;
134 clause[rhs.size()+1] = ~condition;
135 add_clause(clause);
136 }
137 }
138 #endif
139
140 #if 0
141 void SATEncoder::generate_conditional_lr_implication_or_definition(Lit condition1, Lit condition2, Lit lhs, vec<Lit>& rhs) {
142 {
143 vec<Lit> clause(2);
144 for (int i = 0; i < rhs.size(); i++) {
145 clause[0] = lhs;
146 clause[1] = ~rhs[i];
147 add_clause(clause);
148 }
149 }
150
151 {
152 vec<Lit> clause(rhs.size() + 3);
153 for (int i = 0; i < rhs.size(); i++) {
154 clause[i] = rhs[i];
155 }
156 clause[rhs.size()] = ~lhs;
157 clause[rhs.size()+1] = ~condition1;
158 clause[rhs.size()+2] = ~condition2;
159 add_clause(clause);
160 }
161 }
162 #endif
163
generate_xor_conditions(vec<Lit> & vect)164 void SATEncoder::generate_xor_conditions(vec<Lit>& vect) {
165 vec<Lit> clause(2);
166 for (int i = 0; i < vect.size(); i++) {
167 for (int j = i + 1; j < vect.size(); j++) {
168 if (vect[i] == vect[j])
169 continue;
170 clause[0] = ~vect[i];
171 clause[1] = ~vect[j];
172 add_clause(clause);
173 }
174 }
175 }
176
generate_and(vec<Lit> & vect)177 void SATEncoder::generate_and(vec<Lit>& vect) {
178 for (int i = 0; i < vect.size(); i++) {
179 generate_literal(vect[i]);
180 }
181 }
182
183
generate_or(vec<Lit> & vect)184 void SATEncoder::generate_or(vec<Lit>& vect) {
185 add_clause(vect);
186 }
187
generate_equivalence_definition(Lit l1,Lit l2)188 void SATEncoder::generate_equivalence_definition(Lit l1, Lit l2) {
189 vec<Lit> clause(2);
190 {
191 clause[0] = ~l1;
192 clause[1] = l2;
193 add_clause(clause);
194 }
195 {
196 clause[0] = l1;
197 clause[1] = ~l2;
198 add_clause(clause);
199 }
200 }
201
202
203 /*-------------------------------------------------------------------------*
204 * E N C O D I N G *
205 *-------------------------------------------------------------------------*/
encode()206 void SATEncoder::encode() {
207 Clock clock;
208 generate_satisfaction_conditions();
209 clock.print_time(verbosity, "Generated satisfaction conditions");
210 generate_linked_definitions();
211 clock.print_time(verbosity, "Generated linked definitions");
212 generate_planarity_conditions();
213 clock.print_time(verbosity, "Generated planarity conditions");
214
215 #ifdef _CONNECTIVITY_
216 generate_connectivity();
217 clock.print_time(verbosity, "Generated connectivity");
218 #endif
219
220 generate_encoding_specific_clauses();
221 //clock.print_time(verbosity, "Generated encoding specific clauses");
222
223 pp_prune();
224 clock.print_time(verbosity, "PP pruned");
225 power_prune();
226 clock.print_time(verbosity, "Power pruned");
227
228 _variables->setVariableParameters(_solver);
229 }
230
231
232
233 /*-------------------------------------------------------------------------*
234 * W O R D - T A G S *
235 *-------------------------------------------------------------------------*/
build_word_tags()236 void SATEncoder::build_word_tags()
237 {
238 char name[MAX_VARIABLE_NAME];
239 name[0] = 'w';
240
241 for (size_t w = 0; w < _sent->length; w++) {
242 fast_sprintf(name+1, (int)w);
243 // The SAT encoding word variables are set to be equal to the word numbers.
244 Var var = _variables->string(name);
245 assert((Var)w == var);
246 }
247
248 for (size_t w = 0; w < _sent->length; w++) {
249 _word_tags.push_back(WordTag(w, _variables, _sent, _opts));
250 int dfs_position = 0;
251
252 if (_sent->word[w].x == NULL) continue;
253
254 bool join = _sent->word[w].x->next != NULL;
255 Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
256
257 #ifdef SAT_DEBUG
258 cout << "Word ." << w << ".: " << N(_sent->word[w].unsplit_word) << endl;
259 cout << lg_exp_stringify(exp);
260 //prt_exp_mem(exp, 0);
261 cout << endl;
262 #endif
263
264 fast_sprintf(name+1, (int)w);
265 bool leading_right = true;
266 bool leading_left = true;
267 std::vector<int> eps_right, eps_left;
268
269 _word_tags[w].insert_connectors(exp, dfs_position, leading_right,
270 leading_left, eps_right, eps_left, name, true, 0, NULL, _sent->word[w].x);
271 }
272
273 for (size_t wl = 0; wl < _sent->length - 1; wl++) {
274 for (size_t wr = wl + 1; wr < _sent->length; wr++) {
275 _word_tags[wl].add_matches_with_word(_word_tags[wr]);
276 }
277 }
278 }
279
280 #if 0
281 void SATEncoder::find_all_matches_between_words(size_t w1, size_t w2,
282 std::vector<std::pair<const PositionConnector*, const PositionConnector*> >& matches) {
283 const std::vector<PositionConnector>& w1_right = _word_tags[w1].get_right_connectors();
284 std::vector<PositionConnector>::const_iterator i;
285 for (i = w1_right.begin(); i != w1_right.end(); i++) {
286 const std::vector<PositionConnector*>& w2_left_c = (*i).matches;
287 std::vector<PositionConnector*>::const_iterator j;
288 for (j = w2_left_c.begin(); j != w2_left_c.end(); j++) {
289 if ((*j)->word == w2) {
290 matches.push_back(std::pair<const PositionConnector*, const PositionConnector*>(&(*i), *j));
291 }
292 }
293 }
294 }
295
296 bool SATEncoder::matches_in_interval(int wi, int pi, int l, int r) {
297 for (int w = l; w < r; w++) {
298 if (_word_tags[w].match_possible(wi, pi))
299 return true;
300 }
301 return false;
302 }
303 #endif
304
305 /*-------------------------------------------------------------------------*
306 * S A T I S F A C T I O N *
307 *-------------------------------------------------------------------------*/
308
generate_satisfaction_conditions()309 void SATEncoder::generate_satisfaction_conditions()
310 {
311 char name[MAX_VARIABLE_NAME] = "w";
312
313 for (size_t w = 0; w < _sent->length; w++) {
314
315 #ifdef SAT_DEBUG
316 cout << "Word " << w << ": " << N(_sent->word[w].unsplit_word);
317 if (_sent->word[w].optional) cout << " (optional)";
318 cout << endl;
319 cout << lg_exp_stringify(exp);
320 cout << endl;
321 #endif
322
323 fast_sprintf(name+1, w);
324
325 if (_sent->word[w].optional)
326 _variables->string(name);
327 else
328 determine_satisfaction(w, name);
329
330 if (_sent->word[w].x == NULL) {
331 if (!_sent->word[w].optional) {
332 // Most probably everything got pruned. There will be no linkage.
333 lgdebug(+D_SAT, "Word%zu '%s': Null X_node\n", w, _sent->word[w].unsplit_word);
334 handle_null_expression(w);
335 }
336 continue; // No expression to handle.
337 }
338
339 bool join = _sent->word[w].x->next != NULL;
340 Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
341
342 int dfs_position = 0;
343 generate_satisfaction_for_expression(w, dfs_position, exp, name, 0);
344 }
345 }
346
347
generate_satisfaction_for_expression(int w,int & dfs_position,Exp * e,char * var,double parent_cost)348 void SATEncoder::generate_satisfaction_for_expression(int w, int& dfs_position, Exp* e,
349 char* var, double parent_cost)
350 {
351 Exp *opd;
352 double total_cost = parent_cost + e->cost;
353
354 if (e->type == CONNECTOR_type) {
355 dfs_position++;
356
357 generate_satisfaction_for_connector(w, dfs_position, e, var);
358
359 if (total_cost > _cost_cutoff) {
360 Lit lhs = Lit(_variables->string_cost(var, e->cost));
361 generate_literal(~lhs);
362 }
363 } else {
364 if (e->type == AND_type) {
365 if (e->operand_first == NULL) {
366 /* zeroary and */
367 _variables->string_cost(var, e->cost);
368 if (total_cost > _cost_cutoff) {
369 generate_literal(~Lit(_variables->string_cost(var, e->cost)));
370 }
371 } else if (e->operand_first->operand_next == NULL) {
372 /* unary and - skip */
373 generate_satisfaction_for_expression(w, dfs_position, e->operand_first, var, total_cost);
374 } else {
375 /* n-ary and */
376 int i;
377
378 char new_var[MAX_VARIABLE_NAME];
379 char* last_new_var = new_var;
380 char* last_var = var;
381 while((*last_new_var = *last_var)) {
382 last_new_var++;
383 last_var++;
384 }
385
386 vec<Lit> rhs;
387 for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
388 // sprintf(new_var, "%sc%d", var, i)
389 char* s = last_new_var;
390 *s++ = 'c';
391 fast_sprintf(s, i);
392 rhs.push(Lit(_variables->string(new_var)));
393 }
394
395 Lit lhs = Lit(_variables->string_cost(var, e->cost));
396 generate_and_definition(lhs, rhs);
397
398 /* Precedes */
399 int dfs_position_tmp = dfs_position;
400 for (opd = e->operand_first; opd->operand_next != NULL; opd = opd->operand_next) {
401 Exp e_tmp, *e_rhs = &e_tmp;
402
403 if (opd->operand_next->operand_next == NULL) {
404 e_rhs = opd->operand_next;
405 }
406 else {
407 // A workaround for issue #932:
408 // Since the generated ordering conditions are not transitive
409 // (see the example below), chains w/length > 2 miss needed
410 // constrains. So if the chain length > 2, refer to it as if
411 // it is a 2-component chain (1st: l->e; 2nd: AND of the rest).
412 //
413 // Problem example:
414 // Sentence: *I saw make some changes in the program
415 // In word 2 we get here this subexpression (among many others):
416 // e = (K+ & {[[@MV+]]} & (O*n+ or ())) // (& chain length = 3)
417 // The generated constrains are only: (Decoding:
418 // (word-number_connector-position_connector-string)_to-word-number)
419 // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_5_MV)_6
420 // Clause: -link_cw_(2_5_MV)_6 -link_cw_(2_6_O*n)_4
421 // (Reading: If MV connects from word 2 to word 6, then O*n
422 // cannot connect from word 2 to word 4).
423 // Clause: -link_cw_(2_5_MV)_6 -link_cw_(2_6_O*n)_5
424 // However, the following, which are generated when the
425 // chain length is limited to 2, are missing:
426 // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_6_O*n)_4
427 // Clause: -link_cw_(2_4_K)_6 -link_cw_(2_6_O*n)_5
428 // Clearly, they cannot be concluded transitively.
429 e_rhs->type = AND_type;
430 e_rhs->operand_first = opd->operand_next;
431 e_rhs->cost = 0.0;
432 };
433 generate_conjunct_order_constraints(w, opd, e_rhs, dfs_position_tmp);
434 }
435
436 /* Recurse */
437 for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
438 // sprintf(new_var, "%sc%d", var, i)
439 char* s = last_new_var;
440 *s++ = 'c';
441 fast_sprintf(s, i);
442
443 /* if (i != 0) total_cost = 0; */ // This interferes with the cost cutoff
444 generate_satisfaction_for_expression(w, dfs_position, opd, new_var, total_cost);
445 }
446 }
447 } else if (e->type == OR_type) {
448 if (e->operand_first == NULL) {
449 /* zeroary or */
450 cerr << "Zeroary OR" << endl;
451 exit(EXIT_FAILURE);
452 } else if (e->operand_first->operand_next == NULL) {
453 /* unary or */
454 generate_satisfaction_for_expression(w, dfs_position, e->operand_first, var, total_cost);
455 } else {
456 /* n-ary or */
457 int i;
458
459 char new_var[MAX_VARIABLE_NAME];
460 char* last_new_var = new_var;
461 char* last_var = var;
462 while ((*last_new_var = *last_var)) {
463 last_new_var++;
464 last_var++;
465 }
466
467 vec<Lit> rhs;
468 for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
469 // sprintf(new_var, "%sc%d", var, i)
470 char* s = last_new_var;
471 *s++ = 'd';
472 fast_sprintf(s, i);
473 rhs.push(Lit(_variables->string(new_var)));
474 }
475
476 Lit lhs = Lit(_variables->string_cost(var, e->cost));
477 generate_or_definition(lhs, rhs);
478 generate_xor_conditions(rhs);
479
480 /* Recurse */
481 for (i = 0, opd=e->operand_first; opd!=NULL; opd=opd->operand_next, i++) {
482 char* s = last_new_var;
483 *s++ = 'd';
484 fast_sprintf(s, i);
485
486 generate_satisfaction_for_expression(w, dfs_position, opd, new_var, total_cost);
487 }
488 }
489 }
490 }
491 }
492
493 /**
494 * Join all alternatives using an OR_type node.
495 */
join_alternatives(int w)496 Exp* SATEncoder::join_alternatives(int w)
497 {
498 Exp* exp = Exp_create(_sent->Exp_pool);
499 exp->type = OR_type;
500 exp->cost = 0.0;
501
502 Exp** opdp = &exp->operand_first;
503
504 for (X_node* x = _sent->word[w].x; x != NULL; x = x->next)
505 {
506 *opdp = Exp_create_dup(_sent->Exp_pool, x->exp);
507 opdp = &(*opdp)->operand_next;
508 }
509 *opdp = NULL;
510
511 return exp;
512 }
513
generate_link_cw_ordinary_definition(size_t wi,int pi,Exp * e,size_t wj)514 void SATEncoder::generate_link_cw_ordinary_definition(size_t wi, int pi,
515 Exp* e, size_t wj)
516 {
517 const char* Ci = e->condesc->string;
518 char dir = e->dir;
519 double cost = e->cost;
520 Lit lhs = Lit(_variables->link_cw(wj, wi, pi, Ci));
521 vec<Lit> rhs;
522
523 // Collect matches (wi, pi) with word wj
524 std::vector<PositionConnector*>& matches = _word_tags[wi].get(pi)->matches;
525 std::vector<PositionConnector*>::const_iterator i;
526 for (i = matches.begin(); i != matches.end(); i++) {
527 /* TODO: PositionConnector->matches[wj] */
528 if ((*i)->word != wj)
529 continue;
530
531 if (dir == '+') {
532 rhs.push(Lit(_variables->link_cost(wi, pi, Ci, e,
533 (*i)->word, (*i)->position,
534 connector_string(&(*i)->connector),
535 (*i)->exp,
536 cost + (*i)->cost)));
537 } else if (dir == '-'){
538 rhs.push(Lit(_variables->link((*i)->word, (*i)->position,
539 connector_string(&(*i)->connector),
540 (*i)->exp,
541 wi, pi, Ci, e)));
542 }
543 }
544
545 // Generate clauses
546 DEBUG_print("--------- link_cw as ordinary down");
547 generate_or_definition(lhs, rhs);
548 generate_xor_conditions(rhs);
549 DEBUG_print("--------- end link_cw as ordinary down");
550 }
551
552 /*--------------------------------------------------------------------------*
553 * C O N J U N C T O R D E R I N G *
554 *--------------------------------------------------------------------------*/
num_connectors(Exp * e)555 int SATEncoder::num_connectors(Exp* e)
556 {
557 if (e->type == CONNECTOR_type)
558 return 1;
559 else {
560 int num = 0;
561 for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
562 num += num_connectors(opd);
563 }
564 return num;
565 }
566 }
567
empty_connectors(Exp * e,char dir)568 int SATEncoder::empty_connectors(Exp* e, char dir)
569 {
570 if (e->type == CONNECTOR_type) {
571 return e->dir != dir;
572 } else if (e->type == OR_type) {
573 for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
574 if (empty_connectors(opd, dir))
575 return true;
576 }
577 return false;
578 } else if (e->type == AND_type) {
579 for (Exp* opd = e->operand_first; opd != NULL; opd = opd->operand_next) {
580 if (!empty_connectors(opd, dir))
581 return false;
582 }
583 return true;
584 } else
585 throw std::string("Unknown connector type");
586 }
587
588 #if 0
589 int SATEncoder::non_empty_connectors(Exp* e, char dir)
590 {
591 if (e->type == CONNECTOR_type) {
592 return e->dir == dir;
593 } else if (e->type == OR_type) {
594 for (E_list* l = e->operand_first; l != NULL; l = l->next) {
595 if (non_empty_connectors(l->e, dir))
596 return true;
597 }
598 return false;
599 } else if (e->type == AND_type) {
600 for (E_list* l = e->operand_first; l != NULL; l = l->next) {
601 if (non_empty_connectors(l->e, dir))
602 return true;
603 }
604 return false;
605 } else
606 throw std::string("Unknown connector type");
607 }
608 #endif
609
trailing_connectors_and_aux(int w,Exp * opd,char dir,int & dfs_position,std::vector<PositionConnector * > & connectors)610 bool SATEncoder::trailing_connectors_and_aux(int w, Exp *opd, char dir, int& dfs_position,
611 std::vector<PositionConnector*>& connectors)
612 {
613 if (opd == NULL) {
614 return true;
615 } else {
616 int dfs_position_in = dfs_position;
617 dfs_position += num_connectors(opd);
618 if (trailing_connectors_and_aux(w, opd->operand_next, dir, dfs_position, connectors)) {
619 trailing_connectors(w, opd, dir, dfs_position_in, connectors);
620 }
621 return empty_connectors(opd, dir);
622 }
623 }
624
trailing_connectors(int w,Exp * exp,char dir,int & dfs_position,std::vector<PositionConnector * > & connectors)625 void SATEncoder::trailing_connectors(int w, Exp* exp, char dir, int& dfs_position,
626 std::vector<PositionConnector*>& connectors)
627 {
628 if (exp->type == CONNECTOR_type) {
629 dfs_position++;
630 if (exp->dir == dir) {
631 connectors.push_back(_word_tags[w].get(dfs_position));
632 }
633 } else if (exp->type == OR_type) {
634 for (Exp* opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
635 trailing_connectors(w, opd, dir, dfs_position, connectors);
636 }
637 } else if (exp->type == AND_type) {
638 trailing_connectors_and_aux(w, exp->operand_first, dir, dfs_position, connectors);
639 }
640 }
641
642 #if 0
643 void SATEncoder::certainly_non_trailing(int w, Exp* exp, char dir, int& dfs_position,
644 std::vector<PositionConnector*>& connectors, bool has_right) {
645 if (exp->type == CONNECTOR_type) {
646 dfs_position++;
647 if (exp->dir == dir && has_right) {
648 connectors.push_back(_word_tags[w].get(dfs_position));
649 }
650 } else if (exp->type == OR_type) {
651 for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
652 certainly_non_trailing(w, l->e, dir, dfs_position, connectors, has_right);
653 }
654 } else if (exp->type == AND_type) {
655 if (has_right) {
656 for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
657 certainly_non_trailing(w, l->e, dir, dfs_position, connectors, true);
658 }
659 } else {
660 for (E_list* l = exp->operand_first; l != NULL; l = l->next) {
661 has_right = false;
662 for (E_list* m = l->next; m != NULL; m = m->next) {
663 if (non_empty_connectors(m->e, dir) && !empty_connectors(m->e, dir)) {
664 has_right = true;
665 break;
666 }
667 }
668 certainly_non_trailing(w, l->e, dir, dfs_position, connectors, has_right);
669 }
670 }
671 }
672 }
673 #endif
674
leading_connectors(int w,Exp * exp,char dir,int & dfs_position,vector<PositionConnector * > & connectors)675 void SATEncoder::leading_connectors(int w, Exp* exp, char dir, int& dfs_position, vector<PositionConnector*>& connectors) {
676 if (exp->type == CONNECTOR_type) {
677 dfs_position++;
678 if (exp->dir == dir) {
679 connectors.push_back(_word_tags[w].get(dfs_position));
680 }
681 } else if (exp->type == OR_type) {
682 for (Exp* opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
683 leading_connectors(w, opd, dir, dfs_position, connectors);
684 }
685 } else if (exp->type == AND_type) {
686 Exp* opd;
687 for (opd = exp->operand_first; opd != NULL; opd = opd->operand_next) {
688 leading_connectors(w, opd, dir, dfs_position, connectors);
689 if (!empty_connectors(opd, dir))
690 break;
691 }
692
693 if (opd != NULL) {
694 for (opd = opd->operand_next; opd != NULL; opd = opd->operand_next)
695 dfs_position += num_connectors(opd);
696 }
697 }
698 }
699
generate_conjunct_order_constraints(int w,Exp * e1,Exp * e2,int & dfs_position)700 void SATEncoder::generate_conjunct_order_constraints(int w, Exp *e1, Exp* e2, int& dfs_position) {
701 DEBUG_print("----- conjunct order constraints");
702 int dfs_position_e1 = dfs_position;
703 std::vector<PositionConnector*> last_right_in_e1, first_right_in_e2;
704 trailing_connectors(w, e1, '+', dfs_position_e1, last_right_in_e1);
705
706 int dfs_position_e2 = dfs_position_e1;
707 leading_connectors(w, e2, '+', dfs_position_e2, first_right_in_e2);
708
709 vec<Lit> clause(2);
710
711 if (!last_right_in_e1.empty() && !first_right_in_e2.empty()) {
712 std::vector<PositionConnector*>::iterator i, j;
713 for (i = last_right_in_e1.begin(); i != last_right_in_e1.end(); i++) {
714 std::vector<PositionConnector*>& matches_e1 = (*i)->matches;
715
716 for (j = first_right_in_e2.begin(); j != first_right_in_e2.end(); j++) {
717 std::vector<PositionConnector*>& matches_e2 = (*j)->matches;
718 std::vector<PositionConnector*>::const_iterator m1, m2;
719
720 std::vector<int> mw1;
721 for (m1 = matches_e1.begin(); m1 != matches_e1.end(); m1++) {
722 if ((m1+1) == matches_e1.end() || (*m1)->word != (*(m1 + 1))->word)
723 mw1.push_back((*m1)->word);
724 }
725
726 std::vector<int> mw2;
727 for (m2 = matches_e2.begin(); m2 != matches_e2.end(); m2++) {
728 if ((m2+1) == matches_e2.end() || (*m2)->word != (*(m2 + 1))->word)
729 mw2.push_back((*m2)->word);
730 }
731
732 std::vector<int>::const_iterator mw1i, mw2i;
733 for (mw1i = mw1.begin(); mw1i != mw1.end(); mw1i++) {
734 for (mw2i = mw2.begin(); mw2i != mw2.end(); mw2i++) {
735 if (*mw1i >= *mw2i) {
736 clause[0] = ~Lit(_variables->link_cw(*mw1i, w, (*i)->position, connector_string(&(*i)->connector)));
737 clause[1] = ~Lit(_variables->link_cw(*mw2i, w, (*j)->position, connector_string(&(*j)->connector)));
738 add_clause(clause);
739 }
740 }
741 }
742 }
743 }
744 }
745
746 DEBUG_print("----");
747 dfs_position_e1 = dfs_position;
748 std::vector<PositionConnector*> last_left_in_e1, first_left_in_e2;
749 trailing_connectors(w, e1, '-', dfs_position_e1, last_left_in_e1);
750
751 dfs_position_e2 = dfs_position_e1;
752 leading_connectors(w, e2, '-', dfs_position_e2, first_left_in_e2);
753
754 if (!last_left_in_e1.empty() && !first_left_in_e2.empty()) {
755 std::vector<PositionConnector*>::iterator i, j;
756 for (i = last_left_in_e1.begin(); i != last_left_in_e1.end(); i++) {
757 std::vector<PositionConnector*>& matches_e1 = (*i)->matches;
758
759 for (j = first_left_in_e2.begin(); j != first_left_in_e2.end(); j++) {
760 std::vector<PositionConnector*>& matches_e2 = (*j)->matches;
761
762 std::vector<PositionConnector*>::const_iterator m1, m2;
763
764
765 std::vector<int> mw1;
766 for (m1 = matches_e1.begin(); m1 != matches_e1.end(); m1++) {
767 if ((m1+1) == matches_e1.end() || (*m1)->word != (*(m1 + 1))->word)
768 mw1.push_back((*m1)->word);
769 }
770
771 std::vector<int> mw2;
772 for (m2 = matches_e2.begin(); m2 != matches_e2.end(); m2++) {
773 if ((m2+1) == matches_e2.end() || (*m2)->word != (*(m2 + 1))->word)
774 mw2.push_back((*m2)->word);
775 }
776
777 std::vector<int>::const_iterator mw1i, mw2i;
778 for (mw1i = mw1.begin(); mw1i != mw1.end(); mw1i++) {
779 for (mw2i = mw2.begin(); mw2i != mw2.end(); mw2i++) {
780 if (*mw1i <= *mw2i) {
781 clause[0] = ~Lit(_variables->link_cw(*mw1i, w, (*i)->position, connector_string(&(*i)->connector)));
782 clause[1] = ~Lit(_variables->link_cw(*mw2i, w, (*j)->position, connector_string(&(*j)->connector)));
783 add_clause(clause);
784 }
785 }
786 }
787 }
788 }
789 }
790
791 dfs_position = dfs_position_e1;
792
793 DEBUG_print("---end conjunct order constraints");
794 }
795
796
797
798 /*--------------------------------------------------------------------------*
799 * C O N N E C T I V I T Y *
800 *--------------------------------------------------------------------------*/
801
802 #ifdef _CONNECTIVITY_
generate_connectivity()803 void SATEncoder::generate_connectivity() {
804 for (int w = 1; w < _sent->length; w++) {
805 if (!_linkage_possible(0, w)) {
806 vec<Lit> clause;
807 clause.push(~Lit(_variables->con(w, 1)));
808 generate_and(clause);
809 }
810 else {
811 generate_equivalence_definition(Lit(_variables->con(w, 1)),
812 Lit(_variables->linked(0, w)));
813 }
814 }
815
816 for (int d = 2; d < _sent->length; d++) {
817 for (int w = 1; w < _sent->length; w++) {
818 Lit lhs = Lit(_variables->con(w, d));
819
820 vec<Lit> rhs;
821 rhs.push(Lit(_variables->con(w, d-1)));
822 for (int w1 = 1; w1 < _sent->length; w1++) {
823 if (w == w1)
824 continue;
825
826 if (!_linkage_possible(std::min(w, w1), std::max(w, w1))) {
827 continue;
828 }
829 rhs.push(Lit(_variables->l_con(w, w1, d-1)));
830 }
831 generate_or_definition(lhs, rhs);
832
833
834 /* lc definitions*/
835 for (int w1 = 1; w1 < _sent->length; w1++) {
836 if (w == w1)
837 continue;
838
839 int mi = std::min(w, w1), ma = std::max(w, w1);
840 if (!_linked_possible(mi, ma))
841 continue;
842
843 Lit lhs = Lit(_variables->l_con(w, w1, d-1));
844 vec<Lit> rhs(2);
845 rhs[0] = Lit(_variables->linked(mi, ma));
846 rhs[1] = Lit(_variables->con(w1, d-1));
847 generate_classical_and_definition(lhs, rhs);
848 }
849 }
850 }
851
852
853 for (int w = 1; w < _sent->length; w++) {
854 generate_literal(Lit(_variables->con(w, _sent->length-1)));
855 }
856 }
857 #endif
858
dfs(int node,const MatrixUpperTriangle<int> & graph,int component,std::vector<int> & components)859 void SATEncoder::dfs(int node, const MatrixUpperTriangle<int>& graph, int component, std::vector<int>& components) {
860 if (components[node] != -1)
861 return;
862
863 components[node] = component;
864 for (size_t other_node = node + 1; other_node < components.size(); other_node++) {
865 if (graph(node, other_node))
866 dfs(other_node, graph, component, components);
867 }
868 for (int other_node = 0; other_node < node; other_node++) {
869 if (graph(other_node, node))
870 dfs(other_node, graph, component, components);
871 }
872 }
873
874 /* Temporary debug (may be needed again for adding parsing with null-links).
875 * This allows to do "CXXFLAGS=-DCONNECTIVITY_DEBUG configure" or
876 * "make CXXFLAGS=-DCONNECTIVITY_DEBUG" . */
877 //#define CONNECTIVITY_DEBUG
878 #ifdef CONNECTIVITY_DEBUG
879 #undef CONNECTIVITY_DEBUG
880 #define CONNECTIVITY_DEBUG(x) x
debug_generate_disconnectivity_prohibiting(vector<int> components,std::vector<int> different_components)881 static void debug_generate_disconnectivity_prohibiting(vector<int> components,
882 std::vector<int> different_components) {
883
884 printf("generate_disconnectivity_prohibiting: ");
885 for (auto c: components) cout << c << " ";
886 cout << endl;
887 for (auto c: different_components) cout << c << " ";
888 cout << endl;
889 }
890 #else
891 #define debug_generate_disconnectivity_prohibiting(x, y)
892 #define CONNECTIVITY_DEBUG(x)
893 #endif
894
895 /*
896 * The idea here is to save issuing a priori a lot of clauses to enforce
897 * linkage connectivity (i.e. no islands), and instead do it when
898 * a solution finds a disconnected linkage. This works well for valid
899 * sentences because their solutions tend to be connected or mostly connected.
900 */
901
902 /**
903 * Find the connectivity vector of the linkage.
904 * @param components (result) A connectivity vector whose components
905 * correspond to the linkage words.
906 * @return true iff the linkage is completely connected (ignoring
907 * optional words).
908 *
909 * Each segment in the linkage is numbered by the ordinal number of its
910 * start word. Each vector component is numbered according to the segment
911 * in which the corresponding word resides.
912 * For example, a linkage with 7 words, which consists of 3 segments
913 * (islands) of 2,2,3 words, will be represented as: 0 0 1 1 2 2 2.
914 *
915 * In order to support optional words (words that are allowed to have
916 * no connectivity), optional words which don't participate in the linkage
917 * are marked with -1 instead of their segment number, and are disregarded
918 * in the connectivity test.
919 */
connectivity_components(std::vector<int> & components)920 bool SATEncoder::connectivity_components(std::vector<int>& components) {
921 // get satisfied linked(wi, wj) variables
922 const std::vector<int>& linked_variables = _variables->linked_variables();
923 std::vector<int> satisfied_linked_variables;
924 for (std::vector<int>::const_iterator i = linked_variables.begin(); i != linked_variables.end(); i++) {
925 int var = *i;
926 if (_solver->model[var] == l_True) {
927 satisfied_linked_variables.push_back(var);
928 }
929 }
930
931 // Words that are not in the linkage don't need to be connected.
932 // (For now these can be only be words marked as "optional").
933 std::vector<bool> is_linked_word(_sent->length, false);
934 for (size_t node = 0; node < _sent->length; node++) {
935 is_linked_word[node] = _solver->model[node] == l_True;
936 }
937
938 // build the connectivity graph
939 MatrixUpperTriangle<int> graph(_sent->length, 0);
940 std::vector<int>::const_iterator i;
941 CONNECTIVITY_DEBUG(printf("connectivity_components words:\n"));
942 for (i = satisfied_linked_variables.begin(); i != satisfied_linked_variables.end(); i++) {
943 const int lv_l = _variables->linked_variable(*i)->left_word;
944 const int lv_r = _variables->linked_variable(*i)->right_word;
945
946 if (!is_linked_word[lv_l] || !is_linked_word[lv_r]) continue;
947 graph.set(lv_l, lv_r, 1);
948 CONNECTIVITY_DEBUG(printf("L%d R%d\n", lv_l, lv_r));
949 }
950
951 // determine the connectivity components
952 components.resize(_sent->length);
953 std::fill(components.begin(), components.end(), -1);
954 for (size_t node = 0; node < _sent->length; node++)
955 dfs(node, graph, node, components);
956
957 CONNECTIVITY_DEBUG(printf("connectivity_components: "));
958 bool connected = true;
959 for (size_t node = 0; node < _sent->length; node++) {
960 CONNECTIVITY_DEBUG(
961 if (is_linked_word[node]) printf("%d ", components[node]);
962 else printf("[%d] ", components[node]);
963 )
964 if (is_linked_word[node]) {
965 if (components[node] != 0) {
966 connected = false;
967 }
968 } else {
969 components[node] = -1;
970 }
971 }
972
973 CONNECTIVITY_DEBUG(printf(" connected=%d\n", connected));
974 return connected;
975 }
976
977 #ifdef SAT_DEBUG
978 #define MVALUE(s, v) (s->model[v] == l_True?'T':(s->model[v] == l_False?'f':'u'))
pmodel(Solver * solver,int var)979 GNUC_UNUSED static void pmodel(Solver *solver, int var) {
980 printf("%c\n", MVALUE(solver, var));
981 }
982
pmodel(Solver * solver,vec<Lit> & clause)983 GNUC_UNUSED static void pmodel(Solver *solver, vec<Lit> &clause) {
984 vector<int> t;
985 for (int i = 0; i < clause.size(); i++) {
986 int v = var(clause[i]);
987 printf("%d:%c ", v, MVALUE(solver, v));
988 if (solver->model[v] == l_True) t.push_back(v);
989 }
990 printf("\n");
991 if (t.size()) {
992 printf("l_True:");
993 for (auto i: t) printf(" %d", i);
994 printf("\n");
995 }
996 }
997 #endif
998
999 /**
1000 * Generate clauses to enforce exactly the missing connectivity.
1001 * @param components A connectivity vector as computed by
1002 * connectivity_components().
1003 *
1004 * Iterate the list of possible links between words, and find links
1005 * between words that are found in different segments according to the
1006 * connectivity vector. For each two segments, issue a clause asserting
1007 * that at least one of the corresponding links must exist.
1008 *
1009 * In order to support optional words, optional words which don't have links
1010 * (marked as -1 in the connectivity vector) are ignored. A missing link
1011 * between words when one of them is optional (referred to as a
1012 * "conditional_link" below) is considered missing only if both words exist
1013 * in the linkage.
1014 */
generate_disconnectivity_prohibiting(std::vector<int> components)1015 void SATEncoder::generate_disconnectivity_prohibiting(std::vector<int> components) {
1016 // vector of unique components
1017 std::vector<int> different_components = components;
1018 std::sort(different_components.begin(), different_components.end());
1019 different_components.erase(std::unique(different_components.begin(), different_components.end()),
1020 different_components.end());
1021 debug_generate_disconnectivity_prohibiting(components, different_components);
1022 // Each connected component must contain a branch going out of it
1023 std::vector<int>::const_iterator c;
1024 bool missing_word = false;
1025 if (*different_components.begin() == -1) {
1026 different_components.erase(different_components.begin());
1027 missing_word = true;
1028 }
1029 for (c = different_components.begin(); c != different_components.end(); c++) {
1030 vec<Lit> clause;
1031 const std::vector<int>& linked_variables = _variables->linked_variables();
1032 for (std::vector<int>::const_iterator i = linked_variables.begin(); i != linked_variables.end(); i++) {
1033 int var = *i;
1034 const Variables::LinkedVar* lv = _variables->linked_variable(var);
1035 if ((components[lv->left_word] == *c && components[lv->right_word] != *c) ||
1036 (components[lv->left_word] != *c && components[lv->right_word] == *c)) {
1037
1038 CONNECTIVITY_DEBUG(printf(" %d(%d-%d)", var, lv->left_word, lv->right_word));
1039 bool optlw_exists = _sent->word[lv->left_word].optional &&
1040 _solver->model[lv->left_word] == l_True;
1041 bool optrw_exists = _sent->word[lv->right_word].optional &&
1042 _solver->model[lv->right_word] == l_True;
1043 if (optlw_exists || optrw_exists) {
1044 int conditional_link_var;
1045 bool conditional_link_var_exists;
1046
1047 CONNECTIVITY_DEBUG(printf("R ")); // "replaced"
1048 char name[MAX_VARIABLE_NAME] = "0";
1049 sprintf(name+1, "%dw%dw%d", var, optlw_exists?lv->left_word:255,
1050 optrw_exists?lv->right_word:255);
1051 conditional_link_var_exists = _variables->var_exists(name);
1052 conditional_link_var = _variables->string(name);
1053
1054 if (!conditional_link_var_exists) {
1055 Lit lhs = Lit(conditional_link_var);
1056 vec<Lit> rhs;
1057 rhs.push(Lit(var));
1058 if (optlw_exists) rhs.push(~Lit(lv->left_word));
1059 if (optrw_exists) rhs.push(~Lit(lv->right_word));
1060 generate_or_definition(lhs, rhs);
1061 }
1062
1063 var = conditional_link_var; // Replace it by its conditional link var
1064 }
1065 clause.push(Lit(var)); // Implied link var is used if needed
1066 }
1067 }
1068
1069 if (missing_word) {
1070 // The connectivity may be restored differently if a word reappears
1071 for (WordIdx w = 0; w < _sent->length; w++) {
1072 if (_solver->model[w] == l_False)
1073 clause.push(Lit(w));
1074 }
1075 }
1076 CONNECTIVITY_DEBUG(printf("\n"));
1077 _solver->addClause(clause);
1078
1079 // Avoid issuing two identical clauses
1080 if (different_components.size() == 2)
1081 break;
1082 }
1083 }
1084
1085 /*--------------------------------------------------------------------------*
1086 * P L A N A R I T Y *
1087 *--------------------------------------------------------------------------*/
generate_planarity_conditions()1088 void SATEncoder::generate_planarity_conditions()
1089 {
1090 vec<Lit> clause(2);
1091 for (size_t wi1 = 0; wi1 < _sent->length; wi1++) {
1092 for (size_t wj1 = wi1+1; wj1 < _sent->length; wj1++) {
1093 for (size_t wi2 = wj1+1; wi2 < _sent->length; wi2++) {
1094 if (!_linked_possible(wi1, wi2))
1095 continue;
1096 for (size_t wj2 = wi2+1; wj2 < _sent->length; wj2++) {
1097 if (!_linked_possible(wj1, wj2))
1098 continue;
1099 clause[0] = ~Lit(_variables->linked(wi1, wi2));
1100 clause[1] = ~Lit(_variables->linked(wj1, wj2));
1101 add_clause(clause);
1102 }
1103 }
1104 }
1105 }
1106 }
1107
1108 /*--------------------------------------------------------------------------*
1109 * P O W E R P R U N I N G *
1110 *--------------------------------------------------------------------------*/
1111
generate_epsilon_definitions()1112 void SATEncoder::generate_epsilon_definitions()
1113 {
1114 for (size_t w = 0; w < _sent->length; w++) {
1115 if (_sent->word[w].x == NULL) {
1116 continue;
1117 }
1118
1119 bool join = _sent->word[w].x->next != NULL;
1120 Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
1121
1122 char name[MAX_VARIABLE_NAME];
1123 // sprintf(name, "w%zu", w);
1124 name[0] = 'w';
1125 fast_sprintf(name+1, w);
1126
1127 int dfs_position;
1128
1129 dfs_position = 0;
1130 generate_epsilon_for_expression(w, dfs_position, exp, name, true, '+');
1131
1132 dfs_position = 0;
1133 generate_epsilon_for_expression(w, dfs_position, exp, name, true, '-');
1134 }
1135 }
1136
generate_epsilon_for_expression(int w,int & dfs_position,Exp * e,char * var,bool root,char dir)1137 bool SATEncoder::generate_epsilon_for_expression(int w, int& dfs_position, Exp* e,
1138 char* var, bool root, char dir)
1139 {
1140 if (e->type == CONNECTOR_type) {
1141 dfs_position++;
1142 if (var != NULL) {
1143 if (e->dir == dir) {
1144 // generate_literal(-_variables->epsilon(name, var, e->dir));
1145 return false;
1146 } else {
1147 generate_equivalence_definition(Lit(_variables->epsilon(var, dir)),
1148 Lit(_variables->string(var)));
1149 return true;
1150 }
1151 }
1152 } else if (e->type == AND_type) {
1153 if (e->operand_first == NULL) {
1154 /* zeroary and */
1155 generate_equivalence_definition(Lit(_variables->string(var)),
1156 Lit(_variables->epsilon(var, dir)));
1157 return true;
1158 } else
1159 if (e->operand_first != NULL && e->operand_first->operand_next == NULL) {
1160 /* unary and - skip */
1161 return generate_epsilon_for_expression(w, dfs_position, e->operand_first, var, root, dir);
1162 } else {
1163 /* Recurse */
1164 Exp* opd;
1165 int i;
1166 bool eps = true;
1167
1168 char new_var[MAX_VARIABLE_NAME];
1169 char* last_new_var = new_var;
1170 char* last_var = var;
1171 while ((*last_new_var = *last_var)) {
1172 last_new_var++;
1173 last_var++;
1174 }
1175
1176
1177 for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1178 // sprintf(new_var, "%sc%d", var, i)
1179 char* s = last_new_var;
1180 *s++ = 'c';
1181 fast_sprintf(s, i);
1182
1183 if (!generate_epsilon_for_expression(w, dfs_position, opd, new_var, false, dir)) {
1184 eps = false;
1185 break;
1186 }
1187 }
1188
1189 if (opd != NULL) {
1190 for (opd = opd->operand_next; opd != NULL; opd = opd->operand_next)
1191 dfs_position += num_connectors(opd);
1192 }
1193
1194 if (!root && eps) {
1195 Lit lhs = Lit(_variables->epsilon(var, dir));
1196 vec<Lit> rhs;
1197 for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1198 // sprintf(new_var, "%sc%d", var, i)
1199 char* s = last_new_var;
1200 *s++ = 'c';
1201 fast_sprintf(s, i);
1202 rhs.push(Lit(_variables->epsilon(new_var, dir)));
1203 }
1204 generate_classical_and_definition(lhs, rhs);
1205 }
1206 return eps;
1207 }
1208 } else if (e->type == OR_type) {
1209 if (e->operand_first != NULL && e->operand_first->operand_next == NULL) {
1210 /* unary or - skip */
1211 return generate_epsilon_for_expression(w, dfs_position, e->operand_first, var, root, dir);
1212 } else {
1213 /* Recurse */
1214 Exp* opd;
1215 int i;
1216 bool eps = false;
1217
1218 char new_var[MAX_VARIABLE_NAME];
1219 char* last_new_var = new_var;
1220 char* last_var = var;
1221 while ((*last_new_var = *last_var)) {
1222 last_new_var++;
1223 last_var++;
1224 }
1225
1226 vec<Lit> rhs;
1227 for (i = 0, opd = e->operand_first; opd != NULL; opd = opd->operand_next, i++) {
1228 // sprintf(new_var, "%sc%d", var, i)
1229 char* s = last_new_var;
1230 *s++ = 'd';
1231 fast_sprintf(s, i);
1232
1233 if (generate_epsilon_for_expression(w, dfs_position, opd, new_var, false, dir) && !root) {
1234 rhs.push(Lit(_variables->epsilon(new_var, dir)));
1235 eps = true;
1236 }
1237 }
1238
1239 if (!root && eps) {
1240 Lit lhs = Lit(_variables->epsilon(var, dir));
1241 generate_or_definition(lhs, rhs);
1242 }
1243
1244 return eps;
1245 }
1246 }
1247 return false;
1248 }
1249
power_prune()1250 void SATEncoder::power_prune()
1251 {
1252 generate_epsilon_definitions();
1253
1254 // on two non-adjacent words, a pair of connectors can be used only
1255 // if not [both of them are the deepest].
1256
1257 for (size_t wl = 0; wl < _sent->length - 2; wl++) {
1258 const std::vector<PositionConnector>& rc = _word_tags[wl].get_right_connectors();
1259 std::vector<PositionConnector>::const_iterator rci;
1260 for (rci = rc.begin(); rci != rc.end(); rci++) {
1261 if (!(*rci).leading_right || (*rci).connector.multi)
1262 continue;
1263
1264 const std::vector<PositionConnector*>& matches = rci->matches;
1265 for (std::vector<PositionConnector*>::const_iterator lci = matches.begin(); lci != matches.end(); lci++) {
1266 if (!(*lci)->leading_left || (*lci)->connector.multi || (*lci)->word <= wl + 2)
1267 continue;
1268 if (_opts->min_null_count == 0)
1269 if (optional_gap_collapse(_sent, wl, (*lci)->word)) continue;
1270
1271 // printf("LR: .%zu. .%d. %s\n", wl, rci->position, rci->connector.desc->string);
1272 // printf("LL: .%zu. .%d. %s\n", (*lci)->word, (*lci)->position, (*lci)->connector.desc->string);
1273
1274 vec<Lit> clause;
1275 for (std::vector<int>::const_iterator i = rci->eps_right.begin(); i != rci->eps_right.end(); i++) {
1276 clause.push(~Lit(*i));
1277 }
1278
1279 for (std::vector<int>::const_iterator i = (*lci)->eps_left.begin(); i != (*lci)->eps_left.end(); i++) {
1280 clause.push(~Lit(*i));
1281 }
1282
1283 add_additional_power_pruning_conditions(clause, wl, (*lci)->word);
1284
1285 clause.push(~Lit(_variables->link(
1286 wl, rci->position, connector_string(&rci->connector), rci->exp,
1287 (*lci)->word, (*lci)->position, connector_string(&(*lci)->connector), (*lci)->exp)));
1288 add_clause(clause);
1289 }
1290 }
1291 }
1292
1293 #if 0
1294 // on two adjacent words, a pair of connectors can be used only if
1295 // they're the deepest ones on their disjuncts
1296 for (size_t wl = 0; wl < _sent->length - 2; wl++) {
1297 const std::vector<PositionConnector>& rc = _word_tags[wl].get_right_connectors();
1298 std::vector<PositionConnector>::const_iterator rci;
1299 for (rci = rc.begin(); rci != rc.end(); rci++) {
1300 if (!(*rci).leading_right)
1301 continue;
1302
1303 const std::vector<PositionConnector*>& matches = rci->matches;
1304 for (std::vector<PositionConnector*>::const_iterator lci = matches.begin(); lci != matches.end(); lci++) {
1305 if (!(*lci)->leading_left || (*lci)->word != wl + 1)
1306 continue;
1307 int link = _variables->link(wl, rci->position, rci->connector.desc->string, rci->exp,
1308 (*lci)->word, (*lci)->position, (*lci)->connector.desc->string, (*lci)->exp);
1309 vec<Lit> clause(2);
1310 clause.push(~Lit(link));
1311
1312 for (std::vector<int>::const_iterator i = rci->eps_right.begin(); i != rci->eps_right.end(); i++) {
1313 clause.push(Lit(*i));
1314 }
1315
1316 for (std::vector<int>::const_iterator i = (*lci)->eps_left.begin(); i != (*lci)->eps_left.end(); i++) {
1317 clause.push(Lit(*i));
1318 }
1319
1320 add_clause(clause);
1321 }
1322 }
1323 }
1324 #endif
1325
1326
1327 #if 0
1328 // Two deep connectors cannot match (deep means notlast)
1329 std::vector<std::vector<PositionConnector*> > certainly_deep_left(_sent->length), certainly_deep_right(_sent->length);
1330 for (size_t w = 0; w < _sent->length - 2; w++) {
1331 if (_sent->word[w].x == NULL)
1332 continue;
1333
1334 bool join = _sent->word[w].x->next != NULL;
1335 Exp* exp = join ? join_alternatives(w) : _sent->word[w].x->exp;
1336
1337 int dfs_position = 0;
1338 certainly_non_trailing(w, exp, '+', dfs_position, certainly_deep_right[w], false);
1339 dfs_position = 0;
1340 certainly_non_trailing(w, exp, '-', dfs_position, certainly_deep_left[w], false);
1341 }
1342
1343 for (size_t w = 0; w < _sent->length; w++) {
1344 std::vector<PositionConnector*>::const_iterator i;
1345 for (i = certainly_deep_right[w].begin(); i != certainly_deep_right[w].end(); i++) {
1346 const std::vector<PositionConnector*>& matches = (*i)->matches;
1347 std::vector<PositionConnector*>::const_iterator j;
1348 for (j = matches.begin(); j != matches.end(); j++) {
1349 if (std::find(certainly_deep_left[(*j)->word].begin(), certainly_deep_left[(*j)->word].end(),
1350 *j) != certainly_deep_left[(*j)->word].end()) {
1351 generate_literal(~Lit(_variables->link((*i)->word, (*i)->position, (*i)->connector.desc->string, (*i)->exp,
1352 (*j)->word, (*j)->position, (*j)->connector.desc->string, (*j)->exp)));
1353 }
1354 }
1355 }
1356 }
1357 #endif
1358 }
1359
1360 /*--------------------------------------------------------------------------*
1361 * P O S T P R O C E S S I N G & P P P R U N I N G *
1362 *--------------------------------------------------------------------------*/
1363
1364 #if 0 // Needed only for the additional PP pruning (pp_pruning_1)
1365 /**
1366 * Returns false if the string s does not match anything in
1367 * the array. The array elements are post-processing symbols.
1368 */
1369 static bool string_in_list(const char * s, const char * a[])
1370 {
1371 for (size_t i = 0; a[i] != NULL; i++)
1372 if (post_process_match(a[i], s)) return true;
1373 return false;
1374 }
1375 #endif
1376
1377 /* For debugging. */
1378 /*
1379 GNUC_UNUSED static void print_link_array(const char **s)
1380 {
1381 printf("|");
1382 do
1383 {
1384 printf("%s ", *s);
1385 } while (*++s);
1386 printf("|\n");
1387 }
1388 */
1389
pp_prune()1390 void SATEncoder::pp_prune()
1391 {
1392 const std::vector<int>& link_variables = _variables->link_variables();
1393
1394 if (_sent->dict->base_knowledge == NULL)
1395 return;
1396
1397 if (!_opts->perform_pp_prune)
1398 return;
1399
1400 pp_knowledge * knowledge;
1401 knowledge = _sent->dict->base_knowledge;
1402
1403 for (size_t i=0; i<knowledge->n_contains_one_rules; i++)
1404 {
1405 pp_rule rule = knowledge->contains_one_rules[i];
1406 // const char * selector = rule.selector; /* selector string for this rule */
1407 pp_linkset * link_set = rule.link_set; /* the set of criterion links */
1408
1409
1410 vec<Lit> triggers;
1411 for (size_t vi = 0; vi < link_variables.size(); vi++) {
1412 const Variables::LinkVar* var = _variables->link_variable(link_variables[vi]);
1413 if (post_process_match(rule.selector, var->label)) {
1414 triggers.push(Lit(link_variables[vi]));
1415 }
1416 }
1417
1418 if (triggers.size() == 0)
1419 continue;
1420
1421 vec<Lit> criterions;
1422 for (size_t j = 0; j < link_variables.size(); j++) {
1423 pp_linkset_node *p;
1424 for (size_t hashval = 0; hashval < link_set->hash_table_size; hashval++) {
1425 for (p = link_set->hash_table[hashval]; p!=NULL; p=p->next) {
1426 const Variables::LinkVar* var = _variables->link_variable(link_variables[j]);
1427 if (post_process_match(p->str, var->label)) {
1428 criterions.push(Lit(link_variables[j]));
1429 }
1430 }
1431 }
1432 }
1433
1434 DEBUG_print("---pp_pruning--");
1435 for (int k = 0; k < triggers.size(); k++) {
1436 vec<Lit> clause(criterions.size() + 1);
1437 for (int ci = 0; ci < criterions.size(); ci++)
1438 clause[ci] = criterions[ci];
1439 clause[criterions.size()] = (~triggers[k]);
1440 add_clause(clause);
1441 }
1442 DEBUG_print("---end pp_pruning--");
1443 }
1444
1445 /* The following code has a bug that causes it to be too restrictive.
1446 * Instead of fixing it, it is disabled here, as it is to be replaced
1447 * soon by the pp-pruning code of the classic parser. */
1448 #if 0
1449 if (test_enabled("no-pp_pruning_1")) return; // For result comparison.
1450 /* Additional PP pruning.
1451 * Since the SAT parser defines constrains on links, it is possible
1452 * to encode all the postprocessing rules, in order to prune all the
1453 * solutions that violate the postprocessing rules.
1454 *
1455 * This may save much SAT-solver overhead, because solutions which are
1456 * not going to pass postprocessing will get eliminated, saving a very
1457 * high number of calls to the SAT-solver that are not yielding a valid
1458 * linkage.
1459 *
1460 * However, the encoding code has a non-negligible overhead, and also
1461 * it may add clauses that increase the overhead of the SAT-solver.
1462 *
1463 * FIXME:
1464 * A large part of the encoding overhead is because the need to
1465 * iterate a very large number of items in order to find the small
1466 * number of items that are needed. Hashing in advance can solve this
1467 * and similar problems.
1468 *
1469 * For now only a very limited pruning is tried.
1470 * For the CONTAINS_NONE_RULES, only the nearest link to the root-link
1471 * is checked.
1472 *
1473 * TODO:
1474 * BOUNDED_RULES pruning.
1475 * Domain encoding.
1476 */
1477
1478 /*
1479 * CONTAINS_NONE_RULES
1480 * 1. Suppose no one of them refers to a URFL domain.
1481 * 2. Only the links of words accessible through a right link of
1482 * the root word are checked here. Even such a limited check saves CPU.
1483 *
1484 * Below, var_i and var_j are link variables, when var_i is the root link.
1485 * root_word---var_i---wr---var_j
1486 */
1487
1488 //printf("n_contains_none_rules %zu\n", knowledge->n_contains_none_rules);
1489 //printf("link_variables.size() %zu\n", link_variables.size());
1490 for (size_t i=0; i<knowledge->n_contains_none_rules; i++)
1491 {
1492 pp_rule rule = knowledge->contains_none_rules[i];
1493 //printf("rule[%2zu]selector = %s; ", i, rule.selector);
1494 //print_link_array(rule.link_array);
1495
1496 int root_link;
1497 int wr; // A word at the end of a right-link of the root word.
1498 for (size_t vi = 0; vi < link_variables.size(); vi++)
1499 {
1500 const Variables::LinkVar* var_i = _variables->link_variable(link_variables[vi]);
1501 if (!post_process_match(rule.selector, var_i->label)) continue;
1502
1503 root_link = link_variables[vi];
1504 wr = var_i->right_word;
1505
1506 //printf("Found rule[%zu].selector %s: %s %d\n", i, rule.selector, var_i->label, wr);
1507
1508 vector<int> must_not_exist;
1509 for (size_t vj = 0; vj < link_variables.size(); vj++)
1510 {
1511 const Variables::LinkVar* var_j = _variables->link_variable(link_variables[vj]);
1512 if ((wr != var_j->left_word) && (wr != var_j->right_word)) continue;
1513 if (var_i == var_j) continue; // It points back to the root word.
1514
1515 //printf("var_j->label %s %d %d\n", var_j->label, var_j->right_word, var_j->left_word);
1516 if (string_in_list(var_j->label, rule.link_array))
1517 must_not_exist.push_back(link_variables[vj]);
1518 }
1519
1520 DEBUG_print("---pp_pruning 1--");
1521 for (auto c: must_not_exist)
1522 {
1523 vec<Lit> clause(2);
1524 clause[0] = ~Lit(root_link);
1525 clause[1] = ~Lit(c);
1526 add_clause(clause);
1527 }
1528 DEBUG_print("---end pp_pruning 1--");
1529 }
1530 }
1531 #endif // 0
1532 }
1533
1534 /*--------------------------------------------------------------------------*
1535 * D E C O D I N G *
1536 *--------------------------------------------------------------------------*/
1537
1538 /**
1539 * Create the next linkage.
1540 * Return true iff the linkage can be created.
1541 */
create_linkage(Linkage linkage)1542 bool SATEncoder::create_linkage(Linkage linkage)
1543 {
1544 partial_init_linkage(_sent, linkage, _sent->length);
1545 if (!sat_extract_links(linkage)) return false;
1546 compute_link_names(linkage, _sent->string_set);
1547 return true;
1548 }
1549
generate_linkage_prohibiting()1550 void SATEncoder::generate_linkage_prohibiting()
1551 {
1552 vec<Lit> clause;
1553 const std::vector<int>& link_variables = _variables->link_variables();
1554 for (std::vector<int>::const_iterator i = link_variables.begin(); i != link_variables.end(); i++) {
1555 int var = *i;
1556 if (_solver->model[var] == l_True) {
1557 clause.push(~Lit(var));
1558 } else if (_solver->model[var] == l_False) {
1559 clause.push(Lit(var));
1560 }
1561 }
1562 _solver->addClause(clause);
1563 }
1564
get_next_linkage()1565 Linkage SATEncoder::get_next_linkage()
1566 {
1567 Linkage_s linkage;
1568 bool connected;
1569 bool display_linkage_disconnected = false;
1570
1571
1572 /* Loop until a good linkage is found.
1573 * Insane (mixed alternatives) linkages are always ignored.
1574 * Disconnected linkages are normally ignored, unless
1575 * !test=linkage-disconnected is used (and they are sane) */
1576 bool linkage_ok;
1577 do {
1578 if (!_solver->solve()) return NULL;
1579
1580 std::vector<int> components;
1581 connected = connectivity_components(components);
1582
1583 // Prohibit this solution so the next ones can be found
1584 if (!connected) {
1585 _num_lkg_disconnected++;
1586 generate_disconnectivity_prohibiting(components);
1587 display_linkage_disconnected = test_enabled("linkage-disconnected");
1588 } else {
1589 generate_linkage_prohibiting();
1590 }
1591
1592 linkage_ok = false;
1593 if (connected || display_linkage_disconnected) {
1594 memset(&linkage, 0, sizeof(struct Linkage_s));
1595
1596 linkage_ok = create_linkage(&linkage);
1597 if (linkage_ok)
1598 linkage_ok = sane_linkage_morphism(_sent, &linkage, _opts);
1599 if (!linkage_ok) {
1600 /* We cannot elegantly add this linkage to sent->linkges[] -
1601 * to be freed in sentence_delete(), since insane linkages
1602 * must be there with index > num_linkages_post_processed - so
1603 * they remain hidden, but num_linkages_post_processed is an
1604 * arbitrary number. So we must free it here. */
1605 free_linkage_connectors_and_disjuncts(&linkage);
1606 free_linkage(&linkage);
1607 continue; // skip this linkage
1608 }
1609 remove_empty_words(&linkage); /* Discard optional words. */
1610 }
1611
1612 if (!connected) {
1613 if (display_linkage_disconnected) {
1614 cout << "Linkage DISCONNECTED" << endl;
1615 } else {
1616 lgdebug(+D_SAT, "Linkage DISCONNECTED (skipped)\n");
1617 }
1618 }
1619 } while (!linkage_ok);
1620
1621 /* We cannot expand the linkage array on demand, since the API uses
1622 * linkage pointers, and they would become invalid if realloc() changes
1623 * the address of the memory block. So it is allocated in advance. */
1624 if (NULL == _sent->lnkages)
1625 {
1626 _sent->num_linkages_alloced = _opts->linkage_limit;
1627 size_t nbytes = _sent->num_linkages_alloced * sizeof(struct Linkage_s);
1628 _sent->lnkages = (Linkage) malloc(nbytes);
1629 _next_linkage_index = 0;
1630 }
1631 assert(_next_linkage_index<_sent->num_linkages_alloced, "_sent->lnkages ovl");
1632
1633 Linkage lkg = &_sent->lnkages[_next_linkage_index];
1634 _next_linkage_index++;
1635 *lkg = linkage; /* copy en-mass */
1636
1637 /* The link-parser code checks the next linkage for num_violations
1638 * (to save calls to linkage_create()). Allow for that practice. */
1639 if (_next_linkage_index < _sent->num_linkages_alloced)
1640 lkg[1].lifo.N_violations = 0;
1641
1642 // Perform the rest of the post-processing
1643 if (NULL != _sent->postprocessor) {
1644 do_post_process(_sent->postprocessor, lkg, false);
1645 if (NULL != _sent->postprocessor->violation) {
1646 _num_pp_violations++;
1647 lkg->lifo.N_violations++;
1648 lkg->lifo.pp_violation_msg = _sent->postprocessor->violation;
1649 lgdebug(+D_SAT, "Postprocessing error: %s\n", lkg->lifo.pp_violation_msg);
1650 } else {
1651 // XXX We cannot maintain num_valid_linkages, because it starts from
1652 // a high number. If we start it from 0, then on value 1 link-parser
1653 // would report "Unique linkage".
1654 //_sent->num_valid_linkages++;
1655 }
1656 post_process_free_data(&_sent->postprocessor->pp_data);
1657 }
1658 linkage_score(lkg, _opts);
1659
1660 // if (NULL == _sent->postprocessor->violation && verbosity > 1)
1661 // _solver->printStats();
1662 return lkg;
1663 }
1664
1665 /*******************************************************************************
1666 * SAT encoding for sentences that do not contain conjunction. *
1667 *******************************************************************************/
1668
generate_encoding_specific_clauses()1669 void SATEncoderConjunctionFreeSentences::generate_encoding_specific_clauses() {
1670 }
1671
handle_null_expression(int w)1672 void SATEncoderConjunctionFreeSentences::handle_null_expression(int w) {
1673 // Formula is unsatisfiable
1674 vec<Lit> clause;
1675 add_clause(clause);
1676 }
1677
determine_satisfaction(int w,char * name)1678 void SATEncoderConjunctionFreeSentences::determine_satisfaction(int w, char* name)
1679 {
1680 // All tags must be satisfied
1681 generate_literal(Lit(_variables->string(name)));
1682 }
1683
generate_satisfaction_for_connector(int wi,int pi,Exp * e,char * var)1684 void SATEncoderConjunctionFreeSentences::generate_satisfaction_for_connector(
1685 int wi, int pi, Exp *e, char* var)
1686 {
1687 const char* Ci = e->condesc->string;
1688 char dir = e->dir;
1689 bool multi = e->multi;
1690 double cost = e->cost;
1691
1692 Lit lhs = Lit(_variables->string_cost(var, cost));
1693
1694 #ifdef SAT_DEBUG
1695 cout << "*** Connector: ." << wi << ". ." << pi << ". " << Ci << dir << endl;
1696 #endif
1697
1698 // words that can potentially match (wi, pi)
1699 int low = (dir == '-') ? 0 : wi + 1;
1700 int hi = (dir == '-') ? wi : _sent->length;
1701
1702 std::vector<int> _w_;
1703 for (int wj = low; wj < hi; wj++) {
1704 // Eliminate words that cannot be connected to (wi, pi)
1705 if (!_word_tags[wj].match_possible(wi, pi))
1706 continue;
1707
1708 // Now we know that there is a potential link between the
1709 // connector (wi, pi) and the word wj
1710 _w_.push_back(wj);
1711
1712 generate_link_cw_ordinary_definition(wi, pi, e, wj);
1713 }
1714
1715 vec<Lit> _link_cw_;
1716 for (size_t i = 0; i < _w_.size(); i++)
1717 _link_cw_.push(Lit(_variables->link_cw(_w_[i], wi, pi, Ci)));
1718 generate_or_definition(lhs, _link_cw_);
1719
1720 DEBUG_print("--------- multi");
1721 if (!multi) {
1722 generate_xor_conditions(_link_cw_);
1723 }
1724 DEBUG_print("--------- end multi");
1725
1726 #ifdef SAT_DEBUG
1727 cout << "*** End Connector: ." << wi << ". ." << pi << ". " << Ci << endl;
1728 #endif
1729 }
1730
generate_linked_definitions()1731 void SATEncoderConjunctionFreeSentences::generate_linked_definitions()
1732 {
1733 _linked_possible.resize(_sent->length, 1);
1734 vector<vec<Lit>> linked_to_word(_sent->length);
1735
1736 DEBUG_print("------- linked definitions");
1737 for (size_t w1 = 0; w1 < _sent->length; w1++) {
1738 vec<Lit> linked;
1739 for (size_t w2 = w1 + 1; w2 < _sent->length; w2++) {
1740 DEBUG_print("---------- ." << w1 << ". ." << w2 << ".");
1741 Lit lhs = Lit(_variables->linked(w1, w2));
1742
1743 vec<Lit> rhs;
1744 const std::vector<PositionConnector>& w1_connectors = _word_tags[w1].get_right_connectors();
1745 std::vector<PositionConnector>::const_iterator c;
1746 for (c = w1_connectors.begin(); c != w1_connectors.end(); c++) {
1747 assert(c->word == w1, "Connector word must match");
1748 if (_word_tags[w2].match_possible(c->word, c->position)) {
1749 rhs.push(Lit(_variables->link_cw(w2, c->word, c->position, connector_string(&c->connector))));
1750 }
1751 }
1752
1753 _linked_possible.set(w1, w2, rhs.size() > 0);
1754 generate_or_definition(lhs, rhs);
1755
1756 /* Optional words that have no links should be "down", as a mark that
1757 * they are missing in the linkage.
1758 * Collect all possible word links, per word, to be used below. */
1759 if (rhs.size() > 0) {
1760 if (_sent->word[w1].optional) {
1761 linked_to_word[w1].push(Lit(_variables->linked(w1, w2)));
1762 }
1763 if (_sent->word[w2].optional) {
1764 linked_to_word[w2].push(Lit(_variables->linked(w1, w2)));
1765 }
1766 }
1767 }
1768
1769 if (_sent->word[w1].optional) {
1770 /* The word should be connected to at least another word in order to be
1771 * in the linkage. */
1772 DEBUG_print("------------S not linked -> no word (w" << w1 << ")");
1773 generate_or_definition(Lit(w1), linked_to_word[w1]);
1774 DEBUG_print("------------E not linked -> no word");
1775 }
1776 }
1777 DEBUG_print("------- end linked definitions");
1778 }
1779
PositionConnector2exp(const PositionConnector * pc)1780 Exp* SATEncoderConjunctionFreeSentences::PositionConnector2exp(const PositionConnector* pc)
1781 {
1782 Exp* e = Exp_create(_sent->Exp_pool);
1783 e->type = CONNECTOR_type;
1784 e->dir = pc->dir;
1785 e->multi = pc->connector.multi;
1786 e->condesc = (condesc_t *)pc->connector.desc; // FIXME - const
1787 e->cost = pc->cost;
1788
1789 return e;
1790 }
1791
1792 #define D_SEL 8
sat_extract_links(Linkage lkg)1793 bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg)
1794 {
1795 Disjunct *d;
1796 int current_link = 0;
1797
1798 Exp **exp_word = (Exp **)alloca(_sent->length * sizeof(Exp *));
1799 memset(exp_word, 0, _sent->length * sizeof(Exp *));
1800 const X_node **xnode_word = (const X_node **)alloca(_sent->length * sizeof(X_node *));
1801 memset(xnode_word, 0, _sent->length * sizeof(X_node *));
1802
1803 const std::vector<int>& link_variables = _variables->link_variables();
1804 std::vector<int>::const_iterator i;
1805 for (i = link_variables.begin(); i != link_variables.end(); i++) {
1806 if (_solver->model[*i] != l_True)
1807 continue;
1808
1809 const Variables::LinkVar* var = _variables->link_variable(*i);
1810 if (_solver->model[_variables->linked(var->left_word, var->right_word)] != l_True)
1811 continue;
1812
1813 check_link_size(lkg);
1814 Link& clink = lkg->link_array[current_link];
1815 current_link++;
1816 clink.lw = var->left_word;
1817 clink.rw = var->right_word;
1818
1819 PositionConnector* lpc = _word_tags[var->left_word].get(var->left_position);
1820 PositionConnector* rpc = _word_tags[var->right_word].get(var->right_position);
1821
1822 const X_node *left_xnode = lpc->word_xnode;
1823 const X_node *right_xnode = rpc->word_xnode;
1824
1825 // Allocate memory for the connectors, because they should persist
1826 // beyond the lifetime of the sat-solver data structures.
1827 clink.lc = connector_new(NULL, NULL, NULL);
1828 clink.rc = connector_new(NULL, NULL, NULL);
1829
1830 *clink.lc = lpc->connector;
1831 *clink.rc = rpc->connector;
1832
1833 Exp* lcexp = PositionConnector2exp(lpc);
1834 Exp* rcexp = PositionConnector2exp(rpc);
1835 add_anded_exp(_sent, exp_word[var->left_word], lcexp);
1836 add_anded_exp(_sent, exp_word[var->right_word], rcexp);
1837
1838 if (verbosity_level(D_SAT)) {
1839 //cout<< "Lexp[" <<left_xnode->word->subword <<"]: " << lg_exp_stringify(var->left_exp);
1840 cout<< "w"<<var->left_word<<" LCexp[" <<left_xnode->word->subword <<"]: ", lg_exp_stringify(lcexp);
1841 //cout<< "Rexp[" <<right_xnode->word->subword <<"]: ", lg_exp_stringify(var->right_exp);
1842 cout<< "w"<<var->right_word<<" RCexp[" <<right_xnode->word->subword <<"]: ", lg_exp_stringify(rcexp);
1843 cout<< "L+L: ", lg_exp_stringify(exp_word[var->left_word]);
1844 cout<< "R+R: ", lg_exp_stringify(exp_word[var->right_word]);
1845 }
1846
1847 if (xnode_word[var->left_word] && xnode_word[var->left_word] != left_xnode) {
1848 lgdebug(+0, "Warning: Inconsistent X_node for word %d (%s and %s)\n",
1849 var->left_word, xnode_word[var->left_word]->string,
1850 left_xnode->string);
1851 }
1852 if (xnode_word[var->right_word] && xnode_word[var->right_word] != right_xnode) {
1853 lgdebug(+0, "Warning: Inconsistent X_node for word %d (%s and %s)\n",
1854 var->right_word, xnode_word[var->right_word]->string,
1855 right_xnode->string);
1856 }
1857
1858 xnode_word[var->left_word] = left_xnode;
1859 xnode_word[var->right_word] = right_xnode;
1860 }
1861
1862 lkg->num_links = current_link;
1863
1864 // Now build the disjuncts.
1865 // This is needed so that compute_chosen_word works correctly.
1866 // Just in case there is no expression for a disjunct, a null one is used.
1867 for (WordIdx wi = 0; wi < _sent->length; wi++) {
1868 Exp *de = exp_word[wi];
1869
1870 // Skip optional words
1871 if (xnode_word[wi] == NULL)
1872 {
1873 if (!_sent->word[wi].optional)
1874 {
1875 de = null_exp();
1876 prt_error("Error: Internal error: Non-optional word %zu has no linkage\n", wi);
1877 }
1878 continue;
1879 }
1880
1881 if (de == NULL) {
1882 de = null_exp();
1883 prt_error("Error: Internal error: No expression for word %zu\n", wi);
1884 }
1885
1886 double cost_cutoff;
1887 #if LIMIT_TOTAL_LINKAGE_COST // Undefined - incompatible to the classic parser.
1888 cost_cutoff = _opts->disjunct_cost;
1889 #else
1890 cost_cutoff = 1000.0;
1891 #endif // LIMIT_TOTAL_LINKAGE_COST
1892 d = build_disjuncts_for_exp(NULL, de, xnode_word[wi]->string,
1893 &xnode_word[wi]->word->gword_set_head,
1894 cost_cutoff, _opts);
1895
1896 if (d == NULL)
1897 {
1898 lgdebug(+D_SEL, "Debug: Word %zu: Disjunct cost > cost_cutoff %.2f\n",
1899 wi, cost_cutoff);
1900 #ifdef DEBUG
1901 if (!test_enabled("SAT-cost"))
1902 #endif
1903 return false;
1904 }
1905
1906 /* Recover cost of costly-nulls. */
1907 const vector<EmptyConnector>& ec = _word_tags[wi].get_empty_connectors();
1908 for (vector<EmptyConnector>::const_iterator j = ec.begin(); j < ec.end(); j++)
1909 {
1910 lgdebug(+D_SEL, "Debug: Word %zu: Costly-null var=%d, found=%d cost=%.2f\n",
1911 wi, j->ec_var, _solver->model[j->ec_var] == l_True, j->ec_cost);
1912 if (_solver->model[j->ec_var] == l_True)
1913 d->cost += j->ec_cost;
1914 }
1915
1916 lkg->chosen_disjuncts[wi] = d;
1917
1918 #if LIMIT_TOTAL_LINKAGE_COST
1919 if (d->cost > cost_cutoff)
1920 {
1921 lgdebug(+D_SEL, "Word %zu: Disjunct cost %.2f > cost_cutoff %.2f\n",
1922 wi, d->cost, cost_cutoff);
1923 #ifdef DEBUG
1924 if (!test_enabled("SAT-cost"))
1925 #endif
1926 return false;
1927 }
1928 #endif // LIMIT_TOTAL_LINKAGE_COST
1929 }
1930
1931 DEBUG_print("Total links: ." << lkg->num_links << "." << endl);
1932 return true;
1933 }
1934 #undef D_SEL
1935
1936 /**
1937 * Main entry point into the SAT parser.
1938 * A note about panic mode:
1939 * - The MiniSAT support for timeout is not yet used (FIXME).
1940 * - Parsing with null links is not supported (FIXME).
1941 * So nothing particularly useful happens in a panic mode, and it is
1942 * left for the user to disable it.
1943 */
sat_parse(Sentence sent,Parse_Options opts)1944 extern "C" int sat_parse(Sentence sent, Parse_Options opts)
1945 {
1946 if (opts->min_null_count > 0) {
1947 // The sat solver doesn't support (yet) parsing with nulls.
1948 // For now, just avoid the delay of a useless re-parsing.
1949 if (opts->verbosity >= 1)
1950 prt_error("Info: use-sat: Cannot parse with null links (yet).\n"
1951 " Set the \"null\" option to 0 to turn off "
1952 "parsing with null links.\n");
1953 sent->num_valid_linkages = 0;
1954 sent->num_linkages_post_processed = 0;
1955 return 0;
1956 }
1957
1958 SATEncoder* encoder = (SATEncoder*) sent->hook;
1959 if (encoder) {
1960 sat_free_linkages(sent, encoder->_next_linkage_index);
1961 delete encoder;
1962 }
1963
1964 encoder = new SATEncoderConjunctionFreeSentences(sent, opts);
1965 sent->hook = encoder;
1966 encoder->encode();
1967
1968 LinkageIdx linkage_limit = opts->linkage_limit;
1969 LinkageIdx k;
1970 Linkage lkg = NULL;
1971
1972 /* Due to the nature of SAT solving, we cannot know in advance the
1973 * number of linkages. But in order to process batch files, we must
1974 * know if there is at least one valid linkage. We check up to
1975 * linkage_limit number of linkages (settable by !limit).
1976 * Normally the following loop terminates after one or a few
1977 * iterations, when a valid linkage is found or when no more linkages
1978 * are found.
1979 *
1980 * Note that trying to find here the first valid linkage doesn't add
1981 * overhead to an interactive user. It also doesn't add overhead to
1982 * batch processing, which needs anyway to find out if there is a
1983 * valid linkage in order to be any useful. */
1984 for (k = 0; k < linkage_limit; k++)
1985 {
1986 lkg = encoder->get_next_linkage();
1987 if (lkg == NULL || lkg->lifo.N_violations == 0) break;
1988 }
1989 encoder->print_stats();
1990
1991 if (lkg == NULL || k == linkage_limit) {
1992 // We don't have a valid linkages among the first linkage_limit ones
1993 sent->num_valid_linkages = 0;
1994 sent->num_linkages_found = k;
1995 sent->num_linkages_post_processed = k;
1996 if (opts->max_null_count > 0) {
1997 // The sat solver doesn't support (yet) parsing with nulls.
1998 if (opts->verbosity >= 1)
1999 prt_error("Info: use-sat: Cannot parse with null links (yet).\n"
2000 " Set the \"null\" option to 0 to turn off "
2001 "parsing with null links.\n");
2002 }
2003 } else {
2004 /* We found a valid linkage. However, we actually don't know yet the
2005 * number of linkages, and if we set them too low, the command-line
2006 * client will fail to display all of the possible linkages. Work
2007 * around this by lying... return the maximum number of linkages we
2008 * are going to produce.
2009 * A NULL linkage will be returned by linkage_create() after the last
2010 * linkage is produced to signify that there are no more linkages. */
2011 sent->num_valid_linkages = linkage_limit;
2012 sent->num_linkages_post_processed = linkage_limit;
2013 }
2014
2015 return 0;
2016 }
2017
2018 /**
2019 * Get a SAT linkage.
2020 * Validate that k is not too big.
2021 * If the k'th linkage already exists, return it.
2022 * Else k is the next linkage index - get the next SAT solution.
2023 */
sat_create_linkage(LinkageIdx k,Sentence sent,Parse_Options opts)2024 extern "C" Linkage sat_create_linkage(LinkageIdx k, Sentence sent, Parse_Options opts)
2025 {
2026 SATEncoder* encoder = (SATEncoder*) sent->hook;
2027 if (!encoder) return NULL;
2028
2029 if (k >= opts->linkage_limit) // > allocated memory
2030 {
2031 encoder->print_stats();
2032 return NULL;
2033 }
2034
2035 if(k > encoder->_next_linkage_index) // skips unproduced linkages
2036 {
2037 prt_error("Error: Linkage index %zu is greater than the "
2038 "maximum expected one %zu\n", k, encoder->_next_linkage_index);
2039 return NULL;
2040 }
2041 if (k < encoder->_next_linkage_index) // already produced
2042 return &encoder->_sent->lnkages[k];
2043
2044 return encoder->get_next_linkage(); // exactly next to produce
2045 }
2046
sat_sentence_delete(Sentence sent)2047 extern "C" void sat_sentence_delete(Sentence sent)
2048 {
2049 SATEncoder* encoder = (SATEncoder*) sent->hook;
2050 if (!encoder) return;
2051
2052 encoder->print_stats();
2053
2054 sat_free_linkages(sent, encoder->_next_linkage_index);
2055 delete encoder;
2056 }
2057