1 /*
2  * This file is part of the Yices SMT Solver.
3  * Copyright (C) 2017 SRI International.
4  *
5  * Yices is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Yices is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with Yices.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20  * E-GRAPH DATA STRUCTURES
21  *
22  * Initial egraph module started in August 2007.
23  * Functional version completed by October 2007 and integrated to
24  * QF_UF prototype solver (yices_smt)
25  *
26  * 2008-02-08: started main revision
27  * - need to adapt the egraph data structures to interface with the smt_core module
28  * - need to provide better API for communication with sub_solvers (other theories)
29  * - added support for egraph atoms here (rather than in an external solver module)
30  * - removed lazy clause support (for now) to simplify the implementation
31  * - removed support for disequality propagation (did not seem to provide any performance
32  *   improvement on our benchmarks)
33  * - revert to implementing the dynamic ackermann trick as in Yices 1 (i.e., by
34  *   creating lemmas in the smt_core)
35  *
36  * 2008-12-30: modified term creation interface
37  *
38  * 2009-01-22: cleaned up interface with subsolvers
39  *
40  * 2012-09-06: added lambda terms
41  *
42  * 2013-11-11: changed model construction to use fresh_val_maker.
43  */
44 
45 
46 /*
47  * Terms
48  * -----
49  * Terms are represented by 32 bit signed integers (eterm_t) between 0
50  * and nterms - 1. Negative indices are used as special markers.
51  *
52  * For each term t,
53  * - body[t] is either a composite or a special marker.
54  *
55  * Atomic terms are constants or variables
56  * - if t is a constant, body[t] = mk_constant(id) where id is a constant index.
57  *   id is used during model construction.
58  * - if t is a variable, body[t] = VARIABLE_BODY
59  * The difference is that all constants are assumed distinct, whereas
60  * variables are not. Term 0 is the boolean constant (true).
61  *
62  * Non-atomic terms are of one of the following forms:
63  *   (apply f t_1 ... t_n)
64  *   (update f t_1 ... t_n v)
65  *   (tuple t_1 ... t_n)
66  *   (eq t1 t2)
67  *   (distinct t1 ... t_n)
68  *   (ite t1 t2 t3)
69  *   (or t1 .... t_n)
70  *   (lambda t1 tag)
71  * where f, t_i, v are occurrences of terms. These are stored as
72  * composite_t objects.
73  *
74  * Lambda terms are of the form (lambda t1 tag) where tag is an integer index
75  * that identifies the term domain. There's an associate lambda descriptor
76  * table and lambda_desc[tag] stores:
77  *    n = arity of the term
78  *    dom[0] ... dom[n-1] = types
79  * For example, we may have
80  *    u = (lambda (x::int) (y::int) t)
81  &    v = (lambda (x::real) t)
82  * Then the egraph will contain
83  *    u = (lambda t i)
84  *    v = (lambda t j)
85  * and the descriptor table will store
86  *    i --> arity = 2, dom[0] = int, dom[1] = int
87  *    j --> arity = 1, dom[0] = real.
88  *
89  *
90  * To deal with boolean terms, we distinguish between positive and
91  * negative occurrences of t. If t is boolean, a positive occurrence
92  * denotes t, a negative occurrence denotes (not t). If t is
93  * non-boolean, all its occurrences are positive. Occurrences are
94  * represented by 32 bit signed integers (occ_t) and consist
95  * of an eterm_t + a polarity bit. The polarity bit is the low-order bit.
96  *
97  * A term may have a theory variable attached: it's stored in thvar[t].
98  *
99  * 2009-01-23: For model construction, we also store the type of every
100  * term (as an index in the generic type table = egraph->types).
101  *
102  *
103  * Equivalence classes
104  * -------------------
105  * - an equivalence class is a set of term occurrences stored as a circular list
106  * - for every term t,
107  *   - label[t] = class identifier + polarity
108  *     label is extended to term occurrence by setting label[t+] = label[t],
109  *       and label[t-] = opposite of label[t]
110  *   - next[t] = successor of t in the circular list
111  * Polarities are always positive for non-boolean terms.
112  * For a boolean class, there's an (implicit) complementary class that
113  * contains the same terms with opposite polarities.
114  *
115  * Example: if  t, (not u), and (not v) are in the same class, then we would have
116  *    next[t] = u-  label[t] = C+
117  *    next[u] = v+  label[u] = C-
118  *    next[v] = t-  label[v] = C-
119  * this defines two complementary classes:
120  *   C+ = { t+, u-, v- } = { t, (not u), (not v) }
121  *   C- = { t-, u+, v+ } = { (not t), u, v }
122  *
123  * Classes are identified by an index from 0 to nclasses - 1.
124  *
125  * For every class c, we store
126  * - parents[c] = vectors of composites that contain a term in class c
127  * - root[c] = class representative = root of the explanation tree for c
128  * - dmask[c] = bitmask for distinct predicates
129  *   - bit 0 of dmask[c] is 1 if c contains a constant term
130  *   - bit i of dmask[c] is 1 if predicate (distinct t_1 ... t_n) is asserted
131  *     and t_j is in the class of c (i is an index attached to distinct ...)
132  * - for boolean classes, only bit 0 matters. Distinct predicates with boolean
133  *   arguments can be eliminated:
134  *      (distinct t1 ... t_n) = false if n >= 3
135  *      (distinct t1 t2) = (not (eq t1 t2))
136  *   so if c is a class of boolean terms, then either dmask[c] == 1 if c is the
137  *   class of true or dmask[c] == 0.
138  *
139  * The merge algorithm ensures that root[c] is fixed. When a term t is created,
140  * it's initially assigned to a fresh class c_0 and root[c_0] is set to t. This
141  * root term is never modified.
142  * (TODO: check whether root[c] can be removed. Since roots and terms are created
143  *  together, we could ensure that root term and class have the same integer index).
144  *
145  *
146  * Atoms
147  * -----
148  * An egraph atom is a term t of the form (eq t1 t2) or (distinct t1 ... t_n).
149  * Each such atom is mapped to a boolean variable v in the core. The mapping
150  * from t to v is stored by setting thvar[t] = v. More generally, there can
151  * be a core variable v attached to any boolean term t in the egraph.
152  *
153  * In addition, the egraph store atom objects. Each atom object is a
154  * pair atm=<t, v> where t is a boolean term and v is a boolean variable
155  * in the core. The core keeps the mapping from v to atm in its
156  * internal atom table. If t1 ... t_n are boolean terms in the same class c
157  * then their respective atoms are stored in a circular list:
158  *   <t1, v1> --> ... <t_n,v_n> --> back to <t1, v1>
159  * This is used to propagate implied literals to the core.
160  *
161  *
162  * Additional data for classes
163  * ---------------------------
164  * For a class c, the egraph maintains:
165  * - etype[c] = its type (either bool, nat, int, bitvector, tuple, array, or none)
166  * - thvar[c] = an attached theory variable (or null_thvar)
167  *            = a 32 bit integers, whose interpretation depends on type[c]
168  *
169  * The algorithms maintain the following invariant:
170  * - if no term in class c has a theory variable then thvar[c] is null
171  * - otherwise thvar[c] is equal to thvar[t] for some term t in c
172  *
173  * Initially, thvar[c] is the same as thvar[root[c]]. But thvar[c] is dynamically
174  * updated as the classes are merged. If classes c2 is merged into class c1, and
175  * c1 has no theory variable, then it inherits the theory variable of c2 (if any).
176  * If both classes have theory variables v1 and v2, then a theory solver is notified
177  * that v1 and v2 are now equal.
178  *
179  *
180  * Special tricks: the egraph deals internally with boolean and tuple classes.
181  * There is no theory solver involved for them:
182  *
183  * If etype[c] = ETYPE_TUPLE: then thvar[c] is a tuple-term that belongs to c.
184  * - this is used to implement the propagation rule
185  *    (tuple t_1 ... t_n) == (tuple u_1 ... u_n) ==> u_i == t_i
186  *
187  * If etype[c] = ETYPE_BOOL, thvar[c] is a boolean variable in the core
188  * - for class 0 == class of true/false, we use thvar[c] = bool_const (bvar 0)
189  * - for other boolean class, thvar[c] is a boolean variable v such that
190  *   1) there is a term t in c  with thvar[c] = t
191  *   2) there is an egraph atom of the form <t, v> in the atom table
192  *
193  *
194  * Propagation queue
195  * -----------------
196  * - all assertions are written in the form t1 == t2, where t1 and t2 are term occurrences.
197  * - in particular, asserting an atom <t, v> is translated to (t == true), and asserting its
198  *   negation is turned into (t == false)
199  * - a propagation queue stores all these assertions, and the equalities implied by congruence
200  *   closure or other propagation mechanisms
201  * - this is stored in a global queue: each element in the queue is a pair <t1, t2> representing
202  *   the equality (t1 == t2)
203  * - every pair <t1, t2> has an explanation
204  *
205  *
206  * Explanations
207  * ------------
208  * - an explanation contains enough information for explaining how an equality (t1 == t2)
209  *   was derived.
210  * - explanations can be
211  *   - axiom: empty antecedent
212  *   - assertion: literal
213  *   - congruence: means that t1 and t2 are equal by congruence
214  *   or special codes and data encoding simplification and propagation rules
215  *
216  *
217  * Distinct predicates
218  * -------------------
219  * - there can be no more than 31 terms of the form (distinct ....)
220  * - when (distinct t_1 ... t_n) := true is asserted, a bit-index k is assigned and
221  *   the composite (distinct ... ) is  stored in distinct[k]
222  * - then bit k of dmask[class[t1] ... class[t_n]] is set to 1
223  * - bit 0 is reserved and distinct[0] = NULL:
224  *   bit 0 of dmask[c] is 1 iff c contains a constant term
225  *
226  *
227  * Merge graph/explanation tree
228  * ----------------------------
229  * - with every class c is associated an explanation tree
230  * - nodes in the tree are the terms in c
231  * - the root of the tree is root[c]
232  * - edges are of the form t1 ---> t2 where (t1 = t2) is an asserted or implied equality
233  * - an edge of origin t1 is represented by storing edge[t1] = k where k is an index
234  *   in the propagation queue (that contains <t1, t2> + an explanation)
235  *
236  */
237 
238 #ifndef __EGRAPH_TYPES_H
239 #define __EGRAPH_TYPES_H
240 
241 #include <stdint.h>
242 #include <stdbool.h>
243 #include <assert.h>
244 
245 #include "model/abstract_values.h"
246 #include "model/concrete_values.h"
247 #include "model/fresh_value_maker.h"
248 #include "model/fun_maps.h"
249 #include "solvers/cdcl/smt_core.h"
250 #include "solvers/egraph/egraph_base_types.h"
251 #include "utils/arena.h"
252 #include "utils/cache.h"
253 #include "utils/int_hash_map.h"
254 #include "utils/int_hash_tables.h"
255 #include "utils/int_partitions.h"
256 #include "utils/int_stack.h"
257 #include "utils/int_vectors.h"
258 #include "utils/object_stores.h"
259 #include "utils/ptr_partitions.h"
260 #include "utils/ptr_vectors.h"
261 #include "utils/use_vectors.h"
262 
263 
264 
265 
266 /*****************
267  *  CLASS TABLE  *
268  ****************/
269 
270 /*
271  * For each equivalence class c:
272  * - root[c] = a term occurrence (used as class representative)
273  * - dmask[c] = bitvector encoding distinct assertions
274  * - parents[c] = composites that contain a term t whose class is c
275  * - etype[c] = type of terms in class c
276  * - thvar[c] = theory variable for class c
277  */
278 typedef struct class_table_s {
279   uint32_t size;         // size of all arrays
280   uint32_t nclasses;     // number of classes
281 
282   occ_t *root;
283   uint32_t *dmask;
284   use_vector_t *parents;
285   unsigned char *etype;
286   thvar_t *thvar;
287 } class_table_t;
288 
289 
290 // max size: based on size of use_vector_t
291 #define DEFAULT_CLASS_TABLE_SIZE   200
292 #define MAX_CLASS_TABLE_SIZE  (UINT32_MAX>>4)
293 
294 /*
295  * Deletion threshold for parent vectors: When a class c is deleted,
296  * its parent vector parent[c] is reset. In addition, if parent[c] has
297  * size >= PARENT_DELETION_SIZE then the memory used by parent[c] is freed.
298  */
299 #define PARENT_DELETION_SIZE 100
300 
301 
302 
303 /****************
304  *  TERM TABLE  *
305  ***************/
306 
307 /*
308  * For each term t:
309  * - body[t] = descriptor (composite or special pointers)
310  * - next[t] = next element in the same class
311  * - edge[t] = edge id, used for constructing explanations
312  *           (or if t is not active, edge[t] = etype of t)
313  * - thvar[t] = theory variable attached to t
314  * - mark[t] = 1/0 bit, used for constructing explanations
315  * - real_type[t] = type of t (this is necessary for constructing models)
316  */
317 typedef struct eterm_table_s {
318   uint32_t size;
319   uint32_t nterms;
320 
321   composite_t **body;
322   elabel_t *label;
323   occ_t *next;
324   int32_t *edge;
325   thvar_t *thvar;
326   byte_t *mark;
327   type_t *real_type;
328 } eterm_table_t;
329 
330 
331 #define DEFAULT_ETERM_TABLE_SIZE   200
332 #define MAX_ETERM_TABLE_SIZE  ((uint32_t)(MAX_ETERMS))
333 
334 
335 
336 
337 
338 /******************
339  *  EXPLANATIONS  *
340  *****************/
341 
342 /*
343  * Every equality (t1 == t2) in the propagation queue has an
344  * antecedent that explains how (t1 == t2) was derived.
345  *
346  * An explanation is encoded in two parts:
347  * - an 8bit tag defines its type
348  * - extra data depending on the tag is stored in a union
349  *
350  * Explanation types:
351  * - axiom:          (t1 == t2) needs no explanation
352  * - assert(l):      (t1 == t2) was asserted (by setting l to true)
353  * - eq(u, v):       (u == v) implies (t1 == t2)
354  * - distinct0(u, v): (u != v) implies (t1 == t2)
355  *      where u != v is obtained via constants (bit 0 of dmask)
356  * - distinct[i](u, v): (u != v) implies (t1 == t2)
357  *      where u != v is obtained via dmask (bit i of dmask)
358  * - simp_or is used in two cases:
359  *    (or u_1 ... u_n) == false (for all i, u_i == false)
360  *    (or u_1 ... u_n) == v  (for all i, u_i == false or u_i == v)
361  *
362  * For equalities implied by congruence, we must be careful to
363  * preserve causality: the explanation for (t1 == t2) must use only
364  * equalities asserted before. To ensure causality, we use different
365  * tags depending on the congruence rule applied:
366  *
367  * - basic_congruence: t1 and t2 are apply/update/tuple terms
368  *
369  * - eq_congruence1: (t1 == u1), (t2 == u2) implies (eq t1 t2) == (eq u1 u2)
370  * - eq_congruence2: (t1 == u2), (t2 == u1) implies (eq t1 t2) == (eq u1 u2)
371  *
372  * - ite_congruence1: (t1 == u1), (t2 == u2), (t3 == u3) implies
373  *                      (ite t1 t2 t3) == (ite u1 u2 u3)
374  * - ite_congruence2: (t1 == (not u1)), (t2 == u3), (t3 == u2)
375  *                      implies (ite t1 t2 t3) == (ite u1 u2 u3)
376  *
377  * - or_congruence(v) where v is an array of term occurrences
378  * - distinct_congruence(v) where v is an array of term occurrences
379  *
380  * When (or t_1 ... t_n) == (or u_1 ... u_m) is implied by congruence,
381  * we have to match each t_i to a u_j (and conversely) to produce an explanation.
382  * To preserve causality, we compute the mapping t_i --> u_j at the time
383  * the congruence is detected.  The mapping is stored as an array v of n + m elements
384  * v_1 ... v_n, v_n+1 ... v_{n+m} and it encodes the conjunction
385  * (t_1 == v1 ... t_n == v_n) and (u_1 == v_{n+1} ... u_m == v_{n+m}).
386  *
387  * The same thing is done when (distinct t1 ... t_n) == (distinct u_1 ... u_n) is
388  * implied by congruence.
389  *
390  * Other explanations:
391  * - Propagation via pseudo-clauses and Ackermann propagation are not used anymore.
392  * - Equalities propagated from a satellite solver: the antecedent is a pointer
393  *   The tag identifies the solver
394  */
395 typedef enum expl_tag {
396   EXPL_AXIOM,
397   EXPL_ASSERT,
398   EXPL_EQ,
399 
400   // Hackish: for EXPL_DISTINCT, we need to keep track of which bit of dmask
401   // caused the propagation (i.e., the composite in egraph->dtable.distinct[i])
402   // We use 32 tags since there are at most 32bits
403   EXPL_DISTINCT0,
404   EXPL_DISTINCT1,
405   EXPL_DISTINCT2,
406   EXPL_DISTINCT3,
407   EXPL_DISTINCT4,
408   EXPL_DISTINCT5,
409   EXPL_DISTINCT6,
410   EXPL_DISTINCT7,
411   EXPL_DISTINCT8,
412   EXPL_DISTINCT9,
413   EXPL_DISTINCT10,
414   EXPL_DISTINCT11,
415   EXPL_DISTINCT12,
416   EXPL_DISTINCT13,
417   EXPL_DISTINCT14,
418   EXPL_DISTINCT15,
419   EXPL_DISTINCT16,
420   EXPL_DISTINCT17,
421   EXPL_DISTINCT18,
422   EXPL_DISTINCT19,
423   EXPL_DISTINCT20,
424   EXPL_DISTINCT21,
425   EXPL_DISTINCT22,
426   EXPL_DISTINCT23,
427   EXPL_DISTINCT24,
428   EXPL_DISTINCT25,
429   EXPL_DISTINCT26,
430   EXPL_DISTINCT27,
431   EXPL_DISTINCT28,
432   EXPL_DISTINCT29,
433   EXPL_DISTINCT30,
434   EXPL_DISTINCT31,
435 
436   EXPL_SIMP_OR,
437 
438   // congruence rules
439   EXPL_BASIC_CONGRUENCE,
440   EXPL_EQ_CONGRUENCE1,
441   EXPL_EQ_CONGRUENCE2,
442   EXPL_ITE_CONGRUENCE1,
443   EXPL_ITE_CONGRUENCE2,
444   EXPL_OR_CONGRUENCE,
445   EXPL_DISTINCT_CONGRUENCE,
446 
447   // equality propagated from a satellite theory
448   // we just use a hardcoded index to identify the satellite solver for now
449   EXPL_ARITH_PROPAGATION,
450   EXPL_BV_PROPAGATION,
451   EXPL_FUN_PROPAGATION,
452 
453   // attempt to reconcile models
454   EXPL_RECONCILE,
455 } expl_tag_t;
456 
457 
458 typedef union expl_data_u {
459   literal_t lit;      // for EXPL_ASSERT
460   occ_t t[2];         // for EXPL_EQ, EXPL_DISTINCT, EXPL_DISTINCT0
461   void *ptr;          // for EXPL_DISTINCT_CONGRUENCE, EXPL_OR_CONGRUENCE, theory propagation
462 } expl_data_t;
463 
464 
465 
466 /*******************************
467  *  DISTINCT ASSERTION TABLE   *
468  ******************************/
469 
470 /*
471  * - npreds = number of assertions so far
472  * - distinct[k] = composite (distinct ....)
473  *   (the explanation is always (distinct ... ) == true).
474  *
475  * There's an implicit (distinct t_1 .... t_n) of index 0 where
476  * t_1 ... t_n are the constant terms.
477  * - index 0 is reserved (so npreds >= 1)
478  * - distinct[0] = NULL
479  */
480 #define NDISTINCTS 32
481 #define MAX_DISTINCT_TERMS (NDISTINCTS-1)
482 
483 typedef struct distinct_table_s {
484   uint32_t npreds;
485   composite_t *distinct[NDISTINCTS];
486 } distinct_table_t;
487 
488 
489 
490 
491 /*********************************
492  *  LAMBDA TAGS AND DESCRIPTORS  *
493  ********************************/
494 
495 /*
496  * There are ntags descriptors.
497  *
498  * For each lambda tag we store:
499  * - arity + domain
500  */
501 typedef struct ltag_desc_s {
502   uint32_t arity;
503   type_t dom[0]; // real size = arity
504 } ltag_desc_t;
505 
506 #define MAX_LTAG_DESC_ARITY ((UINT32_MAX-sizeof(ltag_desc_t))/sizeof(type_t))
507 
508 typedef struct ltag_table_s {
509   uint32_t size;
510   uint32_t ntags;
511   ltag_desc_t **data;
512 } ltag_table_t;
513 
514 #define DEF_LTAG_TABLE_SIZE   8
515 #define MAX_LTAG_TABLE_SIZE   (UINT32_MAX/sizeof(ltag_desc_t *))
516 
517 
518 
519 /******************
520  *  UPDATE GRAPH  *
521  *****************/
522 
523 /*
524  * To replace the fun_solver satellite.
525  * The egraph has an optional 'update graph' component to
526  * deal with lambda terms, updates, and extensionality.
527  *
528  * The data structure is defined in update_graph.h.
529  */
530 typedef struct update_graph_s update_graph_t;
531 
532 
533 /*****************************
534  *  PROPAGATION QUEUE/STACK  *
535  ****************************/
536 
537 /*
538  * Each element i in the queue encodes an assertion
539  * - eq[i] of the from (t1 == t2)
540  * - with explanation defined by the pair <etag[i], edata[i]>
541  *
542  * When the assertion is processed, it's turned into an edge
543  * t1 ---> t2 in an explanation tree (i.e., edge[t1] is set to i).
544  *
545  * Other components:
546  * - top = top of the stack
547  * - prop_ptr = index of the first equality to process
548  *   so eq[prop_ptr ... top-1] = all assertions not yet processed.
549  * - size = size of arrays eq, expl, saved_class
550  * - mark = bitvector for constructing explanations
551  *
552  * Assertions are organized in levels:
553  * - level_index[k] = index of the first assertion added at level k
554  * - level_index[0] = 0
555  * - nlevels = size of the level_index array
556  */
557 typedef struct equeue_elem_s {
558   occ_t lhs;
559   occ_t rhs;
560 } equeue_elem_t;
561 
562 typedef struct egraph_stack_s {
563   equeue_elem_t *eq;
564   unsigned char *etag;
565   expl_data_t *edata;
566   byte_t *mark;
567 
568   uint32_t top;
569   uint32_t prop_ptr;
570   uint32_t size;
571   uint32_t *level_index;
572   uint32_t nlevels;
573 } egraph_stack_t;
574 
575 
576 // Marker for edge[t] when t is a root
577 enum {
578   null_edge = -1,
579 };
580 
581 
582 #define DEFAULT_EGRAPH_STACK_SIZE 200
583 #define MAX_EGRAPH_STACK_SIZE  (UINT32_MAX/8)
584 
585 #define DEFAULT_EGRAPH_NLEVELS 100
586 #define MAX_EGRAPH_STACK_LEVELS (UINT32_MAX/8)
587 
588 
589 
590 
591 /****************
592  *  UNDO STACK  *
593  ***************/
594 
595 /*
596  * Data for backtracking. Some of the undo data is already in
597  * the propagation queue, but it's simpler to use a special undo stack.
598  * TODO: check cost and revise this if needed.
599  *
600  * Operations that can be undone:
601  * - merge classes of t1 and t2
602  * - assertion of a distinct atom
603  * - simplification of a composite when dmask change
604  *
605  * Other operations that require processing after backtracking
606  * are also recorded in the trail_stack:
607  * - a composite term created during the search needs to
608  *   be reanalyzed after backtracking
609  *
610  */
611 typedef enum undo_tag {
612   // undo operations
613   UNDO_MERGE,
614   UNDO_DISTINCT,
615   UNDO_SIMPLIFY,
616   // reanalyze operations: two variants
617   REANALYZE_CONGRUENCE_ROOT,
618   REANALYZE_COMPOSITE,
619 } undo_tag_t;
620 
621 typedef struct {
622   occ_t saved_occ;
623   elabel_t saved_label;
624 } undo_merge_t;
625 
626 typedef union undo_u {
627   undo_merge_t merge;
628   void *ptr;
629 } undo_t;
630 
631 /*
632  * Undo stack:
633  * - each trail object consists of an 8bit tag (tag[i]) + a union (data[i])
634  * Other components:
635  * - top = top of the stack
636  * - size = size of arrays tag and data
637  *
638  * Levels as in egraph_stack:
639  * - level_index[k] = first trail object of level k
640  * - nlevels = size of level_index array
641  */
642 typedef struct undo_stack_s {
643   unsigned char *tag;
644   undo_t *data;
645   uint32_t top;
646   uint32_t size;
647 
648   uint32_t *level_index;
649   uint32_t nlevels;
650 } undo_stack_t;
651 
652 
653 
654 
655 
656 /**********************
657  *  SIGNATURE BUFFERS *
658  *********************/
659 
660 /*
661  * The signature (sigma c) of a composite term c is derived from
662  * label[t[0]] ... label[t[n-1]] where t[0] ... t[n-1] are c's children.
663  * - two composites c1 and c2 are congruent iff they have equal signature
664  * - signature computation also includes normalization
665  *
666  * A signature is a pair tag + array of class labels, stored in a signature
667  * buffer. size = size of the array sigma
668  */
669 typedef struct signature_s {
670   uint32_t size;
671   uint32_t tag;  // encodes kind + arity
672   elabel_t *sigma;
673 } signature_t;
674 
675 
676 
677 
678 /***********************
679  *  CONGRUENCE TABLE   *
680  **********************/
681 
682 /*
683  * Hash-table of composites: stores a unique representative
684  * (congruence root) per signature. It's similar to int_hash_table.
685  */
686 typedef struct congruence_table_s {
687   composite_t **data;  // the hash table proper
688   uint32_t size;       // its size (must be a power of 2)
689   uint32_t nelems;     // number of elements
690   uint32_t ndeleted;   // deleted elements
691   uint32_t resize_threshold;
692   uint32_t cleanup_threshold;
693   signature_t buffer;  // for internal use
694 } congruence_table_t;
695 
696 
697 /*
698  * Marker for deleted elements in the congruence table
699  * Empty slots contain the NULL pointer
700  */
701 #define DELETED_COMPOSITE ((composite_t *) 1)
702 #define NULL_COMPOSITE ((composite_t *) 0)
703 
704 #define DEFAULT_CONGRUENCE_TBL_SIZE 256
705 #define MAX_CONGRUENCE_TBL_SIZE (UINT32_MAX/sizeof(composite_t*))
706 #define CONGRUENCE_TBL_RESIZE_RATIO 0.6
707 #define CONGRUENCE_TBL_CLEANUP_RATIO 0.2
708 
709 
710 
711 
712 
713 /*******************************
714  *  TRAIL STACK FOR PUSH/POP   *
715  ******************************/
716 
717 /*
718  * At every push we save the number of terms
719  * + the current propagation pointer
720  */
721 typedef struct egraph_trail_s {
722   uint32_t nterms;
723   uint32_t prop_ptr;
724 } egraph_trail_t;
725 
726 typedef struct egraph_trail_stack_s {
727   uint32_t size;
728   uint32_t top;
729   egraph_trail_t *data;
730 } egraph_trail_stack_t;
731 
732 #define DEFAULT_EGRAPH_TRAIL_SIZE 20
733 #define MAX_EGRAPH_TRAIL_SIZE (UINT32_MAX/sizeof(egraph_trail_t))
734 
735 
736 
737 
738 
739 /*********************************************
740  *  COMMUNICATION WITH OTHER THEORY SOLVERS  *
741  ********************************************/
742 
743 /*
744  * The egraph can be used standalone or as a central solver
745  * connected to the core and communicating with other solvers.
746  *
747  * Two kinds of solvers can be attached to the egraph:
748  * - full solvers: right now, this means arithmetic and bitvector solver,
749  *   a full solver is one that has its own atoms (and could be attached
750  *   directly to the core).
751  * - subsolvers are theory solvers that cannot work without the egraph,
752  *   a subsolver can deal only with equalities and disequalities.
753  *
754  * Full solvers must implement the th_ctrl and th_smt interfaces
755  * (defined in smt_core.h) and the th_egraph interface defined
756  * below.
757  *
758  * Sub-solvers must implement the th_ctrl and th_egraph interfaces.
759  *
760  * The egraph does the following:
761  * - forward commands it gets from the core to all attached solvers
762  * - forward atom-processing commands to the full solvers
763  * - propagate equalities and disequalities to all sub-solvers
764  * Satellite solvers may propagate equalities to the egraph.
765  *
766  *
767  * GENERIC EGRAPH INTERFACE
768  * ========================
769  *
770  * A set of functions common to all satellite solvers is used by the
771  * egraph during the search. To propagate equalities and disequalities
772  * to a satellite solver, the egraph calls one of the following
773  * functions (in the th_egraph interface).
774  *
775  * 1) void assert_equality(void *solver, thvar_t x1, thvar_t x2, int32_t id)
776  *    notify solver that x1 and x2 are equal (after merging classes c1 and c2,
777  *    with thvar[c1] = x1  and thvar[c2] = x2).
778  *    id is the egraph edge that caused c1 and c2 to be merged.
779  *
780  * 2) void assert_disequality(void *solver, thvar_t x1, thvar_t x2, composite_t *cmp)
781  *    notify solver that x1 != x2 holds.
782  *    cmp is an explanation hint. It allows the egraph to correctly generate an
783  *    explanation for x1 != x2 if that's needed later.
784  *
785  * 3) void assert_distinct(void *solver, uint32_t n, thvar_t *a, composite_t *cmp)
786  *    notify solver that a[0] ... a[n-1] are all distinct
787  *    as above, cmp is an explanation hint.
788  *
789  * For all three assert functions above, the satellite solver must
790  * store the assertions internally and process them when propagate is
791  * called.
792  *
793  * 4) bool check_diseq(void *solver, thvar_t x1 thvar_t x2)
794  *    return true if (x1 != x2) holds in the solver at the base level.
795  *    (don't need to be complete: may return false)
796  *
797  * 4a) bool is_constant(void *solver, thvar_t x)
798  *     return true if x is a constant in the theory solver (optional)
799  *     return false otherwise
800  *
801  * Optional function: if the solver propagates equalities to the egraph,
802  * it must implement the following function.
803  *
804  *
805  * 5) void expand_th_explanation(void *solver, thvar_t x1, thvar_t x2, void *expl, th_explanation_t *result)
806  *
807  *    When the solver propagates an equality t1 == t2, it must provide an opaque explanation
808  *    object expl (a void * pointer). The egraph calls the function expand_th_explanation
809  *    when it needs to expand expl to a set of antecedent atoms. The parameters passed to
810  *    the function are x1 --> theory variable of t1 and x2 --> theory variable of t2.
811  *    The function must construct a conjunction of constraints that imply t1 == t2.
812  *    Each constraint may be of the following forms:
813  *     - atom(l): an atom from the satellite solver, attached to literal l
814  *     - eq(u, v): an equality (u==v) that was propagated by egraph to the solver
815  *                 via a call to assert_equality.
816  *     - diseq(u, v): a disequality (u!=v) that was propagated by egraph to the solver,
817  *                via a call to assert_disequality or assert_distinct. This must be given
818  *                as a pre_expl object.
819  *    The set of constraints is stored in a th_explanation data structure that maintains
820  *    three resizable vectors, for atoms, equalities, and diseq constraints.
821  *
822  *
823  * Theory conflict and explanations
824  * --------------------------------
825  *
826  * To construct theory conflicts, a satellite solver can
827  * query the egraph for explanations using functions
828  *  - egraph_explain_term_eq
829  *  - egraph_store_diseq_pre_expl
830  *  - egraph_expand_diseq_pre_expl
831  * These functions are defined in egraph_explanation.c.
832  *
833  * Another function 'egraph_explain_term_diseq' is defined in
834  * egraph_explanation.c but it cannot be used reliably to
835  * lazily build an explanation for (x1 != x2). It may generate an
836  * incorrect (non-causal) explanation. The hint passed to
837  * assert_distinct and assert_disequality is not enough to rebuild the
838  * correct explanation in all cases (cf. egraph_explanation.c).
839  *
840  * If the satellite solver performs an inference or theory propagation
841  * with (x1 != x2) as antecedent, it must use an intermediate
842  * 'pre_expl' object that keeps enough information to build an
843  * explanation for (x1 != x2) if it is needed later.  At the time of
844  * the inference: the satellite must record the pre_expl data using
845  * function
846  *
847  *   egraph_store_diseq_pre_expl(egraph, t1, t2, hint, pre_expl) where
848  *   - t1 must be the egraph term attached to x1
849  *   - t2 must be the egraph term attached to x2
850  *   - hint is the composite provided by the egraph in
851  *     assert_disequality or assert_distinct
852  *   - pre_expl is a pointer to a pre_expl_t structure
853  *
854  * If the explanation for (x1 != x2) is needed later on (i.e., must be expanded to a list of
855  * literal), then the satellite solver must call
856  *
857  *   egraph_expand_diseq_pre_expl(egraph, pre_expl, ..)
858  *
859  * at that time.
860  *
861  *
862  * Interface equalities (2010/01/13)
863  * ---------------------------------
864  *
865  * In final check, the egraph and satellite solver attempt to build consistent
866  * models. If that fails, interface equalities must be generated. The egraph
867  * currently supports to variant implementations of final_check, that use
868  * different functions provided by the satellite solvers.
869  *
870  *
871  * Baseline final_check: for this variant, a satellite solver must implement
872  * the following function.
873  *
874  * 6) uint32_t reconcile_model(void *solver, uint32_t max_eq)
875  *
876  *     This function is called in final_check by the egraph to enforce consistency
877  *     between the egraph and the solver's internal model. There's a conflict
878  *     between egraph and solver if there are two terms t1 and t2 in the egraph,
879  *     and two theory variables x1 and x2 in the solver such that:
880  *      - t1 and t2 are in different classes in the egraph
881  *      - x1 and x2 are equal in the solver's model
882  *      - x1 is t1's theory variable and x2 is t2's theory variable.
883  *
884  *     The solver should attempt to resolve such conflicts by changing the
885  *     values of x1 and x2 if that's possible. Otherwise, it must construct
886  *     the interface equality (eq t1 t2) in the egraph (by calling egraph_make_simple_eq).
887  *     - max_eq is a bound on the number of interface equalities the solver is
888  *       allowed to generate
889  *     - the function must return the number of interface equalities actually created
890  *       (0 means that the egraph and solver model are consistent).
891  *
892  *
893  * Experimental final_check: this variant implements a more flexible
894  * interface generation algorithms. The egraph attempts to resolve
895  * conflict by merging classes. If that fails, it asks the satellite
896  * solver to generate interface lemmas. To support this, the
897  * satellite solvers must implement the following functions:
898  *
899  * 6a) void prepare_model(void *solver)
900  *
901  *    This function is called in final_check before the egraph generates interface lemmas.
902  *    The solver must build a local model at this point (enough to be able to check
903  *    whether two variables x1 and x2 have the same or different values in the local model).
904  *
905  * 6b) bool equal_in_model(void *solver, thvar_t x1, thvar_t x2)
906  *
907  *    Must return true if x1 and x2 have the same value in the model and false if
908  *    they have different values. (So the model must assign a value to all theory variables).
909  *
910  * 6c) void gen_interface_lemma(void *solver, literal_t l, thvar_t x1, thvar_t x2, bool equiv)
911  *
912  *    Ask the theory solver to create a lemmas of the form (l => x1 /= x2).
913  *    If equiv is true, then the solver can also create the reverse implication:  (x1 /= x2 => l).
914  *
915  * 6d) void release_model(void *solver)
916  *
917  *    Called at the end of final_check. This means that the local model built by prepare_model
918  *    is no longer needed. The solver can free whatever internal data structures it used for
919  *    the local model, or do other cleanup.
920  *
921  *
922  * 6e) ipart_t *build_model_partition(void *solver)
923  *
924  *    Called after prepare_model and before release model. The solver
925  *    must construct a partition of its variables: two variables are
926  *    in the same class if they have the same value in the model. This
927  *    can be restricted so that the partition uses one theory variable
928  *    per Egraph class.
929  *
930  *    The solver should use 'int_partition.h' to build this.
931  *
932  * 6f) void release_model_partition(void *solver, ipart_t *partition)
933  *
934  *    Called by the egraph when the partition is no longer needed.
935  *
936  *
937  * In addition, all satellite solvers must implement the following functions
938  * to link egraph terms and theory variables.
939  *
940  * 7) void attach_eterm(void *solver, thvar_t x, eterm_t u): attach u as term for variable x
941  *    in solver. This must be the same function as used by the context.
942  *
943  * 8) eterm_t eterm_of_var(void *solver, thvar_t v)
944  *    return the eterm t attached to v (or null_eterm if v has no eterm attached).
945  *    This must be the same function as used by the context.
946  *
947  *
948  * New function for theory branching (2010/10/27)
949  * ----------------------------------------------
950  *
951  * If theory branching is enabled, the egraph must decide whether an atom l := (eq u1 u2)
952  * must be assigned true or false when l is selected as decision literal. If u1 and u2 are both
953  * attached to two theory variables x1 and x2, then the egraph can delegate the decision to
954  * the theory solver. For this purpose, it calls the function
955  *
956  * 9) literal_t select_eq_polarity(void *solver, thvar_t x1, thvar_t x2, literal_t l)
957  *    - l is a decision literal attached to an egraph atom (eq u1 u2)
958  *    - x1 and x2 are the theory variables for u1 and u2, respectively
959  *    - the theory solver must decide between assigning l true or false
960  *    - the function must return l to set l := true or (not l) to set l := false
961  *
962  *
963  *
964  * THEORY-SPECIFIC INTERFACES
965  * ==========================
966  *
967  * In addition to the common egraph interface defined above, the egraph needs theory-specific
968  * functions to create terms and theory variables, and to build a global model. These are
969  * grouped in interface descriptors that are distinct for each theory.
970  *
971  * Support for term creation
972  * -------------------------
973  *
974  * For new terms of arithmetic, bitvector, or function types, the egraph may create a theory variable
975  * and attach it to the new term. For this purpose, the theory solver must provide functions for
976  * creating theory variables. These functions are the same as the ones used in the context:
977  *
978  * For arithmetic variables:
979  * - thvar_t create_arith_var(void *arith_solver, bool is_int): create a new arithmetic variable.
980  *   If is_int is true, the variable is integer, otherwise it's real.
981  *
982  * For bitvector variables
983  * - thvar_t create_bv_var(void *bv_solver, uint32_t n): create a new bitvector variable.
984  *   n = number of bits
985  *
986  * For function variables
987  * - thvar_t create_fun_var(void *fun_solver, type_t tau): create a new function/array variable
988  *   tau = its type (in the global type table)
989  *
990  *
991  * Support for model construction (global model)
992  * ---------------------------------------------
993  *
994  * An egraph model maps term occurrences to objects (defined in
995  * concrete_values.h).  To build the model, the egraph must query the
996  * satellite solvers for rational/integer or bitvector values assigned
997  * to theory variables.
998  *
999  * For this, we use the following functions:
1000  *
1001  *  a) arithmetic solver
1002  *     ----------------
1003  *    bool value_in_model(void *arith_solver, thvar_t x, rational_t *v)
1004  *
1005  *    Must return true if the arithmetic solver has a value for x in its current model
1006  *    and that value is rational (or integer). It must then copy that value in v.
1007  *    The function must return false otherwise.
1008  *
1009  *    This function should be identical to the function of the same name in
1010  *    the arithmetic solver arith_interface (used by the context).
1011  *
1012  *
1013  *  b) bitvector solver
1014  *     ----------------
1015  *    bool value_in_model(void *bv_solver, thvar_t x, bvconstant_t *b)
1016  *
1017  *    Must return true if the bitvector solver has a value for x in its current model.
1018  *    It must then copy that value in b. It must return false otherwise.
1019  *
1020  *    This function should be identical to the function of the same name in the
1021  *    bvsolver_interface used by the context.
1022  *
1023  *
1024  *  c) function-theory solver
1025  *     ----------------------
1026  *
1027  *    A function subsolver cannot be used without the egraph. So model
1028  *    construction requires close coordination between the egraph and
1029  *    the subsolver. We use a two-step approach:
1030  *
1031  *    - First the function solver constructs an abstract model (via
1032  *      fun_maps/abstract_values).  This may introduce new objects
1033  *      (fresh particles) that are not currently present in the
1034  *      egraph. The only requirement is that all fresh particles must be
1035  *      distinct from each other and from any existing egraph term.
1036  *
1037  *    - Then the egraph converts the abstract model into a concrete
1038  *      model by mapping the abstract particles to concrete values.
1039  *
1040  *    To support this, the function solver must implement the following functions:
1041  *
1042  *    1) void build_model(void *solver, pstore_t *store):
1043  *       Build a model that maps theory variables to abstract maps (map_t).
1044  *       Abstract values (particles) necessary for this model must be allocated in store.
1045  *
1046  *    2) map_t *value_in_model(void *solver, thvar_t x):
1047  *       Return the value assigned to theory variable x in the model.
1048  *       Return NULL if the value of x is not available.
1049  *
1050  *    3) void free_model(void *solver):
1051  *       Notify the solver that the model is no longer needed.
1052  *
1053  */
1054 
1055 
1056 /*
1057  * THEORY EXPLANATION OBJECT
1058  */
1059 
1060 /*
1061  * Disequality pre-explanation object:
1062  * This store enough data to encode the antecedents of a derived
1063  * disequality. A disequality is derived by one of the following
1064  * inference rules:
1065  * Rule 1) (eq u1 u2) = false, u1 == t1,  u2 == t2 IMPLIES (t1 != t2)
1066  * Rule 2) (distinct ... u2 ... u1 ,...) == true,
1067  *          u1 == t1, u2 == t2 IMPLIES (t1 != t2)
1068  * To reliably reconstruct the explanation for (t1 != t2) later on,
1069  * we must store
1070  * - the composite involved (either (eq u1 u2) or (distinct ....))
1071  * - the terms involved, namely, u1, u2, t1, and t2.
1072  */
1073 typedef struct diseq_pre_expl_s {
1074   composite_t *hint;
1075   eterm_t t1, u1;
1076   eterm_t t2, u2;
1077 } diseq_pre_expl_t;
1078 
1079 
1080 // equality constraint in a theory explanation
1081 typedef struct th_eq_s {
1082   eterm_t lhs;
1083   eterm_t rhs;
1084 } th_eq_t;
1085 
1086 // explanation object = three vectors (with hidden headers)
1087 // see theory_explanations.h and theory_explanations.c
1088 typedef struct th_explanation_s {
1089   literal_t *atoms;
1090   th_eq_t *eqs;
1091   diseq_pre_expl_t *diseqs;
1092 } th_explanation_t;
1093 
1094 
1095 
1096 /*
1097  * GENERIC EGRAPH INTERFACE
1098  */
1099 typedef void (*assert_eq_fun_t)(void *satellite, thvar_t x1, thvar_t x2, int32_t id);
1100 typedef void (*assert_diseq_fun_t)(void *satellite, thvar_t x1, thvar_t x2, composite_t *hint);
1101 typedef bool (*assert_distinct_fun_t)(void *satellite, uint32_t n, thvar_t *a, composite_t *hint);
1102 typedef bool (*check_diseq_fun_t)(void *satellite, thvar_t x1, thvar_t x2);
1103 typedef bool (*is_constant_fun_t)(void *satellite, thvar_t x);
1104 typedef void (*expand_eq_exp_fun_t)(void *satellite, thvar_t x1, thvar_t x2, void *expl, th_explanation_t *result);
1105 typedef uint32_t (*reconcile_model_fun_t)(void *satellite, uint32_t max_eq);
1106 typedef void (*prepare_model_fun_t)(void *satellite);
1107 typedef bool (*equal_in_model_fun_t)(void *satellite, thvar_t x1, thvar_t x2);
1108 typedef void (*gen_inter_lemma_fun_t)(void *satellite, literal_t l, thvar_t x1, thvar_t x2, bool equiv);
1109 typedef void (*release_model_fun_t)(void *satellite);
1110 typedef ipart_t *(*build_partition_fun_t)(void *satellite);
1111 typedef void (*free_partition_fun_t)(void *satellite, ipart_t *partition);
1112 typedef void (*attach_to_var_fun_t)(void *satellite, thvar_t x, eterm_t t);
1113 typedef eterm_t (*get_eterm_fun_t)(void *satellite, thvar_t x);
1114 typedef literal_t (*select_eq_polarity_fun_t)(void *satellite, thvar_t x, thvar_t y, literal_t l);
1115 
1116 typedef struct th_egraph_interface_s {
1117   assert_eq_fun_t          assert_equality;
1118   assert_diseq_fun_t       assert_disequality;
1119   assert_distinct_fun_t    assert_distinct;
1120   check_diseq_fun_t        check_diseq;
1121   is_constant_fun_t        is_constant;
1122   expand_eq_exp_fun_t      expand_th_explanation;
1123   reconcile_model_fun_t    reconcile_model;
1124   prepare_model_fun_t      prepare_model;
1125   equal_in_model_fun_t     equal_in_model;
1126   gen_inter_lemma_fun_t    gen_interface_lemma;
1127   release_model_fun_t      release_model;
1128   build_partition_fun_t    build_model_partition;
1129   free_partition_fun_t     release_model_partition;
1130   attach_to_var_fun_t      attach_eterm;
1131   get_eterm_fun_t          eterm_of_var;
1132   select_eq_polarity_fun_t select_eq_polarity;
1133 } th_egraph_interface_t;
1134 
1135 
1136 
1137 
1138 /*
1139  * ARITHMETIC-SPECIFIC INTERFACE
1140  */
1141 typedef thvar_t (*make_arith_var_fun_t)(void *solver, bool is_int);
1142 typedef bool (*arith_val_fun_t)(void *arith_solver, thvar_t x, rational_t *v);
1143 
1144 typedef struct arith_egraph_interface_s {
1145   make_arith_var_fun_t  create_arith_var;
1146   arith_val_fun_t       value_in_model;
1147 } arith_egraph_interface_t;
1148 
1149 
1150 
1151 /*
1152  * BITVECTOR-SPECIFIC INTERFACE
1153  */
1154 typedef thvar_t (*make_bv_var_fun_t)(void *solver, uint32_t n);
1155 typedef bool (*bv_val_fun_t)(void *solver, thvar_t x, bvconstant_t *v);
1156 
1157 typedef struct bv_egraph_interface_s {
1158   make_bv_var_fun_t    create_bv_var;
1159   bv_val_fun_t         value_in_model;
1160 } bv_egraph_interface_t;
1161 
1162 
1163 
1164 /*
1165  * FUNCTION-SOLVER INTERFACE
1166  */
1167 typedef thvar_t (*make_fun_var_fun_t)(void *solver, type_t tau);
1168 typedef void (*fun_build_model_fun_t)(void *fun_solver, pstore_t *store);
1169 typedef map_t *(*fun_val_fun_t)(void *fun_solver, thvar_t x);
1170 typedef void (*fun_free_model_fun_t)(void *fun_solver);
1171 
1172 typedef struct fun_egraph_interface_s {
1173   make_fun_var_fun_t    create_fun_var;
1174   fun_build_model_fun_t build_model;
1175   fun_val_fun_t         value_in_model;
1176   fun_free_model_fun_t  free_model;
1177 } fun_egraph_interface_t;
1178 
1179 
1180 
1181 /***********
1182  *  MODEL  *
1183  **********/
1184 
1185 /*
1186  * Auxiliary structures used in model construction. We now use a
1187  * global fresh_value_maker to construct fresh values. For this to
1188  * work properly, we must assign a value to all classes of type tau
1189  * before attempting to create fresh values of type tau. To do this,
1190  * we sort the classes by rank when rank c = depth of c's type:
1191  * - all atomic types have depth 0
1192  * - a function or tuple type has positive depth equal to 1 + max depth
1193  *   of the types it depends on.
1194  * Then we assign values to classes in increasing order of type depth.
1195  * This works since fresh values of type tau are used to construct
1196  * arrays/functions of some function type that contains tau.
1197  *
1198  *
1199  * Model components:
1200  * - value = maps classes to concrete values (if c is not a root class
1201  *   then value[c] = null_value).
1202  * - root_classes = vector of all root classes
1203  * - rank_ctr = vector of counters for sorting the root classes by rank
1204  * - pstore = auxiliary particle store used by the function solver
1205  * - fval_maker = data structure to create fresh values
1206  * - internal buffers for rational and bitvector values
1207  *
1208  */
1209 typedef struct egraph_model_s {
1210   value_t *value;
1211   pstore_t *pstore;
1212   fresh_val_maker_t *fval_maker;
1213   ivector_t root_classes;
1214   ivector_t rank_ctr;
1215   rational_t arith_buffer;
1216   bvconstant_t bv_buffer;
1217 } egraph_model_t;
1218 
1219 
1220 
1221 
1222 
1223 
1224 /******************
1225  *  CACHED DATA   *
1226  *****************/
1227 
1228 /*
1229  * Lemmas can be added to the core by the egraph:
1230  *
1231  * 1) Expansion of a (distinct ...) atom. When
1232  *   "not (distinct t1 ... t_n)" is asserted, then the clause
1233  *   "(distinct t1 ... t_n) or (eq t1 t2) or (eq t1 t3) ... or (eq t_n-1 t_n)"
1234  *   is added to the core
1235  *
1236  * 2) Ackermann clause. A heuristic may create clauses of the form
1237  *   "(eq t1 u1) and (eq t2 u2) ... (eq t_n u_n) implies
1238  *           (eq (f t_1 ... t_n) (f u_1 ... u_n))"
1239  *
1240  * An internal cache stores data about which lemmas have been created (to
1241  * prevents multiple generation of the same lemma).
1242  * - for a distinct-expansion, the cache stores <DISTINCT_LEMMA, u> where u = term id of
1243  *   the (distinct ...) term
1244  * - for an Ackermann clause, the cache stores <ACKERMANN_LEMMA, u, v> where u and v are
1245  *   the term ids of (f t_1 ... t_n) and (f u_1 ... u_n), respectively.
1246  */
1247 typedef enum elemma_tag {
1248   DISTINCT_LEMMA = 0,
1249   ACKERMANN_LEMMA = 1,
1250 } elemma_tag_t;
1251 
1252 
1253 
1254 /****************
1255  *  STATISTICS  *
1256  ***************/
1257 
1258 // search statistics
1259 typedef struct egraph_stats_s {
1260   uint32_t app_reductions;
1261 
1262   uint32_t eq_props;         // equalities propagated by satellite solvers (simplex)
1263   uint32_t th_props;         // propagations from egraph to core
1264   uint32_t th_conflicts;     // conflicts detected by egraph
1265   uint32_t nd_lemmas;        // number of non-distinct lemmas
1266 
1267   // counters related to ackermann clauses
1268   uint32_t aux_eqs;          // number of equality terms created
1269   uint32_t boolack_lemmas;   // number of boolean ackermann instances created
1270   uint32_t ack_lemmas;       // number of non-boolean ackermann instances created
1271 
1272   // statistics on interface equalities
1273   uint32_t final_checks;     // number of calls to final check
1274   uint32_t interface_eqs;    // number of interface equalities generated
1275 
1276 } egraph_stats_t;
1277 
1278 
1279 
1280 /**************
1281  *   EGRAPH   *
1282  *************/
1283 
1284 typedef struct egraph_s egraph_t;
1285 
1286 struct egraph_s {
1287   /*
1288    * Attached smt_core + type table
1289    */
1290   smt_core_t *core;
1291   type_table_t *types;
1292 
1293   /*
1294    * base_level and decision_level mirror the same counters
1295    * in the attached core.
1296    * - base_level keeps track of the number of calls to push
1297    * - decision_level is incremented during the search, decremented
1298    *   during backtracking
1299    */
1300   uint32_t base_level;
1301   uint32_t decision_level;
1302 
1303   /*
1304    * Presearch flag: set on call to start_internalization
1305    * Cleared on call to start_search.
1306    */
1307   bool presearch;
1308 
1309   /*
1310    * Number of (distinct ...) terms allocated
1311    */
1312   uint32_t ndistincts;
1313 
1314   /*
1315    * Number of atoms
1316    */
1317   uint32_t natoms;
1318 
1319   /*
1320    * True if the egraph contains high-order terms
1321    */
1322   bool is_high_order;
1323 
1324   /*
1325    * Statistics
1326    */
1327   egraph_stats_t stats;
1328 
1329   /*
1330    * Option flag and search parameters
1331    * each option is a single bit in the option flag
1332    */
1333   uint32_t options;
1334 
1335   /*
1336    * Limits on ackermann clause generation
1337    * - max_ackermann = bound on the number of non-boolean Ackermann lemmas
1338    * - max_boolackermann = bound on the number of boolean Ackermann lemmas
1339    * - aux_eq_quota = bound on the number of auxiliary equalities created
1340    *   by Ackermann lemmas
1341    */
1342   uint32_t max_ackermann;
1343   uint32_t max_boolackermann;
1344   uint32_t aux_eq_quota;
1345 
1346   /*
1347    * Thresholds to trigger the generation of Ackermann/Boolean Ackermann
1348    * lemmas: when a candidate pair (t1 == t2) is selected, a counter
1349    * is increased. When that counter reaches the threshold, a lemma
1350    * is generated.
1351    */
1352   uint16_t ackermann_threshold;
1353   uint16_t boolack_threshold;
1354 
1355   /*
1356    * Two candidates for the next Ackermann lemma:
1357    * when the egraph detects a conflict while processing (t1 == t2)
1358    * then it stores t1 in ack_left and t2 in ack_right if
1359    * (t1 == t2) was propagated by BASIC_CONGRUENCE.
1360    */
1361   occ_t ack_left, ack_right;
1362 
1363   /*
1364    * Limit on the number of interface equalities created
1365    * in each call to final_check
1366    */
1367   uint32_t max_interface_eqs;
1368 
1369   /*
1370    * Main components
1371    */
1372   class_table_t classes;
1373   eterm_table_t terms;
1374   egraph_stack_t stack;
1375   undo_stack_t undo;
1376   distinct_table_t dtable;
1377   congruence_table_t ctable;
1378   ltag_table_t tag_table;
1379 
1380   update_graph_t *update_graph; // optional
1381 
1382   /*
1383    * Push/pop stack
1384    */
1385   egraph_trail_stack_t trail_stack;
1386 
1387   /*
1388    * Auxiliary buffers and structures
1389    */
1390   int_htbl_t *const_htbl;     // for hash-consing of constants (allocated on demand)
1391   int_htbl_t htbl;            // for hash-consing of composite terms
1392   object_store_t atom_store;  // for creating atoms
1393   cache_t cache;              // for creating lemmas
1394 
1395   int_hmap_t *imap;           // for or-congruence explanations
1396   signature_t sgn;            // auxiliary buffer for congruence closure
1397   arena_t arena;              // stack-based allocation
1398   ivector_t expl_queue;       // vector used as a queue of edges (explanation queue)
1399   ivector_t expl_vector;      // vector of literals for conflict/explanations
1400   pvector_t cmp_vector;       // generic vector to store composites
1401   ivector_t aux_buffer;       // generic buffer used in term construction
1402   int_stack_t istack;         // generic stack for recursive processing
1403 
1404 
1405   /*
1406    * Experimental: attempt to produce better equality explanation
1407    * - when the egraph knows (t1 == t2) it can propagate a literal l := true
1408    *   where l is attached to (eq t1 t2).
1409    * - by default, we explore the egraph merge trees to construct an
1410    *   explanation for (t1 == t2)
1411    * - if short_cuts is true, we try to just use l as the explanation for (t1 == t2),
1412    *   but we have to make sure this does not introduce circularities.
1413    * - top_id is intended to prevent circular explanation
1414    */
1415   bool short_cuts;            // enable/disable short cuts in explanations
1416   int32_t top_id;             // used when building explanations
1417 
1418   /*
1419    * Support for model reconciliation
1420    */
1421   ivector_t interface_eqs;    // pairs of term occurrences (for interface lemmas)
1422   uint32_t reconcile_top;     // top of the undo stack when reconcile started
1423   uint32_t reconcile_neqs;    // number of equalities when reconcile started
1424   bool reconcile_mode;        // true if the egraph has some edges for model reconciliation
1425 
1426 
1427   /*
1428    * Support for on-the-fly creation of composite terms.
1429    * Composites created at decision_level > base_level
1430    * may need to be reactivated after backtracking. They
1431    * are stored in reanalyze_vector.
1432    */
1433   pvector_t reanalyze_vector;
1434 
1435   /*
1436    * Theory explanation object: for building explanation of equalities
1437    * propagated by a subsolver
1438    */
1439   th_explanation_t th_expl;
1440 
1441   /*
1442    * Helper for the array theory solver
1443    * allocated on demand
1444    */
1445   ppart_t *app_partition;
1446 
1447   /*
1448    * Satellite solvers and interface descriptors
1449    *
1450    * Generic:
1451    * - th[i] = solver for theory i
1452    * - ctrl[i] = its control interface
1453    * - eg[i] = its egraph interface
1454    *
1455    * Theory specific descriptors
1456    * - arith_smt:   core interface for arith solver
1457    * - bv_smt:      core interface for bitvector solver
1458    * - arith_eg: egraph interface for arith solver
1459    * - bv_eg:    egraph interface for the bitvector solver
1460    * - fun_eg:   egraph interface for the array/function solver
1461    */
1462   void *th[NUM_SATELLITES];
1463   th_ctrl_interface_t *ctrl[NUM_SATELLITES];
1464   th_egraph_interface_t *eg[NUM_SATELLITES];
1465 
1466   th_smt_interface_t *arith_smt;
1467   th_smt_interface_t *bv_smt;
1468 
1469   arith_egraph_interface_t *arith_eg;
1470   bv_egraph_interface_t  *bv_eg;
1471   fun_egraph_interface_t *fun_eg;
1472 
1473 
1474   /*
1475    * Model structure
1476    */
1477   egraph_model_t mdl;
1478 };
1479 
1480 
1481 #define DEFAULT_EXPL_VECTOR_SIZE  20
1482 #define DEFAULT_CMP_VECTOR_SIZE   20
1483 
1484 
1485 /*
1486  * Option flags:
1487  * - each mask selects a bit in the option word
1488  * - bit == 1 means option enabled, 0 means option disabled
1489  *
1490  * DYNAMIC_ACKERMANN enables generation of ackermann lemmas for non-boolean terms.
1491  * If it's enabled, max_ackermann is a bound on the number of lemmas generated.
1492  *
1493  * DYNAMIC_BOOLACKERMANN enables the generation of ackermann lemmas for boolean terms.
1494  * If that's enabled, max_boolackermann is a bound on the number of lemmas generated.
1495  *
1496  * OPTIMISTIC_FCHECK selects the experimental version of final_check instead of the
1497  * baseline version.
1498  *
1499  * In addition, aux_eq_quota is a bound on the total number of new equalities allowed
1500  * for ackermann lemmas.
1501  *
1502  * MAX_INTERFACE_EQS is a bound on the number of interface equalities created
1503  * in each call to final_check.
1504  */
1505 #define EGRAPH_DYNAMIC_ACKERMANN       0x1
1506 #define EGRAPH_DYNAMIC_BOOLACKERMANN   0x2
1507 #define EGRAPH_OPTIMISTIC_FCHECK       0x4
1508 #define EGRAPH_DISABLE_ALL_OPTIONS     0x0
1509 
1510 #define DEFAULT_MAX_ACKERMANN         1000
1511 #define DEFAULT_MAX_BOOLACKERMANN     600000 // unlimited
1512 #define DEFAULT_AUX_EQ_QUOTA          100
1513 
1514 #define DEFAULT_ACKERMANN_THRESHOLD   8
1515 #define DEFAULT_BOOLACK_THRESHOLD     8
1516 
1517 #define DEFAULT_MAX_INTERFACE_EQS     200
1518 
1519 
1520 //  disable boolean and non-boolean ackermann
1521 #define EGRAPH_DEFAULT_OPTIONS  EGRAPH_DISABLE_ALL_OPTIONS
1522 
1523 
1524 
1525 #endif /* __EGRAPH_TYPES_H */
1526