1 /*********************                                                        */
2 /*! \file sygus_pbe.h
3  ** \verbatim
4  ** Top contributors (to current version):
5  **   Andrew Reynolds, Haniel Barbosa
6  ** This file is part of the CVC4 project.
7  ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8  ** in the top-level source directory) and their institutional affiliations.
9  ** All rights reserved.  See the file COPYING in the top-level source
10  ** directory for licensing information.\endverbatim
11  **
12  ** \brief utility for processing programming by examples synthesis conjectures
13  **/
14 
15 #include "cvc4_private.h"
16 
17 #ifndef __CVC4__THEORY__QUANTIFIERS__SYGUS_PBE_H
18 #define __CVC4__THEORY__QUANTIFIERS__SYGUS_PBE_H
19 
20 #include "context/cdhashmap.h"
21 #include "theory/quantifiers/sygus/sygus_module.h"
22 #include "theory/quantifiers/sygus/sygus_unif_io.h"
23 
24 namespace CVC4 {
25 namespace theory {
26 namespace quantifiers {
27 
28 class SynthConjecture;
29 
30 /** SygusPbe
31  *
32  * This class implements optimizations that target synthesis conjectures
33  * that are in Programming-By-Examples (PBE) form.
34  *
35  * [EX#1] An example of a synthesis conjecture in PBE form is :
36  * exists f. forall x.
37  * ( x = 0 => f( x ) = 2 ) ^ ( x = 5 => f( x ) = 7 ) ^ ( x = 6 => f( x ) = 8 )
38  *
39  * We say that the above conjecture has I/O examples (0)->2, (5)->7, (6)->8.
40  *
41  * Internally, this class does the following for SyGuS inputs:
42  *
43  * (1) Infers whether the input conjecture is in PBE form or not.
44  * (2) Based on this information and on the syntactic restrictions, it
45  *     devises a strategy for enumerating terms and construction solutions,
46  *     which is inspired by Alur et al. "Scaling Enumerative Program Synthesis
47  *     via Divide and Conquer" TACAS 2017. In particular, it may consider
48  *     strategies for constructing decision trees when the grammar permits ITEs
49  *     and a strategy for divide-and-conquer string synthesis when the grammar
50  *     permits string concatenation. This is managed through the SygusUnif
51  *     utilities, d_sygus_unif.
52  * (3) It makes (possibly multiple) calls to
53  *     TermDatabaseSygus::regstierEnumerator(...) based
54  *     on the strategy, which inform the rest of the system to enumerate values
55  *     of particular types in the grammar through use of fresh variables which
56  *     we call "enumerators".
57  *
58  * Points (1)-(3) happen within a call to SygusPbe::initialize(...).
59  *
60  * Notice that each enumerator is associated with a single
61  * function-to-synthesize, but a function-to-sythesize may be mapped to multiple
62  * enumerators. Some public functions of this class expect an enumerator as
63  * input, which we map to a function-to-synthesize via
64  * TermDatabaseSygus::getSynthFunFor(e).
65  *
66  * An enumerator is initially "active" but may become inactive if the
67  * enumeration exhausts all possible values in the datatype corresponding to
68  * syntactic restrictions for it. The search may continue unless all enumerators
69  * become inactive.
70  *
71  * (4) During search, the extension of quantifier-free datatypes procedure for
72  *     SyGuS datatypes may ask this class whether current candidates can be
73  *     discarded based on inferring when two candidate solutions are equivalent
74  *     up to examples. For example, the candidate solutions:
75  *     f = \x ite( x < 0, x+1, x ) and f = \x x
76  *     are equivalent up to examples on the above conjecture, since they have
77  * the same value on the points x = 0,5,6. Hence, we need only consider one of
78  *     them. The interface for querying this is
79  *       SygusPbe::addSearchVal(...).
80  *     For details, see Reynolds et al. SYNT 2017.
81  *
82  * (5) When the extension of quantifier-free datatypes procedure for SyGuS
83  *     datatypes terminates with a model, the parent of this class calls
84  *     SygusPbe::getCandidateList(...), where this class returns the list
85  *     of active enumerators.
86  * (6) The parent class subsequently calls
87  *     SygusPbe::constructValues(...), which informs this class that new
88  *     values have been enumerated for active enumerators, as indicated by the
89  *     current model. This call also requests that based on these
90  *     newly enumerated values, whether this class is now able to construct a
91  *     solution based on the high-level strategy (stored in d_sygus_unif).
92  *
93  * This class is not designed to work in incremental mode, since there is no way
94  * to specify incremental problems in SyguS.
95  */
96 class SygusPbe : public SygusModule
97 {
98  public:
99   SygusPbe(QuantifiersEngine* qe, SynthConjecture* p);
100   ~SygusPbe();
101 
102   /** initialize this class
103   *
104   * This function may add lemmas to the vector lemmas corresponding
105   * to initial lemmas regarding static analysis of enumerators it
106   * introduced. For example, we may say that the top-level symbol
107   * of an enumerator is not ITE if it is being used to construct
108   * return values for decision trees.
109   */
110   bool initialize(Node n,
111                   const std::vector<Node>& candidates,
112                   std::vector<Node>& lemmas) override;
113   /** get term list
114    *
115   * Adds all active enumerators associated with functions-to-synthesize in
116   * candidates to terms.
117   */
118   void getTermList(const std::vector<Node>& candidates,
119                    std::vector<Node>& terms) override;
120   /**
121    * PBE allows partial models to handle multiple enumerators if we are not
122    * using a strictly fair enumeration strategy.
123    */
124   bool allowPartialModel() override;
125   /** construct candidates
126    *
127    * This function attempts to use unification-based approaches for constructing
128    * solutions for all functions-to-synthesize (indicated by candidates). These
129    * approaches include decision tree learning and a divide-and-conquer
130    * algorithm based on string concatenation.
131    *
132    * Calls to this function are such that terms is the list of active
133    * enumerators (returned by getTermList), and term_values are their current
134    * model values. This function registers { terms -> terms_values } in
135    * the database of values that have been enumerated, which are in turn used
136    * for constructing candidate solutions when possible.
137    *
138    * This function also excludes models where (terms = terms_values) by adding
139    * blocking clauses to lems. For example, for grammar:
140    *   A -> A+A | x | 1 | 0
141    * and a call where terms = { d } and term_values = { +( x, 1 ) }, it adds:
142    *   ~G V ~is_+( d ) V ~is_x( d.1 ) V ~is_1( d.2 )
143    * to lems, where G is active guard of the enumerator d (see
144    * TermDatabaseSygus::getActiveGuardForEnumerator). This blocking clause
145    * indicates that d should not be given the model value +( x, 1 ) anymore,
146    * since { d -> +( x, 1 ) } has now been added to the database of this class.
147    */
148   bool constructCandidates(const std::vector<Node>& terms,
149                            const std::vector<Node>& term_values,
150                            const std::vector<Node>& candidates,
151                            std::vector<Node>& candidate_values,
152                            std::vector<Node>& lems) override;
153   /** is PBE enabled for any enumerator? */
isPbe()154   bool isPbe() { return d_is_pbe; }
155   /** is the enumerator e associated with I/O example pairs? */
156   bool hasExamples(Node e);
157   /** get number of I/O example pairs for enumerator e */
158   unsigned getNumExamples(Node e);
159   /** get the input arguments for i^th I/O example for e, which is added to the
160    * vector ex */
161   void getExample(Node e, unsigned i, std::vector<Node>& ex);
162   /** get the output value of the i^th I/O example for enumerator e */
163   Node getExampleOut(Node e, unsigned i);
164 
165   /** add the search val
166   * This function is called by the extension of quantifier-free datatypes
167   * procedure for SyGuS datatypes when we are considering a value of
168   * enumerator e of sygus type tn whose analog in the signature of builtin
169   * theory is bvr.
170   *
171   * For example, bvr = x + 1 when e is the datatype value Plus( x(), One() ) and
172   * tn is a sygus datatype that encodes a subsignature of the integers.
173   *
174   * This returns either:
175   * - A SyGuS term whose analog is equivalent to bvr up to examples
176   *   In the above example,
177   *   it may return a term t of the form Plus( One(), x() ), such that this
178   *   function was previously called with t as input.
179   * - e, indicating that no previous terms are equivalent to e up to examples.
180   */
181   Node addSearchVal(TypeNode tn, Node e, Node bvr);
182   /** evaluate builtin
183   * This returns the evaluation of bn on the i^th example for the
184   * function-to-synthesis
185   * associated with enumerator e. If there are not at least i examples, it
186   * returns the rewritten form of bn.
187   * For example, if bn = x+5, e is an enumerator for f in the above example
188   * [EX#1], then
189   *   evaluateBuiltin( tn, bn, e, 0 ) = 7
190   *   evaluateBuiltin( tn, bn, e, 1 ) = 9
191   *   evaluateBuiltin( tn, bn, e, 2 ) = 10
192   */
193   Node evaluateBuiltin(TypeNode tn, Node bn, Node e, unsigned i);
194 
195  private:
196   /** true and false nodes */
197   Node d_true;
198   Node d_false;
199   /** is this a PBE conjecture for any function? */
200   bool d_is_pbe;
201   /** for each candidate variable f (a function-to-synthesize), whether the
202   * conjecture is purely PBE for that variable
203   * In other words, all occurrences of f are guarded by equalities that
204   * constraint its arguments to constants.
205   */
206   std::map<Node, bool> d_examples_invalid;
207   /** for each candidate variable (function-to-synthesize), whether the
208   * conjecture is purely PBE for that variable.
209   * An example of a conjecture for which d_examples_invalid is false but
210   * d_examples_out_invalid is true is:
211   *   exists f. forall x. ( x = 0 => f( x ) > 2 )
212   * another example is:
213   *   exists f. forall x. ( ( x = 0 => f( x ) = 2 ) V ( x = 3 => f( x ) = 3 ) )
214   * since the formula is not a conjunction (the example values are not
215   * entailed).
216   * However, the domain of f in both cases is finite, which can be used for
217   * search space pruning.
218   */
219   std::map<Node, bool> d_examples_out_invalid;
220   /**
221    * Map from candidates to sygus unif utility. This class implements
222    * the core algorithm (e.g. decision tree learning) that this module relies
223    * upon.
224    */
225   std::map<Node, SygusUnifIo> d_sygus_unif;
226   /**
227    * map from candidates to the list of enumerators that are being used to
228    * build solutions for that candidate by the above utility.
229    */
230   std::map<Node, std::vector<Node> > d_candidate_to_enum;
231   /** reverse map of above */
232   std::map<Node, Node> d_enum_to_candidate;
233   /** for each candidate variable (function-to-synthesize), input of I/O
234    * examples */
235   std::map<Node, std::vector<std::vector<Node> > > d_examples;
236   /** for each candidate variable (function-to-synthesize), output of I/O
237    * examples */
238   std::map<Node, std::vector<Node> > d_examples_out;
239   /** the list of example terms
240    * For the example [EX#1] above, this is f( 0 ), f( 5 ), f( 6 )
241    */
242   std::map<Node, std::vector<Node> > d_examples_term;
243   /**
244    * Map from example input terms to their output, for example [EX#1] above,
245    * this is { f( 0 ) -> 2, f( 5 ) -> 7, f( 6 ) -> 8 }.
246    */
247   std::map<Node, Node> d_exampleTermMap;
248   /** collect the PBE examples in n
249    * This is called on the input conjecture, and will populate the above
250    * vectors, where hasPol/pol denote the polarity of n in the conjecture. This
251    * function returns false if it finds two examples that are contradictory.
252    */
253   bool collectExamples(Node n,
254                        std::map<Node, bool>& visited,
255                        bool hasPol,
256                        bool pol);
257 
258   //--------------------------------- PBE search values
259   /**
260    * This class is an index of candidate solutions for PBE synthesis and their
261    * (concrete) evaluation on the set of input examples. For example, if the
262    * set of input examples for (x,y) is (0,1), (1,3), then:
263    *   term x is indexed by 0,1
264    *   term x+y is indexed by 1,4
265    *   term 0 is indexed by 0,0.
266    */
267   class PbeTrie
268   {
269    public:
PbeTrie()270     PbeTrie() {}
~PbeTrie()271     ~PbeTrie() {}
272     /** the children for this node in the trie */
273     std::map<Node, PbeTrie> d_children;
274     /** clear this trie */
clear()275     void clear() { d_children.clear(); }
276     /**
277      * Add term b whose value on examples is exOut to the trie. Return
278      * the first term registered to this trie whose evaluation was exOut.
279      */
280     Node addTerm(Node b, const std::vector<Node>& exOut);
281   };
282   /** trie of candidate solutions tried
283   * This stores information for each (enumerator, type),
284   * where type is a type in the grammar of the space of solutions for a subterm
285   * of e. This is used for symmetry breaking in quantifier-free reasoning
286   * about SyGuS datatypes.
287   */
288   std::map<Node, std::map<TypeNode, PbeTrie> > d_pbe_trie;
289   //--------------------------------- end PBE search values
290 };
291 
292 } /* namespace CVC4::theory::quantifiers */
293 } /* namespace CVC4::theory */
294 } /* namespace CVC4 */
295 
296 #endif
297