1 /*++
2 Copyright (c) 2015 Microsoft Corporation
3 
4 Module Name:
5 
6     seq_rewriter.h
7 
8 Abstract:
9 
10     Basic rewriting rules for sequences constraints.
11 
12 Author:
13 
14     Nikolaj Bjorner (nbjorner) 2015-12-5
15 
16 Notes:
17 
18 --*/
19 #pragma once
20 
21 #include "ast/seq_decl_plugin.h"
22 #include "ast/ast_pp.h"
23 #include "ast/arith_decl_plugin.h"
24 #include "ast/rewriter/rewriter_types.h"
25 #include "ast/rewriter/bool_rewriter.h"
26 #include "util/params.h"
27 #include "util/lbool.h"
28 #include "util/sign.h"
29 #include "math/automata/automaton.h"
30 #include "math/automata/symbolic_automata.h"
31 
32 
33 inline std::ostream& operator<<(std::ostream& out, expr_ref_pair_vector const& es) {
34     for (auto const& p : es) {
35         out << expr_ref(p.first, es.get_manager()) << "; " << expr_ref(p.second, es.get_manager()) << "\n";
36     }
37     return out;
38 }
39 
40 class sym_expr {
41     enum ty {
42         t_char,
43         t_pred,
44         t_not,
45         t_range
46     };
47     ty        m_ty;
48     sort*     m_sort;
49     sym_expr* m_expr;
50     expr_ref  m_t;
51     expr_ref  m_s;
52     unsigned  m_ref;
sym_expr(ty ty,expr_ref & t,expr_ref & s,sort * srt,sym_expr * e)53     sym_expr(ty ty, expr_ref& t, expr_ref& s, sort* srt, sym_expr* e) :
54         m_ty(ty), m_sort(srt), m_expr(e), m_t(t), m_s(s), m_ref(0) {}
55 public:
~sym_expr()56     ~sym_expr() { if (m_expr) m_expr->dec_ref(); }
57     expr_ref accept(expr* e);
mk_char(expr_ref & t)58     static sym_expr* mk_char(expr_ref& t) { return alloc(sym_expr, t_char, t, t, t->get_sort(), nullptr); }
mk_char(ast_manager & m,expr * t)59     static sym_expr* mk_char(ast_manager& m, expr* t) { expr_ref tr(t, m); return mk_char(tr); }
mk_pred(expr_ref & t,sort * s)60     static sym_expr* mk_pred(expr_ref& t, sort* s) { return alloc(sym_expr, t_pred, t, t, s, nullptr); }
mk_range(expr_ref & lo,expr_ref & hi)61     static sym_expr* mk_range(expr_ref& lo, expr_ref& hi) { return alloc(sym_expr, t_range, lo, hi, hi->get_sort(), nullptr); }
mk_not(ast_manager & m,sym_expr * e)62     static sym_expr* mk_not(ast_manager& m, sym_expr* e) { expr_ref f(m); e->inc_ref(); return alloc(sym_expr, t_not, f, f, e->get_sort(), e); }
inc_ref()63     void inc_ref() { ++m_ref;  }
dec_ref()64     void dec_ref() { --m_ref; if (m_ref == 0) dealloc(this); }
65     std::ostream& display(std::ostream& out) const;
is_char()66     bool is_char() const { return m_ty == t_char; }
is_pred()67     bool is_pred() const { return !is_char(); }
is_range()68     bool is_range() const { return m_ty == t_range; }
is_not()69     bool is_not() const { return m_ty == t_not; }
get_sort()70     sort* get_sort() const { return m_sort; }
get_char()71     expr* get_char() const { SASSERT(is_char()); return m_t; }
get_pred()72     expr* get_pred() const { SASSERT(is_pred()); return m_t; }
get_lo()73     expr* get_lo() const { SASSERT(is_range()); return m_t; }
get_hi()74     expr* get_hi() const { SASSERT(is_range()); return m_s; }
get_arg()75     sym_expr* get_arg() const { SASSERT(is_not()); return m_expr; }
76 };
77 
78 class sym_expr_manager {
79 public:
inc_ref(sym_expr * s)80     void inc_ref(sym_expr* s) { if (s) s->inc_ref(); }
dec_ref(sym_expr * s)81     void dec_ref(sym_expr* s) { if (s) s->dec_ref(); }
82 };
83 
84 class expr_solver {
85 public:
~expr_solver()86     virtual ~expr_solver() {}
87     virtual lbool check_sat(expr* e) = 0;
88 };
89 
90 typedef automaton<sym_expr, sym_expr_manager> eautomaton;
91 class re2automaton {
92     typedef boolean_algebra<sym_expr*> boolean_algebra_t;
93     typedef symbolic_automata<sym_expr, sym_expr_manager> symbolic_automata_t;
94     ast_manager& m;
95     sym_expr_manager sm;
96     seq_util     u;
97     scoped_ptr<expr_solver>         m_solver;
98     scoped_ptr<boolean_algebra_t>   m_ba;
99     scoped_ptr<symbolic_automata_t> m_sa;
100 
101     bool is_unit_char(expr* e, expr_ref& ch);
102     eautomaton* re2aut(expr* e);
103     eautomaton* seq2aut(expr* e);
104 public:
105     re2automaton(ast_manager& m);
106     eautomaton* operator()(expr* e);
107     void set_solver(expr_solver* solver);
has_solver()108     bool has_solver() const { return m_solver; }
109     eautomaton* mk_product(eautomaton *a1, eautomaton *a2);
110 };
111 
112 /**
113    \brief Cheap rewrite rules for seq constraints
114 */
115 class seq_rewriter {
116 
117     class op_cache {
118         struct op_entry {
119             decl_kind k;
120             expr* a, *b, *c, *r;
op_entryop_entry121             op_entry(decl_kind k, expr* a, expr* b, expr* c, expr* r): k(k), a(a), b(b), c(c), r(r) {}
op_entryop_entry122             op_entry():k(0), a(nullptr), b(nullptr), c(nullptr), r(nullptr) {}
123         };
124 
125         struct hash_entry {
operatorhash_entry126             unsigned operator()(op_entry const& e) const {
127                 return combine_hash(mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0), e.c ? e.c->get_id() : 0);
128             }
129         };
130 
131         struct eq_entry {
operatoreq_entry132             bool operator()(op_entry const& a, op_entry const& b) const {
133                 return a.k == b.k && a.a == b.a && a.b == b.b && a.c == b.c;
134             }
135         };
136 
137         typedef hashtable<op_entry, hash_entry, eq_entry> op_table;
138 
139         unsigned        m_max_cache_size { 10000 };
140         expr_ref_vector m_trail;
141         op_table        m_table;
142         void cleanup();
143 
144     public:
145         op_cache(ast_manager& m);
146         expr* find(decl_kind op, expr* a, expr* b, expr* c);
147         void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
148     };
149 
150     seq_util       m_util;
151     arith_util     m_autil;
152     bool_rewriter  m_br;
153     re2automaton   m_re2aut;
154     op_cache       m_op_cache;
155     expr_ref_vector m_es, m_lhs, m_rhs;
156     bool           m_coalesce_chars;
157 
158     enum length_comparison {
159         shorter_c,
160         longer_c,
161         same_length_c,
162         unknown_c
163     };
164 
165 
166 
compare_lengths(expr_ref_vector const & as,expr_ref_vector const & bs)167     length_comparison compare_lengths(expr_ref_vector const& as, expr_ref_vector const& bs) {
168         return compare_lengths(as.size(), as.data(), bs.size(), bs.data());
169     }
170     length_comparison compare_lengths(unsigned sza, expr* const* as, unsigned szb, expr* const* bs);
171 
172 
173     bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail);
174     bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
175     bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail);
176     bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
177 
178     expr_ref re_and(expr* cond, expr* r);
179     expr_ref re_predicate(expr* cond, sort* seq_sort);
180 
181     expr_ref mk_seq_concat(expr* a, expr* b);
182 
183     // Construct the expressions for taking the first element, the last element, the rest, and the butlast element
184     expr_ref mk_seq_first(expr* s);
185     expr_ref mk_seq_rest(expr* s);
186     expr_ref mk_seq_last(expr* s);
187     expr_ref mk_seq_butlast(expr* s);
188 
189     bool try_get_unit_values(expr* s, expr_ref_vector& result);
190     //replace b in a by c into result
191     void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result);
192 
193     // Calculate derivative, memoized and enforcing a normal form
194     expr_ref is_nullable_rec(expr* r);
195     expr_ref mk_derivative_rec(expr* ele, expr* r);
196     expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
197     expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);
198     expr_ref mk_der_concat(expr* a, expr* b);
199     expr_ref mk_der_union(expr* a, expr* b);
200     expr_ref mk_der_inter(expr* a, expr* b);
201     expr_ref mk_der_compl(expr* a);
202     expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
203     expr_ref mk_der_antimorov_union(expr* r1, expr* r2);
204     bool ite_bdds_compatabile(expr* a, expr* b);
205     /* if r has the form deriv(en..deriv(e1,to_re(s))..) returns 's = [e1..en]' else returns '() in r'*/
206     expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
207     #ifdef Z3DEBUG
208     bool check_deriv_normal_form(expr* r, int level = 3);
209     #endif
210 
211     void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result);
212 
213     expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path);
214     expr_ref mk_in_antimirov_rec(expr* s, expr* d);
215     expr_ref mk_in_antimirov(expr* s, expr* d);
216 
217     expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path);
218     expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
219     expr_ref mk_antimirov_deriv_negate(expr* d);
220     expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
221     expr_ref mk_regex_reverse(expr* r);
222     expr_ref mk_regex_concat(expr* r1, expr* r2);
223 
224     expr_ref simplify_path(expr* path);
225 
226     bool lt_char(expr* ch1, expr* ch2);
227     bool eq_char(expr* ch1, expr* ch2);
228     bool neq_char(expr* ch1, expr* ch2);
229     bool le_char(expr* ch1, expr* ch2);
230     bool pred_implies(expr* a, expr* b);
231     bool are_complements(expr* r1, expr* r2) const;
232     bool is_subset(expr* r1, expr* r2) const;
233 
234     br_status mk_seq_unit(expr* e, expr_ref& result);
235     br_status mk_seq_concat(expr* a, expr* b, expr_ref& result);
236     br_status mk_seq_length(expr* a, expr_ref& result);
237     expr_ref mk_len(rational const& offset, expr_ref_vector const& xs);
238     bool extract_pop_suffix(expr_ref_vector const& as, expr* b, expr* c, expr_ref& result);
239     bool extract_push_offset(expr_ref_vector const& as, expr* b, expr* c, expr_ref& result);
240     bool extract_push_length(expr_ref_vector& as, expr* b, expr* c, expr_ref& result);
241     br_status mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& result);
242     br_status mk_seq_contains(expr* a, expr* b, expr_ref& result);
243     br_status mk_seq_at(expr* a, expr* b, expr_ref& result);
244     br_status mk_seq_nth(expr* a, expr* b, expr_ref& result);
245     br_status mk_seq_nth_i(expr* a, expr* b, expr_ref& result);
246     br_status mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result);
247     br_status mk_seq_last_index(expr* a, expr* b, expr_ref& result);
248     br_status mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result);
249     br_status mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref& result);
250     br_status mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result);
251     br_status mk_seq_replace_re(expr* a, expr* b, expr* c, expr_ref& result);
252     br_status mk_seq_prefix(expr* a, expr* b, expr_ref& result);
253     br_status mk_seq_suffix(expr* a, expr* b, expr_ref& result);
254     br_status mk_str_units(func_decl* f, expr_ref& result);
255     br_status mk_str_itos(expr* a, expr_ref& result);
256     br_status mk_str_stoi(expr* a, expr_ref& result);
257     br_status mk_str_ubv2s(expr* a, expr_ref& result);
258     br_status mk_str_sbv2s(expr* a, expr_ref& result);
259     br_status mk_str_in_regexp(expr* a, expr* b, expr_ref& result);
260     br_status mk_str_to_regexp(expr* a, expr_ref& result);
261     br_status mk_str_le(expr* a, expr* b, expr_ref& result);
262     br_status mk_str_lt(expr* a, expr* b, expr_ref& result);
263     br_status mk_str_from_code(expr* a, expr_ref& result);
264     br_status mk_str_to_code(expr* a, expr_ref& result);
265     br_status mk_str_is_digit(expr* a, expr_ref& result);
266     br_status mk_re_concat(expr* a, expr* b, expr_ref& result);
267     br_status mk_re_union(expr* a, expr* b, expr_ref& result);
268     br_status mk_re_inter(expr* a, expr* b, expr_ref& result);
269     br_status mk_re_union0(expr* a, expr* b, expr_ref& result);
270     br_status mk_re_inter0(expr* a, expr* b, expr_ref& result);
271     br_status mk_re_complement(expr* a, expr_ref& result);
272     br_status mk_re_star(expr* a, expr_ref& result);
273     br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
274     br_status mk_re_plus(expr* a, expr_ref& result);
275     br_status mk_re_opt(expr* a, expr_ref& result);
276     br_status mk_re_power(func_decl* f, expr* a, expr_ref& result);
277     br_status mk_re_loop(func_decl* f, unsigned num_args, expr* const* args, expr_ref& result);
278     br_status mk_re_range(expr* lo, expr* hi, expr_ref& result);
279     br_status mk_re_reverse(expr* r, expr_ref& result);
280     br_status mk_re_derivative(expr* ele, expr* r, expr_ref& result);
281 
282     br_status lift_ites_throttled(func_decl* f, unsigned n, expr* const* args, expr_ref& result);
283     bool lift_ites_filter(func_decl* f, expr* ite);
284 
285     br_status reduce_re_eq(expr* a, expr* b, expr_ref& result);
286     br_status reduce_re_is_empty(expr* r, expr_ref& result);
287 
288     bool non_overlap(expr_ref_vector const& p1, expr_ref_vector const& p2) const;
289     bool non_overlap(zstring const& p1, zstring const& p2) const;
290     bool rewrite_contains_pattern(expr* a, expr* b, expr_ref& result);
291     bool has_fixed_length_constraint(expr* a, unsigned& len);
292     /* r = ite(c1,ite(c2,to_re(s),to_re(t)),to_re(u)) ==> returns true, result = ite(c1,ite(c2,s,t),u)*/
293     bool lift_str_from_to_re_ite(expr * r, expr_ref & result);
294     /* same as lift_to_re_from_ite and also: r = to_re(u) ==> returns true, result = u */
295     bool lift_str_from_to_re(expr * r, expr_ref & result);
296 
297     br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result);
298     br_status mk_eq_helper(expr* a, expr* b, expr_ref& result);
299 
300     bool cannot_contain_prefix(expr* a, expr* b);
301     bool cannot_contain_suffix(expr* a, expr* b);
zero()302     expr_ref zero() { return expr_ref(m_autil.mk_int(0), m()); }
one()303     expr_ref one() { return expr_ref(m_autil.mk_int(1), m()); }
minus_one()304     expr_ref minus_one() { return expr_ref(m_autil.mk_int(-1), m()); }
305     expr_ref mk_sub(expr* a, rational const& n);
mk_sub(expr * a,unsigned n)306     expr_ref mk_sub(expr* a, unsigned n) { return mk_sub(a, rational(n)); }
307 
308     bool is_suffix(expr* s, expr* offset, expr* len);
309     bool is_prefix(expr* s, expr* offset, expr* len);
310     bool sign_is_determined(expr* len, sign& s);
311 
312     bool set_empty(unsigned sz, expr* const* es, bool all, expr_ref_pair_vector& eqs);
313     bool reduce_non_overlap(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
314     bool reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
315     bool reduce_by_length(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
316     bool reduce_itos(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs);
317     bool reduce_eq_empty(expr* l, expr* r, expr_ref& result);
318     bool min_length(expr_ref_vector const& es, unsigned& len);
319     bool min_length(expr* e, unsigned& len);
320     bool max_length(expr* e, rational& len);
321     lbool eq_length(expr* x, expr* y);
322     expr* concat_non_empty(expr_ref_vector& es);
323     bool reduce_by_char(expr_ref& r, expr* ch, unsigned depth);
324 
325     bool is_string(unsigned n, expr* const* es, zstring& s) const;
326 
327     void add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
328     bool is_sequence(expr* e, expr_ref_vector& seq);
329     bool is_sequence(eautomaton& aut, expr_ref_vector& seq);
330     bool get_lengths(expr* e, expr_ref_vector& lens, rational& pos);
331     bool reduce_back(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs);
332     bool reduce_front(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs);
333     void remove_empty_and_concats(expr_ref_vector& es);
334     void remove_leading(unsigned n, expr_ref_vector& es);
335 
re()336     class seq_util::rex& re() { return u().re; }
re()337     class seq_util::rex const& re() const { return u().re; }
str()338     class seq_util::str& str() { return u().str; }
str()339     class seq_util::str const& str() const { return u().str; }
340 
341     void intersect(unsigned lo, unsigned hi, svector<std::pair<unsigned, unsigned>>& ranges);
342 
343 public:
344     seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
m_util(m)345         m_util(m), m_autil(m), m_br(m), m_re2aut(m), m_op_cache(m), m_es(m),
346         m_lhs(m), m_rhs(m), m_coalesce_chars(true) {
347     }
m()348     ast_manager & m() const { return m_util.get_manager(); }
get_fid()349     family_id get_fid() const { return m_util.get_family_id(); }
u()350     seq_util const& u() const { return m_util; }
u()351     seq_util& u() { return m_util; }
352 
353     void updt_params(params_ref const & p);
354     static void get_param_descrs(param_descrs & r);
355 
set_solver(expr_solver * solver)356     void set_solver(expr_solver* solver) { m_re2aut.set_solver(solver); }
has_solver()357     bool has_solver() { return m_re2aut.has_solver(); }
358 
coalesce_chars()359     bool coalesce_chars() const { return m_coalesce_chars; }
360 
361     br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
362     br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);
363     br_status mk_le_core(expr* lhs, expr* rhs, expr_ref& result);
364     br_status mk_bool_app(func_decl* f, unsigned n, expr* const* args, expr_ref& result);
365 
mk_app(func_decl * f,expr_ref_vector const & args)366     expr_ref mk_app(func_decl* f, expr_ref_vector const& args) { return mk_app(f, args.size(), args.data()); }
mk_app(func_decl * f,unsigned n,expr * const * args)367     expr_ref mk_app(func_decl* f, unsigned n, expr* const* args) {
368         expr_ref result(m());
369         if (f->get_family_id() != u().get_family_id() ||
370             BR_FAILED == mk_app_core(f, n, args, result))
371             result = m().mk_app(f, n, args);
372         return result;
373     }
374 
375     /*
376     * makes concat and simplifies
377     */
mk_re_append(expr * r1,expr * r2)378     expr_ref mk_re_append(expr* r1, expr* r2) {
379         expr_ref result(m());
380         if (mk_re_concat(r1, r2, result) == BR_FAILED)
381             result = re().mk_concat(r1, r2);
382         return result;
383     }
384 
385     /**
386      * check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
387      */
388     bool is_re_contains_pattern(expr* r, vector<expr_ref_vector>& patterns);
389 
390     bool reduce_eq(expr* l, expr* r, expr_ref_pair_vector& new_eqs, bool& change);
391 
392     bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs, bool& change);
393 
394     bool reduce_contains(expr* a, expr* b, expr_ref_vector& disj);
395 
396     expr_ref mk_length(expr* s);
397 
398     void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs);
399 
400     /*
401     create the nullability check for r
402     */
403     expr_ref is_nullable(expr* r);
404     /*
405     make the derivative of r wrt the given element ele
406     */
407     expr_ref mk_derivative(expr* ele, expr* r);
408     /*
409     make the derivative of r wrt the canonical variable v0 = (:var 0),
410     for example mk_derivative(a+) = (if (v0 = 'a') then a* else [])
411     */
412     expr_ref mk_derivative(expr* r);
413 
414     // heuristic elimination of element from condition that comes form a derivative.
415     // special case optimization for conjunctions of equalities, disequalities and ranges.
416     void elim_condition(expr* elem, expr_ref& cond);
417 };
418 
419