1 /*++ 2 Copyright (c) 2015 Microsoft Corporation 3 4 Module Name: 5 6 seq_rewriter.h 7 8 Abstract: 9 10 Basic rewriting rules for sequences constraints. 11 12 Author: 13 14 Nikolaj Bjorner (nbjorner) 2015-12-5 15 16 Notes: 17 18 --*/ 19 #pragma once 20 21 #include "ast/seq_decl_plugin.h" 22 #include "ast/ast_pp.h" 23 #include "ast/arith_decl_plugin.h" 24 #include "ast/rewriter/rewriter_types.h" 25 #include "ast/rewriter/bool_rewriter.h" 26 #include "util/params.h" 27 #include "util/lbool.h" 28 #include "util/sign.h" 29 #include "math/automata/automaton.h" 30 #include "math/automata/symbolic_automata.h" 31 32 33 inline std::ostream& operator<<(std::ostream& out, expr_ref_pair_vector const& es) { 34 for (auto const& p : es) { 35 out << expr_ref(p.first, es.get_manager()) << "; " << expr_ref(p.second, es.get_manager()) << "\n"; 36 } 37 return out; 38 } 39 40 class sym_expr { 41 enum ty { 42 t_char, 43 t_pred, 44 t_not, 45 t_range 46 }; 47 ty m_ty; 48 sort* m_sort; 49 sym_expr* m_expr; 50 expr_ref m_t; 51 expr_ref m_s; 52 unsigned m_ref; sym_expr(ty ty,expr_ref & t,expr_ref & s,sort * srt,sym_expr * e)53 sym_expr(ty ty, expr_ref& t, expr_ref& s, sort* srt, sym_expr* e) : 54 m_ty(ty), m_sort(srt), m_expr(e), m_t(t), m_s(s), m_ref(0) {} 55 public: ~sym_expr()56 ~sym_expr() { if (m_expr) m_expr->dec_ref(); } 57 expr_ref accept(expr* e); mk_char(expr_ref & t)58 static sym_expr* mk_char(expr_ref& t) { return alloc(sym_expr, t_char, t, t, t->get_sort(), nullptr); } mk_char(ast_manager & m,expr * t)59 static sym_expr* mk_char(ast_manager& m, expr* t) { expr_ref tr(t, m); return mk_char(tr); } mk_pred(expr_ref & t,sort * s)60 static sym_expr* mk_pred(expr_ref& t, sort* s) { return alloc(sym_expr, t_pred, t, t, s, nullptr); } mk_range(expr_ref & lo,expr_ref & hi)61 static sym_expr* mk_range(expr_ref& lo, expr_ref& hi) { return alloc(sym_expr, t_range, lo, hi, hi->get_sort(), nullptr); } mk_not(ast_manager & m,sym_expr * e)62 static sym_expr* mk_not(ast_manager& m, sym_expr* e) { expr_ref f(m); e->inc_ref(); return alloc(sym_expr, t_not, f, f, e->get_sort(), e); } inc_ref()63 void inc_ref() { ++m_ref; } dec_ref()64 void dec_ref() { --m_ref; if (m_ref == 0) dealloc(this); } 65 std::ostream& display(std::ostream& out) const; is_char()66 bool is_char() const { return m_ty == t_char; } is_pred()67 bool is_pred() const { return !is_char(); } is_range()68 bool is_range() const { return m_ty == t_range; } is_not()69 bool is_not() const { return m_ty == t_not; } get_sort()70 sort* get_sort() const { return m_sort; } get_char()71 expr* get_char() const { SASSERT(is_char()); return m_t; } get_pred()72 expr* get_pred() const { SASSERT(is_pred()); return m_t; } get_lo()73 expr* get_lo() const { SASSERT(is_range()); return m_t; } get_hi()74 expr* get_hi() const { SASSERT(is_range()); return m_s; } get_arg()75 sym_expr* get_arg() const { SASSERT(is_not()); return m_expr; } 76 }; 77 78 class sym_expr_manager { 79 public: inc_ref(sym_expr * s)80 void inc_ref(sym_expr* s) { if (s) s->inc_ref(); } dec_ref(sym_expr * s)81 void dec_ref(sym_expr* s) { if (s) s->dec_ref(); } 82 }; 83 84 class expr_solver { 85 public: ~expr_solver()86 virtual ~expr_solver() {} 87 virtual lbool check_sat(expr* e) = 0; 88 }; 89 90 typedef automaton<sym_expr, sym_expr_manager> eautomaton; 91 class re2automaton { 92 typedef boolean_algebra<sym_expr*> boolean_algebra_t; 93 typedef symbolic_automata<sym_expr, sym_expr_manager> symbolic_automata_t; 94 ast_manager& m; 95 sym_expr_manager sm; 96 seq_util u; 97 scoped_ptr<expr_solver> m_solver; 98 scoped_ptr<boolean_algebra_t> m_ba; 99 scoped_ptr<symbolic_automata_t> m_sa; 100 101 bool is_unit_char(expr* e, expr_ref& ch); 102 eautomaton* re2aut(expr* e); 103 eautomaton* seq2aut(expr* e); 104 public: 105 re2automaton(ast_manager& m); 106 eautomaton* operator()(expr* e); 107 void set_solver(expr_solver* solver); has_solver()108 bool has_solver() const { return m_solver; } 109 eautomaton* mk_product(eautomaton *a1, eautomaton *a2); 110 }; 111 112 /** 113 \brief Cheap rewrite rules for seq constraints 114 */ 115 class seq_rewriter { 116 117 class op_cache { 118 struct op_entry { 119 decl_kind k; 120 expr* a, *b, *c, *r; op_entryop_entry121 op_entry(decl_kind k, expr* a, expr* b, expr* c, expr* r): k(k), a(a), b(b), c(c), r(r) {} op_entryop_entry122 op_entry():k(0), a(nullptr), b(nullptr), c(nullptr), r(nullptr) {} 123 }; 124 125 struct hash_entry { operatorhash_entry126 unsigned operator()(op_entry const& e) const { 127 return combine_hash(mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0), e.c ? e.c->get_id() : 0); 128 } 129 }; 130 131 struct eq_entry { operatoreq_entry132 bool operator()(op_entry const& a, op_entry const& b) const { 133 return a.k == b.k && a.a == b.a && a.b == b.b && a.c == b.c; 134 } 135 }; 136 137 typedef hashtable<op_entry, hash_entry, eq_entry> op_table; 138 139 unsigned m_max_cache_size { 10000 }; 140 expr_ref_vector m_trail; 141 op_table m_table; 142 void cleanup(); 143 144 public: 145 op_cache(ast_manager& m); 146 expr* find(decl_kind op, expr* a, expr* b, expr* c); 147 void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r); 148 }; 149 150 seq_util m_util; 151 arith_util m_autil; 152 bool_rewriter m_br; 153 re2automaton m_re2aut; 154 op_cache m_op_cache; 155 expr_ref_vector m_es, m_lhs, m_rhs; 156 bool m_coalesce_chars; 157 158 enum length_comparison { 159 shorter_c, 160 longer_c, 161 same_length_c, 162 unknown_c 163 }; 164 165 166 compare_lengths(expr_ref_vector const & as,expr_ref_vector const & bs)167 length_comparison compare_lengths(expr_ref_vector const& as, expr_ref_vector const& bs) { 168 return compare_lengths(as.size(), as.data(), bs.size(), bs.data()); 169 } 170 length_comparison compare_lengths(unsigned sza, expr* const* as, unsigned szb, expr* const* bs); 171 172 173 bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail); 174 bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail); 175 bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail); 176 bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail); 177 178 expr_ref re_and(expr* cond, expr* r); 179 expr_ref re_predicate(expr* cond, sort* seq_sort); 180 181 expr_ref mk_seq_concat(expr* a, expr* b); 182 183 // Construct the expressions for taking the first element, the last element, the rest, and the butlast element 184 expr_ref mk_seq_first(expr* s); 185 expr_ref mk_seq_rest(expr* s); 186 expr_ref mk_seq_last(expr* s); 187 expr_ref mk_seq_butlast(expr* s); 188 189 bool try_get_unit_values(expr* s, expr_ref_vector& result); 190 //replace b in a by c into result 191 void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result); 192 193 // Calculate derivative, memoized and enforcing a normal form 194 expr_ref is_nullable_rec(expr* r); 195 expr_ref mk_derivative_rec(expr* ele, expr* r); 196 expr_ref mk_der_op(decl_kind k, expr* a, expr* b); 197 expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); 198 expr_ref mk_der_concat(expr* a, expr* b); 199 expr_ref mk_der_union(expr* a, expr* b); 200 expr_ref mk_der_inter(expr* a, expr* b); 201 expr_ref mk_der_compl(expr* a); 202 expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort); 203 expr_ref mk_der_antimorov_union(expr* r1, expr* r2); 204 bool ite_bdds_compatabile(expr* a, expr* b); 205 /* if r has the form deriv(en..deriv(e1,to_re(s))..) returns 's = [e1..en]' else returns '() in r'*/ 206 expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort); 207 #ifdef Z3DEBUG 208 bool check_deriv_normal_form(expr* r, int level = 3); 209 #endif 210 211 void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result); 212 213 expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path); 214 expr_ref mk_in_antimirov_rec(expr* s, expr* d); 215 expr_ref mk_in_antimirov(expr* s, expr* d); 216 217 expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path); 218 expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); 219 expr_ref mk_antimirov_deriv_negate(expr* d); 220 expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); 221 expr_ref mk_regex_reverse(expr* r); 222 expr_ref mk_regex_concat(expr* r1, expr* r2); 223 224 expr_ref simplify_path(expr* path); 225 226 bool lt_char(expr* ch1, expr* ch2); 227 bool eq_char(expr* ch1, expr* ch2); 228 bool neq_char(expr* ch1, expr* ch2); 229 bool le_char(expr* ch1, expr* ch2); 230 bool pred_implies(expr* a, expr* b); 231 bool are_complements(expr* r1, expr* r2) const; 232 bool is_subset(expr* r1, expr* r2) const; 233 234 br_status mk_seq_unit(expr* e, expr_ref& result); 235 br_status mk_seq_concat(expr* a, expr* b, expr_ref& result); 236 br_status mk_seq_length(expr* a, expr_ref& result); 237 expr_ref mk_len(rational const& offset, expr_ref_vector const& xs); 238 bool extract_pop_suffix(expr_ref_vector const& as, expr* b, expr* c, expr_ref& result); 239 bool extract_push_offset(expr_ref_vector const& as, expr* b, expr* c, expr_ref& result); 240 bool extract_push_length(expr_ref_vector& as, expr* b, expr* c, expr_ref& result); 241 br_status mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& result); 242 br_status mk_seq_contains(expr* a, expr* b, expr_ref& result); 243 br_status mk_seq_at(expr* a, expr* b, expr_ref& result); 244 br_status mk_seq_nth(expr* a, expr* b, expr_ref& result); 245 br_status mk_seq_nth_i(expr* a, expr* b, expr_ref& result); 246 br_status mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result); 247 br_status mk_seq_last_index(expr* a, expr* b, expr_ref& result); 248 br_status mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result); 249 br_status mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref& result); 250 br_status mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result); 251 br_status mk_seq_replace_re(expr* a, expr* b, expr* c, expr_ref& result); 252 br_status mk_seq_prefix(expr* a, expr* b, expr_ref& result); 253 br_status mk_seq_suffix(expr* a, expr* b, expr_ref& result); 254 br_status mk_str_units(func_decl* f, expr_ref& result); 255 br_status mk_str_itos(expr* a, expr_ref& result); 256 br_status mk_str_stoi(expr* a, expr_ref& result); 257 br_status mk_str_ubv2s(expr* a, expr_ref& result); 258 br_status mk_str_sbv2s(expr* a, expr_ref& result); 259 br_status mk_str_in_regexp(expr* a, expr* b, expr_ref& result); 260 br_status mk_str_to_regexp(expr* a, expr_ref& result); 261 br_status mk_str_le(expr* a, expr* b, expr_ref& result); 262 br_status mk_str_lt(expr* a, expr* b, expr_ref& result); 263 br_status mk_str_from_code(expr* a, expr_ref& result); 264 br_status mk_str_to_code(expr* a, expr_ref& result); 265 br_status mk_str_is_digit(expr* a, expr_ref& result); 266 br_status mk_re_concat(expr* a, expr* b, expr_ref& result); 267 br_status mk_re_union(expr* a, expr* b, expr_ref& result); 268 br_status mk_re_inter(expr* a, expr* b, expr_ref& result); 269 br_status mk_re_union0(expr* a, expr* b, expr_ref& result); 270 br_status mk_re_inter0(expr* a, expr* b, expr_ref& result); 271 br_status mk_re_complement(expr* a, expr_ref& result); 272 br_status mk_re_star(expr* a, expr_ref& result); 273 br_status mk_re_diff(expr* a, expr* b, expr_ref& result); 274 br_status mk_re_plus(expr* a, expr_ref& result); 275 br_status mk_re_opt(expr* a, expr_ref& result); 276 br_status mk_re_power(func_decl* f, expr* a, expr_ref& result); 277 br_status mk_re_loop(func_decl* f, unsigned num_args, expr* const* args, expr_ref& result); 278 br_status mk_re_range(expr* lo, expr* hi, expr_ref& result); 279 br_status mk_re_reverse(expr* r, expr_ref& result); 280 br_status mk_re_derivative(expr* ele, expr* r, expr_ref& result); 281 282 br_status lift_ites_throttled(func_decl* f, unsigned n, expr* const* args, expr_ref& result); 283 bool lift_ites_filter(func_decl* f, expr* ite); 284 285 br_status reduce_re_eq(expr* a, expr* b, expr_ref& result); 286 br_status reduce_re_is_empty(expr* r, expr_ref& result); 287 288 bool non_overlap(expr_ref_vector const& p1, expr_ref_vector const& p2) const; 289 bool non_overlap(zstring const& p1, zstring const& p2) const; 290 bool rewrite_contains_pattern(expr* a, expr* b, expr_ref& result); 291 bool has_fixed_length_constraint(expr* a, unsigned& len); 292 /* r = ite(c1,ite(c2,to_re(s),to_re(t)),to_re(u)) ==> returns true, result = ite(c1,ite(c2,s,t),u)*/ 293 bool lift_str_from_to_re_ite(expr * r, expr_ref & result); 294 /* same as lift_to_re_from_ite and also: r = to_re(u) ==> returns true, result = u */ 295 bool lift_str_from_to_re(expr * r, expr_ref & result); 296 297 br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result); 298 br_status mk_eq_helper(expr* a, expr* b, expr_ref& result); 299 300 bool cannot_contain_prefix(expr* a, expr* b); 301 bool cannot_contain_suffix(expr* a, expr* b); zero()302 expr_ref zero() { return expr_ref(m_autil.mk_int(0), m()); } one()303 expr_ref one() { return expr_ref(m_autil.mk_int(1), m()); } minus_one()304 expr_ref minus_one() { return expr_ref(m_autil.mk_int(-1), m()); } 305 expr_ref mk_sub(expr* a, rational const& n); mk_sub(expr * a,unsigned n)306 expr_ref mk_sub(expr* a, unsigned n) { return mk_sub(a, rational(n)); } 307 308 bool is_suffix(expr* s, expr* offset, expr* len); 309 bool is_prefix(expr* s, expr* offset, expr* len); 310 bool sign_is_determined(expr* len, sign& s); 311 312 bool set_empty(unsigned sz, expr* const* es, bool all, expr_ref_pair_vector& eqs); 313 bool reduce_non_overlap(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs); 314 bool reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs); 315 bool reduce_by_length(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs); 316 bool reduce_itos(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs); 317 bool reduce_eq_empty(expr* l, expr* r, expr_ref& result); 318 bool min_length(expr_ref_vector const& es, unsigned& len); 319 bool min_length(expr* e, unsigned& len); 320 bool max_length(expr* e, rational& len); 321 lbool eq_length(expr* x, expr* y); 322 expr* concat_non_empty(expr_ref_vector& es); 323 bool reduce_by_char(expr_ref& r, expr* ch, unsigned depth); 324 325 bool is_string(unsigned n, expr* const* es, zstring& s) const; 326 327 void add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond); 328 bool is_sequence(expr* e, expr_ref_vector& seq); 329 bool is_sequence(eautomaton& aut, expr_ref_vector& seq); 330 bool get_lengths(expr* e, expr_ref_vector& lens, rational& pos); 331 bool reduce_back(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs); 332 bool reduce_front(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs); 333 void remove_empty_and_concats(expr_ref_vector& es); 334 void remove_leading(unsigned n, expr_ref_vector& es); 335 re()336 class seq_util::rex& re() { return u().re; } re()337 class seq_util::rex const& re() const { return u().re; } str()338 class seq_util::str& str() { return u().str; } str()339 class seq_util::str const& str() const { return u().str; } 340 341 void intersect(unsigned lo, unsigned hi, svector<std::pair<unsigned, unsigned>>& ranges); 342 343 public: 344 seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): m_util(m)345 m_util(m), m_autil(m), m_br(m), m_re2aut(m), m_op_cache(m), m_es(m), 346 m_lhs(m), m_rhs(m), m_coalesce_chars(true) { 347 } m()348 ast_manager & m() const { return m_util.get_manager(); } get_fid()349 family_id get_fid() const { return m_util.get_family_id(); } u()350 seq_util const& u() const { return m_util; } u()351 seq_util& u() { return m_util; } 352 353 void updt_params(params_ref const & p); 354 static void get_param_descrs(param_descrs & r); 355 set_solver(expr_solver * solver)356 void set_solver(expr_solver* solver) { m_re2aut.set_solver(solver); } has_solver()357 bool has_solver() { return m_re2aut.has_solver(); } 358 coalesce_chars()359 bool coalesce_chars() const { return m_coalesce_chars; } 360 361 br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result); 362 br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result); 363 br_status mk_le_core(expr* lhs, expr* rhs, expr_ref& result); 364 br_status mk_bool_app(func_decl* f, unsigned n, expr* const* args, expr_ref& result); 365 mk_app(func_decl * f,expr_ref_vector const & args)366 expr_ref mk_app(func_decl* f, expr_ref_vector const& args) { return mk_app(f, args.size(), args.data()); } mk_app(func_decl * f,unsigned n,expr * const * args)367 expr_ref mk_app(func_decl* f, unsigned n, expr* const* args) { 368 expr_ref result(m()); 369 if (f->get_family_id() != u().get_family_id() || 370 BR_FAILED == mk_app_core(f, n, args, result)) 371 result = m().mk_app(f, n, args); 372 return result; 373 } 374 375 /* 376 * makes concat and simplifies 377 */ mk_re_append(expr * r1,expr * r2)378 expr_ref mk_re_append(expr* r1, expr* r2) { 379 expr_ref result(m()); 380 if (mk_re_concat(r1, r2, result) == BR_FAILED) 381 result = re().mk_concat(r1, r2); 382 return result; 383 } 384 385 /** 386 * check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences 387 */ 388 bool is_re_contains_pattern(expr* r, vector<expr_ref_vector>& patterns); 389 390 bool reduce_eq(expr* l, expr* r, expr_ref_pair_vector& new_eqs, bool& change); 391 392 bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs, bool& change); 393 394 bool reduce_contains(expr* a, expr* b, expr_ref_vector& disj); 395 396 expr_ref mk_length(expr* s); 397 398 void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs); 399 400 /* 401 create the nullability check for r 402 */ 403 expr_ref is_nullable(expr* r); 404 /* 405 make the derivative of r wrt the given element ele 406 */ 407 expr_ref mk_derivative(expr* ele, expr* r); 408 /* 409 make the derivative of r wrt the canonical variable v0 = (:var 0), 410 for example mk_derivative(a+) = (if (v0 = 'a') then a* else []) 411 */ 412 expr_ref mk_derivative(expr* r); 413 414 // heuristic elimination of element from condition that comes form a derivative. 415 // special case optimization for conjunctions of equalities, disequalities and ranges. 416 void elim_condition(expr* elem, expr_ref& cond); 417 }; 418 419