1 /*++ 2 Copyright (c) 2011 Microsoft Corporation 3 4 Module Name: 5 6 seq_decl_plugin.h 7 8 Abstract: 9 10 decl_plugin for the theory of sequences 11 12 Author: 13 14 Nikolaj Bjorner (nbjorner) 2011-11-14 15 16 Revision History: 17 18 Updated to string sequences 2015-12-5 19 20 Add SMTLIB 2.6 support 2020-5-17 21 22 --*/ 23 #pragma once 24 25 #include "ast/ast.h" 26 #include "ast/char_decl_plugin.h" 27 #include "util/lbool.h" 28 #include "util/zstring.h" 29 30 enum seq_sort_kind { 31 SEQ_SORT, 32 RE_SORT, 33 _STRING_SORT, 34 _REGLAN_SORT 35 }; 36 37 enum seq_op_kind { 38 OP_SEQ_UNIT, 39 OP_SEQ_EMPTY, 40 OP_SEQ_CONCAT, 41 OP_SEQ_PREFIX, 42 OP_SEQ_SUFFIX, 43 OP_SEQ_CONTAINS, 44 OP_SEQ_EXTRACT, 45 OP_SEQ_REPLACE, 46 OP_SEQ_AT, 47 OP_SEQ_NTH, // NTH function exposed over API. Rewritten to NTH(s,i) := if (0 <= i < len(s)) then NTH_I(s,i) else NTH_U(s,i) 48 OP_SEQ_NTH_I, // Interpreted variant of Nth for indices within defined domain. 49 OP_SEQ_NTH_U, // Uninterpreted variant of Nth for indices outside of uniquely defined domain. 50 OP_SEQ_LENGTH, 51 OP_SEQ_INDEX, 52 OP_SEQ_LAST_INDEX, 53 OP_SEQ_TO_RE, 54 OP_SEQ_IN_RE, 55 OP_SEQ_REPLACE_RE_ALL, // Seq -> RegEx -> Seq -> Seq 56 OP_SEQ_REPLACE_RE, // Seq -> RegEx -> Seq -> Seq 57 OP_SEQ_REPLACE_ALL, // Seq -> Seq -> Seq -> Seq 58 59 OP_RE_PLUS, 60 OP_RE_STAR, 61 OP_RE_OPTION, 62 OP_RE_RANGE, 63 OP_RE_CONCAT, 64 OP_RE_UNION, 65 OP_RE_DIFF, 66 OP_RE_INTERSECT, 67 OP_RE_LOOP, 68 OP_RE_POWER, 69 OP_RE_COMPLEMENT, 70 OP_RE_EMPTY_SET, 71 OP_RE_FULL_SEQ_SET, 72 OP_RE_FULL_CHAR_SET, 73 OP_RE_OF_PRED, 74 OP_RE_REVERSE, 75 OP_RE_DERIVATIVE, // Char -> RegEx -> RegEx 76 77 78 // string specific operators. 79 OP_STRING_CONST, 80 OP_STRING_ITOS, 81 OP_STRING_STOI, 82 OP_STRING_UBVTOS, 83 OP_STRING_SBVTOS, 84 OP_STRING_LT, 85 OP_STRING_LE, 86 OP_STRING_IS_DIGIT, 87 OP_STRING_TO_CODE, 88 OP_STRING_FROM_CODE, 89 90 // internal only operators. Converted to SEQ variants. 91 _OP_STRING_FROM_CHAR, 92 _OP_STRING_STRREPL, 93 _OP_STRING_CONCAT, 94 _OP_STRING_LENGTH, 95 _OP_STRING_STRCTN, 96 _OP_STRING_PREFIX, 97 _OP_STRING_SUFFIX, 98 _OP_STRING_IN_REGEXP, 99 _OP_STRING_TO_REGEXP, 100 _OP_STRING_CHARAT, 101 _OP_STRING_SUBSTR, 102 _OP_STRING_STRIDOF, 103 _OP_REGEXP_EMPTY, 104 _OP_REGEXP_FULL_CHAR, 105 _OP_RE_IS_NULLABLE, 106 _OP_RE_ANTIMOROV_UNION, // Lifted union for antimorov-style derivatives 107 _OP_SEQ_SKOLEM, 108 LAST_SEQ_OP 109 }; 110 111 112 class seq_decl_plugin : public decl_plugin { 113 struct psig { 114 symbol m_name; 115 unsigned m_num_params; 116 sort_ref_vector m_dom; 117 sort_ref m_range; psigpsig118 psig(ast_manager& m, char const* name, unsigned n, unsigned dsz, sort* const* dom, sort* rng): 119 m_name(name), 120 m_num_params(n), 121 m_dom(m), 122 m_range(rng, m) 123 { 124 m_dom.append(dsz, dom); 125 } 126 }; 127 128 ptr_vector<psig> m_sigs; 129 ptr_vector<sort> m_binding; 130 bool m_init; 131 symbol m_stringc_sym; 132 sort* m_string; 133 sort* m_char; 134 sort* m_reglan; 135 bool m_has_re; 136 bool m_has_seq; 137 char_decl_plugin* m_char_plugin { nullptr }; 138 139 void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); 140 141 void match_assoc(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); 142 143 bool match(ptr_vector<sort>& binding, sort* s, sort* sP); 144 145 sort* apply_binding(ptr_vector<sort> const& binding, sort* s); 146 147 bool is_sort_param(sort* s, unsigned& idx); 148 149 func_decl* mk_seq_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string); 150 func_decl* mk_str_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq); 151 func_decl* mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq); 152 func_decl* mk_left_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq); 153 func_decl* mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq, bool is_right); 154 func_decl* mk_ubv2s(unsigned arity, sort* const* domain) const; 155 func_decl* mk_sbv2s(unsigned arity, sort* const* domain) const; 156 157 158 void init(); 159 160 void set_manager(ast_manager * m, family_id id) override; 161 162 sort* mk_reglan(); 163 164 public: 165 seq_decl_plugin(); 166 167 void finalize() override; 168 mk_fresh()169 decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); } 170 171 sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override; 172 173 func_decl * mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters, 174 unsigned arity, sort * const * domain, sort * range) override; 175 176 void get_op_names(svector<builtin_name> & op_names, symbol const & logic) override; 177 178 void get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) override; 179 180 bool is_value(app * e) const override; 181 182 bool is_unique_value(app * e) const override; 183 184 bool are_equal(app* a, app* b) const override; 185 186 bool are_distinct(app* a, app* b) const override; 187 188 expr * get_some_value(sort * s) override; 189 is_char(sort * a)190 bool is_char(sort* a) const { return a == m_char; } 191 max_char()192 unsigned max_char() const { return get_char_plugin().max_char(); } num_bits()193 unsigned num_bits() const { return get_char_plugin().num_bits(); } 194 195 app* mk_string(zstring const& s); 196 app* mk_char(unsigned ch); 197 has_re()198 bool has_re() const { return m_has_re; } has_seq()199 bool has_seq() const { return m_has_seq; } 200 201 bool is_considered_uninterpreted(func_decl * f) override; 202 char_sort()203 sort* char_sort() const { return m_char; } string_sort()204 sort* string_sort() const { return m_string; } 205 get_char_plugin()206 char_decl_plugin& get_char_plugin() const { return *m_char_plugin; } 207 208 }; 209 210 class seq_util { 211 ast_manager& m; 212 seq_decl_plugin& seq; 213 char_decl_plugin& ch; 214 family_id m_fid; 215 216 public: 217 218 unsigned max_plus(unsigned x, unsigned y) const; 219 unsigned max_mul(unsigned x, unsigned y) const; 220 get_manager()221 ast_manager& get_manager() const { return m; } 222 mk_char_sort()223 sort* mk_char_sort() const { return seq.char_sort(); } mk_string_sort()224 sort* mk_string_sort() const { return seq.string_sort(); } 225 is_char(sort * s)226 bool is_char(sort* s) const { return seq.is_char(s); } is_char(expr * e)227 bool is_char(expr* e) const { return is_char(e->get_sort()); } is_string(sort * s)228 bool is_string(sort* s) const { return is_seq(s) && seq.is_char(to_sort(s->get_parameter(0).get_ast())); } is_seq(sort * s)229 bool is_seq(sort* s) const { return is_sort_of(s, m_fid, SEQ_SORT); } is_re(sort * s)230 bool is_re(sort* s) const { return is_sort_of(s, m_fid, RE_SORT); } is_re(sort * s,sort * & seq)231 bool is_re(sort* s, sort*& seq) const { return is_sort_of(s, m_fid, RE_SORT) && (seq = to_sort(s->get_parameter(0).get_ast()), true); } is_seq(expr * e)232 bool is_seq(expr* e) const { return is_seq(e->get_sort()); } is_seq(sort * s,sort * & seq)233 bool is_seq(sort* s, sort*& seq) const { return is_seq(s) && (seq = to_sort(s->get_parameter(0).get_ast()), true); } is_re(expr * e)234 bool is_re(expr* e) const { return is_re(e->get_sort()); } is_re(expr * e,sort * & seq)235 bool is_re(expr* e, sort*& seq) const { return is_re(e->get_sort(), seq); } 236 bool is_const_char(expr* e, unsigned& c) const; is_const_char(expr * e)237 bool is_const_char(expr* e) const { unsigned c; return is_const_char(e, c); } 238 bool is_char_le(expr const* e) const; is_char_is_digit(expr const * e,expr * & d)239 bool is_char_is_digit(expr const* e, expr*& d) const { return ch.is_is_digit(e, d); } is_char_is_digit(expr const * e)240 bool is_char_is_digit(expr const* e) const { return ch.is_is_digit(e); } 241 bool is_char2int(expr const* e) const; 242 bool is_bv2char(expr const* e) const; 243 bool is_char2bv(expr const* e) const; 244 app* mk_char_bit(expr* e, unsigned i); 245 app* mk_char(unsigned ch) const; mk_char_is_digit(expr * e)246 app* mk_char_is_digit(expr* e) { return ch.mk_is_digit(e); } 247 app* mk_le(expr* ch1, expr* ch2) const; 248 app* mk_lt(expr* ch1, expr* ch2) const; mk_char2int(expr * e)249 app* mk_char2int(expr* e) { return ch.mk_to_int(e); } max_char()250 unsigned max_char() const { return seq.max_char(); } num_bits()251 unsigned num_bits() const { return seq.num_bits(); } 252 253 app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range); is_skolem(expr const * e)254 bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); } 255 256 MATCH_BINARY(is_char_le); 257 MATCH_UNARY(is_char2int); 258 MATCH_UNARY(is_char2bv); 259 MATCH_UNARY(is_bv2char); 260 has_re()261 bool has_re() const { return seq.has_re(); } has_seq()262 bool has_seq() const { return seq.has_seq(); } 263 264 class str { 265 seq_util& u; 266 ast_manager& m; 267 family_id m_fid; 268 269 270 public: str(seq_util & u)271 str(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} 272 mk_seq(sort * s)273 sort* mk_seq(sort* s) const { parameter param(s); return m.mk_sort(m_fid, SEQ_SORT, 1, ¶m); } mk_string_sort()274 sort* mk_string_sort() const { return m.mk_sort(m_fid, _STRING_SORT, 0, nullptr); } mk_empty(sort * s)275 app* mk_empty(sort* s) const { return m.mk_const(m.mk_func_decl(m_fid, OP_SEQ_EMPTY, 0, nullptr, 0, (expr*const*)nullptr, s)); } 276 app* mk_string(zstring const& s) const; 277 app* mk_char(unsigned ch) const; mk_concat(expr * a,expr * b)278 app* mk_concat(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONCAT, 2, es); } mk_concat(expr * a,expr * b,expr * c)279 app* mk_concat(expr* a, expr* b, expr* c) const { return mk_concat(a, mk_concat(b, c)); } mk_concat(unsigned n,expr * const * es,sort * s)280 expr* mk_concat(unsigned n, expr* const* es, sort* s) const { 281 if (n == 0) return mk_empty(s); 282 if (n == 1) return es[0]; 283 return m.mk_app(m_fid, OP_SEQ_CONCAT, n, es); } mk_concat(expr_ref_vector const & es,sort * s)284 expr* mk_concat(expr_ref_vector const& es, sort* s) const { return mk_concat(es.size(), es.data(), s); } mk_length(expr * a)285 app* mk_length(expr* a) const { return m.mk_app(m_fid, OP_SEQ_LENGTH, 1, &a); } mk_at(expr * s,expr * i)286 app* mk_at(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_AT, 2, es); } mk_nth(expr * s,expr * i)287 app* mk_nth(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH, 2, es); } mk_nth_i(expr * s,expr * i)288 app* mk_nth_i(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_I, 2, es); } mk_nth_u(expr * s,expr * i)289 app* mk_nth_u(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_U, 2, es); } 290 app* mk_nth_c(expr* s, unsigned i) const; 291 mk_substr(expr * a,expr * b,expr * c)292 app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); } mk_contains(expr * a,expr * b)293 app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); } mk_prefix(expr * a,expr * b)294 app* mk_prefix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_PREFIX, 2, es); } mk_suffix(expr * a,expr * b)295 app* mk_suffix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_SUFFIX, 2, es); } mk_index(expr * a,expr * b,expr * i)296 app* mk_index(expr* a, expr* b, expr* i) const { expr* es[3] = { a, b, i}; return m.mk_app(m_fid, OP_SEQ_INDEX, 3, es); } mk_last_index(expr * a,expr * b)297 app* mk_last_index(expr* a, expr* b) const { expr* es[2] = { a, b}; return m.mk_app(m_fid, OP_SEQ_LAST_INDEX, 2, es); } mk_replace(expr * a,expr * b,expr * c)298 app* mk_replace(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c}; return m.mk_app(m_fid, OP_SEQ_REPLACE, 3, es); } mk_unit(expr * u)299 app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); } 300 app* mk_char(zstring const& s, unsigned idx) const; 301 app* mk_char_bit(expr* e, unsigned i); mk_itos(expr * i)302 app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); } mk_stoi(expr * s)303 app* mk_stoi(expr* s) const { return m.mk_app(m_fid, OP_STRING_STOI, 1, &s); } mk_ubv2s(expr * b)304 app* mk_ubv2s(expr* b) const { return m.mk_app(m_fid, OP_STRING_UBVTOS, 1, &b); } mk_sbv2s(expr * b)305 app* mk_sbv2s(expr* b) const { return m.mk_app(m_fid, OP_STRING_SBVTOS, 1, &b); } 306 app* mk_is_empty(expr* s) const; mk_lex_lt(expr * a,expr * b)307 app* mk_lex_lt(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_STRING_LT, 2, es); } mk_lex_le(expr * a,expr * b)308 app* mk_lex_le(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_STRING_LE, 2, es); } mk_to_code(expr * e)309 app* mk_to_code(expr* e) const { return m.mk_app(m_fid, OP_STRING_TO_CODE, 1, &e); } mk_from_code(expr * e)310 app* mk_from_code(expr* e) const { return m.mk_app(m_fid, OP_STRING_FROM_CODE, 1, &e); } mk_is_digit(expr * e)311 app* mk_is_digit(expr* e) const { return m.mk_app(m_fid, OP_STRING_IS_DIGIT, 1, &e); } 312 313 is_nth_i(func_decl const * f)314 bool is_nth_i(func_decl const* f) const { return is_decl_of(f, m_fid, OP_SEQ_NTH_I); } is_nth_u(func_decl const * f)315 bool is_nth_u(func_decl const* f) const { return is_decl_of(f, m_fid, OP_SEQ_NTH_U); } is_skolem(func_decl const * f)316 bool is_skolem(func_decl const* f) const { return is_decl_of(f, m_fid, _OP_SEQ_SKOLEM); } 317 is_string(expr const * n)318 bool is_string(expr const * n) const { return is_app_of(n, m_fid, OP_STRING_CONST); } is_string(func_decl const * f)319 bool is_string(func_decl const* f) const { return is_decl_of(f, m_fid, OP_STRING_CONST); } 320 bool is_string(expr const* n, zstring& s) const; 321 bool is_string(func_decl const* f, zstring& s) const; is_empty(expr const * n)322 bool is_empty(expr const* n) const { 323 zstring s; 324 return is_app_of(n, m_fid, OP_SEQ_EMPTY) || (is_string(n, s) && s.empty()); 325 } is_concat(expr const * n)326 bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONCAT); } is_length(expr const * n)327 bool is_length(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LENGTH); } is_extract(expr const * n)328 bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); } is_contains(expr const * n)329 bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); } is_at(expr const * n)330 bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); } is_nth_i(expr const * n)331 bool is_nth_i(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_NTH_I); } is_nth_u(expr const * n)332 bool is_nth_u(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_NTH_U); } 333 bool is_nth_i(expr const* n, expr*& s, unsigned& idx) const; is_index(expr const * n)334 bool is_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_INDEX); } is_last_index(expr const * n)335 bool is_last_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LAST_INDEX); } is_replace(expr const * n)336 bool is_replace(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE); } is_replace_re(expr const * n)337 bool is_replace_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE); } is_replace_re_all(expr const * n)338 bool is_replace_re_all(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE_ALL); } is_replace_all(expr const * n)339 bool is_replace_all(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_ALL); } is_prefix(expr const * n)340 bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); } is_suffix(expr const * n)341 bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); } is_itos(expr const * n)342 bool is_itos(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_ITOS); } is_stoi(expr const * n)343 bool is_stoi(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_STOI); } is_ubv2s(expr const * n)344 bool is_ubv2s(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_UBVTOS); } is_sbv2s(expr const * n)345 bool is_sbv2s(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_SBVTOS); } is_in_re(expr const * n)346 bool is_in_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_IN_RE); } is_unit(expr const * n)347 bool is_unit(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_UNIT); } is_lt(expr const * n)348 bool is_lt(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_LT); } is_le(expr const * n)349 bool is_le(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_LE); } is_is_digit(expr const * n)350 bool is_is_digit(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_IS_DIGIT); } is_from_code(expr const * n)351 bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); } is_to_code(expr const * n)352 bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); } 353 354 bool is_len_sub(expr const* n, expr*& l, expr*& u, rational& k) const; 355 356 /* 357 tests if s is a single character string(c) or a unit (c) 358 */ 359 bool is_unit_string(expr const* s, expr_ref& c) const; 360 is_string_term(expr const * n)361 bool is_string_term(expr const * n) const { 362 return u.is_string(n->get_sort()); 363 } 364 is_non_string_sequence(expr const * n)365 bool is_non_string_sequence(expr const * n) const { 366 sort * s = n->get_sort(); 367 return (u.is_seq(s) && !u.is_string(s)); 368 } 369 370 MATCH_BINARY(is_concat); 371 MATCH_UNARY(is_length); 372 MATCH_TERNARY(is_extract); 373 MATCH_BINARY(is_contains); 374 MATCH_BINARY(is_at); 375 MATCH_BINARY(is_nth_i); 376 MATCH_BINARY(is_nth_u); 377 MATCH_BINARY(is_index); 378 MATCH_TERNARY(is_index); 379 MATCH_BINARY(is_last_index); 380 MATCH_TERNARY(is_replace); 381 MATCH_TERNARY(is_replace_re); 382 MATCH_TERNARY(is_replace_re_all); 383 MATCH_TERNARY(is_replace_all); 384 MATCH_BINARY(is_prefix); 385 MATCH_BINARY(is_suffix); 386 MATCH_BINARY(is_lt); 387 MATCH_BINARY(is_le); 388 MATCH_UNARY(is_itos); 389 MATCH_UNARY(is_stoi); 390 MATCH_UNARY(is_ubv2s); 391 MATCH_UNARY(is_sbv2s); 392 MATCH_UNARY(is_is_digit); 393 MATCH_UNARY(is_from_code); 394 MATCH_UNARY(is_to_code); 395 MATCH_BINARY(is_in_re); 396 MATCH_UNARY(is_unit); 397 398 void get_concat(expr* e, expr_ref_vector& es) const; 399 void get_concat_units(expr* e, expr_ref_vector& es) const; get_leftmost_concat(expr * e)400 expr* get_leftmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e1; return e; } get_rightmost_concat(expr * e)401 expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; } 402 403 unsigned min_length(expr* s) const; 404 unsigned max_length(expr* s) const; 405 }; 406 407 class rex { 408 public: 409 struct info { 410 /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/ 411 lbool known { l_undef }; 412 /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */ 413 bool classical { false }; 414 /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */ 415 bool standard { false }; 416 /* There are no uninterpreted symbols. */ 417 bool interpreted { false }; 418 /* No if-then-else is used. */ 419 bool nonbranching { false }; 420 /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */ 421 bool normalized { false }; 422 /* All bounded loops have a body that is a singleton. */ 423 bool monadic { false }; 424 /* Positive Boolean combination of ranges or predicates or singleton sequences. */ 425 bool singleton { false }; 426 /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */ 427 lbool nullable { l_undef }; 428 /* Lower bound on the length of all accepted words. */ 429 unsigned min_length { 0 }; 430 /* Maximum nesting depth of Kleene stars. */ 431 unsigned star_height { 0 }; 432 433 /* 434 Default constructor of invalid info. 435 */ infoinfo436 info() {} 437 438 /* 439 Used for constructing either an invalid info that is only used to indicate uninitialzed entry, or valid but unknown info value. 440 */ infoinfo441 info(lbool is_known) : known(is_known) {} 442 443 /* 444 General info constructor. 445 */ infoinfo446 info(bool is_classical, 447 bool is_standard, 448 bool is_interpreted, 449 bool is_nonbranching, 450 bool is_normalized, 451 bool is_monadic, 452 bool is_singleton, 453 lbool is_nullable, 454 unsigned min_l, 455 unsigned star_h) : 456 known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching), 457 normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable), 458 min_length(min_l), star_height(star_h) {} 459 460 /* 461 Appends a string representation of the info into the stream. 462 */ 463 std::ostream& display(std::ostream&) const; 464 465 /* 466 Returns a string representation of the info. 467 */ 468 std::string str() const; 469 is_validinfo470 bool is_valid() const { return known != l_undef; } 471 is_knowninfo472 bool is_known() const { return known == l_true; } 473 474 info star() const; 475 info plus() const; 476 info opt() const; 477 info complement() const; 478 info concat(info const& rhs, bool lhs_is_concat) const; 479 info disj(info const& rhs) const; 480 info conj(info const& rhs) const; 481 info diff(info const& rhs) const; 482 info orelse(info const& rhs) const; 483 info loop(unsigned lower, unsigned upper) const; 484 }; 485 private: 486 seq_util& u; 487 ast_manager& m; 488 family_id m_fid; 489 vector<info> mutable m_infos; 490 expr_ref_vector mutable m_info_pinned; 491 info invalid_info { info(l_undef) }; 492 info unknown_info { info(l_false) }; 493 494 bool has_valid_info(expr* r) const; 495 info get_info_rec(expr* r) const; 496 info mk_info_rec(app* r) const; 497 info get_cached_info(expr* e) const; 498 499 public: rex(seq_util & u)500 rex(seq_util& u): u(u), m(u.m), m_fid(u.m_fid), m_info_pinned(u.m) {} 501 mk_re(sort * seq)502 sort* mk_re(sort* seq) { parameter param(seq); return m.mk_sort(m_fid, RE_SORT, 1, ¶m); } 503 sort* to_seq(sort* re); 504 mk_to_re(expr * s)505 app* mk_to_re(expr* s) { return m.mk_app(m_fid, OP_SEQ_TO_RE, 1, &s); } mk_in_re(expr * s,expr * r)506 app* mk_in_re(expr* s, expr* r) { return m.mk_app(m_fid, OP_SEQ_IN_RE, s, r); } mk_range(expr * s1,expr * s2)507 app* mk_range(expr* s1, expr* s2) { return m.mk_app(m_fid, OP_RE_RANGE, s1, s2); } mk_concat(expr * r1,expr * r2)508 app* mk_concat(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_CONCAT, r1, r2); } mk_union(expr * r1,expr * r2)509 app* mk_union(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_UNION, r1, r2); } mk_inter(expr * r1,expr * r2)510 app* mk_inter(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_INTERSECT, r1, r2); } mk_diff(expr * r1,expr * r2)511 app* mk_diff(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_DIFF, r1, r2); } mk_complement(expr * r)512 app* mk_complement(expr* r) { return m.mk_app(m_fid, OP_RE_COMPLEMENT, r); } mk_star(expr * r)513 app* mk_star(expr* r) { return m.mk_app(m_fid, OP_RE_STAR, r); } mk_plus(expr * r)514 app* mk_plus(expr* r) { return m.mk_app(m_fid, OP_RE_PLUS, r); } mk_opt(expr * r)515 app* mk_opt(expr* r) { return m.mk_app(m_fid, OP_RE_OPTION, r); } 516 app* mk_loop(expr* r, unsigned lo); 517 app* mk_loop(expr* r, unsigned lo, unsigned hi); 518 app* mk_loop(expr* r, expr* lo); 519 app* mk_loop(expr* r, expr* lo, expr* hi); 520 app* mk_full_char(sort* s); 521 app* mk_full_seq(sort* s); 522 app* mk_empty(sort* s); 523 app* mk_of_pred(expr* p); mk_reverse(expr * r)524 app* mk_reverse(expr* r) { return m.mk_app(m_fid, OP_RE_REVERSE, r); } mk_derivative(expr * ele,expr * r)525 app* mk_derivative(expr* ele, expr* r) { return m.mk_app(m_fid, OP_RE_DERIVATIVE, ele, r); } mk_antimorov_union(expr * r1,expr * r2)526 app* mk_antimorov_union(expr* r1, expr* r2) { return m.mk_app(m_fid, _OP_RE_ANTIMOROV_UNION, r1, r2); } 527 is_to_re(expr const * n)528 bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); } is_concat(expr const * n)529 bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); } is_union(expr const * n)530 bool is_union(expr const* n) const { return is_app_of(n, m_fid, OP_RE_UNION); } is_intersection(expr const * n)531 bool is_intersection(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); } is_diff(expr const * n)532 bool is_diff(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DIFF); } is_complement(expr const * n)533 bool is_complement(expr const* n) const { return is_app_of(n, m_fid, OP_RE_COMPLEMENT); } is_star(expr const * n)534 bool is_star(expr const* n) const { return is_app_of(n, m_fid, OP_RE_STAR); } is_plus(expr const * n)535 bool is_plus(expr const* n) const { return is_app_of(n, m_fid, OP_RE_PLUS); } is_opt(expr const * n)536 bool is_opt(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OPTION); } is_range(expr const * n)537 bool is_range(expr const* n) const { return is_app_of(n, m_fid, OP_RE_RANGE); } is_loop(expr const * n)538 bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); } is_empty(expr const * n)539 bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); } is_full_char(expr const * n)540 bool is_full_char(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); } is_full_seq(expr const * n)541 bool is_full_seq(expr const* n) const { 542 expr* s; 543 return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET) || (is_star(n, s) && is_full_char(s)); 544 } is_dot_plus(expr const * n)545 bool is_dot_plus(expr const* n) const { 546 expr* s, * t; 547 if (is_plus(n, s) && is_full_char(s)) 548 return true; 549 if (is_concat(n, s, t)) { 550 if ((is_full_char(s) && is_full_seq(t)) || (is_full_char(t) && is_full_seq(s))) 551 return true; 552 } 553 return false; 554 } is_of_pred(expr const * n)555 bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); } is_reverse(expr const * n)556 bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); } is_derivative(expr const * n)557 bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); } is_antimorov_union(expr const * n)558 bool is_antimorov_union(expr const* n) const { return is_app_of(n, m_fid, _OP_RE_ANTIMOROV_UNION); } 559 MATCH_UNARY(is_to_re); 560 MATCH_BINARY(is_concat); 561 MATCH_BINARY(is_union); 562 MATCH_BINARY(is_intersection); 563 MATCH_BINARY(is_diff); 564 MATCH_BINARY(is_range); 565 MATCH_UNARY(is_complement); 566 MATCH_UNARY(is_star); 567 MATCH_UNARY(is_plus); 568 MATCH_UNARY(is_opt); 569 MATCH_UNARY(is_of_pred); 570 MATCH_UNARY(is_reverse); 571 MATCH_BINARY(is_derivative); 572 MATCH_BINARY(is_antimorov_union); 573 bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const; 574 bool is_loop(expr const* n, expr*& body, unsigned& lo) const; 575 bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const; 576 bool is_loop(expr const* n, expr*& body, expr*& lo) const; 577 unsigned min_length(expr* r) const; 578 unsigned max_length(expr* r) const; 579 bool is_epsilon(expr* r) const; 580 app* mk_epsilon(sort* seq_sort); 581 info get_info(expr* r) const; 582 std::string to_str(expr* r) const; 583 std::string to_strh(expr* r) const; 584 mk_ite_simplify(expr * c,expr * t,expr * e)585 expr_ref mk_ite_simplify(expr* c, expr* t, expr* e) 586 { 587 expr_ref result(m); 588 if (m.is_true(c) || t == e) 589 result = t; 590 else if (m.is_false(c)) 591 result = e; 592 else 593 result = m.mk_ite(c, t, e); 594 return result; 595 } 596 597 class pp { 598 seq_util::rex& re; 599 expr* ex; 600 bool html_encode; 601 bool can_skip_parenth(expr* r) const; 602 bool print_unit(std::ostream& out, expr* s) const; 603 bool print_seq(std::ostream& out, expr* s) const; 604 std::ostream& print_range(std::ostream& out, expr* s1, expr* s2) const; 605 std::ostream& print(std::ostream& out, expr* e) const; 606 607 public: pp(seq_util::rex & re,expr * ex,bool html)608 pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {} 609 std::ostream& display(std::ostream&) const; 610 }; 611 }; 612 str str; 613 rex re; 614 seq_util(ast_manager & m)615 seq_util(ast_manager& m): 616 m(m), 617 seq(*static_cast<seq_decl_plugin*>(m.get_plugin(m.mk_family_id("seq")))), 618 ch(seq.get_char_plugin()), 619 m_fid(seq.get_family_id()), 620 str(*this), 621 re(*this) { 622 } 623 get_family_id()624 family_id get_family_id() const { return m_fid; } 625 }; 626 627 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::pp const & p) { return p.display(out); } 628 629 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::info const& p) { return p.display(out); } 630 631