1 /*++
2 Copyright (c) 2011 Microsoft Corporation
3 
4 Module Name:
5 
6     seq_decl_plugin.h
7 
8 Abstract:
9 
10     decl_plugin for the theory of sequences
11 
12 Author:
13 
14     Nikolaj Bjorner (nbjorner) 2011-11-14
15 
16 Revision History:
17 
18     Updated to string sequences 2015-12-5
19 
20     Add SMTLIB 2.6 support 2020-5-17
21 
22 --*/
23 #pragma once
24 
25 #include "ast/ast.h"
26 #include "ast/char_decl_plugin.h"
27 #include "util/lbool.h"
28 #include "util/zstring.h"
29 
30 enum seq_sort_kind {
31     SEQ_SORT,
32     RE_SORT,
33     _STRING_SORT,
34     _REGLAN_SORT
35 };
36 
37 enum seq_op_kind {
38     OP_SEQ_UNIT,
39     OP_SEQ_EMPTY,
40     OP_SEQ_CONCAT,
41     OP_SEQ_PREFIX,
42     OP_SEQ_SUFFIX,
43     OP_SEQ_CONTAINS,
44     OP_SEQ_EXTRACT,
45     OP_SEQ_REPLACE,
46     OP_SEQ_AT,
47     OP_SEQ_NTH,         // NTH function exposed over API. Rewritten to NTH(s,i) := if (0 <= i < len(s)) then NTH_I(s,i) else NTH_U(s,i)
48     OP_SEQ_NTH_I,       // Interpreted variant of Nth for indices within defined domain.
49     OP_SEQ_NTH_U,       // Uninterpreted variant of Nth for indices outside of uniquely defined domain.
50     OP_SEQ_LENGTH,
51     OP_SEQ_INDEX,
52     OP_SEQ_LAST_INDEX,
53     OP_SEQ_TO_RE,
54     OP_SEQ_IN_RE,
55     OP_SEQ_REPLACE_RE_ALL, // Seq -> RegEx -> Seq -> Seq
56     OP_SEQ_REPLACE_RE,     // Seq -> RegEx -> Seq -> Seq
57     OP_SEQ_REPLACE_ALL,    // Seq -> Seq -> Seq -> Seq
58 
59     OP_RE_PLUS,
60     OP_RE_STAR,
61     OP_RE_OPTION,
62     OP_RE_RANGE,
63     OP_RE_CONCAT,
64     OP_RE_UNION,
65     OP_RE_DIFF,
66     OP_RE_INTERSECT,
67     OP_RE_LOOP,
68     OP_RE_POWER,
69     OP_RE_COMPLEMENT,
70     OP_RE_EMPTY_SET,
71     OP_RE_FULL_SEQ_SET,
72     OP_RE_FULL_CHAR_SET,
73     OP_RE_OF_PRED,
74     OP_RE_REVERSE,
75     OP_RE_DERIVATIVE, // Char -> RegEx -> RegEx
76 
77 
78     // string specific operators.
79     OP_STRING_CONST,
80     OP_STRING_ITOS,
81     OP_STRING_STOI,
82     OP_STRING_UBVTOS,
83     OP_STRING_SBVTOS,
84     OP_STRING_LT,
85     OP_STRING_LE,
86     OP_STRING_IS_DIGIT,
87     OP_STRING_TO_CODE,
88     OP_STRING_FROM_CODE,
89 
90     // internal only operators. Converted to SEQ variants.
91     _OP_STRING_FROM_CHAR,
92     _OP_STRING_STRREPL,
93     _OP_STRING_CONCAT,
94     _OP_STRING_LENGTH,
95     _OP_STRING_STRCTN,
96     _OP_STRING_PREFIX,
97     _OP_STRING_SUFFIX,
98     _OP_STRING_IN_REGEXP,
99     _OP_STRING_TO_REGEXP,
100     _OP_STRING_CHARAT,
101     _OP_STRING_SUBSTR,
102     _OP_STRING_STRIDOF,
103     _OP_REGEXP_EMPTY,
104     _OP_REGEXP_FULL_CHAR,
105     _OP_RE_IS_NULLABLE,
106     _OP_RE_ANTIMOROV_UNION, // Lifted union for antimorov-style derivatives
107     _OP_SEQ_SKOLEM,
108     LAST_SEQ_OP
109 };
110 
111 
112 class seq_decl_plugin : public decl_plugin {
113     struct psig {
114         symbol          m_name;
115         unsigned        m_num_params;
116         sort_ref_vector m_dom;
117         sort_ref        m_range;
psigpsig118         psig(ast_manager& m, char const* name, unsigned n, unsigned dsz, sort* const* dom, sort* rng):
119             m_name(name),
120             m_num_params(n),
121             m_dom(m),
122             m_range(rng, m)
123         {
124             m_dom.append(dsz, dom);
125         }
126     };
127 
128     ptr_vector<psig> m_sigs;
129     ptr_vector<sort> m_binding;
130     bool             m_init;
131     symbol           m_stringc_sym;
132     sort*            m_string;
133     sort*            m_char;
134     sort*            m_reglan;
135     bool             m_has_re;
136     bool             m_has_seq;
137     char_decl_plugin* m_char_plugin { nullptr };
138 
139     void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng);
140 
141     void match_assoc(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng);
142 
143     bool match(ptr_vector<sort>& binding, sort* s, sort* sP);
144 
145     sort* apply_binding(ptr_vector<sort> const& binding, sort* s);
146 
147     bool is_sort_param(sort* s, unsigned& idx);
148 
149     func_decl* mk_seq_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string);
150     func_decl* mk_str_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq);
151     func_decl* mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq);
152     func_decl* mk_left_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq);
153     func_decl* mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string, decl_kind k_seq, bool is_right);
154     func_decl* mk_ubv2s(unsigned arity, sort* const* domain) const;
155     func_decl* mk_sbv2s(unsigned arity, sort* const* domain) const;
156 
157 
158     void init();
159 
160     void set_manager(ast_manager * m, family_id id) override;
161 
162     sort* mk_reglan();
163 
164 public:
165     seq_decl_plugin();
166 
167     void finalize() override;
168 
mk_fresh()169     decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); }
170 
171     sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override;
172 
173     func_decl * mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
174                              unsigned arity, sort * const * domain, sort * range) override;
175 
176     void get_op_names(svector<builtin_name> & op_names, symbol const & logic) override;
177 
178     void get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) override;
179 
180     bool is_value(app * e) const override;
181 
182     bool is_unique_value(app * e) const override;
183 
184     bool are_equal(app* a, app* b) const override;
185 
186     bool are_distinct(app* a, app* b) const override;
187 
188     expr * get_some_value(sort * s) override;
189 
is_char(sort * a)190     bool is_char(sort* a) const { return a == m_char; }
191 
max_char()192     unsigned max_char() const { return get_char_plugin().max_char(); }
num_bits()193     unsigned num_bits() const { return get_char_plugin().num_bits(); }
194 
195     app* mk_string(zstring const& s);
196     app* mk_char(unsigned ch);
197 
has_re()198     bool has_re() const { return m_has_re; }
has_seq()199     bool has_seq() const { return m_has_seq; }
200 
201     bool is_considered_uninterpreted(func_decl * f) override;
202 
char_sort()203     sort* char_sort() const { return m_char; }
string_sort()204     sort* string_sort() const { return m_string; }
205 
get_char_plugin()206     char_decl_plugin& get_char_plugin() const { return *m_char_plugin; }
207 
208 };
209 
210 class seq_util {
211     ast_manager& m;
212     seq_decl_plugin& seq;
213     char_decl_plugin& ch;
214     family_id m_fid;
215 
216 public:
217 
218     unsigned max_plus(unsigned x, unsigned y) const;
219     unsigned max_mul(unsigned x, unsigned y) const;
220 
get_manager()221     ast_manager& get_manager() const { return m; }
222 
mk_char_sort()223     sort* mk_char_sort() const { return seq.char_sort(); }
mk_string_sort()224     sort* mk_string_sort() const { return seq.string_sort(); }
225 
is_char(sort * s)226     bool is_char(sort* s) const { return seq.is_char(s); }
is_char(expr * e)227     bool is_char(expr* e) const { return is_char(e->get_sort()); }
is_string(sort * s)228     bool is_string(sort* s) const { return is_seq(s) && seq.is_char(to_sort(s->get_parameter(0).get_ast())); }
is_seq(sort * s)229     bool is_seq(sort* s) const { return is_sort_of(s, m_fid, SEQ_SORT); }
is_re(sort * s)230     bool is_re(sort* s) const { return is_sort_of(s, m_fid, RE_SORT); }
is_re(sort * s,sort * & seq)231     bool is_re(sort* s, sort*& seq) const { return is_sort_of(s, m_fid, RE_SORT)  && (seq = to_sort(s->get_parameter(0).get_ast()), true); }
is_seq(expr * e)232     bool is_seq(expr* e) const  { return is_seq(e->get_sort()); }
is_seq(sort * s,sort * & seq)233     bool is_seq(sort* s, sort*& seq) const { return is_seq(s) && (seq = to_sort(s->get_parameter(0).get_ast()), true); }
is_re(expr * e)234     bool is_re(expr* e) const { return is_re(e->get_sort()); }
is_re(expr * e,sort * & seq)235     bool is_re(expr* e, sort*& seq) const { return is_re(e->get_sort(), seq); }
236     bool is_const_char(expr* e, unsigned& c) const;
is_const_char(expr * e)237     bool is_const_char(expr* e) const { unsigned c; return is_const_char(e, c); }
238     bool is_char_le(expr const* e) const;
is_char_is_digit(expr const * e,expr * & d)239     bool is_char_is_digit(expr const* e, expr*& d) const { return ch.is_is_digit(e, d); }
is_char_is_digit(expr const * e)240     bool is_char_is_digit(expr const* e) const { return ch.is_is_digit(e); }
241     bool is_char2int(expr const* e) const;
242     bool is_bv2char(expr const* e) const;
243     bool is_char2bv(expr const* e) const;
244     app* mk_char_bit(expr* e, unsigned i);
245     app* mk_char(unsigned ch) const;
mk_char_is_digit(expr * e)246     app* mk_char_is_digit(expr* e) { return ch.mk_is_digit(e); }
247     app* mk_le(expr* ch1, expr* ch2) const;
248     app* mk_lt(expr* ch1, expr* ch2) const;
mk_char2int(expr * e)249     app* mk_char2int(expr* e) { return ch.mk_to_int(e); }
max_char()250     unsigned max_char() const { return seq.max_char(); }
num_bits()251     unsigned num_bits() const { return seq.num_bits(); }
252 
253     app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range);
is_skolem(expr const * e)254     bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); }
255 
256     MATCH_BINARY(is_char_le);
257     MATCH_UNARY(is_char2int);
258     MATCH_UNARY(is_char2bv);
259     MATCH_UNARY(is_bv2char);
260 
has_re()261     bool has_re() const { return seq.has_re(); }
has_seq()262     bool has_seq() const { return seq.has_seq(); }
263 
264     class str {
265         seq_util&    u;
266         ast_manager& m;
267         family_id    m_fid;
268 
269 
270     public:
str(seq_util & u)271         str(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {}
272 
mk_seq(sort * s)273         sort* mk_seq(sort* s) const { parameter param(s); return m.mk_sort(m_fid, SEQ_SORT, 1, &param); }
mk_string_sort()274         sort* mk_string_sort() const { return m.mk_sort(m_fid, _STRING_SORT, 0, nullptr); }
mk_empty(sort * s)275         app* mk_empty(sort* s) const { return m.mk_const(m.mk_func_decl(m_fid, OP_SEQ_EMPTY, 0, nullptr, 0, (expr*const*)nullptr, s)); }
276         app* mk_string(zstring const& s) const;
277         app* mk_char(unsigned ch) const;
mk_concat(expr * a,expr * b)278         app* mk_concat(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONCAT, 2, es); }
mk_concat(expr * a,expr * b,expr * c)279         app* mk_concat(expr* a, expr* b, expr* c) const { return mk_concat(a, mk_concat(b, c)); }
mk_concat(unsigned n,expr * const * es,sort * s)280         expr* mk_concat(unsigned n, expr* const* es, sort* s) const {
281             if (n == 0) return mk_empty(s);
282             if (n == 1) return es[0];
283             return m.mk_app(m_fid, OP_SEQ_CONCAT, n, es); }
mk_concat(expr_ref_vector const & es,sort * s)284         expr* mk_concat(expr_ref_vector const& es, sort* s) const { return mk_concat(es.size(), es.data(), s); }
mk_length(expr * a)285         app* mk_length(expr* a) const { return m.mk_app(m_fid, OP_SEQ_LENGTH, 1, &a); }
mk_at(expr * s,expr * i)286         app* mk_at(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_AT, 2, es); }
mk_nth(expr * s,expr * i)287         app* mk_nth(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH, 2, es); }
mk_nth_i(expr * s,expr * i)288         app* mk_nth_i(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_I, 2, es); }
mk_nth_u(expr * s,expr * i)289         app* mk_nth_u(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_U, 2, es); }
290         app* mk_nth_c(expr* s, unsigned i) const;
291 
mk_substr(expr * a,expr * b,expr * c)292         app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
mk_contains(expr * a,expr * b)293         app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
mk_prefix(expr * a,expr * b)294         app* mk_prefix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_PREFIX, 2, es); }
mk_suffix(expr * a,expr * b)295         app* mk_suffix(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_SUFFIX, 2, es); }
mk_index(expr * a,expr * b,expr * i)296         app* mk_index(expr* a, expr* b, expr* i) const { expr* es[3] = { a, b, i}; return m.mk_app(m_fid, OP_SEQ_INDEX, 3, es); }
mk_last_index(expr * a,expr * b)297         app* mk_last_index(expr* a, expr* b) const { expr* es[2] = { a, b}; return m.mk_app(m_fid, OP_SEQ_LAST_INDEX, 2, es); }
mk_replace(expr * a,expr * b,expr * c)298         app* mk_replace(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c}; return m.mk_app(m_fid, OP_SEQ_REPLACE, 3, es); }
mk_unit(expr * u)299         app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); }
300         app* mk_char(zstring const& s, unsigned idx) const;
301         app* mk_char_bit(expr* e, unsigned i);
mk_itos(expr * i)302         app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); }
mk_stoi(expr * s)303         app* mk_stoi(expr* s) const { return m.mk_app(m_fid, OP_STRING_STOI, 1, &s); }
mk_ubv2s(expr * b)304         app* mk_ubv2s(expr* b) const { return m.mk_app(m_fid, OP_STRING_UBVTOS, 1, &b); }
mk_sbv2s(expr * b)305         app* mk_sbv2s(expr* b) const { return m.mk_app(m_fid, OP_STRING_SBVTOS, 1, &b); }
306         app* mk_is_empty(expr* s) const;
mk_lex_lt(expr * a,expr * b)307         app* mk_lex_lt(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_STRING_LT, 2, es); }
mk_lex_le(expr * a,expr * b)308         app* mk_lex_le(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_STRING_LE, 2, es); }
mk_to_code(expr * e)309         app* mk_to_code(expr* e) const { return m.mk_app(m_fid, OP_STRING_TO_CODE, 1, &e); }
mk_from_code(expr * e)310         app* mk_from_code(expr* e) const { return m.mk_app(m_fid, OP_STRING_FROM_CODE, 1, &e); }
mk_is_digit(expr * e)311         app* mk_is_digit(expr* e) const { return m.mk_app(m_fid, OP_STRING_IS_DIGIT, 1, &e); }
312 
313 
is_nth_i(func_decl const * f)314         bool is_nth_i(func_decl const* f)       const { return is_decl_of(f, m_fid, OP_SEQ_NTH_I); }
is_nth_u(func_decl const * f)315         bool is_nth_u(func_decl const* f)       const { return is_decl_of(f, m_fid, OP_SEQ_NTH_U); }
is_skolem(func_decl const * f)316         bool is_skolem(func_decl const* f)      const { return is_decl_of(f, m_fid, _OP_SEQ_SKOLEM); }
317 
is_string(expr const * n)318         bool is_string(expr const * n) const { return is_app_of(n, m_fid, OP_STRING_CONST); }
is_string(func_decl const * f)319         bool is_string(func_decl const* f) const { return is_decl_of(f, m_fid, OP_STRING_CONST); }
320         bool is_string(expr const* n, zstring& s) const;
321         bool is_string(func_decl const* f, zstring& s) const;
is_empty(expr const * n)322         bool is_empty(expr const* n) const {
323             zstring s;
324             return is_app_of(n, m_fid, OP_SEQ_EMPTY) || (is_string(n, s) && s.empty());
325         }
is_concat(expr const * n)326         bool is_concat(expr const* n)   const { return is_app_of(n, m_fid, OP_SEQ_CONCAT); }
is_length(expr const * n)327         bool is_length(expr const* n)   const { return is_app_of(n, m_fid, OP_SEQ_LENGTH); }
is_extract(expr const * n)328         bool is_extract(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); }
is_contains(expr const * n)329         bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); }
is_at(expr const * n)330         bool is_at(expr const* n)       const { return is_app_of(n, m_fid, OP_SEQ_AT); }
is_nth_i(expr const * n)331         bool is_nth_i(expr const* n)       const { return is_app_of(n, m_fid, OP_SEQ_NTH_I); }
is_nth_u(expr const * n)332         bool is_nth_u(expr const* n)       const { return is_app_of(n, m_fid, OP_SEQ_NTH_U); }
333         bool is_nth_i(expr const* n, expr*& s, unsigned& idx) const;
is_index(expr const * n)334         bool is_index(expr const* n)    const { return is_app_of(n, m_fid, OP_SEQ_INDEX); }
is_last_index(expr const * n)335         bool is_last_index(expr const* n)    const { return is_app_of(n, m_fid, OP_SEQ_LAST_INDEX); }
is_replace(expr const * n)336         bool is_replace(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_REPLACE); }
is_replace_re(expr const * n)337         bool is_replace_re(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE); }
is_replace_re_all(expr const * n)338         bool is_replace_re_all(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE_ALL); }
is_replace_all(expr const * n)339         bool is_replace_all(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_ALL); }
is_prefix(expr const * n)340         bool is_prefix(expr const* n)   const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); }
is_suffix(expr const * n)341         bool is_suffix(expr const* n)   const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); }
is_itos(expr const * n)342         bool is_itos(expr const* n)     const { return is_app_of(n, m_fid, OP_STRING_ITOS); }
is_stoi(expr const * n)343         bool is_stoi(expr const* n)     const { return is_app_of(n, m_fid, OP_STRING_STOI); }
is_ubv2s(expr const * n)344         bool is_ubv2s(expr const* n)    const { return is_app_of(n, m_fid, OP_STRING_UBVTOS); }
is_sbv2s(expr const * n)345         bool is_sbv2s(expr const* n)    const { return is_app_of(n, m_fid, OP_STRING_SBVTOS); }
is_in_re(expr const * n)346         bool is_in_re(expr const* n)    const { return is_app_of(n, m_fid, OP_SEQ_IN_RE); }
is_unit(expr const * n)347         bool is_unit(expr const* n)     const { return is_app_of(n, m_fid, OP_SEQ_UNIT); }
is_lt(expr const * n)348         bool is_lt(expr const* n)       const { return is_app_of(n, m_fid, OP_STRING_LT); }
is_le(expr const * n)349         bool is_le(expr const* n)       const { return is_app_of(n, m_fid, OP_STRING_LE); }
is_is_digit(expr const * n)350         bool is_is_digit(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_IS_DIGIT); }
is_from_code(expr const * n)351         bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); }
is_to_code(expr const * n)352         bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); }
353 
354         bool is_len_sub(expr const* n, expr*& l, expr*& u, rational& k) const;
355 
356         /*
357         tests if s is a single character string(c) or a unit (c)
358         */
359         bool is_unit_string(expr const* s, expr_ref& c) const;
360 
is_string_term(expr const * n)361         bool is_string_term(expr const * n) const {
362             return u.is_string(n->get_sort());
363         }
364 
is_non_string_sequence(expr const * n)365         bool is_non_string_sequence(expr const * n) const {
366             sort * s = n->get_sort();
367             return (u.is_seq(s) && !u.is_string(s));
368         }
369 
370         MATCH_BINARY(is_concat);
371         MATCH_UNARY(is_length);
372         MATCH_TERNARY(is_extract);
373         MATCH_BINARY(is_contains);
374         MATCH_BINARY(is_at);
375         MATCH_BINARY(is_nth_i);
376         MATCH_BINARY(is_nth_u);
377         MATCH_BINARY(is_index);
378         MATCH_TERNARY(is_index);
379         MATCH_BINARY(is_last_index);
380         MATCH_TERNARY(is_replace);
381         MATCH_TERNARY(is_replace_re);
382         MATCH_TERNARY(is_replace_re_all);
383         MATCH_TERNARY(is_replace_all);
384         MATCH_BINARY(is_prefix);
385         MATCH_BINARY(is_suffix);
386         MATCH_BINARY(is_lt);
387         MATCH_BINARY(is_le);
388         MATCH_UNARY(is_itos);
389         MATCH_UNARY(is_stoi);
390         MATCH_UNARY(is_ubv2s);
391         MATCH_UNARY(is_sbv2s);
392         MATCH_UNARY(is_is_digit);
393         MATCH_UNARY(is_from_code);
394         MATCH_UNARY(is_to_code);
395         MATCH_BINARY(is_in_re);
396         MATCH_UNARY(is_unit);
397 
398         void get_concat(expr* e, expr_ref_vector& es) const;
399         void get_concat_units(expr* e, expr_ref_vector& es) const;
get_leftmost_concat(expr * e)400         expr* get_leftmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e1; return e; }
get_rightmost_concat(expr * e)401         expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; }
402 
403         unsigned min_length(expr* s) const;
404         unsigned max_length(expr* s) const;
405     };
406 
407     class rex {
408     public:
409         struct info {
410             /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
411             lbool known { l_undef };
412             /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
413             bool classical { false };
414             /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
415             bool standard { false };
416             /* There are no uninterpreted symbols. */
417             bool interpreted { false };
418             /* No if-then-else is used. */
419             bool nonbranching { false };
420             /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
421             bool normalized { false };
422             /* All bounded loops have a body that is a singleton. */
423             bool monadic { false };
424             /* Positive Boolean combination of ranges or predicates or singleton sequences. */
425             bool singleton { false };
426             /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
427             lbool nullable { l_undef };
428             /* Lower bound  on the length of all accepted words. */
429             unsigned min_length { 0 };
430             /* Maximum nesting depth of Kleene stars. */
431             unsigned star_height { 0 };
432 
433             /*
434               Default constructor of invalid info.
435             */
infoinfo436             info() {}
437 
438             /*
439               Used for constructing either an invalid info that is only used to indicate uninitialzed entry, or valid but unknown info value.
440             */
infoinfo441             info(lbool is_known) : known(is_known) {}
442 
443             /*
444               General info constructor.
445             */
infoinfo446             info(bool is_classical,
447                 bool is_standard,
448                 bool is_interpreted,
449                 bool is_nonbranching,
450                 bool is_normalized,
451                 bool is_monadic,
452                 bool is_singleton,
453                 lbool is_nullable,
454                 unsigned min_l,
455                 unsigned star_h) :
456                 known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
457                 normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable),
458                 min_length(min_l), star_height(star_h) {}
459 
460             /*
461               Appends a string representation of the info into the stream.
462             */
463             std::ostream& display(std::ostream&) const;
464 
465             /*
466               Returns a string representation of the info.
467             */
468             std::string str() const;
469 
is_validinfo470             bool is_valid() const { return known != l_undef; }
471 
is_knowninfo472             bool is_known() const { return known == l_true; }
473 
474             info star() const;
475             info plus() const;
476             info opt() const;
477             info complement() const;
478             info concat(info const& rhs, bool lhs_is_concat) const;
479             info disj(info const& rhs) const;
480             info conj(info const& rhs) const;
481             info diff(info const& rhs) const;
482             info orelse(info const& rhs) const;
483             info loop(unsigned lower, unsigned upper) const;
484         };
485     private:
486         seq_util&    u;
487         ast_manager& m;
488         family_id    m_fid;
489         vector<info> mutable m_infos;
490         expr_ref_vector mutable m_info_pinned;
491         info invalid_info { info(l_undef) };
492         info unknown_info { info(l_false) };
493 
494         bool has_valid_info(expr* r) const;
495         info get_info_rec(expr* r) const;
496         info mk_info_rec(app* r) const;
497         info get_cached_info(expr* e) const;
498 
499     public:
rex(seq_util & u)500         rex(seq_util& u): u(u), m(u.m), m_fid(u.m_fid), m_info_pinned(u.m) {}
501 
mk_re(sort * seq)502         sort* mk_re(sort* seq) { parameter param(seq); return m.mk_sort(m_fid, RE_SORT, 1, &param); }
503         sort* to_seq(sort* re);
504 
mk_to_re(expr * s)505         app* mk_to_re(expr* s) { return m.mk_app(m_fid, OP_SEQ_TO_RE, 1, &s); }
mk_in_re(expr * s,expr * r)506         app* mk_in_re(expr* s, expr* r) { return m.mk_app(m_fid, OP_SEQ_IN_RE, s, r); }
mk_range(expr * s1,expr * s2)507         app* mk_range(expr* s1, expr* s2) { return m.mk_app(m_fid, OP_RE_RANGE, s1, s2); }
mk_concat(expr * r1,expr * r2)508         app* mk_concat(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_CONCAT, r1, r2); }
mk_union(expr * r1,expr * r2)509         app* mk_union(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_UNION, r1, r2); }
mk_inter(expr * r1,expr * r2)510         app* mk_inter(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_INTERSECT, r1, r2); }
mk_diff(expr * r1,expr * r2)511         app* mk_diff(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_DIFF, r1, r2); }
mk_complement(expr * r)512         app* mk_complement(expr* r) { return m.mk_app(m_fid, OP_RE_COMPLEMENT, r); }
mk_star(expr * r)513         app* mk_star(expr* r) { return m.mk_app(m_fid, OP_RE_STAR, r); }
mk_plus(expr * r)514         app* mk_plus(expr* r) { return m.mk_app(m_fid, OP_RE_PLUS, r); }
mk_opt(expr * r)515         app* mk_opt(expr* r) { return m.mk_app(m_fid, OP_RE_OPTION, r); }
516         app* mk_loop(expr* r, unsigned lo);
517         app* mk_loop(expr* r, unsigned lo, unsigned hi);
518         app* mk_loop(expr* r, expr* lo);
519         app* mk_loop(expr* r, expr* lo, expr* hi);
520         app* mk_full_char(sort* s);
521         app* mk_full_seq(sort* s);
522         app* mk_empty(sort* s);
523         app* mk_of_pred(expr* p);
mk_reverse(expr * r)524         app* mk_reverse(expr* r) { return m.mk_app(m_fid, OP_RE_REVERSE, r); }
mk_derivative(expr * ele,expr * r)525         app* mk_derivative(expr* ele, expr* r) { return m.mk_app(m_fid, OP_RE_DERIVATIVE, ele, r); }
mk_antimorov_union(expr * r1,expr * r2)526         app* mk_antimorov_union(expr* r1, expr* r2) { return m.mk_app(m_fid, _OP_RE_ANTIMOROV_UNION, r1, r2); }
527 
is_to_re(expr const * n)528         bool is_to_re(expr const* n)    const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
is_concat(expr const * n)529         bool is_concat(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
is_union(expr const * n)530         bool is_union(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_UNION); }
is_intersection(expr const * n)531         bool is_intersection(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
is_diff(expr const * n)532         bool is_diff(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_DIFF); }
is_complement(expr const * n)533         bool is_complement(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_COMPLEMENT); }
is_star(expr const * n)534         bool is_star(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_STAR); }
is_plus(expr const * n)535         bool is_plus(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_PLUS); }
is_opt(expr const * n)536         bool is_opt(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_OPTION); }
is_range(expr const * n)537         bool is_range(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_RANGE); }
is_loop(expr const * n)538         bool is_loop(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_LOOP); }
is_empty(expr const * n)539         bool is_empty(expr const* n)  const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); }
is_full_char(expr const * n)540         bool is_full_char(expr const* n)  const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); }
is_full_seq(expr const * n)541         bool is_full_seq(expr const* n)  const {
542             expr* s;
543             return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET) || (is_star(n, s) && is_full_char(s));
544         }
is_dot_plus(expr const * n)545         bool is_dot_plus(expr const* n)  const {
546             expr* s, * t;
547             if (is_plus(n, s) && is_full_char(s))
548                 return true;
549             if (is_concat(n, s, t)) {
550                 if ((is_full_char(s) && is_full_seq(t)) || (is_full_char(t) && is_full_seq(s)))
551                     return true;
552             }
553             return false;
554         }
is_of_pred(expr const * n)555         bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
is_reverse(expr const * n)556         bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
is_derivative(expr const * n)557         bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
is_antimorov_union(expr const * n)558         bool is_antimorov_union(expr const* n) const { return is_app_of(n, m_fid, _OP_RE_ANTIMOROV_UNION); }
559         MATCH_UNARY(is_to_re);
560         MATCH_BINARY(is_concat);
561         MATCH_BINARY(is_union);
562         MATCH_BINARY(is_intersection);
563         MATCH_BINARY(is_diff);
564         MATCH_BINARY(is_range);
565         MATCH_UNARY(is_complement);
566         MATCH_UNARY(is_star);
567         MATCH_UNARY(is_plus);
568         MATCH_UNARY(is_opt);
569         MATCH_UNARY(is_of_pred);
570         MATCH_UNARY(is_reverse);
571         MATCH_BINARY(is_derivative);
572         MATCH_BINARY(is_antimorov_union);
573         bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const;
574         bool is_loop(expr const* n, expr*& body, unsigned& lo) const;
575         bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const;
576         bool is_loop(expr const* n, expr*& body, expr*& lo) const;
577         unsigned min_length(expr* r) const;
578         unsigned max_length(expr* r) const;
579         bool is_epsilon(expr* r) const;
580         app* mk_epsilon(sort* seq_sort);
581         info get_info(expr* r) const;
582         std::string to_str(expr* r) const;
583         std::string to_strh(expr* r) const;
584 
mk_ite_simplify(expr * c,expr * t,expr * e)585         expr_ref mk_ite_simplify(expr* c, expr* t, expr* e)
586         {
587             expr_ref result(m);
588             if (m.is_true(c) || t == e)
589                 result = t;
590             else if (m.is_false(c))
591                 result = e;
592             else
593                 result = m.mk_ite(c, t, e);
594             return result;
595         }
596 
597         class pp {
598             seq_util::rex& re;
599             expr* ex;
600             bool html_encode;
601             bool can_skip_parenth(expr* r) const;
602             bool print_unit(std::ostream& out, expr* s) const;
603             bool print_seq(std::ostream& out, expr* s) const;
604             std::ostream& print_range(std::ostream& out, expr* s1, expr* s2) const;
605             std::ostream& print(std::ostream& out, expr* e) const;
606 
607         public:
pp(seq_util::rex & re,expr * ex,bool html)608             pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {}
609             std::ostream& display(std::ostream&) const;
610         };
611     };
612     str str;
613     rex  re;
614 
seq_util(ast_manager & m)615     seq_util(ast_manager& m):
616         m(m),
617         seq(*static_cast<seq_decl_plugin*>(m.get_plugin(m.mk_family_id("seq")))),
618         ch(seq.get_char_plugin()),
619         m_fid(seq.get_family_id()),
620         str(*this),
621         re(*this) {
622     }
623 
get_family_id()624     family_id get_family_id() const { return m_fid; }
625 };
626 
627 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::pp const & p) { return p.display(out); }
628 
629 inline std::ostream& operator<<(std::ostream& out, seq_util::rex::info const& p) { return p.display(out); }
630 
631