1 /*++
2 Copyright (c) 2011 Microsoft Corporation
3 
4 Module Name:
5 
6     seq_decl_plugin.h
7 
8 Abstract:
9 
10     decl_plugin for the theory of sequences
11 
12 Author:
13 
14     Nikolaj Bjorner (nbjorner) 2011-14-11
15 
16 Revision History:
17 
18 --*/
19 #include "ast/seq_decl_plugin.h"
20 #include "ast/arith_decl_plugin.h"
21 #include "ast/array_decl_plugin.h"
22 #include "ast/ast_pp.h"
23 #include <sstream>
24 
25 
seq_decl_plugin()26 seq_decl_plugin::seq_decl_plugin(): m_init(false),
27                                     m_stringc_sym("String"),
28                                     m_string(nullptr),
29                                     m_char(nullptr),
30                                     m_reglan(nullptr),
31                                     m_has_re(false),
32                                     m_has_seq(false) {
33 }
34 
finalize()35 void seq_decl_plugin::finalize() {
36     for (psig* s : m_sigs)
37         dealloc(s);
38     m_manager->dec_ref(m_string);
39     m_manager->dec_ref(m_char);
40     m_manager->dec_ref(m_reglan);
41 }
42 
is_sort_param(sort * s,unsigned & idx)43 bool seq_decl_plugin::is_sort_param(sort* s, unsigned& idx) {
44     return
45         s->get_name().is_numerical() &&
46         (idx = s->get_name().get_num(), true);
47 }
48 
match(ptr_vector<sort> & binding,sort * s,sort * sP)49 bool seq_decl_plugin::match(ptr_vector<sort>& binding, sort* s, sort* sP) {
50     if (s == sP) return true;
51     unsigned idx;
52     if (is_sort_param(sP, idx)) {
53         if (binding.size() <= idx) binding.resize(idx+1);
54         if (binding[idx] && (binding[idx] != s)) return false;
55         binding[idx] = s;
56         return true;
57     }
58 
59 
60     if (s->get_family_id() == sP->get_family_id() &&
61         s->get_decl_kind() == sP->get_decl_kind() &&
62         s->get_num_parameters() == sP->get_num_parameters()) {
63         for (unsigned i = 0, sz = s->get_num_parameters(); i < sz; ++i) {
64             parameter const& p = s->get_parameter(i);
65             if (p.is_ast() && is_sort(p.get_ast())) {
66                 parameter const& p2 = sP->get_parameter(i);
67                 if (!match(binding, to_sort(p.get_ast()), to_sort(p2.get_ast()))) return false;
68             }
69         }
70         return true;
71     }
72     else {
73         TRACE("seq", tout << "Could not match " << mk_pp(s, *m_manager) << " and " << mk_pp(sP, *m_manager) << "\n";);
74         return false;
75     }
76 }
77 
78 /*
79   \brief match right associative operator.
80 */
match_assoc(psig & sig,unsigned dsz,sort * const * dom,sort * range,sort_ref & range_out)81 void seq_decl_plugin::match_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
82     ptr_vector<sort> binding;
83     ast_manager& m = *m_manager;
84     if (dsz == 0) {
85         std::ostringstream strm;
86         strm << "Unexpected number of arguments to '" << sig.m_name << "' ";
87         strm << "at least one argument expected " << dsz << " given";
88         m.raise_exception(strm.str());
89     }
90     bool is_match = true;
91     for (unsigned i = 0; is_match && i < dsz; ++i) {
92         SASSERT(dom[i]);
93         is_match = match(binding, dom[i], sig.m_dom.get(0));
94     }
95     if (range && is_match) {
96         is_match = match(binding, range, sig.m_range);
97     }
98     if (!is_match) {
99         std::ostringstream strm;
100         strm << "Sort of function '" << sig.m_name << "' ";
101         strm << "does not match the declared type. Given domain: ";
102         for (unsigned i = 0; i < dsz; ++i) {
103             strm << mk_pp(dom[i], m) << " ";
104         }
105         if (range) {
106             strm << " and range: " << mk_pp(range, m);
107         }
108         m.raise_exception(strm.str());
109     }
110     range_out = apply_binding(binding, sig.m_range);
111     SASSERT(range_out);
112 }
113 
match(psig & sig,unsigned dsz,sort * const * dom,sort * range,sort_ref & range_out)114 void seq_decl_plugin::match(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
115     m_binding.reset();
116     ast_manager& m = *m_manager;
117     if (sig.m_dom.size() != dsz) {
118         std::ostringstream strm;
119         strm << "Unexpected number of arguments to '" << sig.m_name << "' ";
120         strm << sig.m_dom.size() << " arguments expected " << dsz << " given";
121         m.raise_exception(strm.str());
122     }
123     bool is_match = true;
124     for (unsigned i = 0; is_match && i < dsz; ++i) {
125         is_match = match(m_binding, dom[i], sig.m_dom[i].get());
126     }
127     if (range && is_match) {
128         is_match = match(m_binding, range, sig.m_range);
129     }
130     if (!is_match) {
131         std::ostringstream strm;
132         strm << "Sort of polymorphic function '" << sig.m_name << "' ";
133         strm << "does not match the declared type. ";
134         strm << "\nGiven domain: ";
135         for (unsigned i = 0; i < dsz; ++i) {
136             strm << mk_pp(dom[i], m) << " ";
137         }
138         if (range) {
139             strm << " and range: " << mk_pp(range, m);
140         }
141         strm << "\nExpected domain: ";
142         for (unsigned i = 0; i < dsz; ++i) {
143             strm << mk_pp(sig.m_dom[i].get(), m) << " ";
144         }
145 
146         m.raise_exception(strm.str());
147     }
148     if (!range && dsz == 0) {
149         std::ostringstream strm;
150         strm << "Sort of polymorphic function '" << sig.m_name << "' ";
151         strm << "is ambiguous. Function takes no arguments and sort of range has not been constrained";
152         m.raise_exception(strm.str());
153     }
154     range_out = apply_binding(m_binding, sig.m_range);
155     SASSERT(range_out);
156 }
157 
apply_binding(ptr_vector<sort> const & binding,sort * s)158 sort* seq_decl_plugin::apply_binding(ptr_vector<sort> const& binding, sort* s) {
159     unsigned i;
160     if (is_sort_param(s, i)) {
161         if (binding.size() <= i || !binding[i]) {
162             m_manager->raise_exception("Expecting type parameter to be bound");
163         }
164         return binding[i];
165     }
166     if (is_sort_of(s, m_family_id, SEQ_SORT) || is_sort_of(s, m_family_id, RE_SORT)) {
167         SASSERT(s->get_num_parameters() == 1);
168         SASSERT(s->get_parameter(0).is_ast());
169         SASSERT(is_sort(s->get_parameter(0).get_ast()));
170         sort* p = apply_binding(binding, to_sort(s->get_parameter(0).get_ast()));
171         parameter param(p);
172         if (p == m_char && s->get_decl_kind() == SEQ_SORT)
173             return m_string;
174         if (p == m_string && s->get_decl_kind() == RE_SORT)
175             return mk_reglan();
176         return mk_sort(s->get_decl_kind(), 1, &param);
177     }
178     return s;
179 }
180 
181 
init()182 void seq_decl_plugin::init() {
183     if (m_init) return;
184     ast_manager& m = *m_manager;
185     m_init = true;
186     sort* A = m.mk_uninterpreted_sort(symbol(0u));
187     sort* strT = m_string;
188     parameter paramA(A);
189     parameter paramS(strT);
190     sort* seqA = m.mk_sort(m_family_id, SEQ_SORT, 1, &paramA);
191     parameter paramSA(seqA);
192     sort* reA  = m.mk_sort(m_family_id, RE_SORT, 1, &paramSA);
193     sort* reT  = m.mk_sort(m_family_id, RE_SORT, 1, &paramS);
194     sort* boolT = m.mk_bool_sort();
195     sort* intT  = arith_util(m).mk_int();
196     sort* predA = array_util(m).mk_array_sort(A, boolT);
197     sort* seqAseqAseqA[3] = { seqA, seqA, seqA };
198     sort* seqAreAseqA[3] = { seqA, reA, seqA };
199     sort* seqAseqA[2] = { seqA, seqA };
200     sort* seqAreA[2] = { seqA, reA };
201     sort* reAreA[2] = { reA, reA };
202     sort* AreA[2] = { A, reA };
203     sort* seqAint2T[3] = { seqA, intT, intT };
204     sort* seq2AintT[3] = { seqA, seqA, intT };
205     sort* str2T[2] = { strT, strT };
206     sort* str3T[3] = { strT, strT, strT };
207     sort* strTint2T[3] = { strT, intT, intT };
208     sort* strTreT[2] = { strT, reT };
209     sort* str2TintT[3] = { strT, strT, intT };
210     sort* seqAintT[2] = { seqA, intT };
211     sort* seq3A[3] = { seqA, seqA, seqA };
212     m_sigs.resize(LAST_SEQ_OP);
213     // TBD: have (par ..) construct and load parameterized signature from premable.
214     m_sigs[OP_SEQ_UNIT]      = alloc(psig, m, "seq.unit",     1, 1, &A, seqA);
215     m_sigs[OP_SEQ_EMPTY]     = alloc(psig, m, "seq.empty",    1, 0, nullptr, seqA);
216     m_sigs[OP_SEQ_CONCAT]    = alloc(psig, m, "seq.++",       1, 2, seqAseqA, seqA);
217     m_sigs[OP_SEQ_PREFIX]    = alloc(psig, m, "seq.prefixof", 1, 2, seqAseqA, boolT);
218     m_sigs[OP_SEQ_SUFFIX]    = alloc(psig, m, "seq.suffixof", 1, 2, seqAseqA, boolT);
219     m_sigs[OP_SEQ_CONTAINS]  = alloc(psig, m, "seq.contains", 1, 2, seqAseqA, boolT);
220     m_sigs[OP_SEQ_EXTRACT]   = alloc(psig, m, "seq.extract",  1, 3, seqAint2T, seqA);
221     m_sigs[OP_SEQ_REPLACE]   = alloc(psig, m, "seq.replace",  1, 3, seq3A, seqA);
222     m_sigs[OP_SEQ_INDEX]     = alloc(psig, m, "seq.indexof",  1, 3, seq2AintT, intT);
223     m_sigs[OP_SEQ_LAST_INDEX] = alloc(psig, m, "seq.last_indexof",  1, 2, seqAseqA, intT);
224     m_sigs[OP_SEQ_AT]        = alloc(psig, m, "seq.at",       1, 2, seqAintT, seqA);
225     m_sigs[OP_SEQ_NTH]       = alloc(psig, m, "seq.nth",      1, 2, seqAintT, A);
226     m_sigs[OP_SEQ_NTH_I]     = alloc(psig, m, "seq.nth_i",    1, 2, seqAintT, A);
227     m_sigs[OP_SEQ_NTH_U]     = alloc(psig, m, "seq.nth_u",    1, 2, seqAintT, A);
228     m_sigs[OP_SEQ_LENGTH]    = alloc(psig, m, "seq.len",      1, 1, &seqA, intT);
229     m_sigs[OP_RE_PLUS]       = alloc(psig, m, "re.+",         1, 1, &reA, reA);
230     m_sigs[OP_RE_STAR]       = alloc(psig, m, "re.*",         1, 1, &reA, reA);
231     m_sigs[OP_RE_OPTION]     = alloc(psig, m, "re.opt",       1, 1, &reA, reA);
232     m_sigs[OP_RE_RANGE]      = alloc(psig, m, "re.range",     1, 2, seqAseqA,  reA);
233     m_sigs[OP_RE_CONCAT]     = alloc(psig, m, "re.++",        1, 2, reAreA, reA);
234     m_sigs[OP_RE_UNION]      = alloc(psig, m, "re.union",     1, 2, reAreA, reA);
235     m_sigs[OP_RE_INTERSECT]  = alloc(psig, m, "re.inter",     1, 2, reAreA, reA);
236     m_sigs[OP_RE_DIFF]       = alloc(psig, m, "re.diff",      1, 2, reAreA, reA);
237     m_sigs[OP_RE_LOOP]           = alloc(psig, m, "re.loop",    1, 1, &reA, reA);
238     m_sigs[OP_RE_POWER]          = alloc(psig, m, "re.^", 1, 1, &reA, reA);
239     m_sigs[OP_RE_COMPLEMENT]     = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
240     m_sigs[OP_RE_EMPTY_SET]      = alloc(psig, m, "re.empty", 1, 0, nullptr, reA);
241     m_sigs[OP_RE_FULL_SEQ_SET]   = alloc(psig, m, "re.all", 1, 0, nullptr, reA);
242     m_sigs[OP_RE_FULL_CHAR_SET]  = alloc(psig, m, "re.allchar", 1, 0, nullptr, reA);
243     m_sigs[OP_RE_OF_PRED]        = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
244     m_sigs[OP_RE_REVERSE]        = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
245     m_sigs[OP_RE_DERIVATIVE]     = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
246     m_sigs[_OP_RE_ANTIMOROV_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
247     m_sigs[OP_SEQ_TO_RE]         = alloc(psig, m, "seq.to.re",  1, 1, &seqA, reA);
248     m_sigs[OP_SEQ_IN_RE]         = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
249     m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
250     m_sigs[OP_SEQ_REPLACE_RE]    = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA);
251     m_sigs[OP_SEQ_REPLACE_ALL]   = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA);
252     m_sigs[OP_STRING_CONST]      = nullptr;
253     m_sigs[_OP_STRING_STRIDOF]   = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT);
254     m_sigs[_OP_STRING_STRREPL]   = alloc(psig, m, "str.replace", 0, 3, str3T, strT);
255     m_sigs[_OP_STRING_FROM_CHAR] = alloc(psig, m, "char", 1, 0, nullptr, strT);
256     m_sigs[OP_STRING_ITOS]       = alloc(psig, m, "str.from_int", 0, 1, &intT, strT);
257     m_sigs[OP_STRING_STOI]       = alloc(psig, m, "str.to_int", 0, 1, &strT, intT);
258     m_sigs[OP_STRING_LT]         = alloc(psig, m, "str.<", 0, 2, str2T, boolT);
259     m_sigs[OP_STRING_LE]         = alloc(psig, m, "str.<=", 0, 2, str2T, boolT);
260     m_sigs[OP_STRING_IS_DIGIT]   = alloc(psig, m, "str.is_digit", 0, 1, &strT, boolT);
261     m_sigs[OP_STRING_TO_CODE]    = alloc(psig, m, "str.to_code", 0, 1, &strT, intT);
262     m_sigs[OP_STRING_FROM_CODE]  = alloc(psig, m, "str.from_code", 0, 1, &intT, strT);
263     m_sigs[_OP_STRING_CONCAT]    = alloc(psig, m, "str.++", 1, 2, str2T, strT);
264     m_sigs[_OP_STRING_LENGTH]    = alloc(psig, m, "str.len", 0, 1, &strT, intT);
265     m_sigs[_OP_STRING_STRCTN]    = alloc(psig, m, "str.contains", 0, 2, str2T, boolT);
266     m_sigs[_OP_STRING_CHARAT]    = alloc(psig, m, "str.at", 0, 2, strTint2T, strT);
267     m_sigs[_OP_STRING_PREFIX]    = alloc(psig, m, "str.prefixof", 0, 2, str2T, boolT);
268     m_sigs[_OP_STRING_SUFFIX]    = alloc(psig, m, "str.suffixof", 0, 2, str2T, boolT);
269     m_sigs[_OP_STRING_IN_REGEXP]  = alloc(psig, m, "str.in_re", 0, 2, strTreT, boolT);
270     m_sigs[_OP_STRING_TO_REGEXP]  = alloc(psig, m, "str.to_re", 0, 1, &strT, reT);
271     m_sigs[_OP_REGEXP_EMPTY]      = alloc(psig, m, "re.none", 0, 0, nullptr, reT);
272     m_sigs[_OP_REGEXP_FULL_CHAR]  = alloc(psig, m, "re.allchar", 0, 0, nullptr, reT);
273     m_sigs[_OP_STRING_SUBSTR]     = alloc(psig, m, "str.substr", 0, 3, strTint2T, strT);
274 }
275 
mk_reglan()276 sort* seq_decl_plugin::mk_reglan() {
277     if (!m_reglan) {
278         ast_manager& m = *m_manager;
279         parameter paramS(m_string);
280         m_reglan = m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, 1, &paramS));
281         m.inc_ref(m_reglan);
282     }
283     return m_reglan;
284 }
285 
set_manager(ast_manager * m,family_id id)286 void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
287     decl_plugin::set_manager(m, id);
288     m_char_plugin = static_cast<char_decl_plugin*>(m_manager->get_plugin(m_manager->mk_family_id("char")));
289     m_char = get_char_plugin().char_sort();
290     m->inc_ref(m_char);
291     parameter param(m_char);
292     m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, &param));
293     m->inc_ref(m_string);
294 }
295 
mk_sort(decl_kind k,unsigned num_parameters,parameter const * parameters)296 sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) {
297     init();
298     ast_manager& m = *m_manager;
299     switch (k) {
300     case SEQ_SORT:
301         if (num_parameters != 1) {
302             m.raise_exception("Invalid sequence sort, expecting one parameter");
303         }
304         if (!parameters[0].is_ast() || !is_sort(parameters[0].get_ast())) {
305             m.raise_exception("invalid sequence sort, parameter is not a sort");
306         }
307         if (parameters[0].get_ast() == m_char) {
308             return m_string;
309         }
310         return m.mk_sort(symbol("Seq"), sort_info(m_family_id, SEQ_SORT, num_parameters, parameters));
311     case RE_SORT: {
312         if (num_parameters != 1) {
313             m.raise_exception("Invalid regex sort, expecting one parameter");
314         }
315         if (!parameters[0].is_ast() || !is_sort(parameters[0].get_ast())) {
316             m.raise_exception("invalid regex sort, parameter is not a sort");
317         }
318         return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters));
319     }
320     case _STRING_SORT:
321         return m_string;
322     case _REGLAN_SORT:
323         return mk_reglan();
324     default:
325         UNREACHABLE();
326         return nullptr;
327     }
328 }
329 
mk_seq_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_string)330 func_decl* seq_decl_plugin::mk_seq_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string) {
331     ast_manager& m = *m_manager;
332     sort_ref rng(m);
333     match(*m_sigs[k], arity, domain, range, rng);
334     return m.mk_func_decl(m_sigs[(domain[0] == m_string)?k_string:k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
335 }
336 
337 
mk_str_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq)338 func_decl* seq_decl_plugin::mk_str_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq) {
339     ast_manager& m = *m_manager;
340     sort_ref rng(m);
341     match(*m_sigs[k], arity, domain, range, rng);
342     return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k_seq));
343 }
344 
mk_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string)345 func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
346     return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, true);
347 }
348 
mk_left_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string)349 func_decl* seq_decl_plugin::mk_left_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
350     return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, false);
351 }
352 
mk_ubv2s(unsigned arity,sort * const * domain) const353 func_decl* seq_decl_plugin::mk_ubv2s(unsigned arity, sort* const* domain) const {
354     ast_manager& m = *m_manager;
355     if (arity != 1)
356         m.raise_exception("Invalid str.from_ubv expects one bit-vector argument");
357     bv_util bv(m);
358     if (!bv.is_bv_sort(domain[0]))
359         m.raise_exception("Invalid str.from_ubv expects one bit-vector argument");
360     sort* rng = m_string;
361     return m.mk_func_decl(symbol("str.from_ubv"), arity, domain, rng, func_decl_info(m_family_id, OP_STRING_UBVTOS));
362 }
363 
mk_sbv2s(unsigned arity,sort * const * domain) const364 func_decl* seq_decl_plugin::mk_sbv2s(unsigned arity, sort* const* domain) const {
365     ast_manager &m = *m_manager;
366     if (arity != 1)
367         m.raise_exception("Invalid str.from_sbv expects one bit-vector argument");
368     bv_util bv(m);
369     if (!bv.is_bv_sort(domain[0]))
370         m.raise_exception("Invalid str.from_sbv expects one bit-vector argument");
371     sort *rng = m_string;
372     return m.mk_func_decl(symbol("str.from_sbv"), arity, domain, rng, func_decl_info(m_family_id, OP_STRING_SBVTOS));
373 }
374 
mk_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string,bool is_right)375 func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string, bool is_right) {
376     ast_manager& m = *m_manager;
377     sort_ref rng(m);
378     if (arity == 0) {
379         m.raise_exception("Invalid function application. At least one argument expected");
380     }
381     match_assoc(*m_sigs[k], arity, domain, range, rng);
382     func_decl_info info(m_family_id, k_seq);
383     if (is_right)
384         info.set_right_associative(true);
385     info.set_left_associative(true);
386     return m.mk_func_decl(m_sigs[(rng == m_string)?k_string:k_seq]->m_name, rng, rng, rng, info);
387 }
388 
389 
mk_func_decl(decl_kind k,unsigned num_parameters,parameter const * parameters,unsigned arity,sort * const * domain,sort * range)390 func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
391                                           unsigned arity, sort * const * domain, sort * range) {
392     init();
393     m_has_seq = true;
394     ast_manager& m = *m_manager;
395     sort_ref rng(m);
396     switch(k) {
397     case OP_SEQ_EMPTY:
398         match(*m_sigs[k], arity, domain, range, rng);
399         if (rng == m_string) {
400             parameter param(zstring(""));
401             return mk_func_decl(OP_STRING_CONST, 1, &param, 0, nullptr, m_string);
402         }
403         else {
404             parameter param(rng.get());
405             func_decl_info info(m_family_id, k, 1, &param);
406             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, info);
407         }
408 
409     case OP_RE_PLUS:
410     case OP_RE_STAR:
411     case OP_RE_OPTION:
412     case OP_RE_RANGE:
413     case OP_RE_OF_PRED:
414     case OP_RE_COMPLEMENT:
415     case OP_RE_REVERSE:
416     case OP_RE_DERIVATIVE:
417     case _OP_RE_ANTIMOROV_UNION:
418         m_has_re = true;
419         // fall-through
420     case OP_SEQ_UNIT:
421     case OP_STRING_ITOS:
422     case OP_STRING_STOI:
423     case OP_STRING_LT:
424     case OP_STRING_LE:
425     case OP_STRING_IS_DIGIT:
426     case OP_STRING_TO_CODE:
427     case OP_STRING_FROM_CODE:
428         match(*m_sigs[k], arity, domain, range, rng);
429         return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
430 
431     case OP_STRING_UBVTOS:
432         return mk_ubv2s(arity, domain);
433 
434     case OP_STRING_SBVTOS:
435         return mk_sbv2s(arity, domain);
436 
437     case _OP_REGEXP_FULL_CHAR:
438         m_has_re = true;
439         if (!range) range = mk_reglan();
440         match(*m_sigs[k], arity, domain, range, rng);
441         return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_CHAR_SET));
442 
443     case OP_RE_FULL_CHAR_SET:
444         m_has_re = true;
445         if (!range) range = mk_reglan();
446         if (range == mk_reglan()) {
447             match(*m_sigs[k], arity, domain, range, rng);
448             return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, k));
449         }
450         return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
451 
452     case OP_RE_FULL_SEQ_SET:
453         m_has_re = true;
454         if (!range) range = mk_reglan();
455         return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
456 
457     case _OP_REGEXP_EMPTY:
458         m_has_re = true;
459         if (!range) range = mk_reglan();
460         match(*m_sigs[k], arity, domain, range, rng);
461         return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET));
462 
463     case OP_RE_EMPTY_SET:
464         m_has_re = true;
465         if (!range) range = mk_reglan();
466         if (range == mk_reglan()) {
467             match(*m_sigs[k], arity, domain, range, rng);
468             return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, k));
469         }
470         return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
471 
472     case OP_RE_LOOP:
473         m_has_re = true;
474         switch (arity) {
475         case 1:
476             match(*m_sigs[k], arity, domain, range, rng);
477             if (num_parameters == 0 || num_parameters > 2 || !parameters[0].is_int() || (num_parameters == 2 && !parameters[1].is_int())) {
478                 m.raise_exception("Expecting two numeral parameters to function re-loop");
479             }
480             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
481         case 2:
482             if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1])) {
483                 m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
484             }
485             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
486         case 3:
487             if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1]) || !arith_util(m).is_int(domain[2])) {
488                 m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
489             }
490             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
491         default:
492             m.raise_exception("Incorrect number of arguments passed to loop. Expected 1 regular expression and two integer parameters");
493         }
494     case OP_RE_POWER:
495         m_has_re = true;
496         if (num_parameters == 1 && parameters[0].is_int() && arity == 1 && parameters[0].get_int() >= 0) {
497             rng = domain[0];
498             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
499         }
500         m.raise_exception("Incorrect arguments used for re.^. Expected one non-negative integer parameter");
501 
502     case OP_STRING_CONST:
503         if (!(num_parameters == 1 && arity == 0 && parameters[0].is_zstring())) {
504             m.raise_exception("invalid string declaration");
505         }
506         return m.mk_const_decl(m_stringc_sym, m_string,
507                                func_decl_info(m_family_id, OP_STRING_CONST, num_parameters, parameters));
508 
509     case OP_RE_UNION:
510     case OP_RE_CONCAT:
511     case OP_RE_INTERSECT:
512     case OP_RE_DIFF:
513         m_has_re = true;
514         return mk_left_assoc_fun(k, arity, domain, range, k, k);
515 
516     case OP_SEQ_REPLACE_RE_ALL:
517     case OP_SEQ_REPLACE_RE:
518         m_has_re = true;
519     case OP_SEQ_REPLACE_ALL:
520         return mk_str_fun(k, arity, domain, range, k);
521 
522     case OP_SEQ_CONCAT:
523         return mk_assoc_fun(k, arity, domain, range, k, _OP_STRING_CONCAT);
524 
525     case _OP_STRING_CONCAT:
526         return mk_assoc_fun(k, arity, domain, range, OP_SEQ_CONCAT, k);
527 
528     case _OP_STRING_FROM_CHAR: {
529         if (!(num_parameters == 1 && parameters[0].is_int()))
530             m.raise_exception("character literal expects integer parameter");
531         zstring zs(parameters[0].get_int());
532         parameter p(zs);
533         return m.mk_const_decl(m_stringc_sym, m_string,func_decl_info(m_family_id, OP_STRING_CONST, 1, &p));
534     }
535 
536     case OP_SEQ_REPLACE:
537         return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRREPL);
538     case _OP_STRING_STRREPL:
539         return mk_str_fun(k, arity, domain, range, OP_SEQ_REPLACE);
540 
541     case OP_SEQ_INDEX:
542         if (arity == 2) {
543             sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
544             sort_ref rng(m);
545             match(*m_sigs[k], 3, dom, range, rng);
546             return m.mk_func_decl(m_sigs[(dom[0] == m_string)?_OP_STRING_STRIDOF:k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
547         }
548         return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRIDOF);
549     case _OP_STRING_STRIDOF:
550         if (arity == 2) {
551             sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
552             sort_ref rng(m);
553             match(*m_sigs[k], 3, dom, range, rng);
554             return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, OP_SEQ_INDEX));
555         }
556         return mk_str_fun(k, arity, domain, range, OP_SEQ_INDEX);
557     case OP_SEQ_LAST_INDEX:
558         if (arity != 2) {
559             m.raise_exception("two arguments expected tin last_indexof");
560         }
561         else {
562             return mk_seq_fun(k, arity, domain, range, OP_SEQ_LAST_INDEX);
563         }
564     case OP_SEQ_PREFIX:
565         return mk_seq_fun(k, arity, domain, range, _OP_STRING_PREFIX);
566     case _OP_STRING_PREFIX:
567         return mk_str_fun(k, arity, domain, range, OP_SEQ_PREFIX);
568 
569     case OP_SEQ_SUFFIX:
570         return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUFFIX);
571     case _OP_STRING_SUFFIX:
572         return mk_str_fun(k, arity, domain, range, OP_SEQ_SUFFIX);
573 
574     case OP_SEQ_LENGTH:
575         return mk_seq_fun(k, arity, domain, range, _OP_STRING_LENGTH);
576     case _OP_STRING_LENGTH:
577         return mk_str_fun(k, arity, domain, range, OP_SEQ_LENGTH);
578 
579     case OP_SEQ_CONTAINS:
580         return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRCTN);
581     case _OP_STRING_STRCTN:
582         return mk_str_fun(k, arity, domain, range, OP_SEQ_CONTAINS);
583 
584     case OP_SEQ_TO_RE:
585         m_has_re = true;
586         return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
587     case _OP_STRING_TO_REGEXP:
588         m_has_re = true;
589         return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);
590 
591     case OP_SEQ_IN_RE:
592         m_has_re = true;
593         return mk_seq_fun(k, arity, domain, range, _OP_STRING_IN_REGEXP);
594     case _OP_STRING_IN_REGEXP:
595         m_has_re = true;
596         return mk_str_fun(k, arity, domain, range, OP_SEQ_IN_RE);
597 
598     case OP_SEQ_AT:
599         return mk_seq_fun(k, arity, domain, range, _OP_STRING_CHARAT);
600     case _OP_STRING_CHARAT:
601         return mk_str_fun(k, arity, domain, range, OP_SEQ_AT);
602 
603     case OP_SEQ_NTH:
604     case OP_SEQ_NTH_I:
605     case OP_SEQ_NTH_U:
606         match(*m_sigs[k], arity, domain, range, rng);
607         return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
608 
609     case OP_SEQ_EXTRACT:
610         return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUBSTR);
611     case _OP_STRING_SUBSTR:
612         return mk_str_fun(k, arity, domain, range, OP_SEQ_EXTRACT);
613 
614     case _OP_SEQ_SKOLEM: {
615         if (num_parameters == 0 || !parameters[0].is_symbol()) {
616             m.raise_exception("first parameter to skolem symbol should be a parameter");
617         }
618         symbol s = parameters[0].get_symbol();
619         return m.mk_func_decl(s, arity, domain, range, func_decl_info(m_family_id, k, num_parameters, parameters));
620     }
621     default:
622         UNREACHABLE();
623         return nullptr;
624     }
625 }
626 
get_op_names(svector<builtin_name> & op_names,symbol const & logic)627 void seq_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol const & logic) {
628     init();
629     for (unsigned i = 0; i < m_sigs.size(); ++i) {
630         if (m_sigs[i]) {
631             op_names.push_back(builtin_name(m_sigs[i]->m_name.str(), i));
632         }
633     }
634     op_names.push_back(builtin_name("str.in.re", _OP_STRING_IN_REGEXP));
635     op_names.push_back(builtin_name("str.in-re", _OP_STRING_IN_REGEXP));
636     op_names.push_back(builtin_name("str.to.re", _OP_STRING_TO_REGEXP));
637     op_names.push_back(builtin_name("str.to-re", _OP_STRING_TO_REGEXP));
638     op_names.push_back(builtin_name("str.to-int", OP_STRING_STOI));
639     op_names.push_back(builtin_name("str.to.int", OP_STRING_STOI));
640     op_names.push_back(builtin_name("str.from-int", OP_STRING_ITOS));
641     op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
642     op_names.push_back(builtin_name("re.nostr",  _OP_REGEXP_EMPTY));
643     op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
644     op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
645     op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
646 }
647 
get_sort_names(svector<builtin_name> & sort_names,symbol const & logic)648 void seq_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {
649     init();
650     sort_names.push_back(builtin_name("Seq",   SEQ_SORT));
651     sort_names.push_back(builtin_name("RegEx", RE_SORT));
652 
653     // SMTLIB 2.6 RegLan, String
654     sort_names.push_back(builtin_name("RegLan", _REGLAN_SORT));
655     sort_names.push_back(builtin_name("String", _STRING_SORT));
656 
657     // SMTLIB 2.5 compatibility
658     sort_names.push_back(builtin_name("StringSequence", _STRING_SORT));
659 }
660 
mk_string(zstring const & s)661 app* seq_decl_plugin::mk_string(zstring const& s) {
662     parameter param(s);
663     func_decl* f = m_manager->mk_const_decl(m_stringc_sym, m_string,
664                                             func_decl_info(m_family_id, OP_STRING_CONST, 1, &param));
665     return m_manager->mk_const(f);
666 }
667 
mk_char(unsigned u)668 app* seq_decl_plugin::mk_char(unsigned u) {
669     return get_char_plugin().mk_char(u);
670 }
671 
is_considered_uninterpreted(func_decl * f)672 bool seq_decl_plugin::is_considered_uninterpreted(func_decl * f) {
673     seq_util util(*m_manager);
674     return util.str.is_nth_u(f);
675 }
676 
is_unique_value(app * e) const677 bool seq_decl_plugin::is_unique_value(app* e) const {
678     return false;
679 }
680 
is_value(app * e) const681 bool seq_decl_plugin::is_value(app* e) const {
682     while (true) {
683         if (is_app_of(e, m_family_id, OP_SEQ_EMPTY))
684             return true;
685         if (is_app_of(e, m_family_id, OP_STRING_CONST))
686             return true;
687         if (is_app_of(e, m_family_id, OP_SEQ_UNIT) &&
688             m_manager->is_value(e->get_arg(0)))
689             return true;
690         if (is_app_of(e, m_family_id, OP_SEQ_CONCAT)) {
691             bool first = true;
692             for (expr* arg : *e) {
693                 if (first) {
694                     first = false;
695                 }
696                 else if (is_app(arg) && !is_value(to_app(arg))) {
697                     return false;
698                 }
699             }
700             if (!is_app(e->get_arg(0))) return false;
701             e = to_app(e->get_arg(0));
702             continue;
703         }
704         return false;
705     }
706 }
707 
are_equal(app * a,app * b) const708 bool seq_decl_plugin::are_equal(app* a, app* b) const {
709     if (a == b) return true;
710     // handle concatenations
711     return false;
712 }
713 
are_distinct(app * a,app * b) const714 bool seq_decl_plugin::are_distinct(app* a, app* b) const {
715     if (a == b)
716         return false;
717     if (is_app_of(a, m_family_id, OP_STRING_CONST) &&
718         is_app_of(b, m_family_id, OP_STRING_CONST))
719         return true;
720     if (is_app_of(a, m_family_id, OP_SEQ_UNIT) &&
721         is_app_of(b, m_family_id, OP_SEQ_UNIT))
722         return m_manager->are_distinct(a->get_arg(0), b->get_arg(0));
723     if (is_app_of(a, m_family_id, OP_SEQ_EMPTY) &&
724         is_app_of(b, m_family_id, OP_SEQ_UNIT))
725         return true;
726     if (is_app_of(b, m_family_id, OP_SEQ_EMPTY) &&
727         is_app_of(a, m_family_id, OP_SEQ_UNIT))
728         return true;
729     return false;
730 }
731 
732 
get_some_value(sort * s)733 expr* seq_decl_plugin::get_some_value(sort* s) {
734     seq_util util(*m_manager);
735     if (util.is_seq(s)) {
736         return util.str.mk_empty(s);
737     }
738     sort* seq;
739     if (util.is_re(s, seq)) {
740         return util.re.mk_to_re(util.str.mk_empty(seq));
741     }
742     UNREACHABLE();
743     return nullptr;
744 }
745 
746 
mk_skolem(symbol const & name,unsigned n,expr * const * args,sort * range)747 app* seq_util::mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range) {
748     SASSERT(range);
749     parameter param(name);
750     func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 1, &param, n, args, range);
751     return m.mk_app(f, n, args);
752 }
753 
mk_string(zstring const & s) const754 app* seq_util::str::mk_string(zstring const& s) const {
755     return u.seq.mk_string(s);
756 }
757 
mk_char(zstring const & s,unsigned idx) const758 app* seq_util::str::mk_char(zstring const& s, unsigned idx) const {
759     return u.mk_char(s[idx]);
760 }
761 
mk_char(unsigned ch) const762 app* seq_util::str::mk_char(unsigned ch) const {
763     return u.mk_char(ch);
764 }
765 
mk_char_bit(expr * e,unsigned idx)766 app* seq_util::str::mk_char_bit(expr* e, unsigned idx) {
767     return u.mk_char_bit(e, idx);
768 }
769 
mk_char_bit(expr * e,unsigned i)770 app* seq_util::mk_char_bit(expr* e, unsigned i) {
771     parameter params[2] = { parameter(symbol("char.bit")), parameter(i) };
772     sort* range = m.mk_bool_sort();
773     func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 2, params, 1, &e, range);
774     return m.mk_app(f, 1, &e);
775 }
776 
max_plus(unsigned x,unsigned y) const777 unsigned seq_util::max_plus(unsigned x, unsigned y) const {
778     if (x + y < x || x + y < y)
779         return UINT_MAX;
780     return x + y;
781 }
782 
max_mul(unsigned x,unsigned y) const783 unsigned seq_util::max_mul(unsigned x, unsigned y) const {
784     uint64_t r = ((uint64_t)x)*((uint64_t)y);
785     return (r > UINT_MAX) ? UINT_MAX : (unsigned)r;
786 }
787 
788 
is_const_char(expr * e,unsigned & c) const789 bool seq_util::is_const_char(expr* e, unsigned& c) const {
790     return ch.is_const_char(e, c);
791 }
792 
is_char_le(expr const * e) const793 bool seq_util::is_char_le(expr const* e) const {
794     return ch.is_le(e);
795 }
796 
is_char2int(expr const * e) const797 bool seq_util::is_char2int(expr const* e) const {
798     return ch.is_to_int(e);
799 }
800 
is_bv2char(expr const * e) const801 bool seq_util::is_bv2char(expr const* e) const {
802     return ch.is_bv2char(e);
803 }
804 
is_char2bv(expr const * e) const805 bool seq_util::is_char2bv(expr const* e) const {
806     return ch.is_char2bv(e);
807 }
808 
809 
mk_char(unsigned ch) const810 app* seq_util::mk_char(unsigned ch) const {
811     return seq.mk_char(ch);
812 }
813 
mk_le(expr * ch1,expr * ch2) const814 app* seq_util::mk_le(expr* ch1, expr* ch2) const {
815     return ch.mk_le(ch1, ch2);
816 }
817 
mk_lt(expr * ch1,expr * ch2) const818 app* seq_util::mk_lt(expr* ch1, expr* ch2) const {
819     return m.mk_not(mk_le(ch2, ch1));
820 }
821 
is_string(func_decl const * f,zstring & s) const822 bool seq_util::str::is_string(func_decl const* f, zstring& s) const {
823     if (is_string(f)) {
824         s = f->get_parameter(0).get_zstring();
825         return true;
826     }
827     else {
828         return false;
829     }
830 }
831 
is_string(expr const * n,zstring & s) const832 bool seq_util::str::is_string(expr const* n, zstring& s) const {
833     return is_app(n) && is_string(to_app(n)->get_decl(), s);
834 }
835 
is_nth_i(expr const * n,expr * & s,unsigned & idx) const836 bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const {
837     expr* i = nullptr;
838     if (!is_nth_i(n, s, i)) return false;
839     return arith_util(m).is_unsigned(i, idx);
840 }
841 
mk_nth_c(expr * s,unsigned i) const842 app* seq_util::str::mk_nth_c(expr* s, unsigned i) const {
843     return mk_nth_i(s, arith_util(m).mk_int(i));
844 }
845 
get_concat(expr * e,expr_ref_vector & es) const846 void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const {
847     expr* e1, *e2;
848     while (is_concat(e, e1, e2)) {
849         get_concat(e1, es);
850         e = e2;
851     }
852     if (!is_empty(e)) {
853         es.push_back(e);
854     }
855 }
856 
857 /*
858 Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k).
859 Also returns true and assigns k=0 and l=s if s is |u|.
860 */
is_len_sub(expr const * s,expr * & l,expr * & u,rational & k) const861 bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const {
862     expr* x;
863     rational v;
864     arith_util a(m);
865     if (is_length(s, l)) {
866         k = 0;
867         return true;
868     }
869     else if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) {
870         k = v;
871         return true;
872     }
873     else if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
874         k = - v;
875         return true;
876     }
877     else if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
878         k = - v;
879         return true;
880     }
881     else
882         return false;
883 }
884 
is_unit_string(expr const * s,expr_ref & c) const885 bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const {
886     zstring z;
887     expr* ch = nullptr;
888     if (is_string(s, z) && z.length() == 1) {
889         c = mk_char(z[0]);
890         return true;
891     }
892     else if (is_unit(s, ch)) {
893         c = ch;
894         return true;
895     }
896     return false;
897 }
898 
get_concat_units(expr * e,expr_ref_vector & es) const899 void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const {
900     expr* e1, *e2;
901     while (is_concat(e, e1, e2)) {
902         get_concat_units(e1, es);
903         e = e2;
904     }
905     zstring s;
906     if (is_string(e, s)) {
907         unsigned sz = s.length();
908         for (unsigned j = 0; j < sz; ++j) {
909             es.push_back(mk_unit(mk_char(s, j)));
910         }
911     }
912     else if (!is_empty(e)) {
913         es.push_back(e);
914     }
915 }
916 
mk_is_empty(expr * s) const917 app* seq_util::str::mk_is_empty(expr* s) const {
918     return m.mk_eq(s, mk_empty(s->get_sort()));
919 }
920 
min_length(expr * s) const921 unsigned seq_util::str::min_length(expr* s) const {
922     SASSERT(u.is_seq(s));
923     unsigned result = 0;
924     expr* s1 = nullptr, *s2 = nullptr;
925     auto get_length = [&](expr* s1) {
926         zstring st;
927         if (is_unit(s1))
928             return 1u;
929         else if (is_string(s1, st))
930             return st.length();
931         else
932             return 0u;
933     };
934     while (is_concat(s, s1, s2)) {
935         if (is_concat(s1))
936             result += min_length(s1);
937         else
938             result += get_length(s1);
939         s = s2;
940     }
941     result += get_length(s);
942     return result;
943 }
944 
max_length(expr * s) const945 unsigned seq_util::str::max_length(expr* s) const {
946     SASSERT(u.is_seq(s));
947     unsigned result = 0;
948     expr* s1 = nullptr, *s2 = nullptr, *s3 = nullptr;
949     unsigned n = 0;
950     zstring st;
951     auto get_length = [&](expr* s1) {
952         if (is_empty(s1))
953             return 0u;
954         else if (is_unit(s1))
955             return 1u;
956         else if (is_at(s1))
957             return 1u;
958         else if (is_extract(s1, s1, s2, s3))
959             return (arith_util(m).is_unsigned(s3, n)) ? n : UINT_MAX;
960         else if (is_string(s1, st))
961             return st.length();
962         else
963             return UINT_MAX;
964     };
965     while (is_concat(s, s1, s2)) {
966         if (is_concat(s1))
967             result = u.max_plus(max_length(s1), result);
968         else
969             result = u.max_plus(get_length(s1), result);
970         s = s2;
971     }
972     result = u.max_plus(get_length(s), result);
973     return result;
974 }
975 
min_length(expr * r) const976 unsigned seq_util::rex::min_length(expr* r) const {
977     SASSERT(u.is_re(r));
978     return get_info(r).min_length;
979 }
980 
max_length(expr * r) const981 unsigned seq_util::rex::max_length(expr* r) const {
982     SASSERT(u.is_re(r));
983     expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
984     unsigned lo = 0, hi = 0;
985     if (is_empty(r))
986         return 0;
987     if (is_concat(r, r1, r2))
988         return u.max_plus(max_length(r1), max_length(r2));
989     if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
990         return std::max(max_length(r1), max_length(r2));
991     if (is_intersection(r, r1, r2))
992         return std::min(max_length(r1), max_length(r2));
993     if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1))
994         return max_length(r1);
995     if (is_loop(r, r1, lo, hi))
996         return u.max_mul(hi, max_length(r1));
997     if (is_to_re(r, s))
998         return u.str.max_length(s);
999     if (is_range(r) || is_of_pred(r) || is_full_char(r))
1000         return 1;
1001     // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative
1002     return UINT_MAX;
1003 }
1004 
to_seq(sort * re)1005 sort* seq_util::rex::to_seq(sort* re) {
1006     (void)u;
1007     SASSERT(u.is_re(re));
1008     return to_sort(re->get_parameter(0).get_ast());
1009 }
1010 
mk_loop(expr * r,unsigned lo)1011 app* seq_util::rex::mk_loop(expr* r, unsigned lo) {
1012     parameter param(lo);
1013     return m.mk_app(m_fid, OP_RE_LOOP, 1, &param, 1, &r);
1014 }
1015 
mk_loop(expr * r,unsigned lo,unsigned hi)1016 app* seq_util::rex::mk_loop(expr* r, unsigned lo, unsigned hi) {
1017     parameter params[2] = { parameter(lo), parameter(hi) };
1018     return m.mk_app(m_fid, OP_RE_LOOP, 2, params, 1, &r);
1019 }
1020 
mk_loop(expr * r,expr * lo)1021 app* seq_util::rex::mk_loop(expr* r, expr* lo) {
1022     expr* rs[2] = { r, lo };
1023     return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 2, rs);
1024 }
1025 
mk_loop(expr * r,expr * lo,expr * hi)1026 app* seq_util::rex::mk_loop(expr* r, expr* lo, expr* hi) {
1027     expr* rs[3] = { r, lo, hi };
1028     return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 3, rs);
1029 }
1030 
mk_full_char(sort * s)1031 app* seq_util::rex::mk_full_char(sort* s) {
1032     return m.mk_app(m_fid, OP_RE_FULL_CHAR_SET, 0, nullptr, 0, nullptr, s);
1033 }
1034 
mk_full_seq(sort * s)1035 app* seq_util::rex::mk_full_seq(sort* s) {
1036     return m.mk_app(m_fid, OP_RE_FULL_SEQ_SET, 0, nullptr, 0, nullptr, s);
1037 }
1038 
mk_empty(sort * s)1039 app* seq_util::rex::mk_empty(sort* s) {
1040     return m.mk_app(m_fid, OP_RE_EMPTY_SET, 0, nullptr, 0, nullptr, s);
1041 }
1042 
mk_of_pred(expr * p)1043 app* seq_util::rex::mk_of_pred(expr* p) {
1044     return m.mk_app(m_fid, OP_RE_OF_PRED, 0, nullptr, 1, &p);
1045 }
1046 
is_loop(expr const * n,expr * & body,unsigned & lo,unsigned & hi) const1047 bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const {
1048     if (is_loop(n)) {
1049         app const* a = to_app(n);
1050         if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 2) {
1051             body = a->get_arg(0);
1052             lo = a->get_decl()->get_parameter(0).get_int();
1053             hi = a->get_decl()->get_parameter(1).get_int();
1054             return true;
1055         }
1056     }
1057     return false;
1058 }
1059 
is_loop(expr const * n,expr * & body,unsigned & lo) const1060 bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo) const {
1061     if (is_loop(n)) {
1062         app const* a = to_app(n);
1063         if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 1) {
1064             body = a->get_arg(0);
1065             lo = a->get_decl()->get_parameter(0).get_int();
1066             return true;
1067         }
1068     }
1069     return false;
1070 }
1071 
is_loop(expr const * n,expr * & body,expr * & lo,expr * & hi) const1072 bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const {
1073     if (is_loop(n)) {
1074         app const* a = to_app(n);
1075         if (a->get_num_args() == 3) {
1076             body = a->get_arg(0);
1077             lo = a->get_arg(1);
1078             hi = a->get_arg(2);
1079             return true;
1080         }
1081     }
1082     return false;
1083 }
1084 
is_loop(expr const * n,expr * & body,expr * & lo) const1085 bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo) const {
1086     if (is_loop(n)) {
1087         app const* a = to_app(n);
1088         if (a->get_num_args() == 2) {
1089             body = a->get_arg(0);
1090             lo = a->get_arg(1);
1091             return true;
1092         }
1093     }
1094     return false;
1095 }
1096 
1097 /**
1098    Returns true iff e is the epsilon regex.
1099  */
is_epsilon(expr * r) const1100 bool seq_util::rex::is_epsilon(expr* r) const {
1101     expr* s;
1102     return is_to_re(r, s) && u.str.is_empty(s);
1103 }
1104 /**
1105    Makes the epsilon regex for a given sequence sort.
1106  */
mk_epsilon(sort * seq_sort)1107 app* seq_util::rex::mk_epsilon(sort* seq_sort) {
1108     return mk_to_re(u.str.mk_empty(seq_sort));
1109 }
1110 
1111 /*
1112   Produces compact view of concrete concatenations such as (abcd).
1113 */
print_seq(std::ostream & out,expr * s) const1114 bool seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const {
1115     zstring z;
1116     expr* x, * j, * k, * l, * i, * x_;
1117     if (re.u.str.is_empty(s))
1118         out << "()";
1119     else if (re.u.str.is_concat(s)) {
1120         expr_ref_vector es(re.m);
1121         re.u.str.get_concat(s, es);
1122         for (expr* e : es)
1123             print(out, e);
1124     }
1125     else if (re.u.str.is_string(s, z)) {
1126         for (unsigned i = 0; i < z.length(); i++)
1127             out << (char)z[i];
1128     }
1129     else if (re.u.str.is_at(s, x, i))
1130         print(out, x) << "@", print(out, i);
1131     else if (re.u.str.is_extract(s, x, j, k)) {
1132         rational jv, iv;
1133         print(out, x);
1134         if (arith_util(re.m).is_numeral(j, jv)) {
1135             if (arith_util(re.m).is_numeral(k, iv)) {
1136                 // output X[j,k]
1137                 out << "[" << jv.get_int32() << "," << jv.get_int32() << "]";
1138             }
1139             else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ &&
1140                 arith_util(re.m).is_numeral(i, iv) && iv == jv) {
1141                 // case X[j,|X|-j] is denoted by X[j..]
1142                 out << "[" << jv.get_int32() << "..]";
1143             }
1144             else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) ||
1145                 (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ &&
1146                 arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) {
1147                 // case X[j,|X|-j] is denoted by X[j..]
1148                 out << "[" << jv.get_int32() << "..]";
1149             }
1150             else {
1151                 out << "[" << jv.get_int32() << ",";
1152                 print(out, k);
1153                 out << "]";
1154             }
1155         }
1156         else {
1157             out << "[";
1158             print(out, j);
1159             out << ",";
1160             print(out, k);
1161             out << "]";
1162         }
1163     }
1164     else
1165         return false;
1166     return true;
1167 }
1168 
1169 /*
1170   Produces output such as [a-z] for a range.
1171 */
print_range(std::ostream & out,expr * s1,expr * s2) const1172 std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const {
1173     out << "[";
1174     print(out, s1);
1175     out << "-";
1176     print(out, s2);
1177     out << "]";
1178     return out;
1179 }
1180 
1181 /*
1182   Checks if parenthesis can be omitted in some cases in a loop body or in complement.
1183 */
can_skip_parenth(expr * r) const1184 bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
1185     expr* s;
1186     return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r) || re.is_full_char(r));
1187 }
1188 
1189 /*
1190   Specialize output for a unit sequence converting to visible ASCII characters if possible.
1191 */
print_unit(std::ostream & out,expr * s) const1192 bool seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const {
1193     expr* e, * i;
1194     unsigned n = 0;
1195 
1196     if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
1197         char c = (char)n;
1198         if (c == '\n')
1199             out << "\\n";
1200         else if (c == '\r')
1201             out << "\\r";
1202         else if (c == '\f')
1203             out << "\\f";
1204         else if (32 <= n && n < 127 && n != '\"' && n != ' '
1205             && n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']'
1206             && n != '{' && n != '}' && n != '&') {
1207             if (html_encode) {
1208                 if (c == '<')
1209                     out << "&lt;";
1210                 else if (c == '>')
1211                     out << "&gt;";
1212                 //else if (c == '&')
1213                 //    out << "&amp;";
1214                 //else if (c == '\"')
1215                 //    out << "&quot;";
1216                 else
1217                     //out << "\\x" << std::hex << n;
1218                     out << c;
1219             }
1220             else
1221                 out << c;
1222         }
1223         else if (n <= 0xF)
1224             out << "\\x0" << std::hex << n;
1225         else if (n <= 0xFF)
1226             out << "\\x" << std::hex << n;
1227         else if (n <= 0xFFF)
1228             out << "\\u0" << std::hex << n;
1229         else
1230             out << "\\u" << std::hex << n;
1231     }
1232     else if (re.u.str.is_nth_i(s, e, i)) {
1233         print(out, e) << "[";
1234         print(out, i) << "]";
1235     }
1236     else if (re.u.str.is_length(s, e))
1237         print(out << "|", e) << "|";
1238     else
1239         return false;
1240     return true;
1241 }
1242 
1243 /*
1244   Pretty prints the regex r into the ostream out
1245 */
print(std::ostream & out,expr * e) const1246 std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
1247     expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
1248     unsigned lo = 0, hi = 0;
1249     arith_util a(re.m);
1250     rational v;
1251     if (!e)
1252         out << "null";
1253     else if (print_unit(out, e))
1254         ;
1255     else if (print_seq(out, e))
1256         ;
1257     else if (re.is_full_char(e))
1258         out << ".";
1259     else if (re.is_full_seq(e))
1260         out << ".*";
1261     else if (re.is_to_re(e, s))
1262         print(out, s);
1263     else if (re.is_range(e, s, s2))
1264         print_range(out, s, s2);
1265     else if (re.is_epsilon(e))
1266         // &#x03B5; = epsilon
1267         out << (html_encode ? "&#x03B5;" : "()");
1268     else if (re.is_empty(e))
1269         // &#x2205; = emptyset
1270         out << (html_encode ? "&#x2205;" : "[]");
1271     else if (re.is_concat(e, r1, r2)) {
1272         print(out, r1);
1273         print(out, r2);
1274     }
1275     else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
1276         out << "(";
1277         print(out, r1);
1278         out << (html_encode ? "&#x22C3;" : "|");
1279         print(out, r2);
1280         out << ")";
1281     }
1282     else if (re.is_intersection(e, r1, r2)) {
1283         out << "(";
1284         print(out, r1);
1285         out << (html_encode ? "&#x22C2;" : "&");
1286         print(out, r2);
1287         out << ")";
1288     }
1289     else if (re.is_complement(e, r1)) {
1290         out << "~";
1291         if (can_skip_parenth(r1))
1292             print(out, r1);
1293         else {
1294             out << "(";
1295             print(out, r1);
1296             out << ")";
1297         }
1298     }
1299     else if (re.is_plus(e, r1)) {
1300         if (can_skip_parenth(r1)) {
1301             print(out, r1);
1302             out << "+";
1303         }
1304         else {
1305             out << "(";
1306             print(out, r1);
1307             out << ")+";
1308         }
1309     }
1310     else if (re.is_star(e, r1)) {
1311         if (can_skip_parenth(r1)) {
1312             print(out, r1);
1313             out << "*";
1314         }
1315         else {
1316             out << "(";
1317             print(out, r1);
1318             out << ")*";
1319         }
1320     }
1321     else if (re.is_loop(e, r1, lo)) {
1322         if (can_skip_parenth(r1))
1323             print(out, r1) << "{" << lo << ",}";
1324         else {
1325             out << "(";
1326             print(out, r1);
1327             out << "){" << lo << ",}";
1328         }
1329     }
1330     else if (re.is_loop(e, r1, lo, hi)) {
1331         if (can_skip_parenth(r1)) {
1332             print(out, r1);
1333             if (lo == hi)
1334                 out << "{" << lo << "}";
1335             else
1336                 out << "{" << lo << "," << hi << "}";
1337         }
1338         else {
1339             out << "(";
1340             print(out, r1);
1341             if (lo == hi)
1342                 out << "){" << lo << "}";
1343             else
1344                 out << "){" << lo << "," << hi << "}";
1345         }
1346     }
1347     else if (re.is_diff(e, r1, r2)) {
1348         out << "(";
1349         print(out, r1);
1350         out << ")\\(";
1351         print(out, r2);
1352         out << ")";
1353     }
1354     else if (re.m.is_ite(e, s, r1, r2)) {
1355         out << (html_encode ? "(&#x1D422;&#x1D41F; " : "(if ");
1356         print(out, s);
1357         out << (html_encode ? " &#x1D42D;&#x1D5F5;&#x1D41E;&#x1D427; " : " then ");
1358         print(out, r1);
1359         out << (html_encode ? " &#x1D41E;&#x1D425;&#x1D600;&#x1D41E; " : " else ");
1360         print(out, r2);
1361         out << ")";
1362     }
1363     else if (re.is_opt(e, r1)) {
1364         if (can_skip_parenth(r1))
1365             print(out, r1) << "?";
1366         else {
1367             out << "(";
1368             print(out, r1);
1369             out << ")?";
1370         }
1371     }
1372     else if (re.is_reverse(e, r1)) {
1373         out << "(reverse ";
1374         print(out, r1);
1375         out << ")";
1376     }
1377     else if (re.m.is_eq(e, r1, r2)) {
1378         out << "(";
1379         print(out, r1);
1380         out << " = ";
1381         print(out, r2);
1382         out << ")";
1383     }
1384     else if (re.m.is_not(e, r1)) {
1385         out << "!";
1386         print(out, r1);
1387     }
1388     else if (a.is_add(e, s, s2) && a.is_numeral(s, v) && v < 0)
1389         print(out, s2) << " - " << -v;
1390     else if (a.is_add(e, s, s2) && a.is_numeral(s2, v) && v < 0)
1391         print(out, s) << " - " << -v;
1392     else if (a.is_add(e, s, s2))
1393         print(out, s) << " + ", print(out, s2);
1394     else if (a.is_sub(e, s, s2) && a.is_numeral(s2, v) && v > 0)
1395         print(out, s) << " - " << v;
1396     else if (a.is_le(e, s, s2))
1397         print(out << "(", s) << " <= ", print(out, s2) << ")";
1398     else if (re.m.is_value(e))
1399         out << mk_pp(e, re.m);
1400     else if (is_app(e) && to_app(e)->get_num_args() == 0)
1401         out << mk_pp(e, re.m);
1402     else if (is_app(e)) {
1403         out << "(" << to_app(e)->get_decl()->get_name();
1404         for (expr* arg : *to_app(e))
1405             print(out << " ", arg);
1406         out << ")";
1407     }
1408     else
1409         // for all remaining cases use the default pretty printer
1410         out << mk_pp(e, re.m);
1411     return out;
1412 }
1413 
display(std::ostream & out) const1414 std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
1415     return print(out, ex);
1416 }
1417 
1418 /*
1419   Pretty prints the regex r into the output string
1420 */
to_str(expr * r) const1421 std::string seq_util::rex::to_str(expr* r) const {
1422     std::ostringstream out;
1423     pp(u.re, r, false).display(out);
1424     return out.str();
1425 }
1426 
1427 /*
1428   Pretty prints the regex r into the output string that is htmlencoded
1429 */
to_strh(expr * r) const1430 std::string seq_util::rex::to_strh(expr* r) const {
1431     std::ostringstream out;
1432     pp(u.re, r, true).display(out);
1433     return out.str();
1434 }
1435 
1436 /*
1437   Returns true iff info has been computed for the regex r
1438 */
has_valid_info(expr * r) const1439 bool seq_util::rex::has_valid_info(expr* r) const {
1440     return r->get_id() < m_infos.size() && m_infos[r->get_id()].is_valid();
1441 }
1442 
1443 /*
1444   Returns the info in the cache if the info is valid. Returns invalid_info otherwise.
1445 */
get_cached_info(expr * e) const1446 seq_util::rex::info seq_util::rex::get_cached_info(expr* e) const {
1447     if (has_valid_info(e))
1448         return m_infos[e->get_id()];
1449     else
1450         return invalid_info;
1451 }
1452 
1453 /*
1454   Get the information value associated with the regular expression e
1455 */
get_info(expr * e) const1456 seq_util::rex::info seq_util::rex::get_info(expr* e) const
1457 {
1458     SASSERT(u.is_re(e));
1459     auto result = get_cached_info(e);
1460     if (result.is_valid())
1461         return result;
1462     m_info_pinned.push_back(e);
1463     return get_info_rec(e);
1464 }
1465 
1466 /*
1467   Gets the info value for the given regex e, possibly making a new info recursively over the structure of e.
1468 */
get_info_rec(expr * e) const1469 seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
1470     auto result = get_cached_info(e);
1471     if (result.is_valid())
1472         return result;
1473     if (!is_app(e))
1474         result = unknown_info;
1475     else
1476         result = mk_info_rec(to_app(e));
1477     m_infos.setx(e->get_id(), result, invalid_info);
1478     STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;);
1479     return result;
1480 }
1481 
1482 /*
1483   Computes the info value for the given regex e recursively over the structure of e.
1484   The regex e does not yet have an entry in the cache.
1485 */
mk_info_rec(app * e) const1486 seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
1487     info i1, i2;
1488     lbool nullable(l_false);
1489     unsigned min_length(0), lower_bound(0), upper_bound(UINT_MAX);
1490     bool is_value(false);
1491     if (e->get_family_id() == u.get_family_id()) {
1492         switch (e->get_decl()->get_decl_kind()) {
1493         case OP_RE_EMPTY_SET:
1494             return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
1495         case OP_RE_FULL_SEQ_SET:
1496             return info(true, true, true, true, true, true, false, l_true, 0, 1);
1497         case OP_RE_STAR:
1498             i1 = get_info_rec(e->get_arg(0));
1499             return i1.star();
1500         case OP_RE_OPTION:
1501             i1 = get_info_rec(e->get_arg(0));
1502             return i1.opt();
1503         case OP_RE_RANGE:
1504         case OP_RE_FULL_CHAR_SET:
1505         case OP_RE_OF_PRED:
1506             //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
1507             //TBD: check if the range is unsat
1508             return info(true, true, true, true, true, true, true, l_false, 1, 0);
1509         case OP_RE_CONCAT:
1510             i1 = get_info_rec(e->get_arg(0));
1511             i2 = get_info_rec(e->get_arg(1));
1512             return i1.concat(i2, u.re.is_concat(e->get_arg(0)));
1513         case OP_RE_UNION:
1514             i1 = get_info_rec(e->get_arg(0));
1515             i2 = get_info_rec(e->get_arg(1));
1516             return i1.disj(i2);
1517         case OP_RE_INTERSECT:
1518             i1 = get_info_rec(e->get_arg(0));
1519             i2 = get_info_rec(e->get_arg(1));
1520             return i1.conj(i2);
1521         case OP_SEQ_TO_RE:
1522             min_length = u.str.min_length(e->get_arg(0));
1523             is_value = m.is_value(e->get_arg(0));
1524             nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
1525             return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0);
1526         case OP_RE_REVERSE:
1527             return get_info_rec(e->get_arg(0));
1528         case OP_RE_PLUS:
1529             i1 = get_info_rec(e->get_arg(0));
1530             return i1.plus();
1531         case OP_RE_COMPLEMENT:
1532             i1 = get_info_rec(e->get_arg(0));
1533             return i1.complement();
1534         case OP_RE_LOOP:
1535             i1 = get_info_rec(e->get_arg(0));
1536             if (e->get_decl()->get_num_parameters() >= 1)
1537                 lower_bound = e->get_decl()->get_parameter(0).get_int();
1538             if (e->get_decl()->get_num_parameters() == 2)
1539                 upper_bound = e->get_decl()->get_parameter(1).get_int();
1540             return i1.loop(lower_bound, upper_bound);
1541         case OP_RE_DIFF:
1542             i1 = get_info_rec(e->get_arg(0));
1543             i2 = get_info_rec(e->get_arg(1));
1544             return i1.diff(i2);
1545         }
1546         return unknown_info;
1547     }
1548     expr* c, * t, * f;
1549     if (u.m.is_ite(e, c, t, f)) {
1550         i1 = get_info_rec(t);
1551         i2 = get_info_rec(f);
1552         return i1.orelse(i2);
1553     }
1554     return unknown_info;
1555 }
1556 
display(std::ostream & out) const1557 std::ostream& seq_util::rex::info::display(std::ostream& out) const {
1558     if (is_known()) {
1559         out << "info("
1560             << "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
1561             << "classical=" << (classical ? "T" : "F") << ", "
1562             << "standard=" << (standard ? "T" : "F") << ", "
1563             << "nonbranching=" << (nonbranching ? "T" : "F") << ", "
1564             << "normalized=" << (normalized ? "T" : "F") << ", "
1565             << "monadic=" << (monadic ? "T" : "F") << ", "
1566             << "singleton=" << (singleton ? "T" : "F") << ", "
1567             << "min_length=" << min_length << ", "
1568             << "star_height=" << star_height << ")";
1569     }
1570     else if (is_valid())
1571         out << "UNKNOWN";
1572     else
1573         out << "INVALID";
1574     return out;
1575 }
1576 
1577 /*
1578   String representation of the info.
1579 */
str() const1580 std::string seq_util::rex::info::str() const {
1581     std::ostringstream out;
1582     display(out);
1583     return out.str();
1584 }
1585 
star() const1586 seq_util::rex::info seq_util::rex::info::star() const {
1587     //if is_known() is false then all mentioned properties will remain false
1588     return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1);
1589 }
1590 
plus() const1591 seq_util::rex::info seq_util::rex::info::plus() const {
1592     if (is_known()) {
1593         //plus never occurs in a normalized regex
1594         return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1);
1595     }
1596     else
1597         return *this;
1598 }
1599 
opt() const1600 seq_util::rex::info seq_util::rex::info::opt() const {
1601     // if is_known() is false then all mentioned properties will remain false
1602     // optional construct never occurs in a normalized regex
1603     return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height);
1604 }
1605 
complement() const1606 seq_util::rex::info seq_util::rex::info::complement() const {
1607     if (is_known()) {
1608         lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
1609         unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
1610         return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height);
1611     }
1612     else
1613         return *this;
1614 }
1615 
concat(seq_util::rex::info const & rhs,bool lhs_is_concat) const1616 seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const {
1617     if (is_known()) {
1618         if (rhs.is_known()) {
1619             unsigned m = min_length + rhs.min_length;
1620             if (m < min_length || m < rhs.min_length)
1621                 m = UINT_MAX;
1622             return info(classical & rhs.classical,
1623                 classical && rhs.classical, //both args of concat must be classical for it to be standard
1624                 interpreted && rhs.interpreted,
1625                 nonbranching && rhs.nonbranching,
1626                 (normalized && !lhs_is_concat && rhs.normalized),
1627                 monadic && rhs.monadic,
1628                 false,
1629                 ((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
1630                 m,
1631                 std::max(star_height, rhs.star_height));
1632         }
1633         else
1634             return rhs;
1635     }
1636     else
1637         return *this;
1638 }
1639 
disj(seq_util::rex::info const & rhs) const1640 seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
1641     if (is_known() || rhs.is_known()) {
1642         //works correctly if one of the arguments is unknown
1643         return info(classical & rhs.classical,
1644             standard && rhs.standard,
1645             interpreted && rhs.interpreted,
1646             nonbranching && rhs.nonbranching,
1647             normalized && rhs.normalized,
1648             monadic && rhs.monadic,
1649             singleton && rhs.singleton,
1650             ((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
1651             std::min(min_length, rhs.min_length),
1652             std::max(star_height, rhs.star_height));
1653     }
1654     else
1655         return rhs;
1656 }
1657 
conj(seq_util::rex::info const & rhs) const1658 seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
1659     if (is_known()) {
1660         if (rhs.is_known()) {
1661             return info(false,
1662                 standard && rhs.standard,
1663                 interpreted && rhs.interpreted,
1664                 nonbranching && rhs.nonbranching,
1665                 normalized && rhs.normalized,
1666                 monadic && rhs.monadic,
1667                 singleton && rhs.singleton,
1668                 ((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
1669                 std::max(min_length, rhs.min_length),
1670                 std::max(star_height, rhs.star_height));
1671         }
1672         else
1673             return rhs;
1674     }
1675     else
1676         return *this;
1677 }
1678 
diff(seq_util::rex::info const & rhs) const1679 seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
1680     if (is_known()) {
1681         if (rhs.is_known()) {
1682             return info(false,
1683                 standard & rhs.standard,
1684                 interpreted & rhs.interpreted,
1685                 nonbranching & rhs.nonbranching,
1686                 normalized & rhs.normalized,
1687                 monadic & rhs.monadic,
1688                 false,
1689                 ((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
1690                 std::max(min_length, rhs.min_length),
1691                 std::max(star_height, rhs.star_height));
1692         }
1693         else
1694             return rhs;
1695     }
1696     else
1697         return *this;
1698 }
1699 
orelse(seq_util::rex::info const & i) const1700 seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
1701     if (is_known()) {
1702         if (i.is_known()) {
1703             // unsigned ite_min_length = std::min(min_length, i.min_length);
1704             // lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef);
1705             // TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
1706             return info(false, false, false, false,
1707                 normalized && i.normalized,
1708                 monadic && i.monadic,
1709                 singleton && i.singleton,
1710                 ((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)),
1711                 std::min(min_length, i.min_length),
1712                 std::max(star_height, i.star_height));
1713         }
1714         else
1715             return i;
1716     }
1717     else
1718         return *this;
1719 }
1720 
loop(unsigned lower,unsigned upper) const1721 seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const {
1722     if (is_known()) {
1723         unsigned m = min_length * lower;
1724         // Code review: this is not a complete overflow check.
1725         if (m > 0 && (m < min_length || m < lower))
1726             m = UINT_MAX;
1727         lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
1728         if (upper == UINT_MAX) {
1729             // this means the loop is r{lower,*} and is therefore not normalized
1730             // normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r*
1731             return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1);
1732         }
1733         else {
1734             bool loop_normalized = normalized;
1735             // r{lower,upper} is not normalized if r is nullable but lower > 0
1736             // r{0,1} is not normalized: it should be ()|r
1737             // r{1,1} is not normalized: it should be r
1738             // r{lower,upper} is not normalized if lower > upper it should then be [] (empty)
1739             if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper)
1740                 loop_normalized = false;
1741             return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
1742         }
1743     }
1744     else
1745         return *this;
1746 }
1747 
1748 
1749