1 /*++
2 Copyright (c) 2011 Microsoft Corporation
3
4 Module Name:
5
6 seq_decl_plugin.h
7
8 Abstract:
9
10 decl_plugin for the theory of sequences
11
12 Author:
13
14 Nikolaj Bjorner (nbjorner) 2011-14-11
15
16 Revision History:
17
18 --*/
19 #include "ast/seq_decl_plugin.h"
20 #include "ast/arith_decl_plugin.h"
21 #include "ast/array_decl_plugin.h"
22 #include "ast/ast_pp.h"
23 #include <sstream>
24
25
seq_decl_plugin()26 seq_decl_plugin::seq_decl_plugin(): m_init(false),
27 m_stringc_sym("String"),
28 m_string(nullptr),
29 m_char(nullptr),
30 m_reglan(nullptr),
31 m_has_re(false),
32 m_has_seq(false) {
33 }
34
finalize()35 void seq_decl_plugin::finalize() {
36 for (psig* s : m_sigs)
37 dealloc(s);
38 m_manager->dec_ref(m_string);
39 m_manager->dec_ref(m_char);
40 m_manager->dec_ref(m_reglan);
41 }
42
is_sort_param(sort * s,unsigned & idx)43 bool seq_decl_plugin::is_sort_param(sort* s, unsigned& idx) {
44 return
45 s->get_name().is_numerical() &&
46 (idx = s->get_name().get_num(), true);
47 }
48
match(ptr_vector<sort> & binding,sort * s,sort * sP)49 bool seq_decl_plugin::match(ptr_vector<sort>& binding, sort* s, sort* sP) {
50 if (s == sP) return true;
51 unsigned idx;
52 if (is_sort_param(sP, idx)) {
53 if (binding.size() <= idx) binding.resize(idx+1);
54 if (binding[idx] && (binding[idx] != s)) return false;
55 binding[idx] = s;
56 return true;
57 }
58
59
60 if (s->get_family_id() == sP->get_family_id() &&
61 s->get_decl_kind() == sP->get_decl_kind() &&
62 s->get_num_parameters() == sP->get_num_parameters()) {
63 for (unsigned i = 0, sz = s->get_num_parameters(); i < sz; ++i) {
64 parameter const& p = s->get_parameter(i);
65 if (p.is_ast() && is_sort(p.get_ast())) {
66 parameter const& p2 = sP->get_parameter(i);
67 if (!match(binding, to_sort(p.get_ast()), to_sort(p2.get_ast()))) return false;
68 }
69 }
70 return true;
71 }
72 else {
73 TRACE("seq", tout << "Could not match " << mk_pp(s, *m_manager) << " and " << mk_pp(sP, *m_manager) << "\n";);
74 return false;
75 }
76 }
77
78 /*
79 \brief match right associative operator.
80 */
match_assoc(psig & sig,unsigned dsz,sort * const * dom,sort * range,sort_ref & range_out)81 void seq_decl_plugin::match_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
82 ptr_vector<sort> binding;
83 ast_manager& m = *m_manager;
84 if (dsz == 0) {
85 std::ostringstream strm;
86 strm << "Unexpected number of arguments to '" << sig.m_name << "' ";
87 strm << "at least one argument expected " << dsz << " given";
88 m.raise_exception(strm.str());
89 }
90 bool is_match = true;
91 for (unsigned i = 0; is_match && i < dsz; ++i) {
92 SASSERT(dom[i]);
93 is_match = match(binding, dom[i], sig.m_dom.get(0));
94 }
95 if (range && is_match) {
96 is_match = match(binding, range, sig.m_range);
97 }
98 if (!is_match) {
99 std::ostringstream strm;
100 strm << "Sort of function '" << sig.m_name << "' ";
101 strm << "does not match the declared type. Given domain: ";
102 for (unsigned i = 0; i < dsz; ++i) {
103 strm << mk_pp(dom[i], m) << " ";
104 }
105 if (range) {
106 strm << " and range: " << mk_pp(range, m);
107 }
108 m.raise_exception(strm.str());
109 }
110 range_out = apply_binding(binding, sig.m_range);
111 SASSERT(range_out);
112 }
113
match(psig & sig,unsigned dsz,sort * const * dom,sort * range,sort_ref & range_out)114 void seq_decl_plugin::match(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
115 m_binding.reset();
116 ast_manager& m = *m_manager;
117 if (sig.m_dom.size() != dsz) {
118 std::ostringstream strm;
119 strm << "Unexpected number of arguments to '" << sig.m_name << "' ";
120 strm << sig.m_dom.size() << " arguments expected " << dsz << " given";
121 m.raise_exception(strm.str());
122 }
123 bool is_match = true;
124 for (unsigned i = 0; is_match && i < dsz; ++i) {
125 is_match = match(m_binding, dom[i], sig.m_dom[i].get());
126 }
127 if (range && is_match) {
128 is_match = match(m_binding, range, sig.m_range);
129 }
130 if (!is_match) {
131 std::ostringstream strm;
132 strm << "Sort of polymorphic function '" << sig.m_name << "' ";
133 strm << "does not match the declared type. ";
134 strm << "\nGiven domain: ";
135 for (unsigned i = 0; i < dsz; ++i) {
136 strm << mk_pp(dom[i], m) << " ";
137 }
138 if (range) {
139 strm << " and range: " << mk_pp(range, m);
140 }
141 strm << "\nExpected domain: ";
142 for (unsigned i = 0; i < dsz; ++i) {
143 strm << mk_pp(sig.m_dom[i].get(), m) << " ";
144 }
145
146 m.raise_exception(strm.str());
147 }
148 if (!range && dsz == 0) {
149 std::ostringstream strm;
150 strm << "Sort of polymorphic function '" << sig.m_name << "' ";
151 strm << "is ambiguous. Function takes no arguments and sort of range has not been constrained";
152 m.raise_exception(strm.str());
153 }
154 range_out = apply_binding(m_binding, sig.m_range);
155 SASSERT(range_out);
156 }
157
apply_binding(ptr_vector<sort> const & binding,sort * s)158 sort* seq_decl_plugin::apply_binding(ptr_vector<sort> const& binding, sort* s) {
159 unsigned i;
160 if (is_sort_param(s, i)) {
161 if (binding.size() <= i || !binding[i]) {
162 m_manager->raise_exception("Expecting type parameter to be bound");
163 }
164 return binding[i];
165 }
166 if (is_sort_of(s, m_family_id, SEQ_SORT) || is_sort_of(s, m_family_id, RE_SORT)) {
167 SASSERT(s->get_num_parameters() == 1);
168 SASSERT(s->get_parameter(0).is_ast());
169 SASSERT(is_sort(s->get_parameter(0).get_ast()));
170 sort* p = apply_binding(binding, to_sort(s->get_parameter(0).get_ast()));
171 parameter param(p);
172 if (p == m_char && s->get_decl_kind() == SEQ_SORT)
173 return m_string;
174 if (p == m_string && s->get_decl_kind() == RE_SORT)
175 return mk_reglan();
176 return mk_sort(s->get_decl_kind(), 1, ¶m);
177 }
178 return s;
179 }
180
181
init()182 void seq_decl_plugin::init() {
183 if (m_init) return;
184 ast_manager& m = *m_manager;
185 m_init = true;
186 sort* A = m.mk_uninterpreted_sort(symbol(0u));
187 sort* strT = m_string;
188 parameter paramA(A);
189 parameter paramS(strT);
190 sort* seqA = m.mk_sort(m_family_id, SEQ_SORT, 1, ¶mA);
191 parameter paramSA(seqA);
192 sort* reA = m.mk_sort(m_family_id, RE_SORT, 1, ¶mSA);
193 sort* reT = m.mk_sort(m_family_id, RE_SORT, 1, ¶mS);
194 sort* boolT = m.mk_bool_sort();
195 sort* intT = arith_util(m).mk_int();
196 sort* predA = array_util(m).mk_array_sort(A, boolT);
197 sort* seqAseqAseqA[3] = { seqA, seqA, seqA };
198 sort* seqAreAseqA[3] = { seqA, reA, seqA };
199 sort* seqAseqA[2] = { seqA, seqA };
200 sort* seqAreA[2] = { seqA, reA };
201 sort* reAreA[2] = { reA, reA };
202 sort* AreA[2] = { A, reA };
203 sort* seqAint2T[3] = { seqA, intT, intT };
204 sort* seq2AintT[3] = { seqA, seqA, intT };
205 sort* str2T[2] = { strT, strT };
206 sort* str3T[3] = { strT, strT, strT };
207 sort* strTint2T[3] = { strT, intT, intT };
208 sort* strTreT[2] = { strT, reT };
209 sort* str2TintT[3] = { strT, strT, intT };
210 sort* seqAintT[2] = { seqA, intT };
211 sort* seq3A[3] = { seqA, seqA, seqA };
212 m_sigs.resize(LAST_SEQ_OP);
213 // TBD: have (par ..) construct and load parameterized signature from premable.
214 m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq.unit", 1, 1, &A, seqA);
215 m_sigs[OP_SEQ_EMPTY] = alloc(psig, m, "seq.empty", 1, 0, nullptr, seqA);
216 m_sigs[OP_SEQ_CONCAT] = alloc(psig, m, "seq.++", 1, 2, seqAseqA, seqA);
217 m_sigs[OP_SEQ_PREFIX] = alloc(psig, m, "seq.prefixof", 1, 2, seqAseqA, boolT);
218 m_sigs[OP_SEQ_SUFFIX] = alloc(psig, m, "seq.suffixof", 1, 2, seqAseqA, boolT);
219 m_sigs[OP_SEQ_CONTAINS] = alloc(psig, m, "seq.contains", 1, 2, seqAseqA, boolT);
220 m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA);
221 m_sigs[OP_SEQ_REPLACE] = alloc(psig, m, "seq.replace", 1, 3, seq3A, seqA);
222 m_sigs[OP_SEQ_INDEX] = alloc(psig, m, "seq.indexof", 1, 3, seq2AintT, intT);
223 m_sigs[OP_SEQ_LAST_INDEX] = alloc(psig, m, "seq.last_indexof", 1, 2, seqAseqA, intT);
224 m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA);
225 m_sigs[OP_SEQ_NTH] = alloc(psig, m, "seq.nth", 1, 2, seqAintT, A);
226 m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A);
227 m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A);
228 m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT);
229 m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA);
230 m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA);
231 m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA);
232 m_sigs[OP_RE_RANGE] = alloc(psig, m, "re.range", 1, 2, seqAseqA, reA);
233 m_sigs[OP_RE_CONCAT] = alloc(psig, m, "re.++", 1, 2, reAreA, reA);
234 m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
235 m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA);
236 m_sigs[OP_RE_DIFF] = alloc(psig, m, "re.diff", 1, 2, reAreA, reA);
237 m_sigs[OP_RE_LOOP] = alloc(psig, m, "re.loop", 1, 1, &reA, reA);
238 m_sigs[OP_RE_POWER] = alloc(psig, m, "re.^", 1, 1, &reA, reA);
239 m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
240 m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re.empty", 1, 0, nullptr, reA);
241 m_sigs[OP_RE_FULL_SEQ_SET] = alloc(psig, m, "re.all", 1, 0, nullptr, reA);
242 m_sigs[OP_RE_FULL_CHAR_SET] = alloc(psig, m, "re.allchar", 1, 0, nullptr, reA);
243 m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
244 m_sigs[OP_RE_REVERSE] = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
245 m_sigs[OP_RE_DERIVATIVE] = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
246 m_sigs[_OP_RE_ANTIMOROV_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
247 m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA);
248 m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
249 m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
250 m_sigs[OP_SEQ_REPLACE_RE] = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA);
251 m_sigs[OP_SEQ_REPLACE_ALL] = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA);
252 m_sigs[OP_STRING_CONST] = nullptr;
253 m_sigs[_OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT);
254 m_sigs[_OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT);
255 m_sigs[_OP_STRING_FROM_CHAR] = alloc(psig, m, "char", 1, 0, nullptr, strT);
256 m_sigs[OP_STRING_ITOS] = alloc(psig, m, "str.from_int", 0, 1, &intT, strT);
257 m_sigs[OP_STRING_STOI] = alloc(psig, m, "str.to_int", 0, 1, &strT, intT);
258 m_sigs[OP_STRING_LT] = alloc(psig, m, "str.<", 0, 2, str2T, boolT);
259 m_sigs[OP_STRING_LE] = alloc(psig, m, "str.<=", 0, 2, str2T, boolT);
260 m_sigs[OP_STRING_IS_DIGIT] = alloc(psig, m, "str.is_digit", 0, 1, &strT, boolT);
261 m_sigs[OP_STRING_TO_CODE] = alloc(psig, m, "str.to_code", 0, 1, &strT, intT);
262 m_sigs[OP_STRING_FROM_CODE] = alloc(psig, m, "str.from_code", 0, 1, &intT, strT);
263 m_sigs[_OP_STRING_CONCAT] = alloc(psig, m, "str.++", 1, 2, str2T, strT);
264 m_sigs[_OP_STRING_LENGTH] = alloc(psig, m, "str.len", 0, 1, &strT, intT);
265 m_sigs[_OP_STRING_STRCTN] = alloc(psig, m, "str.contains", 0, 2, str2T, boolT);
266 m_sigs[_OP_STRING_CHARAT] = alloc(psig, m, "str.at", 0, 2, strTint2T, strT);
267 m_sigs[_OP_STRING_PREFIX] = alloc(psig, m, "str.prefixof", 0, 2, str2T, boolT);
268 m_sigs[_OP_STRING_SUFFIX] = alloc(psig, m, "str.suffixof", 0, 2, str2T, boolT);
269 m_sigs[_OP_STRING_IN_REGEXP] = alloc(psig, m, "str.in_re", 0, 2, strTreT, boolT);
270 m_sigs[_OP_STRING_TO_REGEXP] = alloc(psig, m, "str.to_re", 0, 1, &strT, reT);
271 m_sigs[_OP_REGEXP_EMPTY] = alloc(psig, m, "re.none", 0, 0, nullptr, reT);
272 m_sigs[_OP_REGEXP_FULL_CHAR] = alloc(psig, m, "re.allchar", 0, 0, nullptr, reT);
273 m_sigs[_OP_STRING_SUBSTR] = alloc(psig, m, "str.substr", 0, 3, strTint2T, strT);
274 }
275
mk_reglan()276 sort* seq_decl_plugin::mk_reglan() {
277 if (!m_reglan) {
278 ast_manager& m = *m_manager;
279 parameter paramS(m_string);
280 m_reglan = m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, 1, ¶mS));
281 m.inc_ref(m_reglan);
282 }
283 return m_reglan;
284 }
285
set_manager(ast_manager * m,family_id id)286 void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
287 decl_plugin::set_manager(m, id);
288 m_char_plugin = static_cast<char_decl_plugin*>(m_manager->get_plugin(m_manager->mk_family_id("char")));
289 m_char = get_char_plugin().char_sort();
290 m->inc_ref(m_char);
291 parameter param(m_char);
292 m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, ¶m));
293 m->inc_ref(m_string);
294 }
295
mk_sort(decl_kind k,unsigned num_parameters,parameter const * parameters)296 sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) {
297 init();
298 ast_manager& m = *m_manager;
299 switch (k) {
300 case SEQ_SORT:
301 if (num_parameters != 1) {
302 m.raise_exception("Invalid sequence sort, expecting one parameter");
303 }
304 if (!parameters[0].is_ast() || !is_sort(parameters[0].get_ast())) {
305 m.raise_exception("invalid sequence sort, parameter is not a sort");
306 }
307 if (parameters[0].get_ast() == m_char) {
308 return m_string;
309 }
310 return m.mk_sort(symbol("Seq"), sort_info(m_family_id, SEQ_SORT, num_parameters, parameters));
311 case RE_SORT: {
312 if (num_parameters != 1) {
313 m.raise_exception("Invalid regex sort, expecting one parameter");
314 }
315 if (!parameters[0].is_ast() || !is_sort(parameters[0].get_ast())) {
316 m.raise_exception("invalid regex sort, parameter is not a sort");
317 }
318 return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters));
319 }
320 case _STRING_SORT:
321 return m_string;
322 case _REGLAN_SORT:
323 return mk_reglan();
324 default:
325 UNREACHABLE();
326 return nullptr;
327 }
328 }
329
mk_seq_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_string)330 func_decl* seq_decl_plugin::mk_seq_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string) {
331 ast_manager& m = *m_manager;
332 sort_ref rng(m);
333 match(*m_sigs[k], arity, domain, range, rng);
334 return m.mk_func_decl(m_sigs[(domain[0] == m_string)?k_string:k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
335 }
336
337
mk_str_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq)338 func_decl* seq_decl_plugin::mk_str_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq) {
339 ast_manager& m = *m_manager;
340 sort_ref rng(m);
341 match(*m_sigs[k], arity, domain, range, rng);
342 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k_seq));
343 }
344
mk_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string)345 func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
346 return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, true);
347 }
348
mk_left_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string)349 func_decl* seq_decl_plugin::mk_left_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
350 return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, false);
351 }
352
mk_ubv2s(unsigned arity,sort * const * domain) const353 func_decl* seq_decl_plugin::mk_ubv2s(unsigned arity, sort* const* domain) const {
354 ast_manager& m = *m_manager;
355 if (arity != 1)
356 m.raise_exception("Invalid str.from_ubv expects one bit-vector argument");
357 bv_util bv(m);
358 if (!bv.is_bv_sort(domain[0]))
359 m.raise_exception("Invalid str.from_ubv expects one bit-vector argument");
360 sort* rng = m_string;
361 return m.mk_func_decl(symbol("str.from_ubv"), arity, domain, rng, func_decl_info(m_family_id, OP_STRING_UBVTOS));
362 }
363
mk_sbv2s(unsigned arity,sort * const * domain) const364 func_decl* seq_decl_plugin::mk_sbv2s(unsigned arity, sort* const* domain) const {
365 ast_manager &m = *m_manager;
366 if (arity != 1)
367 m.raise_exception("Invalid str.from_sbv expects one bit-vector argument");
368 bv_util bv(m);
369 if (!bv.is_bv_sort(domain[0]))
370 m.raise_exception("Invalid str.from_sbv expects one bit-vector argument");
371 sort *rng = m_string;
372 return m.mk_func_decl(symbol("str.from_sbv"), arity, domain, rng, func_decl_info(m_family_id, OP_STRING_SBVTOS));
373 }
374
mk_assoc_fun(decl_kind k,unsigned arity,sort * const * domain,sort * range,decl_kind k_seq,decl_kind k_string,bool is_right)375 func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string, bool is_right) {
376 ast_manager& m = *m_manager;
377 sort_ref rng(m);
378 if (arity == 0) {
379 m.raise_exception("Invalid function application. At least one argument expected");
380 }
381 match_assoc(*m_sigs[k], arity, domain, range, rng);
382 func_decl_info info(m_family_id, k_seq);
383 if (is_right)
384 info.set_right_associative(true);
385 info.set_left_associative(true);
386 return m.mk_func_decl(m_sigs[(rng == m_string)?k_string:k_seq]->m_name, rng, rng, rng, info);
387 }
388
389
mk_func_decl(decl_kind k,unsigned num_parameters,parameter const * parameters,unsigned arity,sort * const * domain,sort * range)390 func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
391 unsigned arity, sort * const * domain, sort * range) {
392 init();
393 m_has_seq = true;
394 ast_manager& m = *m_manager;
395 sort_ref rng(m);
396 switch(k) {
397 case OP_SEQ_EMPTY:
398 match(*m_sigs[k], arity, domain, range, rng);
399 if (rng == m_string) {
400 parameter param(zstring(""));
401 return mk_func_decl(OP_STRING_CONST, 1, ¶m, 0, nullptr, m_string);
402 }
403 else {
404 parameter param(rng.get());
405 func_decl_info info(m_family_id, k, 1, ¶m);
406 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, info);
407 }
408
409 case OP_RE_PLUS:
410 case OP_RE_STAR:
411 case OP_RE_OPTION:
412 case OP_RE_RANGE:
413 case OP_RE_OF_PRED:
414 case OP_RE_COMPLEMENT:
415 case OP_RE_REVERSE:
416 case OP_RE_DERIVATIVE:
417 case _OP_RE_ANTIMOROV_UNION:
418 m_has_re = true;
419 // fall-through
420 case OP_SEQ_UNIT:
421 case OP_STRING_ITOS:
422 case OP_STRING_STOI:
423 case OP_STRING_LT:
424 case OP_STRING_LE:
425 case OP_STRING_IS_DIGIT:
426 case OP_STRING_TO_CODE:
427 case OP_STRING_FROM_CODE:
428 match(*m_sigs[k], arity, domain, range, rng);
429 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
430
431 case OP_STRING_UBVTOS:
432 return mk_ubv2s(arity, domain);
433
434 case OP_STRING_SBVTOS:
435 return mk_sbv2s(arity, domain);
436
437 case _OP_REGEXP_FULL_CHAR:
438 m_has_re = true;
439 if (!range) range = mk_reglan();
440 match(*m_sigs[k], arity, domain, range, rng);
441 return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_CHAR_SET));
442
443 case OP_RE_FULL_CHAR_SET:
444 m_has_re = true;
445 if (!range) range = mk_reglan();
446 if (range == mk_reglan()) {
447 match(*m_sigs[k], arity, domain, range, rng);
448 return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, k));
449 }
450 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
451
452 case OP_RE_FULL_SEQ_SET:
453 m_has_re = true;
454 if (!range) range = mk_reglan();
455 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
456
457 case _OP_REGEXP_EMPTY:
458 m_has_re = true;
459 if (!range) range = mk_reglan();
460 match(*m_sigs[k], arity, domain, range, rng);
461 return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET));
462
463 case OP_RE_EMPTY_SET:
464 m_has_re = true;
465 if (!range) range = mk_reglan();
466 if (range == mk_reglan()) {
467 match(*m_sigs[k], arity, domain, range, rng);
468 return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, k));
469 }
470 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
471
472 case OP_RE_LOOP:
473 m_has_re = true;
474 switch (arity) {
475 case 1:
476 match(*m_sigs[k], arity, domain, range, rng);
477 if (num_parameters == 0 || num_parameters > 2 || !parameters[0].is_int() || (num_parameters == 2 && !parameters[1].is_int())) {
478 m.raise_exception("Expecting two numeral parameters to function re-loop");
479 }
480 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
481 case 2:
482 if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1])) {
483 m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
484 }
485 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
486 case 3:
487 if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1]) || !arith_util(m).is_int(domain[2])) {
488 m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
489 }
490 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
491 default:
492 m.raise_exception("Incorrect number of arguments passed to loop. Expected 1 regular expression and two integer parameters");
493 }
494 case OP_RE_POWER:
495 m_has_re = true;
496 if (num_parameters == 1 && parameters[0].is_int() && arity == 1 && parameters[0].get_int() >= 0) {
497 rng = domain[0];
498 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
499 }
500 m.raise_exception("Incorrect arguments used for re.^. Expected one non-negative integer parameter");
501
502 case OP_STRING_CONST:
503 if (!(num_parameters == 1 && arity == 0 && parameters[0].is_zstring())) {
504 m.raise_exception("invalid string declaration");
505 }
506 return m.mk_const_decl(m_stringc_sym, m_string,
507 func_decl_info(m_family_id, OP_STRING_CONST, num_parameters, parameters));
508
509 case OP_RE_UNION:
510 case OP_RE_CONCAT:
511 case OP_RE_INTERSECT:
512 case OP_RE_DIFF:
513 m_has_re = true;
514 return mk_left_assoc_fun(k, arity, domain, range, k, k);
515
516 case OP_SEQ_REPLACE_RE_ALL:
517 case OP_SEQ_REPLACE_RE:
518 m_has_re = true;
519 case OP_SEQ_REPLACE_ALL:
520 return mk_str_fun(k, arity, domain, range, k);
521
522 case OP_SEQ_CONCAT:
523 return mk_assoc_fun(k, arity, domain, range, k, _OP_STRING_CONCAT);
524
525 case _OP_STRING_CONCAT:
526 return mk_assoc_fun(k, arity, domain, range, OP_SEQ_CONCAT, k);
527
528 case _OP_STRING_FROM_CHAR: {
529 if (!(num_parameters == 1 && parameters[0].is_int()))
530 m.raise_exception("character literal expects integer parameter");
531 zstring zs(parameters[0].get_int());
532 parameter p(zs);
533 return m.mk_const_decl(m_stringc_sym, m_string,func_decl_info(m_family_id, OP_STRING_CONST, 1, &p));
534 }
535
536 case OP_SEQ_REPLACE:
537 return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRREPL);
538 case _OP_STRING_STRREPL:
539 return mk_str_fun(k, arity, domain, range, OP_SEQ_REPLACE);
540
541 case OP_SEQ_INDEX:
542 if (arity == 2) {
543 sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
544 sort_ref rng(m);
545 match(*m_sigs[k], 3, dom, range, rng);
546 return m.mk_func_decl(m_sigs[(dom[0] == m_string)?_OP_STRING_STRIDOF:k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
547 }
548 return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRIDOF);
549 case _OP_STRING_STRIDOF:
550 if (arity == 2) {
551 sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
552 sort_ref rng(m);
553 match(*m_sigs[k], 3, dom, range, rng);
554 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, OP_SEQ_INDEX));
555 }
556 return mk_str_fun(k, arity, domain, range, OP_SEQ_INDEX);
557 case OP_SEQ_LAST_INDEX:
558 if (arity != 2) {
559 m.raise_exception("two arguments expected tin last_indexof");
560 }
561 else {
562 return mk_seq_fun(k, arity, domain, range, OP_SEQ_LAST_INDEX);
563 }
564 case OP_SEQ_PREFIX:
565 return mk_seq_fun(k, arity, domain, range, _OP_STRING_PREFIX);
566 case _OP_STRING_PREFIX:
567 return mk_str_fun(k, arity, domain, range, OP_SEQ_PREFIX);
568
569 case OP_SEQ_SUFFIX:
570 return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUFFIX);
571 case _OP_STRING_SUFFIX:
572 return mk_str_fun(k, arity, domain, range, OP_SEQ_SUFFIX);
573
574 case OP_SEQ_LENGTH:
575 return mk_seq_fun(k, arity, domain, range, _OP_STRING_LENGTH);
576 case _OP_STRING_LENGTH:
577 return mk_str_fun(k, arity, domain, range, OP_SEQ_LENGTH);
578
579 case OP_SEQ_CONTAINS:
580 return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRCTN);
581 case _OP_STRING_STRCTN:
582 return mk_str_fun(k, arity, domain, range, OP_SEQ_CONTAINS);
583
584 case OP_SEQ_TO_RE:
585 m_has_re = true;
586 return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
587 case _OP_STRING_TO_REGEXP:
588 m_has_re = true;
589 return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);
590
591 case OP_SEQ_IN_RE:
592 m_has_re = true;
593 return mk_seq_fun(k, arity, domain, range, _OP_STRING_IN_REGEXP);
594 case _OP_STRING_IN_REGEXP:
595 m_has_re = true;
596 return mk_str_fun(k, arity, domain, range, OP_SEQ_IN_RE);
597
598 case OP_SEQ_AT:
599 return mk_seq_fun(k, arity, domain, range, _OP_STRING_CHARAT);
600 case _OP_STRING_CHARAT:
601 return mk_str_fun(k, arity, domain, range, OP_SEQ_AT);
602
603 case OP_SEQ_NTH:
604 case OP_SEQ_NTH_I:
605 case OP_SEQ_NTH_U:
606 match(*m_sigs[k], arity, domain, range, rng);
607 return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
608
609 case OP_SEQ_EXTRACT:
610 return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUBSTR);
611 case _OP_STRING_SUBSTR:
612 return mk_str_fun(k, arity, domain, range, OP_SEQ_EXTRACT);
613
614 case _OP_SEQ_SKOLEM: {
615 if (num_parameters == 0 || !parameters[0].is_symbol()) {
616 m.raise_exception("first parameter to skolem symbol should be a parameter");
617 }
618 symbol s = parameters[0].get_symbol();
619 return m.mk_func_decl(s, arity, domain, range, func_decl_info(m_family_id, k, num_parameters, parameters));
620 }
621 default:
622 UNREACHABLE();
623 return nullptr;
624 }
625 }
626
get_op_names(svector<builtin_name> & op_names,symbol const & logic)627 void seq_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol const & logic) {
628 init();
629 for (unsigned i = 0; i < m_sigs.size(); ++i) {
630 if (m_sigs[i]) {
631 op_names.push_back(builtin_name(m_sigs[i]->m_name.str(), i));
632 }
633 }
634 op_names.push_back(builtin_name("str.in.re", _OP_STRING_IN_REGEXP));
635 op_names.push_back(builtin_name("str.in-re", _OP_STRING_IN_REGEXP));
636 op_names.push_back(builtin_name("str.to.re", _OP_STRING_TO_REGEXP));
637 op_names.push_back(builtin_name("str.to-re", _OP_STRING_TO_REGEXP));
638 op_names.push_back(builtin_name("str.to-int", OP_STRING_STOI));
639 op_names.push_back(builtin_name("str.to.int", OP_STRING_STOI));
640 op_names.push_back(builtin_name("str.from-int", OP_STRING_ITOS));
641 op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
642 op_names.push_back(builtin_name("re.nostr", _OP_REGEXP_EMPTY));
643 op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
644 op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
645 op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
646 }
647
get_sort_names(svector<builtin_name> & sort_names,symbol const & logic)648 void seq_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {
649 init();
650 sort_names.push_back(builtin_name("Seq", SEQ_SORT));
651 sort_names.push_back(builtin_name("RegEx", RE_SORT));
652
653 // SMTLIB 2.6 RegLan, String
654 sort_names.push_back(builtin_name("RegLan", _REGLAN_SORT));
655 sort_names.push_back(builtin_name("String", _STRING_SORT));
656
657 // SMTLIB 2.5 compatibility
658 sort_names.push_back(builtin_name("StringSequence", _STRING_SORT));
659 }
660
mk_string(zstring const & s)661 app* seq_decl_plugin::mk_string(zstring const& s) {
662 parameter param(s);
663 func_decl* f = m_manager->mk_const_decl(m_stringc_sym, m_string,
664 func_decl_info(m_family_id, OP_STRING_CONST, 1, ¶m));
665 return m_manager->mk_const(f);
666 }
667
mk_char(unsigned u)668 app* seq_decl_plugin::mk_char(unsigned u) {
669 return get_char_plugin().mk_char(u);
670 }
671
is_considered_uninterpreted(func_decl * f)672 bool seq_decl_plugin::is_considered_uninterpreted(func_decl * f) {
673 seq_util util(*m_manager);
674 return util.str.is_nth_u(f);
675 }
676
is_unique_value(app * e) const677 bool seq_decl_plugin::is_unique_value(app* e) const {
678 return false;
679 }
680
is_value(app * e) const681 bool seq_decl_plugin::is_value(app* e) const {
682 while (true) {
683 if (is_app_of(e, m_family_id, OP_SEQ_EMPTY))
684 return true;
685 if (is_app_of(e, m_family_id, OP_STRING_CONST))
686 return true;
687 if (is_app_of(e, m_family_id, OP_SEQ_UNIT) &&
688 m_manager->is_value(e->get_arg(0)))
689 return true;
690 if (is_app_of(e, m_family_id, OP_SEQ_CONCAT)) {
691 bool first = true;
692 for (expr* arg : *e) {
693 if (first) {
694 first = false;
695 }
696 else if (is_app(arg) && !is_value(to_app(arg))) {
697 return false;
698 }
699 }
700 if (!is_app(e->get_arg(0))) return false;
701 e = to_app(e->get_arg(0));
702 continue;
703 }
704 return false;
705 }
706 }
707
are_equal(app * a,app * b) const708 bool seq_decl_plugin::are_equal(app* a, app* b) const {
709 if (a == b) return true;
710 // handle concatenations
711 return false;
712 }
713
are_distinct(app * a,app * b) const714 bool seq_decl_plugin::are_distinct(app* a, app* b) const {
715 if (a == b)
716 return false;
717 if (is_app_of(a, m_family_id, OP_STRING_CONST) &&
718 is_app_of(b, m_family_id, OP_STRING_CONST))
719 return true;
720 if (is_app_of(a, m_family_id, OP_SEQ_UNIT) &&
721 is_app_of(b, m_family_id, OP_SEQ_UNIT))
722 return m_manager->are_distinct(a->get_arg(0), b->get_arg(0));
723 if (is_app_of(a, m_family_id, OP_SEQ_EMPTY) &&
724 is_app_of(b, m_family_id, OP_SEQ_UNIT))
725 return true;
726 if (is_app_of(b, m_family_id, OP_SEQ_EMPTY) &&
727 is_app_of(a, m_family_id, OP_SEQ_UNIT))
728 return true;
729 return false;
730 }
731
732
get_some_value(sort * s)733 expr* seq_decl_plugin::get_some_value(sort* s) {
734 seq_util util(*m_manager);
735 if (util.is_seq(s)) {
736 return util.str.mk_empty(s);
737 }
738 sort* seq;
739 if (util.is_re(s, seq)) {
740 return util.re.mk_to_re(util.str.mk_empty(seq));
741 }
742 UNREACHABLE();
743 return nullptr;
744 }
745
746
mk_skolem(symbol const & name,unsigned n,expr * const * args,sort * range)747 app* seq_util::mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range) {
748 SASSERT(range);
749 parameter param(name);
750 func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 1, ¶m, n, args, range);
751 return m.mk_app(f, n, args);
752 }
753
mk_string(zstring const & s) const754 app* seq_util::str::mk_string(zstring const& s) const {
755 return u.seq.mk_string(s);
756 }
757
mk_char(zstring const & s,unsigned idx) const758 app* seq_util::str::mk_char(zstring const& s, unsigned idx) const {
759 return u.mk_char(s[idx]);
760 }
761
mk_char(unsigned ch) const762 app* seq_util::str::mk_char(unsigned ch) const {
763 return u.mk_char(ch);
764 }
765
mk_char_bit(expr * e,unsigned idx)766 app* seq_util::str::mk_char_bit(expr* e, unsigned idx) {
767 return u.mk_char_bit(e, idx);
768 }
769
mk_char_bit(expr * e,unsigned i)770 app* seq_util::mk_char_bit(expr* e, unsigned i) {
771 parameter params[2] = { parameter(symbol("char.bit")), parameter(i) };
772 sort* range = m.mk_bool_sort();
773 func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 2, params, 1, &e, range);
774 return m.mk_app(f, 1, &e);
775 }
776
max_plus(unsigned x,unsigned y) const777 unsigned seq_util::max_plus(unsigned x, unsigned y) const {
778 if (x + y < x || x + y < y)
779 return UINT_MAX;
780 return x + y;
781 }
782
max_mul(unsigned x,unsigned y) const783 unsigned seq_util::max_mul(unsigned x, unsigned y) const {
784 uint64_t r = ((uint64_t)x)*((uint64_t)y);
785 return (r > UINT_MAX) ? UINT_MAX : (unsigned)r;
786 }
787
788
is_const_char(expr * e,unsigned & c) const789 bool seq_util::is_const_char(expr* e, unsigned& c) const {
790 return ch.is_const_char(e, c);
791 }
792
is_char_le(expr const * e) const793 bool seq_util::is_char_le(expr const* e) const {
794 return ch.is_le(e);
795 }
796
is_char2int(expr const * e) const797 bool seq_util::is_char2int(expr const* e) const {
798 return ch.is_to_int(e);
799 }
800
is_bv2char(expr const * e) const801 bool seq_util::is_bv2char(expr const* e) const {
802 return ch.is_bv2char(e);
803 }
804
is_char2bv(expr const * e) const805 bool seq_util::is_char2bv(expr const* e) const {
806 return ch.is_char2bv(e);
807 }
808
809
mk_char(unsigned ch) const810 app* seq_util::mk_char(unsigned ch) const {
811 return seq.mk_char(ch);
812 }
813
mk_le(expr * ch1,expr * ch2) const814 app* seq_util::mk_le(expr* ch1, expr* ch2) const {
815 return ch.mk_le(ch1, ch2);
816 }
817
mk_lt(expr * ch1,expr * ch2) const818 app* seq_util::mk_lt(expr* ch1, expr* ch2) const {
819 return m.mk_not(mk_le(ch2, ch1));
820 }
821
is_string(func_decl const * f,zstring & s) const822 bool seq_util::str::is_string(func_decl const* f, zstring& s) const {
823 if (is_string(f)) {
824 s = f->get_parameter(0).get_zstring();
825 return true;
826 }
827 else {
828 return false;
829 }
830 }
831
is_string(expr const * n,zstring & s) const832 bool seq_util::str::is_string(expr const* n, zstring& s) const {
833 return is_app(n) && is_string(to_app(n)->get_decl(), s);
834 }
835
is_nth_i(expr const * n,expr * & s,unsigned & idx) const836 bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const {
837 expr* i = nullptr;
838 if (!is_nth_i(n, s, i)) return false;
839 return arith_util(m).is_unsigned(i, idx);
840 }
841
mk_nth_c(expr * s,unsigned i) const842 app* seq_util::str::mk_nth_c(expr* s, unsigned i) const {
843 return mk_nth_i(s, arith_util(m).mk_int(i));
844 }
845
get_concat(expr * e,expr_ref_vector & es) const846 void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const {
847 expr* e1, *e2;
848 while (is_concat(e, e1, e2)) {
849 get_concat(e1, es);
850 e = e2;
851 }
852 if (!is_empty(e)) {
853 es.push_back(e);
854 }
855 }
856
857 /*
858 Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k).
859 Also returns true and assigns k=0 and l=s if s is |u|.
860 */
is_len_sub(expr const * s,expr * & l,expr * & u,rational & k) const861 bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const {
862 expr* x;
863 rational v;
864 arith_util a(m);
865 if (is_length(s, l)) {
866 k = 0;
867 return true;
868 }
869 else if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) {
870 k = v;
871 return true;
872 }
873 else if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
874 k = - v;
875 return true;
876 }
877 else if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
878 k = - v;
879 return true;
880 }
881 else
882 return false;
883 }
884
is_unit_string(expr const * s,expr_ref & c) const885 bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const {
886 zstring z;
887 expr* ch = nullptr;
888 if (is_string(s, z) && z.length() == 1) {
889 c = mk_char(z[0]);
890 return true;
891 }
892 else if (is_unit(s, ch)) {
893 c = ch;
894 return true;
895 }
896 return false;
897 }
898
get_concat_units(expr * e,expr_ref_vector & es) const899 void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const {
900 expr* e1, *e2;
901 while (is_concat(e, e1, e2)) {
902 get_concat_units(e1, es);
903 e = e2;
904 }
905 zstring s;
906 if (is_string(e, s)) {
907 unsigned sz = s.length();
908 for (unsigned j = 0; j < sz; ++j) {
909 es.push_back(mk_unit(mk_char(s, j)));
910 }
911 }
912 else if (!is_empty(e)) {
913 es.push_back(e);
914 }
915 }
916
mk_is_empty(expr * s) const917 app* seq_util::str::mk_is_empty(expr* s) const {
918 return m.mk_eq(s, mk_empty(s->get_sort()));
919 }
920
min_length(expr * s) const921 unsigned seq_util::str::min_length(expr* s) const {
922 SASSERT(u.is_seq(s));
923 unsigned result = 0;
924 expr* s1 = nullptr, *s2 = nullptr;
925 auto get_length = [&](expr* s1) {
926 zstring st;
927 if (is_unit(s1))
928 return 1u;
929 else if (is_string(s1, st))
930 return st.length();
931 else
932 return 0u;
933 };
934 while (is_concat(s, s1, s2)) {
935 if (is_concat(s1))
936 result += min_length(s1);
937 else
938 result += get_length(s1);
939 s = s2;
940 }
941 result += get_length(s);
942 return result;
943 }
944
max_length(expr * s) const945 unsigned seq_util::str::max_length(expr* s) const {
946 SASSERT(u.is_seq(s));
947 unsigned result = 0;
948 expr* s1 = nullptr, *s2 = nullptr, *s3 = nullptr;
949 unsigned n = 0;
950 zstring st;
951 auto get_length = [&](expr* s1) {
952 if (is_empty(s1))
953 return 0u;
954 else if (is_unit(s1))
955 return 1u;
956 else if (is_at(s1))
957 return 1u;
958 else if (is_extract(s1, s1, s2, s3))
959 return (arith_util(m).is_unsigned(s3, n)) ? n : UINT_MAX;
960 else if (is_string(s1, st))
961 return st.length();
962 else
963 return UINT_MAX;
964 };
965 while (is_concat(s, s1, s2)) {
966 if (is_concat(s1))
967 result = u.max_plus(max_length(s1), result);
968 else
969 result = u.max_plus(get_length(s1), result);
970 s = s2;
971 }
972 result = u.max_plus(get_length(s), result);
973 return result;
974 }
975
min_length(expr * r) const976 unsigned seq_util::rex::min_length(expr* r) const {
977 SASSERT(u.is_re(r));
978 return get_info(r).min_length;
979 }
980
max_length(expr * r) const981 unsigned seq_util::rex::max_length(expr* r) const {
982 SASSERT(u.is_re(r));
983 expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
984 unsigned lo = 0, hi = 0;
985 if (is_empty(r))
986 return 0;
987 if (is_concat(r, r1, r2))
988 return u.max_plus(max_length(r1), max_length(r2));
989 if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
990 return std::max(max_length(r1), max_length(r2));
991 if (is_intersection(r, r1, r2))
992 return std::min(max_length(r1), max_length(r2));
993 if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_opt(r, r1))
994 return max_length(r1);
995 if (is_loop(r, r1, lo, hi))
996 return u.max_mul(hi, max_length(r1));
997 if (is_to_re(r, s))
998 return u.str.max_length(s);
999 if (is_range(r) || is_of_pred(r) || is_full_char(r))
1000 return 1;
1001 // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative
1002 return UINT_MAX;
1003 }
1004
to_seq(sort * re)1005 sort* seq_util::rex::to_seq(sort* re) {
1006 (void)u;
1007 SASSERT(u.is_re(re));
1008 return to_sort(re->get_parameter(0).get_ast());
1009 }
1010
mk_loop(expr * r,unsigned lo)1011 app* seq_util::rex::mk_loop(expr* r, unsigned lo) {
1012 parameter param(lo);
1013 return m.mk_app(m_fid, OP_RE_LOOP, 1, ¶m, 1, &r);
1014 }
1015
mk_loop(expr * r,unsigned lo,unsigned hi)1016 app* seq_util::rex::mk_loop(expr* r, unsigned lo, unsigned hi) {
1017 parameter params[2] = { parameter(lo), parameter(hi) };
1018 return m.mk_app(m_fid, OP_RE_LOOP, 2, params, 1, &r);
1019 }
1020
mk_loop(expr * r,expr * lo)1021 app* seq_util::rex::mk_loop(expr* r, expr* lo) {
1022 expr* rs[2] = { r, lo };
1023 return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 2, rs);
1024 }
1025
mk_loop(expr * r,expr * lo,expr * hi)1026 app* seq_util::rex::mk_loop(expr* r, expr* lo, expr* hi) {
1027 expr* rs[3] = { r, lo, hi };
1028 return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 3, rs);
1029 }
1030
mk_full_char(sort * s)1031 app* seq_util::rex::mk_full_char(sort* s) {
1032 return m.mk_app(m_fid, OP_RE_FULL_CHAR_SET, 0, nullptr, 0, nullptr, s);
1033 }
1034
mk_full_seq(sort * s)1035 app* seq_util::rex::mk_full_seq(sort* s) {
1036 return m.mk_app(m_fid, OP_RE_FULL_SEQ_SET, 0, nullptr, 0, nullptr, s);
1037 }
1038
mk_empty(sort * s)1039 app* seq_util::rex::mk_empty(sort* s) {
1040 return m.mk_app(m_fid, OP_RE_EMPTY_SET, 0, nullptr, 0, nullptr, s);
1041 }
1042
mk_of_pred(expr * p)1043 app* seq_util::rex::mk_of_pred(expr* p) {
1044 return m.mk_app(m_fid, OP_RE_OF_PRED, 0, nullptr, 1, &p);
1045 }
1046
is_loop(expr const * n,expr * & body,unsigned & lo,unsigned & hi) const1047 bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const {
1048 if (is_loop(n)) {
1049 app const* a = to_app(n);
1050 if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 2) {
1051 body = a->get_arg(0);
1052 lo = a->get_decl()->get_parameter(0).get_int();
1053 hi = a->get_decl()->get_parameter(1).get_int();
1054 return true;
1055 }
1056 }
1057 return false;
1058 }
1059
is_loop(expr const * n,expr * & body,unsigned & lo) const1060 bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo) const {
1061 if (is_loop(n)) {
1062 app const* a = to_app(n);
1063 if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 1) {
1064 body = a->get_arg(0);
1065 lo = a->get_decl()->get_parameter(0).get_int();
1066 return true;
1067 }
1068 }
1069 return false;
1070 }
1071
is_loop(expr const * n,expr * & body,expr * & lo,expr * & hi) const1072 bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const {
1073 if (is_loop(n)) {
1074 app const* a = to_app(n);
1075 if (a->get_num_args() == 3) {
1076 body = a->get_arg(0);
1077 lo = a->get_arg(1);
1078 hi = a->get_arg(2);
1079 return true;
1080 }
1081 }
1082 return false;
1083 }
1084
is_loop(expr const * n,expr * & body,expr * & lo) const1085 bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo) const {
1086 if (is_loop(n)) {
1087 app const* a = to_app(n);
1088 if (a->get_num_args() == 2) {
1089 body = a->get_arg(0);
1090 lo = a->get_arg(1);
1091 return true;
1092 }
1093 }
1094 return false;
1095 }
1096
1097 /**
1098 Returns true iff e is the epsilon regex.
1099 */
is_epsilon(expr * r) const1100 bool seq_util::rex::is_epsilon(expr* r) const {
1101 expr* s;
1102 return is_to_re(r, s) && u.str.is_empty(s);
1103 }
1104 /**
1105 Makes the epsilon regex for a given sequence sort.
1106 */
mk_epsilon(sort * seq_sort)1107 app* seq_util::rex::mk_epsilon(sort* seq_sort) {
1108 return mk_to_re(u.str.mk_empty(seq_sort));
1109 }
1110
1111 /*
1112 Produces compact view of concrete concatenations such as (abcd).
1113 */
print_seq(std::ostream & out,expr * s) const1114 bool seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const {
1115 zstring z;
1116 expr* x, * j, * k, * l, * i, * x_;
1117 if (re.u.str.is_empty(s))
1118 out << "()";
1119 else if (re.u.str.is_concat(s)) {
1120 expr_ref_vector es(re.m);
1121 re.u.str.get_concat(s, es);
1122 for (expr* e : es)
1123 print(out, e);
1124 }
1125 else if (re.u.str.is_string(s, z)) {
1126 for (unsigned i = 0; i < z.length(); i++)
1127 out << (char)z[i];
1128 }
1129 else if (re.u.str.is_at(s, x, i))
1130 print(out, x) << "@", print(out, i);
1131 else if (re.u.str.is_extract(s, x, j, k)) {
1132 rational jv, iv;
1133 print(out, x);
1134 if (arith_util(re.m).is_numeral(j, jv)) {
1135 if (arith_util(re.m).is_numeral(k, iv)) {
1136 // output X[j,k]
1137 out << "[" << jv.get_int32() << "," << jv.get_int32() << "]";
1138 }
1139 else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ &&
1140 arith_util(re.m).is_numeral(i, iv) && iv == jv) {
1141 // case X[j,|X|-j] is denoted by X[j..]
1142 out << "[" << jv.get_int32() << "..]";
1143 }
1144 else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) ||
1145 (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ &&
1146 arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) {
1147 // case X[j,|X|-j] is denoted by X[j..]
1148 out << "[" << jv.get_int32() << "..]";
1149 }
1150 else {
1151 out << "[" << jv.get_int32() << ",";
1152 print(out, k);
1153 out << "]";
1154 }
1155 }
1156 else {
1157 out << "[";
1158 print(out, j);
1159 out << ",";
1160 print(out, k);
1161 out << "]";
1162 }
1163 }
1164 else
1165 return false;
1166 return true;
1167 }
1168
1169 /*
1170 Produces output such as [a-z] for a range.
1171 */
print_range(std::ostream & out,expr * s1,expr * s2) const1172 std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const {
1173 out << "[";
1174 print(out, s1);
1175 out << "-";
1176 print(out, s2);
1177 out << "]";
1178 return out;
1179 }
1180
1181 /*
1182 Checks if parenthesis can be omitted in some cases in a loop body or in complement.
1183 */
can_skip_parenth(expr * r) const1184 bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
1185 expr* s;
1186 return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r) || re.is_full_char(r));
1187 }
1188
1189 /*
1190 Specialize output for a unit sequence converting to visible ASCII characters if possible.
1191 */
print_unit(std::ostream & out,expr * s) const1192 bool seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const {
1193 expr* e, * i;
1194 unsigned n = 0;
1195
1196 if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
1197 char c = (char)n;
1198 if (c == '\n')
1199 out << "\\n";
1200 else if (c == '\r')
1201 out << "\\r";
1202 else if (c == '\f')
1203 out << "\\f";
1204 else if (32 <= n && n < 127 && n != '\"' && n != ' '
1205 && n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']'
1206 && n != '{' && n != '}' && n != '&') {
1207 if (html_encode) {
1208 if (c == '<')
1209 out << "<";
1210 else if (c == '>')
1211 out << ">";
1212 //else if (c == '&')
1213 // out << "&";
1214 //else if (c == '\"')
1215 // out << """;
1216 else
1217 //out << "\\x" << std::hex << n;
1218 out << c;
1219 }
1220 else
1221 out << c;
1222 }
1223 else if (n <= 0xF)
1224 out << "\\x0" << std::hex << n;
1225 else if (n <= 0xFF)
1226 out << "\\x" << std::hex << n;
1227 else if (n <= 0xFFF)
1228 out << "\\u0" << std::hex << n;
1229 else
1230 out << "\\u" << std::hex << n;
1231 }
1232 else if (re.u.str.is_nth_i(s, e, i)) {
1233 print(out, e) << "[";
1234 print(out, i) << "]";
1235 }
1236 else if (re.u.str.is_length(s, e))
1237 print(out << "|", e) << "|";
1238 else
1239 return false;
1240 return true;
1241 }
1242
1243 /*
1244 Pretty prints the regex r into the ostream out
1245 */
print(std::ostream & out,expr * e) const1246 std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
1247 expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
1248 unsigned lo = 0, hi = 0;
1249 arith_util a(re.m);
1250 rational v;
1251 if (!e)
1252 out << "null";
1253 else if (print_unit(out, e))
1254 ;
1255 else if (print_seq(out, e))
1256 ;
1257 else if (re.is_full_char(e))
1258 out << ".";
1259 else if (re.is_full_seq(e))
1260 out << ".*";
1261 else if (re.is_to_re(e, s))
1262 print(out, s);
1263 else if (re.is_range(e, s, s2))
1264 print_range(out, s, s2);
1265 else if (re.is_epsilon(e))
1266 // ε = epsilon
1267 out << (html_encode ? "ε" : "()");
1268 else if (re.is_empty(e))
1269 // ∅ = emptyset
1270 out << (html_encode ? "∅" : "[]");
1271 else if (re.is_concat(e, r1, r2)) {
1272 print(out, r1);
1273 print(out, r2);
1274 }
1275 else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
1276 out << "(";
1277 print(out, r1);
1278 out << (html_encode ? "⋃" : "|");
1279 print(out, r2);
1280 out << ")";
1281 }
1282 else if (re.is_intersection(e, r1, r2)) {
1283 out << "(";
1284 print(out, r1);
1285 out << (html_encode ? "⋂" : "&");
1286 print(out, r2);
1287 out << ")";
1288 }
1289 else if (re.is_complement(e, r1)) {
1290 out << "~";
1291 if (can_skip_parenth(r1))
1292 print(out, r1);
1293 else {
1294 out << "(";
1295 print(out, r1);
1296 out << ")";
1297 }
1298 }
1299 else if (re.is_plus(e, r1)) {
1300 if (can_skip_parenth(r1)) {
1301 print(out, r1);
1302 out << "+";
1303 }
1304 else {
1305 out << "(";
1306 print(out, r1);
1307 out << ")+";
1308 }
1309 }
1310 else if (re.is_star(e, r1)) {
1311 if (can_skip_parenth(r1)) {
1312 print(out, r1);
1313 out << "*";
1314 }
1315 else {
1316 out << "(";
1317 print(out, r1);
1318 out << ")*";
1319 }
1320 }
1321 else if (re.is_loop(e, r1, lo)) {
1322 if (can_skip_parenth(r1))
1323 print(out, r1) << "{" << lo << ",}";
1324 else {
1325 out << "(";
1326 print(out, r1);
1327 out << "){" << lo << ",}";
1328 }
1329 }
1330 else if (re.is_loop(e, r1, lo, hi)) {
1331 if (can_skip_parenth(r1)) {
1332 print(out, r1);
1333 if (lo == hi)
1334 out << "{" << lo << "}";
1335 else
1336 out << "{" << lo << "," << hi << "}";
1337 }
1338 else {
1339 out << "(";
1340 print(out, r1);
1341 if (lo == hi)
1342 out << "){" << lo << "}";
1343 else
1344 out << "){" << lo << "," << hi << "}";
1345 }
1346 }
1347 else if (re.is_diff(e, r1, r2)) {
1348 out << "(";
1349 print(out, r1);
1350 out << ")\\(";
1351 print(out, r2);
1352 out << ")";
1353 }
1354 else if (re.m.is_ite(e, s, r1, r2)) {
1355 out << (html_encode ? "(𝐢𝐟 " : "(if ");
1356 print(out, s);
1357 out << (html_encode ? " 𝐭𝗵𝐞𝐧 " : " then ");
1358 print(out, r1);
1359 out << (html_encode ? " 𝐞𝐥𝘀𝐞 " : " else ");
1360 print(out, r2);
1361 out << ")";
1362 }
1363 else if (re.is_opt(e, r1)) {
1364 if (can_skip_parenth(r1))
1365 print(out, r1) << "?";
1366 else {
1367 out << "(";
1368 print(out, r1);
1369 out << ")?";
1370 }
1371 }
1372 else if (re.is_reverse(e, r1)) {
1373 out << "(reverse ";
1374 print(out, r1);
1375 out << ")";
1376 }
1377 else if (re.m.is_eq(e, r1, r2)) {
1378 out << "(";
1379 print(out, r1);
1380 out << " = ";
1381 print(out, r2);
1382 out << ")";
1383 }
1384 else if (re.m.is_not(e, r1)) {
1385 out << "!";
1386 print(out, r1);
1387 }
1388 else if (a.is_add(e, s, s2) && a.is_numeral(s, v) && v < 0)
1389 print(out, s2) << " - " << -v;
1390 else if (a.is_add(e, s, s2) && a.is_numeral(s2, v) && v < 0)
1391 print(out, s) << " - " << -v;
1392 else if (a.is_add(e, s, s2))
1393 print(out, s) << " + ", print(out, s2);
1394 else if (a.is_sub(e, s, s2) && a.is_numeral(s2, v) && v > 0)
1395 print(out, s) << " - " << v;
1396 else if (a.is_le(e, s, s2))
1397 print(out << "(", s) << " <= ", print(out, s2) << ")";
1398 else if (re.m.is_value(e))
1399 out << mk_pp(e, re.m);
1400 else if (is_app(e) && to_app(e)->get_num_args() == 0)
1401 out << mk_pp(e, re.m);
1402 else if (is_app(e)) {
1403 out << "(" << to_app(e)->get_decl()->get_name();
1404 for (expr* arg : *to_app(e))
1405 print(out << " ", arg);
1406 out << ")";
1407 }
1408 else
1409 // for all remaining cases use the default pretty printer
1410 out << mk_pp(e, re.m);
1411 return out;
1412 }
1413
display(std::ostream & out) const1414 std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
1415 return print(out, ex);
1416 }
1417
1418 /*
1419 Pretty prints the regex r into the output string
1420 */
to_str(expr * r) const1421 std::string seq_util::rex::to_str(expr* r) const {
1422 std::ostringstream out;
1423 pp(u.re, r, false).display(out);
1424 return out.str();
1425 }
1426
1427 /*
1428 Pretty prints the regex r into the output string that is htmlencoded
1429 */
to_strh(expr * r) const1430 std::string seq_util::rex::to_strh(expr* r) const {
1431 std::ostringstream out;
1432 pp(u.re, r, true).display(out);
1433 return out.str();
1434 }
1435
1436 /*
1437 Returns true iff info has been computed for the regex r
1438 */
has_valid_info(expr * r) const1439 bool seq_util::rex::has_valid_info(expr* r) const {
1440 return r->get_id() < m_infos.size() && m_infos[r->get_id()].is_valid();
1441 }
1442
1443 /*
1444 Returns the info in the cache if the info is valid. Returns invalid_info otherwise.
1445 */
get_cached_info(expr * e) const1446 seq_util::rex::info seq_util::rex::get_cached_info(expr* e) const {
1447 if (has_valid_info(e))
1448 return m_infos[e->get_id()];
1449 else
1450 return invalid_info;
1451 }
1452
1453 /*
1454 Get the information value associated with the regular expression e
1455 */
get_info(expr * e) const1456 seq_util::rex::info seq_util::rex::get_info(expr* e) const
1457 {
1458 SASSERT(u.is_re(e));
1459 auto result = get_cached_info(e);
1460 if (result.is_valid())
1461 return result;
1462 m_info_pinned.push_back(e);
1463 return get_info_rec(e);
1464 }
1465
1466 /*
1467 Gets the info value for the given regex e, possibly making a new info recursively over the structure of e.
1468 */
get_info_rec(expr * e) const1469 seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
1470 auto result = get_cached_info(e);
1471 if (result.is_valid())
1472 return result;
1473 if (!is_app(e))
1474 result = unknown_info;
1475 else
1476 result = mk_info_rec(to_app(e));
1477 m_infos.setx(e->get_id(), result, invalid_info);
1478 STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;);
1479 return result;
1480 }
1481
1482 /*
1483 Computes the info value for the given regex e recursively over the structure of e.
1484 The regex e does not yet have an entry in the cache.
1485 */
mk_info_rec(app * e) const1486 seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
1487 info i1, i2;
1488 lbool nullable(l_false);
1489 unsigned min_length(0), lower_bound(0), upper_bound(UINT_MAX);
1490 bool is_value(false);
1491 if (e->get_family_id() == u.get_family_id()) {
1492 switch (e->get_decl()->get_decl_kind()) {
1493 case OP_RE_EMPTY_SET:
1494 return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
1495 case OP_RE_FULL_SEQ_SET:
1496 return info(true, true, true, true, true, true, false, l_true, 0, 1);
1497 case OP_RE_STAR:
1498 i1 = get_info_rec(e->get_arg(0));
1499 return i1.star();
1500 case OP_RE_OPTION:
1501 i1 = get_info_rec(e->get_arg(0));
1502 return i1.opt();
1503 case OP_RE_RANGE:
1504 case OP_RE_FULL_CHAR_SET:
1505 case OP_RE_OF_PRED:
1506 //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
1507 //TBD: check if the range is unsat
1508 return info(true, true, true, true, true, true, true, l_false, 1, 0);
1509 case OP_RE_CONCAT:
1510 i1 = get_info_rec(e->get_arg(0));
1511 i2 = get_info_rec(e->get_arg(1));
1512 return i1.concat(i2, u.re.is_concat(e->get_arg(0)));
1513 case OP_RE_UNION:
1514 i1 = get_info_rec(e->get_arg(0));
1515 i2 = get_info_rec(e->get_arg(1));
1516 return i1.disj(i2);
1517 case OP_RE_INTERSECT:
1518 i1 = get_info_rec(e->get_arg(0));
1519 i2 = get_info_rec(e->get_arg(1));
1520 return i1.conj(i2);
1521 case OP_SEQ_TO_RE:
1522 min_length = u.str.min_length(e->get_arg(0));
1523 is_value = m.is_value(e->get_arg(0));
1524 nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
1525 return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0);
1526 case OP_RE_REVERSE:
1527 return get_info_rec(e->get_arg(0));
1528 case OP_RE_PLUS:
1529 i1 = get_info_rec(e->get_arg(0));
1530 return i1.plus();
1531 case OP_RE_COMPLEMENT:
1532 i1 = get_info_rec(e->get_arg(0));
1533 return i1.complement();
1534 case OP_RE_LOOP:
1535 i1 = get_info_rec(e->get_arg(0));
1536 if (e->get_decl()->get_num_parameters() >= 1)
1537 lower_bound = e->get_decl()->get_parameter(0).get_int();
1538 if (e->get_decl()->get_num_parameters() == 2)
1539 upper_bound = e->get_decl()->get_parameter(1).get_int();
1540 return i1.loop(lower_bound, upper_bound);
1541 case OP_RE_DIFF:
1542 i1 = get_info_rec(e->get_arg(0));
1543 i2 = get_info_rec(e->get_arg(1));
1544 return i1.diff(i2);
1545 }
1546 return unknown_info;
1547 }
1548 expr* c, * t, * f;
1549 if (u.m.is_ite(e, c, t, f)) {
1550 i1 = get_info_rec(t);
1551 i2 = get_info_rec(f);
1552 return i1.orelse(i2);
1553 }
1554 return unknown_info;
1555 }
1556
display(std::ostream & out) const1557 std::ostream& seq_util::rex::info::display(std::ostream& out) const {
1558 if (is_known()) {
1559 out << "info("
1560 << "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
1561 << "classical=" << (classical ? "T" : "F") << ", "
1562 << "standard=" << (standard ? "T" : "F") << ", "
1563 << "nonbranching=" << (nonbranching ? "T" : "F") << ", "
1564 << "normalized=" << (normalized ? "T" : "F") << ", "
1565 << "monadic=" << (monadic ? "T" : "F") << ", "
1566 << "singleton=" << (singleton ? "T" : "F") << ", "
1567 << "min_length=" << min_length << ", "
1568 << "star_height=" << star_height << ")";
1569 }
1570 else if (is_valid())
1571 out << "UNKNOWN";
1572 else
1573 out << "INVALID";
1574 return out;
1575 }
1576
1577 /*
1578 String representation of the info.
1579 */
str() const1580 std::string seq_util::rex::info::str() const {
1581 std::ostringstream out;
1582 display(out);
1583 return out.str();
1584 }
1585
star() const1586 seq_util::rex::info seq_util::rex::info::star() const {
1587 //if is_known() is false then all mentioned properties will remain false
1588 return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1);
1589 }
1590
plus() const1591 seq_util::rex::info seq_util::rex::info::plus() const {
1592 if (is_known()) {
1593 //plus never occurs in a normalized regex
1594 return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1);
1595 }
1596 else
1597 return *this;
1598 }
1599
opt() const1600 seq_util::rex::info seq_util::rex::info::opt() const {
1601 // if is_known() is false then all mentioned properties will remain false
1602 // optional construct never occurs in a normalized regex
1603 return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height);
1604 }
1605
complement() const1606 seq_util::rex::info seq_util::rex::info::complement() const {
1607 if (is_known()) {
1608 lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
1609 unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
1610 return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height);
1611 }
1612 else
1613 return *this;
1614 }
1615
concat(seq_util::rex::info const & rhs,bool lhs_is_concat) const1616 seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const {
1617 if (is_known()) {
1618 if (rhs.is_known()) {
1619 unsigned m = min_length + rhs.min_length;
1620 if (m < min_length || m < rhs.min_length)
1621 m = UINT_MAX;
1622 return info(classical & rhs.classical,
1623 classical && rhs.classical, //both args of concat must be classical for it to be standard
1624 interpreted && rhs.interpreted,
1625 nonbranching && rhs.nonbranching,
1626 (normalized && !lhs_is_concat && rhs.normalized),
1627 monadic && rhs.monadic,
1628 false,
1629 ((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
1630 m,
1631 std::max(star_height, rhs.star_height));
1632 }
1633 else
1634 return rhs;
1635 }
1636 else
1637 return *this;
1638 }
1639
disj(seq_util::rex::info const & rhs) const1640 seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
1641 if (is_known() || rhs.is_known()) {
1642 //works correctly if one of the arguments is unknown
1643 return info(classical & rhs.classical,
1644 standard && rhs.standard,
1645 interpreted && rhs.interpreted,
1646 nonbranching && rhs.nonbranching,
1647 normalized && rhs.normalized,
1648 monadic && rhs.monadic,
1649 singleton && rhs.singleton,
1650 ((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
1651 std::min(min_length, rhs.min_length),
1652 std::max(star_height, rhs.star_height));
1653 }
1654 else
1655 return rhs;
1656 }
1657
conj(seq_util::rex::info const & rhs) const1658 seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
1659 if (is_known()) {
1660 if (rhs.is_known()) {
1661 return info(false,
1662 standard && rhs.standard,
1663 interpreted && rhs.interpreted,
1664 nonbranching && rhs.nonbranching,
1665 normalized && rhs.normalized,
1666 monadic && rhs.monadic,
1667 singleton && rhs.singleton,
1668 ((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
1669 std::max(min_length, rhs.min_length),
1670 std::max(star_height, rhs.star_height));
1671 }
1672 else
1673 return rhs;
1674 }
1675 else
1676 return *this;
1677 }
1678
diff(seq_util::rex::info const & rhs) const1679 seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
1680 if (is_known()) {
1681 if (rhs.is_known()) {
1682 return info(false,
1683 standard & rhs.standard,
1684 interpreted & rhs.interpreted,
1685 nonbranching & rhs.nonbranching,
1686 normalized & rhs.normalized,
1687 monadic & rhs.monadic,
1688 false,
1689 ((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
1690 std::max(min_length, rhs.min_length),
1691 std::max(star_height, rhs.star_height));
1692 }
1693 else
1694 return rhs;
1695 }
1696 else
1697 return *this;
1698 }
1699
orelse(seq_util::rex::info const & i) const1700 seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
1701 if (is_known()) {
1702 if (i.is_known()) {
1703 // unsigned ite_min_length = std::min(min_length, i.min_length);
1704 // lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef);
1705 // TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
1706 return info(false, false, false, false,
1707 normalized && i.normalized,
1708 monadic && i.monadic,
1709 singleton && i.singleton,
1710 ((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)),
1711 std::min(min_length, i.min_length),
1712 std::max(star_height, i.star_height));
1713 }
1714 else
1715 return i;
1716 }
1717 else
1718 return *this;
1719 }
1720
loop(unsigned lower,unsigned upper) const1721 seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const {
1722 if (is_known()) {
1723 unsigned m = min_length * lower;
1724 // Code review: this is not a complete overflow check.
1725 if (m > 0 && (m < min_length || m < lower))
1726 m = UINT_MAX;
1727 lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
1728 if (upper == UINT_MAX) {
1729 // this means the loop is r{lower,*} and is therefore not normalized
1730 // normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r*
1731 return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1);
1732 }
1733 else {
1734 bool loop_normalized = normalized;
1735 // r{lower,upper} is not normalized if r is nullable but lower > 0
1736 // r{0,1} is not normalized: it should be ()|r
1737 // r{1,1} is not normalized: it should be r
1738 // r{lower,upper} is not normalized if lower > upper it should then be [] (empty)
1739 if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper)
1740 loop_normalized = false;
1741 return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
1742 }
1743 }
1744 else
1745 return *this;
1746 }
1747
1748
1749