1 /*++
2 Copyright (c) 2011 Microsoft Corporation
3 
4 Module Name:
5 
6     seq_axioms.cpp
7 
8 Abstract:
9 
10     Axiomatize string operations that can be reduced to
11     more basic operations. These axioms are kept outside
12     of a particular solver: they are mainly solver independent.
13 
14 Author:
15 
16     Nikolaj Bjorner (nbjorner) 2020-4-16
17 
18 Revision History:
19 
20 --*/
21 
22 #include "ast/ast_pp.h"
23 #include "ast/ast_ll_pp.h"
24 #include "smt/seq_axioms.h"
25 #include "smt/smt_context.h"
26 
27 using namespace smt;
28 
seq_axioms(theory & th,th_rewriter & r)29 seq_axioms::seq_axioms(theory& th, th_rewriter& r):
30     th(th),
31     m_rewrite(r),
32     m(r.m()),
33     a(m),
34     seq(m),
35     m_sk(m, r),
36     m_digits_initialized(false)
37 {}
38 
mk_eq(expr * a,expr * b)39 literal seq_axioms::mk_eq(expr* a, expr* b) {
40     return th.mk_eq(a, b, false);
41 }
42 
mk_sub(expr * x,expr * y)43 expr_ref seq_axioms::mk_sub(expr* x, expr* y) {
44     expr_ref result(a.mk_sub(x, y), m);
45     m_rewrite(result);
46     return result;
47 }
48 
mk_len(expr * s)49 expr_ref seq_axioms::mk_len(expr* s) {
50     expr_ref result(seq.str.mk_length(s), m);
51     m_rewrite(result);
52     return result;
53 }
54 
mk_literal(expr * _e)55 literal seq_axioms::mk_literal(expr* _e) {
56     expr_ref e(_e, m);
57     if (a.is_arith_expr(e)) {
58         m_rewrite(e);
59     }
60     th.ensure_enode(e);
61     return ctx().get_literal(e);
62 }
63 
64 /***
65 
66   let e = extract(s, i, l)
67 
68   i is start index, l is length of substring starting at index.
69 
70   i < 0 => e = ""
71   i >= |s| => e = ""
72   l <= 0 => e = ""
73   0 <= i < |s| & l > 0 => s = xey, |x| = i, |e| = min(l, |s|-i)
74   l <= 0 => e = ""
75 
76 this translates to:
77 
78   0 <= i <= |s| -> s = xey
79   0 <= i <= |s| -> len(x) = i
80   0 <= i <= |s| & 0 <= l <= |s| - i -> |e| = l
81   0 <= i <= |s| & |s| < l + i  -> |e| = |s| - i
82   |e| = 0 <=> i < 0 | |s| <= i | l <= 0 | |s| <= 0
83 
84   It follows that:
85   |e| = min(l, |s| - i) for 0 <= i < |s| and 0 < |l|
86 
87 */
88 
add_extract_axiom(expr * e)89 void seq_axioms::add_extract_axiom(expr* e) {
90     TRACE("seq", tout << mk_pp(e, m) << "\n";);
91     expr* _s = nullptr, *_i = nullptr, *_l = nullptr;
92     VERIFY(seq.str.is_extract(e, _s, _i, _l));
93     expr_ref s(_s, m), i(_i, m), l(_l, m);
94     m_rewrite(s);
95     m_rewrite(i);
96     if (l) m_rewrite(l);
97     if (is_tail(s, i, l)) {
98         add_tail_axiom(e, s);
99         return;
100     }
101     if (is_drop_last(s, i, l)) {
102         add_drop_last_axiom(e, s);
103         return;
104     }
105     if (is_extract_prefix0(s, i, l)) {
106         add_extract_prefix_axiom(e, s, l);
107         return;
108     }
109     if (is_extract_suffix(s, i, l)) {
110         add_extract_suffix_axiom(e, s, i);
111         return;
112     }
113     expr_ref x = m_sk.mk_pre(s, i);
114     expr_ref ls = mk_len(s);
115     expr_ref lx = mk_len(x);
116     expr_ref le = mk_len(e);
117     expr_ref ls_minus_i_l(mk_sub(mk_sub(ls, i), l), m);
118     expr_ref y = m_sk.mk_post(s, a.mk_add(i, l));
119     expr_ref xe = mk_concat(x, e);
120     expr_ref xey = mk_concat(x, e, y);
121     expr_ref zero(a.mk_int(0), m);
122 
123     literal i_ge_0    = mk_ge(i, 0);
124     literal i_le_ls   = mk_le(mk_sub(i, ls), 0);
125     literal ls_le_i   = mk_le(mk_sub(ls, i), 0);
126     literal ls_ge_li  = mk_ge(ls_minus_i_l, 0);
127     literal l_ge_0    = mk_ge(l, 0);
128     literal l_le_0    = mk_le(l, 0);
129     literal ls_le_0   = mk_le(ls, 0);
130     literal le_is_0   = mk_eq(le, zero);
131 
132 
133     // 0 <= i & i <= |s| & 0 <= l => xey = s
134     // 0 <= i & i <= |s| => |x| = i
135     // 0 <= i & i <= |s| & l >= 0 & |s| >= l + i => |e| = l
136     // 0 <= i & i <= |s| & |s| < l + i => |e| = |s| - i
137     // i < 0 => |e| = 0
138     // |s| <= i => |e| = 0
139     // |s| <= 0 => |e| = 0
140     // l <= 0 => |e| = 0
141     // |e| = 0 & i >= 0 & |s| > i & |s| > 0 => l <= 0
142     add_axiom(~i_ge_0, ~i_le_ls, ~l_ge_0, mk_seq_eq(xey, s));
143     add_axiom(~i_ge_0, ~i_le_ls, mk_eq(lx, i));
144     add_axiom(~i_ge_0, ~i_le_ls, ~l_ge_0, ~ls_ge_li, mk_eq(le, l));
145     add_axiom(~i_ge_0, ~i_le_ls, ~l_ge_0, ls_ge_li, mk_eq(le, mk_sub(ls, i)));
146     add_axiom(i_ge_0,   le_is_0);
147     add_axiom(~ls_le_i, le_is_0);
148     add_axiom(~ls_le_0, le_is_0);
149     add_axiom(~l_le_0,  le_is_0);
150     add_axiom(~le_is_0, ~i_ge_0, ls_le_i, ls_le_0, l_le_0);
151 }
152 
add_tail_axiom(expr * e,expr * s)153 void seq_axioms::add_tail_axiom(expr* e, expr* s) {
154     expr_ref head(m), tail(m);
155     m_sk.decompose(s, head, tail);
156     TRACE("seq", tout << "tail " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";);
157     literal emp = mk_eq_empty(s);
158     add_axiom(emp, mk_seq_eq(s, mk_concat(head, e)));
159     add_axiom(~emp, mk_eq_empty(e));
160 }
161 
add_drop_last_axiom(expr * e,expr * s)162 void seq_axioms::add_drop_last_axiom(expr* e, expr* s) {
163     TRACE("seq", tout << "drop last " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";);
164     literal emp = mk_eq_empty(s);
165     add_axiom(emp, mk_seq_eq(s, mk_concat(e, seq.str.mk_unit(m_sk.mk_last(s)))));
166     add_axiom(~emp, mk_eq_empty(e));
167 }
168 
is_drop_last(expr * s,expr * i,expr * l)169 bool seq_axioms::is_drop_last(expr* s, expr* i, expr* l) {
170     rational i1;
171     if (!a.is_numeral(i, i1) || !i1.is_zero()) {
172         return false;
173     }
174     expr_ref l2(m), l1(l, m);
175     l2 = mk_sub(mk_len(s), a.mk_int(1));
176     m_rewrite(l1);
177     m_rewrite(l2);
178     return l1 == l2;
179 }
180 
is_tail(expr * s,expr * i,expr * l)181 bool seq_axioms::is_tail(expr* s, expr* i, expr* l) {
182     rational i1;
183     if (!a.is_numeral(i, i1) || !i1.is_one()) {
184         return false;
185     }
186     expr_ref l2(m), l1(l, m);
187     l2 = mk_sub(mk_len(s), a.mk_int(1));
188     m_rewrite(l1);
189     m_rewrite(l2);
190     return l1 == l2;
191 }
192 
is_extract_prefix0(expr * s,expr * i,expr * l)193 bool seq_axioms::is_extract_prefix0(expr* s, expr* i, expr* l) {
194     rational i1;
195     return a.is_numeral(i, i1) && i1.is_zero();
196 }
197 
is_extract_suffix(expr * s,expr * i,expr * l)198 bool seq_axioms::is_extract_suffix(expr* s, expr* i, expr* l) {
199     expr_ref len(a.mk_add(l, i), m);
200     m_rewrite(len);
201     return seq.str.is_length(len, l) && l == s;
202 }
203 
204 /*
205   0 <= l <= len(s) => s = ey & l = len(e)
206   len(s) < l => s = e
207   l < 0 => e = empty
208  */
add_extract_prefix_axiom(expr * e,expr * s,expr * l)209 void seq_axioms::add_extract_prefix_axiom(expr* e, expr* s, expr* l) {
210     TRACE("seq", tout << "prefix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << " " << mk_bounded_pp(l, m, 2) << "\n";);
211     expr_ref le = mk_len(e);
212     expr_ref ls = mk_len(s);
213     expr_ref ls_minus_l(mk_sub(ls, l), m);
214     expr_ref zero(a.mk_int(0), m);
215     expr_ref y = m_sk.mk_post(s, l);
216     expr_ref ey = mk_concat(e, y);
217     literal l_ge_0 = mk_ge(l, 0);
218     literal l_le_s = mk_le(mk_sub(l, ls), 0);
219     add_axiom(~l_ge_0, ~l_le_s, mk_seq_eq(s, ey));
220     add_axiom(~l_ge_0, ~l_le_s, mk_eq(l, le));
221     add_axiom(~l_ge_0, ~l_le_s, mk_eq(ls_minus_l, mk_len(y)));
222     add_axiom(l_le_s, mk_eq(e, s));
223     add_axiom(l_ge_0, mk_eq_empty(e));
224 }
225 
226 /*
227   0 <= i <= len(s) => s = xe & i = len(x)
228   i < 0 => e = empty
229   i > len(s) => e = empty
230  */
add_extract_suffix_axiom(expr * e,expr * s,expr * i)231 void seq_axioms::add_extract_suffix_axiom(expr* e, expr* s, expr* i) {
232     TRACE("seq", tout << "suffix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";);
233     expr_ref x = m_sk.mk_pre(s, i);
234     expr_ref lx = mk_len(x);
235     expr_ref ls = mk_len(s);
236     expr_ref zero(a.mk_int(0), m);
237     expr_ref xe = mk_concat(x, e);
238     literal le_is_0 = mk_eq_empty(e);
239     literal i_ge_0 = mk_ge(i, 0);
240     literal i_le_s = mk_le(mk_sub(i, ls), 0);
241     add_axiom(~i_ge_0, ~i_le_s, mk_seq_eq(s, xe));
242     add_axiom(~i_ge_0, ~i_le_s, mk_eq(i, lx));
243     add_axiom(i_ge_0, le_is_0);
244     add_axiom(i_le_s, le_is_0);
245 }
246 
247 /*
248   encode that s is not contained in of xs1
249   where s1 is all of s, except the last element.
250 
251   s = "" or s = s1*(unit c)
252   s = "" or !contains(x*s1, s)
253 */
tightest_prefix(expr * s,expr * x)254 void seq_axioms::tightest_prefix(expr* s, expr* x) {
255     literal s_eq_emp = mk_eq_empty(s);
256     if (seq.str.max_length(s) <= 1) {
257         add_axiom(s_eq_emp, ~mk_literal(seq.str.mk_contains(x, s)));
258         return;
259     }
260     expr_ref s1 = m_sk.mk_first(s);
261     expr_ref c  = m_sk.mk_last(s);
262     expr_ref s1c = mk_concat(s1, seq.str.mk_unit(c));
263     add_axiom(s_eq_emp, mk_seq_eq(s, s1c));
264     add_axiom(s_eq_emp, ~mk_literal(seq.str.mk_contains(mk_concat(x, s1), s)));
265 }
266 
267 /*
268     [[str.indexof]](w, w2, i) is the smallest n such that for some some w1, w3
269     - w = w1w2w3
270     - i <= n = |w1|
271 
272     if [[str.contains]](w, w2) = true, |w2| > 0 and i >= 0.
273 
274     [[str.indexof]](w,w2,i) = -1  otherwise.
275 
276 
277   let i = Index(t, s, offset):
278   // index of s in t starting at offset.
279 
280 
281   |t| = 0 => |s| = 0 or indexof(t,s,offset) = -1
282   |t| = 0 & |s| = 0 => indexof(t,s,offset) = 0
283 
284   offset >= len(t) => |s| = 0 or i = -1
285 
286   len(t) != 0 & !contains(t, s) => i = -1
287 
288 
289   offset = 0 & len(t) != 0 & contains(t, s) => t = xsy & i = len(x)
290   tightest_prefix(x, s)
291 
292 
293   0 <= offset < len(t) => xy = t &
294                           len(x) = offset &
295                           (-1 = indexof(y, s, 0) => -1 = i) &
296                           (indexof(y, s, 0) >= 0 => indexof(t, s, 0) + offset = i)
297 
298   offset < 0 => i = -1
299 
300   optional lemmas:
301    (len(s) > len(t)  -> i = -1)
302    (len(s) <= len(t) -> i <= len(t)-len(s))
303 */
add_indexof_axiom(expr * i)304 void seq_axioms::add_indexof_axiom(expr* i) {
305     expr* _s = nullptr, *_t = nullptr, *_offset = nullptr;
306     rational r;
307     VERIFY(seq.str.is_index(i, _t, _s) ||
308            seq.str.is_index(i, _t, _s, _offset));
309     expr_ref minus_one(a.mk_int(-1), m);
310     expr_ref zero(a.mk_int(0), m);
311     expr_ref xsy(m), t(_t, m), s(_s, m), offset(_offset, m);
312     m_rewrite(t);
313     m_rewrite(s);
314     if (offset) m_rewrite(offset);
315     literal cnt = mk_literal(seq.str.mk_contains(t, s));
316     literal i_eq_m1 = mk_eq(i, minus_one);
317     literal i_eq_0 = mk_eq(i, zero);
318     literal s_eq_empty = mk_eq_empty(s);
319     literal t_eq_empty = mk_eq_empty(t);
320 
321     // |t| = 0 => |s| = 0 or indexof(t,s,offset) = -1
322     // ~contains(t,s) <=> indexof(t,s,offset) = -1
323 
324     add_axiom(cnt,  i_eq_m1);
325     add_axiom(~t_eq_empty, s_eq_empty, i_eq_m1);
326 
327     if (!offset || (a.is_numeral(offset, r) && r.is_zero())) {
328         // |s| = 0 => indexof(t,s,0) = 0
329         add_axiom(~s_eq_empty, i_eq_0);
330 #if 1
331         expr_ref x  = m_sk.mk_indexof_left(t, s);
332         expr_ref y  = m_sk.mk_indexof_right(t, s);
333         xsy         = mk_concat(x, s, y);
334         expr_ref lenx = mk_len(x);
335         // contains(t,s) & |s| != 0 => t = xsy & indexof(t,s,0) = |x|
336         add_axiom(~cnt, s_eq_empty, mk_seq_eq(t, xsy));
337         add_axiom(~cnt, s_eq_empty, mk_eq(i, lenx));
338         add_axiom(~cnt, mk_ge(i, 0));
339         tightest_prefix(s, x);
340 #else
341         // let i := indexof(t,s,0)
342         // contains(t, s) & |s| != 0 => ~contains(substr(t,0,i+len(s)-1), s)
343         //                           => substr(t,0,i+len(s)) = substr(t,0,i) ++ s
344         //
345         expr_ref len_s = mk_len(s);
346         expr_ref mone(a.mk_int(-1), m);
347         add_axiom(~cnt, s_eq_empty, ~mk_literal(seq.str.mk_contains(seq.str.mk_substr(t,zero,a.mk_add(i,len_s,mone)),s)));
348         add_axiom(~cnt, s_eq_empty, mk_seq_eq(seq.str.mk_substr(t,zero,a.mk_add(i,len_s)),
349                                               seq.str.mk_concat(seq.str.mk_substr(t,zero,i), s)));
350 #endif
351     }
352     else {
353         // offset >= len(t) => |s| = 0 or indexof(t, s, offset) = -1
354         // offset > len(t) => indexof(t, s, offset) = -1
355         // offset = len(t) & |s| = 0 => indexof(t, s, offset) = offset
356         expr_ref len_t = mk_len(t);
357         literal offset_ge_len = mk_ge(mk_sub(offset, len_t), 0);
358         literal offset_le_len = mk_le(mk_sub(offset, len_t), 0);
359         literal i_eq_offset = mk_eq(i, offset);
360         add_axiom(~offset_ge_len, s_eq_empty, i_eq_m1);
361         add_axiom(offset_le_len, i_eq_m1);
362         add_axiom(~offset_ge_len, ~offset_le_len, ~s_eq_empty, i_eq_offset);
363 
364         expr_ref x = m_sk.mk_indexof_left(t, s, offset);
365         expr_ref y = m_sk.mk_indexof_right(t, s, offset);
366         expr_ref indexof0(seq.str.mk_index(y, s, zero), m);
367         expr_ref offset_p_indexof0(a.mk_add(offset, indexof0), m);
368         literal offset_ge_0 = mk_ge(offset, 0);
369 
370         // 0 <= offset & offset < len(t) => t = xy
371         // 0 <= offset & offset < len(t) => len(x) = offset
372         // 0 <= offset & offset < len(t) & indexof(y,s,0) = -1 => -1 = i
373         // 0 <= offset & offset < len(t) & indexof(y,s,0) >= 0 =>
374         //                  -1 = indexof(y,s,0) + offset = indexof(t, s, offset)
375 
376         add_axiom(~offset_ge_0, offset_ge_len, mk_seq_eq(t, mk_concat(x, y)));
377         add_axiom(~offset_ge_0, offset_ge_len, mk_eq(mk_len(x), offset));
378         add_axiom(~offset_ge_0, offset_ge_len,
379                   ~mk_eq(indexof0, minus_one), i_eq_m1);
380         add_axiom(~offset_ge_0, offset_ge_len,
381                   ~mk_ge(indexof0, 0),
382                   mk_eq(offset_p_indexof0, i));
383 
384         // offset < 0 => -1 = i
385         add_axiom(offset_ge_0, i_eq_m1);
386     }
387 }
388 
389 /**
390 
391   !contains(t, s) => i = -1
392   |t| = 0 => |s| = 0 or i = -1
393   |t| = 0 & |s| = 0 => i = 0
394   |t| != 0 & contains(t, s) => t = xsy & i = len(x)
395   |s| = 0 or s = s_head*s_tail
396   |s| = 0 or !contains(s_tail*y, s)
397 
398  */
add_last_indexof_axiom(expr * i)399 void seq_axioms::add_last_indexof_axiom(expr* i) {
400     expr* _s = nullptr, *_t = nullptr;
401     VERIFY(seq.str.is_last_index(i, _t, _s));
402     expr_ref s(_s, m), t(_t, m);
403     m_rewrite(s);
404     m_rewrite(t);
405     expr_ref minus_one(a.mk_int(-1), m);
406     expr_ref zero(a.mk_int(0), m);
407     expr_ref s_head(m), s_tail(m);
408     expr_ref x  = m_sk.mk_last_indexof_left(t, s);
409     expr_ref y  = m_sk.mk_last_indexof_right(t, s);
410     m_sk.decompose(s, s_head, s_tail);
411     literal cnt  = mk_literal(seq.str.mk_contains(t, s));
412     literal cnt2 = mk_literal(seq.str.mk_contains(mk_concat(s_tail, y), s));
413     literal i_eq_m1 = mk_eq(i, minus_one);
414     literal i_eq_0  = mk_eq(i, zero);
415     literal s_eq_empty = mk_eq_empty(s);
416     literal t_eq_empty = mk_eq_empty(t);
417     expr_ref xsy       = mk_concat(x, s, y);
418 
419     add_axiom(cnt, i_eq_m1);
420     add_axiom(~t_eq_empty, s_eq_empty, i_eq_m1);
421     add_axiom(~t_eq_empty, ~s_eq_empty, i_eq_0);
422     add_axiom(t_eq_empty, ~cnt, mk_seq_eq(t, xsy));
423     add_axiom(t_eq_empty, ~cnt, mk_eq(i, mk_len(x)));
424     add_axiom(s_eq_empty, mk_eq(s, mk_concat(s_head, s_tail)));
425     add_axiom(s_eq_empty, ~cnt2);
426 }
427 
428 /*
429   let r = replace(u, s, t)
430 
431 
432   - if s is empty, the result is to prepend t to u;
433   - if s does not occur in u then the result is u.
434 
435   s = "" => r = t+u
436   u = "" => s = "" or r = u
437   ~contains(u,s) => r = u
438 
439   tightest_prefix(s, x)
440   contains(u, s) => r = xty & u = xsy
441   ~contains(u, s) => r = u
442 
443 */
add_replace_axiom(expr * r)444 void seq_axioms::add_replace_axiom(expr* r) {
445     expr* _u = nullptr, *_s = nullptr, *_t = nullptr;
446     VERIFY(seq.str.is_replace(r, _u, _s, _t));
447     expr_ref u(_u, m), s(_s, m), t(_t, m);
448     m_rewrite(u);
449     m_rewrite(s);
450     m_rewrite(t);
451     expr_ref x  = m_sk.mk_indexof_left(u, s);
452     expr_ref y  = m_sk.mk_indexof_right(u, s);
453     expr_ref xty = mk_concat(x, t, y);
454     expr_ref xsy = mk_concat(x, s, y);
455     literal u_emp = mk_eq_empty(u, true);
456     literal s_emp = mk_eq_empty(s, true);
457     literal cnt = mk_literal(seq.str.mk_contains(u, s));
458     add_axiom(~s_emp, mk_seq_eq(r, mk_concat(t, u)));
459     add_axiom(~u_emp, s_emp, mk_seq_eq(r, u));
460     add_axiom(cnt,  mk_seq_eq(r, u));
461     add_axiom(~cnt, u_emp, s_emp, mk_seq_eq(u, xsy));
462     add_axiom(~cnt, u_emp, s_emp, mk_seq_eq(r, xty));
463     ctx().force_phase(cnt);
464     tightest_prefix(s, x);
465 }
466 
467 
468 /*
469    let e = at(s, i)
470 
471    0 <= i < len(s) -> s = xey & len(x) = i & len(e) = 1
472    i < 0 \/ i >= len(s) -> e = empty
473 
474 */
add_at_axiom(expr * e)475 void seq_axioms::add_at_axiom(expr* e) {
476     TRACE("seq", tout << "at-axiom: " << ctx().get_scope_level() << " " << mk_bounded_pp(e, m) << "\n";);
477     expr* _s = nullptr, *_i = nullptr;
478     VERIFY(seq.str.is_at(e, _s, _i));
479     expr_ref s(_s, m), i(_i, m);
480     m_rewrite(s);
481     m_rewrite(i);
482     expr_ref zero(a.mk_int(0), m);
483     expr_ref one(a.mk_int(1), m);
484     expr_ref emp(seq.str.mk_empty(m.get_sort(e)), m);
485     expr_ref len_s = mk_len(s);
486     literal i_ge_0 = mk_ge(i, 0);
487     literal i_ge_len_s = mk_ge(mk_sub(i, mk_len(s)), 0);
488     expr_ref len_e = mk_len(e);
489 
490     rational iv;
491     if (a.is_numeral(i, iv) && iv.is_unsigned()) {
492         expr_ref_vector es(m);
493         expr_ref nth(m);
494         unsigned k = iv.get_unsigned();
495         for (unsigned j = 0; j <= k; ++j) {
496             es.push_back(seq.str.mk_unit(mk_nth(s, j)));
497         }
498         nth = es.back();
499         es.push_back(m_sk.mk_tail(s, i));
500         add_axiom(~i_ge_0, i_ge_len_s, mk_seq_eq(s, seq.str.mk_concat(es, m.get_sort(e))));
501         add_axiom(~i_ge_0, i_ge_len_s, mk_seq_eq(nth, e));
502     }
503     else {
504         expr_ref x =     m_sk.mk_pre(s, i);
505         expr_ref y =     m_sk.mk_tail(s, i);
506         expr_ref xey   = mk_concat(x, e, y);
507         expr_ref len_x = mk_len(x);
508         add_axiom(~i_ge_0, i_ge_len_s, mk_seq_eq(s, xey));
509         add_axiom(~i_ge_0, i_ge_len_s, mk_eq(i, len_x));
510     }
511 
512     add_axiom(i_ge_0, mk_eq(e, emp));
513     add_axiom(~i_ge_len_s, mk_eq(e, emp));
514     add_axiom(~i_ge_0, i_ge_len_s, mk_eq(one, len_e));
515     add_axiom(mk_le(len_e, 1));
516 }
517 
518 /**
519    i >= 0 i < len(s) => unit(nth_i(s, i)) = at(s, i)
520    nth_i(unit(nth_i(s, i)), 0) = nth_i(s, i)
521 */
522 
add_nth_axiom(expr * e)523 void seq_axioms::add_nth_axiom(expr* e) {
524     expr* s = nullptr, *i = nullptr;
525     rational n;
526     zstring str;
527     VERIFY(seq.str.is_nth_i(e, s, i));
528     if (seq.str.is_string(s, str) && a.is_numeral(i, n) &&
529         n.is_unsigned() && n.get_unsigned() < str.length()) {
530         app_ref ch(seq.str.mk_char(str[n.get_unsigned()]), m);
531         add_axiom(mk_eq(ch, e));
532     }
533     else {
534         expr_ref zero(a.mk_int(0), m);
535         literal i_ge_0 =     mk_ge(i, 0);
536         literal i_ge_len_s = mk_ge(mk_sub(i, mk_len(s)), 0);
537         // at(s,i) = [nth(s,i)]
538         expr_ref rhs(s, m);
539         expr_ref lhs(seq.str.mk_unit(e), m);
540         if (!seq.str.is_at(s) || zero != i) rhs = seq.str.mk_at(s, i);
541         m_rewrite(rhs);
542         add_axiom(~i_ge_0, i_ge_len_s, mk_eq(lhs, rhs));
543     }
544 }
545 
546 
add_itos_axiom(expr * e)547 void seq_axioms::add_itos_axiom(expr* e) {
548     expr* _n = nullptr;
549     TRACE("seq", tout << mk_pp(e, m) << "\n";);
550     VERIFY(seq.str.is_itos(e, _n));
551     expr_ref n(_n, m);
552     m_rewrite(n);
553 
554     // itos(n) = "" <=> n < 0
555     expr_ref zero(a.mk_int(0), m);
556     literal eq1 = mk_literal(seq.str.mk_is_empty(e));
557     literal ge0 = mk_ge(n, 0);
558     // n >= 0 => itos(n) != ""
559     // itos(n) = "" or n >= 0
560     add_axiom(~eq1, ~ge0);
561     add_axiom(eq1, ge0);
562     add_axiom(mk_ge(mk_len(e), 0));
563 
564     // n >= 0 => stoi(itos(n)) = n
565     app_ref stoi(seq.str.mk_stoi(e), m);
566     add_axiom(~ge0, th.mk_preferred_eq(stoi, n));
567 
568     // itos(n) does not start with "0" when n > 0
569     // n = 0 or at(itos(n),0) != "0"
570     // alternative: n >= 0 => itos(stoi(itos(n))) = itos(n)
571     expr_ref zs(seq.str.mk_string(symbol("0")), m);
572     m_rewrite(zs);
573     literal eq0 = mk_eq(n, zero);
574     literal at0 = mk_eq(seq.str.mk_at(e, zero), zs);
575     add_axiom(eq0, ~at0);
576     add_axiom(~eq0, mk_eq(e, zs));
577 }
578 
579 /**
580    stoi(s) >= -1
581    stoi("") = -1
582    stoi(s) >= 0 => is_digit(nth(s,0))
583 */
add_stoi_axiom(expr * e)584 void seq_axioms::add_stoi_axiom(expr* e) {
585     TRACE("seq", tout << mk_pp(e, m) << "\n";);
586     literal ge0 = mk_ge(e, 0);
587     expr* s = nullptr;
588     VERIFY (seq.str.is_stoi(e, s));
589     add_axiom(mk_ge(e, -1));                             // stoi(s) >= -1
590     add_axiom(~mk_eq_empty(s), mk_eq(e, a.mk_int(-1)));  // s = "" => stoi(s) = -1
591     add_axiom(~ge0, is_digit(mk_nth(s, 0)));             // stoi(s) >= 0 => is_digit(nth(s,0))
592 
593 }
594 
595 /**
596 
597    len(s) <= k => stoi(s) = stoi(s, k)
598    len(s) > 0,  is_digit(nth(s,0)) => stoi(s, 0) = digit(nth_i(s, 0))
599    len(s) > 0, ~is_digit(nth(s,0)) => stoi(s, 0) = -1
600 
601    0 < i, len(s) <= i =>        stoi(s, i) = stoi(s, i - 1)
602    0 < i, len(s) > i, stoi(s, i - 1) >= 0, is_digit(nth(s, i - 1)) => stoi(s, i) = 10*stoi(s, i - 1) + digit(nth_i(s, i - 1))
603    0 < i, len(s) > i, stoi(s, i - 1) < 0       => stoi(s, i) = -1
604    0 < i, len(s) > i, ~is_digit(nth(s, i - 1)) => stoi(s, i) = -1
605 
606 
607 
608 Define auxiliary function with the property:
609    for 0 <= i < k
610      stoi(s, i) := stoi(extract(s, 0, i+1))
611 
612    for 0 < i < k:
613      len(s) > i  => stoi(s, i) := stoi(extract(s, 0, i))*10 + stoi(extract(s, i, 1))
614      len(s) <= i => stoi(s, i) := stoi(extract(s, 0, i-1), i-1)
615 
616    for i <= i < k:
617      stoi(s) > = 0, len(s) > i => is_digit(nth(s, i))
618 
619  */
add_stoi_axiom(expr * e,unsigned k)620 void seq_axioms::add_stoi_axiom(expr* e, unsigned k) {
621     SASSERT(k > 0);
622     expr* _s = nullptr;
623     VERIFY (seq.str.is_stoi(e, _s));
624     expr_ref s(_s, m);
625     m_rewrite(s);
626     auto stoi2 = [&](unsigned j) { return m_sk.mk("seq.stoi", s, a.mk_int(j), a.mk_int()); };
627     auto digit = [&](unsigned j) { return m_sk.mk_digit2int(mk_nth(s, j)); };
628     auto is_digit_ = [&](unsigned j) { return is_digit(mk_nth(s, j)); };
629     expr_ref len = mk_len(s);
630     literal ge0 = mk_ge(e, 0);
631     literal lek = mk_le(len, k);
632     add_axiom(~lek, mk_eq(e, stoi2(k-1)));                                    // len(s) <= k  => stoi(s) = stoi(s, k-1)
633     add_axiom(mk_le(len, 0), ~is_digit_(0), mk_eq(stoi2(0), digit(0)));       // len(s) > 0, is_digit(nth(s, 0)) => stoi(s,0) = digit(s,0)
634     add_axiom(mk_le(len, 0), is_digit_(0),  mk_eq(stoi2(0), a.mk_int(-1)));   // len(s) > 0, ~is_digit(nth(s, 0)) => stoi(s,0) = -1
635     for (unsigned i = 1; i < k; ++i) {
636 
637         // len(s) <= i => stoi(s, i) = stoi(s, i - 1)
638 
639         add_axiom(~mk_le(len, i),  mk_eq(stoi2(i), stoi2(i-1)));
640 
641         // len(s) > i, stoi(s, i - 1) >= 0, is_digit(nth(s, i)) => stoi(s, i) = 10*stoi(s, i - 1) + digit(i)
642         // len(s) > i, stoi(s, i - 1) < 0 => stoi(s, i) = -1
643         // len(s) > i, ~is_digit(nth(s, i)) => stoi(s, i) = -1
644 
645         add_axiom(mk_le(len, i), ~mk_ge(stoi2(i-1), 0), ~is_digit_(i), mk_eq(stoi2(i), a.mk_add(a.mk_mul(a.mk_int(10), stoi2(i-1)), digit(i))));
646         add_axiom(mk_le(len, i), is_digit_(i),                         mk_eq(stoi2(i), a.mk_int(-1)));
647         add_axiom(mk_le(len, i), mk_ge(stoi2(i-1), 0),                 mk_eq(stoi2(i), a.mk_int(-1)));
648 
649         // stoi(s) >= 0, i < len(s) => is_digit(nth(s, i))
650 
651         add_axiom(~ge0, mk_le(len, i), is_digit_(i));
652     }
653 }
654 
655 /**
656    Let s := itos(e)
657 
658    Relate values of e with len(s) where len(s) is bounded by k.
659 
660    |s| = 0 => e < 0
661 
662    |s| <= 1 => e < 10
663    |s| <= 2 => e < 100
664    |s| <= 3 => e < 1000
665 
666    |s| >= 1 => e >= 0
667    |s| >= 2 => e >= 10
668    |s| >= 3 => e >= 100
669 
670    There are no constraints to ensure that the string itos(e)
671    contains the valid digits corresponding to e >= 0.
672    The validity of itos(e) is ensured by the following property:
673    e is either of the form stoi(s) for some s, or there is a term
674    stoi(itos(e)) and axiom e >= 0 => stoi(itos(e)) = e.
675    Then the axioms for stoi(itos(e)) ensure that the characters of
676    itos(e) are valid digits and the axiom stoi(itos(e)) = e ensures
677    these digits encode e.
678    The option of constraining itos(e) digits directly does not
679    seem appealing becaues it requires an order of quadratic number
680    of constraints for all possible lengths of itos(e) (e.g, log_10(e)).
681 
682 */
683 
add_itos_axiom(expr * s,unsigned k)684 void seq_axioms::add_itos_axiom(expr* s, unsigned k) {
685     expr* e = nullptr;
686     VERIFY(seq.str.is_itos(s, e));
687     expr_ref len = mk_len(s);
688     add_axiom(mk_ge(e, 10), mk_le(len, 1));
689     add_axiom(mk_le(e, -1), mk_ge(len, 1));
690     rational lo(1);
691     for (unsigned i = 1; i <= k; ++i) {
692         lo *= rational(10);
693         add_axiom(mk_ge(e, lo), mk_le(len, i));
694         add_axiom(mk_le(e, lo - 1), mk_ge(len, i + 1));
695     }
696 }
697 
is_digit(expr * ch)698 literal seq_axioms::is_digit(expr* ch) {
699     ensure_digit_axiom();
700     literal isd = mk_literal(m_sk.mk_is_digit(ch));
701     expr_ref d2i = m_sk.mk_digit2int(ch);
702     expr_ref _lo(seq.mk_le(seq.mk_char('0'), ch), m);
703     expr_ref _hi(seq.mk_le(ch, seq.mk_char('9')), m);
704     literal lo = mk_literal(_lo);
705     literal hi = mk_literal(_hi);
706     add_axiom(~lo, ~hi, isd);
707     add_axiom(~isd, lo);
708     add_axiom(~isd, hi);
709     return isd;
710 }
711 
712 /**
713    Bridge character digits to integers.
714 */
715 
ensure_digit_axiom()716 void seq_axioms::ensure_digit_axiom() {
717     if (!m_digits_initialized) {
718         for (unsigned i = 0; i < 10; ++i) {
719             expr_ref cnst(seq.mk_char('0'+i), m);
720             add_axiom(mk_eq(m_sk.mk_digit2int(cnst), a.mk_int(i)));
721         }
722         ctx().push_trail(value_trail<context, bool>(m_digits_initialized));
723         m_digits_initialized = true;
724     }
725 }
726 
727 
728 /**
729    e1 < e2 => prefix(e1, e2) or e1 = xcy
730    e1 < e2 => prefix(e1, e2) or c < d
731    e1 < e2 => prefix(e1, e2) or e2 = xdz
732    e1 < e2 => e1 != e2
733    !(e1 < e2) => prefix(e2, e1) or e2 = xdz
734    !(e1 < e2) => prefix(e2, e1) or d < c
735    !(e1 < e2) => prefix(e2, e1) or e1 = xcy
736    !(e1 = e2) or !(e1 < e2)
737 
738    optional:
739    e1 < e2 or e1 = e2 or e2 < e1
740    !(e1 = e2) or !(e2 < e1)
741    !(e1 < e2) or !(e2 < e1)
742  */
add_lt_axiom(expr * n)743 void seq_axioms::add_lt_axiom(expr* n) {
744     expr* _e1 = nullptr, *_e2 = nullptr;
745     VERIFY(seq.str.is_lt(n, _e1, _e2));
746     expr_ref e1(_e1, m), e2(_e2, m);
747     m_rewrite(e1);
748     m_rewrite(e2);
749     sort* s = m.get_sort(e1);
750     sort* char_sort = nullptr;
751     VERIFY(seq.is_seq(s, char_sort));
752     literal lt = mk_literal(n);
753     expr_ref x = m_sk.mk("str.<.x", e1, e2);
754     expr_ref y = m_sk.mk("str.<.y", e1, e2);
755     expr_ref z = m_sk.mk("str.<.z", e1, e2);
756     expr_ref c = m_sk.mk("str.<.c", e1, e2, char_sort);
757     expr_ref d = m_sk.mk("str.<.d", e1, e2, char_sort);
758     expr_ref xcy = mk_concat(x, seq.str.mk_unit(c), y);
759     expr_ref xdz = mk_concat(x, seq.str.mk_unit(d), z);
760     literal eq   = mk_eq(e1, e2);
761     literal pref21 = mk_literal(seq.str.mk_prefix(e2, e1));
762     literal pref12 = mk_literal(seq.str.mk_prefix(e1, e2));
763     literal e1xcy = mk_eq(e1, xcy);
764     literal e2xdz = mk_eq(e2, xdz);
765     literal ltcd = mk_literal(seq.mk_lt(c, d));
766     literal ltdc = mk_literal(seq.mk_lt(d, c));
767     add_axiom(~lt, pref12, e2xdz);
768     add_axiom(~lt, pref12, e1xcy);
769     add_axiom(~lt, pref12, ltcd);
770     add_axiom(lt, pref21, e1xcy);
771     add_axiom(lt, pref21, ltdc);
772     add_axiom(lt, pref21, e2xdz);
773     add_axiom(~eq, ~lt);
774 }
775 
776 /**
777    e1 <= e2 <=> e1 < e2 or e1 = e2
778 */
add_le_axiom(expr * n)779 void seq_axioms::add_le_axiom(expr* n) {
780     expr* e1 = nullptr, *e2 = nullptr;
781     VERIFY(seq.str.is_le(n, e1, e2));
782     literal lt = mk_literal(seq.str.mk_lex_lt(e1, e2));
783     literal le = mk_literal(n);
784     literal eq = mk_eq(e1, e2);
785     add_axiom(~le, lt, eq);
786     add_axiom(~eq, le);
787     add_axiom(~lt, le);
788 }
789 
790 /**
791    is_digit(e) <=> to_code('0') <= to_code(e) <= to_code('9')
792  */
add_is_digit_axiom(expr * n)793 void seq_axioms::add_is_digit_axiom(expr* n) {
794     expr* e = nullptr;
795     VERIFY(seq.str.is_is_digit(n, e));
796     literal is_digit = mk_literal(n);
797     expr_ref to_code(seq.str.mk_to_code(e), m);
798     literal ge0 = mk_ge(to_code, (unsigned)'0');
799     literal le9 = mk_le(to_code, (unsigned)'9');
800     add_axiom(~is_digit, ge0);
801     add_axiom(~is_digit, le9);
802     add_axiom(is_digit, ~ge0, ~le9);
803 }
804 
805 /**
806    len(e) = 1 => 0 <= to_code(e) <= max_code
807    len(e) != 1 => to_code(e) = -1
808  */
add_str_to_code_axiom(expr * n)809 void seq_axioms::add_str_to_code_axiom(expr* n) {
810     expr* e = nullptr;
811     VERIFY(seq.str.is_to_code(n, e));
812     literal len_is1 = mk_eq(mk_len(e), a.mk_int(1));
813     add_axiom(~len_is1, mk_ge(n, 0));
814     add_axiom(~len_is1, mk_le(n, zstring::max_char()));
815     add_axiom(len_is1, mk_eq(n, a.mk_int(-1)));
816 }
817 
818 /**
819    0 <= e <= max_char => len(from_code(e)) = 1
820    0 <= e <= max_char => to_code(from_code(e)) = e
821    e < 0 or e > max_char => len(from_code(e)) = ""
822  */
add_str_from_code_axiom(expr * n)823 void seq_axioms::add_str_from_code_axiom(expr* n) {
824     expr* e = nullptr;
825     VERIFY(seq.str.is_from_code(n, e));
826     literal ge = mk_ge(e, 0);
827     literal le = mk_le(e, zstring::max_char());
828     literal emp = mk_literal(seq.str.mk_is_empty(n));
829     add_axiom(~ge, ~le, mk_eq(mk_len(n), a.mk_int(1)));
830     add_axiom(~ge, ~le, mk_eq(seq.str.mk_to_code(n), e));
831     add_axiom(ge, emp);
832     add_axiom(le, emp);
833 }
834 
835 
836 /**
837 Unit is injective:
838 
839    u = inv-unit(unit(u))
840 */
841 
add_unit_axiom(expr * n)842 void seq_axioms::add_unit_axiom(expr* n) {
843     expr* u = nullptr;
844     VERIFY(seq.str.is_unit(n, u));
845     add_axiom(mk_eq(u, m_sk.mk_unit_inv(n)));
846 }
847 
848 /**
849 
850  suffix(s, t) => s = seq.suffix_inv(s, t) + t
851 ~suffix(s, t) => len(s) > len(t) or s = y(s, t) + unit(c(s, t)) + x(s, t)
852 ~suffix(s, t) => len(s) > len(t) or t = z(s, t) + unit(d(s, t)) + x(s, t)
853 ~suffix(s, t) => len(s) > len(t) or c(s,t) != d(s,t)
854 
855 */
856 
add_suffix_axiom(expr * e)857 void seq_axioms::add_suffix_axiom(expr* e) {
858     expr* _s = nullptr, *_t = nullptr;
859     VERIFY(seq.str.is_suffix(e, _s, _t));
860     expr_ref s(_s, m), t(_t, m);
861     m_rewrite(s);
862     m_rewrite(t);
863     literal lit = mk_literal(e);
864     literal s_gt_t = mk_ge(mk_sub(mk_len(s), mk_len(t)), 1);
865 #if 0
866     expr_ref x = m_sk.mk_pre(t, mk_sub(mk_len(t), mk_len(s)));
867     expr_ref y = m_sk.mk_tail(t, mk_sub(mk_len(s), a.mk_int(1)));
868     add_axiom(lit, s_gt_t, mk_seq_eq(t, mk_concat(x, y)));
869     add_axiom(lit, s_gt_t, mk_eq(mk_len(y), mk_len(s)));
870     add_axiom(lit, s_gt_t, ~mk_eq(y, s));
871 #else
872     sort* char_sort = nullptr;
873     VERIFY(seq.is_seq(m.get_sort(s), char_sort));
874     expr_ref x = m_sk.mk("seq.suffix.x", s, t);
875     expr_ref y = m_sk.mk("seq.suffix.y", s, t);
876     expr_ref z = m_sk.mk("seq.suffix.z", s, t);
877     expr_ref c = m_sk.mk("seq.suffix.c", s, t, char_sort);
878     expr_ref d = m_sk.mk("seq.suffix.d", s, t, char_sort);
879     add_axiom(lit, s_gt_t, mk_seq_eq(s, mk_concat(y, seq.str.mk_unit(c), x)));
880     add_axiom(lit, s_gt_t, mk_seq_eq(t, mk_concat(z, seq.str.mk_unit(d), x)));
881     add_axiom(lit, s_gt_t, ~mk_eq(c, d));
882 #endif
883 }
884 
add_prefix_axiom(expr * e)885 void seq_axioms::add_prefix_axiom(expr* e) {
886     expr* _s = nullptr, *_t = nullptr;
887     VERIFY(seq.str.is_prefix(e, _s, _t));
888     expr_ref s(_s, m), t(_t, m);
889     m_rewrite(s);
890     m_rewrite(t);
891     literal lit = mk_literal(e);
892     literal s_gt_t = mk_ge(mk_sub(mk_len(s), mk_len(t)), 1);
893 #if 0
894     expr_ref x = m_sk.mk_pre(t, mk_len(s));
895     expr_ref y = m_sk.mk_tail(t, mk_sub(mk_sub(mk_len(t), mk_len(s)), a.mk_int(1)));
896     add_axiom(lit, s_gt_t, mk_seq_eq(t, mk_concat(x, y)));
897     add_axiom(lit, s_gt_t, mk_eq(mk_len(x), mk_len(s)));
898     add_axiom(lit, s_gt_t, ~mk_eq(x, s));
899 
900 #else
901     sort* char_sort = nullptr;
902     VERIFY(seq.is_seq(m.get_sort(s), char_sort));
903     expr_ref x = m_sk.mk("seq.prefix.x", s, t);
904     expr_ref y = m_sk.mk("seq.prefix.y", s, t);
905     expr_ref z = m_sk.mk("seq.prefix.z", s, t);
906     expr_ref c = m_sk.mk("seq.prefix.c", s, t, char_sort);
907     expr_ref d = m_sk.mk("seq.prefix.d", s, t, char_sort);
908     add_axiom(lit, s_gt_t, mk_seq_eq(s, mk_concat(x, seq.str.mk_unit(c), y)));
909     add_axiom(lit, s_gt_t, mk_seq_eq(t, mk_concat(x, seq.str.mk_unit(d), z)), mk_seq_eq(t, x));
910     add_axiom(lit, s_gt_t, ~mk_eq(c, d));
911 #endif
912 }
913 
914 /***
915     let n = len(x)
916     - len(a ++ b) = len(a) + len(b) if x = a ++ b
917     - len(unit(u)) = 1              if x = unit(u)
918     - len(str) = str.length()       if x = str
919     - len(empty) = 0                if x = empty
920     - len(int.to.str(i)) >= 1       if x = int.to.str(i) and more generally if i = 0 then 1 else 1+floor(log(|i|))
921     - len(x) >= 0                   otherwise
922  */
add_length_axiom(expr * n)923 void seq_axioms::add_length_axiom(expr* n) {
924     expr* x = nullptr;
925     VERIFY(seq.str.is_length(n, x));
926     if (seq.str.is_concat(x) ||
927         seq.str.is_unit(x) ||
928         seq.str.is_empty(x) ||
929         seq.str.is_string(x)) {
930         expr_ref len(n, m);
931         m_rewrite(len);
932         SASSERT(n != len);
933         add_axiom(mk_eq(len, n));
934     }
935     else {
936         add_axiom(mk_ge(n, 0));
937     }
938 }
939 
940 /**
941    ~contains(a, b) => ~prefix(b, a)
942    ~contains(a, b) => ~contains(tail(a), b) or a = empty
943    ~contains(a, b) & a = empty => b != empty
944    ~(a = empty) => a = head + tail
945  */
unroll_not_contains(expr * e)946 void seq_axioms::unroll_not_contains(expr* e) {
947     expr_ref head(m), tail(m);
948     expr* a = nullptr, *b = nullptr;
949     VERIFY(seq.str.is_contains(e, a, b));
950     m_sk.decompose(a, head, tail);
951     expr_ref pref(seq.str.mk_prefix(b, a), m);
952     expr_ref postf(seq.str.mk_contains(tail, b), m);
953     m_rewrite(pref);
954     m_rewrite(postf);
955     literal pre = mk_literal(pref);
956     literal cnt = mk_literal(e);
957     literal ctail = mk_literal(postf);
958     literal emp = mk_eq_empty(a, true);
959     add_axiom(cnt, ~pre);
960     add_axiom(cnt, ~ctail);
961     add_axiom(~emp, mk_eq_empty(tail));
962     add_axiom(emp, mk_eq(a, seq.str.mk_concat(head, tail)));
963 }
964 
965 
add_length_limit(expr * s,unsigned k)966 expr_ref seq_axioms::add_length_limit(expr* s, unsigned k) {
967     expr_ref bound_tracker  = m_sk.mk_length_limit(s, k);
968     expr* s0 = nullptr;
969     if (seq.str.is_stoi(s, s0))
970         s = s0;
971     literal bound_predicate = mk_le(mk_len(s), k);
972     add_axiom(~mk_literal(bound_tracker), bound_predicate);
973     return bound_tracker;
974 }
975