1 /*++
2   Module Name:
3 
4   theory_str_mc.cpp
5 
6   Abstract:
7 
8   Model Construction for String Theory Plugin
9 
10   Author:
11 
12   Murphy Berzish and Yunhui Zheng
13 
14   Revision History:
15 
16   --*/
17 #include "ast/ast_smt2_pp.h"
18 #include "smt/smt_context.h"
19 #include "smt/theory_str.h"
20 #include "smt/smt_model_generator.h"
21 #include "ast/ast_pp.h"
22 #include "ast/ast_ll_pp.h"
23 #include<list>
24 #include<algorithm>
25 #include "smt/theory_seq_empty.h"
26 #include "smt/theory_arith.h"
27 #include "ast/ast_util.h"
28 #include "ast/rewriter/seq_rewriter.h"
29 #include "ast/rewriter/expr_replacer.h"
30 #include "smt_kernel.h"
31 #include "model/model_smt2_pp.h"
32 
33 namespace smt {
34 
35     /*
36      * Use the current model in the arithmetic solver to get the length of a term.
37      * Returns true if this could be done, placing result in 'termLen', or false otherwise.
38      * Works like get_len_value() except uses arithmetic solver model instead of EQCs.
39      */
fixed_length_get_len_value(expr * e,rational & val)40     bool theory_str::fixed_length_get_len_value(expr * e, rational & val) {
41         ast_manager & m = get_manager();
42 
43         rational val1;
44         expr_ref len(m), len_val(m);
45         expr* e1 = nullptr, *e2 = nullptr;
46         expr_ref_vector todo(m);
47         todo.push_back(e);
48         val.reset();
49         while (!todo.empty()) {
50             expr* c = todo.back();
51             todo.pop_back();
52             zstring tmp;
53             if (u.str.is_concat(c, e1, e2)) {
54                 todo.push_back(e1);
55                 todo.push_back(e2);
56             }
57             else if (u.str.is_string(c, tmp)) {
58                 unsigned int sl = tmp.length();
59                 val += rational(sl);
60             }
61             else {
62                 len = mk_strlen(c);
63                 arith_value v(get_manager());
64                 v.init(&get_context());
65                 if (v.get_value(len, val1)) {
66                     val += val1;
67                 } else {
68                     return false;
69                 }
70             }
71         }
72         return val.is_int();
73     }
74 
75 
fixed_length_reduce_suffix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)76     bool theory_str::fixed_length_reduce_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
77         ast_manager & m = get_manager();
78 
79         ast_manager & sub_m = subsolver.m();
80 
81         expr * full = nullptr;
82         expr * suff = nullptr;
83         VERIFY(u.str.is_suffix(f, suff, full));
84 
85         expr_ref haystack(full, m);
86         expr_ref needle(suff, m);
87 
88         ptr_vector<expr> full_chars, suff_chars;
89 
90         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex)
91                 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) {
92             return false;
93         }
94 
95         if (suff_chars.size() == 0) {
96             // all strings endwith the empty one
97             return true;
98         }
99 
100         if (full_chars.size() == 0 && suff_chars.size() > 0) {
101             // the empty string doesn't "endwith" any non-empty string
102             cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(suff), mk_int(0)),
103                     m_autil.mk_ge(mk_strlen(full), mk_int(0)));
104             th_rewriter m_rw(m);
105             m_rw(cex);
106             return false;
107         }
108 
109         if (full_chars.size() < suff_chars.size()) {
110             // a string can't endwith a longer one
111             // X startswith Y -> len(X) >= len(Y)
112             expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m);
113             expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m);
114             expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(suff))), m);
115             cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero));
116             th_rewriter m_rw(m);
117             m_rw(cex);
118             return false;
119         }
120 
121         expr_ref_vector branch(sub_m);
122         for (unsigned j = 0; j < suff_chars.size(); ++j) {
123             // full[j] == suff[j]
124             expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m);
125             expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m);
126             expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
127             branch.push_back(_e);
128         }
129 
130         expr_ref final_diseq(mk_and(branch), sub_m);
131         fixed_length_assumptions.push_back(final_diseq);
132         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
133         fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f));
134 
135         return true;
136     }
137 
fixed_length_reduce_negative_suffix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)138     bool theory_str::fixed_length_reduce_negative_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
139         ast_manager & m = get_manager();
140 
141         ast_manager & sub_m = subsolver.m();
142 
143         expr * full = nullptr;
144         expr * suff = nullptr;
145         VERIFY(u.str.is_suffix(f, suff, full));
146 
147         expr_ref haystack(full, m);
148         expr_ref needle(suff, m);
149 
150         ptr_vector<expr> full_chars, suff_chars;
151         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex)
152                 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) {
153             return false;
154         }
155 
156         if (suff_chars.size() == 0) {
157             // all strings endwith the empty one
158             cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(suff), mk_int(0))));
159             th_rewriter m_rw(m);
160             m_rw(cex);
161             return false;
162         }
163 
164         if (full_chars.size() == 0 && suff_chars.size() > 0) {
165             // the empty string doesn't "endwith" any non-empty string
166             return true;
167         }
168 
169         if (full_chars.size() < suff_chars.size()) {
170             // a string can't endwith a longer one
171             // X startswith Y -> len(X) >= len(Y)
172             return true;
173         }
174 
175         expr_ref_vector branch(sub_m);
176         for (unsigned j = 0; j < suff_chars.size(); ++j) {
177             // full[j] == suff[j]
178             expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m);
179             expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m);
180             expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
181             branch.push_back(_e);
182         }
183 
184         expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m);
185         fixed_length_assumptions.push_back(final_diseq);
186         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
187         fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f));
188 
189         return true;
190     }
191 
fixed_length_reduce_prefix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)192     bool theory_str::fixed_length_reduce_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
193         ast_manager & m = get_manager();
194 
195         ast_manager & sub_m = subsolver.m();
196 
197         expr * full = nullptr;
198         expr * pref = nullptr;
199         VERIFY(u.str.is_prefix(f, pref, full));
200 
201         expr_ref haystack(full, m);
202         expr_ref needle(pref, m);
203 
204         ptr_vector<expr> full_chars, pref_chars;
205         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex)
206                 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) {
207             return false;
208         }
209 
210 
211         if (pref_chars.size() == 0) {
212             // all strings startwith the empty one
213             return true;
214         }
215 
216         if (full_chars.size() == 0 && pref_chars.size() > 0) {
217             // the empty string doesn't "stratwith" any non-empty string
218             cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(pref), mk_int(0)),
219                     m_autil.mk_ge(mk_strlen(full), mk_int(0)));
220             th_rewriter m_rw(m);
221             m_rw(cex);
222             return false;
223         }
224 
225         if (full_chars.size() < pref_chars.size()) {
226             // a string can't startwith a longer one
227             // X startswith Y -> len(X) >= len(Y)
228             expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m);
229             expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m);
230             expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(pref))), m);
231             cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero));
232             th_rewriter m_rw(m);
233             m_rw(cex);
234             return false;
235         }
236 
237         expr_ref_vector branch(m);
238         for (unsigned j = 0; j < pref_chars.size(); ++j) {
239             // full[j] == pref[j]
240             expr_ref cLHS(full_chars.get(j), sub_m);
241             expr_ref cRHS(pref_chars.get(j), sub_m);
242             expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
243             branch.push_back(_e);
244         }
245 
246         expr_ref final_diseq(mk_and(branch), sub_m);
247         fixed_length_assumptions.push_back(final_diseq);
248         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
249         fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f));
250 
251         return true;
252     }
253 
fixed_length_reduce_negative_prefix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)254     bool theory_str::fixed_length_reduce_negative_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
255         ast_manager & m = get_manager();
256 
257         ast_manager & sub_m = subsolver.m();
258 
259         expr * pref = nullptr, *full = nullptr;
260         VERIFY(u.str.is_prefix(f, pref, full));
261 
262         expr_ref haystack(full, m);
263         expr_ref needle(pref, m);
264 
265         ptr_vector<expr> full_chars, pref_chars;
266         if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex)
267                 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) {
268             return false;
269         }
270 
271         if (pref_chars.size() == 0) {
272             // all strings startwith the empty one
273             cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(pref), mk_int(0))));
274             th_rewriter m_rw(m);
275             m_rw(cex);
276             return false;
277         }
278 
279         if (full_chars.size() == 0 && pref_chars.size() > 0) {
280             // the empty string doesn't "stratwith" any non-empty string
281             return true;
282         }
283 
284         if (full_chars.size() < pref_chars.size()) {
285             // a string can't startwith a longer one
286             // X startswith Y -> len(X) >= len(Y)
287             return true;
288         }
289 
290         expr_ref_vector branch(m);
291         for (unsigned j = 0; j < pref_chars.size(); ++j) {
292             // full[j] == pref[j]
293             expr_ref cLHS(full_chars.get(j), sub_m);
294             expr_ref cRHS(pref_chars.get(j), sub_m);
295             expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
296             branch.push_back(_e);
297         }
298 
299         expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m);
300         fixed_length_assumptions.push_back(final_diseq);
301         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
302         fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f));
303 
304         return true;
305     }
306 
fixed_length_reduce_contains(smt::kernel & subsolver,expr_ref f,expr_ref & cex)307     bool theory_str::fixed_length_reduce_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
308         ast_manager & m = get_manager();
309 
310         ast_manager & sub_m = subsolver.m();
311 
312         expr * full = nullptr;
313         expr * small = nullptr;
314         VERIFY(u.str.is_contains(f, full, small));
315 
316         expr_ref haystack(full, m);
317         expr_ref needle(small, m);
318 
319         ptr_vector<expr> haystack_chars, needle_chars;
320         if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex)
321                 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) {
322             return false;
323         }
324 
325         if (needle_chars.size() == 0) {
326             // all strings "contain" the empty one
327             return true;
328         }
329 
330         if (haystack_chars.size() == 0 && needle_chars.size() > 0) {
331             // the empty string doesn't "contain" any non-empty string
332             cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(needle), mk_int(0)),
333                     m_autil.mk_ge(mk_strlen(haystack), mk_int(0)));
334             th_rewriter m_rw(m);
335             m_rw(cex);
336             return false;
337         }
338 
339         if (needle_chars.size() > haystack_chars.size()) {
340             // a string can't contain a longer one
341             // X contains Y -> len(X) >= len(Y)
342             expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m);
343             expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m);
344             expr_ref lens(m_autil.mk_add(mk_strlen(haystack), m_autil.mk_mul(minus_one, mk_strlen(needle))), m);
345             cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero));
346             th_rewriter m_rw(m);
347             m_rw(cex);
348             return false;
349         }
350         // find all positions at which `needle` could occur in `haystack`
351         expr_ref_vector branches(m);
352         for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) {
353             // i defines the offset into haystack_chars
354             expr_ref_vector branch(m);
355             for (unsigned j = 0; j < needle_chars.size(); ++j) {
356                 // needle[j] == haystack[i+j]
357                 ENSURE(i+j < haystack_chars.size());
358                 expr_ref cLHS(needle_chars.get(j), sub_m);
359                 expr_ref cRHS(haystack_chars.get(i+j), sub_m);
360                 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
361                 branch.push_back(_e);
362             }
363             branches.push_back(mk_and(branch));
364         }
365 
366         expr_ref final_diseq(mk_or(branches), sub_m);
367         fixed_length_assumptions.push_back(final_diseq);
368         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
369         fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f));
370 
371         return true;
372     }
373 
fixed_length_reduce_negative_contains(smt::kernel & subsolver,expr_ref f,expr_ref & cex)374     bool theory_str::fixed_length_reduce_negative_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex) {
375         ast_manager & m = get_manager();
376 
377         ast_manager & sub_m = subsolver.m();
378 
379         expr * small = nullptr, *full = nullptr;
380         VERIFY(u.str.is_contains(f, full, small));
381 
382         expr_ref haystack(full, m);
383         expr_ref needle(small, m);
384 
385         ptr_vector<expr> haystack_chars, needle_chars;
386         if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex)
387                 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) {
388             return false;
389         }
390 
391         if (needle_chars.size() == 0) {
392             // all strings "contain" the empty one
393             cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(needle), mk_int(0))));
394             ctx.get_rewriter()(cex);
395             return false;
396         }
397 
398         if (haystack_chars.size() == 0 && needle_chars.size() > 0) {
399             // the empty string doesn't "contain" any non-empty string
400             return true;
401         }
402 
403         if (needle_chars.size() > haystack_chars.size()) {
404             // a string can't contain a longer one
405             // X contains Y -> len(X) >= len(Y)
406             return true;
407         }
408 
409 
410         // find all positions at which `needle` could occur in `haystack`
411         expr_ref_vector branches(m);
412         for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) {
413             // i defines the offset into haystack_chars
414             expr_ref_vector branch(m);
415             for (unsigned j = 0; j < needle_chars.size(); ++j) {
416                 // needle[j] == haystack[i+j]
417                 ENSURE(i+j < haystack_chars.size());
418                 expr_ref cLHS(needle_chars.get(j), sub_m);
419                 expr_ref cRHS(haystack_chars.get(i+j), sub_m);
420                 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
421                 branch.push_back(_e);
422             }
423             branches.push_back(mk_and(branch));
424         }
425 
426         expr_ref final_diseq(mk_not(sub_m, mk_or(branches)), sub_m);
427         fixed_length_assumptions.push_back(final_diseq);
428         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
429         fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f));
430 
431         return true;
432     }
433 
add_next(u_map<expr * > & next,expr_ref_vector & trail,unsigned idx,expr * cond,ast_manager & m)434     static inline void add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond, ast_manager & m) {
435         expr* acc;
436         if (!m.is_true(cond) && next.find(idx, acc)) {
437             expr* args[2] = { cond, acc };
438             cond = mk_or(m, 2, args);
439         }
440         trail.push_back(cond);
441         next.insert(idx, cond);
442 
443     }
444 
fixed_length_reduce_regex_membership(smt::kernel & subsolver,expr_ref f,expr_ref & cex,bool polarity)445     bool theory_str::fixed_length_reduce_regex_membership(smt::kernel & subsolver, expr_ref f, expr_ref & cex, bool polarity) {
446         ast_manager & m = get_manager();
447 
448         ast_manager & sub_m = subsolver.m();
449 
450         // NSB code review: to remove dependencies on subsolver.get_context().
451         // It uses a method that should be removed from smt_kernel.
452         // currently sub_ctx is used to retrieve a rewriter. Theory_str already has a rewriter attahed.
453         context & sub_ctx = subsolver.get_context();
454 
455         expr * str = nullptr, *re = nullptr;
456         VERIFY(u.str.is_in_re(f, str, re));
457 
458         // TODO reuse some of the automaton framework from theory_str_regex
459         eautomaton * aut = m_mk_aut(re);
460         aut->compress();
461 
462         ptr_vector<expr> str_chars;
463         if (!fixed_length_reduce_string_term(subsolver, str, str_chars, cex)) {
464             return false;
465         }
466 
467         if (str_chars.empty()) {
468             // check 0-length solution
469             bool zero_solution = false;
470             unsigned initial_state = aut->init();
471             if (aut->is_final_state(initial_state)) {
472                 zero_solution = true;
473             } else {
474                 unsigned_vector eps_states;
475                 aut->get_epsilon_closure(initial_state, eps_states);
476                 for (unsigned state : eps_states) {
477                     if (aut->is_final_state(state)) {
478                         zero_solution = true;
479                         break;
480                     }
481                 }
482             }
483             if (!zero_solution && polarity) {
484                 TRACE("str_fl", tout << "contradiction: regex has no zero-length solutions, but our string must be a solution" << std::endl;);
485                 cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0))));
486                 ctx.get_rewriter()(cex);
487                 return false;
488             } else if (zero_solution && !polarity) {
489                 TRACE("str_fl", tout << "contradiction: regex has zero-length solutions, but our string must not be a solution" << std::endl;);
490                 cex = m.mk_or(f, m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0))));
491                 ctx.get_rewriter()(cex);
492                 return false;
493             } else {
494                 TRACE("str_fl", tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;);
495                 return true;
496             }
497         } else {
498             expr_ref_vector trail(m);
499             u_map<expr*> maps[2];
500             bool select_map = false;
501             expr_ref cond(m);
502             eautomaton::moves mvs;
503             maps[0].insert(aut->init(), m.mk_true());
504             // is_accepted(a, aut) & some state in frontier is final.
505 
506             for (auto& ch : str_chars) {
507                 u_map<expr*>& frontier = maps[select_map];
508                 u_map<expr*>& next = maps[!select_map];
509                 select_map = !select_map;
510                 next.reset();
511                 u_map<expr*>::iterator it = frontier.begin(), end = frontier.end();
512                 for (; it != end; ++it) {
513                     mvs.reset();
514                     unsigned state = it->m_key;
515                     expr*    acc  = it->m_value;
516                     aut->get_moves_from(state, mvs, false);
517                     for (eautomaton::move& mv : mvs) {
518                         SASSERT(mv.t());
519                         if (mv.t()->is_char() && m.is_value(mv.t()->get_char()) && m.is_value(ch)) {
520                             if (mv.t()->get_char() == ch) {
521                                 add_next(next, trail, mv.dst(), acc, sub_m);
522                             }
523                             else {
524                                 continue;
525                             }
526                         }
527                         else {
528                             cond = mv.t()->accept(ch);
529                             if (m.is_false(cond)) {
530                                 continue;
531                             }
532                             if (m.is_true(cond)) {
533                                 add_next(next, trail, mv.dst(), acc, sub_m);
534                                 continue;
535                             }
536                             expr* args[2] = { cond, acc };
537                             cond = mk_and(m, 2, args);
538                             add_next(next, trail, mv.dst(), cond, sub_m);
539                         }
540                     }
541                 }
542             }
543             u_map<expr*> const& frontier = maps[select_map];
544             expr_ref_vector ors(sub_m);
545             for (auto const& kv : frontier) {
546                 unsigned_vector states;
547                 bool has_final = false;
548                 aut->get_epsilon_closure(kv.m_key, states);
549                 for (unsigned i = 0; i < states.size() && !has_final; ++i) {
550                     has_final = aut->is_final_state(states[i]);
551                 }
552                 if (has_final) {
553                     ors.push_back(kv.m_value);
554                 }
555             }
556             expr_ref result(mk_or(ors), sub_m);
557             sub_ctx.get_rewriter()(result);
558             TRACE("str_fl", tout << "regex path constraint: " << mk_pp(result, sub_m) << std::endl;);
559 
560             if (sub_m.is_false(result)) {
561                 // There are no solutions of that length in the automaton.
562                 // If the membership constraint is true, we assert a conflict clause.
563                 // If the membership constraint is false, we ignore the constraint.
564                 if (polarity) {
565                     // Decompose `str` into its components if it is a concatenation of terms.
566                     // This fixes cases where the length of S in (S in RE) might be correct
567                     // if the lengths of components of S are assigned in a different way.
568                     expr_ref_vector str_terms(m);
569                     expr_ref_vector str_terms_eq_len(m);
570                     str_terms.push_back(str);
571                     while (!str_terms.empty()) {
572                         expr* str_term = str_terms.back();
573                         str_terms.pop_back();
574                         expr* arg0;
575                         expr* arg1;
576                         if (u.str.is_concat(str_term, arg0, arg1)) {
577                             str_terms.push_back(arg0);
578                             str_terms.push_back(arg1);
579                         } else {
580                             rational termLen;
581                             if (fixed_length_get_len_value(str_term, termLen)) {
582                                 str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen)));
583                             } else {
584                                 // this is strange, since we knew the length of `str` in order to get here
585                                 cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m);
586                                 return false;
587                             }
588                         }
589                     }
590 
591                     cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len)));
592                     ctx.get_rewriter()(cex);
593                     return false;
594                 } else {
595                     TRACE("str_fl", tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;);
596                     return true;
597                 }
598             } else {
599                 if (polarity) {
600                     fixed_length_assumptions.push_back(result);
601                     fixed_length_lesson.insert(result, std::make_tuple(PFUN, f, f));
602                 } else {
603                     fixed_length_assumptions.push_back(sub_m.mk_not(result));
604                     fixed_length_lesson.insert(sub_m.mk_not(result), std::make_tuple(NFUN, f, f));
605                 }
606                 return true;
607             }
608         }
609     }
610 
611     /*
612      * Expressions in the vector eqc_chars exist only in the subsolver.
613      * If this method returns false, a conflict clause is returned in cex;
614      * this conflict clause exists in the main solver.
615      */
fixed_length_reduce_string_term(smt::kernel & subsolver,expr * term,ptr_vector<expr> & eqc_chars,expr_ref & cex)616     bool theory_str::fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term,
617             ptr_vector<expr> & eqc_chars, expr_ref & cex) {
618         ast_manager & m = get_manager();
619 
620         ast_manager & sub_m = subsolver.m();
621 
622         bv_util bv(m);
623         sort * bv8_sort = bv.mk_sort(8);
624 
625         expr * arg0;
626         expr * arg1;
627         expr * arg2;
628 
629         zstring strConst;
630         if (u.str.is_string(term, strConst)) {
631             for (unsigned i = 0; i < strConst.length(); ++i) {
632                 expr_ref chTerm(bitvector_character_constants.get(strConst[i]), m);
633                 eqc_chars.push_back(chTerm);
634             }
635         } else if (to_app(term)->get_num_args() == 0 && !u.str.is_string(term)) {
636             // this is a variable; get its length and create/reuse character terms
637             if (!var_to_char_subterm_map.contains(term)) {
638                 rational varLen_value;
639                 bool var_hasLen = fixed_length_get_len_value(term, varLen_value);
640                 if (!var_hasLen || varLen_value.is_neg()) {
641                     TRACE("str_fl", tout << "variable " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;);
642                     cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m);
643                     return false;
644                 }
645                 TRACE("str_fl", tout << "creating character terms for variable " << mk_pp(term, m) << ", length = " << varLen_value << std::endl;);
646                 ptr_vector<expr> new_chars;
647                 for (rational i = rational::zero(); i < varLen_value; ++i) {
648                     // TODO we can probably name these better for the sake of debugging
649                     expr_ref ch(mk_fresh_const("char", bv8_sort), m);
650                     new_chars.push_back(ch);
651                     fixed_length_subterm_trail.push_back(ch);
652                 }
653                 var_to_char_subterm_map.insert(term, new_chars);
654                 fixed_length_used_len_terms.insert(term, varLen_value);
655             }
656             var_to_char_subterm_map.find(term, eqc_chars);
657         } else if (u.str.is_concat(term, arg0, arg1)) {
658             expr_ref first(arg0, sub_m);
659             expr_ref second(arg1, sub_m);
660             ptr_vector<expr> chars0, chars1;
661             if (!fixed_length_reduce_string_term(subsolver, first, chars0, cex)
662                     || !fixed_length_reduce_string_term(subsolver, second, chars1, cex)) {
663                 return false;
664             }
665             eqc_chars.append(chars0);
666             eqc_chars.append(chars1);
667         } else if (u.str.is_extract(term, arg0, arg1, arg2)) {
668             // (str.substr Base Pos Len)
669             expr_ref first(arg0, sub_m);
670             expr_ref second(arg1, sub_m);
671             expr_ref third(arg2, sub_m);
672             ptr_vector<expr> base_chars;
673             if (!fixed_length_reduce_string_term(subsolver, first, base_chars, cex)) {
674                 return false;
675             }
676             arith_value v(m);
677             v.init(&get_context());
678             rational pos, len;
679             bool pos_exists = v.get_value(arg1, pos);
680             bool len_exists = v.get_value(arg2, len);
681             if (!pos_exists) {
682                 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg1, mk_int(0)), m_autil.mk_le(arg1, mk_int(0))), m);
683                 return false;
684             }
685             if (!len_exists) {
686                 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg2, mk_int(0)), m_autil.mk_le(arg2, mk_int(0))), m);
687                 return false;
688             }
689             TRACE("str_fl", tout << "reduce substring term: base=" << mk_pp(term, m) << " (length="<<base_chars.size()<<"), pos=" << pos.to_string() << ", len=" << len.to_string() << std::endl;);
690             // Case 1: pos < 0 or pos >= strlen(base) or len < 0
691             // ==> (Substr ...) = ""
692             if (pos.is_neg() || pos >= rational(base_chars.size()) || len.is_neg()) {
693                 eqc_chars.reset();
694                 return true;
695             }
696             else if (!pos.is_unsigned() || !len.is_unsigned()) {
697                 return false;
698             } else {
699                 unsigned _pos = pos.get_unsigned();
700                 unsigned _len = len.get_unsigned();
701                 if (_pos + _len < _pos)
702                     return false;
703                 if (_pos + _len >= base_chars.size()) {
704                     // take as many characters as possible up to the end of base_chars
705                     for (unsigned i = _pos; i < base_chars.size(); ++i) {
706                         eqc_chars.push_back(base_chars.get(i));
707                     }
708                 } else {
709                     for (unsigned i = _pos; i < _pos + _len; ++i) {
710                         eqc_chars.push_back(base_chars.get(i));
711                     }
712                 }
713             }
714         } else if (u.str.is_at(term, arg0, arg1)) {
715             // (str.at Base Pos)
716             expr_ref base(arg0, sub_m);
717             expr_ref pos(arg1, sub_m);
718             ptr_vector<expr> base_chars;
719             if (!fixed_length_reduce_string_term(subsolver, base, base_chars, cex)) {
720                 return false;
721             }
722             arith_value v(m);
723             v.init(&get_context());
724             rational pos_value;
725             bool pos_exists = v.get_value(pos, pos_value);
726             if (!pos_exists) {
727                 cex = m.mk_or(m_autil.mk_ge(pos, mk_int(0)), m_autil.mk_le(pos, mk_int(0)));
728                 return false;
729             }
730             TRACE("str_fl", tout << "reduce str.at: base=" << mk_pp(base, m) << ", pos=" << pos_value.to_string() << std::endl;);
731             if (pos_value.is_neg() || pos_value >= rational(base_chars.size())) {
732                 // return the empty string
733                 eqc_chars.reset();
734             }
735             else if (!pos_value.is_unsigned()) {
736                 return false;
737             } else {
738                 eqc_chars.push_back(base_chars.get(pos_value.get_unsigned()));
739             }
740             return true;
741         } else if (u.str.is_itos(term, arg0)) {
742             expr_ref i(arg0, m);
743             arith_value v(m);
744             v.init(&get_context());
745             rational iValue;
746             bool iValue_exists = v.get_value(i, iValue);
747             if (!iValue_exists) {
748                 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg0, mk_int(0)), m_autil.mk_le(arg0, mk_int(0))), m);
749                 return false;
750             }
751             rational termLen;
752             bool termLen_exists = v.get_value(mk_strlen(term), termLen);
753             if(!termLen_exists) {
754                 cex = expr_ref(m.mk_or(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m_autil.mk_le(mk_strlen(term), mk_int(0))), m);
755                 return false;
756             }
757             TRACE("str_fl", tout << "reduce int.to.str: n=" << iValue << std::endl;);
758             if (iValue.is_neg()) {
759                 if (!termLen.is_zero()) {
760                     // conflict
761                     cex = expr_ref(m.mk_not(m.mk_and(m_autil.mk_le(arg0, mk_int(-1)), m.mk_not(mk_strlen(term)))), m);
762                     return false;
763                 }
764                 // return the empty string
765                 eqc_chars.reset();
766                 return true;
767             } else {
768                 if (termLen != iValue.get_num_decimal()) {
769                     // conflict
770                     cex = expr_ref(m.mk_not(m.mk_and(get_context().mk_eq_atom(mk_strlen(term), mk_int(termLen)), get_context().mk_eq_atom(arg0, mk_int(iValue)))), m);
771                     return false;
772                 }
773                 // convert iValue to a constant
774                 zstring iValue_str(iValue.to_string());
775                 for (unsigned idx = 0; idx < iValue_str.length(); ++idx) {
776                     expr_ref chTerm(bitvector_character_constants.get(iValue_str[idx]), sub_m);
777                     eqc_chars.push_back(chTerm);
778                 }
779                 return true;
780             }
781         } else {
782             TRACE("str_fl", tout << "string term " << mk_pp(term, m) << " handled as uninterpreted function" << std::endl;);
783             if (!uninterpreted_to_char_subterm_map.contains(term)) {
784                 rational ufLen_value;
785                 bool uf_hasLen = fixed_length_get_len_value(term, ufLen_value);
786                 if (!uf_hasLen || ufLen_value.is_neg()) {
787                     TRACE("str_fl", tout << "uninterpreted function " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;);
788                     cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m);
789                     return false;
790                 }
791                 TRACE("str_fl", tout << "creating character terms for uninterpreted function " << mk_pp(term, m) << ", length = " << ufLen_value << std::endl;);
792                 ptr_vector<expr> new_chars;
793                 for (rational i = rational::zero(); i < ufLen_value; ++i) {
794                     expr_ref ch(mk_fresh_const("char", bv8_sort), m);
795                     new_chars.push_back(ch);
796                     fixed_length_subterm_trail.push_back(ch);
797                 }
798                 uninterpreted_to_char_subterm_map.insert(term, new_chars);
799                 fixed_length_used_len_terms.insert(term, ufLen_value);
800             }
801             uninterpreted_to_char_subterm_map.find(term, eqc_chars);
802         }
803         return true;
804     }
805 
fixed_length_reduce_eq(smt::kernel & subsolver,expr_ref lhs,expr_ref rhs,expr_ref & cex)806     bool theory_str::fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex) {
807         ast_manager & m = get_manager();
808 
809         ast_manager & sub_m = subsolver.m();
810 
811         ptr_vector<expr> lhs_chars, rhs_chars;
812 
813         if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex)
814                 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) {
815             return false;
816         }
817 
818         if (lhs_chars.size() != rhs_chars.size()) {
819             TRACE("str_fl", tout << "length information inconsistent: " << mk_pp(lhs, m) << " has " << lhs_chars.size() <<
820                     " chars, " << mk_pp(rhs, m) << " has " << rhs_chars.size() << " chars" << std::endl;);
821             // equal strings ought to have equal lengths
822             cex = m.mk_or(m.mk_not(ctx.mk_eq_atom(lhs, rhs)), ctx.mk_eq_atom(mk_strlen(lhs), mk_strlen(rhs)));
823             return false;
824         }
825         for (unsigned i = 0; i < lhs_chars.size(); ++i) {
826             expr_ref cLHS(lhs_chars.get(i), sub_m);
827             expr_ref cRHS(rhs_chars.get(i), sub_m);
828             expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m);
829             fixed_length_assumptions.push_back(_e);
830             TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
831             fixed_length_lesson.insert(_e, std::make_tuple(rational(i), lhs, rhs));
832         }
833         return true;
834     }
835 
fixed_length_reduce_diseq(smt::kernel & subsolver,expr_ref lhs,expr_ref rhs,expr_ref & cex)836     bool theory_str::fixed_length_reduce_diseq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex) {
837         ast_manager & m = get_manager();
838 
839         ast_manager & sub_m = subsolver.m();
840 
841         // we do generation before this check to make sure that
842         // variables which only appear in disequalities show up in the model
843         rational lhsLen, rhsLen;
844         bool lhsLen_exists = fixed_length_get_len_value(lhs, lhsLen);
845         bool rhsLen_exists = fixed_length_get_len_value(rhs, rhsLen);
846 
847         if (!lhsLen_exists) {
848             cex = m_autil.mk_ge(mk_strlen(lhs), mk_int(0));
849             return false;
850         }
851 
852         if (!rhsLen_exists) {
853             cex = m_autil.mk_ge(mk_strlen(rhs), mk_int(0));
854             return false;
855         }
856 
857         ptr_vector<expr> lhs_chars, rhs_chars;
858         if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex)
859                 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) {
860             return false;
861         }
862 
863         if (lhsLen != rhsLen) {
864             TRACE("str", tout << "skip disequality: len(lhs) = " << lhsLen << ", len(rhs) = " << rhsLen << std::endl;);
865             return true;
866         }
867 
868         SASSERT(lhs_chars.size() == rhs_chars.size());
869         expr_ref_vector diseqs(m);
870         for (unsigned i = 0; i < lhs_chars.size(); ++i) {
871             expr_ref cLHS(lhs_chars.get(i), sub_m);
872             expr_ref cRHS(rhs_chars.get(i), sub_m);
873             diseqs.push_back(sub_m.mk_not(sub_m.mk_eq(cLHS, cRHS)));
874         }
875 
876         expr_ref final_diseq(mk_or(diseqs), sub_m);
877         fixed_length_assumptions.push_back(final_diseq);
878         TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;);
879         fixed_length_lesson.insert(final_diseq, std::make_tuple(NEQ, lhs, rhs));
880 
881         return true;
882     }
883 
fixed_length_model_construction(expr_ref_vector formulas,expr_ref_vector & precondition,expr_ref_vector & free_variables,obj_map<expr,zstring> & model,expr_ref_vector & cex)884     lbool theory_str::fixed_length_model_construction(expr_ref_vector formulas, expr_ref_vector &precondition,
885             expr_ref_vector& free_variables,
886             obj_map<expr, zstring> &model, expr_ref_vector &cex) {
887 
888         ast_manager & m = get_manager();
889 
890         if (bitvector_character_constants.empty()) {
891             bv_util bv(m);
892             sort * bv8_sort = bv.mk_sort(8);
893             for (unsigned i = 0; i < 256; ++i) {
894                 rational ch(i);
895                 expr_ref chTerm(bv.mk_numeral(ch, bv8_sort), m);
896                 bitvector_character_constants.push_back(chTerm);
897                 fixed_length_subterm_trail.push_back(chTerm);
898             }
899         }
900 
901         TRACE("str",
902             ast_manager & m = get_manager();
903             tout << "dumping all formulas:" << std::endl;
904             for (expr_ref_vector::iterator i = formulas.begin(); i != formulas.end(); ++i) {
905                 expr * ex = *i;
906                 tout << mk_pp(ex, m) << (ctx.is_relevant(ex) ? "" : " (NOT REL)") << std::endl;
907             }
908         );
909 
910         fixed_length_subterm_trail.reset();
911         fixed_length_used_len_terms.reset();
912         fixed_length_assumptions.reset();
913         var_to_char_subterm_map.reset();
914         uninterpreted_to_char_subterm_map.reset();
915         fixed_length_lesson.reset();
916 
917         // All reduced Boolean formulas in the current assignment
918         expr_ref_vector fixed_length_reduced_boolean_formulas(m);
919 
920         // Boolean formulas on which to apply abstraction refinement.
921         expr_ref_vector abstracted_boolean_formulas(m);
922 
923         smt_params subsolver_params;
924         smt::kernel subsolver(m, subsolver_params);
925         subsolver.set_logic(symbol("QF_BV"));
926 
927         sort * str_sort = u.str.mk_string_sort();
928         sort * bool_sort = m.mk_bool_sort();
929 
930         for (expr * var : free_variables) {
931             TRACE("str_fl", tout << "initialize free variable " << mk_pp(var, m) << std::endl;);
932             rational var_lenVal;
933             if (!fixed_length_get_len_value(var, var_lenVal)) {
934                 TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " has no length assignment" << std::endl;);
935                 expr_ref var_len_assertion(m_autil.mk_ge(mk_strlen(var), mk_int(0)), m);
936                 assert_axiom(var_len_assertion);
937                 add_persisted_axiom(var_len_assertion);
938                 return l_undef;
939             }
940             ptr_vector<expr> var_chars;
941             expr_ref str_counterexample(m);
942             if (!fixed_length_reduce_string_term(subsolver, var, var_chars, str_counterexample)) {
943                 TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " caused a conflict; asserting and continuing" << std::endl;);
944                 assert_axiom(str_counterexample);
945                 return l_undef;
946             }
947         }
948 
949         for (expr * f : formulas) {
950             if (!get_context().is_relevant(f)) {
951                 expr * subformula = nullptr;
952                 if (m.is_not(f, subformula)) {
953                     if (!get_context().is_relevant(subformula)) {
954                         TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant (and neither is its subformula)" << std::endl;);
955                         continue;
956                     } else {
957                         TRACE("str_fl", tout << "considering formula " << mk_pp(f, m) << ", its subformula is relevant but it is not" << std::endl;);
958                     }
959                 } else {
960                     TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant" << std::endl;);
961                     continue;
962                 }
963             }
964             // reduce string formulas only. ignore others
965             sort * fSort = m.get_sort(f);
966             if (fSort == bool_sort && !is_quantifier(f)) {
967                 // extracted terms
968                 expr * subterm;
969                 expr * lhs;
970                 expr * rhs;
971                 if (m.is_eq(f, lhs, rhs)) {
972                     sort * lhs_sort = m.get_sort(lhs);
973                     if (lhs_sort == str_sort) {
974                         TRACE("str_fl", tout << "reduce string equality: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << std::endl;);
975                         expr_ref cex(m);
976                         expr_ref left(lhs, m);
977                         expr_ref right(rhs, m);
978                         if (!fixed_length_reduce_eq(subsolver, left, right, cex)) {
979                             // missing a side condition. assert it and return unknown
980                             assert_axiom(cex);
981                             add_persisted_axiom(cex);
982                             return l_undef;
983                         }
984                         fixed_length_reduced_boolean_formulas.push_back(f);
985                     } else {
986                         TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;);
987                     }
988                 } else if (u.str.is_in_re(f)) {
989                     TRACE("str_fl", tout << "reduce regex membership: " << mk_pp(f, m) << std::endl;);
990                     expr_ref cex_clause(m);
991                     expr_ref re(f, m);
992                     if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, true)) {
993                         assert_axiom(cex_clause);
994                         add_persisted_axiom(cex_clause);
995                         return l_undef;
996                     }
997                     fixed_length_reduced_boolean_formulas.push_back(f);
998                 } else if (u.str.is_contains(f)) {
999                     // TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle))
1000                     // we might be okay to assert the full disjunction because there are very few disjuncts
1001                     if (m_params.m_FixedLengthRefinement) {
1002                         TRACE("str_fl", tout << "abstracting out positive contains: " << mk_pp(f, m) << std::endl;);
1003                         abstracted_boolean_formulas.push_back(f);
1004                     } else {
1005                         TRACE("str_fl", tout << "reduce positive contains: " << mk_pp(f, m) << std::endl;);
1006                         expr_ref cex(m);
1007                         expr_ref cont(f, m);
1008                         if (!fixed_length_reduce_contains(subsolver, cont, cex)) {
1009                             assert_axiom(cex);
1010                             add_persisted_axiom(cex);
1011                             return l_undef;
1012                         }
1013                         fixed_length_reduced_boolean_formulas.push_back(f);
1014                     }
1015                 } else if (u.str.is_prefix(f)) {
1016                     TRACE("str_fl", tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;);
1017                     expr_ref cex(m);
1018                     expr_ref pref(f, m);
1019                     if (!fixed_length_reduce_prefix(subsolver, pref, cex)) {
1020                         assert_axiom(cex);
1021                         add_persisted_axiom(cex);
1022                         return l_undef;
1023                     }
1024                     fixed_length_reduced_boolean_formulas.push_back(f);
1025                 } else if (u.str.is_suffix(f)) {
1026                     TRACE("str_fl", tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;);
1027                     expr_ref cex(m);
1028                     expr_ref suf(f, m);
1029                     if (!fixed_length_reduce_suffix(subsolver, suf, cex)) {
1030                         assert_axiom(cex);
1031                         add_persisted_axiom(cex);
1032                         return l_undef;
1033                     }
1034                     fixed_length_reduced_boolean_formulas.push_back(f);
1035                 }else if (m.is_not(f, subterm)) {
1036                     // if subterm is a string formula such as an equality, reduce it as a disequality
1037                     if (m.is_eq(subterm, lhs, rhs)) {
1038                         sort * lhs_sort = m.get_sort(lhs);
1039                         if (lhs_sort == str_sort) {
1040                             TRACE("str_fl", tout << "reduce string disequality: " << mk_pp(lhs, m) << " != " << mk_pp(rhs, m) << std::endl;);
1041                             expr_ref cex(m);
1042                             expr_ref left(lhs, m);
1043                             expr_ref right(rhs, m);
1044                             if (!fixed_length_reduce_diseq(subsolver, left, right, cex)) {
1045                                 // missing a side condition. assert it and return unknown
1046                                 assert_axiom(cex);
1047                                 add_persisted_axiom(cex);
1048                                 return l_undef;
1049                             }
1050                             fixed_length_reduced_boolean_formulas.push_back(f);
1051                         }
1052                     } else if (u.str.is_in_re(subterm)) {
1053                         TRACE("str_fl", tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;);
1054                         expr_ref cex_clause(m);
1055                         expr_ref re(subterm, m);
1056                         if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, false)) {
1057                             assert_axiom(cex_clause);
1058                             add_persisted_axiom(cex_clause);
1059                             return l_undef;
1060                         }
1061                         fixed_length_reduced_boolean_formulas.push_back(f);
1062                     } else if (u.str.is_contains(subterm)) {
1063                         TRACE("str_fl", tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;);
1064                         expr_ref cex(m);
1065                         expr_ref cont(subterm, m);
1066                         if (!fixed_length_reduce_negative_contains(subsolver, cont, cex)) {
1067                             assert_axiom(cex);
1068                             add_persisted_axiom(cex);
1069                             return l_undef;
1070                         }
1071                         fixed_length_reduced_boolean_formulas.push_back(f);
1072                     } else if (u.str.is_prefix(subterm)) {
1073                         TRACE("str_fl", tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;);
1074                         expr_ref cex(m);
1075                         expr_ref pref(subterm, m);
1076                         if (!fixed_length_reduce_negative_prefix(subsolver, pref, cex)) {
1077                             assert_axiom(cex);
1078                             add_persisted_axiom(cex);
1079                             return l_undef;
1080                         }
1081                         fixed_length_reduced_boolean_formulas.push_back(f);
1082                     } else if (u.str.is_suffix(subterm)) {
1083                         TRACE("str_fl", tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;);
1084                         expr_ref cex(m);
1085                         expr_ref suf(subterm, m);
1086                         if (!fixed_length_reduce_negative_suffix(subsolver, suf, cex)) {
1087                             assert_axiom(cex);
1088                             add_persisted_axiom(cex);
1089                             return l_undef;
1090                         }
1091                         fixed_length_reduced_boolean_formulas.push_back(f);
1092                     } else {
1093                         TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;);
1094                     }
1095                 } else {
1096                     TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;);
1097                     continue;
1098                 }
1099             } else {
1100                 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant to strings" << std::endl;);
1101                 continue;
1102             }
1103         }
1104 
1105         // Check consistency of all string-integer conversion terms wrt. integer theory before we solve,
1106         // possibly generating additional constraints for the bit-vector solver.
1107         {
1108             arith_value v(get_manager());
1109             v.init(&get_context());
1110             for (auto e : string_int_conversion_terms) {
1111                 TRACE("str_fl", tout << "pre-run check str-int term " << mk_pp(e, get_manager()) << std::endl;);
1112                 expr* _arg;
1113                 if (u.str.is_stoi(e, _arg)) {
1114                     expr_ref arg(_arg, m);
1115                     rational slen;
1116                     if (!fixed_length_get_len_value(arg, slen)) {
1117                         expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(arg), mk_int(0)), m);
1118                         assert_axiom(stoi_cex);
1119                         add_persisted_axiom(stoi_cex);
1120                         return l_undef;
1121                     }
1122                     TRACE("str_fl", tout << "length of term is " << slen << std::endl;);
1123 
1124                     rational ival;
1125                     if (v.get_value(e, ival)) {
1126                         TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(e, get_manager()) << std::endl;);
1127                         // if ival is non-negative, because we know the length of arg, we can add a character constraint for arg
1128                         if (ival.is_nonneg()) {
1129                             zstring ival_str(ival.to_string());
1130                             zstring padding;
1131                             for (rational i = rational::zero(); i < slen - rational(ival_str.length()); ++i) {
1132                                 padding = padding + zstring("0");
1133                             }
1134                             zstring arg_val = padding + ival_str;
1135                             expr_ref stoi_cex(m);
1136                             expr_ref arg_char_expr(mk_string(arg_val), m);
1137 
1138                             // Add (e == ival) as a precondition.
1139                             precondition.push_back(m.mk_eq(e, mk_int(ival)));
1140                             // Assert (arg == arg_chars) in the subsolver.
1141                             if (!fixed_length_reduce_eq(subsolver, arg, arg_char_expr, stoi_cex)) {
1142                                 // Counterexample: (str.to_int S) == ival AND len(S) == slen cannot both be true.
1143                                 stoi_cex = expr_ref(m.mk_not(m.mk_and(
1144                                     m.mk_eq(e, mk_int(ival)),
1145                                     m.mk_eq(mk_strlen(arg), mk_int(slen))
1146                                 )), m);
1147                                 assert_axiom(stoi_cex);
1148                                 add_persisted_axiom(stoi_cex);
1149 
1150                                 return l_undef;
1151                             }
1152 
1153                             fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival)));
1154                         }
1155                     } else {
1156                         TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(e, get_manager()) << std::endl;);
1157                         // consistency needs to be checked after the string is assigned
1158                     }
1159                 } else if (u.str.is_itos(e, _arg)) {
1160                     expr_ref arg(_arg, m);
1161                     rational slen;
1162                     if (!fixed_length_get_len_value(e, slen)) {
1163                         expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(e), mk_int(0)), m);
1164                         assert_axiom(stoi_cex);
1165                         add_persisted_axiom(stoi_cex);
1166                         return l_undef;
1167                     }
1168                     TRACE("str_fl", tout << "length of term is " << slen << std::endl;);
1169                     rational ival;
1170                     if (v.get_value(arg, ival)) {
1171                         TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(arg, get_manager()) << std::endl;);
1172                         zstring ival_str;
1173                         if (ival.is_neg()) {
1174                             // e must be the empty string, i.e. have length 0
1175                             ival_str = zstring("");
1176                         } else {
1177                             // e must be equal to the string representation of ival
1178                             ival_str = zstring(ival.to_string());
1179                         }
1180                         // Add (arg == ival) as a precondition.
1181                         precondition.push_back(m.mk_eq(arg, mk_int(ival)));
1182                         // Assert (e == ival_str) in the subsolver.
1183                         expr_ref itos_cex(m);
1184                         expr_ref _e(e, m);
1185                         expr_ref arg_char_expr(mk_string(ival_str), m);
1186                         if (!fixed_length_reduce_eq(subsolver, _e, arg_char_expr, itos_cex)) {
1187                             // Counterexample: N in (str.from_int N) == ival AND len(str.from_int N) == slen cannot both be true.
1188                             itos_cex = expr_ref(m.mk_not(m.mk_and(
1189                                 m.mk_eq(arg, mk_int(ival)),
1190                                 m.mk_eq(mk_strlen(e), mk_int(slen))
1191                             )), m);
1192                             assert_axiom(itos_cex);
1193                             add_persisted_axiom(itos_cex);
1194                             return l_undef;
1195                         }
1196                         fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(arg, mk_int(ival)));
1197                     } else {
1198                         TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(arg, get_manager()) << std::endl;);
1199                         // consistency needs to be checked after the string is assigned
1200                     }
1201                 }
1202             }
1203         }
1204 
1205         for (auto e : fixed_length_used_len_terms) {
1206             expr * var = &e.get_key();
1207             rational val = e.get_value();
1208             precondition.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val)));
1209         }
1210 
1211         TRACE("str_fl",
1212             tout << "formulas asserted to bitvector subsolver:" << std::endl;
1213             for (auto e : fixed_length_assumptions) {
1214                 tout << mk_pp(e, subsolver.m()) << std::endl;
1215             }
1216             tout << "variable to character mappings:" << std::endl;
1217             for (auto &entry : var_to_char_subterm_map) {
1218                 tout << mk_pp(entry.m_key, get_manager()) << ":";
1219                 for (auto e : entry.m_value) {
1220                     tout << " " << mk_pp(e, subsolver.m());
1221                 }
1222                 tout << std::endl;
1223             }
1224             tout << "reduced boolean formulas:" << std::endl;
1225               for (auto e : fixed_length_reduced_boolean_formulas) {
1226                   tout << mk_pp(e, m) << std::endl;
1227               }
1228         );
1229 
1230         TRACE("str_fl", tout << "calling subsolver" << std::endl;);
1231 
1232         lbool subproblem_status = subsolver.check(fixed_length_assumptions);
1233 
1234         if (subproblem_status == l_true) {
1235             bv_util bv(m);
1236             TRACE("str_fl", tout << "subsolver found SAT; reconstructing model" << std::endl;);
1237             model_ref subModel;
1238             subsolver.get_model(subModel);
1239 
1240             expr_substitution subst(m);
1241 
1242             //model_smt2_pp(std::cout, m, *subModel, 2);
1243             for (auto entry : var_to_char_subterm_map) {
1244                 svector<unsigned> assignment;
1245                 expr * var = entry.m_key;
1246                 ptr_vector<expr> char_subterms(entry.m_value);
1247                 for (expr * chExpr : char_subterms) {
1248                     expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m);
1249                     rational n;
1250                     if (chAssignment != nullptr && bv.is_numeral(chAssignment, n) && n.is_unsigned()) {
1251                         assignment.push_back(n.get_unsigned());
1252                     } else {
1253                         assignment.push_back((unsigned)'?');
1254                     }
1255                 }
1256                 zstring strValue(assignment.size(), assignment.c_ptr());
1257                 model.insert(var, strValue);
1258                 subst.insert(var, mk_string(strValue));
1259             }
1260             TRACE("str_fl",
1261                 for (auto entry : model) {
1262                     tout << mk_pp(entry.m_key, m) << " = " << entry.m_value << std::endl;
1263                 }
1264             );
1265             for (auto entry : uninterpreted_to_char_subterm_map) {
1266                 svector<unsigned> assignment;
1267                 expr * var = entry.m_key;
1268                 ptr_vector<expr> char_subterms(entry.m_value);
1269                 for (expr * chExpr : char_subterms) {
1270                     expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m);
1271                     rational n;
1272                     if (chAssignment != nullptr && bv.is_numeral(chAssignment, n) && n.is_unsigned()) {
1273                         assignment.push_back(n.get_unsigned());
1274                     } else {
1275                         assignment.push_back((unsigned)'?');
1276                     }
1277                 }
1278                 zstring strValue(assignment.size(), assignment.c_ptr());
1279                 model.insert(var, strValue);
1280                 subst.insert(var, mk_string(strValue));
1281             }
1282 
1283             // Check consistency of string-integer conversion terms after the search.
1284             {
1285                 scoped_ptr<expr_replacer> replacer = mk_default_expr_replacer(m, false);
1286                 replacer->set_substitution(&subst);
1287                 th_rewriter rw(m);
1288                 arith_value v(get_manager());
1289                 v.init(&get_context());
1290                 for (auto e : string_int_conversion_terms) {
1291                     TRACE("str_fl", tout << "post-run check str-int term " << mk_pp(e, get_manager()) << std::endl;);
1292                     expr* _arg;
1293                     if (u.str.is_stoi(e, _arg)) {
1294                         expr_ref arg(_arg, m);
1295                         rational ival;
1296                         if (v.get_value(e, ival)) {
1297                             expr_ref arg_subst(arg, m);
1298                             (*replacer)(arg, arg_subst);
1299                             rw(arg_subst);
1300                             TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(arg_subst, m) << std::endl;);
1301 
1302                             symbol arg_str;
1303                             if (u.str.is_string(arg_subst, arg_str)) {
1304                                 zstring arg_zstr(arg_str.bare_str());
1305                                 rational arg_value;
1306                                 if (string_integer_conversion_valid(arg_zstr, arg_value)) {
1307                                     if (ival != arg_value) {
1308                                         // contradiction
1309                                         expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m);
1310                                         assert_axiom(cex);
1311                                         return l_undef;
1312                                     }
1313                                 } else {
1314                                     if (!ival.is_minus_one()) {
1315                                         expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m);
1316                                         assert_axiom(cex);
1317                                         return l_undef;
1318                                     }
1319                                 }
1320                             }
1321                         }
1322                     } else if (u.str.is_itos(e, _arg)) {
1323                         expr_ref arg(_arg, m);
1324                         rational ival;
1325                         if (v.get_value(arg, ival)) {
1326                             expr_ref e_subst(e, m);
1327                             (*replacer)(e, e_subst);
1328                             rw(e_subst);
1329                             TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(e_subst, m) << std::endl;);
1330 
1331                             symbol e_str;
1332                             if (u.str.is_string(e_subst, e_str)) {
1333                                 zstring e_zstr(e_str.bare_str());
1334                                 // if arg is negative, e must be empty
1335                                 // if arg is non-negative, e must be valid AND cannot contain leading zeroes
1336 
1337                                 if (ival.is_neg()) {
1338                                     if (!e_zstr.empty()) {
1339                                         // contradiction
1340                                         expr_ref cex(ctx.mk_eq_atom(m_autil.mk_le(arg, mk_int(-1)), ctx.mk_eq_atom(e, mk_string(""))), m);
1341                                         assert_axiom(cex);
1342                                         return l_undef;
1343                                     }
1344                                 } else {
1345                                     rational e_value;
1346                                     if (string_integer_conversion_valid(e_zstr, e_value)) {
1347                                         // e contains leading zeroes if its first character is 0 but converted to something other than 0
1348                                         if (e_zstr[0] == '0' && !e_value.is_zero()) {
1349                                             // contradiction
1350                                             expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m);
1351                                             assert_axiom(cex);
1352                                             return l_undef;
1353                                         }
1354                                     } else {
1355                                         // contradiction
1356                                         expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m);
1357                                         assert_axiom(cex);
1358                                         return l_undef;
1359                                     }
1360                                 }
1361                             }
1362                         }
1363                     }
1364                 }
1365             }
1366 
1367             // TODO insert length values into substitution table as well?
1368             if (m_params.m_FixedLengthRefinement) {
1369                 scoped_ptr<expr_replacer> replacer = mk_default_expr_replacer(m, false);
1370                 replacer->set_substitution(&subst);
1371                 th_rewriter rw(m);
1372                 if (!abstracted_boolean_formulas.empty()) {
1373                     for (auto f : abstracted_boolean_formulas) {
1374                         TRACE("str_fl", tout << "refinement of boolean formula: " << mk_pp(f, m) << std::endl;);
1375                         expr_ref f_new(m);
1376                         (*replacer)(f, f_new);
1377                         rw(f_new);
1378                         TRACE("str_fl", tout << "after substitution and simplification, evaluates to: " << mk_pp(f_new, m) << std::endl;);
1379                         // now there are three cases, depending on what f_new evaluates to:
1380                         // true -> OK, do nothing
1381                         // false -> refine abstraction by generating conflict clause
1382                         // anything else -> error, probably our substitution was incomplete
1383                         if (m.is_true(f_new)) {
1384                             // do nothing
1385                         } else if (m.is_false(f_new)) {
1386                             expr * needle = nullptr, *haystack = nullptr;
1387                             if (u.str.is_contains(f, haystack, needle)) {
1388                                 expr_ref haystack_assignment(m);
1389                                 expr_ref needle_assignment(m);
1390                                 (*replacer)(haystack, haystack_assignment);
1391                                 (*replacer)(needle, needle_assignment);
1392                                 cex.push_back(f);
1393                                 cex.push_back(ctx.mk_eq_atom(haystack, haystack_assignment));
1394                                 cex.push_back(ctx.mk_eq_atom(needle, needle_assignment));
1395                                 return l_false;
1396                             } else {
1397                                 TRACE("str_fl", tout << "error: unhandled refinement term " << mk_pp(f, m) << std::endl;);
1398                                 NOT_IMPLEMENTED_YET();
1399                             }
1400                         } else {
1401                             NOT_IMPLEMENTED_YET();
1402                         }
1403                     }
1404                 }
1405             }
1406 
1407             return l_true;
1408         } else if (subproblem_status == l_false) {
1409             if (m_params.m_FixedLengthNaiveCounterexamples) {
1410                 TRACE("str_fl", tout << "subsolver found UNSAT; constructing length counterexample" << std::endl;);
1411                 for (auto e : fixed_length_used_len_terms) {
1412                     expr * var = &e.get_key();
1413                     rational val = e.get_value();
1414                     cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val)));
1415                 }
1416                 for (auto e : fixed_length_reduced_boolean_formulas) {
1417                     cex.push_back(e);
1418                 }
1419                 return l_false;
1420             } else {
1421                 TRACE("str_fl", tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;);
1422                 TRACE("str_fl", tout << "unsat core has size " << subsolver.get_unsat_core_size() << std::endl;);
1423                 bool negate_pre = false;
1424                 for (unsigned i = 0; i < subsolver.get_unsat_core_size(); ++i) {
1425                     TRACE("str", tout << "entry " << i << " = " << mk_pp(subsolver.get_unsat_core_expr(i), m) << std::endl;);
1426                     rational index;
1427                     expr* lhs;
1428                     expr* rhs;
1429                     TRACE("str_fl", tout << fixed_length_lesson.size() << std::endl;);
1430                     std::tie(index, lhs, rhs) = fixed_length_lesson.find(subsolver.get_unsat_core_expr(i));
1431                     TRACE("str_fl", tout << "lesson: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << " at index " << index << std::endl;);
1432                     cex.push_back(refine(lhs, rhs, index));
1433                     if (index < rational(0)) {
1434                         negate_pre = true;
1435                     }
1436                 }
1437                 if (negate_pre || subsolver.get_unsat_core_size() == 0){
1438                     for (auto ex : precondition) {
1439                         cex.push_back(ex);
1440                     }
1441                 }
1442                 return l_false;
1443             }
1444         } else { // l_undef
1445             TRACE("str_fl", tout << "WARNING: subsolver found UNKNOWN" << std::endl;);
1446             return l_undef;
1447         }
1448     }
1449 
1450 }; // namespace smt
1451