1 /*++ 2 Module Name: 3 4 theory_str_mc.cpp 5 6 Abstract: 7 8 Model Construction for String Theory Plugin 9 10 Author: 11 12 Murphy Berzish and Yunhui Zheng 13 14 Revision History: 15 16 --*/ 17 #include "ast/ast_smt2_pp.h" 18 #include "smt/smt_context.h" 19 #include "smt/theory_str.h" 20 #include "smt/smt_model_generator.h" 21 #include "ast/ast_pp.h" 22 #include "ast/ast_ll_pp.h" 23 #include<list> 24 #include<algorithm> 25 #include "smt/theory_seq_empty.h" 26 #include "smt/theory_arith.h" 27 #include "ast/ast_util.h" 28 #include "ast/rewriter/seq_rewriter.h" 29 #include "ast/rewriter/expr_replacer.h" 30 #include "smt_kernel.h" 31 #include "model/model_smt2_pp.h" 32 33 namespace smt { 34 35 /* 36 * Use the current model in the arithmetic solver to get the length of a term. 37 * Returns true if this could be done, placing result in 'termLen', or false otherwise. 38 * Works like get_len_value() except uses arithmetic solver model instead of EQCs. 39 */ fixed_length_get_len_value(expr * e,rational & val)40 bool theory_str::fixed_length_get_len_value(expr * e, rational & val) { 41 ast_manager & m = get_manager(); 42 43 rational val1; 44 expr_ref len(m), len_val(m); 45 expr* e1 = nullptr, *e2 = nullptr; 46 expr_ref_vector todo(m); 47 todo.push_back(e); 48 val.reset(); 49 while (!todo.empty()) { 50 expr* c = todo.back(); 51 todo.pop_back(); 52 zstring tmp; 53 if (u.str.is_concat(c, e1, e2)) { 54 todo.push_back(e1); 55 todo.push_back(e2); 56 } 57 else if (u.str.is_string(c, tmp)) { 58 unsigned int sl = tmp.length(); 59 val += rational(sl); 60 } 61 else { 62 len = mk_strlen(c); 63 arith_value v(get_manager()); 64 v.init(&get_context()); 65 if (v.get_value(len, val1)) { 66 val += val1; 67 } else { 68 return false; 69 } 70 } 71 } 72 return val.is_int(); 73 } 74 75 fixed_length_reduce_suffix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)76 bool theory_str::fixed_length_reduce_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 77 ast_manager & m = get_manager(); 78 79 ast_manager & sub_m = subsolver.m(); 80 81 expr * full = nullptr; 82 expr * suff = nullptr; 83 VERIFY(u.str.is_suffix(f, suff, full)); 84 85 expr_ref haystack(full, m); 86 expr_ref needle(suff, m); 87 88 ptr_vector<expr> full_chars, suff_chars; 89 90 if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) 91 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { 92 return false; 93 } 94 95 if (suff_chars.size() == 0) { 96 // all strings endwith the empty one 97 return true; 98 } 99 100 if (full_chars.size() == 0 && suff_chars.size() > 0) { 101 // the empty string doesn't "endwith" any non-empty string 102 cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(suff), mk_int(0)), 103 m_autil.mk_ge(mk_strlen(full), mk_int(0))); 104 th_rewriter m_rw(m); 105 m_rw(cex); 106 return false; 107 } 108 109 if (full_chars.size() < suff_chars.size()) { 110 // a string can't endwith a longer one 111 // X startswith Y -> len(X) >= len(Y) 112 expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); 113 expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); 114 expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(suff))), m); 115 cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); 116 th_rewriter m_rw(m); 117 m_rw(cex); 118 return false; 119 } 120 121 expr_ref_vector branch(sub_m); 122 for (unsigned j = 0; j < suff_chars.size(); ++j) { 123 // full[j] == suff[j] 124 expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m); 125 expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m); 126 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 127 branch.push_back(_e); 128 } 129 130 expr_ref final_diseq(mk_and(branch), sub_m); 131 fixed_length_assumptions.push_back(final_diseq); 132 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 133 fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f)); 134 135 return true; 136 } 137 fixed_length_reduce_negative_suffix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)138 bool theory_str::fixed_length_reduce_negative_suffix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 139 ast_manager & m = get_manager(); 140 141 ast_manager & sub_m = subsolver.m(); 142 143 expr * full = nullptr; 144 expr * suff = nullptr; 145 VERIFY(u.str.is_suffix(f, suff, full)); 146 147 expr_ref haystack(full, m); 148 expr_ref needle(suff, m); 149 150 ptr_vector<expr> full_chars, suff_chars; 151 if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) 152 || !fixed_length_reduce_string_term(subsolver, needle, suff_chars, cex)) { 153 return false; 154 } 155 156 if (suff_chars.size() == 0) { 157 // all strings endwith the empty one 158 cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(suff), mk_int(0)))); 159 th_rewriter m_rw(m); 160 m_rw(cex); 161 return false; 162 } 163 164 if (full_chars.size() == 0 && suff_chars.size() > 0) { 165 // the empty string doesn't "endwith" any non-empty string 166 return true; 167 } 168 169 if (full_chars.size() < suff_chars.size()) { 170 // a string can't endwith a longer one 171 // X startswith Y -> len(X) >= len(Y) 172 return true; 173 } 174 175 expr_ref_vector branch(sub_m); 176 for (unsigned j = 0; j < suff_chars.size(); ++j) { 177 // full[j] == suff[j] 178 expr_ref cLHS(full_chars.get(full_chars.size() - j - 1), sub_m); 179 expr_ref cRHS(suff_chars.get(suff_chars.size() - j - 1), sub_m); 180 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 181 branch.push_back(_e); 182 } 183 184 expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m); 185 fixed_length_assumptions.push_back(final_diseq); 186 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 187 fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f)); 188 189 return true; 190 } 191 fixed_length_reduce_prefix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)192 bool theory_str::fixed_length_reduce_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 193 ast_manager & m = get_manager(); 194 195 ast_manager & sub_m = subsolver.m(); 196 197 expr * full = nullptr; 198 expr * pref = nullptr; 199 VERIFY(u.str.is_prefix(f, pref, full)); 200 201 expr_ref haystack(full, m); 202 expr_ref needle(pref, m); 203 204 ptr_vector<expr> full_chars, pref_chars; 205 if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) 206 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { 207 return false; 208 } 209 210 211 if (pref_chars.size() == 0) { 212 // all strings startwith the empty one 213 return true; 214 } 215 216 if (full_chars.size() == 0 && pref_chars.size() > 0) { 217 // the empty string doesn't "stratwith" any non-empty string 218 cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(pref), mk_int(0)), 219 m_autil.mk_ge(mk_strlen(full), mk_int(0))); 220 th_rewriter m_rw(m); 221 m_rw(cex); 222 return false; 223 } 224 225 if (full_chars.size() < pref_chars.size()) { 226 // a string can't startwith a longer one 227 // X startswith Y -> len(X) >= len(Y) 228 expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); 229 expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); 230 expr_ref lens(m_autil.mk_add(mk_strlen(full), m_autil.mk_mul(minus_one, mk_strlen(pref))), m); 231 cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); 232 th_rewriter m_rw(m); 233 m_rw(cex); 234 return false; 235 } 236 237 expr_ref_vector branch(m); 238 for (unsigned j = 0; j < pref_chars.size(); ++j) { 239 // full[j] == pref[j] 240 expr_ref cLHS(full_chars.get(j), sub_m); 241 expr_ref cRHS(pref_chars.get(j), sub_m); 242 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 243 branch.push_back(_e); 244 } 245 246 expr_ref final_diseq(mk_and(branch), sub_m); 247 fixed_length_assumptions.push_back(final_diseq); 248 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 249 fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f)); 250 251 return true; 252 } 253 fixed_length_reduce_negative_prefix(smt::kernel & subsolver,expr_ref f,expr_ref & cex)254 bool theory_str::fixed_length_reduce_negative_prefix(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 255 ast_manager & m = get_manager(); 256 257 ast_manager & sub_m = subsolver.m(); 258 259 expr * pref = nullptr, *full = nullptr; 260 VERIFY(u.str.is_prefix(f, pref, full)); 261 262 expr_ref haystack(full, m); 263 expr_ref needle(pref, m); 264 265 ptr_vector<expr> full_chars, pref_chars; 266 if (!fixed_length_reduce_string_term(subsolver, haystack, full_chars, cex) 267 || !fixed_length_reduce_string_term(subsolver, needle, pref_chars, cex)) { 268 return false; 269 } 270 271 if (pref_chars.size() == 0) { 272 // all strings startwith the empty one 273 cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(pref), mk_int(0)))); 274 th_rewriter m_rw(m); 275 m_rw(cex); 276 return false; 277 } 278 279 if (full_chars.size() == 0 && pref_chars.size() > 0) { 280 // the empty string doesn't "stratwith" any non-empty string 281 return true; 282 } 283 284 if (full_chars.size() < pref_chars.size()) { 285 // a string can't startwith a longer one 286 // X startswith Y -> len(X) >= len(Y) 287 return true; 288 } 289 290 expr_ref_vector branch(m); 291 for (unsigned j = 0; j < pref_chars.size(); ++j) { 292 // full[j] == pref[j] 293 expr_ref cLHS(full_chars.get(j), sub_m); 294 expr_ref cRHS(pref_chars.get(j), sub_m); 295 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 296 branch.push_back(_e); 297 } 298 299 expr_ref final_diseq(mk_not(sub_m, mk_and(branch)), sub_m); 300 fixed_length_assumptions.push_back(final_diseq); 301 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 302 fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f)); 303 304 return true; 305 } 306 fixed_length_reduce_contains(smt::kernel & subsolver,expr_ref f,expr_ref & cex)307 bool theory_str::fixed_length_reduce_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 308 ast_manager & m = get_manager(); 309 310 ast_manager & sub_m = subsolver.m(); 311 312 expr * full = nullptr; 313 expr * small = nullptr; 314 VERIFY(u.str.is_contains(f, full, small)); 315 316 expr_ref haystack(full, m); 317 expr_ref needle(small, m); 318 319 ptr_vector<expr> haystack_chars, needle_chars; 320 if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) 321 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { 322 return false; 323 } 324 325 if (needle_chars.size() == 0) { 326 // all strings "contain" the empty one 327 return true; 328 } 329 330 if (haystack_chars.size() == 0 && needle_chars.size() > 0) { 331 // the empty string doesn't "contain" any non-empty string 332 cex = m.mk_or(m.mk_not(f), ctx.mk_eq_atom(mk_strlen(needle), mk_int(0)), 333 m_autil.mk_ge(mk_strlen(haystack), mk_int(0))); 334 th_rewriter m_rw(m); 335 m_rw(cex); 336 return false; 337 } 338 339 if (needle_chars.size() > haystack_chars.size()) { 340 // a string can't contain a longer one 341 // X contains Y -> len(X) >= len(Y) 342 expr_ref minus_one(m_autil.mk_numeral(rational::minus_one(), true), m); 343 expr_ref zero(m_autil.mk_numeral(rational::zero(), true), m); 344 expr_ref lens(m_autil.mk_add(mk_strlen(haystack), m_autil.mk_mul(minus_one, mk_strlen(needle))), m); 345 cex = m.mk_or(m.mk_not(f), m_autil.mk_ge(lens, zero)); 346 th_rewriter m_rw(m); 347 m_rw(cex); 348 return false; 349 } 350 // find all positions at which `needle` could occur in `haystack` 351 expr_ref_vector branches(m); 352 for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) { 353 // i defines the offset into haystack_chars 354 expr_ref_vector branch(m); 355 for (unsigned j = 0; j < needle_chars.size(); ++j) { 356 // needle[j] == haystack[i+j] 357 ENSURE(i+j < haystack_chars.size()); 358 expr_ref cLHS(needle_chars.get(j), sub_m); 359 expr_ref cRHS(haystack_chars.get(i+j), sub_m); 360 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 361 branch.push_back(_e); 362 } 363 branches.push_back(mk_and(branch)); 364 } 365 366 expr_ref final_diseq(mk_or(branches), sub_m); 367 fixed_length_assumptions.push_back(final_diseq); 368 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 369 fixed_length_lesson.insert(final_diseq, std::make_tuple(PFUN, f, f)); 370 371 return true; 372 } 373 fixed_length_reduce_negative_contains(smt::kernel & subsolver,expr_ref f,expr_ref & cex)374 bool theory_str::fixed_length_reduce_negative_contains(smt::kernel & subsolver, expr_ref f, expr_ref & cex) { 375 ast_manager & m = get_manager(); 376 377 ast_manager & sub_m = subsolver.m(); 378 379 expr * small = nullptr, *full = nullptr; 380 VERIFY(u.str.is_contains(f, full, small)); 381 382 expr_ref haystack(full, m); 383 expr_ref needle(small, m); 384 385 ptr_vector<expr> haystack_chars, needle_chars; 386 if (!fixed_length_reduce_string_term(subsolver, haystack, haystack_chars, cex) 387 || !fixed_length_reduce_string_term(subsolver, needle, needle_chars, cex)) { 388 return false; 389 } 390 391 if (needle_chars.size() == 0) { 392 // all strings "contain" the empty one 393 cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(needle), mk_int(0)))); 394 ctx.get_rewriter()(cex); 395 return false; 396 } 397 398 if (haystack_chars.size() == 0 && needle_chars.size() > 0) { 399 // the empty string doesn't "contain" any non-empty string 400 return true; 401 } 402 403 if (needle_chars.size() > haystack_chars.size()) { 404 // a string can't contain a longer one 405 // X contains Y -> len(X) >= len(Y) 406 return true; 407 } 408 409 410 // find all positions at which `needle` could occur in `haystack` 411 expr_ref_vector branches(m); 412 for (unsigned i = 0; i <= (haystack_chars.size() - needle_chars.size()); ++i) { 413 // i defines the offset into haystack_chars 414 expr_ref_vector branch(m); 415 for (unsigned j = 0; j < needle_chars.size(); ++j) { 416 // needle[j] == haystack[i+j] 417 ENSURE(i+j < haystack_chars.size()); 418 expr_ref cLHS(needle_chars.get(j), sub_m); 419 expr_ref cRHS(haystack_chars.get(i+j), sub_m); 420 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 421 branch.push_back(_e); 422 } 423 branches.push_back(mk_and(branch)); 424 } 425 426 expr_ref final_diseq(mk_not(sub_m, mk_or(branches)), sub_m); 427 fixed_length_assumptions.push_back(final_diseq); 428 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 429 fixed_length_lesson.insert(final_diseq, std::make_tuple(NFUN, f, f)); 430 431 return true; 432 } 433 add_next(u_map<expr * > & next,expr_ref_vector & trail,unsigned idx,expr * cond,ast_manager & m)434 static inline void add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond, ast_manager & m) { 435 expr* acc; 436 if (!m.is_true(cond) && next.find(idx, acc)) { 437 expr* args[2] = { cond, acc }; 438 cond = mk_or(m, 2, args); 439 } 440 trail.push_back(cond); 441 next.insert(idx, cond); 442 443 } 444 fixed_length_reduce_regex_membership(smt::kernel & subsolver,expr_ref f,expr_ref & cex,bool polarity)445 bool theory_str::fixed_length_reduce_regex_membership(smt::kernel & subsolver, expr_ref f, expr_ref & cex, bool polarity) { 446 ast_manager & m = get_manager(); 447 448 ast_manager & sub_m = subsolver.m(); 449 450 // NSB code review: to remove dependencies on subsolver.get_context(). 451 // It uses a method that should be removed from smt_kernel. 452 // currently sub_ctx is used to retrieve a rewriter. Theory_str already has a rewriter attahed. 453 context & sub_ctx = subsolver.get_context(); 454 455 expr * str = nullptr, *re = nullptr; 456 VERIFY(u.str.is_in_re(f, str, re)); 457 458 // TODO reuse some of the automaton framework from theory_str_regex 459 eautomaton * aut = m_mk_aut(re); 460 aut->compress(); 461 462 ptr_vector<expr> str_chars; 463 if (!fixed_length_reduce_string_term(subsolver, str, str_chars, cex)) { 464 return false; 465 } 466 467 if (str_chars.empty()) { 468 // check 0-length solution 469 bool zero_solution = false; 470 unsigned initial_state = aut->init(); 471 if (aut->is_final_state(initial_state)) { 472 zero_solution = true; 473 } else { 474 unsigned_vector eps_states; 475 aut->get_epsilon_closure(initial_state, eps_states); 476 for (unsigned state : eps_states) { 477 if (aut->is_final_state(state)) { 478 zero_solution = true; 479 break; 480 } 481 } 482 } 483 if (!zero_solution && polarity) { 484 TRACE("str_fl", tout << "contradiction: regex has no zero-length solutions, but our string must be a solution" << std::endl;); 485 cex = m.mk_or(m.mk_not(f), m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0)))); 486 ctx.get_rewriter()(cex); 487 return false; 488 } else if (zero_solution && !polarity) { 489 TRACE("str_fl", tout << "contradiction: regex has zero-length solutions, but our string must not be a solution" << std::endl;); 490 cex = m.mk_or(f, m.mk_not(ctx.mk_eq_atom(mk_strlen(str), mk_int(0)))); 491 ctx.get_rewriter()(cex); 492 return false; 493 } else { 494 TRACE("str_fl", tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;); 495 return true; 496 } 497 } else { 498 expr_ref_vector trail(m); 499 u_map<expr*> maps[2]; 500 bool select_map = false; 501 expr_ref cond(m); 502 eautomaton::moves mvs; 503 maps[0].insert(aut->init(), m.mk_true()); 504 // is_accepted(a, aut) & some state in frontier is final. 505 506 for (auto& ch : str_chars) { 507 u_map<expr*>& frontier = maps[select_map]; 508 u_map<expr*>& next = maps[!select_map]; 509 select_map = !select_map; 510 next.reset(); 511 u_map<expr*>::iterator it = frontier.begin(), end = frontier.end(); 512 for (; it != end; ++it) { 513 mvs.reset(); 514 unsigned state = it->m_key; 515 expr* acc = it->m_value; 516 aut->get_moves_from(state, mvs, false); 517 for (eautomaton::move& mv : mvs) { 518 SASSERT(mv.t()); 519 if (mv.t()->is_char() && m.is_value(mv.t()->get_char()) && m.is_value(ch)) { 520 if (mv.t()->get_char() == ch) { 521 add_next(next, trail, mv.dst(), acc, sub_m); 522 } 523 else { 524 continue; 525 } 526 } 527 else { 528 cond = mv.t()->accept(ch); 529 if (m.is_false(cond)) { 530 continue; 531 } 532 if (m.is_true(cond)) { 533 add_next(next, trail, mv.dst(), acc, sub_m); 534 continue; 535 } 536 expr* args[2] = { cond, acc }; 537 cond = mk_and(m, 2, args); 538 add_next(next, trail, mv.dst(), cond, sub_m); 539 } 540 } 541 } 542 } 543 u_map<expr*> const& frontier = maps[select_map]; 544 expr_ref_vector ors(sub_m); 545 for (auto const& kv : frontier) { 546 unsigned_vector states; 547 bool has_final = false; 548 aut->get_epsilon_closure(kv.m_key, states); 549 for (unsigned i = 0; i < states.size() && !has_final; ++i) { 550 has_final = aut->is_final_state(states[i]); 551 } 552 if (has_final) { 553 ors.push_back(kv.m_value); 554 } 555 } 556 expr_ref result(mk_or(ors), sub_m); 557 sub_ctx.get_rewriter()(result); 558 TRACE("str_fl", tout << "regex path constraint: " << mk_pp(result, sub_m) << std::endl;); 559 560 if (sub_m.is_false(result)) { 561 // There are no solutions of that length in the automaton. 562 // If the membership constraint is true, we assert a conflict clause. 563 // If the membership constraint is false, we ignore the constraint. 564 if (polarity) { 565 // Decompose `str` into its components if it is a concatenation of terms. 566 // This fixes cases where the length of S in (S in RE) might be correct 567 // if the lengths of components of S are assigned in a different way. 568 expr_ref_vector str_terms(m); 569 expr_ref_vector str_terms_eq_len(m); 570 str_terms.push_back(str); 571 while (!str_terms.empty()) { 572 expr* str_term = str_terms.back(); 573 str_terms.pop_back(); 574 expr* arg0; 575 expr* arg1; 576 if (u.str.is_concat(str_term, arg0, arg1)) { 577 str_terms.push_back(arg0); 578 str_terms.push_back(arg1); 579 } else { 580 rational termLen; 581 if (fixed_length_get_len_value(str_term, termLen)) { 582 str_terms_eq_len.push_back(ctx.mk_eq_atom(mk_strlen(str_term), mk_int(termLen))); 583 } else { 584 // this is strange, since we knew the length of `str` in order to get here 585 cex = expr_ref(m_autil.mk_ge(mk_strlen(str_term), mk_int(0)), m); 586 return false; 587 } 588 } 589 } 590 591 cex = m.mk_or(m.mk_not(f), m.mk_not(mk_and(str_terms_eq_len))); 592 ctx.get_rewriter()(cex); 593 return false; 594 } else { 595 TRACE("str_fl", tout << "regex constraint satisfied without asserting constraints to subsolver" << std::endl;); 596 return true; 597 } 598 } else { 599 if (polarity) { 600 fixed_length_assumptions.push_back(result); 601 fixed_length_lesson.insert(result, std::make_tuple(PFUN, f, f)); 602 } else { 603 fixed_length_assumptions.push_back(sub_m.mk_not(result)); 604 fixed_length_lesson.insert(sub_m.mk_not(result), std::make_tuple(NFUN, f, f)); 605 } 606 return true; 607 } 608 } 609 } 610 611 /* 612 * Expressions in the vector eqc_chars exist only in the subsolver. 613 * If this method returns false, a conflict clause is returned in cex; 614 * this conflict clause exists in the main solver. 615 */ fixed_length_reduce_string_term(smt::kernel & subsolver,expr * term,ptr_vector<expr> & eqc_chars,expr_ref & cex)616 bool theory_str::fixed_length_reduce_string_term(smt::kernel & subsolver, expr * term, 617 ptr_vector<expr> & eqc_chars, expr_ref & cex) { 618 ast_manager & m = get_manager(); 619 620 ast_manager & sub_m = subsolver.m(); 621 622 bv_util bv(m); 623 sort * bv8_sort = bv.mk_sort(8); 624 625 expr * arg0; 626 expr * arg1; 627 expr * arg2; 628 629 zstring strConst; 630 if (u.str.is_string(term, strConst)) { 631 for (unsigned i = 0; i < strConst.length(); ++i) { 632 expr_ref chTerm(bitvector_character_constants.get(strConst[i]), m); 633 eqc_chars.push_back(chTerm); 634 } 635 } else if (to_app(term)->get_num_args() == 0 && !u.str.is_string(term)) { 636 // this is a variable; get its length and create/reuse character terms 637 if (!var_to_char_subterm_map.contains(term)) { 638 rational varLen_value; 639 bool var_hasLen = fixed_length_get_len_value(term, varLen_value); 640 if (!var_hasLen || varLen_value.is_neg()) { 641 TRACE("str_fl", tout << "variable " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;); 642 cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m); 643 return false; 644 } 645 TRACE("str_fl", tout << "creating character terms for variable " << mk_pp(term, m) << ", length = " << varLen_value << std::endl;); 646 ptr_vector<expr> new_chars; 647 for (rational i = rational::zero(); i < varLen_value; ++i) { 648 // TODO we can probably name these better for the sake of debugging 649 expr_ref ch(mk_fresh_const("char", bv8_sort), m); 650 new_chars.push_back(ch); 651 fixed_length_subterm_trail.push_back(ch); 652 } 653 var_to_char_subterm_map.insert(term, new_chars); 654 fixed_length_used_len_terms.insert(term, varLen_value); 655 } 656 var_to_char_subterm_map.find(term, eqc_chars); 657 } else if (u.str.is_concat(term, arg0, arg1)) { 658 expr_ref first(arg0, sub_m); 659 expr_ref second(arg1, sub_m); 660 ptr_vector<expr> chars0, chars1; 661 if (!fixed_length_reduce_string_term(subsolver, first, chars0, cex) 662 || !fixed_length_reduce_string_term(subsolver, second, chars1, cex)) { 663 return false; 664 } 665 eqc_chars.append(chars0); 666 eqc_chars.append(chars1); 667 } else if (u.str.is_extract(term, arg0, arg1, arg2)) { 668 // (str.substr Base Pos Len) 669 expr_ref first(arg0, sub_m); 670 expr_ref second(arg1, sub_m); 671 expr_ref third(arg2, sub_m); 672 ptr_vector<expr> base_chars; 673 if (!fixed_length_reduce_string_term(subsolver, first, base_chars, cex)) { 674 return false; 675 } 676 arith_value v(m); 677 v.init(&get_context()); 678 rational pos, len; 679 bool pos_exists = v.get_value(arg1, pos); 680 bool len_exists = v.get_value(arg2, len); 681 if (!pos_exists) { 682 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg1, mk_int(0)), m_autil.mk_le(arg1, mk_int(0))), m); 683 return false; 684 } 685 if (!len_exists) { 686 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg2, mk_int(0)), m_autil.mk_le(arg2, mk_int(0))), m); 687 return false; 688 } 689 TRACE("str_fl", tout << "reduce substring term: base=" << mk_pp(term, m) << " (length="<<base_chars.size()<<"), pos=" << pos.to_string() << ", len=" << len.to_string() << std::endl;); 690 // Case 1: pos < 0 or pos >= strlen(base) or len < 0 691 // ==> (Substr ...) = "" 692 if (pos.is_neg() || pos >= rational(base_chars.size()) || len.is_neg()) { 693 eqc_chars.reset(); 694 return true; 695 } 696 else if (!pos.is_unsigned() || !len.is_unsigned()) { 697 return false; 698 } else { 699 unsigned _pos = pos.get_unsigned(); 700 unsigned _len = len.get_unsigned(); 701 if (_pos + _len < _pos) 702 return false; 703 if (_pos + _len >= base_chars.size()) { 704 // take as many characters as possible up to the end of base_chars 705 for (unsigned i = _pos; i < base_chars.size(); ++i) { 706 eqc_chars.push_back(base_chars.get(i)); 707 } 708 } else { 709 for (unsigned i = _pos; i < _pos + _len; ++i) { 710 eqc_chars.push_back(base_chars.get(i)); 711 } 712 } 713 } 714 } else if (u.str.is_at(term, arg0, arg1)) { 715 // (str.at Base Pos) 716 expr_ref base(arg0, sub_m); 717 expr_ref pos(arg1, sub_m); 718 ptr_vector<expr> base_chars; 719 if (!fixed_length_reduce_string_term(subsolver, base, base_chars, cex)) { 720 return false; 721 } 722 arith_value v(m); 723 v.init(&get_context()); 724 rational pos_value; 725 bool pos_exists = v.get_value(pos, pos_value); 726 if (!pos_exists) { 727 cex = m.mk_or(m_autil.mk_ge(pos, mk_int(0)), m_autil.mk_le(pos, mk_int(0))); 728 return false; 729 } 730 TRACE("str_fl", tout << "reduce str.at: base=" << mk_pp(base, m) << ", pos=" << pos_value.to_string() << std::endl;); 731 if (pos_value.is_neg() || pos_value >= rational(base_chars.size())) { 732 // return the empty string 733 eqc_chars.reset(); 734 } 735 else if (!pos_value.is_unsigned()) { 736 return false; 737 } else { 738 eqc_chars.push_back(base_chars.get(pos_value.get_unsigned())); 739 } 740 return true; 741 } else if (u.str.is_itos(term, arg0)) { 742 expr_ref i(arg0, m); 743 arith_value v(m); 744 v.init(&get_context()); 745 rational iValue; 746 bool iValue_exists = v.get_value(i, iValue); 747 if (!iValue_exists) { 748 cex = expr_ref(m.mk_or(m_autil.mk_ge(arg0, mk_int(0)), m_autil.mk_le(arg0, mk_int(0))), m); 749 return false; 750 } 751 rational termLen; 752 bool termLen_exists = v.get_value(mk_strlen(term), termLen); 753 if(!termLen_exists) { 754 cex = expr_ref(m.mk_or(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m_autil.mk_le(mk_strlen(term), mk_int(0))), m); 755 return false; 756 } 757 TRACE("str_fl", tout << "reduce int.to.str: n=" << iValue << std::endl;); 758 if (iValue.is_neg()) { 759 if (!termLen.is_zero()) { 760 // conflict 761 cex = expr_ref(m.mk_not(m.mk_and(m_autil.mk_le(arg0, mk_int(-1)), m.mk_not(mk_strlen(term)))), m); 762 return false; 763 } 764 // return the empty string 765 eqc_chars.reset(); 766 return true; 767 } else { 768 if (termLen != iValue.get_num_decimal()) { 769 // conflict 770 cex = expr_ref(m.mk_not(m.mk_and(get_context().mk_eq_atom(mk_strlen(term), mk_int(termLen)), get_context().mk_eq_atom(arg0, mk_int(iValue)))), m); 771 return false; 772 } 773 // convert iValue to a constant 774 zstring iValue_str(iValue.to_string()); 775 for (unsigned idx = 0; idx < iValue_str.length(); ++idx) { 776 expr_ref chTerm(bitvector_character_constants.get(iValue_str[idx]), sub_m); 777 eqc_chars.push_back(chTerm); 778 } 779 return true; 780 } 781 } else { 782 TRACE("str_fl", tout << "string term " << mk_pp(term, m) << " handled as uninterpreted function" << std::endl;); 783 if (!uninterpreted_to_char_subterm_map.contains(term)) { 784 rational ufLen_value; 785 bool uf_hasLen = fixed_length_get_len_value(term, ufLen_value); 786 if (!uf_hasLen || ufLen_value.is_neg()) { 787 TRACE("str_fl", tout << "uninterpreted function " << mk_pp(term, m) << " has no length assignment or impossible length assignment - asserting conflict axiom" << std::endl;); 788 cex = expr_ref(m_autil.mk_ge(mk_strlen(term), mk_int(0)), m); 789 return false; 790 } 791 TRACE("str_fl", tout << "creating character terms for uninterpreted function " << mk_pp(term, m) << ", length = " << ufLen_value << std::endl;); 792 ptr_vector<expr> new_chars; 793 for (rational i = rational::zero(); i < ufLen_value; ++i) { 794 expr_ref ch(mk_fresh_const("char", bv8_sort), m); 795 new_chars.push_back(ch); 796 fixed_length_subterm_trail.push_back(ch); 797 } 798 uninterpreted_to_char_subterm_map.insert(term, new_chars); 799 fixed_length_used_len_terms.insert(term, ufLen_value); 800 } 801 uninterpreted_to_char_subterm_map.find(term, eqc_chars); 802 } 803 return true; 804 } 805 fixed_length_reduce_eq(smt::kernel & subsolver,expr_ref lhs,expr_ref rhs,expr_ref & cex)806 bool theory_str::fixed_length_reduce_eq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex) { 807 ast_manager & m = get_manager(); 808 809 ast_manager & sub_m = subsolver.m(); 810 811 ptr_vector<expr> lhs_chars, rhs_chars; 812 813 if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) 814 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { 815 return false; 816 } 817 818 if (lhs_chars.size() != rhs_chars.size()) { 819 TRACE("str_fl", tout << "length information inconsistent: " << mk_pp(lhs, m) << " has " << lhs_chars.size() << 820 " chars, " << mk_pp(rhs, m) << " has " << rhs_chars.size() << " chars" << std::endl;); 821 // equal strings ought to have equal lengths 822 cex = m.mk_or(m.mk_not(ctx.mk_eq_atom(lhs, rhs)), ctx.mk_eq_atom(mk_strlen(lhs), mk_strlen(rhs))); 823 return false; 824 } 825 for (unsigned i = 0; i < lhs_chars.size(); ++i) { 826 expr_ref cLHS(lhs_chars.get(i), sub_m); 827 expr_ref cRHS(rhs_chars.get(i), sub_m); 828 expr_ref _e(sub_m.mk_eq(cLHS, cRHS), sub_m); 829 fixed_length_assumptions.push_back(_e); 830 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 831 fixed_length_lesson.insert(_e, std::make_tuple(rational(i), lhs, rhs)); 832 } 833 return true; 834 } 835 fixed_length_reduce_diseq(smt::kernel & subsolver,expr_ref lhs,expr_ref rhs,expr_ref & cex)836 bool theory_str::fixed_length_reduce_diseq(smt::kernel & subsolver, expr_ref lhs, expr_ref rhs, expr_ref & cex) { 837 ast_manager & m = get_manager(); 838 839 ast_manager & sub_m = subsolver.m(); 840 841 // we do generation before this check to make sure that 842 // variables which only appear in disequalities show up in the model 843 rational lhsLen, rhsLen; 844 bool lhsLen_exists = fixed_length_get_len_value(lhs, lhsLen); 845 bool rhsLen_exists = fixed_length_get_len_value(rhs, rhsLen); 846 847 if (!lhsLen_exists) { 848 cex = m_autil.mk_ge(mk_strlen(lhs), mk_int(0)); 849 return false; 850 } 851 852 if (!rhsLen_exists) { 853 cex = m_autil.mk_ge(mk_strlen(rhs), mk_int(0)); 854 return false; 855 } 856 857 ptr_vector<expr> lhs_chars, rhs_chars; 858 if (!fixed_length_reduce_string_term(subsolver, lhs, lhs_chars, cex) 859 || !fixed_length_reduce_string_term(subsolver, rhs, rhs_chars, cex)) { 860 return false; 861 } 862 863 if (lhsLen != rhsLen) { 864 TRACE("str", tout << "skip disequality: len(lhs) = " << lhsLen << ", len(rhs) = " << rhsLen << std::endl;); 865 return true; 866 } 867 868 SASSERT(lhs_chars.size() == rhs_chars.size()); 869 expr_ref_vector diseqs(m); 870 for (unsigned i = 0; i < lhs_chars.size(); ++i) { 871 expr_ref cLHS(lhs_chars.get(i), sub_m); 872 expr_ref cRHS(rhs_chars.get(i), sub_m); 873 diseqs.push_back(sub_m.mk_not(sub_m.mk_eq(cLHS, cRHS))); 874 } 875 876 expr_ref final_diseq(mk_or(diseqs), sub_m); 877 fixed_length_assumptions.push_back(final_diseq); 878 TRACE("str_fl", tout << "inserting into fixed_lesson" <<std::endl;); 879 fixed_length_lesson.insert(final_diseq, std::make_tuple(NEQ, lhs, rhs)); 880 881 return true; 882 } 883 fixed_length_model_construction(expr_ref_vector formulas,expr_ref_vector & precondition,expr_ref_vector & free_variables,obj_map<expr,zstring> & model,expr_ref_vector & cex)884 lbool theory_str::fixed_length_model_construction(expr_ref_vector formulas, expr_ref_vector &precondition, 885 expr_ref_vector& free_variables, 886 obj_map<expr, zstring> &model, expr_ref_vector &cex) { 887 888 ast_manager & m = get_manager(); 889 890 if (bitvector_character_constants.empty()) { 891 bv_util bv(m); 892 sort * bv8_sort = bv.mk_sort(8); 893 for (unsigned i = 0; i < 256; ++i) { 894 rational ch(i); 895 expr_ref chTerm(bv.mk_numeral(ch, bv8_sort), m); 896 bitvector_character_constants.push_back(chTerm); 897 fixed_length_subterm_trail.push_back(chTerm); 898 } 899 } 900 901 TRACE("str", 902 ast_manager & m = get_manager(); 903 tout << "dumping all formulas:" << std::endl; 904 for (expr_ref_vector::iterator i = formulas.begin(); i != formulas.end(); ++i) { 905 expr * ex = *i; 906 tout << mk_pp(ex, m) << (ctx.is_relevant(ex) ? "" : " (NOT REL)") << std::endl; 907 } 908 ); 909 910 fixed_length_subterm_trail.reset(); 911 fixed_length_used_len_terms.reset(); 912 fixed_length_assumptions.reset(); 913 var_to_char_subterm_map.reset(); 914 uninterpreted_to_char_subterm_map.reset(); 915 fixed_length_lesson.reset(); 916 917 // All reduced Boolean formulas in the current assignment 918 expr_ref_vector fixed_length_reduced_boolean_formulas(m); 919 920 // Boolean formulas on which to apply abstraction refinement. 921 expr_ref_vector abstracted_boolean_formulas(m); 922 923 smt_params subsolver_params; 924 smt::kernel subsolver(m, subsolver_params); 925 subsolver.set_logic(symbol("QF_BV")); 926 927 sort * str_sort = u.str.mk_string_sort(); 928 sort * bool_sort = m.mk_bool_sort(); 929 930 for (expr * var : free_variables) { 931 TRACE("str_fl", tout << "initialize free variable " << mk_pp(var, m) << std::endl;); 932 rational var_lenVal; 933 if (!fixed_length_get_len_value(var, var_lenVal)) { 934 TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " has no length assignment" << std::endl;); 935 expr_ref var_len_assertion(m_autil.mk_ge(mk_strlen(var), mk_int(0)), m); 936 assert_axiom(var_len_assertion); 937 add_persisted_axiom(var_len_assertion); 938 return l_undef; 939 } 940 ptr_vector<expr> var_chars; 941 expr_ref str_counterexample(m); 942 if (!fixed_length_reduce_string_term(subsolver, var, var_chars, str_counterexample)) { 943 TRACE("str_fl", tout << "free variable " << mk_pp(var, m) << " caused a conflict; asserting and continuing" << std::endl;); 944 assert_axiom(str_counterexample); 945 return l_undef; 946 } 947 } 948 949 for (expr * f : formulas) { 950 if (!get_context().is_relevant(f)) { 951 expr * subformula = nullptr; 952 if (m.is_not(f, subformula)) { 953 if (!get_context().is_relevant(subformula)) { 954 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant (and neither is its subformula)" << std::endl;); 955 continue; 956 } else { 957 TRACE("str_fl", tout << "considering formula " << mk_pp(f, m) << ", its subformula is relevant but it is not" << std::endl;); 958 } 959 } else { 960 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant" << std::endl;); 961 continue; 962 } 963 } 964 // reduce string formulas only. ignore others 965 sort * fSort = m.get_sort(f); 966 if (fSort == bool_sort && !is_quantifier(f)) { 967 // extracted terms 968 expr * subterm; 969 expr * lhs; 970 expr * rhs; 971 if (m.is_eq(f, lhs, rhs)) { 972 sort * lhs_sort = m.get_sort(lhs); 973 if (lhs_sort == str_sort) { 974 TRACE("str_fl", tout << "reduce string equality: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << std::endl;); 975 expr_ref cex(m); 976 expr_ref left(lhs, m); 977 expr_ref right(rhs, m); 978 if (!fixed_length_reduce_eq(subsolver, left, right, cex)) { 979 // missing a side condition. assert it and return unknown 980 assert_axiom(cex); 981 add_persisted_axiom(cex); 982 return l_undef; 983 } 984 fixed_length_reduced_boolean_formulas.push_back(f); 985 } else { 986 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not an equality over strings" << std::endl;); 987 } 988 } else if (u.str.is_in_re(f)) { 989 TRACE("str_fl", tout << "reduce regex membership: " << mk_pp(f, m) << std::endl;); 990 expr_ref cex_clause(m); 991 expr_ref re(f, m); 992 if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, true)) { 993 assert_axiom(cex_clause); 994 add_persisted_axiom(cex_clause); 995 return l_undef; 996 } 997 fixed_length_reduced_boolean_formulas.push_back(f); 998 } else if (u.str.is_contains(f)) { 999 // TODO in some cases (e.g. len(haystack) is only slightly greater than len(needle)) 1000 // we might be okay to assert the full disjunction because there are very few disjuncts 1001 if (m_params.m_FixedLengthRefinement) { 1002 TRACE("str_fl", tout << "abstracting out positive contains: " << mk_pp(f, m) << std::endl;); 1003 abstracted_boolean_formulas.push_back(f); 1004 } else { 1005 TRACE("str_fl", tout << "reduce positive contains: " << mk_pp(f, m) << std::endl;); 1006 expr_ref cex(m); 1007 expr_ref cont(f, m); 1008 if (!fixed_length_reduce_contains(subsolver, cont, cex)) { 1009 assert_axiom(cex); 1010 add_persisted_axiom(cex); 1011 return l_undef; 1012 } 1013 fixed_length_reduced_boolean_formulas.push_back(f); 1014 } 1015 } else if (u.str.is_prefix(f)) { 1016 TRACE("str_fl", tout << "reduce positive prefix: " << mk_pp(f, m) << std::endl;); 1017 expr_ref cex(m); 1018 expr_ref pref(f, m); 1019 if (!fixed_length_reduce_prefix(subsolver, pref, cex)) { 1020 assert_axiom(cex); 1021 add_persisted_axiom(cex); 1022 return l_undef; 1023 } 1024 fixed_length_reduced_boolean_formulas.push_back(f); 1025 } else if (u.str.is_suffix(f)) { 1026 TRACE("str_fl", tout << "reduce positive suffix: " << mk_pp(f, m) << std::endl;); 1027 expr_ref cex(m); 1028 expr_ref suf(f, m); 1029 if (!fixed_length_reduce_suffix(subsolver, suf, cex)) { 1030 assert_axiom(cex); 1031 add_persisted_axiom(cex); 1032 return l_undef; 1033 } 1034 fixed_length_reduced_boolean_formulas.push_back(f); 1035 }else if (m.is_not(f, subterm)) { 1036 // if subterm is a string formula such as an equality, reduce it as a disequality 1037 if (m.is_eq(subterm, lhs, rhs)) { 1038 sort * lhs_sort = m.get_sort(lhs); 1039 if (lhs_sort == str_sort) { 1040 TRACE("str_fl", tout << "reduce string disequality: " << mk_pp(lhs, m) << " != " << mk_pp(rhs, m) << std::endl;); 1041 expr_ref cex(m); 1042 expr_ref left(lhs, m); 1043 expr_ref right(rhs, m); 1044 if (!fixed_length_reduce_diseq(subsolver, left, right, cex)) { 1045 // missing a side condition. assert it and return unknown 1046 assert_axiom(cex); 1047 add_persisted_axiom(cex); 1048 return l_undef; 1049 } 1050 fixed_length_reduced_boolean_formulas.push_back(f); 1051 } 1052 } else if (u.str.is_in_re(subterm)) { 1053 TRACE("str_fl", tout << "reduce negative regex membership: " << mk_pp(f, m) << std::endl;); 1054 expr_ref cex_clause(m); 1055 expr_ref re(subterm, m); 1056 if (!fixed_length_reduce_regex_membership(subsolver, re, cex_clause, false)) { 1057 assert_axiom(cex_clause); 1058 add_persisted_axiom(cex_clause); 1059 return l_undef; 1060 } 1061 fixed_length_reduced_boolean_formulas.push_back(f); 1062 } else if (u.str.is_contains(subterm)) { 1063 TRACE("str_fl", tout << "reduce negative contains: " << mk_pp(subterm, m) << std::endl;); 1064 expr_ref cex(m); 1065 expr_ref cont(subterm, m); 1066 if (!fixed_length_reduce_negative_contains(subsolver, cont, cex)) { 1067 assert_axiom(cex); 1068 add_persisted_axiom(cex); 1069 return l_undef; 1070 } 1071 fixed_length_reduced_boolean_formulas.push_back(f); 1072 } else if (u.str.is_prefix(subterm)) { 1073 TRACE("str_fl", tout << "reduce negative prefix: " << mk_pp(subterm, m) << std::endl;); 1074 expr_ref cex(m); 1075 expr_ref pref(subterm, m); 1076 if (!fixed_length_reduce_negative_prefix(subsolver, pref, cex)) { 1077 assert_axiom(cex); 1078 add_persisted_axiom(cex); 1079 return l_undef; 1080 } 1081 fixed_length_reduced_boolean_formulas.push_back(f); 1082 } else if (u.str.is_suffix(subterm)) { 1083 TRACE("str_fl", tout << "reduce negative suffix: " << mk_pp(subterm, m) << std::endl;); 1084 expr_ref cex(m); 1085 expr_ref suf(subterm, m); 1086 if (!fixed_length_reduce_negative_suffix(subsolver, suf, cex)) { 1087 assert_axiom(cex); 1088 add_persisted_axiom(cex); 1089 return l_undef; 1090 } 1091 fixed_length_reduced_boolean_formulas.push_back(f); 1092 } else { 1093 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;); 1094 } 1095 } else { 1096 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not a boolean formula we handle" << std::endl;); 1097 continue; 1098 } 1099 } else { 1100 TRACE("str_fl", tout << "skip reducing formula " << mk_pp(f, m) << ", not relevant to strings" << std::endl;); 1101 continue; 1102 } 1103 } 1104 1105 // Check consistency of all string-integer conversion terms wrt. integer theory before we solve, 1106 // possibly generating additional constraints for the bit-vector solver. 1107 { 1108 arith_value v(get_manager()); 1109 v.init(&get_context()); 1110 for (auto e : string_int_conversion_terms) { 1111 TRACE("str_fl", tout << "pre-run check str-int term " << mk_pp(e, get_manager()) << std::endl;); 1112 expr* _arg; 1113 if (u.str.is_stoi(e, _arg)) { 1114 expr_ref arg(_arg, m); 1115 rational slen; 1116 if (!fixed_length_get_len_value(arg, slen)) { 1117 expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(arg), mk_int(0)), m); 1118 assert_axiom(stoi_cex); 1119 add_persisted_axiom(stoi_cex); 1120 return l_undef; 1121 } 1122 TRACE("str_fl", tout << "length of term is " << slen << std::endl;); 1123 1124 rational ival; 1125 if (v.get_value(e, ival)) { 1126 TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(e, get_manager()) << std::endl;); 1127 // if ival is non-negative, because we know the length of arg, we can add a character constraint for arg 1128 if (ival.is_nonneg()) { 1129 zstring ival_str(ival.to_string()); 1130 zstring padding; 1131 for (rational i = rational::zero(); i < slen - rational(ival_str.length()); ++i) { 1132 padding = padding + zstring("0"); 1133 } 1134 zstring arg_val = padding + ival_str; 1135 expr_ref stoi_cex(m); 1136 expr_ref arg_char_expr(mk_string(arg_val), m); 1137 1138 // Add (e == ival) as a precondition. 1139 precondition.push_back(m.mk_eq(e, mk_int(ival))); 1140 // Assert (arg == arg_chars) in the subsolver. 1141 if (!fixed_length_reduce_eq(subsolver, arg, arg_char_expr, stoi_cex)) { 1142 // Counterexample: (str.to_int S) == ival AND len(S) == slen cannot both be true. 1143 stoi_cex = expr_ref(m.mk_not(m.mk_and( 1144 m.mk_eq(e, mk_int(ival)), 1145 m.mk_eq(mk_strlen(arg), mk_int(slen)) 1146 )), m); 1147 assert_axiom(stoi_cex); 1148 add_persisted_axiom(stoi_cex); 1149 1150 return l_undef; 1151 } 1152 1153 fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(e, mk_int(ival))); 1154 } 1155 } else { 1156 TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(e, get_manager()) << std::endl;); 1157 // consistency needs to be checked after the string is assigned 1158 } 1159 } else if (u.str.is_itos(e, _arg)) { 1160 expr_ref arg(_arg, m); 1161 rational slen; 1162 if (!fixed_length_get_len_value(e, slen)) { 1163 expr_ref stoi_cex(m_autil.mk_ge(mk_strlen(e), mk_int(0)), m); 1164 assert_axiom(stoi_cex); 1165 add_persisted_axiom(stoi_cex); 1166 return l_undef; 1167 } 1168 TRACE("str_fl", tout << "length of term is " << slen << std::endl;); 1169 rational ival; 1170 if (v.get_value(arg, ival)) { 1171 TRACE("str_fl", tout << "integer theory assigns " << ival << " to " << mk_pp(arg, get_manager()) << std::endl;); 1172 zstring ival_str; 1173 if (ival.is_neg()) { 1174 // e must be the empty string, i.e. have length 0 1175 ival_str = zstring(""); 1176 } else { 1177 // e must be equal to the string representation of ival 1178 ival_str = zstring(ival.to_string()); 1179 } 1180 // Add (arg == ival) as a precondition. 1181 precondition.push_back(m.mk_eq(arg, mk_int(ival))); 1182 // Assert (e == ival_str) in the subsolver. 1183 expr_ref itos_cex(m); 1184 expr_ref _e(e, m); 1185 expr_ref arg_char_expr(mk_string(ival_str), m); 1186 if (!fixed_length_reduce_eq(subsolver, _e, arg_char_expr, itos_cex)) { 1187 // Counterexample: N in (str.from_int N) == ival AND len(str.from_int N) == slen cannot both be true. 1188 itos_cex = expr_ref(m.mk_not(m.mk_and( 1189 m.mk_eq(arg, mk_int(ival)), 1190 m.mk_eq(mk_strlen(e), mk_int(slen)) 1191 )), m); 1192 assert_axiom(itos_cex); 1193 add_persisted_axiom(itos_cex); 1194 return l_undef; 1195 } 1196 fixed_length_reduced_boolean_formulas.push_back(m.mk_eq(arg, mk_int(ival))); 1197 } else { 1198 TRACE("str_fl", tout << "integer theory has no assignment for " << mk_pp(arg, get_manager()) << std::endl;); 1199 // consistency needs to be checked after the string is assigned 1200 } 1201 } 1202 } 1203 } 1204 1205 for (auto e : fixed_length_used_len_terms) { 1206 expr * var = &e.get_key(); 1207 rational val = e.get_value(); 1208 precondition.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val))); 1209 } 1210 1211 TRACE("str_fl", 1212 tout << "formulas asserted to bitvector subsolver:" << std::endl; 1213 for (auto e : fixed_length_assumptions) { 1214 tout << mk_pp(e, subsolver.m()) << std::endl; 1215 } 1216 tout << "variable to character mappings:" << std::endl; 1217 for (auto &entry : var_to_char_subterm_map) { 1218 tout << mk_pp(entry.m_key, get_manager()) << ":"; 1219 for (auto e : entry.m_value) { 1220 tout << " " << mk_pp(e, subsolver.m()); 1221 } 1222 tout << std::endl; 1223 } 1224 tout << "reduced boolean formulas:" << std::endl; 1225 for (auto e : fixed_length_reduced_boolean_formulas) { 1226 tout << mk_pp(e, m) << std::endl; 1227 } 1228 ); 1229 1230 TRACE("str_fl", tout << "calling subsolver" << std::endl;); 1231 1232 lbool subproblem_status = subsolver.check(fixed_length_assumptions); 1233 1234 if (subproblem_status == l_true) { 1235 bv_util bv(m); 1236 TRACE("str_fl", tout << "subsolver found SAT; reconstructing model" << std::endl;); 1237 model_ref subModel; 1238 subsolver.get_model(subModel); 1239 1240 expr_substitution subst(m); 1241 1242 //model_smt2_pp(std::cout, m, *subModel, 2); 1243 for (auto entry : var_to_char_subterm_map) { 1244 svector<unsigned> assignment; 1245 expr * var = entry.m_key; 1246 ptr_vector<expr> char_subterms(entry.m_value); 1247 for (expr * chExpr : char_subterms) { 1248 expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); 1249 rational n; 1250 if (chAssignment != nullptr && bv.is_numeral(chAssignment, n) && n.is_unsigned()) { 1251 assignment.push_back(n.get_unsigned()); 1252 } else { 1253 assignment.push_back((unsigned)'?'); 1254 } 1255 } 1256 zstring strValue(assignment.size(), assignment.c_ptr()); 1257 model.insert(var, strValue); 1258 subst.insert(var, mk_string(strValue)); 1259 } 1260 TRACE("str_fl", 1261 for (auto entry : model) { 1262 tout << mk_pp(entry.m_key, m) << " = " << entry.m_value << std::endl; 1263 } 1264 ); 1265 for (auto entry : uninterpreted_to_char_subterm_map) { 1266 svector<unsigned> assignment; 1267 expr * var = entry.m_key; 1268 ptr_vector<expr> char_subterms(entry.m_value); 1269 for (expr * chExpr : char_subterms) { 1270 expr_ref chAssignment(subModel->get_const_interp(to_app(chExpr)->get_decl()), m); 1271 rational n; 1272 if (chAssignment != nullptr && bv.is_numeral(chAssignment, n) && n.is_unsigned()) { 1273 assignment.push_back(n.get_unsigned()); 1274 } else { 1275 assignment.push_back((unsigned)'?'); 1276 } 1277 } 1278 zstring strValue(assignment.size(), assignment.c_ptr()); 1279 model.insert(var, strValue); 1280 subst.insert(var, mk_string(strValue)); 1281 } 1282 1283 // Check consistency of string-integer conversion terms after the search. 1284 { 1285 scoped_ptr<expr_replacer> replacer = mk_default_expr_replacer(m, false); 1286 replacer->set_substitution(&subst); 1287 th_rewriter rw(m); 1288 arith_value v(get_manager()); 1289 v.init(&get_context()); 1290 for (auto e : string_int_conversion_terms) { 1291 TRACE("str_fl", tout << "post-run check str-int term " << mk_pp(e, get_manager()) << std::endl;); 1292 expr* _arg; 1293 if (u.str.is_stoi(e, _arg)) { 1294 expr_ref arg(_arg, m); 1295 rational ival; 1296 if (v.get_value(e, ival)) { 1297 expr_ref arg_subst(arg, m); 1298 (*replacer)(arg, arg_subst); 1299 rw(arg_subst); 1300 TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(arg_subst, m) << std::endl;); 1301 1302 symbol arg_str; 1303 if (u.str.is_string(arg_subst, arg_str)) { 1304 zstring arg_zstr(arg_str.bare_str()); 1305 rational arg_value; 1306 if (string_integer_conversion_valid(arg_zstr, arg_value)) { 1307 if (ival != arg_value) { 1308 // contradiction 1309 expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); 1310 assert_axiom(cex); 1311 return l_undef; 1312 } 1313 } else { 1314 if (!ival.is_minus_one()) { 1315 expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_string(arg_zstr)), ctx.mk_eq_atom(e, mk_int(ival)))), m); 1316 assert_axiom(cex); 1317 return l_undef; 1318 } 1319 } 1320 } 1321 } 1322 } else if (u.str.is_itos(e, _arg)) { 1323 expr_ref arg(_arg, m); 1324 rational ival; 1325 if (v.get_value(arg, ival)) { 1326 expr_ref e_subst(e, m); 1327 (*replacer)(e, e_subst); 1328 rw(e_subst); 1329 TRACE("str_fl", tout << "ival = " << ival << ", string arg evaluates to " << mk_pp(e_subst, m) << std::endl;); 1330 1331 symbol e_str; 1332 if (u.str.is_string(e_subst, e_str)) { 1333 zstring e_zstr(e_str.bare_str()); 1334 // if arg is negative, e must be empty 1335 // if arg is non-negative, e must be valid AND cannot contain leading zeroes 1336 1337 if (ival.is_neg()) { 1338 if (!e_zstr.empty()) { 1339 // contradiction 1340 expr_ref cex(ctx.mk_eq_atom(m_autil.mk_le(arg, mk_int(-1)), ctx.mk_eq_atom(e, mk_string(""))), m); 1341 assert_axiom(cex); 1342 return l_undef; 1343 } 1344 } else { 1345 rational e_value; 1346 if (string_integer_conversion_valid(e_zstr, e_value)) { 1347 // e contains leading zeroes if its first character is 0 but converted to something other than 0 1348 if (e_zstr[0] == '0' && !e_value.is_zero()) { 1349 // contradiction 1350 expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m); 1351 assert_axiom(cex); 1352 return l_undef; 1353 } 1354 } else { 1355 // contradiction 1356 expr_ref cex(m.mk_not(m.mk_and(ctx.mk_eq_atom(arg, mk_int(ival)), ctx.mk_eq_atom(e, mk_string(e_zstr)))), m); 1357 assert_axiom(cex); 1358 return l_undef; 1359 } 1360 } 1361 } 1362 } 1363 } 1364 } 1365 } 1366 1367 // TODO insert length values into substitution table as well? 1368 if (m_params.m_FixedLengthRefinement) { 1369 scoped_ptr<expr_replacer> replacer = mk_default_expr_replacer(m, false); 1370 replacer->set_substitution(&subst); 1371 th_rewriter rw(m); 1372 if (!abstracted_boolean_formulas.empty()) { 1373 for (auto f : abstracted_boolean_formulas) { 1374 TRACE("str_fl", tout << "refinement of boolean formula: " << mk_pp(f, m) << std::endl;); 1375 expr_ref f_new(m); 1376 (*replacer)(f, f_new); 1377 rw(f_new); 1378 TRACE("str_fl", tout << "after substitution and simplification, evaluates to: " << mk_pp(f_new, m) << std::endl;); 1379 // now there are three cases, depending on what f_new evaluates to: 1380 // true -> OK, do nothing 1381 // false -> refine abstraction by generating conflict clause 1382 // anything else -> error, probably our substitution was incomplete 1383 if (m.is_true(f_new)) { 1384 // do nothing 1385 } else if (m.is_false(f_new)) { 1386 expr * needle = nullptr, *haystack = nullptr; 1387 if (u.str.is_contains(f, haystack, needle)) { 1388 expr_ref haystack_assignment(m); 1389 expr_ref needle_assignment(m); 1390 (*replacer)(haystack, haystack_assignment); 1391 (*replacer)(needle, needle_assignment); 1392 cex.push_back(f); 1393 cex.push_back(ctx.mk_eq_atom(haystack, haystack_assignment)); 1394 cex.push_back(ctx.mk_eq_atom(needle, needle_assignment)); 1395 return l_false; 1396 } else { 1397 TRACE("str_fl", tout << "error: unhandled refinement term " << mk_pp(f, m) << std::endl;); 1398 NOT_IMPLEMENTED_YET(); 1399 } 1400 } else { 1401 NOT_IMPLEMENTED_YET(); 1402 } 1403 } 1404 } 1405 } 1406 1407 return l_true; 1408 } else if (subproblem_status == l_false) { 1409 if (m_params.m_FixedLengthNaiveCounterexamples) { 1410 TRACE("str_fl", tout << "subsolver found UNSAT; constructing length counterexample" << std::endl;); 1411 for (auto e : fixed_length_used_len_terms) { 1412 expr * var = &e.get_key(); 1413 rational val = e.get_value(); 1414 cex.push_back(m.mk_eq(u.str.mk_length(var), mk_int(val))); 1415 } 1416 for (auto e : fixed_length_reduced_boolean_formulas) { 1417 cex.push_back(e); 1418 } 1419 return l_false; 1420 } else { 1421 TRACE("str_fl", tout << "subsolver found UNSAT; reconstructing unsat core" << std::endl;); 1422 TRACE("str_fl", tout << "unsat core has size " << subsolver.get_unsat_core_size() << std::endl;); 1423 bool negate_pre = false; 1424 for (unsigned i = 0; i < subsolver.get_unsat_core_size(); ++i) { 1425 TRACE("str", tout << "entry " << i << " = " << mk_pp(subsolver.get_unsat_core_expr(i), m) << std::endl;); 1426 rational index; 1427 expr* lhs; 1428 expr* rhs; 1429 TRACE("str_fl", tout << fixed_length_lesson.size() << std::endl;); 1430 std::tie(index, lhs, rhs) = fixed_length_lesson.find(subsolver.get_unsat_core_expr(i)); 1431 TRACE("str_fl", tout << "lesson: " << mk_pp(lhs, m) << " == " << mk_pp(rhs, m) << " at index " << index << std::endl;); 1432 cex.push_back(refine(lhs, rhs, index)); 1433 if (index < rational(0)) { 1434 negate_pre = true; 1435 } 1436 } 1437 if (negate_pre || subsolver.get_unsat_core_size() == 0){ 1438 for (auto ex : precondition) { 1439 cex.push_back(ex); 1440 } 1441 } 1442 return l_false; 1443 } 1444 } else { // l_undef 1445 TRACE("str_fl", tout << "WARNING: subsolver found UNKNOWN" << std::endl;); 1446 return l_undef; 1447 } 1448 } 1449 1450 }; // namespace smt 1451