1 /*********************                                                        */
2 /*! \file theory_strings_preprocess.cpp
3  ** \verbatim
4  ** Top contributors (to current version):
5  **   Andrew Reynolds, Tianyi Liang
6  ** This file is part of the CVC4 project.
7  ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8  ** in the top-level source directory) and their institutional affiliations.
9  ** All rights reserved.  See the file COPYING in the top-level source
10  ** directory for licensing information.\endverbatim
11  **
12  ** \brief Strings Preprocess
13  **
14  ** Strings Preprocess.
15  **/
16 
17 #include "theory/strings/theory_strings_preprocess.h"
18 
19 #include <stdint.h>
20 
21 #include "expr/kind.h"
22 #include "options/strings_options.h"
23 #include "proof/proof_manager.h"
24 #include "smt/logic_exception.h"
25 #include "theory/strings/theory_strings_rewriter.h"
26 
27 using namespace CVC4;
28 using namespace CVC4::kind;
29 
30 namespace CVC4 {
31 namespace theory {
32 namespace strings {
33 
StringsPreprocess(SkolemCache * sc,context::UserContext * u)34 StringsPreprocess::StringsPreprocess(SkolemCache *sc, context::UserContext *u)
35     : d_sc(sc)
36 {
37   //Constants
38   d_zero = NodeManager::currentNM()->mkConst(Rational(0));
39   d_one = NodeManager::currentNM()->mkConst(Rational(1));
40   d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1));
41   d_empty_str = NodeManager::currentNM()->mkConst(String(""));
42 }
43 
~StringsPreprocess()44 StringsPreprocess::~StringsPreprocess(){
45 
46 }
47 
simplify(Node t,std::vector<Node> & new_nodes)48 Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
49   unsigned prev_new_nodes = new_nodes.size();
50   Trace("strings-preprocess-debug") << "StringsPreprocess::simplify: " << t << std::endl;
51   Node retNode = t;
52   NodeManager *nm = NodeManager::currentNM();
53 
54   if( t.getKind() == kind::STRING_SUBSTR ) {
55     // processing term:  substr( s, n, m )
56     Node s = t[0];
57     Node n = t[1];
58     Node m = t[2];
59     Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
60     Node t12 = nm->mkNode(PLUS, n, m);
61     t12 = Rewriter::rewrite(t12);
62     Node lt0 = nm->mkNode(STRING_LENGTH, s);
63     //start point is greater than or equal zero
64     Node c1 = nm->mkNode(GEQ, n, d_zero);
65     //start point is less than end of string
66     Node c2 = nm->mkNode(GT, lt0, n);
67     //length is positive
68     Node c3 = nm->mkNode(GT, m, d_zero);
69     Node cond = nm->mkNode(AND, c1, c2, c3);
70 
71     Node sk1 = n == d_zero ? d_empty_str
72                            : d_sc->mkSkolemCached(
73                                  s, n, SkolemCache::SK_PREFIX, "sspre");
74     Node sk2 = TheoryStringsRewriter::checkEntailArith(t12, lt0)
75                    ? d_empty_str
76                    : d_sc->mkSkolemCached(
77                          s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
78     Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2));
79     //length of first skolem is second argument
80     Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
81     //length of second skolem is abs difference between end point and end of string
82     Node b13 = nm->mkNode(STRING_LENGTH, sk2)
83                    .eqNode(nm->mkNode(ITE,
84                                       nm->mkNode(GEQ, lt0, t12),
85                                       nm->mkNode(MINUS, lt0, t12),
86                                       d_zero));
87 
88     Node b1 = nm->mkNode(AND, b11, b12, b13);
89     Node b2 = skt.eqNode(d_empty_str);
90     Node lemma = nm->mkNode(ITE, cond, b1, b2);
91 
92     // assert:
93     // IF    n >=0 AND n < len( s ) AND m > 0
94     // THEN: s = sk1 ++ skt ++ sk2 AND
95     //       len( sk1 ) = n AND
96     //       len( sk2 ) = ite( len( s ) >= n+m, len( s )-(n+m), 0 )
97     // ELSE: skt = ""
98     new_nodes.push_back( lemma );
99 
100     // Thus, substr( s, n, m ) = skt
101     retNode = skt;
102   }
103   else if (t.getKind() == kind::STRING_STRIDOF)
104   {
105     // processing term:  indexof( x, y, n )
106     Node x = t[0];
107     Node y = t[1];
108     Node n = t[2];
109     Node skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof");
110 
111     Node negone = nm->mkConst(Rational(-1));
112     Node krange = nm->mkNode(GEQ, skk, negone);
113     // assert:   indexof( x, y, n ) >= -1
114     new_nodes.push_back( krange );
115     krange = nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, x), skk);
116     // assert:   len( x ) >= indexof( x, y, z )
117     new_nodes.push_back( krange );
118 
119     // substr( x, n, len( x ) - n )
120     Node st = nm->mkNode(STRING_SUBSTR,
121                          x,
122                          n,
123                          nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n));
124     Node io2 =
125         d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre");
126     Node io4 =
127         d_sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost");
128 
129     // ~contains( substr( x, n, len( x ) - n ), y )
130     Node c11 = nm->mkNode(STRING_STRCTN, st, y).negate();
131     // n > len( x )
132     Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x));
133     // 0 > n
134     Node c13 = nm->mkNode(GT, d_zero, n);
135     Node cond1 = nm->mkNode(OR, c11, c12, c13);
136     // skk = -1
137     Node cc1 = skk.eqNode(negone);
138 
139     // y = ""
140     Node cond2 = y.eqNode(d_empty_str);
141     // skk = n
142     Node cc2 = skk.eqNode(t[2]);
143 
144     // substr( x, n, len( x ) - n ) = str.++( io2, y, io4 )
145     Node c31 = st.eqNode(nm->mkNode(STRING_CONCAT, io2, y, io4));
146     // ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y )
147     Node c32 =
148         nm->mkNode(
149               STRING_STRCTN,
150               nm->mkNode(
151                   STRING_CONCAT,
152                   io2,
153                   nm->mkNode(
154                       STRING_SUBSTR,
155                       y,
156                       d_zero,
157                       nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), d_one))),
158               y)
159             .negate();
160     // skk = n + len( io2 )
161     Node c33 = skk.eqNode(nm->mkNode(PLUS, n, nm->mkNode(STRING_LENGTH, io2)));
162     Node cc3 = nm->mkNode(AND, c31, c32, c33);
163 
164     // assert:
165     // IF:   ~contains( substr( x, n, len( x ) - n ), y ) OR n > len(x) OR 0 > n
166     // THEN: skk = -1
167     // ELIF: y = ""
168     // THEN: skk = n
169     // ELSE: substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) ^
170     //       ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) ^
171     //       skk = n + len( io2 )
172     // for fresh io2, io4.
173     Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3));
174     new_nodes.push_back( rr );
175 
176     // Thus, indexof( x, y, n ) = skk.
177     retNode = skk;
178   }
179   else if (t.getKind() == STRING_ITOS)
180   {
181     // processing term:  int.to.str( n )
182     Node n = t[0];
183     Node itost = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost");
184     Node leni = nm->mkNode(STRING_LENGTH, itost);
185 
186     std::vector<Node> conc;
187     std::vector< TypeNode > argTypes;
188     argTypes.push_back(nm->integerType());
189     Node u = nm->mkSkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
190     Node us =
191         nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
192     Node ud =
193         nm->mkSkolem("Ud", nm->mkFunctionType(argTypes, nm->stringType()));
194 
195     Node lem = nm->mkNode(GEQ, leni, d_one);
196     conc.push_back(lem);
197 
198     lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni));
199     conc.push_back(lem);
200 
201     lem = d_zero.eqNode(nm->mkNode(APPLY_UF, u, d_zero));
202     conc.push_back(lem);
203 
204     lem = d_empty_str.eqNode(nm->mkNode(APPLY_UF, us, leni));
205     conc.push_back(lem);
206 
207     lem = itost.eqNode(nm->mkNode(APPLY_UF, us, d_zero));
208     conc.push_back(lem);
209 
210     Node x = nm->mkBoundVar(nm->integerType());
211     Node xPlusOne = nm->mkNode(PLUS, x, d_one);
212     Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
213     Node g =
214         nm->mkNode(AND, nm->mkNode(GEQ, x, d_zero), nm->mkNode(LT, x, leni));
215     Node udx = nm->mkNode(APPLY_UF, ud, x);
216     Node ux = nm->mkNode(APPLY_UF, u, x);
217     Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne);
218     Node c0 = nm->mkNode(STRING_CODE, nm->mkConst(String("0")));
219     Node c = nm->mkNode(MINUS, nm->mkNode(STRING_CODE, udx), c0);
220     Node usx = nm->mkNode(APPLY_UF, us, x);
221     Node usx1 = nm->mkNode(APPLY_UF, us, xPlusOne);
222 
223     Node ten = nm->mkConst(Rational(10));
224     Node eqs = usx.eqNode(nm->mkNode(STRING_CONCAT, udx, usx1));
225     Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
226     Node leadingZeroPos =
227         nm->mkNode(AND, x.eqNode(d_zero), nm->mkNode(GT, leni, d_one));
228     Node cb = nm->mkNode(
229         AND,
230         nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, d_one, d_zero)),
231         nm->mkNode(LT, c, ten));
232 
233     lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eqs, eq, cb));
234     lem = nm->mkNode(FORALL, xbv, lem);
235     conc.push_back(lem);
236 
237     Node nonneg = nm->mkNode(GEQ, n, d_zero);
238 
239     lem = nm->mkNode(
240         ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(d_empty_str));
241     new_nodes.push_back(lem);
242     // assert:
243     // IF n>=0
244     // THEN:
245     //   len( itost ) >= 1 ^
246     //   n = U( len( itost ) ) ^ U( 0 ) = 0 ^
247     //   "" = Us( len( itost ) ) ^ itost = Us( 0 ) ^
248     //   forall x. (x>=0 ^ x < str.len(itost)) =>
249     //     Us( x ) = Ud( x ) ++ Us( x+1 ) ^
250     //     U( x+1 ) = (str.code( Ud( x ) )-48) + 10*U( x ) ^
251     //     ite( x=0 AND str.len(itost)>1, 49, 48 ) <= str.code( Ud( x ) ) < 58
252     // ELSE
253     //   itost = ""
254     // thus:
255     //   int.to.str( n ) = itost
256 
257     // In the above encoding, we use Us/Ud to introduce a chain of strings
258     // that allow us to refer to each character substring of itost. Notice this
259     // is more efficient than using str.substr( itost, x, 1 ).
260     // The function U is an accumulator, where U( x ) for x>0 is the value of
261     // str.to.int( str.substr( int.to.str( n ), 0, x ) ). For example, for
262     // n=345, we have that U(1), U(2), U(3) = 3, 34, 345.
263     // Above, we use str.code to map characters to their integer value, where
264     // note that str.code( "0" ) = 48. Further notice that
265     //   ite( x=0 AND str.len(itost)>1, 49, 48 )
266     // enforces that int.to.str( n ) has no leading zeroes.
267     retNode = itost;
268   } else if( t.getKind() == kind::STRING_STOI ) {
269     Node s = t[0];
270     Node stoit = nm->mkSkolem("stoit", nm->integerType(), "created for stoi");
271     Node lens = nm->mkNode(STRING_LENGTH, s);
272 
273     std::vector<Node> conc1;
274     Node lem = stoit.eqNode(d_neg_one);
275     conc1.push_back(lem);
276 
277     Node sEmpty = s.eqNode(d_empty_str);
278     Node k = nm->mkSkolem("k", nm->integerType());
279     Node kc1 = nm->mkNode(GEQ, k, d_zero);
280     Node kc2 = nm->mkNode(LT, k, lens);
281     Node c0 = nm->mkNode(STRING_CODE, nm->mkConst(String("0")));
282     Node codeSk = nm->mkNode(
283         MINUS,
284         nm->mkNode(STRING_CODE, nm->mkNode(STRING_SUBSTR, s, k, d_one)),
285         c0);
286     Node ten = nm->mkConst(Rational(10));
287     Node kc3 = nm->mkNode(
288         OR, nm->mkNode(LT, codeSk, d_zero), nm->mkNode(GEQ, codeSk, ten));
289     conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3)));
290 
291     std::vector<Node> conc2;
292     std::vector< TypeNode > argTypes;
293     argTypes.push_back(nm->integerType());
294     Node u = nm->mkSkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
295     Node us =
296         nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
297     Node ud =
298         nm->mkSkolem("Ud", nm->mkFunctionType(argTypes, nm->stringType()));
299 
300     lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens));
301     conc2.push_back(lem);
302 
303     lem = d_zero.eqNode(nm->mkNode(APPLY_UF, u, d_zero));
304     conc2.push_back(lem);
305 
306     lem = d_empty_str.eqNode(nm->mkNode(APPLY_UF, us, lens));
307     conc2.push_back(lem);
308 
309     lem = s.eqNode(nm->mkNode(APPLY_UF, us, d_zero));
310     conc2.push_back(lem);
311 
312     Node x = nm->mkBoundVar(nm->integerType());
313     Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
314     Node g =
315         nm->mkNode(AND, nm->mkNode(GEQ, x, d_zero), nm->mkNode(LT, x, lens));
316     Node udx = nm->mkNode(APPLY_UF, ud, x);
317     Node ux = nm->mkNode(APPLY_UF, u, x);
318     Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, d_one));
319     Node c = nm->mkNode(MINUS, nm->mkNode(STRING_CODE, udx), c0);
320     Node usx = nm->mkNode(APPLY_UF, us, x);
321     Node usx1 = nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, x, d_one));
322 
323     Node eqs = usx.eqNode(nm->mkNode(STRING_CONCAT, udx, usx1));
324     Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
325     Node cb =
326         nm->mkNode(AND, nm->mkNode(GEQ, c, d_zero), nm->mkNode(LT, c, ten));
327 
328     lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eqs, eq, cb));
329     lem = nm->mkNode(FORALL, xbv, lem);
330     conc2.push_back(lem);
331 
332     Node sneg = nm->mkNode(LT, stoit, d_zero);
333     lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2));
334     new_nodes.push_back(lem);
335 
336     // assert:
337     // IF stoit < 0
338     // THEN:
339     //   stoit = -1 ^
340     //   ( s = "" OR
341     //     ( k>=0 ^ k<len( s ) ^ ( str.code( str.substr( s, k, 1 ) ) < 48 OR
342     //                             str.code( str.substr( s, k, 1 ) ) >= 58 )))
343     // ELSE:
344     //   stoit = U( len( s ) ) ^ U( 0 ) = 0 ^
345     //   "" = Us( len( s ) ) ^ s = Us( 0 ) ^
346     //   forall x. (x>=0 ^ x < str.len(s)) =>
347     //     Us( x ) = Ud( x ) ++ Us( x+1 ) ^
348     //     U( x+1 ) = ( str.code( Ud( x ) ) - 48 ) + 10*U( x )
349     //     48 <= str.code( Ud( x ) ) < 58
350     // Thus, str.to.int( s ) = stoit
351 
352     retNode = stoit;
353   }
354   else if (t.getKind() == kind::STRING_STRREPL)
355   {
356     // processing term: replace( x, y, z )
357     Node x = t[0];
358     Node y = t[1];
359     Node z = t[2];
360     TypeNode tn = t[0].getType();
361     Node rp1 =
362         d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre");
363     Node rp2 =
364         d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost");
365     Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw");
366 
367     // y = ""
368     Node cond1 = y.eqNode(nm->mkConst(CVC4::String("")));
369     // rpw = str.++( z, x )
370     Node c1 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, z, x));
371 
372     // contains( x, y )
373     Node cond2 = nm->mkNode(kind::STRING_STRCTN, x, y);
374     // x = str.++( rp1, y, rp2 )
375     Node c21 = x.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, y, rp2));
376     // rpw = str.++( rp1, z, rp2 )
377     Node c22 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, z, rp2));
378     // ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y )
379     Node c23 =
380         nm->mkNode(kind::STRING_STRCTN,
381                    nm->mkNode(
382                        kind::STRING_CONCAT,
383                        rp1,
384                        nm->mkNode(kind::STRING_SUBSTR,
385                                   y,
386                                   d_zero,
387                                   nm->mkNode(kind::MINUS,
388                                              nm->mkNode(kind::STRING_LENGTH, y),
389                                              d_one))),
390                    y)
391             .negate();
392 
393     // assert:
394     //   IF    y=""
395     //   THEN: rpw = str.++( z, x )
396     //   ELIF: contains( x, y )
397     //   THEN: x = str.++( rp1, y, rp2 ) ^
398     //         rpw = str.++( rp1, z, rp2 ) ^
399     //         ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ),
400     //   ELSE: rpw = x
401     // for fresh rp1, rp2, rpw
402     Node rr = nm->mkNode(kind::ITE,
403                          cond1,
404                          c1,
405                          nm->mkNode(kind::ITE,
406                                     cond2,
407                                     nm->mkNode(kind::AND, c21, c22, c23),
408                                     rpw.eqNode(x)));
409     new_nodes.push_back( rr );
410 
411     // Thus, replace( x, y, z ) = rpw.
412     retNode = rpw;
413   }
414   else if (t.getKind() == kind::STRING_STRREPLALL)
415   {
416     // processing term: replaceall( x, y, z )
417     Node x = t[0];
418     Node y = t[1];
419     Node z = t[2];
420     Node rpaw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw");
421 
422     Node numOcc = d_sc->mkTypedSkolemCached(
423         nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
424     std::vector<TypeNode> argTypes;
425     argTypes.push_back(nm->integerType());
426     Node us =
427         nm->mkSkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
428     TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
429     Node uf = d_sc->mkTypedSkolemCached(
430         ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
431 
432     Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
433     Node usno = nm->mkNode(APPLY_UF, us, numOcc);
434     Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
435 
436     std::vector<Node> lem;
437     lem.push_back(nm->mkNode(GEQ, numOcc, d_zero));
438     lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, d_zero)));
439     lem.push_back(usno.eqNode(rem));
440     lem.push_back(nm->mkNode(APPLY_UF, uf, d_zero).eqNode(d_zero));
441     lem.push_back(nm->mkNode(STRING_STRIDOF, x, y, ufno).eqNode(d_neg_one));
442 
443     Node i = nm->mkBoundVar(nm->integerType());
444     Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
445     Node bound =
446         nm->mkNode(AND, nm->mkNode(GEQ, i, d_zero), nm->mkNode(LT, i, numOcc));
447     Node ufi = nm->mkNode(APPLY_UF, uf, i);
448     Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, d_one));
449     Node ii = nm->mkNode(STRING_STRIDOF, x, y, ufi);
450     Node cc = nm->mkNode(
451         STRING_CONCAT,
452         nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)),
453         z,
454         nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, d_one)));
455 
456     std::vector<Node> flem;
457     flem.push_back(ii.eqNode(d_neg_one).negate());
458     flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc));
459     flem.push_back(
460         ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y))));
461 
462     Node q = nm->mkNode(
463         FORALL, bvli, nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)));
464     lem.push_back(q);
465 
466     // assert:
467     //   IF y=""
468     //   THEN: rpaw = x
469     //   ELSE:
470     //     numOcc >= 0 ^
471     //     rpaw = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc), len(x)) ^
472     //     Uf(0) = 0 ^ indexof( x, y, Uf(numOcc) ) = -1 ^
473     //     forall i. 0 <= i < numOcc =>
474     //        ii != -1 ^
475     //        Us( i ) = str.substr( x, Uf(i), ii - Uf(i) ) ++ z ++ Us(i+1) ^
476     //        Uf( i+1 ) = ii + len(y)
477     //        where ii == indexof( x, y, Uf(i) )
478 
479     // Conceptually, numOcc is the number of occurrences of y in x, Uf( i ) is
480     // the index to begin searching in x for y after the i^th occurrence of y in
481     // x, and Us( i ) is the result of processing the remainder after processing
482     // the i^th occurrence of y in x.
483     Node assert = nm->mkNode(
484         ITE, y.eqNode(d_empty_str), rpaw.eqNode(x), nm->mkNode(AND, lem));
485     new_nodes.push_back(assert);
486 
487     // Thus, replaceall( x, y, z ) = rpaw
488     retNode = rpaw;
489   } else if( t.getKind() == kind::STRING_STRCTN ){
490     Node x = t[0];
491     Node s = t[1];
492     //negative contains reduces to existential
493     Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x);
494     Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
495     Node b1 = NodeManager::currentNM()->mkBoundVar(NodeManager::currentNM()->integerType());
496     Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
497     Node body = NodeManager::currentNM()->mkNode( kind::AND,
498                   NodeManager::currentNM()->mkNode( kind::LEQ, d_zero, b1 ),
499                   NodeManager::currentNM()->mkNode( kind::LEQ, b1, NodeManager::currentNM()->mkNode( kind::MINUS, lenx, lens ) ),
500                   NodeManager::currentNM()->mkNode( kind::EQUAL, NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens), s )
501                 );
502     retNode = NodeManager::currentNM()->mkNode( kind::EXISTS, b1v, body );
503   }
504   else if (t.getKind() == kind::STRING_LEQ)
505   {
506     Node ltp = nm->mkSkolem("ltp", nm->booleanType());
507     Node k = nm->mkSkolem("k", nm->integerType());
508 
509     std::vector<Node> conj;
510     conj.push_back(nm->mkNode(GEQ, k, d_zero));
511     Node substr[2];
512     Node code[2];
513     for (unsigned r = 0; r < 2; r++)
514     {
515       Node ta = t[r];
516       Node tb = t[1 - r];
517       substr[r] = nm->mkNode(STRING_SUBSTR, ta, d_zero, k);
518       code[r] =
519           nm->mkNode(STRING_CODE, nm->mkNode(STRING_SUBSTR, ta, k, d_one));
520       conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta)));
521     }
522     conj.push_back(substr[0].eqNode(substr[1]));
523     std::vector<Node> ite_ch;
524     ite_ch.push_back(ltp);
525     for (unsigned r = 0; r < 2; r++)
526     {
527       ite_ch.push_back(nm->mkNode(LT, code[r], code[1 - r]));
528     }
529     conj.push_back(nm->mkNode(ITE, ite_ch));
530 
531     // Intuitively, the reduction says either x and y are equal, or they have
532     // some (maximal) common prefix after which their characters at position k
533     // are distinct, and the comparison of their code matches the return value
534     // of the overall term.
535     // Notice the below reduction relies on the semantics of str.code being -1
536     // for the empty string. In particular, say x = "a" and y = "ab", then the
537     // reduction below is satisfied for k = 1, since
538     //   str.code(substr( "a", 1, 1 )) = str.code( "" ) = -1 <
539     //   str.code(substr( "ab", 1, 1 )) = str.code( "b" ) = 66
540 
541     // assert:
542     //  IF x=y
543     //  THEN: ltp
544     //  ELSE: k >= 0 AND k <= len( x ) AND k <= len( y ) AND
545     //        substr( x, 0, k ) = substr( y, 0, k ) AND
546     //        IF    ltp
547     //        THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 ))
548     //        ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 ))
549     Node assert =
550         nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, nm->mkNode(AND, conj));
551     new_nodes.push_back(assert);
552 
553     // Thus, str.<=( x, y ) = ltp
554     retNode = ltp;
555   }
556 
557   if( t!=retNode ){
558     Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl;
559     if(!new_nodes.empty()) {
560       Trace("strings-preprocess") << " ... new nodes (" << (new_nodes.size()-prev_new_nodes) << "):" << std::endl;
561       for(unsigned int i=prev_new_nodes; i<new_nodes.size(); ++i) {
562         Trace("strings-preprocess") << "   " << new_nodes[i] << std::endl;
563       }
564     }
565   }
566   return retNode;
567 }
568 
simplifyRec(Node t,std::vector<Node> & new_nodes,std::map<Node,Node> & visited)569 Node StringsPreprocess::simplifyRec( Node t, std::vector< Node > & new_nodes, std::map< Node, Node >& visited ){
570   std::map< Node, Node >::iterator it = visited.find(t);
571   if( it!=visited.end() ){
572     return it->second;
573   }else{
574     Node retNode;
575     if( t.getNumChildren()==0 ){
576       retNode = simplify( t, new_nodes );
577     }else if( t.getKind()!=kind::FORALL ){
578       bool changed = false;
579       std::vector< Node > cc;
580       if( t.getMetaKind() == kind::metakind::PARAMETERIZED ){
581         cc.push_back( t.getOperator() );
582       }
583       for(unsigned i=0; i<t.getNumChildren(); i++) {
584         Node s = simplifyRec( t[i], new_nodes, visited );
585         cc.push_back( s );
586         if( s!=t[i] ) {
587           changed = true;
588         }
589       }
590       Node tmp = t;
591       if( changed ){
592         tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc );
593       }
594       retNode = simplify( tmp, new_nodes );
595     }
596     visited[t] = retNode;
597     return retNode;
598   }
599 }
600 
processAssertion(Node n,std::vector<Node> & new_nodes)601 Node StringsPreprocess::processAssertion( Node n, std::vector< Node > &new_nodes ) {
602   std::map< Node, Node > visited;
603   std::vector< Node > new_nodes_curr;
604   Node ret = simplifyRec( n, new_nodes_curr, visited );
605   while( !new_nodes_curr.empty() ){
606     Node curr = new_nodes_curr.back();
607     new_nodes_curr.pop_back();
608     std::vector< Node > new_nodes_tmp;
609     curr = simplifyRec( curr, new_nodes_tmp, visited );
610     new_nodes_curr.insert( new_nodes_curr.end(), new_nodes_tmp.begin(), new_nodes_tmp.end() );
611     new_nodes.push_back( curr );
612   }
613   return ret;
614 }
615 
processAssertions(std::vector<Node> & vec_node)616 void StringsPreprocess::processAssertions( std::vector< Node > &vec_node ){
617   std::map< Node, Node > visited;
618   for( unsigned i=0; i<vec_node.size(); i++ ){
619     Trace("strings-preprocess-debug") << "Preprocessing assertion " << vec_node[i] << std::endl;
620     //preprocess until fixed point
621     std::vector< Node > new_nodes;
622     std::vector< Node > new_nodes_curr;
623     new_nodes_curr.push_back( vec_node[i] );
624     while( !new_nodes_curr.empty() ){
625       Node curr = new_nodes_curr.back();
626       new_nodes_curr.pop_back();
627       std::vector< Node > new_nodes_tmp;
628       curr = simplifyRec( curr, new_nodes_tmp, visited );
629       new_nodes_curr.insert( new_nodes_curr.end(), new_nodes_tmp.begin(), new_nodes_tmp.end() );
630       new_nodes.push_back( curr );
631     }
632     Node res = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes );
633     if( res!=vec_node[i] ){
634       res = Rewriter::rewrite( res );
635       PROOF( ProofManager::currentPM()->addDependence( res, vec_node[i] ); );
636       vec_node[i] = res;
637     }
638   }
639 }
640 
641 }/* CVC4::theory::strings namespace */
642 }/* CVC4::theory namespace */
643 }/* CVC4 namespace */
644