1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the test suite of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 or version 3 as published by the Free
20 ** Software Foundation and appearing in the file LICENSE.LGPLv21 and
21 ** LICENSE.LGPLv3 included in the packaging of this file. Please review the
22 ** following information to ensure the GNU Lesser General Public License
23 ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
24 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 **
26 ** As a special exception, The Qt Company gives you certain additional
27 ** rights. These rights are described in The Qt Company LGPL Exception
28 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 **
30 ** GNU General Public License Usage
31 ** Alternatively, this file may be used under the terms of the GNU
32 ** General Public License version 3.0 as published by the Free Software
33 ** Foundation and appearing in the file LICENSE.GPL included in the
34 ** packaging of this file.  Please review the following information to
35 ** ensure the GNU General Public License version 3.0 requirements will be
36 ** met: http://www.gnu.org/copyleft/gpl.html.
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include <QDebug>
43 #include <QRegExp>
44 #include <QString>
45 #include <QFile>
46 
47 #include <qtest.h>
48 #ifdef HAVE_BOOST
49 #include <boost/regex.hpp>
50 #endif
51 
52 #include <QtScript>
53 #include "pcre/pcre.h"
54 
// Not referenced as a macro anywhere in this file. The horrible* benchmarks
// run their regexps over the text loaded from ":/main.cpp" and search that
// text for this very line. Note that the version has four components: the
// "wrong" patterns expect the closing quote right after the third one and
// therefore never match.
#define ZLIB_VERSION "1.2.3.4"
56 
// Benchmark fixture comparing QRegExp (both RegExp and RegExp2 syntaxes) with
// the JavaScriptCore regexp engine (directly and through QtScript) and,
// when HAVE_BOOST is defined, with Boost.Regex.
class tst_qregexp : public QObject
{
    Q_OBJECT
public:
    tst_qregexp();
private slots:
    // Alternative implementations of regexp meta-character escaping.
    // Each *_data() slot feeds its benchmark the rows built by escape_data().
    void escape_old();
    void escape_old_data() { escape_data(); }
    void escape_new1();
    void escape_new1_data() { escape_data(); }
    void escape_new2();
    void escape_new2_data() { escape_data(); }
    void escape_new3();
    void escape_new3_data() { escape_data(); }
    void escape_new4();
    void escape_new4_data() { escape_data(); }
/*
   JSC outperforms everything.
   Boost is less impressive than expected.
 */
    // QRegExp with QRegExp::RegExp syntax, applied to str1.
    void simpleFind1();
    void rangeReplace1();
    void matchReplace1();

    // QRegExp with QRegExp::RegExp2 syntax, applied to str1.
    void simpleFind2();
    void rangeReplace2();
    void matchReplace2();

    // JavaScriptCore / QtScript, applied to str1.
    void simpleFindJSC();
    void rangeReplaceJSC();
    void matchReplaceJSC();

#ifdef HAVE_BOOST
    // Boost.Regex, applied to str1.
    void simpleFindBoost();
    void rangeReplaceBoost();
    void matchReplaceBoost();
#endif

/* These apply an (incorrect) regexp to the entire source
   (this main.cpp). JSC appears to handle this
   (ab)use case best. QRegExp performs extremely badly.
 */
    void horribleWrongReplace1();
    void horribleReplace1();
    void horribleReplace2();
    void horribleWrongReplace2();
    void horribleWrongReplaceJSC();
    void horribleReplaceJSC();
#ifdef HAVE_BOOST
    void horribleWrongReplaceBoost();
    void horribleReplaceBoost();
#endif
private:
    // Short fixed sentence, set in the constructor.
    QString str1;
    // Contents of the ":/main.cpp" resource, read in the constructor.
    QString str2;
    // Shared row builder for all escape_* benchmarks.
    void escape_data();
};
114 
tst_qregexp()115 tst_qregexp::tst_qregexp()
116     :QObject()
117     ,str1("We are all happy monkeys")
118 {
119         QFile f(":/main.cpp");
120         f.open(QFile::ReadOnly);
121         str2=f.readAll();
122 }
123 
verify(const QString & quoted,const QString & expected)124 static void verify(const QString &quoted, const QString &expected)
125 {
126     if (quoted != expected)
127         qDebug() << "ERROR:" << quoted << expected;
128 }
129 
escape_data()130 void tst_qregexp::escape_data()
131 {
132     QTest::addColumn<QString>("pattern");
133     QTest::addColumn<QString>("expected");
134 
135     QTest::newRow("escape 0") << "Hello world" << "Hello world";
136     QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)";
137     {
138         QString s;
139         for (int i = 0; i < 10; ++i)
140             s += "(escape)";
141         QTest::newRow("escape 10") << s << QRegExp::escape(s);
142     }
143     {
144         QString s;
145         for (int i = 0; i < 100; ++i)
146             s += "(escape)";
147         QTest::newRow("escape 100") << s << QRegExp::escape(s);
148     }
149 }
150 
escape_old()151 void tst_qregexp::escape_old()
152 {
153     QFETCH(QString, pattern);
154     QFETCH(QString, expected);
155 
156     QBENCHMARK {
157         static const char meta[] = "$()*+.?[\\]^{|}";
158         QString quoted = pattern;
159         int i = 0;
160 
161         while (i < quoted.length()) {
162             if (strchr(meta, quoted.at(i).toLatin1()) != 0)
163                 quoted.insert(i++, QLatin1Char('\\'));
164             ++i;
165         }
166 
167         verify(quoted, expected);
168     }
169 }
170 
escape_new1()171 void tst_qregexp::escape_new1()
172 {
173     QFETCH(QString, pattern);
174     QFETCH(QString, expected);
175 
176     QBENCHMARK {
177         QString quoted;
178         const int count = pattern.count();
179         quoted.reserve(count * 2);
180         const QLatin1Char backslash('\\');
181         for (int i = 0; i < count; i++) {
182             switch (pattern.at(i).toLatin1()) {
183             case '$':
184             case '(':
185             case ')':
186             case '*':
187             case '+':
188             case '.':
189             case '?':
190             case '[':
191             case '\\':
192             case ']':
193             case '^':
194             case '{':
195             case '|':
196             case '}':
197                 quoted.append(backslash);
198             }
199             quoted.append(pattern.at(i));
200         }
201         verify(quoted, expected);
202     }
203 }
204 
escape_new2()205 void tst_qregexp::escape_new2()
206 {
207     QFETCH(QString, pattern);
208     QFETCH(QString, expected);
209 
210     QBENCHMARK {
211         int count = pattern.count();
212         const QLatin1Char backslash('\\');
213         QString quoted(count * 2, backslash);
214         const QChar *patternData = pattern.data();
215         QChar *quotedData = quoted.data();
216         int escaped = 0;
217         for ( ; --count >= 0; ++patternData) {
218             const QChar c = *patternData;
219             switch (c.unicode()) {
220             case '$':
221             case '(':
222             case ')':
223             case '*':
224             case '+':
225             case '.':
226             case '?':
227             case '[':
228             case '\\':
229             case ']':
230             case '^':
231             case '{':
232             case '|':
233             case '}':
234                 ++escaped;
235                 ++quotedData;
236             }
237             *quotedData = c;
238             ++quotedData;
239         }
240         quoted.resize(pattern.size() + escaped);
241 
242         verify(quoted, expected);
243     }
244 }
245 
escape_new3()246 void tst_qregexp::escape_new3()
247 {
248     QFETCH(QString, pattern);
249     QFETCH(QString, expected);
250 
251     QBENCHMARK {
252         QString quoted;
253         const int count = pattern.count();
254         quoted.reserve(count * 2);
255         const QLatin1Char backslash('\\');
256         for (int i = 0; i < count; i++) {
257             switch (pattern.at(i).toLatin1()) {
258             case '$':
259             case '(':
260             case ')':
261             case '*':
262             case '+':
263             case '.':
264             case '?':
265             case '[':
266             case '\\':
267             case ']':
268             case '^':
269             case '{':
270             case '|':
271             case '}':
272                 quoted += backslash;
273             }
274             quoted += pattern.at(i);
275         }
276 
277         verify(quoted, expected);
278     }
279 }
280 
281 
// Returns true if the character code c is one of the regexp meta-characters
// that must be preceded by a backslash, false for every other value.
static inline bool needsEscaping(int c)
{
    switch (c) {
    case '$': case '(': case ')': case '*': case '+':
    case '.': case '?': case '[': case '\\': case ']':
    case '^': case '{': case '|': case '}':
        return true;
    default:
        return false;
    }
}
303 
escape_new4()304 void tst_qregexp::escape_new4()
305 {
306     QFETCH(QString, pattern);
307     QFETCH(QString, expected);
308 
309     QBENCHMARK {
310         const int n = pattern.size();
311         const QChar *patternData = pattern.data();
312         // try to prevent copy if no escape is needed
313         int i = 0;
314         for (int i = 0; i != n; ++i) {
315             const QChar c = patternData[i];
316             if (needsEscaping(c.unicode()))
317                 break;
318         }
319         if (i == n) {
320             verify(pattern, expected);
321             // no escaping needed, "return pattern" should be done here.
322             return;
323         }
324         const QLatin1Char backslash('\\');
325         QString quoted(n * 2, backslash);
326         QChar *quotedData = quoted.data();
327         for (int j = 0; j != i; ++j)
328             *quotedData++ = *patternData++;
329         int escaped = 0;
330         for (; i != n; ++i) {
331             const QChar c = *patternData;
332             if (needsEscaping(c.unicode())) {
333                 ++escaped;
334                 ++quotedData;
335             }
336             *quotedData = c;
337             ++quotedData;
338             ++patternData;
339         }
340         quoted.resize(n + escaped);
341         verify(quoted, expected);
342         // "return quoted"
343     }
344 }
345 
346 
simpleFind1()347 void tst_qregexp::simpleFind1()
348 {
349     int roff;
350     QRegExp rx("happy");
351     rx.setPatternSyntax(QRegExp::RegExp);
352     QBENCHMARK{
353         roff = rx.indexIn(str1);
354     }
355     QCOMPARE(roff, 11);
356 }
357 
rangeReplace1()358 void tst_qregexp::rangeReplace1()
359 {
360     QString r;
361     QRegExp rx("[a-f]");
362     rx.setPatternSyntax(QRegExp::RegExp);
363     QBENCHMARK{
364         r = QString(str1).replace(rx, "-");
365     }
366     QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
367 }
368 
matchReplace1()369 void tst_qregexp::matchReplace1()
370 {
371     QString r;
372     QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
373     rx.setPatternSyntax(QRegExp::RegExp);
374     QBENCHMARK{
375         r = QString(str1).replace(rx, "\\1");
376     }
377     QCOMPARE(r, QString("eaeaae"));
378 }
379 
horribleWrongReplace1()380 void tst_qregexp::horribleWrongReplace1()
381 {
382     QString r;
383     QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
384     rx.setPatternSyntax(QRegExp::RegExp);
385     QBENCHMARK{
386         r = QString(str2).replace(rx, "\\1.\\2.\\3");
387     }
388     QCOMPARE(r, str2);
389 }
390 
horribleReplace1()391 void tst_qregexp::horribleReplace1()
392 {
393     QString r;
394     QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
395     rx.setPatternSyntax(QRegExp::RegExp);
396     QBENCHMARK{
397         r = QString(str2).replace(rx, "\\1.\\2.\\3");
398     }
399     QCOMPARE(r, QString("1.2.3"));
400 }
401 
402 
simpleFind2()403 void tst_qregexp::simpleFind2()
404 {
405     int roff;
406     QRegExp rx("happy");
407     rx.setPatternSyntax(QRegExp::RegExp2);
408     QBENCHMARK{
409         roff = rx.indexIn(str1);
410     }
411     QCOMPARE(roff, 11);
412 }
413 
rangeReplace2()414 void tst_qregexp::rangeReplace2()
415 {
416     QString r;
417     QRegExp rx("[a-f]");
418     rx.setPatternSyntax(QRegExp::RegExp2);
419     QBENCHMARK{
420         r = QString(str1).replace(rx, "-");
421     }
422     QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
423 }
424 
matchReplace2()425 void tst_qregexp::matchReplace2()
426 {
427     QString r;
428     QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
429     rx.setPatternSyntax(QRegExp::RegExp2);
430     QBENCHMARK{
431         r = QString(str1).replace(rx, "\\1");
432     }
433     QCOMPARE(r, QString("eaeaae"));
434 }
435 
horribleWrongReplace2()436 void tst_qregexp::horribleWrongReplace2()
437 {
438     QString r;
439     QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
440     rx.setPatternSyntax(QRegExp::RegExp2);
441     QBENCHMARK{
442         r = QString(str2).replace(rx, "\\1.\\2.\\3");
443     }
444     QCOMPARE(r, str2);
445 }
446 
horribleReplace2()447 void tst_qregexp::horribleReplace2()
448 {
449     QString r;
450     QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
451     rx.setPatternSyntax(QRegExp::RegExp2);
452     QBENCHMARK{
453         r = QString(str2).replace(rx, "\\1.\\2.\\3");
454     }
455     QCOMPARE(r, QString("1.2.3"));
456 }
457 
458 
simpleFindJSC()459 void tst_qregexp::simpleFindJSC()
460 {
461     int numr;
462     const char * errmsg="  ";
463     QString rxs("happy");
464     JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg);
465     QVERIFY(rx != 0);
466     QString s(str1);
467     int offsetVector[3];
468     QBENCHMARK{
469         numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0,  offsetVector, 3);
470     }
471     jsRegExpFree(rx);
472     QCOMPARE(numr, 1);
473     QCOMPARE(offsetVector[0], 11);
474 }
475 
rangeReplaceJSC()476 void tst_qregexp::rangeReplaceJSC()
477 {
478     QScriptValue r;
479     QScriptEngine engine;
480     engine.globalObject().setProperty("s", str1);
481     QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-')  } )");
482     QVERIFY(replaceFunc.isFunction());
483     QBENCHMARK{
484         r = replaceFunc.call(QScriptValue());
485     }
486     QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys"));
487 }
488 
matchReplaceJSC()489 void tst_qregexp::matchReplaceJSC()
490 {
491     QScriptValue r;
492     QScriptEngine engine;
493     engine.globalObject().setProperty("s", str1);
494     QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1')  } )");
495     QVERIFY(replaceFunc.isFunction());
496     QBENCHMARK{
497         r = replaceFunc.call(QScriptValue());
498     }
499     QCOMPARE(r.toString(), QString("eaeaae"));
500 }
501 
horribleWrongReplaceJSC()502 void tst_qregexp::horribleWrongReplaceJSC()
503 {
504     QScriptValue r;
505     QScriptEngine engine;
506     engine.globalObject().setProperty("s", str2);
507     QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3')  } )");
508     QVERIFY(replaceFunc.isFunction());
509     QBENCHMARK{
510         r = replaceFunc.call(QScriptValue());
511     }
512     QCOMPARE(r.toString(), str2);
513 }
514 
horribleReplaceJSC()515 void tst_qregexp::horribleReplaceJSC()
516 {
517     QScriptValue r;
518     QScriptEngine engine;
519     // the m flag doesnt actually work here; dunno
520     engine.globalObject().setProperty("s", str2.replace('\n', ' '));
521     QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3')  } )");
522     QVERIFY(replaceFunc.isFunction());
523     QBENCHMARK{
524         r = replaceFunc.call(QScriptValue());
525     }
526     QCOMPARE(r.toString(), QString("1.2.3"));
527 }
528 
529 
530 #ifdef HAVE_BOOST
simpleFindBoost()531 void tst_qregexp::simpleFindBoost(){
532     int roff;
533     boost::regex rx ("happy", boost::regex_constants::perl);
534     std::string s = str1.toStdString();
535     std::string::const_iterator start, end;
536     start = s.begin();
537     end = s.end();
538     boost::match_flag_type flags = boost::match_default;
539     QBENCHMARK{
540         boost::match_results<std::string::const_iterator> what;
541         regex_search(start, end, what, rx, flags);
542         roff = (what[0].first)-start;
543     }
544     QCOMPARE(roff, 11);
545 }
546 
rangeReplaceBoost()547 void tst_qregexp::rangeReplaceBoost()
548 {
549     boost::regex pattern ("[a-f]", boost::regex_constants::perl);
550     std::string s = str1.toStdString();
551     std::string r;
552     QBENCHMARK{
553         r = boost::regex_replace (s, pattern, "-");
554     }
555     QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys"));
556 }
557 
matchReplaceBoost()558 void tst_qregexp::matchReplaceBoost()
559 {
560     boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl);
561     std::string s = str1.toStdString();
562     std::string r;
563     QBENCHMARK{
564         r = boost::regex_replace (s, pattern, "$1");
565     }
566     QCOMPARE(r, std::string("eaeaae"));
567 }
568 
horribleWrongReplaceBoost()569 void tst_qregexp::horribleWrongReplaceBoost()
570 {
571     boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl);
572     std::string s = str2.toStdString();
573     std::string r;
574     QBENCHMARK{
575         r = boost::regex_replace (s, pattern, "$1.$2.$3");
576     }
577     QCOMPARE(r, s);
578 }
579 
horribleReplaceBoost()580 void tst_qregexp::horribleReplaceBoost()
581 {
582     boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl);
583     std::string s = str2.toStdString();
584     std::string r;
585     QBENCHMARK{
586         r = boost::regex_replace (s, pattern, "$1.$2.$3");
587     }
588     QCOMPARE(r, std::string("1.2.3"));
589 }
590 #endif //HAVE_BOOST
591 
592 QTEST_MAIN(tst_qregexp)
593 
594 #include "main.moc"
595