1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the test suite of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 or version 3 as published by the Free
20 ** Software Foundation and appearing in the file LICENSE.LGPLv21 and
21 ** LICENSE.LGPLv3 included in the packaging of this file. Please review the
22 ** following information to ensure the GNU Lesser General Public License
23 ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
24 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 **
26 ** As a special exception, The Qt Company gives you certain additional
27 ** rights. These rights are described in The Qt Company LGPL Exception
28 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 **
30 ** GNU General Public License Usage
31 ** Alternatively, this file may be used under the terms of the GNU
32 ** General Public License version 3.0 as published by the Free Software
33 ** Foundation and appearing in the file LICENSE.GPL included in the
34 ** packaging of this file. Please review the following information to
35 ** ensure the GNU General Public License version 3.0 requirements will be
36 ** met: http://www.gnu.org/copyleft/gpl.html.
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41
42 #include <QDebug>
43 #include <QRegExp>
44 #include <QString>
45 #include <QFile>
46
47 #include <qtest.h>
48 #ifdef HAVE_BOOST
49 #include <boost/regex.hpp>
50 #endif
51
52 #include <QtScript>
53 #include "pcre/pcre.h"
54
55 #define ZLIB_VERSION "1.2.3.4"
56
57 class tst_qregexp : public QObject
58 {
59 Q_OBJECT
60 public:
61 tst_qregexp();
62 private slots:
63 void escape_old();
escape_old_data()64 void escape_old_data() { escape_data(); }
65 void escape_new1();
escape_new1_data()66 void escape_new1_data() { escape_data(); }
67 void escape_new2();
escape_new2_data()68 void escape_new2_data() { escape_data(); }
69 void escape_new3();
escape_new3_data()70 void escape_new3_data() { escape_data(); }
71 void escape_new4();
escape_new4_data()72 void escape_new4_data() { escape_data(); }
73 /*
74 JSC outperforms everything.
75 Boost is less impressive then expected.
76 */
77 void simpleFind1();
78 void rangeReplace1();
79 void matchReplace1();
80
81 void simpleFind2();
82 void rangeReplace2();
83 void matchReplace2();
84
85 void simpleFindJSC();
86 void rangeReplaceJSC();
87 void matchReplaceJSC();
88
89 #ifdef HAVE_BOOST
90 void simpleFindBoost();
91 void rangeReplaceBoost();
92 void matchReplaceBoost();
93 #endif
94
95 /* those apply an (incorrect) regexp on entire source
96 (this main.cpp). JSC appears to handle this
97 (ab)use case best. QRegExp performs extremly bad.
98 */
99 void horribleWrongReplace1();
100 void horribleReplace1();
101 void horribleReplace2();
102 void horribleWrongReplace2();
103 void horribleWrongReplaceJSC();
104 void horribleReplaceJSC();
105 #ifdef HAVE_BOOST
106 void horribleWrongReplaceBoost();
107 void horribleReplaceBoost();
108 #endif
109 private:
110 QString str1;
111 QString str2;
112 void escape_data();
113 };
114
tst_qregexp()115 tst_qregexp::tst_qregexp()
116 :QObject()
117 ,str1("We are all happy monkeys")
118 {
119 QFile f(":/main.cpp");
120 f.open(QFile::ReadOnly);
121 str2=f.readAll();
122 }
123
verify(const QString & quoted,const QString & expected)124 static void verify(const QString "ed, const QString &expected)
125 {
126 if (quoted != expected)
127 qDebug() << "ERROR:" << quoted << expected;
128 }
129
escape_data()130 void tst_qregexp::escape_data()
131 {
132 QTest::addColumn<QString>("pattern");
133 QTest::addColumn<QString>("expected");
134
135 QTest::newRow("escape 0") << "Hello world" << "Hello world";
136 QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)";
137 {
138 QString s;
139 for (int i = 0; i < 10; ++i)
140 s += "(escape)";
141 QTest::newRow("escape 10") << s << QRegExp::escape(s);
142 }
143 {
144 QString s;
145 for (int i = 0; i < 100; ++i)
146 s += "(escape)";
147 QTest::newRow("escape 100") << s << QRegExp::escape(s);
148 }
149 }
150
escape_old()151 void tst_qregexp::escape_old()
152 {
153 QFETCH(QString, pattern);
154 QFETCH(QString, expected);
155
156 QBENCHMARK {
157 static const char meta[] = "$()*+.?[\\]^{|}";
158 QString quoted = pattern;
159 int i = 0;
160
161 while (i < quoted.length()) {
162 if (strchr(meta, quoted.at(i).toLatin1()) != 0)
163 quoted.insert(i++, QLatin1Char('\\'));
164 ++i;
165 }
166
167 verify(quoted, expected);
168 }
169 }
170
escape_new1()171 void tst_qregexp::escape_new1()
172 {
173 QFETCH(QString, pattern);
174 QFETCH(QString, expected);
175
176 QBENCHMARK {
177 QString quoted;
178 const int count = pattern.count();
179 quoted.reserve(count * 2);
180 const QLatin1Char backslash('\\');
181 for (int i = 0; i < count; i++) {
182 switch (pattern.at(i).toLatin1()) {
183 case '$':
184 case '(':
185 case ')':
186 case '*':
187 case '+':
188 case '.':
189 case '?':
190 case '[':
191 case '\\':
192 case ']':
193 case '^':
194 case '{':
195 case '|':
196 case '}':
197 quoted.append(backslash);
198 }
199 quoted.append(pattern.at(i));
200 }
201 verify(quoted, expected);
202 }
203 }
204
escape_new2()205 void tst_qregexp::escape_new2()
206 {
207 QFETCH(QString, pattern);
208 QFETCH(QString, expected);
209
210 QBENCHMARK {
211 int count = pattern.count();
212 const QLatin1Char backslash('\\');
213 QString quoted(count * 2, backslash);
214 const QChar *patternData = pattern.data();
215 QChar *quotedData = quoted.data();
216 int escaped = 0;
217 for ( ; --count >= 0; ++patternData) {
218 const QChar c = *patternData;
219 switch (c.unicode()) {
220 case '$':
221 case '(':
222 case ')':
223 case '*':
224 case '+':
225 case '.':
226 case '?':
227 case '[':
228 case '\\':
229 case ']':
230 case '^':
231 case '{':
232 case '|':
233 case '}':
234 ++escaped;
235 ++quotedData;
236 }
237 *quotedData = c;
238 ++quotedData;
239 }
240 quoted.resize(pattern.size() + escaped);
241
242 verify(quoted, expected);
243 }
244 }
245
escape_new3()246 void tst_qregexp::escape_new3()
247 {
248 QFETCH(QString, pattern);
249 QFETCH(QString, expected);
250
251 QBENCHMARK {
252 QString quoted;
253 const int count = pattern.count();
254 quoted.reserve(count * 2);
255 const QLatin1Char backslash('\\');
256 for (int i = 0; i < count; i++) {
257 switch (pattern.at(i).toLatin1()) {
258 case '$':
259 case '(':
260 case ')':
261 case '*':
262 case '+':
263 case '.':
264 case '?':
265 case '[':
266 case '\\':
267 case ']':
268 case '^':
269 case '{':
270 case '|':
271 case '}':
272 quoted += backslash;
273 }
274 quoted += pattern.at(i);
275 }
276
277 verify(quoted, expected);
278 }
279 }
280
281
/*
    Returns true if the character code \a c is one of the regexp meta
    characters  $ ( ) * + . ? [ \ ] ^ { | }  and therefore has to be preceded
    by a backslash when escaping a pattern; returns false for any other value.
*/
static inline bool needsEscaping(int c)
{
    switch (c) {
    case '$': case '(': case ')': case '*': case '+':
    case '.': case '?': case '[': case '\\': case ']':
    case '^': case '{': case '|': case '}':
        return true;
    default:
        return false;
    }
}
303
escape_new4()304 void tst_qregexp::escape_new4()
305 {
306 QFETCH(QString, pattern);
307 QFETCH(QString, expected);
308
309 QBENCHMARK {
310 const int n = pattern.size();
311 const QChar *patternData = pattern.data();
312 // try to prevent copy if no escape is needed
313 int i = 0;
314 for (int i = 0; i != n; ++i) {
315 const QChar c = patternData[i];
316 if (needsEscaping(c.unicode()))
317 break;
318 }
319 if (i == n) {
320 verify(pattern, expected);
321 // no escaping needed, "return pattern" should be done here.
322 return;
323 }
324 const QLatin1Char backslash('\\');
325 QString quoted(n * 2, backslash);
326 QChar *quotedData = quoted.data();
327 for (int j = 0; j != i; ++j)
328 *quotedData++ = *patternData++;
329 int escaped = 0;
330 for (; i != n; ++i) {
331 const QChar c = *patternData;
332 if (needsEscaping(c.unicode())) {
333 ++escaped;
334 ++quotedData;
335 }
336 *quotedData = c;
337 ++quotedData;
338 ++patternData;
339 }
340 quoted.resize(n + escaped);
341 verify(quoted, expected);
342 // "return quoted"
343 }
344 }
345
346
simpleFind1()347 void tst_qregexp::simpleFind1()
348 {
349 int roff;
350 QRegExp rx("happy");
351 rx.setPatternSyntax(QRegExp::RegExp);
352 QBENCHMARK{
353 roff = rx.indexIn(str1);
354 }
355 QCOMPARE(roff, 11);
356 }
357
rangeReplace1()358 void tst_qregexp::rangeReplace1()
359 {
360 QString r;
361 QRegExp rx("[a-f]");
362 rx.setPatternSyntax(QRegExp::RegExp);
363 QBENCHMARK{
364 r = QString(str1).replace(rx, "-");
365 }
366 QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
367 }
368
matchReplace1()369 void tst_qregexp::matchReplace1()
370 {
371 QString r;
372 QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
373 rx.setPatternSyntax(QRegExp::RegExp);
374 QBENCHMARK{
375 r = QString(str1).replace(rx, "\\1");
376 }
377 QCOMPARE(r, QString("eaeaae"));
378 }
379
horribleWrongReplace1()380 void tst_qregexp::horribleWrongReplace1()
381 {
382 QString r;
383 QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
384 rx.setPatternSyntax(QRegExp::RegExp);
385 QBENCHMARK{
386 r = QString(str2).replace(rx, "\\1.\\2.\\3");
387 }
388 QCOMPARE(r, str2);
389 }
390
horribleReplace1()391 void tst_qregexp::horribleReplace1()
392 {
393 QString r;
394 QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
395 rx.setPatternSyntax(QRegExp::RegExp);
396 QBENCHMARK{
397 r = QString(str2).replace(rx, "\\1.\\2.\\3");
398 }
399 QCOMPARE(r, QString("1.2.3"));
400 }
401
402
simpleFind2()403 void tst_qregexp::simpleFind2()
404 {
405 int roff;
406 QRegExp rx("happy");
407 rx.setPatternSyntax(QRegExp::RegExp2);
408 QBENCHMARK{
409 roff = rx.indexIn(str1);
410 }
411 QCOMPARE(roff, 11);
412 }
413
rangeReplace2()414 void tst_qregexp::rangeReplace2()
415 {
416 QString r;
417 QRegExp rx("[a-f]");
418 rx.setPatternSyntax(QRegExp::RegExp2);
419 QBENCHMARK{
420 r = QString(str1).replace(rx, "-");
421 }
422 QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
423 }
424
matchReplace2()425 void tst_qregexp::matchReplace2()
426 {
427 QString r;
428 QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
429 rx.setPatternSyntax(QRegExp::RegExp2);
430 QBENCHMARK{
431 r = QString(str1).replace(rx, "\\1");
432 }
433 QCOMPARE(r, QString("eaeaae"));
434 }
435
horribleWrongReplace2()436 void tst_qregexp::horribleWrongReplace2()
437 {
438 QString r;
439 QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
440 rx.setPatternSyntax(QRegExp::RegExp2);
441 QBENCHMARK{
442 r = QString(str2).replace(rx, "\\1.\\2.\\3");
443 }
444 QCOMPARE(r, str2);
445 }
446
horribleReplace2()447 void tst_qregexp::horribleReplace2()
448 {
449 QString r;
450 QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
451 rx.setPatternSyntax(QRegExp::RegExp2);
452 QBENCHMARK{
453 r = QString(str2).replace(rx, "\\1.\\2.\\3");
454 }
455 QCOMPARE(r, QString("1.2.3"));
456 }
457
458
simpleFindJSC()459 void tst_qregexp::simpleFindJSC()
460 {
461 int numr;
462 const char * errmsg=" ";
463 QString rxs("happy");
464 JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg);
465 QVERIFY(rx != 0);
466 QString s(str1);
467 int offsetVector[3];
468 QBENCHMARK{
469 numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3);
470 }
471 jsRegExpFree(rx);
472 QCOMPARE(numr, 1);
473 QCOMPARE(offsetVector[0], 11);
474 }
475
rangeReplaceJSC()476 void tst_qregexp::rangeReplaceJSC()
477 {
478 QScriptValue r;
479 QScriptEngine engine;
480 engine.globalObject().setProperty("s", str1);
481 QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )");
482 QVERIFY(replaceFunc.isFunction());
483 QBENCHMARK{
484 r = replaceFunc.call(QScriptValue());
485 }
486 QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys"));
487 }
488
matchReplaceJSC()489 void tst_qregexp::matchReplaceJSC()
490 {
491 QScriptValue r;
492 QScriptEngine engine;
493 engine.globalObject().setProperty("s", str1);
494 QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )");
495 QVERIFY(replaceFunc.isFunction());
496 QBENCHMARK{
497 r = replaceFunc.call(QScriptValue());
498 }
499 QCOMPARE(r.toString(), QString("eaeaae"));
500 }
501
horribleWrongReplaceJSC()502 void tst_qregexp::horribleWrongReplaceJSC()
503 {
504 QScriptValue r;
505 QScriptEngine engine;
506 engine.globalObject().setProperty("s", str2);
507 QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )");
508 QVERIFY(replaceFunc.isFunction());
509 QBENCHMARK{
510 r = replaceFunc.call(QScriptValue());
511 }
512 QCOMPARE(r.toString(), str2);
513 }
514
horribleReplaceJSC()515 void tst_qregexp::horribleReplaceJSC()
516 {
517 QScriptValue r;
518 QScriptEngine engine;
519 // the m flag doesnt actually work here; dunno
520 engine.globalObject().setProperty("s", str2.replace('\n', ' '));
521 QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )");
522 QVERIFY(replaceFunc.isFunction());
523 QBENCHMARK{
524 r = replaceFunc.call(QScriptValue());
525 }
526 QCOMPARE(r.toString(), QString("1.2.3"));
527 }
528
529
530 #ifdef HAVE_BOOST
simpleFindBoost()531 void tst_qregexp::simpleFindBoost(){
532 int roff;
533 boost::regex rx ("happy", boost::regex_constants::perl);
534 std::string s = str1.toStdString();
535 std::string::const_iterator start, end;
536 start = s.begin();
537 end = s.end();
538 boost::match_flag_type flags = boost::match_default;
539 QBENCHMARK{
540 boost::match_results<std::string::const_iterator> what;
541 regex_search(start, end, what, rx, flags);
542 roff = (what[0].first)-start;
543 }
544 QCOMPARE(roff, 11);
545 }
546
rangeReplaceBoost()547 void tst_qregexp::rangeReplaceBoost()
548 {
549 boost::regex pattern ("[a-f]", boost::regex_constants::perl);
550 std::string s = str1.toStdString();
551 std::string r;
552 QBENCHMARK{
553 r = boost::regex_replace (s, pattern, "-");
554 }
555 QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys"));
556 }
557
matchReplaceBoost()558 void tst_qregexp::matchReplaceBoost()
559 {
560 boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl);
561 std::string s = str1.toStdString();
562 std::string r;
563 QBENCHMARK{
564 r = boost::regex_replace (s, pattern, "$1");
565 }
566 QCOMPARE(r, std::string("eaeaae"));
567 }
568
horribleWrongReplaceBoost()569 void tst_qregexp::horribleWrongReplaceBoost()
570 {
571 boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl);
572 std::string s = str2.toStdString();
573 std::string r;
574 QBENCHMARK{
575 r = boost::regex_replace (s, pattern, "$1.$2.$3");
576 }
577 QCOMPARE(r, s);
578 }
579
horribleReplaceBoost()580 void tst_qregexp::horribleReplaceBoost()
581 {
582 boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl);
583 std::string s = str2.toStdString();
584 std::string r;
585 QBENCHMARK{
586 r = boost::regex_replace (s, pattern, "$1.$2.$3");
587 }
588 QCOMPARE(r, std::string("1.2.3"));
589 }
590 #endif //HAVE_BOOST
591
592 QTEST_MAIN(tst_qregexp)
593
594 #include "main.moc"
595