1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35 
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39 
40 #include <stdio.h>
41 #include <string.h>      /* for memset and strcmp */
42 #include <cassert>
43 #include <vector>
44 #include "pcrecpp.h"
45 
46 using std::string;
47 using pcrecpp::StringPiece;
48 using pcrecpp::RE;
49 using pcrecpp::RE_Options;
50 using pcrecpp::Hex;
51 using pcrecpp::Octal;
52 using pcrecpp::CRadix;
53 
54 static bool VERBOSE_TEST  = false;
55 
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file name, line number and the text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask`) is compared as a whole expression.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
69 
70 static void Timing1(int num_iters) {
71   // Same pattern lots of times
72   RE pattern("ruby:\\d+");
73   StringPiece p("ruby:1234");
74   for (int j = num_iters; j > 0; j--) {
75     CHECK(pattern.FullMatch(p));
76   }
77 }
78 
79 static void Timing2(int num_iters) {
80   // Same pattern lots of times
81   RE pattern("ruby:(\\d+)");
82   int i;
83   for (int j = num_iters; j > 0; j--) {
84     CHECK(pattern.FullMatch("ruby:1234", &i));
85     CHECK_EQ(i, 1234);
86   }
87 }
88 
89 static void Timing3(int num_iters) {
90   string text_string;
91   for (int j = num_iters; j > 0; j--) {
92     text_string += "this is another line\n";
93   }
94 
95   RE line_matcher(".*\n");
96   string line;
97   StringPiece text(text_string);
98   int counter = 0;
99   while (line_matcher.Consume(&text)) {
100     counter++;
101   }
102   printf("Matched %d lines\n", counter);
103 }
104 
#if 0  // change this to "#if 1" if you have a way of defining VirtualProcessSize()

// Check for memory leaks: constructs 100000 RE objects from distinct
// patterns and requires that the size reported by VirtualProcessSize()
// after the last iteration is less than 2% above the size sampled at
// iteration 50000.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);       // Allow < 2% growth
}

#endif
127 
// Exercises integer extraction through the Hex(), Octal() and CRadix()
// argument wrappers, and through plain pointers for decimal, for several
// built-in integer types.  Each CHECK_* helper macro below is defined, used
// and #undef'd inside this function.
static void RadixTests() {
  printf("Testing hex\n");

  // CHECK_HEX(type, value): `value` is written without the 0x prefix.  Its
  // stringified text (the pattern accepts a trailing u/U/l/L suffix outside
  // the capture group) is extracted into a `type` via Hex(), then again with
  // a "0x" prefix via CRadix(); each result is compared with the literal
  // formed by pasting 0x onto `value`.
#define CHECK_HEX(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
    CHECK_EQ(v, 0x ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
    CHECK_EQ(v, 0x ## value); \
  } while(0)

  CHECK_HEX(short,              2bad);
  CHECK_HEX(unsigned short,     2badU);
  CHECK_HEX(int,                dead);
  CHECK_HEX(unsigned int,       deadU);
  CHECK_HEX(long,               7eadbeefL);
  CHECK_HEX(unsigned long,      deadbeefUL);
#ifdef HAVE_LONG_LONG
  CHECK_HEX(long long,          12345678deadbeefLL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
#endif

#undef CHECK_HEX

  printf("Testing octal\n");

  // CHECK_OCTAL(type, value): same scheme as the hex checks above, using
  // Octal() for the bare digits and CRadix() for the text with a leading
  // "0"; the expected value is the literal formed by pasting 0 onto `value`.
#define CHECK_OCTAL(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
    CHECK_EQ(v, 0 ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
    CHECK_EQ(v, 0 ## value); \
  } while(0)

  CHECK_OCTAL(short,              77777);
  CHECK_OCTAL(unsigned short,     177777U);
  CHECK_OCTAL(int,                17777777777);
  CHECK_OCTAL(unsigned int,       37777777777U);
  CHECK_OCTAL(long,               17777777777L);
  CHECK_OCTAL(unsigned long,      37777777777UL);
#ifdef HAVE_LONG_LONG
  CHECK_OCTAL(long long,          777777777777777777777LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
#endif

#undef CHECK_OCTAL

  printf("Testing decimal\n");

  // CHECK_DECIMAL(type, value): the stringified `value` is extracted once
  // through a plain pointer and once through CRadix(), with no radix prefix
  // in either case; both results are compared with `value` itself.
#define CHECK_DECIMAL(type, value) \
  do { \
    type v; \
    CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
    CHECK_EQ(v, value); \
    CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
    CHECK_EQ(v, value); \
  } while(0)

  CHECK_DECIMAL(short,              -1);
  CHECK_DECIMAL(unsigned short,     9999);
  CHECK_DECIMAL(int,                -1000);
  CHECK_DECIMAL(unsigned int,       12345U);
  CHECK_DECIMAL(long,               -10000000L);
  CHECK_DECIMAL(unsigned long,      3083324652U);
#ifdef HAVE_LONG_LONG
  CHECK_DECIMAL(long long,          -100000000000000LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
#endif

#undef CHECK_DECIMAL

}
208 
209 static void TestReplace() {
210   printf("Testing Replace\n");
211 
212   struct ReplaceTest {
213     const char *regexp;
214     const char *rewrite;
215     const char *original;
216     const char *single;
217     const char *global;
218     int global_count;         // the expected return value from ReplaceAll
219   };
220   static const ReplaceTest tests[] = {
221     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
222       "\\2\\1ay",
223       "the quick brown fox jumps over the lazy dogs.",
224       "ethay quick brown fox jumps over the lazy dogs.",
225       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
226       9 },
227     { "\\w+",
228       "\\0-NOSPAM",
229       "paul.haahr@google.com",
230       "paul-NOSPAM.haahr@google.com",
231       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
232       4 },
233     { "^",
234       "(START)",
235       "foo",
236       "(START)foo",
237       "(START)foo",
238       1 },
239     { "^",
240       "(START)",
241       "",
242       "(START)",
243       "(START)",
244       1 },
245     { "$",
246       "(END)",
247       "",
248       "(END)",
249       "(END)",
250       1 },
251     { "b",
252       "bb",
253       "ababababab",
254       "abbabababab",
255       "abbabbabbabbabb",
256        5 },
257     { "b",
258       "bb",
259       "bbbbbb",
260       "bbbbbbb",
261       "bbbbbbbbbbbb",
262       6 },
263     { "b+",
264       "bb",
265       "bbbbbb",
266       "bb",
267       "bb",
268       1 },
269     { "b*",
270       "bb",
271       "bbbbbb",
272       "bb",
273       "bbbb",
274       2 },
275     { "b*",
276       "bb",
277       "aaaaa",
278       "bbaaaaa",
279       "bbabbabbabbabbabb",
280       6 },
281     { "b*",
282       "bb",
283       "aa\naa\n",
284       "bbaa\naa\n",
285       "bbabbabb\nbbabbabb\nbb",
286       7 },
287     { "b*",
288       "bb",
289       "aa\raa\r",
290       "bbaa\raa\r",
291       "bbabbabb\rbbabbabb\rbb",
292       7 },
293     { "b*",
294       "bb",
295       "aa\r\naa\r\n",
296       "bbaa\r\naa\r\n",
297       "bbabbabb\r\nbbabbabb\r\nbb",
298       7 },
299     // Check empty-string matching (it's tricky!)
300     { "aa|b*",
301       "@",
302       "aa",
303       "@",
304       "@@",
305       2 },
306     { "b*|aa",
307       "@",
308       "aa",
309       "@aa",
310       "@@@",
311       3 },
312 #ifdef SUPPORT_UTF
313     { "b*",
314       "bb",
315       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
316       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
317       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
318       5 },
319     { "b*",
320       "bb",
321       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
322       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
323       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
324        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
325       9 },
326 #endif
327     { "", NULL, NULL, NULL, NULL, 0 }
328   };
329 
330 #ifdef SUPPORT_UTF
331   const bool support_utf8 = true;
332 #else
333   const bool support_utf8 = false;
334 #endif
335 
336   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
337     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
338     assert(re.error().empty());
339     string one(t->original);
340     CHECK(re.Replace(t->rewrite, &one));
341     CHECK_EQ(one, t->single);
342     string all(t->original);
343     const int replace_count = re.GlobalReplace(t->rewrite, &all);
344     CHECK_EQ(all, t->global);
345     CHECK_EQ(replace_count, t->global_count);
346   }
347 
348   // One final test: test \r\n replacement when we're not in CRLF mode
349   {
350     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
351     assert(re.error().empty());
352     string all("aa\r\naa\r\n");
353     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
354     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
355   }
356   {
357     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
358     assert(re.error().empty());
359     string all("aa\r\naa\r\n");
360     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
361     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
362   }
363   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
364   //       Alas, the answer depends on how pcre was compiled.
365 }
366 
// Exercises RE::Extract.  The test expects a successful call to store the
// rewritten text in `s`, and a call whose pattern does not match to return
// false and leave `s` holding its previous contents.
static void TestExtract() {
  printf("Testing Extract\n");

  string s;

  // \1 and \2 in the rewrite string refer to the pattern's capture groups.
  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  CHECK_EQ(s, "kremvax!boris");

  // check the RE interface as well
  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  CHECK_EQ(s, "'foo'");
  // "bar" does not occur in "baz": Extract must fail and `s` must be unchanged.
  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  CHECK_EQ(s, "'foo'");
}
381 
// Exercises RE::Consume: each successful call is expected to extract the
// next word from `input`; the third call must fail because the remaining
// text does not start with optional whitespace followed by a word.
static void TestConsume() {
  printf("Testing Consume\n");

  string word;

  string s("   aaa b!@#$@#$cccc");
  StringPiece input(s);

  RE r("\\s*(\\w+)");    // matches a word, possibly preceded by whitespace
  CHECK(r.Consume(&input, &word));
  CHECK_EQ(word, "aaa");
  CHECK(r.Consume(&input, &word));
  CHECK_EQ(word, "b");
  CHECK(! r.Consume(&input, &word));
}
397 
// Exercises RE::FindAndConsume on the same input as TestConsume.  Here the
// test expects all three words to be found, including "cccc" after the run
// of punctuation, and a fourth call to fail.
static void TestFindAndConsume() {
  printf("Testing FindAndConsume\n");

  string word;

  string s("   aaa b!@#$@#$cccc");
  StringPiece input(s);

  RE r("(\\w+)");      // matches a word
  CHECK(r.FindAndConsume(&input, &word));
  CHECK_EQ(word, "aaa");
  CHECK(r.FindAndConsume(&input, &word));
  CHECK_EQ(word, "b");
  CHECK(r.FindAndConsume(&input, &word));
  CHECK_EQ(word, "cccc");
  CHECK(! r.FindAndConsume(&input, &word));
}
415 
// Checks what is stored for capture groups that belong to an alternative
// which did not take part in the match: the test expects each such output
// string to be set to "", whichever of the three alternatives matched.
static void TestMatchNumberPeculiarity() {
  printf("Testing match-number peculiarity\n");

  string word1;
  string word2;
  string word3;

  RE r("(foo)|(bar)|(baz)");
  CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
  CHECK_EQ(word1, "foo");
  CHECK_EQ(word2, "");
  CHECK_EQ(word3, "");
  CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
  CHECK_EQ(word1, "");
  CHECK_EQ(word2, "bar");
  CHECK_EQ(word3, "");
  CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
  CHECK_EQ(word1, "");
  CHECK_EQ(word2, "");
  CHECK_EQ(word3, "baz");
  CHECK(!r.PartialMatch("f", &word1, &word2, &word3));

  // The overall match succeeds through the alternative without a group, so
  // the single capture is expected to come back empty.
  string a;
  CHECK(RE("(foo)|hello").FullMatch("hello", &a));
  CHECK_EQ(a, "");
}
442 
// Exercises RE_Options::set_match_limit and set_match_limit_recursion with
// the pattern (\w+)*b.  With a generous limit the good text is expected to
// match partially; with a limit that is too small the same match is
// expected to be reported as a failure.
static void TestRecursion() {
  printf("Testing recursion\n");

  // Get one string that passes (sometimes), one that never does.
  string text_good("abcdefghijk");
  string text_bad("acdefghijkl");

  // According to pcretest, matching text_good against (\w+)*b
  // requires match_limit of at least 8192, and match_recursion_limit
  // of at least 37.

  // match_limit large enough: only the partial match of text_good succeeds.
  RE_Options options_ml;
  options_ml.set_match_limit(8192);
  RE re("(\\w+)*b", options_ml);
  CHECK(re.PartialMatch(text_good) == true);
  CHECK(re.PartialMatch(text_bad) == false);
  CHECK(re.FullMatch(text_good) == false);
  CHECK(re.FullMatch(text_bad) == false);

  // match_limit too small: nothing matches.
  options_ml.set_match_limit(1024);
  RE re2("(\\w+)*b", options_ml);
  CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
  CHECK(re2.PartialMatch(text_bad) == false);
  CHECK(re2.FullMatch(text_good) == false);
  CHECK(re2.FullMatch(text_bad) == false);

  // Recursion limit of 50 is above the 37 quoted above.
  RE_Options options_mlr;
  options_mlr.set_match_limit_recursion(50);
  RE re3("(\\w+)*b", options_mlr);
  CHECK(re3.PartialMatch(text_good) == true);
  CHECK(re3.PartialMatch(text_bad) == false);
  CHECK(re3.FullMatch(text_good) == false);
  CHECK(re3.FullMatch(text_bad) == false);

  // Recursion limit of 10 is below it: nothing matches.
  options_mlr.set_match_limit_recursion(10);
  RE re4("(\\w+)*b", options_mlr);
  CHECK(re4.PartialMatch(text_good) == false);
  CHECK(re4.PartialMatch(text_bad) == false);
  CHECK(re4.FullMatch(text_good) == false);
  CHECK(re4.FullMatch(text_bad) == false);
}
484 
485 // A meta-quoted string, interpreted as a pattern, should always match
486 // the original unquoted string.
487 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
488   string quoted = RE::QuoteMeta(unquoted);
489   RE re(quoted, options);
490   CHECK(re.FullMatch(unquoted));
491 }
492 
493 // A string containing meaningful regexp characters, which is then meta-
494 // quoted, should not generally match a string the unquoted string does.
495 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
496                                   RE_Options options = RE_Options()) {
497   string quoted = RE::QuoteMeta(unquoted);
498   RE re(quoted, options);
499   CHECK(!re.FullMatch(should_not_match));
500 }
501 
502 // Tests that quoted meta characters match their original strings,
503 // and that a few things that shouldn't match indeed do not.
504 static void TestQuotaMetaSimple() {
505   TestQuoteMeta("foo");
506   TestQuoteMeta("foo.bar");
507   TestQuoteMeta("foo\\.bar");
508   TestQuoteMeta("[1-9]");
509   TestQuoteMeta("1.5-2.0?");
510   TestQuoteMeta("\\d");
511   TestQuoteMeta("Who doesn't like ice cream?");
512   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
513   TestQuoteMeta("((?!)xxx).*yyy");
514   TestQuoteMeta("([");
515   TestQuoteMeta(string("foo\0bar", 7));
516 }
517 
518 static void TestQuoteMetaSimpleNegative() {
519   NegativeTestQuoteMeta("foo", "bar");
520   NegativeTestQuoteMeta("...", "bar");
521   NegativeTestQuoteMeta("\\.", ".");
522   NegativeTestQuoteMeta("\\.", "..");
523   NegativeTestQuoteMeta("(a)", "a");
524   NegativeTestQuoteMeta("(a|b)", "a");
525   NegativeTestQuoteMeta("(a|b)", "(a)");
526   NegativeTestQuoteMeta("(a|b)", "a|b");
527   NegativeTestQuoteMeta("[0-9]", "0");
528   NegativeTestQuoteMeta("[0-9]", "0-9");
529   NegativeTestQuoteMeta("[0-9]", "[9]");
530   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
531 }
532 
533 static void TestQuoteMetaLatin1() {
534   TestQuoteMeta("3\xb2 = 9");
535 }
536 
// QuoteMeta checks on multi-byte UTF-8 sequences.  The body is compiled
// only when SUPPORT_UTF is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF
  const RE_Options utf8_mode = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  // No fancy utf8
  TestQuoteMeta("xyz", utf8_mode);
  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("\xc2\xb0", utf8_mode);
  // As a middle character
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);
  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);
  // 4-byte utf8 (music note)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);
  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");
  // 2-byte utf (degree symbol): a backslash in front of each byte of the
  // sequence must not be matched by the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8_mode);
#endif
}
551 
552 static void TestQuoteMetaAll() {
553   printf("Testing QuoteMeta\n");
554   TestQuotaMetaSimple();
555   TestQuoteMetaSimpleNegative();
556   TestQuoteMetaLatin1();
557   TestQuoteMetaUtf8();
558 }
559 
560 //
561 // Options tests contributed by
562 // Giuseppe Maxia, CTO, Stardata s.r.l.
563 // July 2005
564 //
565 static void GetOneOptionResult(
566                 const char *option_name,
567                 const char *regex,
568                 const char *str,
569                 RE_Options options,
570                 bool full,
571                 string expected) {
572 
573   printf("Testing Option <%s>\n", option_name);
574   if(VERBOSE_TEST)
575     printf("/%s/ finds \"%s\" within \"%s\" \n",
576                     regex,
577                     expected.c_str(),
578                     str);
579   string captured("");
580   if (full)
581     RE(regex,options).FullMatch(str, &captured);
582   else
583     RE(regex,options).PartialMatch(str, &captured);
584   CHECK_EQ(captured, expected);
585 }
586 
587 static void TestOneOption(
588                 const char *option_name,
589                 const char *regex,
590                 const char *str,
591                 RE_Options options,
592                 bool full,
593                 bool assertive = true) {
594 
595   printf("Testing Option <%s>\n", option_name);
596   if (VERBOSE_TEST)
597     printf("'%s' %s /%s/ \n",
598                   str,
599                   (assertive? "matches" : "doesn't match"),
600                   regex);
601   if (assertive) {
602     if (full)
603       CHECK(RE(regex,options).FullMatch(str));
604     else
605       CHECK(RE(regex,options).PartialMatch(str));
606   } else {
607     if (full)
608       CHECK(!RE(regex,options).FullMatch(str));
609     else
610       CHECK(!RE(regex,options).PartialMatch(str));
611   }
612 }
613 
614 static void Test_CASELESS() {
615   RE_Options options;
616   RE_Options options2;
617 
618   options.set_caseless(true);
619   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
620   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
621   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
622 
623   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
624   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
625   options.set_caseless(false);
626   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
627 }
628 
629 static void Test_MULTILINE() {
630   RE_Options options;
631   RE_Options options2;
632   const char *str = "HELLO\n" "cruel\n" "world\n";
633 
634   options.set_multiline(true);
635   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
636   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
637   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
638   options.set_multiline(false);
639   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
640 }
641 
642 static void Test_DOTALL() {
643   RE_Options options;
644   RE_Options options2;
645   const char *str = "HELLO\n" "cruel\n" "world";
646 
647   options.set_dotall(true);
648   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
649   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
650   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
651   options.set_dotall(false);
652   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
653 }
654 
655 static void Test_DOLLAR_ENDONLY() {
656   RE_Options options;
657   RE_Options options2;
658   const char *str = "HELLO world\n";
659 
660   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
661   options.set_dollar_endonly(true);
662   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
663   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
664 }
665 
666 static void Test_EXTRA() {
667   RE_Options options;
668   const char *str = "HELLO";
669 
670   options.set_extra(true);
671   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
672   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
673   options.set_extra(false);
674   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
675 }
676 
677 static void Test_EXTENDED() {
678   RE_Options options;
679   RE_Options options2;
680   const char *str = "HELLO world";
681 
682   options.set_extended(true);
683   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
684   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
685   TestOneOption("EXTENDED (class)",
686                     "^ HE L{2} O "
687                     "\\s+        "
688                     "\\w+ $      ",
689                     str,
690                     options,
691                     false);
692 
693   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
694   TestOneOption("EXTENDED (function)",
695                     "^ HE L{2} O "
696                     "\\s+        "
697                     "\\w+ $      ",
698                     str,
699                     pcrecpp::EXTENDED(),
700                     false);
701 
702   options.set_extended(false);
703   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
704 }
705 
// First half: with default options, the parenthesized group in
// "(world|universe)$" is expected to capture "world" from the subject.
// Second half: labelled as the NO_AUTO_CAPTURE case; see the review note
// below about what it actually exercises.
static void Test_NO_AUTO_CAPTURE() {
  RE_Options options;
  const char *str = "HELLO world";
  string captured;

  printf("Testing Option <no NO_AUTO_CAPTURE>\n");
  if (VERBOSE_TEST)
    printf("parentheses capture text\n");
  RE re("(world|universe)$", options);
  CHECK(re.Extract("\\1", str , &captured));
  CHECK_EQ(captured, "world");
  options.set_no_auto_capture(true);
  printf("testing Option <NO_AUTO_CAPTURE>\n");
  if (VERBOSE_TEST)
    printf("parentheses do not capture text\n");
  // NOTE(review): `re` was constructed above, before
  // set_no_auto_capture(true) was called, and is not rebuilt here.  If RE
  // takes a copy of its options at construction (not visible from this
  // file -- confirm), this Extract still runs with capturing enabled, and
  // the CHECK_EQ below would hold simply because `captured` already
  // contains "world".  The return value of this Extract is not checked.
  re.Extract("\\1",str, &captured );
  CHECK_EQ(captured, "world");
}
724 
725 static void Test_UNGREEDY() {
726   RE_Options options;
727   const char *str = "HELLO, 'this' is the 'world'";
728 
729   options.set_ungreedy(true);
730   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
731   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
732   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
733 
734   options.set_ungreedy(false);
735   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
736   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
737 }
738 
739 static void Test_all_options() {
740   const char *str = "HELLO\n" "cruel\n" "world";
741   RE_Options options;
742   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
743 
744   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
745   options.set_all_options(0);
746   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
747   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
748 
749   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
750   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
751                   " ^ c r u e l $ ",
752                   str,
753                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
754                   false);
755 
756   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
757                   " ^ c r u e l $ ",
758                   str,
759                   RE_Options()
760                        .set_multiline(true)
761                        .set_extended(true),
762                   false);
763 
764   options.set_all_options(0);
765   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
766 
767 }
768 
769 static void TestOptions() {
770   printf("Testing Options\n");
771   Test_CASELESS();
772   Test_MULTILINE();
773   Test_DOTALL();
774   Test_DOLLAR_ENDONLY();
775   Test_EXTENDED();
776   Test_NO_AUTO_CAPTURE();
777   Test_UNGREEDY();
778   Test_EXTRA();
779   Test_all_options();
780 }
781 
// Exercises RE copy construction and assignment: a copy of a matching RE
// must also match, assigning over a non-matching RE must make it match,
// and self-assignment must leave every object usable.
static void TestConstructors() {
  printf("Testing constructors\n");

  RE_Options options;
  options.set_dotall(true);
  const char *str = "HELLO\n" "cruel\n" "world";

  RE orig("HELLO.*world", options);
  CHECK(orig.FullMatch(str));

  // Copy construction.
  RE copy1(orig);
  CHECK(copy1.FullMatch(str));

  // Assignment over an RE that was built from a different pattern.
  RE copy2("not a match");
  CHECK(!copy2.FullMatch(str));
  copy2 = copy1;
  CHECK(copy2.FullMatch(str));
  copy2 = orig;
  CHECK(copy2.FullMatch(str));

  // Make sure when we assign to ourselves, nothing bad happens
  orig = orig;
  copy1 = copy1;
  copy2 = copy2;
  CHECK(orig.FullMatch(str));
  CHECK(copy1.FullMatch(str));
  CHECK(copy2.FullMatch(str));
}
810 
811 int main(int argc, char** argv) {
812   // Treat any flag as --help
813   if (argc > 1 && argv[1][0] == '-') {
814     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
815            "       If 'timingX ###' is specified, run the given timing test\n"
816            "       with the given number of iterations, rather than running\n"
817            "       the default corectness test.\n", argv[0]);
818     return 0;
819   }
820 
821   if (argc > 1) {
822     if ( argc == 2 || atoi(argv[2]) == 0) {
823       printf("timing mode needs a num-iters argument\n");
824       return 1;
825     }
826     if (!strcmp(argv[1], "timing1"))
827       Timing1(atoi(argv[2]));
828     else if (!strcmp(argv[1], "timing2"))
829       Timing2(atoi(argv[2]));
830     else if (!strcmp(argv[1], "timing3"))
831       Timing3(atoi(argv[2]));
832     else
833       printf("Unknown argument '%s'\n", argv[1]);
834     return 0;
835   }
836 
837   printf("PCRE C++ wrapper tests\n");
838   printf("Testing FullMatch\n");
839 
840   int i;
841   string s;
842 
843   /***** FullMatch with no args *****/
844 
845   CHECK(RE("h.*o").FullMatch("hello"));
846   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
847   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
848   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
849   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
850   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
851 
852   /***** FullMatch with args *****/
853 
854   // Zero-arg
855   CHECK(RE("\\d+").FullMatch("1001"));
856 
857   // Single-arg
858   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
859   CHECK_EQ(i, 1001);
860   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
861   CHECK_EQ(i, -123);
862   CHECK(!RE("()\\d+").FullMatch("10", &i));
863   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
864                                 &i));
865 
866   // Digits surrounding integer-arg
867   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
868   CHECK_EQ(i, 23);
869   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
870   CHECK_EQ(i, 1);
871   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
872   CHECK_EQ(i, -1);
873   CHECK(RE("(\\d)").PartialMatch("1234", &i));
874   CHECK_EQ(i, 1);
875   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
876   CHECK_EQ(i, -1);
877 
878   // String-arg
879   CHECK(RE("h(.*)o").FullMatch("hello", &s));
880   CHECK_EQ(s, string("ell"));
881 
882   // StringPiece-arg
883   StringPiece sp;
884   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
885   CHECK_EQ(sp.size(), 4);
886   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
887   CHECK_EQ(i, 1234);
888 
889   // Multi-arg
890   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
891   CHECK_EQ(s, string("ruby"));
892   CHECK_EQ(i, 1234);
893 
894   // Ignore non-void* NULL arg
895   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
896   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
897   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
898   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
899 #ifdef HAVE_LONG_LONG
900   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
901 #endif
902   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
903   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
904 
905   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
906   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
907   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
908   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
909   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
910   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
911 
912   // Ignored arg
913   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
914   CHECK_EQ(s, string("ruby"));
915   CHECK_EQ(i, 1234);
916 
917   // Type tests
918   {
919     char c;
920     CHECK(RE("(H)ello").FullMatch("Hello", &c));
921     CHECK_EQ(c, 'H');
922   }
923   {
924     unsigned char c;
925     CHECK(RE("(H)ello").FullMatch("Hello", &c));
926     CHECK_EQ(c, static_cast<unsigned char>('H'));
927   }
928   {
929     short v;
930     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
931     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
932     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
933     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
934     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
935     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
936   }
937   {
938     unsigned short v;
939     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
940     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
941     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
942     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
943   }
944   {
945     int v;
946     static const int max_value = 0x7fffffff;
947     static const int min_value = -max_value - 1;
948     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
949     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
950     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
951     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
952     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
953     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
954   }
955   {
956     unsigned int v;
957     static const unsigned int max_value = 0xfffffffful;
958     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
959     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
960     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
961   }
962 #ifdef HAVE_LONG_LONG
963 # if defined(__MINGW__) || defined(__MINGW32__)
964 #   define LLD "%I64d"
965 #   define LLU "%I64u"
966 # else
967 #   define LLD "%lld"
968 #   define LLU "%llu"
969 # endif
970   {
971     long long v;
972     static const long long max_value = 0x7fffffffffffffffLL;
973     static const long long min_value = -max_value - 1;
974     char buf[32];  // definitely big enough for a long long
975 
976     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
977     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
978 
979     sprintf(buf, LLD, max_value);
980     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
981 
982     sprintf(buf, LLD, min_value);
983     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
984 
985     sprintf(buf, LLD, max_value);
986     assert(buf[strlen(buf)-1] != '9');
987     buf[strlen(buf)-1]++;
988     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
989 
990     sprintf(buf, LLD, min_value);
991     assert(buf[strlen(buf)-1] != '9');
992     buf[strlen(buf)-1]++;
993     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
994   }
995 #endif
996 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
997   {
998     unsigned long long v;
999     long long v2;
1000     static const unsigned long long max_value = 0xffffffffffffffffULL;
    char buf[32];  // definitely big enough for an unsigned long long
1002 
1003     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1004     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1005 
1006     sprintf(buf, LLU, max_value);
1007     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1008 
1009     assert(buf[strlen(buf)-1] != '9');
1010     buf[strlen(buf)-1]++;
1011     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1012   }
1013 #endif
1014   {
1015     float v;
1016     CHECK(RE("(.*)").FullMatch("100", &v));
1017     CHECK(RE("(.*)").FullMatch("-100.", &v));
1018     CHECK(RE("(.*)").FullMatch("1e23", &v));
1019   }
1020   {
1021     double v;
1022     CHECK(RE("(.*)").FullMatch("100", &v));
1023     CHECK(RE("(.*)").FullMatch("-100.", &v));
1024     CHECK(RE("(.*)").FullMatch("1e23", &v));
1025   }
1026 
1027   // Check that matching is fully anchored
1028   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1029   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1030   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1031   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1032 
1033   // Braces
1034   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1035   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1036   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1037 
1038   // Complicated RE
1039   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1040   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1041   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1042   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1043 
1044   // Check full-match handling (needs '$' tacked on internally)
1045   CHECK(RE("fo|foo").FullMatch("fo"));
1046   CHECK(RE("fo|foo").FullMatch("foo"));
1047   CHECK(RE("fo|foo$").FullMatch("fo"));
1048   CHECK(RE("fo|foo$").FullMatch("foo"));
1049   CHECK(RE("foo$").FullMatch("foo"));
1050   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1051   CHECK(!RE("fo|bar").FullMatch("fox"));
1052 
  // Enable the following (change the condition to true) if we change the
  // handling of '$' to prevent it from matching a trailing newline
1055   if (false) {
1056     // Check that we don't get bitten by pcre's special handling of a
1057     // '\n' at the end of the string matching '$'
1058     CHECK(!RE("foo$").PartialMatch("foo\n"));
1059   }
1060 
1061   // Number of args
1062   int a[16];
1063   CHECK(RE("").FullMatch(""));
1064 
1065   memset(a, 0, sizeof(0));
1066   CHECK(RE("(\\d){1}").FullMatch("1",
1067                                  &a[0]));
1068   CHECK_EQ(a[0], 1);
1069 
1070   memset(a, 0, sizeof(0));
1071   CHECK(RE("(\\d)(\\d)").FullMatch("12",
1072                                    &a[0],  &a[1]));
1073   CHECK_EQ(a[0], 1);
1074   CHECK_EQ(a[1], 2);
1075 
1076   memset(a, 0, sizeof(0));
1077   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1078                                         &a[0],  &a[1],  &a[2]));
1079   CHECK_EQ(a[0], 1);
1080   CHECK_EQ(a[1], 2);
1081   CHECK_EQ(a[2], 3);
1082 
1083   memset(a, 0, sizeof(0));
1084   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1085                                              &a[0],  &a[1],  &a[2],  &a[3]));
1086   CHECK_EQ(a[0], 1);
1087   CHECK_EQ(a[1], 2);
1088   CHECK_EQ(a[2], 3);
1089   CHECK_EQ(a[3], 4);
1090 
1091   memset(a, 0, sizeof(0));
1092   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1093                                                   &a[0],  &a[1],  &a[2],
1094                                                   &a[3],  &a[4]));
1095   CHECK_EQ(a[0], 1);
1096   CHECK_EQ(a[1], 2);
1097   CHECK_EQ(a[2], 3);
1098   CHECK_EQ(a[3], 4);
1099   CHECK_EQ(a[4], 5);
1100 
1101   memset(a, 0, sizeof(0));
1102   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1103                                                        &a[0],  &a[1],  &a[2],
1104                                                        &a[3],  &a[4],  &a[5]));
1105   CHECK_EQ(a[0], 1);
1106   CHECK_EQ(a[1], 2);
1107   CHECK_EQ(a[2], 3);
1108   CHECK_EQ(a[3], 4);
1109   CHECK_EQ(a[4], 5);
1110   CHECK_EQ(a[5], 6);
1111 
1112   memset(a, 0, sizeof(0));
1113   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1114                                                             &a[0],  &a[1],  &a[2],  &a[3],
1115                                                             &a[4],  &a[5],  &a[6]));
1116   CHECK_EQ(a[0], 1);
1117   CHECK_EQ(a[1], 2);
1118   CHECK_EQ(a[2], 3);
1119   CHECK_EQ(a[3], 4);
1120   CHECK_EQ(a[4], 5);
1121   CHECK_EQ(a[5], 6);
1122   CHECK_EQ(a[6], 7);
1123 
1124   memset(a, 0, sizeof(0));
1125   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1126            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1127                "1234567890123456",
1128                &a[0],  &a[1],  &a[2],  &a[3],
1129                &a[4],  &a[5],  &a[6],  &a[7],
1130                &a[8],  &a[9],  &a[10], &a[11],
1131                &a[12], &a[13], &a[14], &a[15]));
1132   CHECK_EQ(a[0], 1);
1133   CHECK_EQ(a[1], 2);
1134   CHECK_EQ(a[2], 3);
1135   CHECK_EQ(a[3], 4);
1136   CHECK_EQ(a[4], 5);
1137   CHECK_EQ(a[5], 6);
1138   CHECK_EQ(a[6], 7);
1139   CHECK_EQ(a[7], 8);
1140   CHECK_EQ(a[8], 9);
1141   CHECK_EQ(a[9], 0);
1142   CHECK_EQ(a[10], 1);
1143   CHECK_EQ(a[11], 2);
1144   CHECK_EQ(a[12], 3);
1145   CHECK_EQ(a[13], 4);
1146   CHECK_EQ(a[14], 5);
1147   CHECK_EQ(a[15], 6);
1148 
1149   /***** PartialMatch *****/
1150 
1151   printf("Testing PartialMatch\n");
1152 
1153   CHECK(RE("h.*o").PartialMatch("hello"));
1154   CHECK(RE("h.*o").PartialMatch("othello"));
1155   CHECK(RE("h.*o").PartialMatch("hello!"));
1156   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1157 
1158   /***** other tests *****/
1159 
1160   RadixTests();
1161   TestReplace();
1162   TestExtract();
1163   TestConsume();
1164   TestFindAndConsume();
1165   TestQuoteMetaAll();
1166   TestMatchNumberPeculiarity();
1167 
1168   // Check the pattern() accessor
1169   {
1170     const string kPattern = "http://([^/]+)/.*";
1171     const RE re(kPattern);
1172     CHECK_EQ(kPattern, re.pattern());
1173   }
1174 
1175   // Check RE error field.
1176   {
1177     RE re("foo");
1178     CHECK(re.error().empty());  // Must have no error
1179   }
1180 
1181 #ifdef SUPPORT_UTF
1182   // Check UTF-8 handling
1183   {
1184     printf("Testing UTF-8 handling\n");
1185 
1186     // Three Japanese characters (nihongo)
1187     const unsigned char utf8_string[] = {
1188          0xe6, 0x97, 0xa5, // 65e5
         0xe6, 0x9c, 0xac, // 672c
1190          0xe8, 0xaa, 0x9e, // 8a9e
1191          0
1192     };
1193     const unsigned char utf8_pattern[] = {
1194          '.',
         0xe6, 0x9c, 0xac, // 672c
1196          '.',
1197          0
1198     };
1199 
1200     // Both should match in either mode, bytes or UTF-8
1201     RE re_test1(".........");
1202     CHECK(re_test1.FullMatch(utf8_string));
1203     RE re_test2("...", pcrecpp::UTF8());
1204     CHECK(re_test2.FullMatch(utf8_string));
1205 
1206     // PH added these tests for leading option settings
1207 
1208     RE re_testZ0("(*CR)(*NO_START_OPT).........");
1209     CHECK(re_testZ0.FullMatch(utf8_string));
1210 
1211 #ifdef SUPPORT_UTF
1212     RE re_testZ1("(*UTF8)...");
1213     CHECK(re_testZ1.FullMatch(utf8_string));
1214 
1215     RE re_testZ2("(*UTF)...");
1216     CHECK(re_testZ2.FullMatch(utf8_string));
1217 
1218 #ifdef SUPPORT_UCP
1219     RE re_testZ3("(*UCP)(*UTF)...");
1220     CHECK(re_testZ3.FullMatch(utf8_string));
1221 
1222     RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");
1223     CHECK(re_testZ4.FullMatch(utf8_string));
1224 
1225     RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");
1226     CHECK(re_testZ5.FullMatch(utf8_string));
1227 #endif
1228 #endif
1229 
1230     // Check that '.' matches one byte or UTF-8 character
1231     // according to the mode.
1232     string ss;
1233     RE re_test3("(.)");
1234     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1235     CHECK_EQ(ss, string("\xe6"));
1236     RE re_test4("(.)", pcrecpp::UTF8());
1237     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1238     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1239 
1240     // Check that string matches itself in either mode
1241     RE re_test5(utf8_string);
1242     CHECK(re_test5.FullMatch(utf8_string));
1243     RE re_test6(utf8_string, pcrecpp::UTF8());
1244     CHECK(re_test6.FullMatch(utf8_string));
1245 
1246     // Check that pattern matches string only in UTF8 mode
1247     RE re_test7(utf8_pattern);
1248     CHECK(!re_test7.FullMatch(utf8_string));
1249     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1250     CHECK(re_test8.FullMatch(utf8_string));
1251   }
1252 
1253   // Check that ungreedy, UTF8 regular expressions don't match when they
1254   // oughtn't -- see bug 82246.
1255   {
1256     // This code always worked.
1257     const char* pattern = "\\w+X";
1258     const string target = "a aX";
1259     RE match_sentence(pattern);
1260     RE match_sentence_re(pattern, pcrecpp::UTF8());
1261 
1262     CHECK(!match_sentence.FullMatch(target));
1263     CHECK(!match_sentence_re.FullMatch(target));
1264   }
1265 
1266   {
1267     const char* pattern = "(?U)\\w+X";
1268     const string target = "a aX";
1269     RE match_sentence(pattern);
1270     RE match_sentence_re(pattern, pcrecpp::UTF8());
1271 
1272     CHECK(!match_sentence.FullMatch(target));
1273     CHECK(!match_sentence_re.FullMatch(target));
1274   }
1275 #endif  /* def SUPPORT_UTF */
1276 
1277   printf("Testing error reporting\n");
1278 
1279   { RE re("a\\1"); CHECK(!re.error().empty()); }
1280   {
1281     RE re("a[x");
1282     CHECK(!re.error().empty());
1283   }
1284   {
1285     RE re("a[z-a]");
1286     CHECK(!re.error().empty());
1287   }
1288   {
1289     RE re("a[[:foobar:]]");
1290     CHECK(!re.error().empty());
1291   }
1292   {
1293     RE re("a(b");
1294     CHECK(!re.error().empty());
1295   }
1296   {
1297     RE re("a\\");
1298     CHECK(!re.error().empty());
1299   }
1300 
1301   // Test that recursion is stopped
1302   TestRecursion();
1303 
1304   // Test Options
1305   if (getenv("VERBOSE_TEST") != NULL)
1306     VERBOSE_TEST  = true;
1307   TestOptions();
1308 
1309   // Test the constructors
1310   TestConstructors();
1311 
1312   // Done
1313   printf("OK\n");
1314 
1315   return 0;
1316 }
1317