1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include "util/test.h"
6 #include "util/logging.h"
7 #include "re2/prog.h"
8 #include "re2/regexp.h"
9 
10 namespace re2 {
11 
// One row of the MimicsPCRE test table: a pattern together with the value
// the test expects Regexp::MimicsPCRE() to return for that pattern.
// Kept as a plain aggregate so rows can be written as { "pattern", bool }.
struct PCRETest {
  // Pattern text handed to Regexp::Parse.
  const char* regexp;

  // Expected return value of MimicsPCRE() for the parsed pattern.
  bool should_match;
};
16 
17 static PCRETest tests[] = {
18   // Most things should behave exactly.
19   { "abc",       true  },
20   { "(a|b)c",    true  },
21   { "(a*|b)c",   true  },
22   { "(a|b*)c",   true  },
23   { "a(b|c)d",   true  },
24   { "a(()|())c", true  },
25   { "ab*c",      true  },
26   { "ab+c",      true  },
27   { "a(b*|c*)d", true  },
28   { "\\W",       true  },
29   { "\\W{1,2}",  true  },
30   { "\\d",       true  },
31 
32   // Check that repeated empty strings do not.
33   { "(a*)*",     false },
34   { "x(a*)*y",   false },
35   { "(a*)+",     false },
36   { "(a+)*",     true  },
37   { "(a+)+",     true  },
38   { "(a+)+",     true  },
39 
40   // \v is the only character class that shouldn't.
41   { "\\b",       true  },
42   { "\\v",       false },
43   { "\\d",       true  },
44 
45   // The handling of ^ in multi-line mode is different, as is
46   // the handling of $ in single-line mode.  (Both involve
47   // boundary cases if the string ends with \n.)
48   { "\\A",       true  },
49   { "\\z",       true  },
50   { "(?m)^",     false },
51   { "(?m)$",     true  },
52   { "(?-m)^",    true  },
53   { "(?-m)$",    false },  // In PCRE, == \Z
54   { "(?m)\\A",   true  },
55   { "(?m)\\z",   true  },
56   { "(?-m)\\A",  true  },
57   { "(?-m)\\z",  true  },
58 };
59 
// Checks that Regexp::MimicsPCRE() returns the expected value for every row
// of tests[].  Each pattern is parsed twice: once with Regexp::Latin1 added
// to the LikePerl flags and once with LikePerl alone.
TEST(MimicsPCRE, SimpleTests) {
  for (size_t i = 0; i < arraysize(tests); i++) {
    const PCRETest& t = tests[i];
    for (size_t j = 0; j < 2; j++) {
      // Pass 0 is the Latin-1 pass; pass 1 is labelled "utf" in failures.
      const bool latin1 = (j == 0);
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (latin1)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;
      // Record the answer and release our reference before asserting on it:
      // if a failed assertion leaves this function early, the Decref() must
      // not be skipped.
      const bool mimics = re->MimicsPCRE();
      re->Decref();
      ASSERT_EQ(t.should_match, mimics)
        << " " << t.regexp << " "
        << (latin1 ? "latin1" : "utf");
    }
  }
}
76 
77 }  // namespace re2
78