//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9 
10 #include "llvm/Support/Regex.h"
11 #include "llvm/ADT/SmallVector.h"
12 #include "gtest/gtest.h"
13 #include <cstring>
14 
15 using namespace llvm;
16 namespace {
17 
18 class RegexTest : public ::testing::Test {
19 };
20 
/// Exercises plain matching: anchored and unanchored patterns, capture-group
/// extraction, and subjects/patterns that contain embedded NUL bytes.
///
/// Size checks that guard an indexed access into Matches are fatal
/// (ASSERT_EQ), so an unexpected match result ends the test instead of
/// indexing past the end of the vector.
TEST_F(RegexTest, Basics) {
  // Fully anchored pattern: only all-digit subjects are accepted.
  Regex r1("^[0-9]+$");
  EXPECT_TRUE(r1.match("916"));
  EXPECT_TRUE(r1.match("9"));
  EXPECT_FALSE(r1.match("9a"));

  // Unanchored pattern: slot 0 of Matches holds the matched substring.
  SmallVector<StringRef, 1> Matches;
  Regex r2("[0-9]+");
  EXPECT_TRUE(r2.match("aa216b", &Matches));
  ASSERT_EQ(1u, Matches.size());
  EXPECT_EQ("216", Matches[0].str());

  // Two capture groups, the first of which is optional.
  Regex r3("[0-9]+([a-f])?:([0-9]+)");
  EXPECT_TRUE(r3.match("9a:513b", &Matches));
  ASSERT_EQ(3u, Matches.size());
  EXPECT_EQ("9a:513", Matches[0].str());
  EXPECT_EQ("a", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // Same pattern with the optional group absent from the subject: its slot
  // is still present and is expected to be empty.
  EXPECT_TRUE(r3.match("9:513b", &Matches));
  ASSERT_EQ(3u, Matches.size());
  EXPECT_EQ("9:513", Matches[0].str());
  EXPECT_EQ("", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // Subject with an embedded NUL: String becomes "ax\0b", and the negated
  // bracket expression is expected to match across the NUL byte.
  Regex r4("a[^b]+b");
  std::string String = "axxb";
  String[2] = '\0';
  EXPECT_FALSE(r4.match("abb"));
  EXPECT_TRUE(r4.match(String, &Matches));
  ASSERT_EQ(1u, Matches.size());
  EXPECT_EQ(String, Matches[0].str());

  // Pattern with an embedded NUL: the second 'X' (index 7) is overwritten,
  // so the pattern only matches once the subject has a NUL in that position.
  std::string NulPattern = "X[0-9]+X([a-f])?:([0-9]+)";
  String = "YX99a:513b";
  NulPattern[7] = '\0';
  Regex r5(NulPattern);
  EXPECT_FALSE(r5.match(String));
  EXPECT_FALSE(r5.match("X9"));
  String[3] = '\0';
  EXPECT_TRUE(r5.match(String));
}
63 
/// Checks that \N backreferences inside a pattern must repeat exactly what
/// the corresponding capture group matched.
///
/// Size checks that guard an indexed access into Matches are fatal
/// (ASSERT_EQ), so an unexpected match result ends the test instead of
/// indexing past the end of the vector.
TEST_F(RegexTest, Backreferences) {
  // One group referenced once: both halves around '_' must be equal.
  Regex r1("([a-z]+)_\\1");
  SmallVector<StringRef, 4> Matches;
  EXPECT_TRUE(r1.match("abc_abc", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_FALSE(r1.match("abc_ab", &Matches));

  // One group referenced twice.
  Regex r2("a([0-9])b\\1c\\1");
  EXPECT_TRUE(r2.match("a4b4c4", &Matches));
  ASSERT_EQ(2u, Matches.size());
  EXPECT_EQ("4", Matches[1].str());
  EXPECT_FALSE(r2.match("a2b2c3"));

  // Two groups, each referenced once; a mismatch in either one rejects the
  // subject.
  Regex r3("a([0-9])([a-z])b\\1\\2");
  EXPECT_TRUE(r3.match("a6zb6z", &Matches));
  ASSERT_EQ(3u, Matches.size());
  EXPECT_EQ("6", Matches[1].str());
  EXPECT_EQ("z", Matches[2].str());
  EXPECT_FALSE(r3.match("a6zb6y"));
  EXPECT_FALSE(r3.match("a6zb7z"));
}
85 
/// Covers Regex::sub: literal replacement text, backslash escapes in the
/// replacement, backreferences to the match, and the error strings reported
/// for malformed replacements.
///
/// Every EXPECT_EQ is written as (expected, actual), matching the rest of
/// this file.
TEST_F(RegexTest, Substitution) {
  std::string Error;

  // Plain replacement text, no error reporting requested.
  EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));

  // Standard escapes: \\, \n and \t are translated; an unrecognised escape
  // such as \j is expected to yield the escaped character itself.
  EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A lone trailing backslash contributes nothing and reports an error.
  EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
  EXPECT_EQ("replacement string contained trailing backslash", Error);

  // Backreferences: \0 expands to the whole match.
  EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // \1 expands to the first capture group.
  EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A reference to a group that does not exist expands to nothing and
  // reports an error.
  EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
  EXPECT_EQ("invalid backreference string '100'", Error);
}
114 
/// Checks Regex::isLiteralERE: a string of ordinary characters is accepted,
/// while each listed string containing regex syntax is rejected.
TEST_F(RegexTest, IsLiteralERE) {
  EXPECT_TRUE(Regex::isLiteralERE("abc"));

  // Inputs expected to be rejected: grouping, anchors, alternation,
  // repetition operators, wildcard, bracket expression, backreference and
  // interval.
  static const char *const NonLiterals[] = {
      "a(bc)", "^abc", "abc$",  "a|bc",    "abc*",     "abc+",
      "abc?",  "abc.", "a[bc]", "abc\\1",  "abc{1,2}",
  };
  for (const char *Candidate : NonLiterals)
    EXPECT_FALSE(Regex::isLiteralERE(Candidate)) << Candidate;
}
129 
/// Checks Regex::escape on inputs containing brackets, braces and a
/// backslash.
TEST_F(RegexTest, Escape) {
  // Square brackets each gain a leading backslash.
  const std::string EscapedBrackets = Regex::escape("a[bc]");
  EXPECT_EQ("a\\[bc\\]", EscapedBrackets);

  // Braces and the backslash itself are escaped; the comma is left alone.
  const std::string EscapedInterval = Regex::escape("abc{1\\,2}");
  EXPECT_EQ("abc\\{1\\\\,2\\}", EscapedInterval);
}
134 
/// Checks that malformed patterns are reported as invalid and that isValid
/// fills in the expected diagnostic text.
TEST_F(RegexTest, IsValid) {
  std::string Diagnostic;

  // Unterminated group.
  Regex UnbalancedParen("(foo");
  EXPECT_FALSE(UnbalancedParen.isValid(Diagnostic));
  EXPECT_EQ("parentheses not balanced", Diagnostic);

  // Bracket expression that ends in the middle of a range.
  Regex OpenRange("a[b-");
  EXPECT_FALSE(OpenRange.isValid(Diagnostic));
  EXPECT_EQ("invalid character range", Diagnostic);
}
142 
/// A Regex built by move construction must still match like its source did.
TEST_F(RegexTest, MoveConstruct) {
  Regex Source("^[0-9]+$");
  Regex Target(std::move(Source));
  // Only the move target is queried; the moved-from object is left untouched.
  EXPECT_TRUE(Target.match("916"));
}
148 
/// Move-assigning over an existing Regex must replace its pattern with the
/// source's.
TEST_F(RegexTest, MoveAssign) {
  Regex Source("^[0-9]+$");
  // Starts out with a different pattern that would not match "916".
  Regex Target("abc");
  Target = std::move(Source);
  EXPECT_TRUE(Target.match("916"));
}
155 
156 }
157