1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/chrome_cleaner/strings/string_util.h"
6 
7 #include <string>
8 
9 #include "base/strings/string_util.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11 
12 namespace chrome_cleaner {
13 
14 namespace {
15 
// Wide-string fixtures used by the case-insensitive comparison tests.
constexpr wchar_t kEmptyStr[] = L"";
constexpr wchar_t kFooStr[] = L"Foo";
constexpr wchar_t kFooLowerCaseStr[] = L"foo";
constexpr wchar_t kBarStr[] = L"Bar";
constexpr wchar_t kBatStr[] = L"Bat";

// Comma-separated lists: the first has an entry that is exactly "Foo", the
// second only has "Fooo" (which contains "Foo" without being equal to it).
// Neither list has an entry containing "Bat".
constexpr wchar_t kFooSetStr[] = L"Set,with,Foo,and,bar";
constexpr wchar_t kFoooSetStr[] = L"Set,with,Fooo,and,bar";
constexpr wchar_t kSeparators[] = L",";

// Narrow-string fixtures for the RemoveInvalidUTF8Chars test. The 0x80 and
// 0xf1 bytes below do not form valid UTF-8 sequences on their own.
constexpr char kSomeInvalidUTF8Chars[] = " a\x80 b\x80 c ";
constexpr char kStartWithInvalidUTF8Char[] = "\x80 a b c ";
constexpr char kEndWithInvalidUTF8Chars[] = " a b c\x80 ";
// What the three strings above look like once the 0x80 bytes are dropped.
constexpr char kPrunedOfUTF8Chars[] = " a b c ";
constexpr char kOnlyInvalidUTF8Chars[] = "\x80 \x80 ";
// What kOnlyInvalidUTF8Chars looks like once the 0x80 bytes are dropped.
constexpr char kNoCharsLeftOnlySpaces[] = "  ";
constexpr char kSingleInvalidUTF8Char[] = "\xf1";
31 
WildcardMatchInsensitive(const std::wstring & text,const std::wstring & pattern)32 bool WildcardMatchInsensitive(const std::wstring& text,
33                               const std::wstring& pattern) {
34   return WStringWildcardMatchInsensitive(text, pattern, L'\\');
35 }
36 
37 }  // namespace
38 
// Checks WStringEqualsCaseInsensitive against an empty string, an identical
// string, a string differing only by case, and unrelated or longer strings.
TEST(StringUtilTest, WStringEqualsCaseInsensitive) {
  const struct {
    const wchar_t* lhs;
    const wchar_t* rhs;
    bool expected_equal;
  } kCases[] = {
      {kFooStr, kEmptyStr, false},
      {kFooStr, kFooStr, true},
      {kFooStr, kFooLowerCaseStr, true},
      {kFooStr, kBarStr, false},
      {kFooStr, kFooSetStr, false},
  };

  for (const auto& test_case : kCases) {
    // Identify the failing pair, since every case shares one assertion line.
    SCOPED_TRACE(::testing::Message() << "lhs=\"" << test_case.lhs
                                      << "\", rhs=\"" << test_case.rhs << "\"");
    EXPECT_EQ(test_case.expected_equal,
              WStringEqualsCaseInsensitive(test_case.lhs, test_case.rhs));
  }
}
46 
// Checks WStringContainsCaseInsensitive, where the first argument is the
// string searched in and the second is the string searched for.
TEST(StringUtilTest, WStringContainsCaseInsensitive) {
  const struct {
    const wchar_t* searched_in;
    const wchar_t* searched_for;
    bool expected_found;
  } kCases[] = {
      // An empty string is found in anything, but nothing is found in it.
      {kFooStr, kEmptyStr, true},
      {kEmptyStr, kFooStr, false},
      // Whole-string comparisons, with and without a case difference.
      {kFooStr, kFooStr, true},
      {kFooLowerCaseStr, kFooStr, true},
      {kBarStr, kFooStr, false},
      // Substring of a longer string, including inside the "Fooo" entry.
      {kFooSetStr, kFooStr, true},
      {kFoooSetStr, kFooStr, true},
  };

  for (const auto& test_case : kCases) {
    // Identify the failing pair, since every case shares one assertion line.
    SCOPED_TRACE(::testing::Message()
                 << "searched_in=\"" << test_case.searched_in
                 << "\", searched_for=\"" << test_case.searched_for << "\"");
    EXPECT_EQ(test_case.expected_found,
              WStringContainsCaseInsensitive(test_case.searched_in,
                                             test_case.searched_for));
  }
}
56 
// Checks WStringSetMatchEntry on the comma-separated fixtures, once with the
// "contains" matcher and once with the "equals" matcher.
TEST(StringUtilTest, WStringSetMatchEntry) {
  // Both shorthands pass kSeparators as the second argument; they differ only
  // in the matcher handed to WStringSetMatchEntry.
  const auto matches_by_contains = [](const wchar_t* set_string,
                                      const wchar_t* value) {
    return WStringSetMatchEntry(set_string, kSeparators, value,
                                WStringContainsCaseInsensitive);
  };
  const auto matches_by_equals = [](const wchar_t* set_string,
                                    const wchar_t* value) {
    return WStringSetMatchEntry(set_string, kSeparators, value,
                                WStringEqualsCaseInsensitive);
  };

  // With the "contains" matcher, the "Fooo" entry is enough to match "Foo".
  EXPECT_TRUE(matches_by_contains(kFooSetStr, kFooStr));
  EXPECT_FALSE(matches_by_contains(kFooSetStr, kBatStr));
  EXPECT_TRUE(matches_by_contains(kFoooSetStr, kFooStr));

  // With the "equals" matcher, only an entry that is exactly "Foo" matches.
  EXPECT_TRUE(matches_by_equals(kFooSetStr, kFooStr));
  EXPECT_FALSE(matches_by_equals(kFooSetStr, kBatStr));
  EXPECT_FALSE(matches_by_equals(kFoooSetStr, kFooStr));
}
72 
TEST(StringUtilTest,WStringMatchPatternTest)73 TEST(StringUtilTest, WStringMatchPatternTest) {
74   // Test matching on an empty text or pattern.
75   EXPECT_TRUE(WildcardMatchInsensitive(L"", L""));
76   EXPECT_FALSE(WildcardMatchInsensitive(L"", L"*.*"));
77   EXPECT_TRUE(WildcardMatchInsensitive(L"", L"*"));
78   EXPECT_FALSE(WildcardMatchInsensitive(L"", L"?"));
79 
80   // Test matching recursion ending.
81   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"a?"));
82   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"b?"));
83   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L""));
84   EXPECT_FALSE(WildcardMatchInsensitive(L"ab", L"a"));
85   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"ab"));
86 
87   // Test wild-cards matching.
88   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"*.com"));
89   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"*"));
90   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"?*"));
91   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"*?"));
92   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"**"));
93   EXPECT_FALSE(WildcardMatchInsensitive(L"www.google.com", L"www*.g*.org"));
94   EXPECT_TRUE(WildcardMatchInsensitive(L"Hello", L"H?l?o"));
95   EXPECT_FALSE(WildcardMatchInsensitive(L"www.google.com", L"http://*)"));
96   EXPECT_FALSE(WildcardMatchInsensitive(L"Hello", L""));
97   EXPECT_TRUE(WildcardMatchInsensitive(L"Hello*", L"Hello*"));
98   EXPECT_TRUE(WildcardMatchInsensitive(L"1234-5678-1234-5678",
99                                        LR"(????-????-????-????)"));
100   EXPECT_TRUE(WildcardMatchInsensitive(L"1234-5678-1234-5678", L"*-*-*-*"));
101   EXPECT_TRUE(WildcardMatchInsensitive(L"123456789012345678",
102                                        LR"(?????????????????*)"));
103 
104   // Test the case insensitive comparison.
105   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"*.COM"));
106   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"WWW.*"));
107   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"*.*.COM"));
108   EXPECT_TRUE(WildcardMatchInsensitive(L"www.google.com", L"WWW.*.*"));
109   EXPECT_TRUE(WildcardMatchInsensitive(L"WWW.GooGLe.com", L"www.*.c?m"));
110 
111   // Test escape characters.
112   EXPECT_TRUE(WildcardMatchInsensitive(L"*", L"\\*"));
113   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"\\*"));
114   EXPECT_TRUE(WildcardMatchInsensitive(L"?", L"\\?"));
115   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"\\?"));
116   EXPECT_TRUE(WildcardMatchInsensitive(L"*?*", L"\\*\\?\\*"));
117   EXPECT_FALSE(WildcardMatchInsensitive(L"*x*", L"\\*\\?\\*"));
118   EXPECT_TRUE(WildcardMatchInsensitive(L"Hello*1234", L"He??o\\*1*"));
119   EXPECT_FALSE(WildcardMatchInsensitive(L"Hello*1234", L"He??o\\?1*"));
120 
121   EXPECT_TRUE(WStringWildcardMatchInsensitive(L":", L"::", L':'));
122   EXPECT_FALSE(WStringWildcardMatchInsensitive(L"*", L"\\*", L':'));
123   EXPECT_TRUE(WStringWildcardMatchInsensitive(L"*", L":*", L':'));
124   EXPECT_FALSE(WStringWildcardMatchInsensitive(L"?", L"\\?", L':'));
125   EXPECT_TRUE(WStringWildcardMatchInsensitive(L"?", L":?", L':'));
126 
127   EXPECT_TRUE(WStringWildcardMatchInsensitive(L"*", L"%*", L'%'));
128   EXPECT_FALSE(WStringWildcardMatchInsensitive(L"a", L"%*", L'%'));
129   EXPECT_TRUE(WStringWildcardMatchInsensitive(L"?", L"%?", L'%'));
130   EXPECT_FALSE(WStringWildcardMatchInsensitive(L"a", L"%?", L'%'));
131   EXPECT_TRUE(WStringWildcardMatchInsensitive(L"*?*", L"%*%?%*", L'%'));
132   EXPECT_FALSE(WStringWildcardMatchInsensitive(L"*x*", L"%*%?%*", L'%'));
133   EXPECT_TRUE(
134       WStringWildcardMatchInsensitive(L"Hello*1234", L"He??o%*1*", L'%'));
135   EXPECT_FALSE(
136       WStringWildcardMatchInsensitive(L"Hello*1234", L"He??o%?1*", L'%'));
137 
138   // Test the algorithmic complexity.
139   EXPECT_TRUE(WildcardMatchInsensitive(L"", L"********************"));
140   EXPECT_TRUE(WildcardMatchInsensitive(L"a", L"**********?*********"));
141   EXPECT_TRUE(WildcardMatchInsensitive(L"ab", L"****?********?******"));
142   EXPECT_TRUE(WildcardMatchInsensitive(L"axb", L"****?****x***?******"));
143   EXPECT_TRUE(WildcardMatchInsensitive(L"^axb$", L"^****?****x***?******$"));
144 
145   EXPECT_FALSE(
146       WildcardMatchInsensitive(L"a", L"x******************************"));
147   EXPECT_FALSE(
148       WildcardMatchInsensitive(L"a", L"******************************x"));
149   EXPECT_TRUE(
150       WildcardMatchInsensitive(L"a", L"?******************************"));
151   EXPECT_TRUE(
152       WildcardMatchInsensitive(L"a", L"******************************?"));
153   EXPECT_FALSE(WildcardMatchInsensitive(
154       L"1234", L"1********2********3********4********x"));
155   EXPECT_FALSE(
156       WildcardMatchInsensitive(L"1234567890", L"1*2*3*4*5*6*7*8*9*0x"));
157   EXPECT_TRUE(WildcardMatchInsensitive(L"1234567890", L"1*2*3*4*5*6*7*8*9*0"));
158   EXPECT_FALSE(WildcardMatchInsensitive(L"aaaaaaaaaaaaaaaaaaaaaaaaaa",
159                                         L"********************************b"));
160   EXPECT_TRUE(WildcardMatchInsensitive(L"aaaaaaaaaaaaaaaaaaaaaaaaaa",
161                                        L"********************************"));
162 
163   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"**********x*********"));
164   EXPECT_FALSE(WildcardMatchInsensitive(L"a", L"****a*****x*********"));
165   EXPECT_FALSE(WildcardMatchInsensitive(L"b", L"**********x*****b***"));
166   EXPECT_FALSE(WildcardMatchInsensitive(L"ab", L"***a*****x*****b***"));
167   EXPECT_TRUE(WildcardMatchInsensitive(L"axb", L"***a*****x*****b***"));
168 }
169 
// Checks that RemoveInvalidUTF8Chars drops the invalid bytes from each
// fixture, leaves the remaining characters (including spaces) in place, and
// returns already-valid input unchanged.
TEST(StringUtilTest, RemoveInvalidUTF8Chars) {
  // Preconditions on the fixtures: the inputs really are invalid UTF-8 and
  // the expected outputs really are valid. ASSERT_* stops the test early if
  // the fixtures themselves are wrong.
  ASSERT_FALSE(base::IsStringUTF8(kSomeInvalidUTF8Chars));
  ASSERT_FALSE(base::IsStringUTF8(kStartWithInvalidUTF8Char));
  ASSERT_FALSE(base::IsStringUTF8(kEndWithInvalidUTF8Chars));
  ASSERT_FALSE(base::IsStringUTF8(kOnlyInvalidUTF8Chars));
  ASSERT_FALSE(base::IsStringUTF8(kSingleInvalidUTF8Char));
  ASSERT_TRUE(base::IsStringUTF8(kPrunedOfUTF8Chars));
  ASSERT_TRUE(base::IsStringUTF8(kNoCharsLeftOnlySpaces));

  // Every invalid input must differ from its expected output, otherwise the
  // equality checks further down would pass without any byte being removed.
  // The second line previously repeated kEndWithInvalidUTF8Chars, leaving
  // kStartWithInvalidUTF8Char unchecked.
  EXPECT_STRNE(kPrunedOfUTF8Chars, kSomeInvalidUTF8Chars);
  EXPECT_STRNE(kPrunedOfUTF8Chars, kStartWithInvalidUTF8Char);
  EXPECT_STRNE(kPrunedOfUTF8Chars, kEndWithInvalidUTF8Chars);
  EXPECT_STRNE("", kSingleInvalidUTF8Char);
  EXPECT_STRNE(kNoCharsLeftOnlySpaces, kOnlyInvalidUTF8Chars);

  // Invalid bytes in the middle, at the start and at the end are removed.
  EXPECT_STREQ(kPrunedOfUTF8Chars,
               RemoveInvalidUTF8Chars(kSomeInvalidUTF8Chars).c_str());
  EXPECT_STREQ(kPrunedOfUTF8Chars,
               RemoveInvalidUTF8Chars(kStartWithInvalidUTF8Char).c_str());
  EXPECT_STREQ(kPrunedOfUTF8Chars,
               RemoveInvalidUTF8Chars(kEndWithInvalidUTF8Chars).c_str());
  EXPECT_STREQ(kNoCharsLeftOnlySpaces,
               RemoveInvalidUTF8Chars(kOnlyInvalidUTF8Chars).c_str());

  // Valid input comes back unchanged.
  EXPECT_STREQ(kPrunedOfUTF8Chars,
               RemoveInvalidUTF8Chars(kPrunedOfUTF8Chars).c_str());
  EXPECT_STREQ(kNoCharsLeftOnlySpaces,
               RemoveInvalidUTF8Chars(kNoCharsLeftOnlySpaces).c_str());

  // A string made of a single invalid byte becomes empty.
  EXPECT_STREQ("", RemoveInvalidUTF8Chars(kSingleInvalidUTF8Char).c_str());
}
199 
// Checks that WStringInsensitiveLess orders "a" before "b" whatever the case
// of either operand, and never reports a string as less than a case variant
// of itself.
TEST(StringUtilTest, WStringInsensitiveLess) {
  const wchar_t* const kLetterA[] = {L"a", L"A"};
  const wchar_t* const kLetterB[] = {L"b", L"B"};

  WStringInsensitiveLess comparator;

  // "a" < "b" holds for every case combination.
  for (const wchar_t* b : kLetterB) {
    for (const wchar_t* a : kLetterA) {
      SCOPED_TRACE(::testing::Message() << a << " < " << b);
      EXPECT_TRUE(comparator(a, b));
    }
  }

  // "b" < "a" never holds.
  for (const wchar_t* a : kLetterA) {
    for (const wchar_t* b : kLetterB) {
      SCOPED_TRACE(::testing::Message() << b << " < " << a);
      EXPECT_FALSE(comparator(b, a));
    }
  }

  // Case variants of the same letter are equivalent: neither is less.
  for (const wchar_t* lhs : kLetterA) {
    for (const wchar_t* rhs : kLetterA) {
      SCOPED_TRACE(::testing::Message() << lhs << " < " << rhs);
      EXPECT_FALSE(comparator(lhs, rhs));
    }
  }
}
217 
// Checks that WStringCaseInsensitiveSet looks up and inserts its entries
// without regard to case.
TEST(StringUtilTest, WStringInsensitiveSet) {
  const wchar_t* const kPresentKeys[] = {L"a", L"A", L"b", L"B"};
  const wchar_t* const kAbsentKeys[] = {L"c", L"C"};

  WStringCaseInsensitiveSet entries = {L"a", L"B"};

  // Both entries are found under either case.
  for (const wchar_t* key : kPresentKeys) {
    SCOPED_TRACE(::testing::Message() << "key=\"" << key << "\"");
    EXPECT_NE(entries.find(key), entries.end());
  }

  // A letter that was never inserted is not found under either case.
  for (const wchar_t* key : kAbsentKeys) {
    SCOPED_TRACE(::testing::Message() << "key=\"" << key << "\"");
    EXPECT_EQ(entries.find(key), entries.end());
  }

  // Inserting a case variant of an existing entry is rejected; a new letter
  // is accepted.
  EXPECT_FALSE(entries.insert(L"A").second);
  EXPECT_FALSE(entries.insert(L"b").second);
  EXPECT_TRUE(entries.insert(L"c").second);
}
230 
231 }  // namespace chrome_cleaner
232