1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13 #include <type_traits>
14
15 #include "base/stl_util.h"
16 #include "base/strings/string16.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "build/build_config.h"
19 #include "testing/gmock/include/gmock/gmock.h"
20 #include "testing/gtest/include/gtest/gtest.h"
21
22 using ::testing::ElementsAre;
23
24 namespace base {
25
26 static const struct trim_case {
27 const wchar_t* input;
28 const TrimPositions positions;
29 const wchar_t* output;
30 const TrimPositions return_value;
31 } trim_cases[] = {
32 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
33 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
34 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
35 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
36 {L"", TRIM_ALL, L"", TRIM_NONE},
37 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
38 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
39 {L" ", TRIM_ALL, L"", TRIM_ALL},
40 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
41 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
42 };
43
44 static const struct trim_case_ascii {
45 const char* input;
46 const TrimPositions positions;
47 const char* output;
48 const TrimPositions return_value;
49 } trim_cases_ascii[] = {
50 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
51 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
52 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
53 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
54 {"", TRIM_ALL, "", TRIM_NONE},
55 {" ", TRIM_LEADING, "", TRIM_LEADING},
56 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
57 {" ", TRIM_ALL, "", TRIM_ALL},
58 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
59 };
60
61 namespace {
62
63 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)64 bool Truncated(const std::string& input,
65 const size_t byte_size,
66 std::string* output) {
67 size_t prev = input.length();
68 TruncateUTF8ToByteSize(input, byte_size, output);
69 return prev != output->length();
70 }
71
72 using TestFunction = bool (*)(StringPiece str);
73
74 // Helper used to test IsStringUTF8{,AllowingNoncharacters}.
TestStructurallyValidUtf8(TestFunction fn)75 void TestStructurallyValidUtf8(TestFunction fn) {
76 EXPECT_TRUE(fn("abc"));
77 EXPECT_TRUE(fn("\xC2\x81"));
78 EXPECT_TRUE(fn("\xE1\x80\xBF"));
79 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
80 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
81 EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
82
83 // U+FEFF used as UTF-8 BOM.
84 // clang-format off
85 EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
86 // clang-format on
87
88 // Embedded nulls in canonical UTF-8 representation.
89 using std::string_literals::operator""s;
90 const std::string kEmbeddedNull = "embedded\0null"s;
91 EXPECT_TRUE(fn(kEmbeddedNull));
92 }
93
94 // Helper used to test IsStringUTF8{,AllowingNoncharacters}.
TestStructurallyInvalidUtf8(TestFunction fn)95 void TestStructurallyInvalidUtf8(TestFunction fn) {
96 // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
97 EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));
98
99 // Surrogate code points
100 EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
101 EXPECT_FALSE(fn("\xED\xA0\x8F"));
102 EXPECT_FALSE(fn("\xED\xBF\xBF"));
103
104 // Overlong sequences
105 EXPECT_FALSE(fn("\xC0\x80")); // U+0000
106 EXPECT_FALSE(fn("\xC1\x80\xC1\x81")); // "AB"
107 EXPECT_FALSE(fn("\xE0\x80\x80")); // U+0000
108 EXPECT_FALSE(fn("\xE0\x82\x80")); // U+0080
109 EXPECT_FALSE(fn("\xE0\x9F\xBF")); // U+07FF
110 EXPECT_FALSE(fn("\xF0\x80\x80\x8D")); // U+000D
111 EXPECT_FALSE(fn("\xF0\x80\x82\x91")); // U+0091
112 EXPECT_FALSE(fn("\xF0\x80\xA0\x80")); // U+0800
113 EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF")); // U+FEFF (BOM)
114 EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF")); // U+003F
115 EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5")); // U+00A5
116
117 // Beyond U+10FFFF (the upper limit of Unicode codespace)
118 EXPECT_FALSE(fn("\xF4\x90\x80\x80")); // U+110000
119 EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF")); // 5 bytes
120 EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80")); // 6 bytes
121
122 // BOM in UTF-16(BE|LE)
123 EXPECT_FALSE(fn("\xFE\xFF"));
124 EXPECT_FALSE(fn("\xFF\xFE"));
125
126 // Strings in legacy encodings. We can certainly make up strings
127 // in a legacy encoding that are valid in UTF-8, but in real data,
128 // most of them are invalid as UTF-8.
129
130 // cafe with U+00E9 in ISO-8859-1
131 EXPECT_FALSE(fn("caf\xE9"));
132 // U+AC00, U+AC001 in EUC-KR
133 EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
134 // U+4F60 U+597D in Big5
135 EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
136 // "abc" with U+201[CD] in windows-125[0-8]
137 // clang-format off
138 EXPECT_FALSE(fn("\x93" "abc\x94"));
139 // clang-format on
140 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
141 EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
142 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
143 EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));
144
145 // BOM in UTF-32(BE|LE)
146 using std::string_literals::operator""s;
147 const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
148 EXPECT_FALSE(fn(kUtf32BeBom));
149 const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
150 EXPECT_FALSE(fn(kUtf32LeBom));
151 }
152
153 // Helper used to test IsStringUTF8{,AllowingNoncharacters}.
TestNoncharacters(TestFunction fn,bool expected_result)154 void TestNoncharacters(TestFunction fn, bool expected_result) {
155 EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result); // U+FDD0
156 EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result); // U+FDDF
157 EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result); // U+FDEF
158 EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result); // U+FFFE
159 EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result); // U+FFFF
160 EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result); // U+01FFFE
161 EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result); // U+01FFFF
162 EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result); // U+02FFFE
163 EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result); // U+02FFFF
164 EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result); // U+03FFFE
165 EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result); // U+03FFFF
166 EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result); // U+04FFFE
167 EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result); // U+04FFFF
168 EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result); // U+05FFFE
169 EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result); // U+05FFFF
170 EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result); // U+06FFFE
171 EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result); // U+06FFFF
172 EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result); // U+07FFFE
173 EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result); // U+07FFFF
174 EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result); // U+08FFFE
175 EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result); // U+08FFFF
176 EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result); // U+09FFFE
177 EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result); // U+09FFFF
178 EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result); // U+0AFFFE
179 EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result); // U+0AFFFF
180 EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result); // U+0BFFFE
181 EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result); // U+0BFFFF
182 EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result); // U+0CFFFE
183 EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result); // U+0CFFFF
184 EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result); // U+0DFFFE
185 EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result); // U+0DFFFF
186 EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result); // U+0EFFFE
187 EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result); // U+0EFFFF
188 EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result); // U+0FFFFE
189 EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result); // U+0FFFFF
190 EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result); // U+10FFFE
191 EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result); // U+10FFFF
192 }
193
194 } // namespace
195
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper: each
// step checks whether the result length differs from the input length and,
// in most cases, what the resulting bytes are.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  // Shared by every call below; near the end it is also passed as the input.
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ("ab", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ("\xc2\x81", output);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ("\xc2\x81\xc2\x81", output);

  {
    // The explicit length keeps the NUL bytes; base::size() of the array
    // also counts the literal's terminating NUL.
    const char raw[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string input(raw, base::size(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\x00\xc2\x81", 4), output);
  }

  {
    const char raw[] = "\x00\xc2\x81\xc2\x81";
    const std::string input(raw, base::size(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\xc2\x81", 3), output);
  }

  // Testing invalid UTF8
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_TRUE(output.empty());

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ("\xe1\x80\xbf", output);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ("\xf1\x80\xa0\xbf", output);
  EXPECT_FALSE(
      Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", output);
  // The literals are split around "a" so the preceding hex escape does not
  // absorb it.
  EXPECT_TRUE(
      Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 10, &output));
  EXPECT_EQ("a\xc2\x81\xe1\x80\xbf\xf1""a", output);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ("\xef\xbb\xbf" "abc", output);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_TRUE(output.empty());

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_TRUE(output.empty());

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_TRUE(output.empty());

  {
    const char raw[] = "\x00\x00\xfe\xff";
    const std::string input(raw, base::size(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\x00\x00", 2), output);
  }

  // Variants on the previous test
  {
    // Exactly four bytes here: the terminating NUL is not part of the input.
    const char raw[] = "\xff\xfe\x00\x00";
    const std::string input(raw, 4);
    EXPECT_FALSE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\xff\xfe\x00\x00", 4), output);
  }
  {
    const char raw[] = "\xff\x00\x00\xfe";
    const std::string input(raw, base::size(raw));
    EXPECT_TRUE(Truncated(input, 4, &output));
    EXPECT_EQ(std::string("\xff\x00\x00", 3), output);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_TRUE(output.empty());

  // Strings in legacy encodings that are valid in UTF-8, but
  // are invalid as UTF-8 in real data.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ("caf", output);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_TRUE(output.empty());
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);

  // Testing using the same string as input and output. These two steps rely
  // on the value left in |output| by the check just above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ("\xa7\x41\xa6\x6e", output);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ("\xa7\x41", output);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ("\x93" "abc", output);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_TRUE(output.empty());

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_TRUE(output.empty());
}
354
355 #if defined(WCHAR_T_IS_UTF16)
// Checks as_writable_wcstr() / as_wcstr() on char16-based arguments. Each
// section verifies the static return type and that the returned pointer
// addresses the same storage as the argument.
TEST(StringUtilTest, as_wcstr) {
  // Writable char16 array.
  char16 writable_array[10] = {};
  static_assert(
      std::is_same<wchar_t*,
                   decltype(as_writable_wcstr(writable_array))>::value,
      "");
  EXPECT_EQ(static_cast<void*>(writable_array),
            as_writable_wcstr(writable_array));

  // Writable string16.
  string16 writable_string(10, '\0');
  static_assert(
      std::is_same<wchar_t*,
                   decltype(as_writable_wcstr(writable_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(writable_string.data()),
            as_writable_wcstr(writable_string));

  // Read-only char16 array.
  const char16 readonly_array[10] = {};
  static_assert(
      std::is_same<const wchar_t*,
                   decltype(as_wcstr(readonly_array))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(readonly_array),
            as_wcstr(readonly_array));

  // Read-only string16.
  const string16 readonly_string(10, '\0');
  static_assert(
      std::is_same<const wchar_t*,
                   decltype(as_wcstr(readonly_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(readonly_string.data()),
            as_wcstr(readonly_string));

  // StringPiece16 constructed from the read-only array.
  StringPiece16 view = readonly_array;
  static_assert(
      std::is_same<const wchar_t*, decltype(as_wcstr(view))>::value, "");
  EXPECT_EQ(static_cast<const void*>(view.data()), as_wcstr(view));
}
383
// Checks as_writable_u16cstr() / as_u16cstr() on wchar_t-based arguments.
// Each section verifies the static return type and that the returned pointer
// addresses the same storage as the argument.
TEST(StringUtilTest, as_u16cstr) {
  // Writable wchar_t array.
  wchar_t writable_array[10] = {};
  static_assert(
      std::is_same<char16*,
                   decltype(as_writable_u16cstr(writable_array))>::value,
      "");
  EXPECT_EQ(static_cast<void*>(writable_array),
            as_writable_u16cstr(writable_array));

  // Writable std::wstring.
  std::wstring writable_string(10, '\0');
  static_assert(
      std::is_same<char16*,
                   decltype(as_writable_u16cstr(writable_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(writable_string.data()),
            as_writable_u16cstr(writable_string));

  // Read-only wchar_t array.
  const wchar_t readonly_array[10] = {};
  static_assert(
      std::is_same<const char16*,
                   decltype(as_u16cstr(readonly_array))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(readonly_array),
            as_u16cstr(readonly_array));

  // Read-only std::wstring.
  const std::wstring readonly_string(10, '\0');
  static_assert(
      std::is_same<const char16*,
                   decltype(as_u16cstr(readonly_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(readonly_string.data()),
            as_u16cstr(readonly_string));

  // WStringPiece constructed from the read-only array.
  WStringPiece view = readonly_array;
  static_assert(
      std::is_same<const char16*, decltype(as_u16cstr(view))>::value, "");
  EXPECT_EQ(static_cast<const void*>(view.data()), as_u16cstr(view));
}
412 #endif // defined(WCHAR_T_IS_UTF16)
413
// Runs the trim tables through TrimWhitespace() and TrimWhitespaceASCII(),
// and checks that TrimWhitespace() works when input and output are the same
// string object.
TEST(StringUtilTest, TrimWhitespace) {
  // Deliberately declared outside the loop so that contents carry over from
  // one test case to the next.
  string16 trimmed16;
  for (const auto& test_case : trim_cases) {
    const string16 input16 = WideToUTF16(test_case.input);
    const TrimPositions trimmed_ends =
        TrimWhitespace(input16, test_case.positions, &trimmed16);
    EXPECT_EQ(test_case.return_value, trimmed_ends);
    EXPECT_EQ(WideToUTF16(test_case.output), trimmed16);
  }

  // Test that TrimWhitespace() can take the same string for input and output
  trimmed16 = ASCIIToUTF16(" This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed16, TRIM_ALL, &trimmed16));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed16);

  // Once more, but with a string of whitespace
  trimmed16 = ASCIIToUTF16(" \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed16, TRIM_ALL, &trimmed16));
  EXPECT_EQ(string16(), trimmed16);

  // Narrow-string variant; the output string is reused across cases too.
  std::string trimmed_ascii;
  for (const auto& test_case : trim_cases_ascii) {
    const TrimPositions trimmed_ends = TrimWhitespaceASCII(
        test_case.input, test_case.positions, &trimmed_ascii);
    EXPECT_EQ(test_case.return_value, trimmed_ends);
    EXPECT_EQ(test_case.output, trimmed_ascii);
  }
}
440
// Test vectors for the UTF-16 flavour of CollapseWhitespace(). Inputs and
// expectations are wide literals that the test converts with WideToUTF16().
static const struct collapse_case {
  // Text handed to CollapseWhitespace().
  const wchar_t* input;
  // Second argument of the call. Judging by the expectations below, when
  // true, whitespace runs that contain a line break are removed entirely
  // rather than collapsed to one space. NOTE(review): confirm against the
  // CollapseWhitespace() declaration.
  const bool trim;
  // Expected result of the call.
  const wchar_t* output;
} collapse_cases[] = {
    // |trim| == false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},

    // |trim| == true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
466
// Feeds every row of |collapse_cases| through CollapseWhitespace() and
// compares the result with the expected UTF-16 string.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const auto& test_case : collapse_cases) {
    const string16 expected = WideToUTF16(test_case.output);
    const string16 input16 = WideToUTF16(test_case.input);
    EXPECT_EQ(expected, CollapseWhitespace(input16, test_case.trim));
  }
}
473
// Test vectors for CollapseWhitespaceASCII(); narrow-string counterpart of
// the wide-literal collapse table.
static const struct collapse_case_ascii {
  // Text handed to CollapseWhitespaceASCII().
  const char* input;
  // Second argument of the call. Judging by the expectations below, when
  // true, whitespace runs that contain a line break are removed entirely
  // rather than collapsed to one space. NOTE(review): confirm against the
  // CollapseWhitespaceASCII() declaration.
  const bool trim;
  // Expected result of the call.
  const char* output;
} collapse_cases_ascii[] = {
    // |trim| == false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},

    // |trim| == true.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
497
// Feeds every row of |collapse_cases_ascii| through
// CollapseWhitespaceASCII() and compares the result with the expectation.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const auto& test_case : collapse_cases_ascii) {
    const std::string collapsed =
        CollapseWhitespaceASCII(test_case.input, test_case.trim);
    EXPECT_EQ(test_case.output, collapsed);
  }
}
503
// Runs the shared UTF-8 helpers against both validators. The two differ only
// in the expected verdict on noncharacters: IsStringUTF8 must reject them,
// IsStringUTF8AllowingNoncharacters must accept them.
TEST(StringUtilTest, IsStringUTF8) {
  {
    SCOPED_TRACE("IsStringUTF8");
    const TestFunction strict = &IsStringUTF8;
    TestStructurallyValidUtf8(strict);
    TestStructurallyInvalidUtf8(strict);
    TestNoncharacters(strict, false);
  }

  {
    SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
    const TestFunction lenient = &IsStringUTF8AllowingNoncharacters;
    TestStructurallyValidUtf8(lenient);
    TestStructurallyInvalidUtf8(lenient);
    TestNoncharacters(lenient, true);
  }
}
519
// Checks IsStringASCII() on char, char16 and (where wchar_t is UTF-32)
// wchar_t input, over many fragment offsets and lengths.
TEST(StringUtilTest, IsStringASCII) {
  // The buffers are mutable: single characters are temporarily turned into
  // non-ASCII values below and restored right after each check.
  static char char_ascii[] =
      "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  static char16 char16_ascii[] = {
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
      'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
      '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
  static std::wstring wchar_ascii(
      L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

  // Test a variety of the fragment start positions and lengths in order to make
  // sure that bit masking in IsStringASCII works correctly.
  // Also, test that a non-ASCII character will be detected regardless of its
  // position inside the string.
  {
    const size_t string_length = base::size(char_ascii) - 1;
    for (size_t offset = 0; offset < 8; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
        // The fragment under test covers [offset, offset + len), so every
        // index in that range is corrupted in turn. The upper bound stays
        // below |string_length| because len < string_length - offset.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char_ascii[char_pos] |= '\x80';
          EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
          char_ascii[char_pos] &= ~'\x80';
        }
      }
    }
  }

  {
    const size_t string_length = base::size(char16_ascii) - 1;
    for (size_t offset = 0; offset < 4; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
        // Same range as above: every index inside the fragment.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char16_ascii[char_pos] |= 0x80;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x80;
          // Also test when the upper half is non-zero.
          char16_ascii[char_pos] |= 0x100;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x100;
        }
      }
    }
  }

#if defined(WCHAR_T_IS_UTF32)
  {
    // Fragments always start at index 0 here, so [0, len) already covers
    // every position inside the fragment.
    const size_t string_length = wchar_ascii.length();
    for (size_t len = 0; len < string_length; ++len) {
      EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
      for (size_t char_pos = 0; char_pos < len; ++char_pos) {
        wchar_ascii[char_pos] |= 0x80;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x80;
        wchar_ascii[char_pos] |= 0x100;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x100;
        wchar_ascii[char_pos] |= 0x10000;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x10000;
      }
    }
  }
#endif  // WCHAR_T_IS_UTF32
}
590
// Round-trips ASCII text through ASCIIToUTF16() and UTF16ToASCII(),
// including empty strings and a string with an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // |char_cases| and |wchar_cases| are parallel tables: entry i of one is the
  // same text as entry i of the other.
  static const char* const char_cases[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const wchar_cases[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t i = 0; i < base::size(char_cases); ++i) {
    EXPECT_TRUE(IsStringASCII(char_cases[i]));
    string16 utf16 = ASCIIToUTF16(char_cases[i]);
    EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);

    std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
    EXPECT_EQ(char_cases[i], ascii);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Convert empty strings.
  string16 empty16;
  std::string empty;
  EXPECT_EQ(empty, UTF16ToASCII(empty16));
  EXPECT_EQ(empty16, ASCIIToUTF16(empty));

  // Convert strings with an embedded NUL character.
  const char chars_with_nul[] = "test\0string";
  // Array size minus the terminating NUL. Kept as size_t so it can be
  // compared with string lengths without narrowing or casting back.
  const size_t length_with_nul = base::size(chars_with_nul) - 1;
  std::string string_with_nul(chars_with_nul, length_with_nul);
  string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
  EXPECT_EQ(length_with_nul, string16_with_nul.length());
  std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
}
633
// Checks ToLowerASCII() on single char and char16 values and on whole
// strings of both widths.
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: upper case, already lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16 values.
  const char16 upper_c16 = static_cast<char16>('C');
  const char16 lower_c16 = static_cast<char16>('c');
  const char16 digit_two16 = static_cast<char16>('2');
  EXPECT_EQ(lower_c16, ToLowerASCII(upper_c16));
  EXPECT_EQ(lower_c16, ToLowerASCII(lower_c16));
  EXPECT_EQ(digit_two16, ToLowerASCII(digit_two16));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
646
// Checks ToUpperASCII() on single char and char16 values and on whole
// strings of both widths.
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: already upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16 values.
  const char16 upper_c16 = static_cast<char16>('C');
  const char16 lower_c16 = static_cast<char16>('c');
  const char16 digit_two16 = static_cast<char16>('2');
  EXPECT_EQ(upper_c16, ToUpperASCII(upper_c16));
  EXPECT_EQ(upper_c16, ToUpperASCII(lower_c16));
  EXPECT_EQ(digit_two16, ToUpperASCII(digit_two16));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
659
// Checks that LowerCaseEqualsASCII() reports a match for mixed-case,
// lower-case and upper-case spellings of the same word, for both UTF-16 and
// narrow input.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    // Text passed as the first argument.
    const char* src_a;
    // Lower-case text passed as the second argument.
    const char* dst;
  } lowercase_cases[] = {
      {"FoO", "foo"},
      {"foo", "foo"},
      {"FOO", "foo"},
  };

  for (const auto& test_case : lowercase_cases) {
    const string16 src16 = ASCIIToUTF16(test_case.src_a);
    EXPECT_TRUE(LowerCaseEqualsASCII(src16, test_case.dst));
    EXPECT_TRUE(LowerCaseEqualsASCII(test_case.src_a, test_case.dst));
  }
}
675
// Checks the text FormatBytesUnlocalized() produces for a range of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    // Byte count passed to FormatBytesUnlocalized().
    int64_t bytes;
    // Expected formatted text.
    const char* expected;
  } cases[] = {
      // Expected behavior: we show one post-decimal digit when we have
      // under two pre-decimal digits, except in cases where it makes no
      // sense (zero or bytes).
      // Since we switch units once we cross the 1000 mark, this keeps
      // the display of file sizes or bytes consistently around three
      // digits.
      {0, "0 B"},
      {512, "512 B"},
      {1024*1024, "1.0 MB"},
      {1024*1024*1024, "1.0 GB"},
      {10LL*1024*1024*1024, "10.0 GB"},
      {99LL*1024*1024*1024, "99.0 GB"},
      {105LL*1024*1024*1024, "105 GB"},
      {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
      // All bits set except bit 63.
      {~(1LL << 63), "8192 PB"},

      {99*1024 + 103, "99.1 kB"},
      {1024*1024 + 103, "1.0 MB"},
      {1024*1024 + 205 * 1024, "1.2 MB"},
      {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
      {10LL*1024*1024*1024, "10.0 GB"},
      {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& test_case : cases) {
    const string16 expected16 = ASCIIToUTF16(test_case.expected);
    EXPECT_EQ(expected16, FormatBytesUnlocalized(test_case.bytes));
  }
}
// Runs one table of scenarios through ReplaceSubstringsAfterOffset() three
// times: on string16, on a std::string with minimal capacity, and on a
// std::string with plenty of spare capacity.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    // Initial contents of the string that is modified in place.
    StringPiece str;
    // Offset passed to the call as the position to start from.
    size_t start_offset;
    // Substring to look for.
    StringPiece find_this;
    // Text substituted for each match.
    StringPiece replace_with;
    // Expected contents of the string after the call.
    StringPiece expected;
  } cases[] = {
      {"aaa", 0, "", "b", "aaa"},
      {"aaa", 1, "", "b", "aaa"},
      {"aaa", 0, "a", "b", "bbb"},
      {"aaa", 0, "aa", "b", "ba"},
      {"aaa", 0, "aa", "bbb", "bbba"},
      {"aaaaa", 0, "aa", "b", "bba"},
      {"ababaaababa", 0, "aba", "", "baaba"},
      {"ababaaababa", 0, "aba", "_", "_baa_ba"},
      {"ababaaababa", 0, "aba", "__", "__baa__ba"},
      {"ababaaababa", 0, "aba", "___", "___baa___ba"},
      {"ababaaababa", 0, "aba", "____", "____baa____ba"},
      {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMakingFour score and seven years agoit"
       "Four score and seven years agomuchFour score and seven years agolonger"
       "Four score and seven years ago"},
      // Removing the inner text leaves the leading and the trailing space of
      // the input, i.e. two spaces.
      {" Making it much much much much shorter ", 0,
       "Making it much much much much shorter", "", "  "},
      {"so much much much much much very much much much shorter", 0, "much ",
       "", "so very shorter"},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 9, "me ", "", "Replace me only once"},
      {"abababab", 2, "ab", "c", "abccc"},
      {"abababab", 1, "ab", "c", "abccc"},
      {"abababab", 1, "aba", "c", "abcbab"},
  };

  // base::string16 variant
  for (const auto& scenario : cases) {
    string16 str = ASCIIToUTF16(scenario.str);
    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 ASCIIToUTF16(scenario.find_this),
                                 ASCIIToUTF16(scenario.replace_with));
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
  }

  // std::string with insufficient capacity: expansion must realloc the buffer.
  for (const auto& scenario : cases) {
    std::string str = scenario.str.as_string();
    str.shrink_to_fit();  // This is nonbinding, but it's the best we've got.
    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 scenario.find_this, scenario.replace_with);
    EXPECT_EQ(scenario.expected, str);
  }

  // std::string with ample capacity: should be possible to grow in-place.
  for (const auto& scenario : cases) {
    std::string str = scenario.str.as_string();
    // Twice the longer of the input and the expected result.
    str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
                2);

    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 scenario.find_this, scenario.replace_with);
    EXPECT_EQ(scenario.expected, str);
  }
}
777
// Runs a table of scenarios through ReplaceFirstSubstringAfterOffset() on
// string16 input.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    // Initial contents of the string that is modified in place.
    const char* str;
    // Offset passed to the call as the position to start from.
    string16::size_type start_offset;
    // Substring to look for.
    const char* find_this;
    // Text substituted for the match.
    const char* replace_with;
    // Expected contents of the string after the call.
    const char* expected;
  } cases[] = {
      {"aaa", 0, "a", "b", "baa"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "",
       "Remov some substrings inging"},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMaking it much longer "},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 4, "me ", "", "Replace only me once"},
      {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& scenario : cases) {
    string16 subject = ASCIIToUTF16(scenario.str);
    const string16 needle = ASCIIToUTF16(scenario.find_this);
    const string16 replacement = ASCIIToUTF16(scenario.replace_with);
    ReplaceFirstSubstringAfterOffset(&subject, scenario.start_offset, needle,
                                     replacement);
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), subject);
  }
}
807
// Checks HexDigitToInt() for every hexadecimal digit, in both letter cases.
TEST(StringUtilTest, HexDigitToInt) {
  static const char kDecimalDigits[] = "0123456789";
  static const char kUpperCaseLetters[] = "ABCDEF";
  static const char kLowerCaseLetters[] = "abcdef";

  // '0'..'9' are expected to map to 0..9.
  for (int value = 0; kDecimalDigits[value] != '\0'; ++value)
    EXPECT_EQ(value, HexDigitToInt(kDecimalDigits[value]));

  // 'A'..'F' are expected to map to 10..15.
  for (int index = 0; kUpperCaseLetters[index] != '\0'; ++index)
    EXPECT_EQ(10 + index, HexDigitToInt(kUpperCaseLetters[index]));

  // Verify the lower case as well.
  for (int index = 0; kLowerCaseLetters[index] != '\0'; ++index)
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerCaseLetters[index]));
}
834
// Exercises JoinString() on a vector of std::string parts, growing the vector
// step by step.
TEST(StringUtilTest, JoinString) {
  const std::string comma(", ");
  std::vector<std::string> pieces;

  // No parts at all.
  EXPECT_EQ(std::string(), JoinString(pieces, comma));

  // A single empty part.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, comma));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma));

  // A trailing empty part is still preceded by a separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma));

  // Same parts plus a lone space, joined with a one-character separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
856
// string16 counterpart of the JoinString test: joins a growing vector of
// string16 parts.
TEST(StringUtilTest, JoinString16) {
  const string16 comma = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // No parts at all.
  EXPECT_EQ(string16(), JoinString(pieces, comma));

  // A single empty part.
  pieces.emplace_back();
  EXPECT_EQ(string16(), JoinString(pieces, comma));
  pieces.clear();

  pieces.emplace_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma));

  pieces.emplace_back(ASCIIToUTF16("b"));
  pieces.emplace_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma));

  // A trailing empty part is still preceded by a separator.
  pieces.emplace_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma));

  // Same parts plus a lone space, joined with a one-character separator.
  pieces.emplace_back(ASCIIToUTF16(" "));
  const string16 pipe = ASCIIToUTF16("|");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, pipe));
}
878
// Exercises JoinString() on a vector of StringPiece parts.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string comma(", ");
  std::vector<StringPiece> pieces;

  // No parts at all.
  EXPECT_EQ(std::string(), JoinString(pieces, comma));

  // Test empty first part (https://crbug.com/698073).
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, comma));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma));

  // A trailing default-constructed piece is still preceded by a separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma));

  // Same parts plus a lone space, joined with a one-character separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
901
// Exercises JoinString() on a vector of StringPiece16 parts.
TEST(StringUtilTest, JoinStringPiece16) {
  // Backing storage for the pieces pushed below; declared up front so every
  // string16 is alive for as long as the vector refers to it.
  const string16 kA = ASCIIToUTF16("a");
  const string16 kB = ASCIIToUTF16("b");
  const string16 kC = ASCIIToUTF16("c");
  const string16 kSpace = ASCIIToUTF16(" ");

  const string16 comma = ASCIIToUTF16(", ");
  std::vector<StringPiece16> pieces;

  // No parts at all.
  EXPECT_EQ(string16(), JoinString(pieces, comma));

  // Test empty first part (https://crbug.com/698073).
  pieces.push_back(StringPiece16());
  EXPECT_EQ(string16(), JoinString(pieces, comma));
  pieces.clear();

  pieces.push_back(kA);
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma));

  pieces.push_back(kB);
  pieces.push_back(kC);
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma));

  // A trailing default-constructed piece is still preceded by a separator.
  pieces.push_back(StringPiece16());
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma));

  // Same parts plus a lone space, joined with a one-character separator.
  pieces.push_back(kSpace);
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
928
// Exercises JoinString() when the parts are passed as a braced list, using
// string literals, std::strings and StringPieces as elements.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string comma(", ");

  // Empty list.
  EXPECT_EQ(std::string(), JoinString({}, comma));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(std::string(), JoinString({StringPiece()}, comma));

  // With const char*s.
  EXPECT_EQ("a", JoinString({"a"}, comma));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, comma));
  EXPECT_EQ("a, b, c, ", JoinString({"a", "b", "c", StringPiece()}, comma));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // With std::strings.
  const std::string first = "a";
  const std::string second = "b";
  EXPECT_EQ("a, b", JoinString({first, second}, comma));

  // With StringPieces.
  const StringPiece first_piece = first;
  const StringPiece second_piece = second;
  EXPECT_EQ("a, b", JoinString({first_piece, second_piece}, comma));
}
952
// string16 counterpart of JoinStringInitializerList: parts are passed as a
// braced list of string16s and StringPiece16s.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const string16 comma = ASCIIToUTF16(", ");
  const string16 pipe = ASCIIToUTF16("|");

  // Empty list.
  EXPECT_EQ(string16(), JoinString({}, comma));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(string16(), JoinString({StringPiece16()}, comma));

  // With string16s.
  const string16 first = ASCIIToUTF16("a");
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString({first}, comma));

  const string16 second = ASCIIToUTF16("b");
  const string16 third = ASCIIToUTF16("c");
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString({first, second, third}, comma));

  // A trailing default-constructed piece is still preceded by a separator.
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
            JoinString({first, second, third, StringPiece16()}, comma));

  const string16 space = ASCIIToUTF16(" ");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "),
            JoinString({first, second, third, StringPiece16(), space}, pipe));

  // With StringPiece16s.
  const StringPiece16 first_piece = first;
  const StringPiece16 second_piece = second;
  EXPECT_EQ(ASCIIToUTF16("a, b"),
            JoinString({first_piece, second_piece}, comma));
}
980
// Checks StartsWith() in case-sensitive and ASCII-case-insensitive modes,
// first with 8-bit strings and then with the same inputs as string16.
TEST(StringUtilTest, StartsWith) {
  const CompareCase kSensitive = CompareCase::SENSITIVE;
  const CompareCase kInsensitive = CompareCase::INSENSITIVE_ASCII;

  // 8-bit strings: matching prefix, with and without a case difference.
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kSensitive));
  EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", kSensitive));
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kInsensitive));
  EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", kInsensitive));
  // Input shorter than the prefix.
  EXPECT_FALSE(StartsWith("java", "javascript", kSensitive));
  EXPECT_FALSE(StartsWith("java", "javascript", kInsensitive));
  // Empty input.
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kInsensitive));
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kSensitive));
  // Empty prefix.
  EXPECT_TRUE(StartsWith("java", std::string(), kInsensitive));
  EXPECT_TRUE(StartsWith("java", std::string(), kSensitive));

  // The same expectations for 16-bit strings.
  const string16 prefix16 = ASCIIToUTF16("javascript");
  const string16 lower_url16 = ASCIIToUTF16("javascript:url");
  const string16 mixed_url16 = ASCIIToUTF16("JavaScript:url");
  const string16 java16 = ASCIIToUTF16("java");

  EXPECT_TRUE(StartsWith(lower_url16, prefix16, kSensitive));
  EXPECT_FALSE(StartsWith(mixed_url16, prefix16, kSensitive));
  EXPECT_TRUE(StartsWith(lower_url16, prefix16, kInsensitive));
  EXPECT_TRUE(StartsWith(mixed_url16, prefix16, kInsensitive));
  EXPECT_FALSE(StartsWith(java16, prefix16, kSensitive));
  EXPECT_FALSE(StartsWith(java16, prefix16, kInsensitive));
  EXPECT_FALSE(StartsWith(string16(), prefix16, kInsensitive));
  EXPECT_FALSE(StartsWith(string16(), prefix16, kSensitive));
  EXPECT_TRUE(StartsWith(java16, string16(), kInsensitive));
  EXPECT_TRUE(StartsWith(java16, string16(), kSensitive));
}
1026
// Checks EndsWith() on string16 input in case-sensitive and
// ASCII-case-insensitive modes.
TEST(StringUtilTest, EndsWith) {
  const CompareCase kSensitive = CompareCase::SENSITIVE;
  const CompareCase kInsensitive = CompareCase::INSENSITIVE_ASCII;

  const string16 suffix = ASCIIToUTF16(".plugin");
  const string16 lower_name = ASCIIToUTF16("Foo.plugin");
  const string16 mixed_name = ASCIIToUTF16("Foo.Plugin");
  const string16 too_short = ASCIIToUTF16(".plug");
  const string16 suffix_in_middle = ASCIIToUTF16("Foo.plugin Bar");
  const string16 empty;

  // Matching suffix, with and without a case difference.
  EXPECT_TRUE(EndsWith(lower_name, suffix, kSensitive));
  EXPECT_FALSE(EndsWith(mixed_name, suffix, kSensitive));
  EXPECT_TRUE(EndsWith(lower_name, suffix, kInsensitive));
  EXPECT_TRUE(EndsWith(mixed_name, suffix, kInsensitive));

  // Input shorter than the suffix.
  EXPECT_FALSE(EndsWith(too_short, suffix, kSensitive));
  EXPECT_FALSE(EndsWith(too_short, suffix, kInsensitive));

  // Suffix text present, but not at the end.
  EXPECT_FALSE(EndsWith(suffix_in_middle, suffix, kSensitive));
  EXPECT_FALSE(EndsWith(suffix_in_middle, suffix, kInsensitive));

  // Empty input.
  EXPECT_FALSE(EndsWith(empty, suffix, kInsensitive));
  EXPECT_FALSE(EndsWith(empty, suffix, kSensitive));

  // Empty suffix.
  EXPECT_TRUE(EndsWith(lower_name, empty, kInsensitive));
  EXPECT_TRUE(EndsWith(lower_name, empty, kSensitive));

  // Input equal to the suffix.
  EXPECT_TRUE(EndsWith(suffix, suffix, kInsensitive));
  EXPECT_TRUE(EndsWith(suffix, suffix, kSensitive));

  // Both empty.
  EXPECT_TRUE(EndsWith(empty, empty, kInsensitive));
  EXPECT_TRUE(EndsWith(empty, empty, kSensitive));
}
1060
// Checks the offsets that ReplaceStringPlaceholders() reports through its
// out-parameter. The expectations below show that entry i of |offsets|
// belongs to substitution i + 1 ($1 first), regardless of where that
// placeholder appears in the format string.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  // ElementsAre() verifies the element count together with the values, so a
  // result of the wrong size fails the expectation instead of being indexed
  // out of bounds.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst, &offsets);
  EXPECT_THAT(offsets, ElementsAre(7U, 25U));

  // Start from an empty vector again before the second call.
  offsets.clear();

  // Placeholders swapped: $1 now lands at 25 and $2 at 7.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst, &offsets);
  EXPECT_THAT(offsets, ElementsAre(25U, 7U));
}
1083
// Checks ReplaceStringPlaceholders() when the format string refers to more
// substitutions than were supplied: $4..$6 below have no matching entry and
// are expected to expand to nothing.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c")};
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");

  const string16 actual =
      ReplaceStringPlaceholders(pattern, replacements, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), actual);
}
1098
// Checks ReplaceStringPlaceholders() with all nine single-digit placeholders
// ($1..$9), each backed by its own string16 substitution.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c"),
      ASCIIToUTF16("6d"), ASCIIToUTF16("5e"), ASCIIToUTF16("4f"),
      ASCIIToUTF16("3g"), ASCIIToUTF16("2h"), ASCIIToUTF16("1i")};
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");

  const string16 actual =
      ReplaceStringPlaceholders(pattern, replacements, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), actual);
}
1117
// In this test, some of the substitutions are shorter than the placeholders,
// but overall the string gets longer. Both the formatted result and the
// reported offsets are checked.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("9a____"));
  subst.push_back(ASCIIToUTF16("B"));
  subst.push_back(ASCIIToUTF16("7c___"));
  subst.push_back(ASCIIToUTF16("d"));
  subst.push_back(ASCIIToUTF16("5e____"));
  subst.push_back(ASCIIToUTF16("F"));
  subst.push_back(ASCIIToUTF16("3g___"));
  subst.push_back(ASCIIToUTF16("h"));
  subst.push_back(ASCIIToUTF16("1i_____"));

  string16 original = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  string16 expected =
      ASCIIToUTF16("9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i");

  // Without an offsets vector.
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));

  // With an offsets vector: same text, plus one offset per substitution.
  std::vector<size_t> offsets;
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
  std::vector<size_t> expected_offsets = {0, 8, 11, 18, 21, 29, 32, 39, 42};

  // Fatal on purpose: the loop below indexes |expected_offsets| and |subst|
  // with an index bounded by |offsets.size()|, so a size mismatch must stop
  // the test rather than read out of bounds.
  ASSERT_EQ(subst.size(), offsets.size());
  EXPECT_EQ(expected_offsets, offsets);

  // Each expected offset must point at the corresponding substitution inside
  // the expected output.
  for (size_t i = 0; i < offsets.size(); i++) {
    EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
              subst[i]);
  }
}
1148
// Here one substitution ("XYZW") is longer than its placeholder while the
// result as a whole is shorter than the format string. The placeholders also
// appear out of numeric order.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
  const std::vector<string16> replacements = {
      ASCIIToUTF16("z"), ASCIIToUTF16("y"), ASCIIToUTF16("XYZW"),
      ASCIIToUTF16("x"), ASCIIToUTF16("w")};
  const string16 pattern = ASCIIToUTF16("$3_$4$2$1$5");

  const string16 actual =
      ReplaceStringPlaceholders(pattern, replacements, nullptr);

  EXPECT_EQ(ASCIIToUTF16("XYZW_xyzw"), actual);
}
1165
// "$16" is expected to be read as placeholder $1 followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> replacements = {ASCIIToUTF16("1a")};
  const string16 pattern = ASCIIToUTF16(" $16 ");

  const string16 actual =
      ReplaceStringPlaceholders(pattern, replacements, nullptr);

  EXPECT_EQ(ASCIIToUTF16(" 1a6 "), actual);
}
1173
// "$-" and "$A" are not valid placeholders; the expected output shows them
// being dropped entirely while the valid "$1" is substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> replacements = {ASCIIToUTF16("1a")};
  const string16 pattern = ASCIIToUTF16("+$-+$A+$1+");

  const string16 actual =
      ReplaceStringPlaceholders(pattern, replacements, nullptr);

  EXPECT_EQ(ASCIIToUTF16("+++1a+"), actual);
}
1181
// std::string counterpart of the ReplaceStringPlaceholders test: all nine
// single-digit placeholders, each with its own substitution.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> replacements = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i"};
  static const char kPattern[] = "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i";

  const std::string actual =
      ReplaceStringPlaceholders(kPattern, replacements, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", actual);
}
1200
// The same placeholder may appear more than once, and some substitutions may
// never be referenced. Checks both the output text and the reported offsets.
TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
  const std::vector<std::string> replacements = {
      "4",   // Referenced twice.
      "?",   // Unreferenced.
      "!",   // Unreferenced.
      "16",  // Referenced once.
  };
  const std::string pattern = "$1 * $1 == $4";
  const std::string want = "4 * 4 == 16";

  // Without an offsets vector.
  EXPECT_EQ(want, ReplaceStringPlaceholders(pattern, replacements, nullptr));

  // With an offsets vector: one entry per substituted occurrence.
  std::vector<size_t> actual_offsets;
  EXPECT_EQ(want,
            ReplaceStringPlaceholders(pattern, replacements, &actual_offsets));
  const std::vector<size_t> want_offsets = {0, 4, 9};
  EXPECT_EQ(want_offsets, actual_offsets);
}
1216
// A run of N '$' characters followed by a digit is expected to come out as
// N - 1 literal '$' characters followed by that digit; no substitution from
// the list shows up in the expected output.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  const std::vector<std::string> replacements = {"a", "b", "c"};

  const std::string actual =
      ReplaceStringPlaceholders("$$1 $$$2 $$$$3", replacements, nullptr);

  EXPECT_EQ(actual, "$1 $$2 $$$3");
}
1225
// Exercises strlcpy() and wcslcpy() with destination buffers of several
// sizes. In every case the return value is expected to be the length of the
// source (7), whether or not the copy was truncated.
TEST(StringUtilTest, LcpyTest) {
  static const char kSource[] = "abcdefg";
  static const wchar_t kWideSource[] = L"abcdefg";

  // The normal case: the source fits with room to spare.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSource, sizeof(kSource)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSource, sizeof(kWideSource)));
  }

  // dst_size == 0: nothing may be written to the destination, and the
  // result is still the equivalent of strlen(src).
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow, kSource, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // The source fits exactly, terminating null included.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSource, sizeof(kSource)));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSource, sizeof(kWideSource)));
  }

  // The destination is one element short: the last character is dropped to
  // make room for the terminating null.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", 7 * sizeof(wchar_t)));
  }

  // The destination is far too small: only a null-terminated prefix fits.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", 3 * sizeof(wchar_t)));
  }
}
1280
// Table-driven check of IsWprintfFormatPortable(): each wide format string is
// paired with the verdict the function is expected to return for it.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  struct FormatCase {
    const wchar_t* format;
    bool want_portable;
  };
  static const FormatCase kFormatCases[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const FormatCase& format_case : kFormatCases) {
    EXPECT_EQ(format_case.want_portable,
              IsWprintfFormatPortable(format_case.format));
  }
}
1311
// Checks RemoveChars() with the same std::string used as both input and
// output, including the return value that reports whether anything changed.
TEST(StringUtilTest, RemoveChars) {
  static const char kCharsToStrip[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Some characters match and are removed.
  EXPECT_TRUE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // Second pass over the already-stripped text: nothing left to remove.
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ("Abcd!", text);

  // Empty string.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
  EXPECT_EQ(std::string(), text);
}
1327
// Table-driven check of ReplaceChars(). Every row is run three ways: with a
// separate output string, in place on a string of minimal capacity, and in
// place on a string with spare capacity (where the buffer must not move).
TEST(StringUtilTest, ReplaceChars) {
  struct TestData {
    const char* input;
    const char* replace_chars;
    const char* replace_with;
    const char* output;
    bool result;
  };
  const TestData kRows[] = {
      {"", "", "", "", false},
      {"t", "t", "t", "t", true},
      {"a", "b", "c", "a", false},
      {"b", "b", "c", "c", true},
      {"bob", "b", "p", "pop", true},
      {"bob", "o", "i", "bib", true},
      {"test", "", "", "test", false},
      {"test", "", "!", "test", false},
      {"test", "z", "!", "test", false},
      {"test", "e", "!", "t!st", true},
      {"test", "e", "!?", "t!?st", true},
      {"test", "ez", "!", "t!st", true},
      {"test", "zed", "!?", "t!?st", true},
      {"test", "t", "!?", "!?es!?", true},
      {"test", "et", "!>", "!>!>s!>", true},
      {"test", "zest", "!", "!!!!", true},
      {"test", "szt", "!", "!e!!", true},
      {"test", "t", "test", "testestest", true},
      {"tetst", "t", "test", "testeteststest", true},
      {"ttttttt", "t", "-", "-------", true},
      {"aAaAaAAaAAa", "A", "", "aaaaa", true},
      {"xxxxxxxxxx", "x", "", "", true},
      {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
      {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
      {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
      {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
      {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
      {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
  };

  // Pass 1: input and output are distinct objects.
  for (const TestData& row : kRows) {
    std::string replaced;
    const bool changed = ReplaceChars(row.input, row.replace_chars,
                                      row.replace_with, &replaced);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, replaced);
  }

  // Pass 2: the same string is input and output, with limited capacity.
  for (const TestData& row : kRows) {
    std::string in_place = row.input;
    in_place.shrink_to_fit();
    const bool changed = ReplaceChars(in_place, row.replace_chars,
                                      row.replace_with, &in_place);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, in_place);
  }

  // Pass 3: the same string is input and output, with ample capacity; the
  // replacement should not realloc.
  for (const TestData& row : kRows) {
    std::string in_place = row.input;
    in_place.reserve(strlen(row.output) * 2);
    const void* const buffer_before = in_place.data();
    const bool changed = ReplaceChars(in_place, row.replace_chars,
                                      row.replace_with, &in_place);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, in_place);
    EXPECT_EQ(buffer_before, in_place.data());
  }
}
1398
// Checks ContainsOnlyChars() against an empty character set, a digit set, and
// the ASCII and UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  // Providing an empty list of characters should return false but for the
  // empty string.
  EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
  EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));

  // A small digit set; the order of the allowed characters is irrelevant.
  EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // Samples shared by the ASCII and UTF-16 whitespace checks below.
  static const char* const kWhitespaceOnly[] = {" ", "\t", "\t \r \n "};
  static const char* const kNotWhitespaceOnly[] = {"a", "\thello\r \n "};

  // 8-bit strings against kWhitespaceASCII.
  EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
  for (const char* sample : kWhitespaceOnly)
    EXPECT_TRUE(ContainsOnlyChars(sample, kWhitespaceASCII));
  for (const char* sample : kNotWhitespaceOnly)
    EXPECT_FALSE(ContainsOnlyChars(sample, kWhitespaceASCII));

  // 16-bit strings against kWhitespaceUTF16.
  EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
  for (const char* sample : kWhitespaceOnly)
    EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(sample), kWhitespaceUTF16));
  for (const char* sample : kNotWhitespaceOnly)
    EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16(sample), kWhitespaceUTF16));
}
1426
// Table-driven check of CompareCaseInsensitiveASCII(): the expected results
// are 0 for strings equal up to ASCII case, and -1 / 1 for ordered pairs.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    int want;
  } kComparisons[] = {
      // Equal, ignoring case.
      {"", "", 0},
      {"Asdf", "aSDf", 0},
      // Differing lengths.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},
      // Differing values.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},
  };

  for (const auto& comparison : kComparisons) {
    EXPECT_EQ(comparison.want,
              CompareCaseInsensitiveASCII(comparison.lhs, comparison.rhs));
  }
}
1439
// Checks EqualsCaseInsensitiveASCII() on equal, differing, and
// differently-sized inputs.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    bool want_equal;
  } kComparisons[] = {
      {"", "", true},
      {"Asdf", "aSDF", true},
      {"bsdf", "aSDF", false},   // First character differs.
      {"Asdf", "aSDFz", false},  // Lengths differ.
  };

  for (const auto& comparison : kComparisons) {
    EXPECT_EQ(comparison.want_equal,
              EqualsCaseInsensitiveASCII(comparison.lhs, comparison.rhs));
  }
}
1446
// Checks IsUnicodeWhitespace() against a handful of wide characters that are
// not whitespace and a handful that are.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // NOT unicode white space.
  static const wchar_t kNonWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (wchar_t character : kNonWhitespace)
    EXPECT_FALSE(IsUnicodeWhitespace(character));

  // Actual unicode whitespace.
  static const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (wchar_t character : kWhitespace)
    EXPECT_TRUE(IsUnicodeWhitespace(character));
}
1466
1467 class WriteIntoTest : public testing::Test {
1468 protected:
WritesCorrectly(size_t num_chars)1469 static void WritesCorrectly(size_t num_chars) {
1470 std::string buffer;
1471 char kOriginal[] = "supercali";
1472 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1473 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1474 // string at the first \0.
1475 EXPECT_EQ(
1476 std::string(kOriginal, std::min(num_chars, base::size(kOriginal) - 1)),
1477 std::string(buffer.c_str()));
1478 EXPECT_EQ(num_chars, buffer.size());
1479 }
1480 };
1481
// Checks WriteInto() for several sizes, for a zero-length result, and for
// independence between a string and a copy of it.
TEST_F(WriteIntoTest, WriteInto) {
  // Validate that WriteInto reserves enough space and
  // sizes a string correctly.
  WritesCorrectly(1);
  WritesCorrectly(2);
  WritesCorrectly(5000);

  // Validate that WriteInto handles 0-length strings: requesting a length of
  // 1 (terminator only) and copying nothing must leave an empty string.
  const char kUnused[] = "original";
  std::string zero_length;
  strncpy(WriteInto(&zero_length, 1), kUnused, 0);
  EXPECT_STREQ("", zero_length.c_str());
  EXPECT_EQ(0u, zero_length.size());

  // Validate that WriteInto doesn't modify other strings
  // when using a Copy-on-Write implementation.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string untouched = kLive;
  std::string overwritten = untouched;
  strncpy(WriteInto(&overwritten, 5), kDead, 4);

  // The copy now holds the new text...
  EXPECT_EQ(kDead, overwritten);
  EXPECT_EQ(4u, overwritten.size());
  // ...while the string it was copied from is unchanged.
  EXPECT_EQ(kLive, untouched);
  EXPECT_EQ(4u, untouched.size());
}
1508
1509 } // namespace base
1510