1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 
12 #include <algorithm>
13 #include <type_traits>
14 
15 #include "base/stl_util.h"
16 #include "base/strings/string16.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "build/build_config.h"
19 #include "testing/gmock/include/gmock/gmock.h"
20 #include "testing/gtest/include/gtest/gtest.h"
21 
22 using ::testing::ElementsAre;
23 
24 namespace base {
25 
26 static const struct trim_case {
27   const wchar_t* input;
28   const TrimPositions positions;
29   const wchar_t* output;
30   const TrimPositions return_value;
31 } trim_cases[] = {
32   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
33   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
34   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
35   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
36   {L"", TRIM_ALL, L"", TRIM_NONE},
37   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
38   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
39   {L"  ", TRIM_ALL, L"", TRIM_ALL},
40   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
41   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
42 };
43 
44 static const struct trim_case_ascii {
45   const char* input;
46   const TrimPositions positions;
47   const char* output;
48   const TrimPositions return_value;
49 } trim_cases_ascii[] = {
50   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
51   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
52   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
53   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
54   {"", TRIM_ALL, "", TRIM_NONE},
55   {"  ", TRIM_LEADING, "", TRIM_LEADING},
56   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
57   {"  ", TRIM_ALL, "", TRIM_ALL},
58   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
59 };
60 
61 namespace {
62 
63 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)64 bool Truncated(const std::string& input,
65                const size_t byte_size,
66                std::string* output) {
67     size_t prev = input.length();
68     TruncateUTF8ToByteSize(input, byte_size, output);
69     return prev != output->length();
70 }
71 
// Pointer-to-function type of the validators passed to the Test* helpers
// below: each takes a StringPiece and returns a bool.
using TestFunction = bool (*)(StringPiece str);
73 
74 // Helper used to test IsStringUTF8{,AllowingNoncharacters}.
TestStructurallyValidUtf8(TestFunction fn)75 void TestStructurallyValidUtf8(TestFunction fn) {
76   EXPECT_TRUE(fn("abc"));
77   EXPECT_TRUE(fn("\xC2\x81"));
78   EXPECT_TRUE(fn("\xE1\x80\xBF"));
79   EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
80   EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
81   EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
82 
83   // U+FEFF used as UTF-8 BOM.
84   // clang-format off
85   EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
86   // clang-format on
87 
88   // Embedded nulls in canonical UTF-8 representation.
89   using std::string_literals::operator""s;
90   const std::string kEmbeddedNull = "embedded\0null"s;
91   EXPECT_TRUE(fn(kEmbeddedNull));
92 }
93 
// Helper used to test IsStringUTF8{,AllowingNoncharacters}.
//
// Passes |fn| a series of byte strings and expects it to return false for
// every one of them. The trailing comments on the overlong cases give the
// code point the bytes would decode to if overlong forms were permitted.
void TestStructurallyInvalidUtf8(TestFunction fn) {
  // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
  EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));

  // Surrogate code points
  EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
  EXPECT_FALSE(fn("\xED\xA0\x8F"));
  EXPECT_FALSE(fn("\xED\xBF\xBF"));

  // Overlong sequences
  EXPECT_FALSE(fn("\xC0\x80"));                  // U+0000
  EXPECT_FALSE(fn("\xC1\x80\xC1\x81"));          // "@A"
  EXPECT_FALSE(fn("\xE0\x80\x80"));              // U+0000
  EXPECT_FALSE(fn("\xE0\x82\x80"));              // U+0080
  EXPECT_FALSE(fn("\xE0\x9F\xBF"));              // U+07FF
  EXPECT_FALSE(fn("\xF0\x80\x80\x8D"));          // U+000D
  EXPECT_FALSE(fn("\xF0\x80\x82\x91"));          // U+0091
  EXPECT_FALSE(fn("\xF0\x80\xA0\x80"));          // U+0800
  EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF"));          // U+FEFF (BOM)
  EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF"));      // U+003F
  EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5"));  // U+0825

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_FALSE(fn("\xF4\x90\x80\x80"));          // U+110000
  EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF"));      // 5 bytes
  EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80"));  // 6 bytes

  // BOM in UTF-16(BE|LE)
  EXPECT_FALSE(fn("\xFE\xFF"));
  EXPECT_FALSE(fn("\xFF\xFE"));

  // Strings in legacy encodings. We can certainly make up strings
  // in a legacy encoding that are valid in UTF-8, but in real data,
  // most of them are invalid as UTF-8.

  // cafe with U+00E9 in ISO-8859-1
  EXPECT_FALSE(fn("caf\xE9"));
  // U+AC00, U+AC01 in EUC-KR
  EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
  // U+4F60 U+597D in Big5
  EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
  // "abc" with U+201[CD] in windows-125[0-8]
  // The literal is split so that \x93 does not absorb "abc" as hex digits.
  // clang-format off
  EXPECT_FALSE(fn("\x93" "abc\x94"));
  // clang-format on
  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));

  // BOM in UTF-32(BE|LE)
  // The ""s literals preserve the NUL bytes, so all four bytes reach |fn|.
  using std::string_literals::operator""s;
  const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
  EXPECT_FALSE(fn(kUtf32BeBom));
  const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
  EXPECT_FALSE(fn(kUtf32LeBom));
}
152 
153 // Helper used to test IsStringUTF8{,AllowingNoncharacters}.
TestNoncharacters(TestFunction fn,bool expected_result)154 void TestNoncharacters(TestFunction fn, bool expected_result) {
155   EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result);      // U+FDD0
156   EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result);      // U+FDDF
157   EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result);      // U+FDEF
158   EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result);      // U+FFFE
159   EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result);      // U+FFFF
160   EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result);  // U+01FFFE
161   EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result);  // U+01FFFF
162   EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result);  // U+02FFFE
163   EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result);  // U+02FFFF
164   EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result);  // U+03FFFE
165   EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result);  // U+03FFFF
166   EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result);  // U+04FFFE
167   EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result);  // U+04FFFF
168   EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result);  // U+05FFFE
169   EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result);  // U+05FFFF
170   EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result);  // U+06FFFE
171   EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result);  // U+06FFFF
172   EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result);  // U+07FFFE
173   EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result);  // U+07FFFF
174   EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result);  // U+08FFFE
175   EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result);  // U+08FFFF
176   EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result);  // U+09FFFE
177   EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result);  // U+09FFFF
178   EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result);  // U+0AFFFE
179   EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result);  // U+0AFFFF
180   EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result);  // U+0BFFFE
181   EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result);  // U+0BFFFF
182   EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result);  // U+0CFFFE
183   EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result);  // U+0CFFFF
184   EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result);  // U+0DFFFE
185   EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result);  // U+0DFFFF
186   EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result);  // U+0EFFFE
187   EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result);  // U+0EFFFF
188   EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result);  // U+0FFFFE
189   EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result);  // U+0FFFFF
190   EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result);  // U+10FFFE
191   EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result);  // U+10FFFF
192 }
193 
194 }  // namespace
195 
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper. Each
// EXPECT_TRUE/EXPECT_FALSE states whether the output length is expected to
// differ from the input length; the EXPECT_EQ that follows pins the exact
// bytes left in |output|.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // A byte_size larger than the input (including the maximum size_t value)
  // is expected to leave the length unchanged.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A limit that falls inside a 2-byte character drops the whole character.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  // Inputs with leading NUL bytes.
  // NOTE(review): base::size(array) presumably counts the literal's implicit
  // terminating NUL, making array_string one byte longer than the escapes
  // written out in the literal -- confirm against base/stl_util.h.
  {
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, base::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, base::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Valid UTF-8 whose length equals byte_size (returned unchanged), plus one
  // input whose trailing bytes are invalid (only the tail is removed).
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  // The adjacent literals keep 'a' from being parsed as part of the
  // preceding hex escape.
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, base::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    // This string is built with an explicit length of 4. It ends in a NUL
    // byte and is expected to come back unchanged despite the leading
    // \xff\xfe bytes.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, base::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Strings in legacy encodings. As the expectations below show, an input
  // whose final byte is ASCII comes back untruncated even when earlier bytes
  // are not valid UTF-8; only an invalid tail is removed.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
              &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output.
  // |output| still holds "\xa7\x41\xa6\x6e" from the previous expectation.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
354 
355 #if defined(WCHAR_T_IS_UTF16)
// For each supported char16 source, checks at compile time that the
// conversion yields the expected wchar_t pointer type and at run time that
// the returned pointer refers to the source's own storage.
TEST(StringUtilTest, as_wcstr) {
  // Mutable char16 array.
  char16 mutable_array[10] = {};
  static_assert(
      std::is_same<wchar_t*,
                   decltype(as_writable_wcstr(mutable_array))>::value,
      "");
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_wcstr(mutable_array));

  // Mutable string16.
  string16 mutable_string(10, '\0');
  static_assert(
      std::is_same<wchar_t*,
                   decltype(as_writable_wcstr(mutable_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_wcstr(mutable_string));

  // Read-only char16 array.
  const char16 const_array[10] = {};
  static_assert(
      std::is_same<const wchar_t*, decltype(as_wcstr(const_array))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(const_array), as_wcstr(const_array));

  // Read-only string16.
  const string16 const_string(10, '\0');
  static_assert(
      std::is_same<const wchar_t*, decltype(as_wcstr(const_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_wcstr(const_string));

  // StringPiece16 constructed from the read-only array.
  StringPiece16 view = const_array;
  static_assert(std::is_same<const wchar_t*, decltype(as_wcstr(view))>::value,
                "");
  EXPECT_EQ(static_cast<const void*>(view.data()), as_wcstr(view));
}
383 
// For each supported wchar_t source, checks at compile time that the
// conversion yields the expected char16 pointer type and at run time that the
// returned pointer refers to the source's own storage.
TEST(StringUtilTest, as_u16cstr) {
  // Mutable wchar_t array.
  wchar_t mutable_array[10] = {};
  static_assert(
      std::is_same<char16*,
                   decltype(as_writable_u16cstr(mutable_array))>::value,
      "");
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_u16cstr(mutable_array));

  // Mutable std::wstring.
  std::wstring mutable_string(10, '\0');
  static_assert(
      std::is_same<char16*,
                   decltype(as_writable_u16cstr(mutable_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_u16cstr(mutable_string));

  // Read-only wchar_t array.
  const wchar_t const_array[10] = {};
  static_assert(
      std::is_same<const char16*, decltype(as_u16cstr(const_array))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(const_array), as_u16cstr(const_array));

  // Read-only std::wstring.
  const std::wstring const_string(10, '\0');
  static_assert(
      std::is_same<const char16*, decltype(as_u16cstr(const_string))>::value,
      "");
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_u16cstr(const_string));

  // WStringPiece constructed from the read-only array.
  WStringPiece view = const_array;
  static_assert(std::is_same<const char16*, decltype(as_u16cstr(view))>::value,
                "");
  EXPECT_EQ(static_cast<const void*>(view.data()), as_u16cstr(view));
}
412 #endif  // defined(WCHAR_T_IS_UTF16)
413 
// Runs the trim_cases and trim_cases_ascii tables through TrimWhitespace()
// and TrimWhitespaceASCII(), and checks that TrimWhitespace() tolerates the
// input and output being the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // Shared by all iterations on purpose: contents left over from one case are
  // still in the string when the next case runs.
  string16 trimmed;
  for (const auto& test_case : trim_cases) {
    const auto reported = TrimWhitespace(WideToUTF16(test_case.input),
                                         test_case.positions, &trimmed);
    EXPECT_EQ(test_case.return_value, reported);
    EXPECT_EQ(WideToUTF16(test_case.output), trimmed);
  }

  // Input and output are the same object.
  trimmed = ASCIIToUTF16("  This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);

  // Same aliasing, with an input made only of whitespace.
  trimmed = ASCIIToUTF16("  \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(string16(), trimmed);

  // Narrow-string table.
  std::string trimmed_ascii;
  for (const auto& test_case : trim_cases_ascii) {
    const auto reported = TrimWhitespaceASCII(
        test_case.input, test_case.positions, &trimmed_ascii);
    EXPECT_EQ(test_case.return_value, reported);
    EXPECT_EQ(test_case.output, trimmed_ascii);
  }
}
440 
// A single CollapseWhitespace() scenario written with wide-character
// literals.
struct collapse_case {
  const wchar_t* input;   // Text handed to CollapseWhitespace().
  const bool trim;        // Forwarded as the call's second argument.
  const wchar_t* output;  // Text the call is expected to return.
};

// Scenarios iterated by the CollapseWhitespace test.
static const collapse_case collapse_cases[] = {
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L"  ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L"    Test     \n  \t String    ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L"   Test String", false, L"Test String"},
    {L"Test String    ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    {L"", true, L""},
    {L"\n", true, L""},
    {L"  \r  ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r  Foo  ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L"  \tFoo  bar  \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
466 
// Checks CollapseWhitespace() against every row of collapse_cases, converting
// the wide literals to UTF-16 first.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const auto& test_case : collapse_cases) {
    const string16 expected = WideToUTF16(test_case.output);
    const string16 source = WideToUTF16(test_case.input);
    EXPECT_EQ(expected, CollapseWhitespace(source, test_case.trim));
  }
}
473 
// A single CollapseWhitespaceASCII() scenario written with narrow literals.
struct collapse_case_ascii {
  const char* input;   // Text handed to CollapseWhitespaceASCII().
  const bool trim;     // Forwarded as the call's second argument.
  const char* output;  // Text the call is expected to return.
};

// Scenarios iterated by the CollapseWhitespaceASCII test.
static const collapse_case_ascii collapse_cases_ascii[] = {
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {"  ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {"    Test     \n  \t String    ", false, "Test String"},
    {"   Test String", false, "Test String"},
    {"Test String    ", false, "Test String"},
    {"Test String", false, "Test String"},
    {"", true, ""},
    {"\n", true, ""},
    {"  \r  ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r  Foo  ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    {"  \tFoo  bar  \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
497 
// Checks CollapseWhitespaceASCII() against every row of collapse_cases_ascii.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const auto& test_case : collapse_cases_ascii) {
    const auto collapsed =
        CollapseWhitespaceASCII(test_case.input, test_case.trim);
    EXPECT_EQ(test_case.output, collapsed);
  }
}
503 
// Runs the shared UTF-8 helpers against both validators. The two are expected
// to agree on every structurally valid and invalid input and to differ only
// on noncharacters.
TEST(StringUtilTest, IsStringUTF8) {
  {
    SCOPED_TRACE("IsStringUTF8");
    const TestFunction validator = &IsStringUTF8;
    TestStructurallyValidUtf8(validator);
    TestStructurallyInvalidUtf8(validator);
    // Noncharacters are expected to be rejected.
    TestNoncharacters(validator, false);
  }

  {
    SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
    const TestFunction validator = &IsStringUTF8AllowingNoncharacters;
    TestStructurallyValidUtf8(validator);
    TestStructurallyInvalidUtf8(validator);
    // Noncharacters are expected to be accepted.
    TestNoncharacters(validator, true);
  }
}
519 
TEST(StringUtilTest,IsStringASCII)520 TEST(StringUtilTest, IsStringASCII) {
521   static char char_ascii[] =
522       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
523   static char16 char16_ascii[] = {
524       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
525       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
526       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
527   static std::wstring wchar_ascii(
528       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
529 
530   // Test a variety of the fragment start positions and lengths in order to make
531   // sure that bit masking in IsStringASCII works correctly.
532   // Also, test that a non-ASCII character will be detected regardless of its
533   // position inside the string.
534   {
535     const size_t string_length = base::size(char_ascii) - 1;
536     for (size_t offset = 0; offset < 8; ++offset) {
537       for (size_t len = 0, max_len = string_length - offset; len < max_len;
538            ++len) {
539         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
540         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
541           char_ascii[char_pos] |= '\x80';
542           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
543           char_ascii[char_pos] &= ~'\x80';
544         }
545       }
546     }
547   }
548 
549   {
550     const size_t string_length = base::size(char16_ascii) - 1;
551     for (size_t offset = 0; offset < 4; ++offset) {
552       for (size_t len = 0, max_len = string_length - offset; len < max_len;
553            ++len) {
554         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
555         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
556           char16_ascii[char_pos] |= 0x80;
557           EXPECT_FALSE(
558               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
559           char16_ascii[char_pos] &= ~0x80;
560           // Also test when the upper half is non-zero.
561           char16_ascii[char_pos] |= 0x100;
562           EXPECT_FALSE(
563               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
564           char16_ascii[char_pos] &= ~0x100;
565         }
566       }
567     }
568   }
569 
570 #if defined(WCHAR_T_IS_UTF32)
571   {
572     const size_t string_length = wchar_ascii.length();
573     for (size_t len = 0; len < string_length; ++len) {
574       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
575       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
576         wchar_ascii[char_pos] |= 0x80;
577         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
578         wchar_ascii[char_pos] &= ~0x80;
579         wchar_ascii[char_pos] |= 0x100;
580         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
581         wchar_ascii[char_pos] &= ~0x100;
582         wchar_ascii[char_pos] |= 0x10000;
583         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
584         wchar_ascii[char_pos] &= ~0x10000;
585       }
586     }
587   }
588 #endif  // WCHAR_T_IS_UTF32
589 }
590 
// Checks that ASCIIToUTF16() and UTF16ToASCII() round-trip ASCII text,
// including empty strings and strings with an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // |char_cases| and |wchar_cases| are parallel arrays: entry i of one is the
  // same text as entry i of the other.
  static const char* const char_cases[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const wchar_cases[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t i = 0; i < base::size(char_cases); ++i) {
    EXPECT_TRUE(IsStringASCII(char_cases[i]));
    string16 utf16 = ASCIIToUTF16(char_cases[i]);
    EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);

    std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
    EXPECT_EQ(char_cases[i], ascii);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Convert empty strings.
  string16 empty16;
  std::string empty;
  EXPECT_EQ(empty, UTF16ToASCII(empty16));
  EXPECT_EQ(empty16, ASCIIToUTF16(empty));

  // Convert strings with an embedded NUL character.
  const char chars_with_nul[] = "test\0string";
  // Subtract one to leave out the literal's terminating NUL while keeping the
  // embedded one. The length stays a size_t so it can be compared with
  // length() directly, without narrowing to int and casting back.
  const size_t length_with_nul = base::size(chars_with_nul) - 1;
  std::string string_with_nul(chars_with_nul, length_with_nul);
  string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
  EXPECT_EQ(length_with_nul, string16_with_nul.length());
  std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
}
633 
// Checks ToLowerASCII() on single characters (char and char16) and on whole
// strings (narrow and UTF-16).
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: upper case, already lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16.
  const char16 upper_c = static_cast<char16>('C');
  const char16 lower_c = static_cast<char16>('c');
  const char16 digit_two = static_cast<char16>('2');
  EXPECT_EQ(lower_c, ToLowerASCII(upper_c));
  EXPECT_EQ(lower_c, ToLowerASCII(lower_c));
  EXPECT_EQ(digit_two, ToLowerASCII(digit_two));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
646 
// Checks ToUpperASCII() on single characters (char and char16) and on whole
// strings (narrow and UTF-16).
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: already upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16.
  const char16 upper_c = static_cast<char16>('C');
  const char16 lower_c = static_cast<char16>('c');
  const char16 digit_two = static_cast<char16>('2');
  EXPECT_EQ(upper_c, ToUpperASCII(upper_c));
  EXPECT_EQ(upper_c, ToUpperASCII(lower_c));
  EXPECT_EQ(digit_two, ToUpperASCII(digit_two));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
659 
// Checks that LowerCaseEqualsASCII() reports a match for mixed-case,
// lower-case and upper-case spellings of the same word, for both UTF-16 and
// narrow first arguments.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* src_a;  // Candidate text in arbitrary case.
    const char* dst;    // Lower-case text it is compared against.
  } lowercase_cases[] = {
      {"FoO", "foo"},
      {"foo", "foo"},
      {"FOO", "foo"},
  };

  for (const auto& test_case : lowercase_cases) {
    const string16 src_utf16 = ASCIIToUTF16(test_case.src_a);
    EXPECT_TRUE(LowerCaseEqualsASCII(src_utf16, test_case.dst));
    EXPECT_TRUE(LowerCaseEqualsASCII(test_case.src_a, test_case.dst));
  }
}
675 
// Checks FormatBytesUnlocalized() against a table of byte counts and the
// exact strings expected for them.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    int64_t bytes;         // Byte count handed to FormatBytesUnlocalized().
    const char* expected;  // ASCII form of the expected UTF-16 result.
  } cases[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, "0 B"},
    {512, "512 B"},
    {1024*1024, "1.0 MB"},
    {1024*1024*1024, "1.0 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {99LL*1024*1024*1024, "99.0 GB"},
    {105LL*1024*1024*1024, "105 GB"},
    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    // Largest representable byte count. INT64_MAX is spelled out instead of
    // computing it by shifting a 1 into the sign bit and inverting.
    {INT64_MAX, "8192 PB"},

    {99*1024 + 103, "99.1 kB"},
    {1024*1024 + 103, "1.0 MB"},
    {1024*1024 + 205 * 1024, "1.2 MB"},
    // The sum below still fits in a 32-bit int (2045771776 < 2^31).
    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& i : cases) {
    EXPECT_EQ(ASCIIToUTF16(i.expected), FormatBytesUnlocalized(i.bytes));
  }
}
TEST(StringUtilTest,ReplaceSubstringsAfterOffset)709 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
710   static const struct {
711     StringPiece str;
712     size_t start_offset;
713     StringPiece find_this;
714     StringPiece replace_with;
715     StringPiece expected;
716   } cases[] = {
717       {"aaa", 0, "", "b", "aaa"},
718       {"aaa", 1, "", "b", "aaa"},
719       {"aaa", 0, "a", "b", "bbb"},
720       {"aaa", 0, "aa", "b", "ba"},
721       {"aaa", 0, "aa", "bbb", "bbba"},
722       {"aaaaa", 0, "aa", "b", "bba"},
723       {"ababaaababa", 0, "aba", "", "baaba"},
724       {"ababaaababa", 0, "aba", "_", "_baa_ba"},
725       {"ababaaababa", 0, "aba", "__", "__baa__ba"},
726       {"ababaaababa", 0, "aba", "___", "___baa___ba"},
727       {"ababaaababa", 0, "aba", "____", "____baa____ba"},
728       {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
729       {"abb", 0, "ab", "a", "ab"},
730       {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
731       {"Not found", 0, "x", "0", "Not found"},
732       {"Not found again", 5, "x", "0", "Not found again"},
733       {" Making it much longer ", 0, " ", "Four score and seven years ago",
734        "Four score and seven years agoMakingFour score and seven years agoit"
735        "Four score and seven years agomuchFour score and seven years agolonger"
736        "Four score and seven years ago"},
737       {" Making it much much much much shorter ", 0,
738        "Making it much much much much shorter", "", "  "},
739       {"so much much much much much very much much much shorter", 0, "much ",
740        "", "so very shorter"},
741       {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
742       {"Replace me only me once", 9, "me ", "", "Replace me only once"},
743       {"abababab", 2, "ab", "c", "abccc"},
744       {"abababab", 1, "ab", "c", "abccc"},
745       {"abababab", 1, "aba", "c", "abcbab"},
746   };
747 
748   // base::string16 variant
749   for (const auto& scenario : cases) {
750     string16 str = ASCIIToUTF16(scenario.str);
751     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
752                                  ASCIIToUTF16(scenario.find_this),
753                                  ASCIIToUTF16(scenario.replace_with));
754     EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
755   }
756 
757   // std::string with insufficient capacity: expansion must realloc the buffer.
758   for (const auto& scenario : cases) {
759     std::string str = scenario.str.as_string();
760     str.shrink_to_fit();  // This is nonbinding, but it's the best we've got.
761     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
762                                  scenario.find_this, scenario.replace_with);
763     EXPECT_EQ(scenario.expected, str);
764   }
765 
766   // std::string with ample capacity: should be possible to grow in-place.
767   for (const auto& scenario : cases) {
768     std::string str = scenario.str.as_string();
769     str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
770                 2);
771 
772     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
773                                  scenario.find_this, scenario.replace_with);
774     EXPECT_EQ(scenario.expected, str);
775   }
776 }
777 
// Feeds ReplaceFirstSubstringAfterOffset() string16 inputs built from the
// ASCII table below; the expectations show at most one match being rewritten.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* input;
    string16::size_type offset;
    const char* needle;
    const char* replacement;
    const char* want;
  } kCases[] = {
    {"aaa", 0, "a", "b", "baa"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "",
      "Remov some substrings inging"},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMaking it much longer "},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& test_case : kCases) {
    string16 subject = ASCIIToUTF16(test_case.input);
    const string16 needle16 = ASCIIToUTF16(test_case.needle);
    const string16 replacement16 = ASCIIToUTF16(test_case.replacement);
    ReplaceFirstSubstringAfterOffset(&subject, test_case.offset, needle16,
                                     replacement16);
    EXPECT_EQ(ASCIIToUTF16(test_case.want), subject);
  }
}
807 
// Checks HexDigitToInt() for every decimal digit and for the letters A-F in
// both upper and lower case.
TEST(StringUtilTest, HexDigitToInt) {
  // '0'..'9' are expected to yield 0..9.
  const char kDecimalDigits[] = "0123456789";
  for (int value = 0; value <= 9; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kDecimalDigits[value]));
  }

  // 'A'..'F' are expected to yield 10..15.
  const char kUpperLetters[] = "ABCDEF";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kUpperLetters[index]));
  }

  // Verify the lower case as well.
  const char kLowerLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerLetters[index]));
  }
}
834 
// Joins a std::vector<std::string> that is grown step by step, checking the
// result after every change.
TEST(StringUtilTest, JoinString) {
  const std::string comma_space(", ");
  std::vector<std::string> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // A lone empty element still joins to the empty string.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element: no separator appears.
  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  // Three elements.
  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing empty element still gets a separator in front of it.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));

  // Same container, different separator, plus a whitespace-only element.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
856 
// string16 counterpart of the JoinString test: joins a std::vector<string16>
// that is grown step by step.
TEST(StringUtilTest, JoinString16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<string16> pieces;

  // Nothing to join.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // A lone empty element still joins to the empty string.
  pieces.push_back(string16());
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element: no separator appears.
  pieces.push_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  // Three elements.
  pieces.push_back(ASCIIToUTF16("b"));
  pieces.push_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing empty element still gets a separator in front of it.
  pieces.push_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));

  // Same container, different separator, plus a whitespace-only element.
  pieces.push_back(ASCIIToUTF16(" "));
  const string16 pipe = ASCIIToUTF16("|");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, pipe));
}
878 
// Joins a std::vector<StringPiece> that is grown step by step, checking the
// result after every change.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string comma_space(", ");
  std::vector<StringPiece> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));

  // Test empty first part (https://crbug.com/698073).
  pieces.push_back(StringPiece());
  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element: no separator appears.
  pieces.push_back("a");
  EXPECT_EQ("a", JoinString(pieces, comma_space));

  // Three elements.
  pieces.push_back("b");
  pieces.push_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));

  // A trailing default-constructed piece still gets a separator before it.
  pieces.push_back(StringPiece());
  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));

  // Same container, different separator, plus a whitespace-only element.
  pieces.push_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
901 
// Joins a std::vector<StringPiece16> that is grown step by step. The pieces
// are views, so the string16 objects they refer to are kept in named locals
// that live until the end of the test.
TEST(StringUtilTest, JoinStringPiece16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  const string16 kLetterA = ASCIIToUTF16("a");
  const string16 kLetterB = ASCIIToUTF16("b");
  const string16 kLetterC = ASCIIToUTF16("c");
  const string16 kBlank = ASCIIToUTF16(" ");
  std::vector<StringPiece16> pieces;

  // Nothing to join.
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));

  // Test empty first part (https://crbug.com/698073).
  pieces.push_back(StringPiece16());
  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
  pieces.clear();

  // One element: no separator appears.
  pieces.push_back(kLetterA);
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));

  // Three elements.
  pieces.push_back(kLetterB);
  pieces.push_back(kLetterC);
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));

  // A trailing default-constructed piece still gets a separator before it.
  pieces.push_back(StringPiece16());
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));

  // Same container, different separator, plus a whitespace-only element.
  pieces.push_back(kBlank);
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
}
928 
// Calls JoinString() with braced lists whose elements are string literals,
// std::strings and StringPieces.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string comma_space(", ");

  // Empty list.
  EXPECT_EQ(std::string(), JoinString({}, comma_space));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(std::string(), JoinString({StringPiece()}, comma_space));

  // Elements given as const char*.
  EXPECT_EQ("a", JoinString({"a"}, comma_space));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, comma_space));
  EXPECT_EQ("a, b, c, ",
            JoinString({"a", "b", "c", StringPiece()}, comma_space));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // Elements given as std::string.
  const std::string kFirst = "a";
  const std::string kSecond = "b";
  EXPECT_EQ("a, b", JoinString({kFirst, kSecond}, comma_space));

  // Elements given as StringPiece views of those strings.
  const StringPiece kFirstPiece = kFirst;
  const StringPiece kSecondPiece = kSecond;
  EXPECT_EQ("a, b", JoinString({kFirstPiece, kSecondPiece}, comma_space));
}
952 
// Calls JoinString() with braced lists whose elements are string16s and
// StringPiece16s.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const string16 comma_space = ASCIIToUTF16(", ");

  // Empty list.
  EXPECT_EQ(string16(), JoinString({}, comma_space));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(string16(), JoinString({StringPiece16()}, comma_space));

  // Elements given as string16.
  const string16 kFirst = ASCIIToUTF16("a");
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString({kFirst}, comma_space));

  const string16 kSecond = ASCIIToUTF16("b");
  const string16 kThird = ASCIIToUTF16("c");
  EXPECT_EQ(ASCIIToUTF16("a, b, c"),
            JoinString({kFirst, kSecond, kThird}, comma_space));

  // A trailing default-constructed piece still gets a separator before it.
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
            JoinString({kFirst, kSecond, kThird, StringPiece16()},
                       comma_space));

  // Different separator, plus a whitespace-only element.
  const string16 kBlank = ASCIIToUTF16(" ");
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "),
            JoinString({kFirst, kSecond, kThird, StringPiece16(), kBlank},
                       ASCIIToUTF16("|")));

  // Elements given as StringPiece16 views of the strings above.
  const StringPiece16 kFirstPiece = kFirst;
  const StringPiece16 kSecondPiece = kSecond;
  EXPECT_EQ(ASCIIToUTF16("a, b"),
            JoinString({kFirstPiece, kSecondPiece}, comma_space));
}
980 
// Exercises StartsWith() in both comparison modes, first with 8-bit strings
// and then with the same inputs converted to string16.
TEST(StringUtilTest, StartsWith) {
  const CompareCase kExact = CompareCase::SENSITIVE;
  const CompareCase kIgnoreCase = CompareCase::INSENSITIVE_ASCII;

  // --- 8-bit strings ---
  // Matching prefix, with and without a case difference.
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kExact));
  EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", kExact));
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kIgnoreCase));
  EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", kIgnoreCase));
  // Prefix longer than the string.
  EXPECT_FALSE(StartsWith("java", "javascript", kExact));
  EXPECT_FALSE(StartsWith("java", "javascript", kIgnoreCase));
  // Empty string, non-empty prefix.
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kIgnoreCase));
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kExact));
  // Empty prefix.
  EXPECT_TRUE(StartsWith("java", std::string(), kIgnoreCase));
  EXPECT_TRUE(StartsWith("java", std::string(), kExact));

  // --- 16-bit strings ---
  const string16 kLowerUrl = ASCIIToUTF16("javascript:url");
  const string16 kMixedUrl = ASCIIToUTF16("JavaScript:url");
  const string16 kScheme = ASCIIToUTF16("javascript");
  const string16 kTooShort = ASCIIToUTF16("java");
  const string16 kEmpty16 = string16();

  // Matching prefix, with and without a case difference.
  EXPECT_TRUE(StartsWith(kLowerUrl, kScheme, kExact));
  EXPECT_FALSE(StartsWith(kMixedUrl, kScheme, kExact));
  EXPECT_TRUE(StartsWith(kLowerUrl, kScheme, kIgnoreCase));
  EXPECT_TRUE(StartsWith(kMixedUrl, kScheme, kIgnoreCase));
  // Prefix longer than the string.
  EXPECT_FALSE(StartsWith(kTooShort, kScheme, kExact));
  EXPECT_FALSE(StartsWith(kTooShort, kScheme, kIgnoreCase));
  // Empty string, non-empty prefix.
  EXPECT_FALSE(StartsWith(kEmpty16, kScheme, kIgnoreCase));
  EXPECT_FALSE(StartsWith(kEmpty16, kScheme, kExact));
  // Empty prefix.
  EXPECT_TRUE(StartsWith(kTooShort, kEmpty16, kIgnoreCase));
  EXPECT_TRUE(StartsWith(kTooShort, kEmpty16, kExact));
}
1026 
// Exercises EndsWith() on string16 inputs in both comparison modes.
TEST(StringUtilTest, EndsWith) {
  const CompareCase kExact = CompareCase::SENSITIVE;
  const CompareCase kIgnoreCase = CompareCase::INSENSITIVE_ASCII;

  const string16 kSuffix = ASCIIToUTF16(".plugin");
  const string16 kLowerName = ASCIIToUTF16("Foo.plugin");
  const string16 kMixedName = ASCIIToUTF16("Foo.Plugin");
  const string16 kTruncated = ASCIIToUTF16(".plug");
  const string16 kSuffixInMiddle = ASCIIToUTF16("Foo.plugin Bar");
  const string16 kEmpty16 = string16();

  // Matching suffix, with and without a case difference.
  EXPECT_TRUE(EndsWith(kLowerName, kSuffix, kExact));
  EXPECT_FALSE(EndsWith(kMixedName, kSuffix, kExact));
  EXPECT_TRUE(EndsWith(kLowerName, kSuffix, kIgnoreCase));
  EXPECT_TRUE(EndsWith(kMixedName, kSuffix, kIgnoreCase));

  // Suffix longer than the string.
  EXPECT_FALSE(EndsWith(kTruncated, kSuffix, kExact));
  EXPECT_FALSE(EndsWith(kTruncated, kSuffix, kIgnoreCase));

  // Suffix text present, but not at the end.
  EXPECT_FALSE(EndsWith(kSuffixInMiddle, kSuffix, kExact));
  EXPECT_FALSE(EndsWith(kSuffixInMiddle, kSuffix, kIgnoreCase));

  // Empty string, non-empty suffix.
  EXPECT_FALSE(EndsWith(kEmpty16, kSuffix, kIgnoreCase));
  EXPECT_FALSE(EndsWith(kEmpty16, kSuffix, kExact));

  // Empty suffix.
  EXPECT_TRUE(EndsWith(kLowerName, kEmpty16, kIgnoreCase));
  EXPECT_TRUE(EndsWith(kLowerName, kEmpty16, kExact));

  // String identical to the suffix.
  EXPECT_TRUE(EndsWith(kSuffix, kSuffix, kIgnoreCase));
  EXPECT_TRUE(EndsWith(kSuffix, kSuffix, kExact));

  // Both empty.
  EXPECT_TRUE(EndsWith(kEmpty16, kEmpty16, kIgnoreCase));
  EXPECT_TRUE(EndsWith(kEmpty16, kEmpty16, kExact));
}
1060 
// Checks the offsets that ReplaceStringPlaceholders() writes through its
// out-parameter. The expected values below are listed per placeholder number
// ($1 first, then $2), not in order of appearance in the output string.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  // Placeholders appear in ascending order.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst,
                            &offsets);
  // Fatal assertion: the element checks below index into |offsets| and would
  // read out of bounds if fewer than two entries were produced.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);
  offsets.clear();

  // Placeholders appear in descending order.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst,
                            &offsets);
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
}
1083 
// Checks ReplaceStringPlaceholders() when the format string refers to more
// placeholders ($1..$6) than there are substitutions (three). The expected
// output has nothing in place of $4, $5 and $6.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<string16> subst = {
      ASCIIToUTF16("9a"),
      ASCIIToUTF16("8b"),
      ASCIIToUTF16("7c"),
  };
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");

  const string16 formatted = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
}
1098 
// Substitutes all nine placeholders $1..$9 in a string16 format string.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const std::vector<string16> subst = {
      ASCIIToUTF16("9a"), ASCIIToUTF16("8b"), ASCIIToUTF16("7c"),
      ASCIIToUTF16("6d"), ASCIIToUTF16("5e"), ASCIIToUTF16("4f"),
      ASCIIToUTF16("3g"), ASCIIToUTF16("2h"), ASCIIToUTF16("1i"),
  };
  const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");

  const string16 formatted = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
}
1117 
// In this test, some of the substitutions are shorter than the placeholders,
// but overall the string gets longer. Both the resulting string and the
// reported offsets are checked.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("9a____"));
  subst.push_back(ASCIIToUTF16("B"));
  subst.push_back(ASCIIToUTF16("7c___"));
  subst.push_back(ASCIIToUTF16("d"));
  subst.push_back(ASCIIToUTF16("5e____"));
  subst.push_back(ASCIIToUTF16("F"));
  subst.push_back(ASCIIToUTF16("3g___"));
  subst.push_back(ASCIIToUTF16("h"));
  subst.push_back(ASCIIToUTF16("1i_____"));

  string16 original = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  string16 expected =
      ASCIIToUTF16("9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i");

  // Without an offsets vector.
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));

  // With an offsets vector: same string, plus one offset per substitution.
  std::vector<size_t> offsets;
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
  const std::vector<size_t> expected_offsets = {0,  8,  11, 18, 21,
                                                29, 32, 39, 42};
  EXPECT_EQ(subst.size(), offsets.size());
  EXPECT_EQ(expected_offsets, offsets);

  // Sanity-check the table itself: each expected offset must point at the
  // corresponding substitution inside |expected|. The loop only indexes
  // |expected_offsets| and |subst|, so it is bounded by those (fatal size
  // check first) rather than by the size of the vector under test, which
  // could otherwise drive the indexing out of bounds.
  ASSERT_EQ(subst.size(), expected_offsets.size());
  for (size_t i = 0; i < expected_offsets.size(); i++) {
    EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
              subst[i]);
  }
}
1148 
// Here one substitution ("XYZW") is longer than its placeholder while the
// result as a whole is shorter than the format string. The placeholders are
// also referenced out of numeric order.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
  const std::vector<string16> subst = {
      ASCIIToUTF16("z"),
      ASCIIToUTF16("y"),
      ASCIIToUTF16("XYZW"),
      ASCIIToUTF16("x"),
      ASCIIToUTF16("w"),
  };
  const string16 pattern = ASCIIToUTF16("$3_$4$2$1$5");

  const string16 formatted = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("XYZW_xyzw"), formatted);
}
1165 
// "$16" is expected to be read as placeholder $1 followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> subst = {ASCIIToUTF16("1a")};
  const string16 pattern = ASCIIToUTF16(" $16 ");

  const string16 formatted = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted);
}
1173 
// "$-" and "$A" are not followed by a digit; the expected output contains
// nothing in their place, while the valid "$1" is substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> subst = {ASCIIToUTF16("1a")};
  const string16 pattern = ASCIIToUTF16("+$-+$A+$1+");

  const string16 formatted = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted);
}
1181 
// std::string flavour of the nine-placeholder substitution test.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> subst = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i",
  };

  const std::string formatted = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
}
1200 
// One placeholder is used twice and two substitutions are never used; checks
// both the resulting string and the reported offsets.
TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
  const std::vector<std::string> subst = {
      "4",   // Referenced twice.
      "?",   // Unreferenced.
      "!",   // Unreferenced.
      "16",  // Referenced once.
  };
  const std::string pattern = "$1 * $1 == $4";
  const std::string want = "4 * 4 == 16";

  // Without an offsets vector.
  EXPECT_EQ(want, ReplaceStringPlaceholders(pattern, subst, nullptr));

  // With an offsets vector: one entry per placeholder occurrence.
  std::vector<size_t> offsets;
  EXPECT_EQ(want, ReplaceStringPlaceholders(pattern, subst, &offsets));
  const std::vector<size_t> want_offsets = {0, 4, 9};
  EXPECT_EQ(want_offsets, offsets);
}
1216 
// Runs of several '$' in front of a digit: the expected output has one '$'
// fewer than each run in the input, and none of the substitutions appear.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  std::vector<std::string> subst;
  subst.push_back("a");
  subst.push_back("b");
  subst.push_back("c");
  // Expected value first, matching every other assertion in this file.
  EXPECT_EQ("$1 $$2 $$$3",
            ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr));
}
1225 
// Exercises strlcpy() and wcslcpy() with destination buffers that are roomy,
// zero-sized, exactly big enough, one element short, and far too small. In
// every case the return value is expected to be the source length (7).
TEST(StringUtilTest, LcpyTest) {
  const char kSource[] = "abcdefg";
  const wchar_t kWideSource[] = L"abcdefg";

  // Normal case: the source fits with room to spare.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // dst_size == 0: nothing should be written to the destination and the
  // result should be the equivalent of strlen(src).
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(7U, strlcpy(narrow, kSource, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // The source _just_ fits, terminating null included.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
  }

  // The destination is one element too small to also hold the null, so the
  // last source character is expected to be dropped.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // The destination is much too small.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(7U, strlcpy(narrow, kSource, base::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(7U, wcslcpy(wide, kWideSource, base::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
1280 
// Table-driven check of IsWprintfFormatPortable(): each wide format string is
// paired with the verdict the function is expected to return.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct {
    const wchar_t* format;
    bool want_portable;
  } kCases[] = {
    { L"%ls", true },
    { L"%s", false },
    { L"%S", false },
    { L"%lS", false },
    { L"Hello, %s", false },
    { L"%lc", true },
    { L"%c", false },
    { L"%C", false },
    { L"%lC", false },
    { L"%ls %s", false },
    { L"%s %ls", false },
    { L"%s %ls %s", false },
    { L"%f", true },
    { L"%f %F", false },
    { L"%d %D", false },
    { L"%o %O", false },
    { L"%u %U", false },
    { L"%f %d %o %u", true },
    { L"%-8d (%02.1f%)", true },
    { L"% 10s", false },
    { L"% 10ls", true },
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(test_case.want_portable,
              IsWprintfFormatPortable(test_case.format));
  }
}
1311 
// Calls RemoveChars() with the same std::string as both input and output and
// checks the returned flag alongside the resulting contents.
TEST(StringUtilTest, RemoveChars) {
  const char kUnwanted[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Some characters match: the call reports true and they are gone.
  EXPECT_TRUE(RemoveChars(text, kUnwanted, &text));
  EXPECT_EQ("Abcd!", text);

  // No characters match kUnwanted: false, contents unchanged.
  EXPECT_FALSE(RemoveChars(text, kUnwanted, &text));
  EXPECT_EQ("Abcd!", text);

  // Empty string: false, still empty.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kUnwanted, &text));
  EXPECT_EQ(std::string(), text);
}
1327 
TEST(StringUtilTest,ReplaceChars)1328 TEST(StringUtilTest, ReplaceChars) {
1329   struct TestData {
1330     const char* input;
1331     const char* replace_chars;
1332     const char* replace_with;
1333     const char* output;
1334     bool result;
1335   } cases[] = {
1336       {"", "", "", "", false},
1337       {"t", "t", "t", "t", true},
1338       {"a", "b", "c", "a", false},
1339       {"b", "b", "c", "c", true},
1340       {"bob", "b", "p", "pop", true},
1341       {"bob", "o", "i", "bib", true},
1342       {"test", "", "", "test", false},
1343       {"test", "", "!", "test", false},
1344       {"test", "z", "!", "test", false},
1345       {"test", "e", "!", "t!st", true},
1346       {"test", "e", "!?", "t!?st", true},
1347       {"test", "ez", "!", "t!st", true},
1348       {"test", "zed", "!?", "t!?st", true},
1349       {"test", "t", "!?", "!?es!?", true},
1350       {"test", "et", "!>", "!>!>s!>", true},
1351       {"test", "zest", "!", "!!!!", true},
1352       {"test", "szt", "!", "!e!!", true},
1353       {"test", "t", "test", "testestest", true},
1354       {"tetst", "t", "test", "testeteststest", true},
1355       {"ttttttt", "t", "-", "-------", true},
1356       {"aAaAaAAaAAa", "A", "", "aaaaa", true},
1357       {"xxxxxxxxxx", "x", "", "", true},
1358       {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
1359       {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
1360       {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
1361       {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
1362       {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
1363       {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
1364   };
1365 
1366   for (const TestData& scenario : cases) {
1367     // Test with separate output and input vars.
1368     std::string output;
1369     bool result = ReplaceChars(scenario.input, scenario.replace_chars,
1370                                scenario.replace_with, &output);
1371     EXPECT_EQ(scenario.result, result) << scenario.input;
1372     EXPECT_EQ(scenario.output, output);
1373   }
1374 
1375   for (const TestData& scenario : cases) {
1376     // Test with an input/output var of limited capacity.
1377     std::string input_output = scenario.input;
1378     input_output.shrink_to_fit();
1379     bool result = ReplaceChars(input_output, scenario.replace_chars,
1380                                scenario.replace_with, &input_output);
1381     EXPECT_EQ(scenario.result, result) << scenario.input;
1382     EXPECT_EQ(scenario.output, input_output);
1383   }
1384 
1385   for (const TestData& scenario : cases) {
1386     // Test with an input/output var of ample capacity; should
1387     // not realloc.
1388     std::string input_output = scenario.input;
1389     input_output.reserve(strlen(scenario.output) * 2);
1390     const void* original_buffer = input_output.data();
1391     bool result = ReplaceChars(input_output, scenario.replace_chars,
1392                                scenario.replace_with, &input_output);
1393     EXPECT_EQ(scenario.result, result) << scenario.input;
1394     EXPECT_EQ(scenario.output, input_output);
1395     EXPECT_EQ(original_buffer, input_output.data());
1396   }
1397 }
1398 
// Exercises ContainsOnlyChars() with an empty character set, a digit set, and
// the ASCII and UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string kNothing = std::string();

  // An empty set of characters should give false for everything except the
  // empty string.
  EXPECT_TRUE(ContainsOnlyChars(kNothing, kNothing));
  EXPECT_FALSE(ContainsOnlyChars("Hello", kNothing));

  // Digit sets; the order of the characters in the set differs between calls.
  const char kAscending[] = "1234";
  const char kDescending[] = "4321";
  EXPECT_TRUE(ContainsOnlyChars(kNothing, kAscending));
  EXPECT_TRUE(ContainsOnlyChars("1", kAscending));
  EXPECT_TRUE(ContainsOnlyChars("1", kDescending));
  EXPECT_TRUE(ContainsOnlyChars("123", kDescending));
  EXPECT_FALSE(ContainsOnlyChars("123a", kDescending));

  // ASCII whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(kNothing, kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));

  // UTF-16 whitespace set, same inputs converted to string16.
  const string16 kSpace16 = ASCIIToUTF16(" ");
  const string16 kTab16 = ASCIIToUTF16("\t");
  const string16 kMixedBlank16 = ASCIIToUTF16("\t \r \n  ");
  const string16 kLetter16 = ASCIIToUTF16("a");
  const string16 kPaddedWord16 = ASCIIToUTF16("\thello\r \n  ");
  EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(kSpace16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(kTab16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(kMixedBlank16, kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(kLetter16, kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(kPaddedWord16, kWhitespaceUTF16));
}
1426 
// Table-driven check of the three-way result (-1, 0 or 1) returned by
// CompareCaseInsensitiveASCII().
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    int want;
  } kCases[] = {
      // Equal, ignoring case.
      {"", "", 0},
      {"Asdf", "aSDf", 0},
      // Differing lengths.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},
      // Differing values.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(test_case.want,
              CompareCaseInsensitiveASCII(test_case.lhs, test_case.rhs));
  }
}
1439 
// Table-driven check of EqualsCaseInsensitiveASCII(): equal strings, strings
// differing only in case, in a character, and in length.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    bool want_equal;
  } kCases[] = {
      {"", "", true},
      {"Asdf", "aSDF", true},
      {"bsdf", "aSDF", false},
      {"Asdf", "aSDFz", false},
  };

  for (const auto& test_case : kCases) {
    if (test_case.want_equal) {
      EXPECT_TRUE(EqualsCaseInsensitiveASCII(test_case.lhs, test_case.rhs));
    } else {
      EXPECT_FALSE(EqualsCaseInsensitiveASCII(test_case.lhs, test_case.rhs));
    }
  }
}
1446 
// Checks IsUnicodeWhitespace() against one list of characters that must be
// rejected and one list that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // NOT unicode white space.
  const wchar_t kNonWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (const wchar_t candidate : kNonWhitespace) {
    EXPECT_FALSE(IsUnicodeWhitespace(candidate));
  }

  // Actual unicode whitespace.
  const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (const wchar_t candidate : kWhitespace) {
    EXPECT_TRUE(IsUnicodeWhitespace(candidate));
  }
}
1466 
1467 class WriteIntoTest : public testing::Test {
1468  protected:
WritesCorrectly(size_t num_chars)1469   static void WritesCorrectly(size_t num_chars) {
1470     std::string buffer;
1471     char kOriginal[] = "supercali";
1472     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1473     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1474     // string at the first \0.
1475     EXPECT_EQ(
1476         std::string(kOriginal, std::min(num_chars, base::size(kOriginal) - 1)),
1477         std::string(buffer.c_str()));
1478     EXPECT_EQ(num_chars, buffer.size());
1479   }
1480 };
1481 
// Covers WriteInto() sizing for several lengths, the zero-length case, and
// independence from a string the target was copied from.
TEST_F(WriteIntoTest, WriteInto) {
  // Validate that WriteInto reserves enough space and sizes a string
  // correctly.
  const size_t kLengths[] = {1, 2, 5000};
  for (const size_t length : kLengths) {
    WritesCorrectly(length);
  }

  // Validate that WriteInto handles 0-length strings.
  const char kUnused[] = "original";
  std::string zero_length;
  strncpy(WriteInto(&zero_length, 1), kUnused, 0);
  EXPECT_STREQ("", zero_length.c_str());
  EXPECT_EQ(0u, zero_length.size());

  // Validate that WriteInto doesn't modify other strings when using a
  // Copy-on-Write implementation: |overwritten| starts as a copy of
  // |untouched| and only the copy may change.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string untouched = kLive;
  std::string overwritten = untouched;
  strncpy(WriteInto(&overwritten, 5), kDead, 4);
  EXPECT_EQ(kDead, overwritten);
  EXPECT_EQ(4u, overwritten.size());
  EXPECT_EQ(kLive, untouched);
  EXPECT_EQ(4u, untouched.size());
}
1508 
1509 }  // namespace base
1510