1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <time.h>
16 #include <wchar.h>
17 #include <wctype.h>
18
19 #include <algorithm>
20 #include <limits>
21 #include <type_traits>
22 #include <vector>
23
24 #include "base/check_op.h"
25 #include "base/no_destructor.h"
26 #include "base/stl_util.h"
27 #include "base/strings/string_util_internal.h"
28 #include "base/strings/utf_string_conversion_utils.h"
29 #include "base/strings/utf_string_conversions.h"
30 #include "base/third_party/icu/icu_utf.h"
31 #include "build/build_config.h"
32
33 namespace base {
34
// Scans a wprintf-style |format| string and reports whether every conversion
// specification in it is one this function treats as portable.
//
// Returns false as soon as it meets 's' or 'c' without a preceding 'l' in the
// same specification, or any of 'S', 'C', 'F', 'D', 'O', 'U'. Returns true
// otherwise, including when the string ends in the middle of a specification
// (such a string is equally broken everywhere, so it is not flagged here).
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that end a conversion specification.
  const wchar_t* const kSpecifierEnd = L"diouxXeEfgGaAcspn%";

  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on the '%' that opens a specification; walk through it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // Truncated specification: nothing unportable was found.
        return true;
      }

      switch (ch) {
        case L'l':
          // Only 'l' makes a following 's' or 'c' acceptable.
          saw_l_modifier = true;
          break;
        case L's':
        case L'c':
          if (!saw_l_modifier)
            return false;
          break;
        case L'S':
        case L'C':
        case L'F':
        case L'D':
        case L'O':
        case L'U':
          return false;
        default:
          break;
      }

      if (wcschr(kSpecifierEnd, ch)) {
        // Specification finished; resume scanning the plain text after it.
        break;
      }
    }

    // Step past the character that closed the specification.
    ++cursor;
  }

  return true;
}
69
ToLowerASCII(StringPiece str)70 std::string ToLowerASCII(StringPiece str) {
71 return internal::ToLowerASCIIImpl(str);
72 }
73
ToLowerASCII(StringPiece16 str)74 string16 ToLowerASCII(StringPiece16 str) {
75 return internal::ToLowerASCIIImpl(str);
76 }
77
ToUpperASCII(StringPiece str)78 std::string ToUpperASCII(StringPiece str) {
79 return internal::ToUpperASCIIImpl(str);
80 }
81
ToUpperASCII(StringPiece16 str)82 string16 ToUpperASCII(StringPiece16 str) {
83 return internal::ToUpperASCIIImpl(str);
84 }
85
CompareCaseInsensitiveASCII(StringPiece a,StringPiece b)86 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
87 return internal::CompareCaseInsensitiveASCIIT(a, b);
88 }
89
CompareCaseInsensitiveASCII(StringPiece16 a,StringPiece16 b)90 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
91 return internal::CompareCaseInsensitiveASCIIT(a, b);
92 }
93
EqualsCaseInsensitiveASCII(StringPiece a,StringPiece b)94 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
95 return a.size() == b.size() &&
96 internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
97 }
98
EqualsCaseInsensitiveASCII(StringPiece16 a,StringPiece16 b)99 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
100 return a.size() == b.size() &&
101 internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
102 }
103
EmptyString()104 const std::string& EmptyString() {
105 static const base::NoDestructor<std::string> s;
106 return *s;
107 }
108
EmptyString16()109 const string16& EmptyString16() {
110 static const base::NoDestructor<string16> s16;
111 return *s16;
112 }
113
ReplaceChars(StringPiece16 input,StringPiece16 replace_chars,StringPiece16 replace_with,string16 * output)114 bool ReplaceChars(StringPiece16 input,
115 StringPiece16 replace_chars,
116 StringPiece16 replace_with,
117 string16* output) {
118 return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
119 }
120
ReplaceChars(StringPiece input,StringPiece replace_chars,StringPiece replace_with,std::string * output)121 bool ReplaceChars(StringPiece input,
122 StringPiece replace_chars,
123 StringPiece replace_with,
124 std::string* output) {
125 return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
126 }
127
RemoveChars(StringPiece16 input,StringPiece16 remove_chars,string16 * output)128 bool RemoveChars(StringPiece16 input,
129 StringPiece16 remove_chars,
130 string16* output) {
131 return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
132 }
133
RemoveChars(StringPiece input,StringPiece remove_chars,std::string * output)134 bool RemoveChars(StringPiece input,
135 StringPiece remove_chars,
136 std::string* output) {
137 return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output);
138 }
139
TrimString(StringPiece16 input,StringPiece16 trim_chars,string16 * output)140 bool TrimString(StringPiece16 input,
141 StringPiece16 trim_chars,
142 string16* output) {
143 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
144 TRIM_NONE;
145 }
146
TrimString(StringPiece input,StringPiece trim_chars,std::string * output)147 bool TrimString(StringPiece input,
148 StringPiece trim_chars,
149 std::string* output) {
150 return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
151 TRIM_NONE;
152 }
153
TrimString(StringPiece16 input,StringPiece16 trim_chars,TrimPositions positions)154 StringPiece16 TrimString(StringPiece16 input,
155 StringPiece16 trim_chars,
156 TrimPositions positions) {
157 return internal::TrimStringPieceT(input, trim_chars, positions);
158 }
159
TrimString(StringPiece input,StringPiece trim_chars,TrimPositions positions)160 StringPiece TrimString(StringPiece input,
161 StringPiece trim_chars,
162 TrimPositions positions) {
163 return internal::TrimStringPieceT(input, trim_chars, positions);
164 }
165
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)166 void TruncateUTF8ToByteSize(const std::string& input,
167 const size_t byte_size,
168 std::string* output) {
169 DCHECK(output);
170 if (byte_size > input.length()) {
171 *output = input;
172 return;
173 }
174 DCHECK_LE(byte_size,
175 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
176 // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
177 int32_t truncation_length = static_cast<int32_t>(byte_size);
178 int32_t char_index = truncation_length - 1;
179 const char* data = input.data();
180
181 // Using CBU8, we will move backwards from the truncation point
182 // to the beginning of the string looking for a valid UTF8
183 // character. Once a full UTF8 character is found, we will
184 // truncate the string to the end of that character.
185 while (char_index >= 0) {
186 int32_t prev = char_index;
187 base_icu::UChar32 code_point = 0;
188 CBU8_NEXT(data, char_index, truncation_length, code_point);
189 if (!IsValidCharacter(code_point) ||
190 !IsValidCodepoint(code_point)) {
191 char_index = prev - 1;
192 } else {
193 break;
194 }
195 }
196
197 if (char_index >= 0 )
198 *output = input.substr(0, char_index);
199 else
200 output->clear();
201 }
202
TrimWhitespace(StringPiece16 input,TrimPositions positions,string16 * output)203 TrimPositions TrimWhitespace(StringPiece16 input,
204 TrimPositions positions,
205 string16* output) {
206 return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
207 positions, output);
208 }
209
TrimWhitespace(StringPiece16 input,TrimPositions positions)210 StringPiece16 TrimWhitespace(StringPiece16 input,
211 TrimPositions positions) {
212 return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16),
213 positions);
214 }
215
TrimWhitespaceASCII(StringPiece input,TrimPositions positions,std::string * output)216 TrimPositions TrimWhitespaceASCII(StringPiece input,
217 TrimPositions positions,
218 std::string* output) {
219 return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions,
220 output);
221 }
222
TrimWhitespaceASCII(StringPiece input,TrimPositions positions)223 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
224 return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII),
225 positions);
226 }
227
CollapseWhitespace(StringPiece16 text,bool trim_sequences_with_line_breaks)228 string16 CollapseWhitespace(StringPiece16 text,
229 bool trim_sequences_with_line_breaks) {
230 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
231 }
232
CollapseWhitespaceASCII(StringPiece text,bool trim_sequences_with_line_breaks)233 std::string CollapseWhitespaceASCII(StringPiece text,
234 bool trim_sequences_with_line_breaks) {
235 return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
236 }
237
ContainsOnlyChars(StringPiece input,StringPiece characters)238 bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
239 return input.find_first_not_of(characters) == StringPiece::npos;
240 }
241
ContainsOnlyChars(StringPiece16 input,StringPiece16 characters)242 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
243 return input.find_first_not_of(characters) == StringPiece16::npos;
244 }
245
246
IsStringASCII(StringPiece str)247 bool IsStringASCII(StringPiece str) {
248 return internal::DoIsStringASCII(str.data(), str.length());
249 }
250
IsStringASCII(StringPiece16 str)251 bool IsStringASCII(StringPiece16 str) {
252 return internal::DoIsStringASCII(str.data(), str.length());
253 }
254
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined: passes
// the buffer and length of |str| to internal::DoIsStringASCII().
bool IsStringASCII(WStringPiece str) {
  const auto* chars = str.data();
  const auto count = str.length();
  return internal::DoIsStringASCII(chars, count);
}
#endif
260
IsStringUTF8(StringPiece str)261 bool IsStringUTF8(StringPiece str) {
262 return internal::DoIsStringUTF8<IsValidCharacter>(str);
263 }
264
IsStringUTF8AllowingNoncharacters(StringPiece str)265 bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
266 return internal::DoIsStringUTF8<IsValidCodepoint>(str);
267 }
268
LowerCaseEqualsASCII(StringPiece str,StringPiece lowercase_ascii)269 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
270 return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
271 }
272
LowerCaseEqualsASCII(StringPiece16 str,StringPiece lowercase_ascii)273 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
274 return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
275 }
276
EqualsASCII(StringPiece16 str,StringPiece ascii)277 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
278 return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
279 }
280
StartsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)281 bool StartsWith(StringPiece str,
282 StringPiece search_for,
283 CompareCase case_sensitivity) {
284 return internal::StartsWithT(str, search_for, case_sensitivity);
285 }
286
StartsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)287 bool StartsWith(StringPiece16 str,
288 StringPiece16 search_for,
289 CompareCase case_sensitivity) {
290 return internal::StartsWithT(str, search_for, case_sensitivity);
291 }
292
EndsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)293 bool EndsWith(StringPiece str,
294 StringPiece search_for,
295 CompareCase case_sensitivity) {
296 return internal::EndsWithT(str, search_for, case_sensitivity);
297 }
298
EndsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)299 bool EndsWith(StringPiece16 str,
300 StringPiece16 search_for,
301 CompareCase case_sensitivity) {
302 return internal::EndsWithT(str, search_for, case_sensitivity);
303 }
304
HexDigitToInt(wchar_t c)305 char HexDigitToInt(wchar_t c) {
306 DCHECK(IsHexDigit(c));
307 if (c >= '0' && c <= '9')
308 return static_cast<char>(c - '0');
309 if (c >= 'A' && c <= 'F')
310 return static_cast<char>(c - 'A' + 10);
311 if (c >= 'a' && c <= 'f')
312 return static_cast<char>(c - 'a' + 10);
313 return 0;
314 }
315
IsUnicodeWhitespace(wchar_t c)316 bool IsUnicodeWhitespace(wchar_t c) {
317 // kWhitespaceWide is a NULL-terminated string
318 for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
319 if (*cur == c)
320 return true;
321 }
322 return false;
323 }
324
// Unit suffixes indexed by how many times the byte count was divided by 1024
// in FormatBytesUnlocalized(). Each entry carries its own leading space.
static const char* const kByteStringsUnlocalized[] = {
    " B",   // bytes
    " kB",  // 1024^1
    " MB",  // 1024^2
    " GB",  // 1024^3
    " TB",  // 1024^4
    " PB",  // 1024^5; the largest unit used
};
333
FormatBytesUnlocalized(int64_t bytes)334 string16 FormatBytesUnlocalized(int64_t bytes) {
335 double unit_amount = static_cast<double>(bytes);
336 size_t dimension = 0;
337 const int kKilo = 1024;
338 while (unit_amount >= kKilo &&
339 dimension < base::size(kByteStringsUnlocalized) - 1) {
340 unit_amount /= kKilo;
341 dimension++;
342 }
343
344 char buf[64];
345 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
346 base::snprintf(buf, base::size(buf), "%.1lf%s", unit_amount,
347 kByteStringsUnlocalized[dimension]);
348 } else {
349 base::snprintf(buf, base::size(buf), "%.0lf%s", unit_amount,
350 kByteStringsUnlocalized[dimension]);
351 }
352
353 return ASCIIToUTF16(buf);
354 }
355
ReplaceFirstSubstringAfterOffset(string16 * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)356 void ReplaceFirstSubstringAfterOffset(string16* str,
357 size_t start_offset,
358 StringPiece16 find_this,
359 StringPiece16 replace_with) {
360 internal::DoReplaceMatchesAfterOffset(
361 str, start_offset, internal::SubstringMatcher<string16>{find_this},
362 replace_with, internal::ReplaceType::REPLACE_FIRST);
363 }
364
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)365 void ReplaceFirstSubstringAfterOffset(std::string* str,
366 size_t start_offset,
367 StringPiece find_this,
368 StringPiece replace_with) {
369 internal::DoReplaceMatchesAfterOffset(
370 str, start_offset, internal::SubstringMatcher<std::string>{find_this},
371 replace_with, internal::ReplaceType::REPLACE_FIRST);
372 }
373
ReplaceSubstringsAfterOffset(string16 * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)374 void ReplaceSubstringsAfterOffset(string16* str,
375 size_t start_offset,
376 StringPiece16 find_this,
377 StringPiece16 replace_with) {
378 internal::DoReplaceMatchesAfterOffset(
379 str, start_offset, internal::SubstringMatcher<string16>{find_this},
380 replace_with, internal::ReplaceType::REPLACE_ALL);
381 }
382
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)383 void ReplaceSubstringsAfterOffset(std::string* str,
384 size_t start_offset,
385 StringPiece find_this,
386 StringPiece replace_with) {
387 internal::DoReplaceMatchesAfterOffset(
388 str, start_offset, internal::SubstringMatcher<std::string>{find_this},
389 replace_with, internal::ReplaceType::REPLACE_ALL);
390 }
391
WriteInto(std::string * str,size_t length_with_null)392 char* WriteInto(std::string* str, size_t length_with_null) {
393 return internal::WriteIntoT(str, length_with_null);
394 }
395
WriteInto(string16 * str,size_t length_with_null)396 char16* WriteInto(string16* str, size_t length_with_null) {
397 return internal::WriteIntoT(str, length_with_null);
398 }
399
JoinString(span<const std::string> parts,StringPiece separator)400 std::string JoinString(span<const std::string> parts, StringPiece separator) {
401 return internal::JoinStringT(parts, separator);
402 }
403
JoinString(span<const string16> parts,StringPiece16 separator)404 string16 JoinString(span<const string16> parts, StringPiece16 separator) {
405 return internal::JoinStringT(parts, separator);
406 }
407
JoinString(span<const StringPiece> parts,StringPiece separator)408 std::string JoinString(span<const StringPiece> parts, StringPiece separator) {
409 return internal::JoinStringT(parts, separator);
410 }
411
JoinString(span<const StringPiece16> parts,StringPiece16 separator)412 string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) {
413 return internal::JoinStringT(parts, separator);
414 }
415
JoinString(std::initializer_list<StringPiece> parts,StringPiece separator)416 std::string JoinString(std::initializer_list<StringPiece> parts,
417 StringPiece separator) {
418 return internal::JoinStringT(parts, separator);
419 }
420
JoinString(std::initializer_list<StringPiece16> parts,StringPiece16 separator)421 string16 JoinString(std::initializer_list<StringPiece16> parts,
422 StringPiece16 separator) {
423 return internal::JoinStringT(parts, separator);
424 }
425
ReplaceStringPlaceholders(StringPiece16 format_string,const std::vector<string16> & subst,std::vector<size_t> * offsets)426 string16 ReplaceStringPlaceholders(StringPiece16 format_string,
427 const std::vector<string16>& subst,
428 std::vector<size_t>* offsets) {
429 return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
430 }
431
ReplaceStringPlaceholders(StringPiece format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)432 std::string ReplaceStringPlaceholders(StringPiece format_string,
433 const std::vector<std::string>& subst,
434 std::vector<size_t>* offsets) {
435 return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
436 }
437
// Single-substitution convenience overload: formats |format_string| with the
// one replacement |a| by delegating to the vector overload.
//
// If |offset| is non-null it receives the first offset reported by that
// overload. Exactly one offset is expected (DCHECKed). When DCHECKs are
// compiled out and no offset was reported, |*offset| is left unmodified rather
// than being read from an empty vector.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const string16& a,
                                   size_t* offset) {
  std::vector<size_t> offsets;
  string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets);

  DCHECK_EQ(1U, offsets.size());
  // Indexing an empty vector is undefined behaviour, so only read the first
  // offset when one was actually recorded.
  if (offset && !offsets.empty())
    *offset = offsets[0];
  return result;
}
449
strlcpy(char * dst,const char * src,size_t dst_size)450 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
451 return internal::lcpyT(dst, src, dst_size);
452 }
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)453 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
454 return internal::lcpyT(dst, src, dst_size);
455 }
456
457 } // namespace base
458