1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <time.h>
16 #include <wchar.h>
17 #include <wctype.h>
18 
19 #include <algorithm>
20 #include <limits>
21 #include <type_traits>
22 #include <vector>
23 
24 #include "base/check_op.h"
25 #include "base/no_destructor.h"
26 #include "base/stl_util.h"
27 #include "base/strings/string_util_internal.h"
28 #include "base/strings/utf_string_conversion_utils.h"
29 #include "base/strings/utf_string_conversions.h"
30 #include "base/third_party/icu/icu_utf.h"
31 #include "build/build_config.h"
32 
33 namespace base {
34 
// Scans |format| and reports whether every conversion specification in it is
// one this function treats as portable. A bare 's' or 'c' (without an 'l'
// modifier) and any of 'S', 'C', 'F', 'D', 'O', 'U' are rejected.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on a '%': walk the specification until a known
    // conversion character terminates it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // The string ended inside a specification. Nothing unportable was
        // seen, and the string is broken in the same way everywhere, so it is
        // reported as portable.
        return true;
      }

      if (ch == L'l') {
        // Only an 'l' modifier makes a following 's' or 'c' acceptable.
        saw_l_modifier = true;
      } else {
        switch (ch) {
          case L's':
          case L'c':
            if (!saw_l_modifier)
              return false;
            break;
          case L'S':
          case L'C':
          case L'F':
          case L'D':
          case L'O':
          case L'U':
            return false;
          default:
            break;
        }
      }

      // A recognized conversion character ends this specification.
      if (wcschr(L"diouxXeEfgGaAcspn%", ch))
        break;
    }

    // Step past the conversion character and resume the outer scan.
    ++cursor;
  }

  return true;
}
69 
ToLowerASCII(StringPiece str)70 std::string ToLowerASCII(StringPiece str) {
71   return internal::ToLowerASCIIImpl(str);
72 }
73 
ToLowerASCII(StringPiece16 str)74 string16 ToLowerASCII(StringPiece16 str) {
75   return internal::ToLowerASCIIImpl(str);
76 }
77 
ToUpperASCII(StringPiece str)78 std::string ToUpperASCII(StringPiece str) {
79   return internal::ToUpperASCIIImpl(str);
80 }
81 
ToUpperASCII(StringPiece16 str)82 string16 ToUpperASCII(StringPiece16 str) {
83   return internal::ToUpperASCIIImpl(str);
84 }
85 
CompareCaseInsensitiveASCII(StringPiece a,StringPiece b)86 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
87   return internal::CompareCaseInsensitiveASCIIT(a, b);
88 }
89 
CompareCaseInsensitiveASCII(StringPiece16 a,StringPiece16 b)90 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
91   return internal::CompareCaseInsensitiveASCIIT(a, b);
92 }
93 
EqualsCaseInsensitiveASCII(StringPiece a,StringPiece b)94 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
95   return a.size() == b.size() &&
96          internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
97 }
98 
EqualsCaseInsensitiveASCII(StringPiece16 a,StringPiece16 b)99 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
100   return a.size() == b.size() &&
101          internal::CompareCaseInsensitiveASCIIT(a, b) == 0;
102 }
103 
EmptyString()104 const std::string& EmptyString() {
105   static const base::NoDestructor<std::string> s;
106   return *s;
107 }
108 
EmptyString16()109 const string16& EmptyString16() {
110   static const base::NoDestructor<string16> s16;
111   return *s16;
112 }
113 
ReplaceChars(StringPiece16 input,StringPiece16 replace_chars,StringPiece16 replace_with,string16 * output)114 bool ReplaceChars(StringPiece16 input,
115                   StringPiece16 replace_chars,
116                   StringPiece16 replace_with,
117                   string16* output) {
118   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
119 }
120 
ReplaceChars(StringPiece input,StringPiece replace_chars,StringPiece replace_with,std::string * output)121 bool ReplaceChars(StringPiece input,
122                   StringPiece replace_chars,
123                   StringPiece replace_with,
124                   std::string* output) {
125   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
126 }
127 
RemoveChars(StringPiece16 input,StringPiece16 remove_chars,string16 * output)128 bool RemoveChars(StringPiece16 input,
129                  StringPiece16 remove_chars,
130                  string16* output) {
131   return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
132 }
133 
RemoveChars(StringPiece input,StringPiece remove_chars,std::string * output)134 bool RemoveChars(StringPiece input,
135                  StringPiece remove_chars,
136                  std::string* output) {
137   return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output);
138 }
139 
TrimString(StringPiece16 input,StringPiece16 trim_chars,string16 * output)140 bool TrimString(StringPiece16 input,
141                 StringPiece16 trim_chars,
142                 string16* output) {
143   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
144          TRIM_NONE;
145 }
146 
TrimString(StringPiece input,StringPiece trim_chars,std::string * output)147 bool TrimString(StringPiece input,
148                 StringPiece trim_chars,
149                 std::string* output) {
150   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
151          TRIM_NONE;
152 }
153 
TrimString(StringPiece16 input,StringPiece16 trim_chars,TrimPositions positions)154 StringPiece16 TrimString(StringPiece16 input,
155                          StringPiece16 trim_chars,
156                          TrimPositions positions) {
157   return internal::TrimStringPieceT(input, trim_chars, positions);
158 }
159 
TrimString(StringPiece input,StringPiece trim_chars,TrimPositions positions)160 StringPiece TrimString(StringPiece input,
161                        StringPiece trim_chars,
162                        TrimPositions positions) {
163   return internal::TrimStringPieceT(input, trim_chars, positions);
164 }
165 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)166 void TruncateUTF8ToByteSize(const std::string& input,
167                             const size_t byte_size,
168                             std::string* output) {
169   DCHECK(output);
170   if (byte_size > input.length()) {
171     *output = input;
172     return;
173   }
174   DCHECK_LE(byte_size,
175             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
176   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
177   int32_t truncation_length = static_cast<int32_t>(byte_size);
178   int32_t char_index = truncation_length - 1;
179   const char* data = input.data();
180 
181   // Using CBU8, we will move backwards from the truncation point
182   // to the beginning of the string looking for a valid UTF8
183   // character.  Once a full UTF8 character is found, we will
184   // truncate the string to the end of that character.
185   while (char_index >= 0) {
186     int32_t prev = char_index;
187     base_icu::UChar32 code_point = 0;
188     CBU8_NEXT(data, char_index, truncation_length, code_point);
189     if (!IsValidCharacter(code_point) ||
190         !IsValidCodepoint(code_point)) {
191       char_index = prev - 1;
192     } else {
193       break;
194     }
195   }
196 
197   if (char_index >= 0 )
198     *output = input.substr(0, char_index);
199   else
200     output->clear();
201 }
202 
TrimWhitespace(StringPiece16 input,TrimPositions positions,string16 * output)203 TrimPositions TrimWhitespace(StringPiece16 input,
204                              TrimPositions positions,
205                              string16* output) {
206   return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
207                                positions, output);
208 }
209 
TrimWhitespace(StringPiece16 input,TrimPositions positions)210 StringPiece16 TrimWhitespace(StringPiece16 input,
211                              TrimPositions positions) {
212   return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16),
213                                     positions);
214 }
215 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions,std::string * output)216 TrimPositions TrimWhitespaceASCII(StringPiece input,
217                                   TrimPositions positions,
218                                   std::string* output) {
219   return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions,
220                                output);
221 }
222 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions)223 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
224   return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII),
225                                     positions);
226 }
227 
CollapseWhitespace(StringPiece16 text,bool trim_sequences_with_line_breaks)228 string16 CollapseWhitespace(StringPiece16 text,
229                             bool trim_sequences_with_line_breaks) {
230   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
231 }
232 
CollapseWhitespaceASCII(StringPiece text,bool trim_sequences_with_line_breaks)233 std::string CollapseWhitespaceASCII(StringPiece text,
234                                     bool trim_sequences_with_line_breaks) {
235   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
236 }
237 
ContainsOnlyChars(StringPiece input,StringPiece characters)238 bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
239   return input.find_first_not_of(characters) == StringPiece::npos;
240 }
241 
ContainsOnlyChars(StringPiece16 input,StringPiece16 characters)242 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
243   return input.find_first_not_of(characters) == StringPiece16::npos;
244 }
245 
246 
IsStringASCII(StringPiece str)247 bool IsStringASCII(StringPiece str) {
248   return internal::DoIsStringASCII(str.data(), str.length());
249 }
250 
IsStringASCII(StringPiece16 str)251 bool IsStringASCII(StringPiece16 str) {
252   return internal::DoIsStringASCII(str.data(), str.length());
253 }
254 
255 #if defined(WCHAR_T_IS_UTF32)
IsStringASCII(WStringPiece str)256 bool IsStringASCII(WStringPiece str) {
257   return internal::DoIsStringASCII(str.data(), str.length());
258 }
259 #endif
260 
IsStringUTF8(StringPiece str)261 bool IsStringUTF8(StringPiece str) {
262   return internal::DoIsStringUTF8<IsValidCharacter>(str);
263 }
264 
IsStringUTF8AllowingNoncharacters(StringPiece str)265 bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
266   return internal::DoIsStringUTF8<IsValidCodepoint>(str);
267 }
268 
LowerCaseEqualsASCII(StringPiece str,StringPiece lowercase_ascii)269 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
270   return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
271 }
272 
LowerCaseEqualsASCII(StringPiece16 str,StringPiece lowercase_ascii)273 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
274   return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
275 }
276 
EqualsASCII(StringPiece16 str,StringPiece ascii)277 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
278   return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
279 }
280 
StartsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)281 bool StartsWith(StringPiece str,
282                 StringPiece search_for,
283                 CompareCase case_sensitivity) {
284   return internal::StartsWithT(str, search_for, case_sensitivity);
285 }
286 
StartsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)287 bool StartsWith(StringPiece16 str,
288                 StringPiece16 search_for,
289                 CompareCase case_sensitivity) {
290   return internal::StartsWithT(str, search_for, case_sensitivity);
291 }
292 
EndsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)293 bool EndsWith(StringPiece str,
294               StringPiece search_for,
295               CompareCase case_sensitivity) {
296   return internal::EndsWithT(str, search_for, case_sensitivity);
297 }
298 
EndsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)299 bool EndsWith(StringPiece16 str,
300               StringPiece16 search_for,
301               CompareCase case_sensitivity) {
302   return internal::EndsWithT(str, search_for, case_sensitivity);
303 }
304 
HexDigitToInt(wchar_t c)305 char HexDigitToInt(wchar_t c) {
306   DCHECK(IsHexDigit(c));
307   if (c >= '0' && c <= '9')
308     return static_cast<char>(c - '0');
309   if (c >= 'A' && c <= 'F')
310     return static_cast<char>(c - 'A' + 10);
311   if (c >= 'a' && c <= 'f')
312     return static_cast<char>(c - 'a' + 10);
313   return 0;
314 }
315 
IsUnicodeWhitespace(wchar_t c)316 bool IsUnicodeWhitespace(wchar_t c) {
317   // kWhitespaceWide is a NULL-terminated string
318   for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
319     if (*cur == c)
320       return true;
321   }
322   return false;
323 }
324 
// Unit suffixes (each with a leading space), ordered by increasing magnitude
// and indexed by the number of times the byte count was divided down.
static const char* const kByteStringsUnlocalized[] = {
    " B", " kB", " MB",
    " GB", " TB", " PB",
};
333 
FormatBytesUnlocalized(int64_t bytes)334 string16 FormatBytesUnlocalized(int64_t bytes) {
335   double unit_amount = static_cast<double>(bytes);
336   size_t dimension = 0;
337   const int kKilo = 1024;
338   while (unit_amount >= kKilo &&
339          dimension < base::size(kByteStringsUnlocalized) - 1) {
340     unit_amount /= kKilo;
341     dimension++;
342   }
343 
344   char buf[64];
345   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
346     base::snprintf(buf, base::size(buf), "%.1lf%s", unit_amount,
347                    kByteStringsUnlocalized[dimension]);
348   } else {
349     base::snprintf(buf, base::size(buf), "%.0lf%s", unit_amount,
350                    kByteStringsUnlocalized[dimension]);
351   }
352 
353   return ASCIIToUTF16(buf);
354 }
355 
ReplaceFirstSubstringAfterOffset(string16 * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)356 void ReplaceFirstSubstringAfterOffset(string16* str,
357                                       size_t start_offset,
358                                       StringPiece16 find_this,
359                                       StringPiece16 replace_with) {
360   internal::DoReplaceMatchesAfterOffset(
361       str, start_offset, internal::SubstringMatcher<string16>{find_this},
362       replace_with, internal::ReplaceType::REPLACE_FIRST);
363 }
364 
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)365 void ReplaceFirstSubstringAfterOffset(std::string* str,
366                                       size_t start_offset,
367                                       StringPiece find_this,
368                                       StringPiece replace_with) {
369   internal::DoReplaceMatchesAfterOffset(
370       str, start_offset, internal::SubstringMatcher<std::string>{find_this},
371       replace_with, internal::ReplaceType::REPLACE_FIRST);
372 }
373 
ReplaceSubstringsAfterOffset(string16 * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)374 void ReplaceSubstringsAfterOffset(string16* str,
375                                   size_t start_offset,
376                                   StringPiece16 find_this,
377                                   StringPiece16 replace_with) {
378   internal::DoReplaceMatchesAfterOffset(
379       str, start_offset, internal::SubstringMatcher<string16>{find_this},
380       replace_with, internal::ReplaceType::REPLACE_ALL);
381 }
382 
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)383 void ReplaceSubstringsAfterOffset(std::string* str,
384                                   size_t start_offset,
385                                   StringPiece find_this,
386                                   StringPiece replace_with) {
387   internal::DoReplaceMatchesAfterOffset(
388       str, start_offset, internal::SubstringMatcher<std::string>{find_this},
389       replace_with, internal::ReplaceType::REPLACE_ALL);
390 }
391 
WriteInto(std::string * str,size_t length_with_null)392 char* WriteInto(std::string* str, size_t length_with_null) {
393   return internal::WriteIntoT(str, length_with_null);
394 }
395 
WriteInto(string16 * str,size_t length_with_null)396 char16* WriteInto(string16* str, size_t length_with_null) {
397   return internal::WriteIntoT(str, length_with_null);
398 }
399 
JoinString(span<const std::string> parts,StringPiece separator)400 std::string JoinString(span<const std::string> parts, StringPiece separator) {
401   return internal::JoinStringT(parts, separator);
402 }
403 
JoinString(span<const string16> parts,StringPiece16 separator)404 string16 JoinString(span<const string16> parts, StringPiece16 separator) {
405   return internal::JoinStringT(parts, separator);
406 }
407 
JoinString(span<const StringPiece> parts,StringPiece separator)408 std::string JoinString(span<const StringPiece> parts, StringPiece separator) {
409   return internal::JoinStringT(parts, separator);
410 }
411 
JoinString(span<const StringPiece16> parts,StringPiece16 separator)412 string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) {
413   return internal::JoinStringT(parts, separator);
414 }
415 
JoinString(std::initializer_list<StringPiece> parts,StringPiece separator)416 std::string JoinString(std::initializer_list<StringPiece> parts,
417                        StringPiece separator) {
418   return internal::JoinStringT(parts, separator);
419 }
420 
JoinString(std::initializer_list<StringPiece16> parts,StringPiece16 separator)421 string16 JoinString(std::initializer_list<StringPiece16> parts,
422                     StringPiece16 separator) {
423   return internal::JoinStringT(parts, separator);
424 }
425 
ReplaceStringPlaceholders(StringPiece16 format_string,const std::vector<string16> & subst,std::vector<size_t> * offsets)426 string16 ReplaceStringPlaceholders(StringPiece16 format_string,
427                                    const std::vector<string16>& subst,
428                                    std::vector<size_t>* offsets) {
429   return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
430 }
431 
ReplaceStringPlaceholders(StringPiece format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)432 std::string ReplaceStringPlaceholders(StringPiece format_string,
433                                       const std::vector<std::string>& subst,
434                                       std::vector<size_t>* offsets) {
435   return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
436 }
437 
// Single-substitution convenience overload: wraps |a| in a one-element vector
// and calls the vector-based overload. Exactly one offset is expected back
// (DCHECKed); when |offset| is non-null it receives that first offset.
string16 ReplaceStringPlaceholders(const string16& format_string,
                                   const string16& a,
                                   size_t* offset) {
  const std::vector<string16> replacements = {a};
  std::vector<size_t> found_offsets;
  string16 formatted =
      ReplaceStringPlaceholders(format_string, replacements, &found_offsets);

  DCHECK_EQ(1U, found_offsets.size());
  if (offset != nullptr)
    *offset = found_offsets[0];
  return formatted;
}
449 
strlcpy(char * dst,const char * src,size_t dst_size)450 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
451   return internal::lcpyT(dst, src, dst_size);
452 }
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)453 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
454   return internal::lcpyT(dst, src, dst_size);
455 }
456 
457 }  // namespace base
458