1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_split.h"
6 
7 #include <stddef.h>
8 
9 #include "base/logging.h"
10 #include "base/strings/string_util.h"
11 #include "base/third_party/icu/icu_utf.h"
12 
13 namespace base {
14 
15 namespace {
16 
17 // Returns either the ASCII or UTF-16 whitespace.
18 template<typename Str> BasicStringPiece<Str> WhitespaceForType();
19 #if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
20 template <>
WhitespaceForType()21 WStringPiece WhitespaceForType<std::wstring>() {
22   return kWhitespaceWide;
23 }
24 #endif
25 
WhitespaceForType()26 template<> StringPiece16 WhitespaceForType<string16>() {
27   return kWhitespaceUTF16;
28 }
WhitespaceForType()29 template<> StringPiece WhitespaceForType<std::string>() {
30   return kWhitespaceASCII;
31 }
32 
33 // General string splitter template. Can take 8- or 16-bit input, can produce
34 // the corresponding string or StringPiece output.
35 template <typename OutputStringType, typename Str>
SplitStringT(BasicStringPiece<Str> str,BasicStringPiece<Str> delimiter,WhitespaceHandling whitespace,SplitResult result_type)36 static std::vector<OutputStringType> SplitStringT(
37     BasicStringPiece<Str> str,
38     BasicStringPiece<Str> delimiter,
39     WhitespaceHandling whitespace,
40     SplitResult result_type) {
41   std::vector<OutputStringType> result;
42   if (str.empty())
43     return result;
44 
45   size_t start = 0;
46   while (start != Str::npos) {
47     size_t end = str.find_first_of(delimiter, start);
48 
49     BasicStringPiece<Str> piece;
50     if (end == Str::npos) {
51       piece = str.substr(start);
52       start = Str::npos;
53     } else {
54       piece = str.substr(start, end - start);
55       start = end + 1;
56     }
57 
58     if (whitespace == TRIM_WHITESPACE)
59       piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
60 
61     if (result_type == SPLIT_WANT_ALL || !piece.empty())
62       result.emplace_back(piece);
63   }
64   return result;
65 }
66 
AppendStringKeyValue(StringPiece input,char delimiter,StringPairs * result)67 bool AppendStringKeyValue(StringPiece input,
68                           char delimiter,
69                           StringPairs* result) {
70   // Always append a new item regardless of success (it might be empty). The
71   // below code will copy the strings directly into the result pair.
72   result->resize(result->size() + 1);
73   auto& result_pair = result->back();
74 
75   // Find the delimiter.
76   size_t end_key_pos = input.find_first_of(delimiter);
77   if (end_key_pos == std::string::npos) {
78     DVLOG(1) << "cannot find delimiter in: " << input;
79     return false;    // No delimiter.
80   }
81   result_pair.first = std::string(input.substr(0, end_key_pos));
82 
83   // Find the value string.
84   StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos);
85   size_t begin_value_pos = remains.find_first_not_of(delimiter);
86   if (begin_value_pos == StringPiece::npos) {
87     DVLOG(1) << "cannot parse value from input: " << input;
88     return false;   // No value.
89   }
90 
91   result_pair.second = std::string(
92       remains.substr(begin_value_pos, remains.size() - begin_value_pos));
93 
94   return true;
95 }
96 
97 template <typename OutputStringType, typename Str>
SplitStringUsingSubstrT(BasicStringPiece<Str> input,BasicStringPiece<Str> delimiter,WhitespaceHandling whitespace,SplitResult result_type)98 std::vector<OutputStringType> SplitStringUsingSubstrT(
99     BasicStringPiece<Str> input,
100     BasicStringPiece<Str> delimiter,
101     WhitespaceHandling whitespace,
102     SplitResult result_type) {
103   using Piece = BasicStringPiece<Str>;
104   using size_type = typename Piece::size_type;
105 
106   std::vector<OutputStringType> result;
107   for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
108        begin_index = end_index + delimiter.size()) {
109     end_index = input.find(delimiter, begin_index);
110     Piece term = end_index == Piece::npos
111                      ? input.substr(begin_index)
112                      : input.substr(begin_index, end_index - begin_index);
113 
114     if (whitespace == TRIM_WHITESPACE)
115       term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
116 
117     if (result_type == SPLIT_WANT_ALL || !term.empty())
118       result.emplace_back(term);
119   }
120 
121   return result;
122 }
123 
124 }  // namespace
125 
SplitString(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)126 std::vector<std::string> SplitString(StringPiece input,
127                                      StringPiece separators,
128                                      WhitespaceHandling whitespace,
129                                      SplitResult result_type) {
130   return SplitStringT<std::string>(input, separators, whitespace, result_type);
131 }
132 
SplitString(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)133 std::vector<string16> SplitString(StringPiece16 input,
134                                   StringPiece16 separators,
135                                   WhitespaceHandling whitespace,
136                                   SplitResult result_type) {
137   return SplitStringT<string16>(input, separators, whitespace, result_type);
138 }
139 
SplitStringPiece(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)140 std::vector<StringPiece> SplitStringPiece(StringPiece input,
141                                           StringPiece separators,
142                                           WhitespaceHandling whitespace,
143                                           SplitResult result_type) {
144   return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
145 }
146 
SplitStringPiece(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)147 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
148                                             StringPiece16 separators,
149                                             WhitespaceHandling whitespace,
150                                             SplitResult result_type) {
151   return SplitStringT<StringPiece16>(input, separators, whitespace,
152                                      result_type);
153 }
154 
SplitStringIntoKeyValuePairs(StringPiece input,char key_value_delimiter,char key_value_pair_delimiter,StringPairs * key_value_pairs)155 bool SplitStringIntoKeyValuePairs(StringPiece input,
156                                   char key_value_delimiter,
157                                   char key_value_pair_delimiter,
158                                   StringPairs* key_value_pairs) {
159   return SplitStringIntoKeyValuePairsUsingSubstr(
160       input, key_value_delimiter, StringPiece(&key_value_pair_delimiter, 1),
161       key_value_pairs);
162 }
163 
SplitStringIntoKeyValuePairsUsingSubstr(StringPiece input,char key_value_delimiter,StringPiece key_value_pair_delimiter,StringPairs * key_value_pairs)164 bool SplitStringIntoKeyValuePairsUsingSubstr(
165     StringPiece input,
166     char key_value_delimiter,
167     StringPiece key_value_pair_delimiter,
168     StringPairs* key_value_pairs) {
169   key_value_pairs->clear();
170 
171   std::vector<StringPiece> pairs = SplitStringPieceUsingSubstr(
172       input, key_value_pair_delimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
173   key_value_pairs->reserve(pairs.size());
174 
175   bool success = true;
176   for (const StringPiece& pair : pairs) {
177     if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) {
178       // Don't return here, to allow for pairs without associated
179       // value or key; just record that the split failed.
180       success = false;
181     }
182   }
183   return success;
184 }
185 
SplitStringUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)186 std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
187                                              StringPiece16 delimiter,
188                                              WhitespaceHandling whitespace,
189                                              SplitResult result_type) {
190   return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
191                                            result_type);
192 }
193 
SplitStringUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)194 std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
195                                                 StringPiece delimiter,
196                                                 WhitespaceHandling whitespace,
197                                                 SplitResult result_type) {
198   return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
199                                               result_type);
200 }
201 
SplitStringPieceUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)202 std::vector<StringPiece16> SplitStringPieceUsingSubstr(
203     StringPiece16 input,
204     StringPiece16 delimiter,
205     WhitespaceHandling whitespace,
206     SplitResult result_type) {
207   std::vector<StringPiece16> result;
208   return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
209                                                 result_type);
210 }
211 
SplitStringPieceUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)212 std::vector<StringPiece> SplitStringPieceUsingSubstr(
213     StringPiece input,
214     StringPiece delimiter,
215     WhitespaceHandling whitespace,
216     SplitResult result_type) {
217   return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
218                                               result_type);
219 }
220 
221 #if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
SplitString(WStringPiece input,WStringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)222 std::vector<std::wstring> SplitString(WStringPiece input,
223                                       WStringPiece separators,
224                                       WhitespaceHandling whitespace,
225                                       SplitResult result_type) {
226   return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
227 }
228 
SplitStringPiece(WStringPiece input,WStringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)229 std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
230                                            WStringPiece separators,
231                                            WhitespaceHandling whitespace,
232                                            SplitResult result_type) {
233   return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
234 }
235 
SplitStringUsingSubstr(WStringPiece input,WStringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)236 std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
237                                                  WStringPiece delimiter,
238                                                  WhitespaceHandling whitespace,
239                                                  SplitResult result_type) {
240   return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
241                                                result_type);
242 }
243 
SplitStringPieceUsingSubstr(WStringPiece input,WStringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)244 std::vector<WStringPiece> SplitStringPieceUsingSubstr(
245     WStringPiece input,
246     WStringPiece delimiter,
247     WhitespaceHandling whitespace,
248     SplitResult result_type) {
249   return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
250                                                result_type);
251 }
252 #endif
253 
254 }  // namespace base
255