1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_split.h"
6
7 #include <stddef.h>
8
9 #include "base/logging.h"
10 #include "base/strings/string_util.h"
11 #include "base/third_party/icu/icu_utf.h"
12
13 namespace base {
14
15 namespace {
16
17 // Returns either the ASCII or UTF-16 whitespace.
18 template<typename Str> BasicStringPiece<Str> WhitespaceForType();
19 #if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
20 template <>
WhitespaceForType()21 WStringPiece WhitespaceForType<std::wstring>() {
22 return kWhitespaceWide;
23 }
24 #endif
25
WhitespaceForType()26 template<> StringPiece16 WhitespaceForType<string16>() {
27 return kWhitespaceUTF16;
28 }
WhitespaceForType()29 template<> StringPiece WhitespaceForType<std::string>() {
30 return kWhitespaceASCII;
31 }
32
33 // General string splitter template. Can take 8- or 16-bit input, can produce
34 // the corresponding string or StringPiece output.
35 template <typename OutputStringType, typename Str>
SplitStringT(BasicStringPiece<Str> str,BasicStringPiece<Str> delimiter,WhitespaceHandling whitespace,SplitResult result_type)36 static std::vector<OutputStringType> SplitStringT(
37 BasicStringPiece<Str> str,
38 BasicStringPiece<Str> delimiter,
39 WhitespaceHandling whitespace,
40 SplitResult result_type) {
41 std::vector<OutputStringType> result;
42 if (str.empty())
43 return result;
44
45 size_t start = 0;
46 while (start != Str::npos) {
47 size_t end = str.find_first_of(delimiter, start);
48
49 BasicStringPiece<Str> piece;
50 if (end == Str::npos) {
51 piece = str.substr(start);
52 start = Str::npos;
53 } else {
54 piece = str.substr(start, end - start);
55 start = end + 1;
56 }
57
58 if (whitespace == TRIM_WHITESPACE)
59 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
60
61 if (result_type == SPLIT_WANT_ALL || !piece.empty())
62 result.emplace_back(piece);
63 }
64 return result;
65 }
66
AppendStringKeyValue(StringPiece input,char delimiter,StringPairs * result)67 bool AppendStringKeyValue(StringPiece input,
68 char delimiter,
69 StringPairs* result) {
70 // Always append a new item regardless of success (it might be empty). The
71 // below code will copy the strings directly into the result pair.
72 result->resize(result->size() + 1);
73 auto& result_pair = result->back();
74
75 // Find the delimiter.
76 size_t end_key_pos = input.find_first_of(delimiter);
77 if (end_key_pos == std::string::npos) {
78 DVLOG(1) << "cannot find delimiter in: " << input;
79 return false; // No delimiter.
80 }
81 result_pair.first = std::string(input.substr(0, end_key_pos));
82
83 // Find the value string.
84 StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos);
85 size_t begin_value_pos = remains.find_first_not_of(delimiter);
86 if (begin_value_pos == StringPiece::npos) {
87 DVLOG(1) << "cannot parse value from input: " << input;
88 return false; // No value.
89 }
90
91 result_pair.second = std::string(
92 remains.substr(begin_value_pos, remains.size() - begin_value_pos));
93
94 return true;
95 }
96
97 template <typename OutputStringType, typename Str>
SplitStringUsingSubstrT(BasicStringPiece<Str> input,BasicStringPiece<Str> delimiter,WhitespaceHandling whitespace,SplitResult result_type)98 std::vector<OutputStringType> SplitStringUsingSubstrT(
99 BasicStringPiece<Str> input,
100 BasicStringPiece<Str> delimiter,
101 WhitespaceHandling whitespace,
102 SplitResult result_type) {
103 using Piece = BasicStringPiece<Str>;
104 using size_type = typename Piece::size_type;
105
106 std::vector<OutputStringType> result;
107 for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
108 begin_index = end_index + delimiter.size()) {
109 end_index = input.find(delimiter, begin_index);
110 Piece term = end_index == Piece::npos
111 ? input.substr(begin_index)
112 : input.substr(begin_index, end_index - begin_index);
113
114 if (whitespace == TRIM_WHITESPACE)
115 term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
116
117 if (result_type == SPLIT_WANT_ALL || !term.empty())
118 result.emplace_back(term);
119 }
120
121 return result;
122 }
123
124 } // namespace
125
SplitString(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)126 std::vector<std::string> SplitString(StringPiece input,
127 StringPiece separators,
128 WhitespaceHandling whitespace,
129 SplitResult result_type) {
130 return SplitStringT<std::string>(input, separators, whitespace, result_type);
131 }
132
SplitString(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)133 std::vector<string16> SplitString(StringPiece16 input,
134 StringPiece16 separators,
135 WhitespaceHandling whitespace,
136 SplitResult result_type) {
137 return SplitStringT<string16>(input, separators, whitespace, result_type);
138 }
139
SplitStringPiece(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)140 std::vector<StringPiece> SplitStringPiece(StringPiece input,
141 StringPiece separators,
142 WhitespaceHandling whitespace,
143 SplitResult result_type) {
144 return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
145 }
146
SplitStringPiece(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)147 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
148 StringPiece16 separators,
149 WhitespaceHandling whitespace,
150 SplitResult result_type) {
151 return SplitStringT<StringPiece16>(input, separators, whitespace,
152 result_type);
153 }
154
SplitStringIntoKeyValuePairs(StringPiece input,char key_value_delimiter,char key_value_pair_delimiter,StringPairs * key_value_pairs)155 bool SplitStringIntoKeyValuePairs(StringPiece input,
156 char key_value_delimiter,
157 char key_value_pair_delimiter,
158 StringPairs* key_value_pairs) {
159 return SplitStringIntoKeyValuePairsUsingSubstr(
160 input, key_value_delimiter, StringPiece(&key_value_pair_delimiter, 1),
161 key_value_pairs);
162 }
163
SplitStringIntoKeyValuePairsUsingSubstr(StringPiece input,char key_value_delimiter,StringPiece key_value_pair_delimiter,StringPairs * key_value_pairs)164 bool SplitStringIntoKeyValuePairsUsingSubstr(
165 StringPiece input,
166 char key_value_delimiter,
167 StringPiece key_value_pair_delimiter,
168 StringPairs* key_value_pairs) {
169 key_value_pairs->clear();
170
171 std::vector<StringPiece> pairs = SplitStringPieceUsingSubstr(
172 input, key_value_pair_delimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
173 key_value_pairs->reserve(pairs.size());
174
175 bool success = true;
176 for (const StringPiece& pair : pairs) {
177 if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) {
178 // Don't return here, to allow for pairs without associated
179 // value or key; just record that the split failed.
180 success = false;
181 }
182 }
183 return success;
184 }
185
SplitStringUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)186 std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
187 StringPiece16 delimiter,
188 WhitespaceHandling whitespace,
189 SplitResult result_type) {
190 return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
191 result_type);
192 }
193
SplitStringUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)194 std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
195 StringPiece delimiter,
196 WhitespaceHandling whitespace,
197 SplitResult result_type) {
198 return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
199 result_type);
200 }
201
SplitStringPieceUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)202 std::vector<StringPiece16> SplitStringPieceUsingSubstr(
203 StringPiece16 input,
204 StringPiece16 delimiter,
205 WhitespaceHandling whitespace,
206 SplitResult result_type) {
207 std::vector<StringPiece16> result;
208 return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
209 result_type);
210 }
211
SplitStringPieceUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)212 std::vector<StringPiece> SplitStringPieceUsingSubstr(
213 StringPiece input,
214 StringPiece delimiter,
215 WhitespaceHandling whitespace,
216 SplitResult result_type) {
217 return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
218 result_type);
219 }
220
221 #if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
SplitString(WStringPiece input,WStringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)222 std::vector<std::wstring> SplitString(WStringPiece input,
223 WStringPiece separators,
224 WhitespaceHandling whitespace,
225 SplitResult result_type) {
226 return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
227 }
228
SplitStringPiece(WStringPiece input,WStringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)229 std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
230 WStringPiece separators,
231 WhitespaceHandling whitespace,
232 SplitResult result_type) {
233 return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
234 }
235
SplitStringUsingSubstr(WStringPiece input,WStringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)236 std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
237 WStringPiece delimiter,
238 WhitespaceHandling whitespace,
239 SplitResult result_type) {
240 return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
241 result_type);
242 }
243
SplitStringPieceUsingSubstr(WStringPiece input,WStringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)244 std::vector<WStringPiece> SplitStringPieceUsingSubstr(
245 WStringPiece input,
246 WStringPiece delimiter,
247 WhitespaceHandling whitespace,
248 SplitResult result_type) {
249 return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
250 result_type);
251 }
252 #endif
253
254 } // namespace base
255