1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/str_split.h"
16 
#include <deque>
#include <initializer_list>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
27 
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "absl/base/dynamic_annotations.h"
31 #include "absl/base/macros.h"
32 #include "absl/container/flat_hash_map.h"
33 #include "absl/container/node_hash_map.h"
34 #include "absl/strings/numbers.h"
35 
36 namespace {
37 
38 using ::testing::ElementsAre;
39 using ::testing::Pair;
40 using ::testing::UnorderedElementsAre;
41 
// Compile-time checks of the SplitterIsConvertibleTo trait: which destination
// types report true and which report false.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;

  // Plain (non-container) types are rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Vectors are accepted for string-like elements and rejected for int.
  static_assert(SplitterIsConvertibleTo<std::vector<std::string>>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");

  // Maps are accepted only when both key and value are string-like.
  static_assert(
      SplitterIsConvertibleTo<std::map<std::string, std::string>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<
          std::map<absl::string_view, absl::string_view>>::value,
      "");
  static_assert(!SplitterIsConvertibleTo<std::map<int, std::string>>::value,
                "");
  static_assert(!SplitterIsConvertibleTo<std::map<std::string, int>>::value,
                "");
}
69 
70 // This tests the overall split API, which is made up of the absl::StrSplit()
71 // function and the Delimiter objects in the absl:: namespace.
72 // This TEST macro is outside of any namespace to require full specification of
73 // namespaces just like callers will need to use.
TEST(Split,APIExamples)74 TEST(Split, APIExamples) {
75   {
76     // Passes string delimiter. Assumes the default of ByString.
77     std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
78     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
79 
80     // Equivalent to...
81     using absl::ByString;
82     v = absl::StrSplit("a,b,c", ByString(","));
83     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
84 
85     // Equivalent to...
86     EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
87                 ElementsAre("a", "b", "c"));
88   }
89 
90   {
91     // Same as above, but using a single character as the delimiter.
92     std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
93     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
94 
95     // Equivalent to...
96     using absl::ByChar;
97     v = absl::StrSplit("a,b,c", ByChar(','));
98     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
99   }
100 
101   {
102     // Uses the Literal string "=>" as the delimiter.
103     const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
104     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
105   }
106 
107   {
108     // The substrings are returned as string_views, eliminating copying.
109     std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
110     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
111   }
112 
113   {
114     // Leading and trailing empty substrings.
115     std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
116     EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
117   }
118 
119   {
120     // Splits on a delimiter that is not found.
121     std::vector<std::string> v = absl::StrSplit("abc", ',');
122     EXPECT_THAT(v, ElementsAre("abc"));
123   }
124 
125   {
126     // Splits the input string into individual characters by using an empty
127     // string as the delimiter.
128     std::vector<std::string> v = absl::StrSplit("abc", "");
129     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
130   }
131 
132   {
133     // Splits string data with embedded NUL characters, using NUL as the
134     // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
135     // say that's the empty string when constructing the absl::string_view
136     // delimiter. Instead, a non-empty string containing NUL can be used as the
137     // delimiter.
138     std::string embedded_nulls("a\0b\0c", 5);
139     std::string null_delim("\0", 1);
140     std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
141     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
142   }
143 
144   {
145     // Stores first two split strings as the members in a std::pair.
146     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
147     EXPECT_EQ("a", p.first);
148     EXPECT_EQ("b", p.second);
149     // "c" is omitted because std::pair can hold only two elements.
150   }
151 
152   {
153     // Results stored in std::set<std::string>
154     std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
155     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
156   }
157 
158   {
159     // Uses a non-const char* delimiter.
160     char a[] = ",";
161     char* d = a + 0;
162     std::vector<std::string> v = absl::StrSplit("a,b,c", d);
163     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
164   }
165 
166   {
167     // Results split using either of , or ;
168     using absl::ByAnyChar;
169     std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
170     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
171   }
172 
173   {
174     // Uses the SkipWhitespace predicate.
175     using absl::SkipWhitespace;
176     std::vector<std::string> v =
177         absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
178     EXPECT_THAT(v, ElementsAre(" a ", "b"));
179   }
180 
181   {
182     // Uses the ByLength delimiter.
183     using absl::ByLength;
184     std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
185     EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
186   }
187 
188   {
189     // Different forms of initialization / conversion.
190     std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
191     EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
192     std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
193     EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
194     auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
195     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
196     v3 = absl::StrSplit("a,b,c", ',');
197     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
198   }
199 
200   {
201     // Results stored in a std::map.
202     std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
203     EXPECT_EQ(2, m.size());
204     EXPECT_EQ("3", m["a"]);
205     EXPECT_EQ("2", m["b"]);
206   }
207 
208   {
209     // Results stored in a std::multimap.
210     std::multimap<std::string, std::string> m =
211         absl::StrSplit("a,1,b,2,a,3", ',');
212     EXPECT_EQ(3, m.size());
213     auto it = m.find("a");
214     EXPECT_EQ("1", it->second);
215     ++it;
216     EXPECT_EQ("3", it->second);
217     it = m.find("b");
218     EXPECT_EQ("2", it->second);
219   }
220 
221   {
222     // Demonstrates use in a range-based for loop in C++11.
223     std::string s = "x,x,x,x,x,x,x";
224     for (absl::string_view sp : absl::StrSplit(s, ',')) {
225       EXPECT_EQ("x", sp);
226     }
227   }
228 
229   {
230     // Demonstrates use with a Predicate in a range-based for loop.
231     using absl::SkipWhitespace;
232     std::string s = " ,x,,x,,x,x,x,,";
233     for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
234       EXPECT_EQ("x", sp);
235     }
236   }
237 
238   {
239     // Demonstrates a "smart" split to std::map using two separate calls to
240     // absl::StrSplit. One call to split the records, and another call to split
241     // the keys and values. This also uses the Limit delimiter so that the
242     // std::string "a=b=c" will split to "a" -> "b=c".
243     std::map<std::string, std::string> m;
244     for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
245       m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
246     }
247     EXPECT_EQ("b=c", m.find("a")->second);
248     EXPECT_EQ("e", m.find("d")->second);
249     EXPECT_EQ("", m.find("f")->second);
250     EXPECT_EQ("", m.find("g")->second);
251   }
252 }
253 
254 //
255 // Tests for SplitIterator
256 //
257 
// Steps through a two-piece split by hand, touching each basic iterator
// operation: operator*, operator->, pre-increment and post-increment.
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto cur = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cur, last);
  EXPECT_EQ("a", *cur);

  // Advance with pre-increment; second piece is read through operator->.
  ++cur;
  EXPECT_NE(cur, last);
  EXPECT_EQ("b", std::string(cur->data(), cur->size()));

  // Advance with post-increment; the range is now exhausted.
  cur++;
  EXPECT_EQ(cur, last);
}
272 
273 // Simple Predicate to skip a particular string.
274 class Skip {
275  public:
Skip(const std::string & s)276   explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)277   bool operator()(absl::string_view sp) { return sp != s_; }
278 
279  private:
280   std::string s_;
281 };
282 
// Iterates by hand over a split that uses the Skip predicate and checks that
// the skipped piece never shows up.
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto cur = splitter.begin();
  auto last = splitter.end();

  // First piece, read through operator*.
  EXPECT_NE(cur, last);
  EXPECT_EQ("a", *cur);

  // Pre-increment must jump over "b" and land on "c" (read via operator->).
  ++cur;
  EXPECT_NE(cur, last);
  EXPECT_EQ("c", std::string(cur->data(), cur->size()));

  // Post-increment reaches the end.
  cur++;
  EXPECT_EQ(cur, last);
}
297 
// Table-driven check of boundary inputs (empty text, leading and trailing
// delimiters) when splitting on ','.
TEST(SplitIterator, EdgeCases) {
  // One row per scenario: the text to split and the pieces it must produce.
  struct Case {
    std::string in;
    std::vector<std::string> expect;
  };
  const Case kCases[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const Case& c : kCases) {
    SCOPED_TRACE(c.in);
    auto splitter = absl::StrSplit(c.in, ',');
    auto cur = splitter.begin();
    auto last = splitter.end();
    // Consume exactly one piece per expected entry...
    for (const std::string& want : c.expect) {
      EXPECT_NE(cur, last);
      EXPECT_EQ(want, *cur++);
    }
    // ...after which nothing may be left over.
    EXPECT_EQ(cur, last);
  }
}
325 
// A const-qualified splitter must still be usable as a range.
TEST(Splitter, Const) {
  const auto parts = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(parts, ElementsAre("a", "b", "c"));
}
330 
TEST(Split, EmptyAndNull) {
  // Note: a null absl::string_view and an empty absl::string_view compare
  // equal, yet splitting them gives different results. That is likely
  // surprising and undesirable, but it is kept for backward compatibility by
  // a small "hack" in str_split_internal.h. If that behavior is ever
  // changed/fixed, this test will need to be updated.
  absl::string_view empty_text = absl::string_view("");
  absl::string_view null_text = absl::string_view();

  // Empty (non-null) text yields a single empty piece.
  EXPECT_THAT(absl::StrSplit(empty_text, '-'), ElementsAre(""));
  // Null text yields no pieces at all.
  EXPECT_THAT(absl::StrSplit(null_text, '-'), ElementsAre());
}
341 
// Uses a non-end SplitIterator as the stopping point of a loop, which only
// works if equality between two non-end SplitIterators is implemented
// correctly.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // `stop` is a copy of the begin iterator advanced twice, so it refers to
  // "c" in the input text.
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // Collect everything strictly before `stop`.
  std::vector<absl::string_view> seen;
  while (cursor != stop) {
    seen.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(seen, ElementsAre("a", "b"));
}
362 
363 //
364 // Tests for Splitter
365 //
366 
// A splitter can be consumed directly by a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  std::vector<absl::string_view> collected;
  for (const absl::string_view& piece : splitter) {
    collected.push_back(piece);
  }
  EXPECT_THAT(collected, ElementsAre("a", "b", "c"));
}
375 
376 // Some template functions for use in testing conversion operators
377 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)378 void TestConversionOperator(const Splitter& splitter) {
379   ContainerType output = splitter;
380   EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
381 }
382 
383 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)384 void TestMapConversionOperator(const Splitter& splitter) {
385   MapType m = splitter;
386   EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
387 }
388 
// Helper for the conversion-operator tests: converts `splitter` to a
// std::pair<FirstType, SecondType> and expects it to equal ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  PairType actual = splitter;
  EXPECT_EQ(actual, PairType("a", "b"));
}
394 
// Converts one splitter into a wide range of destination types: sequences,
// sets, map-like containers and std::pair.
TEST(Splitter, ConversionOperator) {
  // Short aliases for the two element types every container is tried with.
  using Sv = absl::string_view;
  using Str = std::string;

  auto splitter = absl::StrSplit("a,b,c,d", ',');

  // Sequence and set containers.
  TestConversionOperator<std::vector<Sv>>(splitter);
  TestConversionOperator<std::vector<Str>>(splitter);
  TestConversionOperator<std::list<Sv>>(splitter);
  TestConversionOperator<std::list<Str>>(splitter);
  TestConversionOperator<std::deque<Sv>>(splitter);
  TestConversionOperator<std::deque<Str>>(splitter);
  TestConversionOperator<std::set<Sv>>(splitter);
  TestConversionOperator<std::set<Str>>(splitter);
  TestConversionOperator<std::multiset<Sv>>(splitter);
  TestConversionOperator<std::multiset<Str>>(splitter);
  TestConversionOperator<std::unordered_set<Str>>(splitter);

  // Map-like containers from the standard library.
  TestMapConversionOperator<std::map<Sv, Sv>>(splitter);
  TestMapConversionOperator<std::map<Sv, Str>>(splitter);
  TestMapConversionOperator<std::map<Str, Sv>>(splitter);
  TestMapConversionOperator<std::map<Str, Str>>(splitter);
  TestMapConversionOperator<std::multimap<Sv, Sv>>(splitter);
  TestMapConversionOperator<std::multimap<Sv, Str>>(splitter);
  TestMapConversionOperator<std::multimap<Str, Sv>>(splitter);
  TestMapConversionOperator<std::multimap<Str, Str>>(splitter);
  TestMapConversionOperator<std::unordered_map<Str, Str>>(splitter);

  // Map-like containers from Abseil.
  TestMapConversionOperator<absl::node_hash_map<Sv, Sv>>(splitter);
  TestMapConversionOperator<absl::node_hash_map<Sv, Str>>(splitter);
  TestMapConversionOperator<absl::node_hash_map<Str, Sv>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<Sv, Sv>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<Sv, Str>>(splitter);
  TestMapConversionOperator<absl::flat_hash_map<Str, Sv>>(splitter);

  // std::pair.
  TestPairConversionOperator<Sv, Sv>(splitter);
  TestPairConversionOperator<Sv, Str>(splitter);
  TestPairConversionOperator<Str, Sv>(splitter);
  TestPairConversionOperator<Str, Str>(splitter);
}
446 
447 // A few additional tests for conversion to std::pair. This conversion is
448 // different from others because a std::pair always has exactly two elements:
449 // .first and .second. The split has to work even when the split has
450 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter,ToPair)451 TEST(Splitter, ToPair) {
452   {
453     // Empty string
454     std::pair<std::string, std::string> p = absl::StrSplit("", ',');
455     EXPECT_EQ("", p.first);
456     EXPECT_EQ("", p.second);
457   }
458 
459   {
460     // Only first
461     std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
462     EXPECT_EQ("a", p.first);
463     EXPECT_EQ("", p.second);
464   }
465 
466   {
467     // Only second
468     std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
469     EXPECT_EQ("", p.first);
470     EXPECT_EQ("b", p.second);
471   }
472 
473   {
474     // First and second.
475     std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
476     EXPECT_EQ("a", p.first);
477     EXPECT_EQ("b", p.second);
478   }
479 
480   {
481     // First and second and then more stuff that will be ignored.
482     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
483     EXPECT_EQ("a", p.first);
484     EXPECT_EQ("b", p.second);
485     // "c" is omitted.
486   }
487 }
488 
// Compares the results of splitting the same text with no predicate and with
// each of the AllowEmpty, SkipEmpty and SkipWhitespace predicates.
TEST(Splitter, Predicates) {
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;
  static const char kTestChars[] = ",a, ,b,";

  {
    // Without a predicate, empty pieces are kept.
    auto splitter = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = splitter;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty pieces as well...
    auto with_allow_empty = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> allowed = with_allow_empty;
    EXPECT_THAT(allowed, ElementsAre("", "a", " ", "b", ""));

    // ...and so matches the predicate-free result exactly.
    auto without_predicate = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> plain = without_predicate;
    EXPECT_EQ(allowed, plain);
  }

  {
    // SkipEmpty drops empty pieces but keeps the whitespace-only one.
    auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = splitter;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops both empty and all-whitespace pieces.
    auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = splitter;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
528 
529 //
530 // Tests for StrSplit()
531 //
532 
// Basic uses of absl::StrSplit(): discarding the result, initializing a
// container from it, and assigning it to an existing container.
TEST(Split, Basics) {
  {
    // The result is discarded. Not useful, but it should work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    // Initialize a vector of views.
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    // Initialize a vector of owning strings.
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment to already-constructed containers. This requires a little
    // extra work with C++11 because of overloads with initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
565 
ReturnStringView()566 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns a const char* pointing at the literal "Hello World".
const char* ReturnConstCharP() {
  const char* greeting = "Hello World";
  return greeting;
}
// Returns a non-const char* pointing at the literal "Hello World". Constness
// is cast away only to produce the char* type; the characters belong to a
// string literal and must not be written through the returned pointer.
char* ReturnCharP() {
  const char* greeting = "Hello World";
  return const_cast<char*>(greeting);
}
569 
// absl::StrSplit() accepts text supplied as a temporary absl::string_view,
// const char* or char* returned from a function.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  // Temporary absl::string_view.
  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Temporary const char*.
  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Temporary char*.
  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
579 
// Splits a temporary std::string and iterates the splitter after the
// temporary expression has ended.
TEST(Split, Temporary) {
  // The text is deliberately longer than the SSO length: if the splitter only
  // kept a reference to the temporary's contents, it would then refer to
  // freed memory instead of just dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // First pass. This pattern shows up most often in C++11 as part of a
  // range-based for loop. The pieces must be "a", "b", ... in order.
  auto first_splitter = absl::StrSplit(std::string(input), ',');
  std::string next_letter = "a";
  for (absl::string_view piece : first_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  // One past the final piece "u".
  EXPECT_EQ("v", next_letter);

  // Second pass over a fresh temporary, with the same expectations.
  auto second_splitter = absl::StrSplit(std::string(input), ',');
  next_letter = "a";
  for (absl::string_view piece : second_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  EXPECT_EQ("v", next_letter);
}
606 
// Copy-constructs a new T from `value` on the heap and returns it owned by a
// std::unique_ptr.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
611 
// A splitter built over an lvalue string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string text = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(text, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source; only the copy remains.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
620 
// A splitter built over a temporary string stays usable as a copy after the
// splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the source; only the copy remains.
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, testing::ElementsAre("a", "b"));
}
628 
// Mostly a compile-time check: a splitter supports copy and move construction
// and copy and move assignment.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto original = absl::StrSplit("foo", '-');

  auto copied = original;            // Copy construct.
  auto moved = std::move(original);  // Move construct.
  copied = moved;                    // Copy assign.
  moved = std::move(copied);         // Move assign.

  // The splitter that received the final assignment still works.
  EXPECT_THAT(moved, ElementsAre("foo"));
}
640 
// The delimiter may be given as a char, a std::string or an
// absl::string_view; each form must split "a,b" the same way.
TEST(Split, StringDelimiter) {
  {
    // char delimiter.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // std::string delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // absl::string_view delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
658 
659 #if !defined(__cpp_char8_t)
660 #if defined(__clang__)
661 #pragma clang diagnostic push
662 #pragma clang diagnostic ignored "-Wc++2a-compat"
663 #endif
// Splitting of UTF-8 encoded text, with both ASCII and UTF-8 delimiters.
TEST(Split, UTF8) {
  std::string utf8_word = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // UTF-8 text, ASCII delimiter.
    std::string text = "a," + utf8_word;
    std::vector<absl::string_view> pieces = absl::StrSplit(text, ',');
    EXPECT_THAT(pieces, ElementsAre("a", utf8_word));
  }

  {
    // UTF-8 text, and a delimiter that itself contains the UTF-8 word.
    std::string text = "a," + utf8_word + ",b";
    std::string delimiter = "," + utf8_word + ",";
    std::vector<absl::string_view> pieces = absl::StrSplit(text, delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text, ByAnyChar with ASCII characters.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
690 #if defined(__clang__)
691 #pragma clang diagnostic pop
692 #endif
693 #endif  // !defined(__cpp_char8_t)
694 
// An empty string delimiter breaks the text into single characters; an empty
// text still yields one empty piece.
TEST(Split, EmptyStringDelimiter) {
  {
    // Empty text.
    std::vector<std::string> chars = absl::StrSplit("", "");
    EXPECT_THAT(chars, ElementsAre(""));
  }

  {
    // One character.
    std::vector<std::string> chars = absl::StrSplit("a", "");
    EXPECT_THAT(chars, ElementsAre("a"));
  }

  {
    // Two characters.
    std::vector<std::string> chars = absl::StrSplit("ab", "");
    EXPECT_THAT(chars, ElementsAre("a", "b"));
  }

  {
    // A space is an ordinary character here.
    std::vector<std::string> chars = absl::StrSplit("a b", "");
    EXPECT_THAT(chars, ElementsAre("a", " ", "b"));
  }
}
716 
// Splitting on the two-character delimiter "//", including inputs where the
// delimiter is absent, partial, or repeated.
TEST(Split, SubstrDelimiter) {
  absl::string_view slashes("//");
  std::vector<absl::string_view> pieces;

  // Empty text.
  pieces = absl::StrSplit("", slashes);
  EXPECT_THAT(pieces, ElementsAre(""));

  // Text consisting only of the delimiter.
  pieces = absl::StrSplit("//", slashes);
  EXPECT_THAT(pieces, ElementsAre("", ""));

  // Delimiter absent.
  pieces = absl::StrSplit("ab", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab"));

  // Trailing delimiter.
  pieces = absl::StrSplit("ab//", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab", ""));

  // A single '/' is only half of the delimiter and does not split.
  pieces = absl::StrSplit("ab/", slashes);
  EXPECT_THAT(pieces, ElementsAre("ab/"));

  pieces = absl::StrSplit("a/b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a/b"));

  // Exactly one delimiter in the middle.
  pieces = absl::StrSplit("a//b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "b"));

  // Three slashes: the first two are the delimiter, the third stays.
  pieces = absl::StrSplit("a///b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "/b"));

  // Four slashes: two delimiters with an empty piece between them.
  pieces = absl::StrSplit("a////b", slashes);
  EXPECT_THAT(pieces, ElementsAre("a", "", "b"));
}
748 
// Inputs whose split contains empty pieces: leading, trailing and adjacent
// delimiters, plus SkipEmpty applied to an empty text.
TEST(Split, EmptyResults) {
  std::vector<absl::string_view> pieces;

  // Empty text.
  pieces = absl::StrSplit("", '#');
  EXPECT_THAT(pieces, ElementsAre(""));

  // Only the delimiter.
  pieces = absl::StrSplit("#", '#');
  EXPECT_THAT(pieces, ElementsAre("", ""));

  // Leading delimiter.
  pieces = absl::StrSplit("#cd", '#');
  EXPECT_THAT(pieces, ElementsAre("", "cd"));

  // Trailing delimiter.
  pieces = absl::StrSplit("ab#cd#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "cd", ""));

  // Adjacent delimiters in the middle.
  pieces = absl::StrSplit("ab##cd", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", "cd"));

  // Adjacent delimiters at the end.
  pieces = absl::StrSplit("ab##", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "", ""));

  // Repeated piece followed by a trailing delimiter.
  pieces = absl::StrSplit("ab#ab#", '#');
  EXPECT_THAT(pieces, ElementsAre("ab", "ab", ""));

  // Text made up solely of delimiters.
  pieces = absl::StrSplit("aaaa", 'a');
  EXPECT_THAT(pieces, ElementsAre("", "", "", "", ""));

  // SkipEmpty removes the single empty piece of an empty text.
  pieces = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(pieces, ElementsAre());
}
779 
780 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)781 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
782                                  size_t starting_pos, int expected_pos) {
783   absl::string_view found = d.Find(text, starting_pos);
784   return found.data() != text.data() + text.size() &&
785          expected_pos == found.data() - text.data();
786 }
787 
788 // Helper function for testing Delimiter objects. Returns true if the given
789 // Delimiter is found in the given string at the given position. This function
790 // tests two cases:
791 //   1. The actual text given, staring at position 0
792 //   2. The text given with leading padding that should be ignored
793 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)794 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
795   const std::string leading_text = ",x,y,z,";
796   return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
797          IsFoundAtStartingPos(leading_text + std::string(text), d,
798                               leading_text.length(),
799                               expected_pos + leading_text.length());
800 }
801 
802 //
803 // Tests for ByString
804 //
805 
// Shared expectations for any delimiter `d` that represents a single comma.
template <typename Delimiter>
void TestComma(Delimiter d) {
  // The first comma is reported, at its position in the text.
  EXPECT_TRUE(IsFoundAt(",", d, 0));
  EXPECT_TRUE(IsFoundAt("a,", d, 1));
  EXPECT_TRUE(IsFoundAt(",b", d, 0));
  EXPECT_TRUE(IsFoundAt("a,b", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));

  // Texts without a comma: nothing is found.
  EXPECT_FALSE(IsFoundAt("", d, -1));
  EXPECT_FALSE(IsFoundAt(" ", d, -1));
  EXPECT_FALSE(IsFoundAt("a", d, -1));
  EXPECT_FALSE(IsFoundAt("a b c", d, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
  EXPECT_FALSE(IsFoundAt(";", d, -1));
}
822 
// ByString as a comma delimiter, plus its special handling of the empty
// string.
TEST(Delimiter, ByString) {
  using absl::ByString;

  // As a temporary.
  TestComma(ByString(","));

  // As a named variable.
  ByString named_comma(",");
  TestComma(named_comma);

  // absl::string_view::find() reports the first occurrence of the empty
  // string ("") at position 0, as checked right below. If the ByString
  // delimiter returned position 0 for this, there would be an infinite loop
  // in the SplitIterator code. To avoid this, empty string is a special case
  // in that it always returns the item at position 1.
  absl::string_view sample("abc");
  EXPECT_EQ(0, sample.find(""));  // "" is found at position 0

  ByString empty_delim("");
  EXPECT_FALSE(IsFoundAt("", empty_delim, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delim, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delim, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delim, 1));
}
844 
// Runs the shared single-comma checks against absl::ByChar.
TEST(Split, ByChar) {
  // First with a delimiter built in place as a temporary...
  TestComma(absl::ByChar(','));

  // ...then with one that lives in a named variable.
  absl::ByChar named_comma(',');
  TestComma(named_comma);
}
853 
854 //
855 // Tests for ByAnyChar
856 //
857 
// Checks absl::ByAnyChar built from one character, from two characters, and
// from the empty string.
TEST(Delimiter, ByAnyChar) {
  // Pairs an input with the position passed to IsFoundAt() for it.
  struct Probe {
    const char* text;
    int pos;
  };

  // A single delimiter character.
  absl::ByAnyChar comma_only(",");
  const Probe kCommaHits[] = {{",", 0}, {"a,", 1}, {"a,b", 1}, {",b", 0}};
  for (const Probe& probe : kCommaHits) {
    SCOPED_TRACE(probe.text);
    EXPECT_TRUE(IsFoundAt(probe.text, comma_only, probe.pos));
  }
  const char* const kCommaMisses[] = {"", " ", "a", "a;b;c", ";"};
  for (const char* text : kCommaMisses) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, comma_only, -1));
  }

  // Two delimiter characters: the first occurrence of either one is reported.
  absl::ByAnyChar comma_or_semicolon(",;");
  const Probe kPairHits[] = {
      {",", 0},   {";", 0},   {",;", 0},  {";,", 0},   {",;b", 0},
      {";,b", 0}, {"a;,", 1}, {"a,;", 1}, {"a;,b", 1}, {"a,;b", 1},
  };
  for (const Probe& probe : kPairHits) {
    SCOPED_TRACE(probe.text);
    EXPECT_TRUE(IsFoundAt(probe.text, comma_or_semicolon, probe.pos));
  }
  const char* const kPairMisses[] = {"", " ", "a", "a=b=c", "="};
  for (const char* text : kPairMisses) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, comma_or_semicolon, -1));
  }

  // With an empty delimiter set, ByAnyChar acts like ByString given "": the
  // result is a zero-length absl::string_view referring to the item at
  // position 1, never position 0.
  absl::ByAnyChar no_chars("");
  const Probe kEmptyMisses[] = {{"", 0}, {"a", 0}};
  for (const Probe& probe : kEmptyMisses) {
    SCOPED_TRACE(probe.text);
    EXPECT_FALSE(IsFoundAt(probe.text, no_chars, probe.pos));
  }
  const Probe kEmptyHits[] = {{"ab", 1}, {"abc", 1}};
  for (const Probe& probe : kEmptyHits) {
    SCOPED_TRACE(probe.text);
    EXPECT_TRUE(IsFoundAt(probe.text, no_chars, probe.pos));
  }
}
901 
902 //
903 // Tests for ByLength
904 //
905 
// Checks absl::ByLength constructed with a length of 4.
TEST(Delimiter, ByLength) {
  absl::ByLength every_four(4);

  // Inputs longer than four characters: a match is expected at position 4.
  const char* const kLongEnough[] = {
      "abcde",
      "abcdefghijklmnopqrstuvwxyz",
      "a b,c\nd",
  };
  for (const char* text : kLongEnough) {
    SCOPED_TRACE(text);
    EXPECT_TRUE(IsFoundAt(text, every_four, 4));
  }

  // Inputs of at most four characters: IsFoundAt() must report false when
  // asked about position 0.
  const char* const kTooShort[] = {"", "a", "ab", "abc", "abcd"};
  for (const char* text : kTooShort) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, every_four, 0));
  }
}
922 
// Splits a string of 2G + 1 bytes whose last byte is the only delimiter.
// The body only runs on platforms where size_t is wider than 4 bytes.
TEST(Split, WorksWithLargeStrings) {
  if (sizeof(size_t) > 4) {
    std::string s((uint32_t{1} << 31) + 1, 'x');  // 2G + 1 byte
    s.back() = '-';
    std::vector<absl::string_view> v = absl::StrSplit(s, '-');
    // Fatal assertion: the element accesses below would be out of bounds if
    // the split produced fewer than two pieces. The unsigned literal matches
    // the unsigned type returned by size().
    ASSERT_EQ(2u, v.size());
    // The first element will contain 2G of 'x's.
    // testing::StartsWith is too slow with a 2G string.
    EXPECT_EQ('x', v[0][0]);
    EXPECT_EQ('x', v[0][1]);
    EXPECT_EQ('x', v[0][3]);
    EXPECT_EQ("", v[1]);
  }
}
937 
// Checks four traits from absl::strings_internal. Each one must report false
// for plain int and true for the container-like type it is paired with below.
TEST(SplitInternalTest, TypeTraits) {
  namespace internal = absl::strings_internal;
  // The aliases keep template commas out of the macro arguments.
  using IntMap = std::map<int, int>;
  using IntList = std::initializer_list<int>;

  EXPECT_FALSE(internal::HasMappedType<int>::value);
  EXPECT_TRUE(internal::HasMappedType<IntMap>::value);

  EXPECT_FALSE(internal::HasValueType<int>::value);
  EXPECT_TRUE(internal::HasValueType<IntMap>::value);

  EXPECT_FALSE(internal::HasConstIterator<int>::value);
  EXPECT_TRUE(internal::HasConstIterator<IntMap>::value);

  EXPECT_FALSE(internal::IsInitializerList<int>::value);
  EXPECT_TRUE(internal::IsInitializerList<IntList>::value);
}
952 
953 }  // namespace
954