1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/str_split.h"
16 
17 #include <deque>
18 #include <initializer_list>
19 #include <list>
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <type_traits>
24 #include <unordered_map>
25 #include <unordered_set>
26 #include <vector>
27 
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "absl/base/dynamic_annotations.h"
31 #include "absl/base/macros.h"
32 #include "absl/container/btree_map.h"
33 #include "absl/container/btree_set.h"
34 #include "absl/container/flat_hash_map.h"
35 #include "absl/container/node_hash_map.h"
36 #include "absl/strings/numbers.h"
37 
38 namespace {
39 
40 using ::testing::ElementsAre;
41 using ::testing::Pair;
42 using ::testing::UnorderedElementsAre;
43 
// Compile-time checks of SplitterIsConvertibleTo: each static_assert below
// pins whether a Splitter is considered convertible to the named type.
TEST(Split, TraitsTest) {
  using absl::strings_internal::SplitterIsConvertibleTo;

  // Scalar and plain string targets are rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");

  // Vectors are accepted for string-like elements but not for int elements.
  static_assert(SplitterIsConvertibleTo<std::vector<std::string>>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");

  // Maps are accepted only when both key and mapped type are string-like.
  static_assert(
      SplitterIsConvertibleTo<std::map<std::string, std::string>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<
          std::map<absl::string_view, absl::string_view>>::value,
      "");
  static_assert(!SplitterIsConvertibleTo<std::map<int, std::string>>::value,
                "");
  static_assert(!SplitterIsConvertibleTo<std::map<std::string, int>>::value,
                "");
}
71 
72 // This tests the overall split API, which is made up of the absl::StrSplit()
73 // function and the Delimiter objects in the absl:: namespace.
// This TEST macro is deliberately not placed inside the absl namespace, so
// every absl name must be fully qualified, just like callers will need to do.
TEST(Split,APIExamples)76 TEST(Split, APIExamples) {
77   {
78     // Passes string delimiter. Assumes the default of ByString.
79     std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
80     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
81 
82     // Equivalent to...
83     using absl::ByString;
84     v = absl::StrSplit("a,b,c", ByString(","));
85     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
86 
87     // Equivalent to...
88     EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
89                 ElementsAre("a", "b", "c"));
90   }
91 
92   {
93     // Same as above, but using a single character as the delimiter.
94     std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
95     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
96 
97     // Equivalent to...
98     using absl::ByChar;
99     v = absl::StrSplit("a,b,c", ByChar(','));
100     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
101   }
102 
103   {
104     // Uses the Literal string "=>" as the delimiter.
105     const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
106     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
107   }
108 
109   {
110     // The substrings are returned as string_views, eliminating copying.
111     std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
112     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
113   }
114 
115   {
116     // Leading and trailing empty substrings.
117     std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
118     EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
119   }
120 
121   {
122     // Splits on a delimiter that is not found.
123     std::vector<std::string> v = absl::StrSplit("abc", ',');
124     EXPECT_THAT(v, ElementsAre("abc"));
125   }
126 
127   {
128     // Splits the input string into individual characters by using an empty
129     // string as the delimiter.
130     std::vector<std::string> v = absl::StrSplit("abc", "");
131     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
132   }
133 
134   {
135     // Splits string data with embedded NUL characters, using NUL as the
136     // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
137     // say that's the empty string when constructing the absl::string_view
138     // delimiter. Instead, a non-empty string containing NUL can be used as the
139     // delimiter.
140     std::string embedded_nulls("a\0b\0c", 5);
141     std::string null_delim("\0", 1);
142     std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
143     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
144   }
145 
146   {
147     // Stores first two split strings as the members in a std::pair.
148     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
149     EXPECT_EQ("a", p.first);
150     EXPECT_EQ("b", p.second);
151     // "c" is omitted because std::pair can hold only two elements.
152   }
153 
154   {
155     // Results stored in std::set<std::string>
156     std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
157     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
158   }
159 
160   {
161     // Uses a non-const char* delimiter.
162     char a[] = ",";
163     char* d = a + 0;
164     std::vector<std::string> v = absl::StrSplit("a,b,c", d);
165     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
166   }
167 
168   {
169     // Results split using either of , or ;
170     using absl::ByAnyChar;
171     std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
172     EXPECT_THAT(v, ElementsAre("a", "b", "c"));
173   }
174 
175   {
176     // Uses the SkipWhitespace predicate.
177     using absl::SkipWhitespace;
178     std::vector<std::string> v =
179         absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
180     EXPECT_THAT(v, ElementsAre(" a ", "b"));
181   }
182 
183   {
184     // Uses the ByLength delimiter.
185     using absl::ByLength;
186     std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
187     EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
188   }
189 
190   {
191     // Different forms of initialization / conversion.
192     std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
193     EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
194     std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
195     EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
196     auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
197     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
198     v3 = absl::StrSplit("a,b,c", ',');
199     EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
200   }
201 
202   {
203     // Results stored in a std::map.
204     std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
205     EXPECT_EQ(2, m.size());
206     EXPECT_EQ("3", m["a"]);
207     EXPECT_EQ("2", m["b"]);
208   }
209 
210   {
211     // Results stored in a std::multimap.
212     std::multimap<std::string, std::string> m =
213         absl::StrSplit("a,1,b,2,a,3", ',');
214     EXPECT_EQ(3, m.size());
215     auto it = m.find("a");
216     EXPECT_EQ("1", it->second);
217     ++it;
218     EXPECT_EQ("3", it->second);
219     it = m.find("b");
220     EXPECT_EQ("2", it->second);
221   }
222 
223   {
224     // Demonstrates use in a range-based for loop in C++11.
225     std::string s = "x,x,x,x,x,x,x";
226     for (absl::string_view sp : absl::StrSplit(s, ',')) {
227       EXPECT_EQ("x", sp);
228     }
229   }
230 
231   {
232     // Demonstrates use with a Predicate in a range-based for loop.
233     using absl::SkipWhitespace;
234     std::string s = " ,x,,x,,x,x,x,,";
235     for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
236       EXPECT_EQ("x", sp);
237     }
238   }
239 
240   {
241     // Demonstrates a "smart" split to std::map using two separate calls to
242     // absl::StrSplit. One call to split the records, and another call to split
243     // the keys and values. This also uses the Limit delimiter so that the
244     // std::string "a=b=c" will split to "a" -> "b=c".
245     std::map<std::string, std::string> m;
246     for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
247       m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
248     }
249     EXPECT_EQ("b=c", m.find("a")->second);
250     EXPECT_EQ("e", m.find("d")->second);
251     EXPECT_EQ("", m.find("f")->second);
252     EXPECT_EQ("", m.find("g")->second);
253   }
254 }
255 
256 //
257 // Tests for SplitIterator
258 //
259 
// Walks a two-element split by hand, exercising dereference, operator->,
// pre-increment, post-increment and comparison against end().
TEST(SplitIterator, Basics) {
  auto splitter = absl::StrSplit("a,b", ',');
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT (fatal) rather than EXPECT: if the iterator were unexpectedly at
  // end, continuing would dereference the end iterator below.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement
  ASSERT_NE(it, end);
  EXPECT_EQ("b",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
274 
275 // Simple Predicate to skip a particular string.
276 class Skip {
277  public:
Skip(const std::string & s)278   explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)279   bool operator()(absl::string_view sp) { return sp != s_; }
280 
281  private:
282   std::string s_;
283 };
284 
// Same walk as the Basics test, but with a Skip("b") predicate so that the
// middle piece is never produced by the iterator.
TEST(SplitIterator, Predicate) {
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT (fatal) rather than EXPECT: if the iterator were unexpectedly at
  // end, continuing would dereference the end iterator below.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement -- "b" should be skipped here.
  ASSERT_NE(it, end);
  EXPECT_EQ("c",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
299 
// Table-driven check of iterator output for empty input and for delimiters at
// the start and/or end of the input.
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Fatal on failure: a split that yields fewer pieces than expected must
      // stop the test here instead of dereferencing the end iterator.
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    EXPECT_EQ(it, end);
  }
}
327 
// A const-qualified splitter can still be iterated by a container matcher.
TEST(Splitter, Const) {
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
332 
TEST(Split, EmptyAndNull) {
  // Attention: a null absl::string_view and an empty absl::string_view compare
  // equal, yet splitting them gives different results: the empty view yields
  // one empty piece, the null view yields no pieces at all. This is likely
  // surprising and undesirable, but it is kept for backward compatibility via
  // a small "hack" in str_split_internal.h. If that behavior is ever
  // changed/fixed, this test will need to be updated.
  const absl::string_view empty_view("");
  const absl::string_view null_view;

  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
343 
// Verifies that two non-end SplitIterators compare correctly by using one of
// them as the stopping point of a loop.
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Advance a copy of the begin iterator two steps so it refers to "c" in the
  // input text.
  auto stop = cursor;
  for (int step = 0; step < 2; ++step) {
    ++stop;
  }
  EXPECT_EQ("c", *stop);

  // `stop` is not the end iterator; the loop below terminates only if
  // equality between non-end SplitIterators works. Everything before "c" must
  // be collected.
  std::vector<absl::string_view> seen;
  while (cursor != stop) {
    seen.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(seen, ElementsAre("a", "b"));
}
364 
365 //
366 // Tests for Splitter
367 //
368 
// A splitter can be consumed directly by a range-based for loop.
TEST(Splitter, RangeIterators) {
  auto splitter = absl::StrSplit("a,b,c", ',');

  std::vector<absl::string_view> collected;
  for (const absl::string_view& piece : splitter) {
    collected.push_back(piece);
  }

  EXPECT_THAT(collected, ElementsAre("a", "b", "c"));
}
377 
378 // Some template functions for use in testing conversion operators
379 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)380 void TestConversionOperator(const Splitter& splitter) {
381   ContainerType output = splitter;
382   EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
383 }
384 
385 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)386 void TestMapConversionOperator(const Splitter& splitter) {
387   MapType m = splitter;
388   EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
389 }
390 
// Implicitly converts `splitter` to std::pair<FirstType, SecondType> and
// checks that the result equals ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  const PairType converted = splitter;
  const PairType expected("a", "b");
  EXPECT_EQ(converted, expected);
}
396 
TEST(Splitter,ConversionOperator)397 TEST(Splitter, ConversionOperator) {
398   auto splitter = absl::StrSplit("a,b,c,d", ',');
399 
400   TestConversionOperator<std::vector<absl::string_view>>(splitter);
401   TestConversionOperator<std::vector<std::string>>(splitter);
402   TestConversionOperator<std::list<absl::string_view>>(splitter);
403   TestConversionOperator<std::list<std::string>>(splitter);
404   TestConversionOperator<std::deque<absl::string_view>>(splitter);
405   TestConversionOperator<std::deque<std::string>>(splitter);
406   TestConversionOperator<std::set<absl::string_view>>(splitter);
407   TestConversionOperator<std::set<std::string>>(splitter);
408   TestConversionOperator<std::multiset<absl::string_view>>(splitter);
409   TestConversionOperator<std::multiset<std::string>>(splitter);
410   TestConversionOperator<absl::btree_set<absl::string_view>>(splitter);
411   TestConversionOperator<absl::btree_set<std::string>>(splitter);
412   TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter);
413   TestConversionOperator<absl::btree_multiset<std::string>>(splitter);
414   TestConversionOperator<std::unordered_set<std::string>>(splitter);
415 
416   // Tests conversion to map-like objects.
417 
418   TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
419       splitter);
420   TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter);
421   TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter);
422   TestMapConversionOperator<std::map<std::string, std::string>>(splitter);
423   TestMapConversionOperator<
424       std::multimap<absl::string_view, absl::string_view>>(splitter);
425   TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(
426       splitter);
427   TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(
428       splitter);
429   TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter);
430   TestMapConversionOperator<
431       absl::btree_map<absl::string_view, absl::string_view>>(splitter);
432   TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>(
433       splitter);
434   TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>(
435       splitter);
436   TestMapConversionOperator<absl::btree_map<std::string, std::string>>(
437       splitter);
438   TestMapConversionOperator<
439       absl::btree_multimap<absl::string_view, absl::string_view>>(splitter);
440   TestMapConversionOperator<
441       absl::btree_multimap<absl::string_view, std::string>>(splitter);
442   TestMapConversionOperator<
443       absl::btree_multimap<std::string, absl::string_view>>(splitter);
444   TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>(
445       splitter);
446   TestMapConversionOperator<std::unordered_map<std::string, std::string>>(
447       splitter);
448   TestMapConversionOperator<
449       absl::node_hash_map<absl::string_view, absl::string_view>>(splitter);
450   TestMapConversionOperator<
451       absl::node_hash_map<absl::string_view, std::string>>(splitter);
452   TestMapConversionOperator<
453       absl::node_hash_map<std::string, absl::string_view>>(splitter);
454   TestMapConversionOperator<
455       absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter);
456   TestMapConversionOperator<
457       absl::flat_hash_map<absl::string_view, std::string>>(splitter);
458   TestMapConversionOperator<
459       absl::flat_hash_map<std::string, absl::string_view>>(splitter);
460 
461   // Tests conversion to std::pair
462 
463   TestPairConversionOperator<absl::string_view, absl::string_view>(splitter);
464   TestPairConversionOperator<absl::string_view, std::string>(splitter);
465   TestPairConversionOperator<std::string, absl::string_view>(splitter);
466   TestPairConversionOperator<std::string, std::string>(splitter);
467 }
468 
469 // A few additional tests for conversion to std::pair. This conversion is
470 // different from others because a std::pair always has exactly two elements:
471 // .first and .second. The split has to work even when the split has
472 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter,ToPair)473 TEST(Splitter, ToPair) {
474   {
475     // Empty string
476     std::pair<std::string, std::string> p = absl::StrSplit("", ',');
477     EXPECT_EQ("", p.first);
478     EXPECT_EQ("", p.second);
479   }
480 
481   {
482     // Only first
483     std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
484     EXPECT_EQ("a", p.first);
485     EXPECT_EQ("", p.second);
486   }
487 
488   {
489     // Only second
490     std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
491     EXPECT_EQ("", p.first);
492     EXPECT_EQ("b", p.second);
493   }
494 
495   {
496     // First and second.
497     std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
498     EXPECT_EQ("a", p.first);
499     EXPECT_EQ("b", p.second);
500   }
501 
502   {
503     // First and second and then more stuff that will be ignored.
504     std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
505     EXPECT_EQ("a", p.first);
506     EXPECT_EQ("b", p.second);
507     // "c" is omitted.
508   }
509 }
510 
// Runs the same input through each built-in predicate and checks which pieces
// survive.
TEST(Splitter, Predicates) {
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;
  static const char kTestChars[] = ",a, ,b,";

  // With no predicate, empty pieces are kept.
  {
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  // AllowEmpty keeps empty pieces too, matching the no-predicate result.
  {
    auto with_allow_empty = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> allow_empty_pieces = with_allow_empty;
    EXPECT_THAT(allow_empty_pieces, ElementsAre("", "a", " ", "b", ""));

    auto without_predicate = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> no_predicate_pieces = without_predicate;
    EXPECT_EQ(allow_empty_pieces, no_predicate_pieces);
  }

  // SkipEmpty drops empty pieces but keeps the whitespace-only one.
  {
    auto with_skip_empty = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = with_skip_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  // SkipWhitespace drops empty and all-whitespace pieces.
  {
    auto with_skip_ws = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = with_skip_ws;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
550 
551 //
552 // Tests for StrSplit()
553 //
554 
// Basic StrSplit() usage: discarding the result, initializing vectors, and
// assigning to already-constructed containers.
TEST(Split, Basics) {
  {
    // The return value is ignored, so this does nothing useful, but the call
    // should still work.
    absl::StrSplit("a,b,c", ',');
  }

  {
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment (as opposed to initialization) must work as well. This
    // requires a little extra work with C++11 because of overloads with
    // initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
587 
ReturnStringView()588 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns "Hello World" as a pointer to a const, NUL-terminated string
// literal.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns "Hello World" through a non-const char*. The pointee is still a
// string literal; const is cast away only to obtain the pointer type.
char* ReturnCharP() {
  const char* const greeting = "Hello World";
  return const_cast<char*>(greeting);
}
591 
// StrSplit() accepts text given as the by-value result of a function call,
// for absl::string_view, const char* and char* return types.
TEST(Split, AcceptsCertainTemporaries) {
  std::vector<std::string> words;

  // Text from a returned absl::string_view.
  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Text from a returned const char*.
  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // Text from a returned char*.
  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
601 
// Splitting a temporary std::string must remain valid after the temporary has
// been destroyed.
TEST(Split, Temporary) {
  // The input is longer than the SSO length on purpose: if the splitter only
  // kept a reference to the temporary string's contents, it would end up
  // referencing freed memory instead of just dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // First pass. (This pattern shows up more often in C++11 as part of a
  // range-based for loop.) Each piece must be the next letter, starting at
  // "a"; after the final "u" the expected letter has advanced to "v".
  auto first_splitter = absl::StrSplit(std::string(input), ',');
  std::string next_letter = "a";
  for (absl::string_view piece : first_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  EXPECT_EQ("v", next_letter);

  // Second pass over a fresh temporary, with the same expectations.
  auto second_splitter = absl::StrSplit(std::string(input), ',');
  next_letter = "a";
  for (absl::string_view piece : second_splitter) {
    EXPECT_EQ(next_letter, piece);
    ++next_letter[0];
  }
  EXPECT_EQ("v", next_letter);
}
628 
// Copy-constructs `value` into a new heap allocation and returns the owning
// std::unique_ptr.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
633 
// A splitter built from an lvalue string can be copied, and the copy stays
// usable after the splitter it was copied from has been destroyed.
TEST(Split, LvalueCaptureIsCopyable) {
  std::string input = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(input, ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the splitter that was copied from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
642 
// A splitter built from a temporary string can be copied, and the copy stays
// usable after the splitter it was copied from has been destroyed.
TEST(Split, TemporaryCaptureIsCopyable) {
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();  // Destroy the splitter that was copied from.

  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
650 
// Splitters support all four copy/move operations; the statements below
// mainly need to compile.
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto source = absl::StrSplit("foo", '-');

  auto copied = source;            // Copy construct
  auto moved = std::move(source);  // Move construct
  copied = moved;                  // Copy assign
  moved = std::move(copied);       // Move assign

  EXPECT_THAT(moved, ElementsAre("foo"));
}
662 
// The delimiter may be given as a char, a std::string or an
// absl::string_view; all three split "a,b" the same way.
TEST(Split, StringDelimiter) {
  {
    // char delimiter.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // std::string delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // absl::string_view delimiter.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
680 
681 #if !defined(__cpp_char8_t)
682 #if defined(__clang__)
683 #pragma clang diagnostic push
684 #pragma clang diagnostic ignored "-Wc++2a-compat"
685 #endif
TEST(Split, UTF8) {
  // Splitting of UTF-8 encoded text, with both ASCII and UTF-8 delimiters.
  std::string utf8_word = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";

  {
    // UTF-8 text split on an ASCII delimiter.
    std::string text = "a," + utf8_word;
    std::vector<absl::string_view> pieces = absl::StrSplit(text, ',');
    EXPECT_THAT(pieces, ElementsAre("a", utf8_word));
  }

  {
    // UTF-8 text split on a delimiter that itself contains UTF-8.
    std::string text = "a," + utf8_word + ",b";
    std::string utf8_delimiter = "," + utf8_word + ",";
    std::vector<absl::string_view> pieces =
        absl::StrSplit(text, utf8_delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text split with ByAnyChar over ASCII characters.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
712 #if defined(__clang__)
713 #pragma clang diagnostic pop
714 #endif
715 #endif  // !defined(__cpp_char8_t)
716 
// An empty-string delimiter splits the input into single characters; empty
// input still yields one empty piece.
TEST(Split, EmptyStringDelimiter) {
  {
    // Empty input.
    std::vector<std::string> chars = absl::StrSplit("", "");
    EXPECT_THAT(chars, ElementsAre(""));
  }

  {
    // One character.
    std::vector<std::string> chars = absl::StrSplit("a", "");
    EXPECT_THAT(chars, ElementsAre("a"));
  }

  {
    // Two characters.
    std::vector<std::string> chars = absl::StrSplit("ab", "");
    EXPECT_THAT(chars, ElementsAre("a", "b"));
  }

  {
    // A space is returned as a piece like any other character.
    std::vector<std::string> chars = absl::StrSplit("a b", "");
    EXPECT_THAT(chars, ElementsAre("a", " ", "b"));
  }
}
738 
// Splitting on the two-character delimiter "//": a single '/' is never a
// match, and runs of slashes are consumed two at a time from the left.
TEST(Split, SubstrDelimiter) {
  const absl::string_view kDelim("//");
  std::vector<absl::string_view> parts;

  // Empty input.
  parts = absl::StrSplit("", kDelim);
  EXPECT_THAT(parts, ElementsAre(""));

  // Input that is exactly the delimiter.
  parts = absl::StrSplit("//", kDelim);
  EXPECT_THAT(parts, ElementsAre("", ""));

  // No delimiter present.
  parts = absl::StrSplit("ab", kDelim);
  EXPECT_THAT(parts, ElementsAre("ab"));

  // Trailing delimiter.
  parts = absl::StrSplit("ab//", kDelim);
  EXPECT_THAT(parts, ElementsAre("ab", ""));

  // A lone slash does not match.
  parts = absl::StrSplit("ab/", kDelim);
  EXPECT_THAT(parts, ElementsAre("ab/"));

  parts = absl::StrSplit("a/b", kDelim);
  EXPECT_THAT(parts, ElementsAre("a/b"));

  // Two, three and four consecutive slashes.
  parts = absl::StrSplit("a//b", kDelim);
  EXPECT_THAT(parts, ElementsAre("a", "b"));

  parts = absl::StrSplit("a///b", kDelim);
  EXPECT_THAT(parts, ElementsAre("a", "/b"));

  parts = absl::StrSplit("a////b", kDelim);
  EXPECT_THAT(parts, ElementsAre("a", "", "b"));
}
770 
// Checks where empty pieces appear in the output: for empty input, and for
// leading, trailing and adjacent delimiters.
TEST(Split, EmptyResults) {
  std::vector<absl::string_view> parts;

  // Empty input gives a single empty piece.
  parts = absl::StrSplit("", '#');
  EXPECT_THAT(parts, ElementsAre(""));

  // Input that is only the delimiter.
  parts = absl::StrSplit("#", '#');
  EXPECT_THAT(parts, ElementsAre("", ""));

  // Leading delimiter.
  parts = absl::StrSplit("#cd", '#');
  EXPECT_THAT(parts, ElementsAre("", "cd"));

  // Trailing delimiter.
  parts = absl::StrSplit("ab#cd#", '#');
  EXPECT_THAT(parts, ElementsAre("ab", "cd", ""));

  // Adjacent delimiters in the middle.
  parts = absl::StrSplit("ab##cd", '#');
  EXPECT_THAT(parts, ElementsAre("ab", "", "cd"));

  // Adjacent delimiters at the end.
  parts = absl::StrSplit("ab##", '#');
  EXPECT_THAT(parts, ElementsAre("ab", "", ""));

  parts = absl::StrSplit("ab#ab#", '#');
  EXPECT_THAT(parts, ElementsAre("ab", "ab", ""));

  // Input made up entirely of delimiters.
  parts = absl::StrSplit("aaaa", 'a');
  EXPECT_THAT(parts, ElementsAre("", "", "", "", ""));

  // With SkipEmpty, empty input gives no pieces at all.
  parts = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(parts, ElementsAre());
}
801 
802 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)803 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
804                                  size_t starting_pos, int expected_pos) {
805   absl::string_view found = d.Find(text, starting_pos);
806   return found.data() != text.data() + text.size() &&
807          expected_pos == found.data() - text.data();
808 }
809 
810 // Helper function for testing Delimiter objects. Returns true if the given
811 // Delimiter is found in the given string at the given position. This function
812 // tests two cases:
813 //   1. The actual text given, staring at position 0
814 //   2. The text given with leading padding that should be ignored
815 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)816 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
817   const std::string leading_text = ",x,y,z,";
818   return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
819          IsFoundAtStartingPos(leading_text + std::string(text), d,
820                               leading_text.length(),
821                               expected_pos + leading_text.length());
822 }
823 
824 //
825 // Tests for ByString
826 //
827 
828 // Tests using any delimiter that represents a single comma.
829 template <typename Delimiter>
TestComma(Delimiter d)830 void TestComma(Delimiter d) {
831   EXPECT_TRUE(IsFoundAt(",", d, 0));
832   EXPECT_TRUE(IsFoundAt("a,", d, 1));
833   EXPECT_TRUE(IsFoundAt(",b", d, 0));
834   EXPECT_TRUE(IsFoundAt("a,b", d, 1));
835   EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
836   EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
837   EXPECT_FALSE(IsFoundAt("", d, -1));
838   EXPECT_FALSE(IsFoundAt(" ", d, -1));
839   EXPECT_FALSE(IsFoundAt("a", d, -1));
840   EXPECT_FALSE(IsFoundAt("a b c", d, -1));
841   EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
842   EXPECT_FALSE(IsFoundAt(";", d, -1));
843 }
844 
TEST(Delimiter, ByString) {
  // A temporary ByString can be passed directly.
  TestComma(absl::ByString(","));

  // A named ByString object works as well.
  absl::ByString named_comma(",");
  TestComma(named_comma);

  // Searching a string for the empty string ("") reports position 0; the
  // absl::string_view::find() check right below demonstrates that. Were the
  // ByString delimiter to report position 0 in this case, the SplitIterator
  // code would loop forever. The empty delimiter is therefore special-cased:
  // it always reports the item at position 1.
  const absl::string_view haystack("abc");
  EXPECT_EQ(0, haystack.find(""));  // "" is found at position 0
  absl::ByString empty_delim("");
  EXPECT_FALSE(IsFoundAt("", empty_delim, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delim, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delim, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delim, 1));
}
866 
// Checks that ByChar(',') behaves as a single-comma delimiter.
// NOTE(review): this test is registered under the `Split` suite, whereas the
// neighboring delimiter tests use `Delimiter`; the name is kept unchanged.
TEST(Split, ByChar) {
  // A temporary ByChar can be passed directly.
  TestComma(absl::ByChar(','));

  // A named ByChar object works as well.
  absl::ByChar named_comma(',');
  TestComma(named_comma);
}
875 
876 //
877 // Tests for ByAnyChar
878 //
879 
TEST(Delimiter, ByAnyChar) {
  struct Found {
    absl::string_view text;
    int pos;
  };

  // A delimiter set holding a single character.
  absl::ByAnyChar comma_only(",");
  const Found comma_hits[] = {{",", 0}, {"a,", 1}, {"a,b", 1}, {",b", 0}};
  for (const Found& hit : comma_hits) {
    SCOPED_TRACE(std::string(hit.text));  // Identify the failing input.
    EXPECT_TRUE(IsFoundAt(hit.text, comma_only, hit.pos));
  }
  const absl::string_view comma_misses[] = {"", " ", "a", "a;b;c", ";"};
  for (const absl::string_view text : comma_misses) {
    SCOPED_TRACE(std::string(text));  // Identify the failing input.
    EXPECT_FALSE(IsFoundAt(text, comma_only, -1));
  }

  // A delimiter set holding two characters: the earliest occurrence of either
  // one is the expected match.
  absl::ByAnyChar comma_or_semicolon(",;");
  const Found pair_hits[] = {
      {",", 0},   {";", 0},   {",;", 0},  {";,", 0},   {",;b", 0},
      {";,b", 0}, {"a;,", 1}, {"a,;", 1}, {"a;,b", 1}, {"a,;b", 1},
  };
  for (const Found& hit : pair_hits) {
    SCOPED_TRACE(std::string(hit.text));  // Identify the failing input.
    EXPECT_TRUE(IsFoundAt(hit.text, comma_or_semicolon, hit.pos));
  }
  const absl::string_view pair_misses[] = {"", " ", "a", "a=b=c", "="};
  for (const absl::string_view text : pair_misses) {
    SCOPED_TRACE(std::string(text));  // Identify the failing input.
    EXPECT_FALSE(IsFoundAt(text, comma_or_semicolon, -1));
  }

  // With an empty delimiter set, ByAnyChar mirrors ByString given an empty
  // string: it always returns a zero-length absl::string_view referring to
  // the item at position 1, not position 0.
  absl::ByAnyChar no_chars("");
  EXPECT_FALSE(IsFoundAt("", no_chars, 0));
  EXPECT_FALSE(IsFoundAt("a", no_chars, 0));
  EXPECT_TRUE(IsFoundAt("ab", no_chars, 1));
  EXPECT_TRUE(IsFoundAt("abc", no_chars, 1));
}
923 
924 //
925 // Tests for ByLength
926 //
927 
TEST(Delimiter, ByLength) {
  absl::ByLength every_four(4);

  // Texts longer than four characters: a match is expected at offset 4.
  const absl::string_view long_enough[] = {
      "abcde", "abcdefghijklmnopqrstuvwxyz", "a b,c\nd"};
  for (const absl::string_view text : long_enough) {
    SCOPED_TRACE(std::string(text));  // Identify the failing input.
    EXPECT_TRUE(IsFoundAt(text, every_four, 4));
  }

  // Texts of four characters or fewer: no match is expected.
  const absl::string_view too_short[] = {"", "a", "ab", "abc", "abcd"};
  for (const absl::string_view text : too_short) {
    SCOPED_TRACE(std::string(text));  // Identify the failing input.
    EXPECT_FALSE(IsFoundAt(text, every_four, 0));
  }
}
944 
// Splits a string of kSize bytes ('x' repeated, with a single trailing '-')
// and checks the two resulting pieces. kSize is 2^31 + 1 bytes normally and
// 2^26 + 1 bytes when built with ASan, MSan or TSan. The body only runs when
// size_t is wider than 32 bits.
TEST(Split, WorksWithLargeStrings) {
#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
    defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
  constexpr size_t kSize = (uint32_t{1} << 26) + 1;  // 64M + 1 byte
#else
  constexpr size_t kSize = (uint32_t{1} << 31) + 1;  // 2G + 1 byte
#endif
  if (sizeof(size_t) > 4) {
    std::string s(kSize, 'x');
    s.back() = '-';
    std::vector<absl::string_view> v = absl::StrSplit(s, '-');
    // Fatal on purpose: the element accesses below would index out of bounds
    // if the split produced a different number of pieces.
    ASSERT_EQ(2u, v.size());
    // The first piece holds every byte before the trailing '-'. Fatal as well,
    // because the character checks below index into it.
    ASSERT_EQ(kSize - 1, v[0].size());
    // Spot-check individual characters; testing::StartsWith is too slow on a
    // string of this size.
    EXPECT_EQ('x', v[0][0]);
    EXPECT_EQ('x', v[0][1]);
    EXPECT_EQ('x', v[0][3]);
    EXPECT_EQ('x', v[0].back());
    EXPECT_EQ("", v[1]);
  }
}
965 
// Checks four traits from absl::strings_internal: each must report false for
// `int` and true for a type chosen for it (std::map<int, int> for the first
// three, std::initializer_list<int> for IsInitializerList).
TEST(SplitInternalTest, TypeTraits) {
  namespace internal = absl::strings_internal;
  using IntMap = std::map<int, int>;
  using IntInitList = std::initializer_list<int>;

  EXPECT_FALSE(internal::HasMappedType<int>::value);
  EXPECT_TRUE(internal::HasMappedType<IntMap>::value);

  EXPECT_FALSE(internal::HasValueType<int>::value);
  EXPECT_TRUE(internal::HasValueType<IntMap>::value);

  EXPECT_FALSE(internal::HasConstIterator<int>::value);
  EXPECT_TRUE(internal::HasConstIterator<IntMap>::value);

  EXPECT_FALSE(internal::IsInitializerList<int>::value);
  EXPECT_TRUE(internal::IsInitializerList<IntInitList>::value);
}
980 
981 }  // namespace
982