1 //! Parallel iterator types for [strings][std::str]
2 //!
3 //! You will rarely need to interact with this module directly unless you need
4 //! to name one of the iterator types.
5 //!
//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`], as
//! well as `par_matches()` and `par_match_indices()`, reference a `Pattern`
//! trait which is not visible outside this crate.
//! This trait is intentionally kept private, for use only by Rayon itself.
//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
10 //!
11 //! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
12 //! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
13 //!
14 //! [std::str]: https://doc.rust-lang.org/stable/std/str/
15 
16 use crate::iter::plumbing::*;
17 use crate::iter::*;
18 use crate::split_producer::*;
19 
/// Reports whether `b` can begin a UTF-8 encoded character.
/// (same test as the one inside `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes are exactly those of the form 0b10xx_xxxx; every
    // other byte (b < 128 || b >= 192) starts a character.
    (b & 0xC0) != 0x80
}
27 
28 /// Find the index of a character boundary near the midpoint.
29 #[inline]
find_char_midpoint(chars: &str) -> usize30 fn find_char_midpoint(chars: &str) -> usize {
31     let mid = chars.len() / 2;
32 
33     // We want to split near the midpoint, but we need to find an actual
34     // character boundary.  So we look at the raw bytes, first scanning
35     // forward from the midpoint for a boundary, then trying backward.
36     let (left, right) = chars.as_bytes().split_at(mid);
37     match right.iter().cloned().position(is_char_boundary) {
38         Some(i) => mid + i,
39         None => left
40             .iter()
41             .cloned()
42             .rposition(is_char_boundary)
43             .unwrap_or(0),
44     }
45 }
46 
47 /// Try to split a string near the midpoint.
48 #[inline]
split(chars: &str) -> Option<(&str, &str)>49 fn split(chars: &str) -> Option<(&str, &str)> {
50     let index = find_char_midpoint(chars);
51     if index > 0 {
52         Some(chars.split_at(index))
53     } else {
54         None
55     }
56 }
57 
58 /// Parallel extensions for strings.
59 pub trait ParallelString {
60     /// Returns a plain string slice, which is used to implement the rest of
61     /// the parallel methods.
as_parallel_string(&self) -> &str62     fn as_parallel_string(&self) -> &str;
63 
64     /// Returns a parallel iterator over the characters of a string.
65     ///
66     /// # Examples
67     ///
68     /// ```
69     /// use rayon::prelude::*;
70     /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
71     /// assert_eq!(Some('o'), max);
72     /// ```
par_chars(&self) -> Chars<'_>73     fn par_chars(&self) -> Chars<'_> {
74         Chars {
75             chars: self.as_parallel_string(),
76         }
77     }
78 
79     /// Returns a parallel iterator over the characters of a string, with their positions.
80     ///
81     /// # Examples
82     ///
83     /// ```
84     /// use rayon::prelude::*;
85     /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
86     /// assert_eq!(Some((1, 'e')), min);
87     /// ```
par_char_indices(&self) -> CharIndices<'_>88     fn par_char_indices(&self) -> CharIndices<'_> {
89         CharIndices {
90             chars: self.as_parallel_string(),
91         }
92     }
93 
94     /// Returns a parallel iterator over the bytes of a string.
95     ///
96     /// Note that multi-byte sequences (for code points greater than `U+007F`)
97     /// are produced as separate items, but will not be split across threads.
98     /// If you would prefer an indexed iterator without that guarantee, consider
99     /// `string.as_bytes().par_iter().cloned()` instead.
100     ///
101     /// # Examples
102     ///
103     /// ```
104     /// use rayon::prelude::*;
105     /// let max = "hello".par_bytes().max();
106     /// assert_eq!(Some(b'o'), max);
107     /// ```
par_bytes(&self) -> Bytes<'_>108     fn par_bytes(&self) -> Bytes<'_> {
109         Bytes {
110             chars: self.as_parallel_string(),
111         }
112     }
113 
114     /// Returns a parallel iterator over a string encoded as UTF-16.
115     ///
116     /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
117     /// produced as separate items, but will not be split across threads.
118     ///
119     /// # Examples
120     ///
121     /// ```
122     /// use rayon::prelude::*;
123     ///
124     /// let max = "hello".par_encode_utf16().max();
125     /// assert_eq!(Some(b'o' as u16), max);
126     ///
127     /// let text = "Zażółć gęślą jaźń";
128     /// let utf8_len = text.len();
129     /// let utf16_len = text.par_encode_utf16().count();
130     /// assert!(utf16_len <= utf8_len);
131     /// ```
par_encode_utf16(&self) -> EncodeUtf16<'_>132     fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
133         EncodeUtf16 {
134             chars: self.as_parallel_string(),
135         }
136     }
137 
138     /// Returns a parallel iterator over substrings separated by a
139     /// given character or predicate, similar to `str::split`.
140     ///
141     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
142     /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
143     ///
144     /// # Examples
145     ///
146     /// ```
147     /// use rayon::prelude::*;
148     /// let total = "1, 2, buckle, 3, 4, door"
149     ///    .par_split(',')
150     ///    .filter_map(|s| s.trim().parse::<i32>().ok())
151     ///    .sum();
152     /// assert_eq!(10, total);
153     /// ```
par_split<P: Pattern>(&self, separator: P) -> Split<'_, P>154     fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
155         Split::new(self.as_parallel_string(), separator)
156     }
157 
158     /// Returns a parallel iterator over substrings terminated by a
159     /// given character or predicate, similar to `str::split_terminator`.
160     /// It's equivalent to `par_split`, except it doesn't produce an empty
161     /// substring after a trailing terminator.
162     ///
163     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
164     /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
165     ///
166     /// # Examples
167     ///
168     /// ```
169     /// use rayon::prelude::*;
170     /// let parts: Vec<_> = "((1 + 3) * 2)"
171     ///     .par_split_terminator(|c| c == '(' || c == ')')
172     ///     .collect();
173     /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
174     /// ```
par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P>175     fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
176         SplitTerminator::new(self.as_parallel_string(), terminator)
177     }
178 
179     /// Returns a parallel iterator over the lines of a string, ending with an
180     /// optional carriage return and with a newline (`\r\n` or just `\n`).
181     /// The final line ending is optional, and line endings are not included in
182     /// the output strings.
183     ///
184     /// # Examples
185     ///
186     /// ```
187     /// use rayon::prelude::*;
188     /// let lengths: Vec<_> = "hello world\nfizbuzz"
189     ///     .par_lines()
190     ///     .map(|l| l.len())
191     ///     .collect();
192     /// assert_eq!(vec![11, 7], lengths);
193     /// ```
par_lines(&self) -> Lines<'_>194     fn par_lines(&self) -> Lines<'_> {
195         Lines(self.as_parallel_string())
196     }
197 
198     /// Returns a parallel iterator over the sub-slices of a string that are
199     /// separated by any amount of whitespace.
200     ///
201     /// As with `str::split_whitespace`, 'whitespace' is defined according to
202     /// the terms of the Unicode Derived Core Property `White_Space`.
203     ///
204     /// # Examples
205     ///
206     /// ```
207     /// use rayon::prelude::*;
208     /// let longest = "which is the longest word?"
209     ///     .par_split_whitespace()
210     ///     .max_by_key(|word| word.len());
211     /// assert_eq!(Some("longest"), longest);
212     /// ```
par_split_whitespace(&self) -> SplitWhitespace<'_>213     fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
214         SplitWhitespace(self.as_parallel_string())
215     }
216 
217     /// Returns a parallel iterator over substrings that match a
218     /// given character or predicate, similar to `str::matches`.
219     ///
220     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
221     /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
222     ///
223     /// # Examples
224     ///
225     /// ```
226     /// use rayon::prelude::*;
227     /// let total = "1, 2, buckle, 3, 4, door"
228     ///    .par_matches(char::is_numeric)
229     ///    .map(|s| s.parse::<i32>().expect("digit"))
230     ///    .sum();
231     /// assert_eq!(10, total);
232     /// ```
par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P>233     fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
234         Matches {
235             chars: self.as_parallel_string(),
236             pattern,
237         }
238     }
239 
240     /// Returns a parallel iterator over substrings that match a given character
241     /// or predicate, with their positions, similar to `str::match_indices`.
242     ///
243     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
244     /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
245     ///
246     /// # Examples
247     ///
248     /// ```
249     /// use rayon::prelude::*;
250     /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
251     ///    .par_match_indices(char::is_numeric)
252     ///    .collect();
253     /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
254     /// ```
par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P>255     fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
256         MatchIndices {
257             chars: self.as_parallel_string(),
258             pattern,
259         }
260     }
261 }
262 
/// A `str` is already a plain string slice, so it hands itself to the
/// default `par_*` methods of `ParallelString`.
impl ParallelString for str {
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}
269 
270 // /////////////////////////////////////////////////////////////////////////
271 
/// We hide the `Pattern` trait in a private module, as its API is not meant
/// for general consumption.  If we could have privacy on trait items, then it
/// would be nicer to have its basic existence and implementors public while
/// keeping all of the methods private.
mod private {
    use crate::iter::plumbing::Folder;

    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
    /// `std::str::pattern::{Pattern, Searcher}`.
    ///
    /// Implementing this trait is not permitted outside of `rayon`.
    pub trait Pattern: Sized + Sync + Send {
        // NOTE(review): `private_decl!` is defined outside this file; presumably
        // it declares a hidden item that blocks external implementations —
        // confirm against the macro definition.
        private_decl! {}
        /// Returns the byte index of the first match of this pattern in
        /// `haystack`, or `None` if there is no match.
        fn find_in(&self, haystack: &str) -> Option<usize>;
        /// Returns the byte index of the last match of this pattern in
        /// `haystack`, or `None` if there is no match.
        fn rfind_in(&self, haystack: &str) -> Option<usize>;
        /// Returns `true` if `haystack` ends with a match of this pattern.
        fn is_suffix_of(&self, haystack: &str) -> bool;
        /// Feeds `folder` the pieces of `haystack` separated by this pattern
        /// and returns the folder.  When `skip_last` is `true`, the final
        /// piece is dropped before folding.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds `folder` every substring of `haystack` that matches this
        /// pattern and returns the folder.
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds `folder` every `(index, substring)` match of this pattern in
        /// `haystack`, with `base` added to each index, and returns the folder.
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>;
    }
}
299 use self::private::Pattern;
300 
/// Builds a closure that shifts the index of an `(index, item)` pair by
/// `base`, leaving the item untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair| {
        let (local, item) = pair;
        (base + local, item)
    }
}
305 
306 impl Pattern for char {
307     private_impl! {}
308 
309     #[inline]
find_in(&self, chars: &str) -> Option<usize>310     fn find_in(&self, chars: &str) -> Option<usize> {
311         chars.find(*self)
312     }
313 
314     #[inline]
rfind_in(&self, chars: &str) -> Option<usize>315     fn rfind_in(&self, chars: &str) -> Option<usize> {
316         chars.rfind(*self)
317     }
318 
319     #[inline]
is_suffix_of(&self, chars: &str) -> bool320     fn is_suffix_of(&self, chars: &str) -> bool {
321         chars.ends_with(*self)
322     }
323 
fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>,324     fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
325     where
326         F: Folder<&'ch str>,
327     {
328         let mut split = chars.split(*self);
329         if skip_last {
330             split.next_back();
331         }
332         folder.consume_iter(split)
333     }
334 
fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F where F: Folder<&'ch str>,335     fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
336     where
337         F: Folder<&'ch str>,
338     {
339         folder.consume_iter(chars.matches(*self))
340     }
341 
fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>,342     fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
343     where
344         F: Folder<(usize, &'ch str)>,
345     {
346         folder.consume_iter(chars.match_indices(*self).map(offset(base)))
347     }
348 }
349 
350 impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
351     private_impl! {}
352 
find_in(&self, chars: &str) -> Option<usize>353     fn find_in(&self, chars: &str) -> Option<usize> {
354         chars.find(self)
355     }
356 
rfind_in(&self, chars: &str) -> Option<usize>357     fn rfind_in(&self, chars: &str) -> Option<usize> {
358         chars.rfind(self)
359     }
360 
is_suffix_of(&self, chars: &str) -> bool361     fn is_suffix_of(&self, chars: &str) -> bool {
362         chars.ends_with(self)
363     }
364 
fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>,365     fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
366     where
367         F: Folder<&'ch str>,
368     {
369         let mut split = chars.split(self);
370         if skip_last {
371             split.next_back();
372         }
373         folder.consume_iter(split)
374     }
375 
fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F where F: Folder<&'ch str>,376     fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
377     where
378         F: Folder<&'ch str>,
379     {
380         folder.consume_iter(chars.matches(self))
381     }
382 
fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>,383     fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
384     where
385         F: Folder<(usize, &'ch str)>,
386     {
387         folder.consume_iter(chars.match_indices(self).map(offset(base)))
388     }
389 }
390 
391 // /////////////////////////////////////////////////////////////////////////
392 
/// Parallel iterator over the characters of a string
///
/// Created by `ParallelString::par_chars()`.
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string slice whose characters are yielded.
    chars: &'ch str,
}
398 
/// Unindexed producer behind `Chars`: it is split on character boundaries
/// and folds the characters of its slice.
struct CharsProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
402 
403 impl<'ch> ParallelIterator for Chars<'ch> {
404     type Item = char;
405 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,406     fn drive_unindexed<C>(self, consumer: C) -> C::Result
407     where
408         C: UnindexedConsumer<Self::Item>,
409     {
410         bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
411     }
412 }
413 
414 impl<'ch> UnindexedProducer for CharsProducer<'ch> {
415     type Item = char;
416 
split(self) -> (Self, Option<Self>)417     fn split(self) -> (Self, Option<Self>) {
418         match split(self.chars) {
419             Some((left, right)) => (
420                 CharsProducer { chars: left },
421                 Some(CharsProducer { chars: right }),
422             ),
423             None => (self, None),
424         }
425     }
426 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,427     fn fold_with<F>(self, folder: F) -> F
428     where
429         F: Folder<Self::Item>,
430     {
431         folder.consume_iter(self.chars.chars())
432     }
433 }
434 
435 // /////////////////////////////////////////////////////////////////////////
436 
/// Parallel iterator over the characters of a string, with their positions
///
/// Created by `ParallelString::par_char_indices()`.
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string slice whose characters and positions are yielded.
    chars: &'ch str,
}
442 
/// Unindexed producer behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the string the iteration started from;
    /// added to every position this producer yields.
    index: usize,
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
447 
448 impl<'ch> ParallelIterator for CharIndices<'ch> {
449     type Item = (usize, char);
450 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,451     fn drive_unindexed<C>(self, consumer: C) -> C::Result
452     where
453         C: UnindexedConsumer<Self::Item>,
454     {
455         let producer = CharIndicesProducer {
456             index: 0,
457             chars: self.chars,
458         };
459         bridge_unindexed(producer, consumer)
460     }
461 }
462 
463 impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
464     type Item = (usize, char);
465 
split(self) -> (Self, Option<Self>)466     fn split(self) -> (Self, Option<Self>) {
467         match split(self.chars) {
468             Some((left, right)) => (
469                 CharIndicesProducer {
470                     chars: left,
471                     ..self
472                 },
473                 Some(CharIndicesProducer {
474                     chars: right,
475                     index: self.index + left.len(),
476                 }),
477             ),
478             None => (self, None),
479         }
480     }
481 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,482     fn fold_with<F>(self, folder: F) -> F
483     where
484         F: Folder<Self::Item>,
485     {
486         let base = self.index;
487         folder.consume_iter(self.chars.char_indices().map(offset(base)))
488     }
489 }
490 
491 // /////////////////////////////////////////////////////////////////////////
492 
/// Parallel iterator over the bytes of a string
///
/// Created by `ParallelString::par_bytes()`.
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string slice whose bytes are yielded.
    chars: &'ch str,
}
498 
/// Unindexed producer behind `Bytes`.  It holds a `str` rather than a byte
/// slice, and is split only on character boundaries.
struct BytesProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
502 
503 impl<'ch> ParallelIterator for Bytes<'ch> {
504     type Item = u8;
505 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,506     fn drive_unindexed<C>(self, consumer: C) -> C::Result
507     where
508         C: UnindexedConsumer<Self::Item>,
509     {
510         bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
511     }
512 }
513 
514 impl<'ch> UnindexedProducer for BytesProducer<'ch> {
515     type Item = u8;
516 
split(self) -> (Self, Option<Self>)517     fn split(self) -> (Self, Option<Self>) {
518         match split(self.chars) {
519             Some((left, right)) => (
520                 BytesProducer { chars: left },
521                 Some(BytesProducer { chars: right }),
522             ),
523             None => (self, None),
524         }
525     }
526 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,527     fn fold_with<F>(self, folder: F) -> F
528     where
529         F: Folder<Self::Item>,
530     {
531         folder.consume_iter(self.chars.bytes())
532     }
533 }
534 
535 // /////////////////////////////////////////////////////////////////////////
536 
/// Parallel iterator over a string encoded as UTF-16
///
/// Created by `ParallelString::par_encode_utf16()`.
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string slice that is re-encoded as UTF-16 code units.
    chars: &'ch str,
}
542 
/// Unindexed producer behind `EncodeUtf16`; it is split on character
/// boundaries of the underlying UTF-8 string.
struct EncodeUtf16Producer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
546 
547 impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
548     type Item = u16;
549 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,550     fn drive_unindexed<C>(self, consumer: C) -> C::Result
551     where
552         C: UnindexedConsumer<Self::Item>,
553     {
554         bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
555     }
556 }
557 
558 impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
559     type Item = u16;
560 
split(self) -> (Self, Option<Self>)561     fn split(self) -> (Self, Option<Self>) {
562         match split(self.chars) {
563             Some((left, right)) => (
564                 EncodeUtf16Producer { chars: left },
565                 Some(EncodeUtf16Producer { chars: right }),
566             ),
567             None => (self, None),
568         }
569     }
570 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,571     fn fold_with<F>(self, folder: F) -> F
572     where
573         F: Folder<Self::Item>,
574     {
575         folder.consume_iter(self.chars.encode_utf16())
576     }
577 }
578 
579 // /////////////////////////////////////////////////////////////////////////
580 
/// Parallel iterator over substrings separated by a pattern
///
/// Created by `ParallelString::par_split()`.
#[derive(Debug, Clone)]
pub struct Split<'ch, P: Pattern> {
    /// The string slice being split.
    chars: &'ch str,
    /// The pattern that separates the yielded substrings.
    separator: P,
}
587 
588 impl<'ch, P: Pattern> Split<'ch, P> {
new(chars: &'ch str, separator: P) -> Self589     fn new(chars: &'ch str, separator: P) -> Self {
590         Split { chars, separator }
591     }
592 }
593 
594 impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
595     type Item = &'ch str;
596 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,597     fn drive_unindexed<C>(self, consumer: C) -> C::Result
598     where
599         C: UnindexedConsumer<Self::Item>,
600     {
601         let producer = SplitProducer::new(self.chars, &self.separator);
602         bridge_unindexed(producer, consumer)
603     }
604 }
605 
606 /// Implement support for `SplitProducer`.
607 impl<'ch, P: Pattern> Fissile<P> for &'ch str {
length(&self) -> usize608     fn length(&self) -> usize {
609         self.len()
610     }
611 
midpoint(&self, end: usize) -> usize612     fn midpoint(&self, end: usize) -> usize {
613         // First find a suitable UTF-8 boundary.
614         find_char_midpoint(&self[..end])
615     }
616 
find(&self, separator: &P, start: usize, end: usize) -> Option<usize>617     fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
618         separator.find_in(&self[start..end])
619     }
620 
rfind(&self, separator: &P, end: usize) -> Option<usize>621     fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
622         separator.rfind_in(&self[..end])
623     }
624 
split_once(self, index: usize) -> (Self, Self)625     fn split_once(self, index: usize) -> (Self, Self) {
626         let (left, right) = self.split_at(index);
627         let mut right_iter = right.chars();
628         right_iter.next(); // skip the separator
629         (left, right_iter.as_str())
630     }
631 
fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F where F: Folder<Self>,632     fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
633     where
634         F: Folder<Self>,
635     {
636         separator.fold_splits(self, folder, skip_last)
637     }
638 }
639 
640 // /////////////////////////////////////////////////////////////////////////
641 
/// Parallel iterator over substrings separated by a terminator pattern
///
/// Created by `ParallelString::par_split_terminator()`.
#[derive(Debug, Clone)]
pub struct SplitTerminator<'ch, P: Pattern> {
    /// The string slice being split.
    chars: &'ch str,
    /// The pattern that terminates each yielded substring.
    terminator: P,
}
648 
/// Unindexed producer behind `SplitTerminator`: a `SplitProducer` plus the
/// bookkeeping needed to drop a trailing piece.
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
    /// The underlying splitter over the string, borrowing the terminator.
    splitter: SplitProducer<'sep, P, &'ch str>,
    /// Passed to the splitter's `fold_with`; when `true`, the last piece of
    /// this producer's range is skipped.
    skip_last: bool,
}
653 
654 impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
new(chars: &'ch str, terminator: P) -> Self655     fn new(chars: &'ch str, terminator: P) -> Self {
656         SplitTerminator { chars, terminator }
657     }
658 }
659 
660 impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
new(chars: &'ch str, terminator: &'sep P) -> Self661     fn new(chars: &'ch str, terminator: &'sep P) -> Self {
662         SplitTerminatorProducer {
663             splitter: SplitProducer::new(chars, terminator),
664             skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
665         }
666     }
667 }
668 
669 impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
670     type Item = &'ch str;
671 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,672     fn drive_unindexed<C>(self, consumer: C) -> C::Result
673     where
674         C: UnindexedConsumer<Self::Item>,
675     {
676         let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
677         bridge_unindexed(producer, consumer)
678     }
679 }
680 
681 impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
682     type Item = &'ch str;
683 
split(mut self) -> (Self, Option<Self>)684     fn split(mut self) -> (Self, Option<Self>) {
685         let (left, right) = self.splitter.split();
686         self.splitter = left;
687         let right = right.map(|right| {
688             let skip_last = self.skip_last;
689             self.skip_last = false;
690             SplitTerminatorProducer {
691                 splitter: right,
692                 skip_last,
693             }
694         });
695         (self, right)
696     }
697 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,698     fn fold_with<F>(self, folder: F) -> F
699     where
700         F: Folder<Self::Item>,
701     {
702         self.splitter.fold_with(folder, self.skip_last)
703     }
704 }
705 
706 // /////////////////////////////////////////////////////////////////////////
707 
/// Parallel iterator over lines in a string
///
/// Created by `ParallelString::par_lines()`; the wrapped slice is the text
/// whose lines are yielded.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
711 
/// Strips at most one trailing `'\r'` from `line`; anything else is returned
/// unchanged.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    // '\r' is a single ASCII byte, so checking the last byte is enough and
    // cutting one byte off keeps the slice on a character boundary.
    let kept = match line.as_bytes().last() {
        Some(&b'\r') => line.len() - 1,
        _ => line.len(),
    };
    &line[..kept]
}
720 
721 impl<'ch> ParallelIterator for Lines<'ch> {
722     type Item = &'ch str;
723 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,724     fn drive_unindexed<C>(self, consumer: C) -> C::Result
725     where
726         C: UnindexedConsumer<Self::Item>,
727     {
728         self.0
729             .par_split_terminator('\n')
730             .map(no_carriage_return)
731             .drive_unindexed(consumer)
732     }
733 }
734 
735 // /////////////////////////////////////////////////////////////////////////
736 
/// Parallel iterator over substrings separated by whitespace
///
/// Created by `ParallelString::par_split_whitespace()`; the wrapped slice is
/// the text being split.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
740 
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    let piece: &str = s;
    !piece.is_empty()
}
745 
746 impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
747     type Item = &'ch str;
748 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,749     fn drive_unindexed<C>(self, consumer: C) -> C::Result
750     where
751         C: UnindexedConsumer<Self::Item>,
752     {
753         self.0
754             .par_split(char::is_whitespace)
755             .filter(not_empty)
756             .drive_unindexed(consumer)
757     }
758 }
759 
760 // /////////////////////////////////////////////////////////////////////////
761 
/// Parallel iterator over substrings that match a pattern
///
/// Created by `ParallelString::par_matches()`.
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    /// The string slice being searched.
    chars: &'ch str,
    /// The pattern whose matches are yielded.
    pattern: P,
}
768 
/// Unindexed producer behind `Matches`; it is split on character boundaries
/// and shares the pattern by reference.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
    /// The pattern, borrowed from the `Matches` iterator.
    pattern: &'pat P,
}
773 
774 impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
775     type Item = &'ch str;
776 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,777     fn drive_unindexed<C>(self, consumer: C) -> C::Result
778     where
779         C: UnindexedConsumer<Self::Item>,
780     {
781         let producer = MatchesProducer {
782             chars: self.chars,
783             pattern: &self.pattern,
784         };
785         bridge_unindexed(producer, consumer)
786     }
787 }
788 
789 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
790     type Item = &'ch str;
791 
split(self) -> (Self, Option<Self>)792     fn split(self) -> (Self, Option<Self>) {
793         match split(self.chars) {
794             Some((left, right)) => (
795                 MatchesProducer {
796                     chars: left,
797                     ..self
798                 },
799                 Some(MatchesProducer {
800                     chars: right,
801                     ..self
802                 }),
803             ),
804             None => (self, None),
805         }
806     }
807 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,808     fn fold_with<F>(self, folder: F) -> F
809     where
810         F: Folder<Self::Item>,
811     {
812         self.pattern.fold_matches(self.chars, folder)
813     }
814 }
815 
816 // /////////////////////////////////////////////////////////////////////////
817 
/// Parallel iterator over substrings that match a pattern, with their positions
///
/// Created by `ParallelString::par_match_indices()`.
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    /// The string slice being searched.
    chars: &'ch str,
    /// The pattern whose matches are yielded.
    pattern: P,
}
824 
/// Unindexed producer behind `MatchIndices`.
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    /// Byte offset of `chars` within the string the iteration started from;
    /// passed as the base for every match position this producer yields.
    index: usize,
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
    /// The pattern, borrowed from the `MatchIndices` iterator.
    pattern: &'pat P,
}
830 
831 impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
832     type Item = (usize, &'ch str);
833 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,834     fn drive_unindexed<C>(self, consumer: C) -> C::Result
835     where
836         C: UnindexedConsumer<Self::Item>,
837     {
838         let producer = MatchIndicesProducer {
839             index: 0,
840             chars: self.chars,
841             pattern: &self.pattern,
842         };
843         bridge_unindexed(producer, consumer)
844     }
845 }
846 
847 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
848     type Item = (usize, &'ch str);
849 
split(self) -> (Self, Option<Self>)850     fn split(self) -> (Self, Option<Self>) {
851         match split(self.chars) {
852             Some((left, right)) => (
853                 MatchIndicesProducer {
854                     chars: left,
855                     ..self
856                 },
857                 Some(MatchIndicesProducer {
858                     chars: right,
859                     index: self.index + left.len(),
860                     ..self
861                 }),
862             ),
863             None => (self, None),
864         }
865     }
866 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,867     fn fold_with<F>(self, folder: F) -> F
868     where
869         F: Folder<Self::Item>,
870     {
871         self.pattern
872             .fold_match_indices(self.chars, folder, self.index)
873     }
874 }
875