1 //! Parallel iterator types for [strings][std::str] 2 //! 3 //! You will rarely need to interact with this module directly unless you need 4 //! to name one of the iterator types. 5 //! 6 //! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`] 7 //! reference a `Pattern` trait which is not visible outside this crate. 8 //! This trait is intentionally kept private, for use only by Rayon itself. 9 //! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. 10 //! 11 //! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split 12 //! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator 13 //! 14 //! [std::str]: https://doc.rust-lang.org/stable/std/str/ 15 16 use crate::iter::plumbing::*; 17 use crate::iter::*; 18 use crate::split_producer::*; 19 20 /// Test if a byte is the start of a UTF-8 character. 21 /// (extracted from `str::is_char_boundary`) 22 #[inline] 23 fn is_char_boundary(b: u8) -> bool { 24 // This is bit magic equivalent to: b < 128 || b >= 192 25 (b as i8) >= -0x40 26 } 27 28 /// Find the index of a character boundary near the midpoint. 29 #[inline] 30 fn find_char_midpoint(chars: &str) -> usize { 31 let mid = chars.len() / 2; 32 33 // We want to split near the midpoint, but we need to find an actual 34 // character boundary. So we look at the raw bytes, first scanning 35 // forward from the midpoint for a boundary, then trying backward. 36 let (left, right) = chars.as_bytes().split_at(mid); 37 match right.iter().cloned().position(is_char_boundary) { 38 Some(i) => mid + i, 39 None => left 40 .iter() 41 .cloned() 42 .rposition(is_char_boundary) 43 .unwrap_or(0), 44 } 45 } 46 47 /// Try to split a string near the midpoint. 48 #[inline] 49 fn split(chars: &str) -> Option<(&str, &str)> { 50 let index = find_char_midpoint(chars); 51 if index > 0 { 52 Some(chars.split_at(index)) 53 } else { 54 None 55 } 56 } 57 58 /// Parallel extensions for strings. 
pub trait ParallelString {
    /// Returns a plain string slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_string(&self) -> &str;

    /// Returns a parallel iterator over the characters of a string.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
    /// assert_eq!(Some('o'), max);
    /// ```
    fn par_chars(&self) -> Chars<'_> {
        Chars {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over the characters of a string, with their positions.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
    /// assert_eq!(Some((1, 'e')), min);
    /// ```
    fn par_char_indices(&self) -> CharIndices<'_> {
        CharIndices {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over the bytes of a string.
    ///
    /// Note that multi-byte sequences (for code points greater than `U+007F`)
    /// are produced as separate items, but will not be split across threads.
    /// If you would prefer an indexed iterator without that guarantee, consider
    /// `string.as_bytes().par_iter().cloned()` instead.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let max = "hello".par_bytes().max();
    /// assert_eq!(Some(b'o'), max);
    /// ```
    fn par_bytes(&self) -> Bytes<'_> {
        Bytes {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over a string encoded as UTF-16.
    ///
    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
    /// produced as separate items, but will not be split across threads.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let max = "hello".par_encode_utf16().max();
    /// assert_eq!(Some(b'o' as u16), max);
    ///
    /// let text = "Zażółć gęślą jaźń";
    /// let utf8_len = text.len();
    /// let utf16_len = text.par_encode_utf16().count();
    /// assert!(utf16_len <= utf8_len);
    /// ```
    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
        EncodeUtf16 {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over substrings separated by a
    /// given character or predicate, similar to `str::split`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let total = "1, 2, buckle, 3, 4, door"
    ///    .par_split(',')
    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
    ///    .sum();
    /// assert_eq!(10, total);
    /// ```
    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
        Split::new(self.as_parallel_string(), separator)
    }

    /// Returns a parallel iterator over substrings terminated by a
    /// given character or predicate, similar to `str::split_terminator`.
    /// It's equivalent to `par_split`, except it doesn't produce an empty
    /// substring after a trailing terminator.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let parts: Vec<_> = "((1 + 3) * 2)"
    ///     .par_split_terminator(|c| c == '(' || c == ')')
    ///     .collect();
    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
    /// ```
    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
        SplitTerminator::new(self.as_parallel_string(), terminator)
    }

    /// Returns a parallel iterator over the lines of a string, ending with an
    /// optional carriage return and with a newline (`\r\n` or just `\n`).
    /// The final line ending is optional, and line endings are not included in
    /// the output strings.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let lengths: Vec<_> = "hello world\nfizbuzz"
    ///     .par_lines()
    ///     .map(|l| l.len())
    ///     .collect();
    /// assert_eq!(vec![11, 7], lengths);
    /// ```
    fn par_lines(&self) -> Lines<'_> {
        Lines(self.as_parallel_string())
    }

    /// Returns a parallel iterator over the sub-slices of a string that are
    /// separated by any amount of whitespace.
    ///
    /// As with `str::split_whitespace`, 'whitespace' is defined according to
    /// the terms of the Unicode Derived Core Property `White_Space`.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let longest = "which is the longest word?"
    ///     .par_split_whitespace()
    ///     .max_by_key(|word| word.len());
    /// assert_eq!(Some("longest"), longest);
    /// ```
    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
        SplitWhitespace(self.as_parallel_string())
    }

    /// Returns a parallel iterator over substrings that match a
    /// given character or predicate, similar to `str::matches`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let total = "1, 2, buckle, 3, 4, door"
    ///    .par_matches(char::is_numeric)
    ///    .map(|s| s.parse::<i32>().expect("digit"))
    ///    .sum();
    /// assert_eq!(10, total);
    /// ```
    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
        Matches {
            chars: self.as_parallel_string(),
            pattern,
        }
    }

    /// Returns a parallel iterator over substrings that match a given character
    /// or predicate, with their positions, similar to `str::match_indices`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
    ///    .par_match_indices(char::is_numeric)
    ///    .collect();
    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
    /// ```
    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
        MatchIndices {
            chars: self.as_parallel_string(),
            pattern,
        }
    }
}

/// `str` is its own parallel string: the slice is handed back unchanged.
impl ParallelString for str {
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}

// /////////////////////////////////////////////////////////////////////////

/// We hide the `Pattern` trait in a private module, as its API is not meant
/// for general consumption.  If we could have privacy on trait items, then it
/// would be nicer to have its basic existence and implementors public while
/// keeping all of the methods private.
mod private {
    use crate::iter::plumbing::Folder;

    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
    /// `std::str::pattern::{Pattern, Searcher}`.
    ///
    /// Implementing this trait is not permitted outside of `rayon`.
    pub trait Pattern: Sized + Sync + Send {
        // NOTE(review): `private_decl!` is defined elsewhere in the crate;
        // presumably it declares the item that seals this trait -- confirm
        // against the macro definition.
        private_decl! {}

        /// Byte offset of the first match of this pattern in `haystack`, if any.
        fn find_in(&self, haystack: &str) -> Option<usize>;

        /// Byte offset of the last match of this pattern in `haystack`, if any.
        fn rfind_in(&self, haystack: &str) -> Option<usize>;

        /// Whether `haystack` ends with a match of this pattern.
        fn is_suffix_of(&self, haystack: &str) -> bool;

        /// Feeds the pieces of `haystack` separated by this pattern into
        /// `folder` and returns the folder. When `skip_last` is set, the
        /// final piece is dropped before folding.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>;

        /// Feeds every match of this pattern in `haystack` into `folder`
        /// and returns the folder.
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;

        /// Feeds every `(index, match)` pair for this pattern in `haystack`
        /// into `folder`, with `base` added to each index, and returns the
        /// folder.
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>;
    }
}
use self::private::Pattern;

/// Builds a closure that shifts the index of an `(index, value)` pair by
/// `base`, leaving the value untouched.
///
/// Used to turn indices that are relative to a sub-slice into indices
/// relative to the string the sub-slice was cut from.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |(i, x)| (base + i, x)
}

/// A single `char` as a pattern: every method defers to the matching `str`
/// method with the character itself as the needle.
impl Pattern for char {
    private_impl! {}

    #[inline]
    fn find_in(&self, chars: &str) -> Option<usize> {
        chars.find(*self)
    }

    #[inline]
    fn rfind_in(&self, chars: &str) -> Option<usize> {
        chars.rfind(*self)
    }

    #[inline]
    fn is_suffix_of(&self, chars: &str) -> bool {
        chars.ends_with(*self)
    }

    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
    where
        F: Folder<&'ch str>,
    {
        let mut split = chars.split(*self);
        if skip_last {
            // Discard the final piece from the back before anything is consumed.
            split.next_back();
        }
        folder.consume_iter(split)
    }

    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
    where
        F: Folder<&'ch str>,
    {
        folder.consume_iter(chars.matches(*self))
    }

    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
    where
        F: Folder<(usize, &'ch str)>,
    {
        // `match_indices` reports offsets within `chars`; `offset(base)` re-bases them.
        folder.consume_iter(chars.match_indices(*self).map(offset(base)))
    }
}

/// A thread-safe `Fn(char) -> bool` predicate as a pattern: every method
/// defers to the matching `str` method with a reference to the predicate as
/// the needle.
impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
    private_impl! {}

    fn find_in(&self, chars: &str) -> Option<usize> {
        chars.find(self)
    }

    fn rfind_in(&self, chars: &str) -> Option<usize> {
        chars.rfind(self)
    }

    fn is_suffix_of(&self, chars: &str) -> bool {
        chars.ends_with(self)
    }

    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
    where
        F: Folder<&'ch str>,
    {
        let mut split = chars.split(self);
        if skip_last {
            // Discard the final piece from the back before anything is consumed.
            split.next_back();
        }
        folder.consume_iter(split)
    }

    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
    where
        F: Folder<&'ch str>,
    {
        folder.consume_iter(chars.matches(self))
    }

    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
    where
        F: Folder<(usize, &'ch str)>,
    {
        // `match_indices` reports offsets within `chars`; `offset(base)` re-bases them.
        folder.consume_iter(chars.match_indices(self).map(offset(base)))
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the characters of a string
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    chars: &'ch str,
}

/// Splittable unit of work behind `Chars`: the sub-slice still to be iterated.
struct CharsProducer<'ch> {
    chars: &'ch str,
}

impl<'ch> ParallelIterator for Chars<'ch> {
    type Item = char;

    /// Wraps the whole string in a `CharsProducer` and passes it, together
    /// with `consumer`, to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
    }
}

impl<'ch> UnindexedProducer for CharsProducer<'ch> {
    type Item = char;

    /// Divides the slice at a character boundary near its midpoint. When the
    /// helper finds no usable split point, `self` is returned whole with
    /// `None` as the second half.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        match split(self.chars) {
            Some((left, right)) => (
                CharsProducer { chars: left },
                Some(CharsProducer { chars: right }),
            ),
            None => (self, None),
        }
    }

    /// Feeds the characters of this slice into `folder`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        folder.consume_iter(self.chars.chars())
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the characters of a string, with their positions
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    chars: &'ch str,
}

/// Splittable unit of work behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the string the iterator was created
    /// from (starts at 0 and grows by the left half's length on each split).
    index: usize,
    /// The sub-slice still to be iterated.
    chars: &'ch str,
}

impl<'ch> ParallelIterator for CharIndices<'ch> {
    type Item = (usize, char);

    /// Starts a producer over the whole string at offset 0 and passes it,
    /// together with `consumer`, to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = CharIndicesProducer {
            index: 0,
            chars: self.chars,
        };
        bridge_unindexed(producer, consumer)
    }
}

impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
    type Item = (usize, char);

    /// Divides the slice at a character boundary near its midpoint; the left
    /// half keeps the current offset and the right half starts
    /// `left.len()` bytes further on.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        match split(self.chars) {
            Some((left, right)) => (
                CharIndicesProducer {
                    chars: left,
                    ..self
                },
                Some(CharIndicesProducer {
                    chars: right,
                    index: self.index + left.len(),
                }),
            ),
            None => (self, None),
        }
    }

    /// Feeds `(position, char)` pairs into `folder`, with positions re-based
    /// by this producer's `index`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        let base = self.index;
        folder.consume_iter(self.chars.char_indices().map(offset(base)))
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the bytes of a string
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    chars: &'ch str,
}

/// Splittable unit of work behind `Bytes`: the sub-slice still to be iterated.
struct BytesProducer<'ch> {
    chars: &'ch str,
}

impl<'ch> ParallelIterator for Bytes<'ch> {
    type Item = u8;

    /// Wraps the whole string in a `BytesProducer` and passes it, together
    /// with `consumer`, to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
    }
}

impl<'ch> UnindexedProducer for BytesProducer<'ch> {
    type Item = u8;

    /// Divides the slice at a character boundary (not an arbitrary byte), so
    /// the bytes of one character always stay in the same producer.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        match split(self.chars) {
            Some((left, right)) => (
                BytesProducer { chars: left },
                Some(BytesProducer { chars: right }),
            ),
            None => (self, None),
        }
    }

    /// Feeds the bytes of this slice into `folder`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        folder.consume_iter(self.chars.bytes())
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over a string encoded as UTF-16
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    chars: &'ch str,
}

/// Splittable unit of work behind `EncodeUtf16`: the sub-slice still to be encoded.
struct EncodeUtf16Producer<'ch> {
    chars: &'ch str,
}

impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
    type Item = u16;

    /// Wraps the whole string in an `EncodeUtf16Producer` and passes it,
    /// together with `consumer`, to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
    }
}

impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
    type Item = u16;

    /// Divides the slice at a character boundary, so the UTF-16 units of one
    /// character always come from the same producer.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        match split(self.chars) {
            Some((left, right)) => (
                EncodeUtf16Producer { chars: left },
                Some(EncodeUtf16Producer { chars: right }),
            ),
            None => (self, None),
        }
    }

    /// Feeds the UTF-16 code units of this slice into `folder`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        folder.consume_iter(self.chars.encode_utf16())
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings separated by a pattern
#[derive(Debug, Clone)]
pub struct Split<'ch, P: Pattern> {
    chars: &'ch str,
    separator: P,
}

impl<'ch, P: Pattern> Split<'ch, P> {
    /// Pairs the string to split with the separator pattern.
    fn new(chars: &'ch str, separator: P) -> Self {
        Split { chars, separator }
    }
}

impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
    type Item = &'ch str;

    /// Builds a `SplitProducer` over the string, borrowing the separator
    /// from `self`, and passes it to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.chars, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
///
/// NOTE(review): the `Fissile` contract itself is declared outside this file
/// (presumably in `crate::split_producer`); the method notes below describe
/// only what these bodies do.
impl<'ch, P: Pattern> Fissile<P> for &'ch str {
    /// Length of the string in bytes.
    fn length(&self) -> usize {
        self.len()
    }

    /// Byte index of a character boundary near the middle of `self[..end]`.
    fn midpoint(&self, end: usize) -> usize {
        // First find a suitable UTF-8 boundary.
        find_char_midpoint(&self[..end])
    }

    /// Looks for the first separator inside `self[start..end]`. The result is
    /// whatever `find_in` reports for that sub-slice, i.e. an offset counted
    /// from `start`.
    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        separator.find_in(&self[start..end])
    }

    /// Looks for the last separator inside `self[..end]`.
    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        separator.rfind_in(&self[..end])
    }

    /// Cuts the string at byte `index` and removes one character from the
    /// front of the right-hand part, so neither half contains the separator
    /// that sits at `index`.
    fn split_once(self, index: usize) -> (Self, Self) {
        let (left, right) = self.split_at(index);
        let mut right_iter = right.chars();
        right_iter.next(); // skip the separator
        (left, right_iter.as_str())
    }

    /// Delegates to the pattern's own `fold_splits` for this string.
    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
    {
        separator.fold_splits(self, folder, skip_last)
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings separated by a terminator pattern
#[derive(Debug, Clone)]
pub struct SplitTerminator<'ch, P: Pattern> {
    chars: &'ch str,
    terminator: P,
}

/// Splittable unit of work behind `SplitTerminator`: a `SplitProducer` plus
/// the flag that says whether this producer must drop its final piece.
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
    splitter: SplitProducer<'sep, P, &'ch str>,
    /// Passed to the splitter's `fold_with`; when set, the last piece is
    /// not yielded.
    skip_last: bool,
}

impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
    /// Pairs the string to split with the terminator pattern.
    fn new(chars: &'ch str, terminator: P) -> Self {
        SplitTerminator { chars, terminator }
    }
}

impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
    /// Creates the initial producer for the whole string.
    ///
    /// `skip_last` starts out set when the string is empty or ends with the
    /// terminator: in both cases a plain split would end with an empty
    /// piece, which a terminator-style split must not yield.
    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
        SplitTerminatorProducer {
            splitter: SplitProducer::new(chars, terminator),
            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
        }
    }
}

impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
    type Item = &'ch str;

    /// Builds a `SplitTerminatorProducer` over the string, borrowing the
    /// terminator from `self`, and passes it to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
        bridge_unindexed(producer, consumer)
    }
}

impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
    type Item = &'ch str;

    /// Splits the inner `SplitProducer`. If a right half was produced it
    /// inherits `skip_last` and the flag is cleared on the left half, so the
    /// flag always travels with the producer that holds the end of the
    /// string. Without a right half, `self` keeps its flag.
    fn split(mut self) -> (Self, Option<Self>) {
        let (left, right) = self.splitter.split();
        self.splitter = left;
        let right = right.map(|right| {
            // Move the flag over to the right-hand producer.
            let skip_last = self.skip_last;
            self.skip_last = false;
            SplitTerminatorProducer {
                splitter: right,
                skip_last,
            }
        });
        (self, right)
    }

    /// Folds the inner splitter's pieces into `folder`, passing `skip_last`
    /// along.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        self.splitter.fold_with(folder, self.skip_last)
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over lines in a string
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);

/// Removes a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    if line.ends_with('\r') {
        // '\r' is one byte long, so cutting one byte off the end is a valid slice.
        &line[..line.len() - 1]
    } else {
        line
    }
}

impl<'ch> ParallelIterator for Lines<'ch> {
    type Item = &'ch str;

    /// Splits on `'\n'` with terminator semantics, then strips one trailing
    /// `'\r'` from each piece before handing it to `consumer`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        self.0
            .par_split_terminator('\n')
            .map(no_carriage_return)
            .drive_unindexed(consumer)
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings separated by whitespace
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);

/// Filter predicate: keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    !s.is_empty()
}

impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
    type Item = &'ch str;

    /// Splits on every whitespace character and discards the empty pieces
    /// that the split yields (which is how "any amount of whitespace"
    /// collapses into one separator).
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        self.0
            .par_split(char::is_whitespace)
            .filter(not_empty)
            .drive_unindexed(consumer)
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings that match a pattern
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    chars: &'ch str,
    pattern: P,
}

/// Splittable unit of work behind `Matches`: a sub-slice plus a borrow of
/// the pattern owned by the iterator.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    chars: &'ch str,
    pattern: &'pat P,
}

impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
    type Item = &'ch str;

    /// Builds a producer over the whole string, borrowing the pattern from
    /// `self`, and passes it to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = MatchesProducer {
            chars: self.chars,
            pattern: &self.pattern,
        };
        bridge_unindexed(producer, consumer)
    }
}

impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
    type Item = &'ch str;

    /// Divides the slice at a character boundary near its midpoint; both
    /// halves share the same pattern reference.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        // NOTE(review): cutting at any character boundary relies on matches
        // being single characters, which holds for the `Pattern` impls in
        // this file (`char` and `Fn(char) -> bool`) -- revisit if a
        // multi-character pattern is ever added.
        match split(self.chars) {
            Some((left, right)) => (
                MatchesProducer {
                    chars: left,
                    ..self
                },
                Some(MatchesProducer {
                    chars: right,
                    ..self
                }),
            ),
            None => (self, None),
        }
    }

    /// Feeds every match found in this slice into `folder`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        self.pattern.fold_matches(self.chars, folder)
    }
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings that match a pattern, with their positions
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    chars: &'ch str,
    pattern: P,
}

/// Splittable unit of work behind `MatchIndices`.
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    /// Byte offset of `chars` within the string the iterator was created
    /// from (starts at 0 and grows by the left half's length on each split).
    index: usize,
    /// The sub-slice still to be searched.
    chars: &'ch str,
    /// Borrow of the pattern owned by the iterator.
    pattern: &'pat P,
}

impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
    type Item = (usize, &'ch str);

    /// Starts a producer over the whole string at offset 0, borrowing the
    /// pattern from `self`, and passes it to `bridge_unindexed`.
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = MatchIndicesProducer {
            index: 0,
            chars: self.chars,
            pattern: &self.pattern,
        };
        bridge_unindexed(producer, consumer)
    }
}

impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
    type Item = (usize, &'ch str);

    /// Divides the slice at a character boundary near its midpoint; the left
    /// half keeps the current offset and the right half starts
    /// `left.len()` bytes further on. Both halves share the pattern
    /// reference.
    fn split(self) -> (Self, Option<Self>) {
        // The unqualified `split(..)` is the module-level helper on `&str`,
        // not a recursive call to this method.
        match split(self.chars) {
            Some((left, right)) => (
                MatchIndicesProducer {
                    chars: left,
                    ..self
                },
                Some(MatchIndicesProducer {
                    chars: right,
                    index: self.index + left.len(),
                    ..self
                }),
            ),
            None => (self, None),
        }
    }

    /// Feeds every `(index, match)` pair found in this slice into `folder`,
    /// with indices re-based by this producer's `index`.
    fn fold_with<F>(self, folder: F) -> F
    where
        F: Folder<Self::Item>,
    {
        self.pattern
            .fold_match_indices(self.chars, folder, self.index)
    }
}