//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] trait defines this
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.

#[cfg(feature = "hyphenation")]
use hyphenation::{Hyphenator, Standard};

/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// for the `hyphenation::Standard` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of word. Each split is a triple
    /// with a head, a hyphen, and a tail where `head + &hyphen +
    /// &tail == word`. The hyphen can be empty if there is already a
    /// hyphen in the head.
    ///
    /// The splits should go from smallest to longest and should
    /// include no split at all. So the word "technology" could be
    /// split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}

/// Use this as a [`Wrapper.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{Wrapper, NoHyphenation};
///
/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
/// ```
///
/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
#[derive(Clone, Debug)]
pub struct NoHyphenation;

/// `NoHyphenation` implements `WordSplitter` by not splitting the
/// word at all.
impl WordSplitter for NoHyphenation {
    /// Return the single "no split" triple `(word, "", "")`.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        vec![(word, "", "")]
    }
}

/// Simple and default way to split words: splitting on existing
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
/// by default by `Wrapper::new`.
#[derive(Clone, Debug)]
pub struct HyphenSplitter;

/// `HyphenSplitter` is the default `WordSplitter` used by
/// `Wrapper::new`. It will split words on any existing hyphens in the
/// word.
///
/// It will only use hyphens that are surrounded by alphanumeric
/// characters, which prevents a word like "--foo-bar" from being
/// split on the first or second hyphen.
impl WordSplitter for HyphenSplitter {
    /// Return one triple per usable hyphen, shortest head first,
    /// followed by the unsplit word.
    ///
    /// The head of each triple ends with the hyphen itself, so the
    /// hyphen component of every triple is empty.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Split on hyphens, smallest split first. We only use hyphens
        // that are surrounded by alphanumeric characters. This is to
        // avoid splitting on repeated hyphens, such as those found in
        // --foo-bar. A hyphen at the very start or end of the word has
        // no neighbor on one side and is therefore never used.
        let mut triples: Vec<_> = word
            .match_indices('-')
            .filter(|&(idx, _)| {
                // '-' is a single byte, so both `idx` and `idx + 1`
                // are guaranteed to be character boundaries.
                let before = word[..idx].chars().next_back();
                let after = word[idx + 1..].chars().next();
                before.map_or(false, char::is_alphanumeric)
                    && after.map_or(false, char::is_alphanumeric)
            })
            .map(|(idx, _)| {
                // Keep the hyphen at the end of the head.
                let (head, tail) = word.split_at(idx + 1);
                (head, "", tail)
            })
            .collect();

        // The final option is no split at all.
        triples.push((word, "", ""));

        triples
    }
}

/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the hyphenation crate.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    /// Return one triple per break reported by the dictionary,
    /// followed by the unsplit word.
    ///
    /// A `"-"` hyphen is supplied unless the head already ends with
    /// one.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Find splits based on language dictionary.
        let mut triples = Vec::new();
        for n in self.hyphenate(word).breaks {
            let (head, tail) = word.split_at(n);
            let hyphen = if head.ends_with('-') { "" } else { "-" };
            triples.push((head, hyphen, tail));
        }
        // The final option is no split at all.
        triples.push((word, "", ""));

        triples
    }
}