//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] trait defines this
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.
7 
8 #[cfg(feature = "hyphenation")]
9 use hyphenation::{Hyphenator, Standard};
10 
/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// for the `hyphenation::Standard` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of `word`.
    ///
    /// Each split is a triple with a head, a hyphen, and a tail, where
    /// `head` followed by `tail` is the original word. The hyphen is
    /// the text that marks the break after the head; it is empty when
    /// the head already ends with a hyphen, and in the entry that
    /// represents "no split at all".
    ///
    /// The splits should go from the shortest head to the longest and
    /// should include no split at all. So the word "technology" could
    /// be split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
46 
/// A word splitter that never splits.
///
/// Use this as a [`Wrapper.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{Wrapper, NoHyphenation};
///
/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
/// ```
///
/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
#[derive(Clone, Debug)]
pub struct NoHyphenation;
60 
61 /// `NoHyphenation` implements `WordSplitter` by not splitting the
62 /// word at all.
63 impl WordSplitter for NoHyphenation {
split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>64     fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
65         vec![(word, "", "")]
66     }
67 }
68 
/// Simple and default way to split words: splitting on existing
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
/// by default by `Wrapper::new`.
#[derive(Clone, Debug)]
pub struct HyphenSplitter;
76 
77 /// `HyphenSplitter` is the default `WordSplitter` used by
78 /// `Wrapper::new`. It will split words on any existing hyphens in the
79 /// word.
80 ///
81 /// It will only use hyphens that are surrounded by alphanumeric
82 /// characters, which prevents a word like "--foo-bar" from being
83 /// split on the first or second hyphen.
84 impl WordSplitter for HyphenSplitter {
split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>85     fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
86         let mut triples = Vec::new();
87         // Split on hyphens, smallest split first. We only use hyphens
88         // that are surrounded by alphanumeric characters. This is to
89         // avoid splitting on repeated hyphens, such as those found in
90         // --foo-bar.
91         let mut char_indices = word.char_indices();
92         // Early return if the word is empty.
93         let mut prev = match char_indices.next() {
94             None => return vec![(word, "", "")],
95             Some((_, ch)) => ch,
96         };
97 
98         // Find current word, or return early if the word only has a
99         // single character.
100         let (mut idx, mut cur) = match char_indices.next() {
101             None => return vec![(word, "", "")],
102             Some((idx, cur)) => (idx, cur),
103         };
104 
105         for (i, next) in char_indices {
106             if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
107                 let (head, tail) = word.split_at(idx + 1);
108                 triples.push((head, "", tail));
109             }
110             prev = cur;
111             idx = i;
112             cur = next;
113         }
114 
115         // Finally option is no split at all.
116         triples.push((word, "", ""));
117 
118         triples
119     }
120 }
121 
/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the hyphenation crate.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    /// Return one triple per break reported by the dictionary for
    /// `word`, in the order the dictionary reports them, followed by
    /// the unsplit word.
    ///
    /// The hyphen element is `"-"`, unless the head already ends with a
    /// hyphen, in which case it is empty.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Find splits based on language dictionary. Each break is used
        // as the byte offset at which the word is cut in two.
        let mut triples: Vec<_> = self
            .hyphenate(word)
            .breaks
            .into_iter()
            .map(|n| {
                let (head, tail) = word.split_at(n);
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();

        // Final option is no split at all.
        triples.push((word, "", ""));

        triples
    }
}
140