1 use {
2 crate::*,
3 minimad::*,
4 unicode_width::UnicodeWidthChar,
5 };
6
7 #[derive(Debug)]
8 pub(crate) struct Token<'s> {
9 pub compound: Compound<'s>,
10 pub blank: bool,
11 pub width: usize,
12 pub start_in_compound: usize,
13 pub end_in_compound: usize,
14 }
15
16 impl<'s> Token<'s> {
to_compound(&self) -> Compound<'s>17 pub fn to_compound(&self) -> Compound<'s> {
18 let mut compound = self.compound.clone();
19 compound.set_str(&self.compound.src[self.start_in_compound..self.end_in_compound]);
20 compound
21 }
22 }
23
24 /// Cut a composite into token, each one being either only spaces or without space, and
25 /// each one from one compound
tokenize<'s, 'c>( composite: &'c Composite<'s>, max_token_width: usize, ) -> Vec<Token<'s>>26 pub(crate) fn tokenize<'s, 'c>(
27 composite: &'c Composite<'s>,
28 max_token_width: usize,
29 ) -> Vec<Token<'s>> {
30 let mut tokens: Vec<Token<'s>> = Vec::new();
31 for compound in &composite.compounds {
32 let mut token: Option<Token> = None;
33 for (idx, char) in compound.src.char_indices() {
34 let blank = char.is_whitespace() && !compound.code;
35 let char_width = char.width().unwrap_or(0);
36 if let Some(token) = token.as_mut() {
37 if token.blank == blank && token.width + char_width <= max_token_width {
38 token.width += char_width;
39 token.end_in_compound += char.len_utf8();
40 continue;
41 }
42 }
43 let new_token = Token {
44 compound: compound.clone(),
45 blank,
46 width: char_width,
47 start_in_compound: idx,
48 end_in_compound: idx + char.len_utf8(),
49 };
50 if let Some(token) = token.replace(new_token) {
51 tokens.push(token);
52 }
53 }
54 if let Some(token) = token {
55 tokens.push(token);
56 }
57 }
58 tokens
59 }
60
61
62