1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! 3.3.3 Preparations for Implicit Processing
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
13 
14 use core::cmp::max;
15 use core::ops::Range;
16 use alloc::vec::Vec;
17 
18 use super::BidiClass::{self, *};
19 use super::level::Level;
20 
21 /// A maximal substring of characters with the same embedding level.
22 ///
23 /// Represented as a range of byte indices.
24 pub type LevelRun = Range<usize>;
25 
26 
27 /// Output of `isolating_run_sequences` (steps X9-X10)
28 #[derive(Debug, PartialEq)]
29 pub struct IsolatingRunSequence {
30     pub runs: Vec<LevelRun>,
31     pub sos: BidiClass, // Start-of-sequence type.
32     pub eos: BidiClass, // End-of-sequence type.
33 }
34 
35 
36 /// Compute the set of isolating run sequences.
37 ///
38 /// An isolating run sequence is a maximal sequence of level runs such that for all level runs
39 /// except the last one in the sequence, the last character of the run is an isolate initiator
40 /// whose matching PDI is the first character of the next level run in the sequence.
41 ///
42 /// Note: This function does *not* return the sequences in order by their first characters.
43 #[cfg_attr(feature = "flame_it", flamer::flame)]
isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec<IsolatingRunSequence>44 pub fn isolating_run_sequences(
45     para_level: Level,
46     original_classes: &[BidiClass],
47     levels: &[Level],
48 ) -> Vec<IsolatingRunSequence> {
49     let runs = level_runs(levels, original_classes);
50 
51     // Compute the set of isolating run sequences.
52     // <http://www.unicode.org/reports/tr9/#BD13>
53     let mut sequences = Vec::with_capacity(runs.len());
54 
55     // When we encounter an isolate initiator, we push the current sequence onto the
56     // stack so we can resume it after the matching PDI.
57     let mut stack = vec![Vec::new()];
58 
59     for run in runs {
60         assert!(run.len() > 0);
61         assert!(!stack.is_empty());
62 
63         let start_class = original_classes[run.start];
64         let end_class = original_classes[run.end - 1];
65 
66         let mut sequence = if start_class == PDI && stack.len() > 1 {
67             // Continue a previous sequence interrupted by an isolate.
68             stack.pop().unwrap()
69         } else {
70             // Start a new sequence.
71             Vec::new()
72         };
73 
74         sequence.push(run);
75 
76         if let RLI | LRI | FSI = end_class {
77             // Resume this sequence after the isolate.
78             stack.push(sequence);
79         } else {
80             // This sequence is finished.
81             sequences.push(sequence);
82         }
83     }
84     // Pop any remaning sequences off the stack.
85     sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
86 
87     // Determine the `sos` and `eos` class for each sequence.
88     // <http://www.unicode.org/reports/tr9/#X10>
89     sequences
90         .into_iter()
91         .map(|sequence: Vec<LevelRun>| {
92             assert!(!sequence.is_empty());
93 
94             let start_of_seq = sequence[0].start;
95             let end_of_seq = sequence[sequence.len() - 1].end;
96             let seq_level = levels[start_of_seq];
97 
98             #[cfg(test)]
99             for run in sequence.clone() {
100                 for idx in run {
101                     if not_removed_by_x9(&original_classes[idx]) {
102                         assert_eq!(seq_level, levels[idx]);
103                     }
104                 }
105             }
106 
107             // Get the level of the last non-removed char before the runs.
108             let pred_level = match original_classes[..start_of_seq].iter().rposition(
109                 not_removed_by_x9,
110             ) {
111                 Some(idx) => levels[idx],
112                 None => para_level,
113             };
114 
115             // Get the level of the next non-removed char after the runs.
116             let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] {
117                 para_level
118             } else {
119                 match original_classes[end_of_seq..].iter().position(
120                     not_removed_by_x9,
121                 ) {
122                     Some(idx) => levels[end_of_seq + idx],
123                     None => para_level,
124                 }
125             };
126 
127             IsolatingRunSequence {
128                 runs: sequence,
129                 sos: max(seq_level, pred_level).bidi_class(),
130                 eos: max(seq_level, succ_level).bidi_class(),
131             }
132         })
133         .collect()
134 }
135 
136 /// Finds the level runs in a paragraph.
137 ///
138 /// <http://www.unicode.org/reports/tr9/#BD7>
level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun>139 fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
140     assert_eq!(levels.len(), original_classes.len());
141 
142     let mut runs = Vec::new();
143     if levels.is_empty() {
144         return runs;
145     }
146 
147     let mut current_run_level = levels[0];
148     let mut current_run_start = 0;
149     for i in 1..levels.len() {
150         if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
151             // End the last run and start a new one.
152             runs.push(current_run_start..i);
153             current_run_level = levels[i];
154             current_run_start = i;
155         }
156     }
157     runs.push(current_run_start..levels.len());
158 
159     runs
160 }
161 
162 /// Should this character be ignored in steps after X9?
163 ///
164 /// <http://www.unicode.org/reports/tr9/#X9>
removed_by_x9(class: BidiClass) -> bool165 pub fn removed_by_x9(class: BidiClass) -> bool {
166     match class {
167         RLE | LRE | RLO | LRO | PDF | BN => true,
168         _ => false,
169     }
170 }
171 
172 // For use as a predicate for `position` / `rposition`
not_removed_by_x9(class: &BidiClass) -> bool173 pub fn not_removed_by_x9(class: &BidiClass) -> bool {
174     !removed_by_x9(*class)
175 }
176 
177 #[cfg(test)]
178 mod tests {
179     use super::*;
180 
181     #[test]
test_level_runs()182     fn test_level_runs() {
183         assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);
184         assert_eq!(
185             level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]),
186             &[0..3, 3..5, 5..6, 6..8]
187         );
188     }
189 
190     // From <http://www.unicode.org/reports/tr9/#BD13>
191     #[rustfmt::skip]
192     #[test]
test_isolating_run_sequences()193     fn test_isolating_run_sequences() {
194 
195         // == Example 1 ==
196         // text1·RLE·text2·PDF·RLE·text3·PDF·text4
197         // index        0    1  2    3    4  5    6  7
198         let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
199         let levels =  &[0,   1, 1,   1,   1, 1,   1, 0];
200         let para_level = Level::ltr();
201         let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
202         sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
203         assert_eq!(
204             sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
205             vec![vec![0..2], vec![2..7], vec![7..8]]
206         );
207 
208         // == Example 2 ==
209         // text1·RLI·text2·PDI·RLI·text3·PDI·text4
210         // index        0    1  2    3    4  5    6  7
211         let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
212         let levels =  &[0,   0, 1,   0,   0, 1,   0, 0];
213         let para_level = Level::ltr();
214         let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
215         sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
216         assert_eq!(
217             sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
218             vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
219         );
220 
221         // == Example 3 ==
222         // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
223         // index        0    1  2    3  4    5  6    7  8    9  10  11  12
224         let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI,  L];
225         let levels =  &[0,   0, 1,   1, 2,   3, 3,   3, 2,   1, 1,   0,  0];
226         let para_level = Level::ltr();
227         let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
228         sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
229         assert_eq!(
230             sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
231             vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
232         );
233     }
234 
235     // From <http://www.unicode.org/reports/tr9/#X10>
236     #[rustfmt::skip]
237     #[test]
test_isolating_run_sequences_sos_and_eos()238     fn test_isolating_run_sequences_sos_and_eos() {
239 
240         // == Example 1 ==
241         // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
242         // index        0    1  2    3  4    5  6    7    8  9   10  11
243         let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF,  L];
244         let levels =  &[0,   1, 1,   2, 2,   2, 1,   1,   1, 1,   1,  0];
245         let para_level = Level::ltr();
246         let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
247         sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
248 
249         // text1
250         assert_eq!(
251             &sequences[0],
252             &IsolatingRunSequence {
253                 runs: vec![0..2],
254                 sos: L,
255                 eos: R,
256             }
257         );
258 
259         // text2
260         assert_eq!(
261             &sequences[1],
262             &IsolatingRunSequence {
263                 runs: vec![2..4],
264                 sos: R,
265                 eos: L,
266             }
267         );
268 
269         // text3
270         assert_eq!(
271             &sequences[2],
272             &IsolatingRunSequence {
273                 runs: vec![4..6],
274                 sos: L,
275                 eos: L,
276             }
277         );
278 
279         // text4 text5
280         assert_eq!(
281             &sequences[3],
282             &IsolatingRunSequence {
283                 runs: vec![6..11],
284                 sos: L,
285                 eos: R,
286             }
287         );
288 
289         // text6
290         assert_eq!(
291             &sequences[4],
292             &IsolatingRunSequence {
293                 runs: vec![11..12],
294                 sos: R,
295                 eos: L,
296             }
297         );
298 
299         // == Example 2 ==
300         // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
301         // index        0    1  2    3  4    5  6    7    8  9   10  11
302         let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI,  L];
303         let levels =  &[0,   0, 1,   1, 2,   1, 1,   0,   0, 1,   0,  0];
304         let para_level = Level::ltr();
305         let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
306         sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
307 
308         // text1·RLI·PDI·RLI·PDI·text6
309         assert_eq!(
310             &sequences[0],
311             &IsolatingRunSequence {
312                 runs: vec![0..2, 7..9, 10..12],
313                 sos: L,
314                 eos: L,
315             }
316         );
317 
318         // text2·LRI·PDI·text4
319         assert_eq!(
320             &sequences[1],
321             &IsolatingRunSequence {
322                 runs: vec![2..4, 5..7],
323                 sos: R,
324                 eos: R,
325             }
326         );
327 
328         // text3
329         assert_eq!(
330             &sequences[2],
331             &IsolatingRunSequence {
332                 runs: vec![4..5],
333                 sos: L,
334                 eos: L,
335             }
336         );
337 
338         // text5
339         assert_eq!(
340             &sequences[3],
341             &IsolatingRunSequence {
342                 runs: vec![9..10],
343                 sos: R,
344                 eos: R,
345             }
346         );
347     }
348 
349     #[test]
test_removed_by_x9()350     fn test_removed_by_x9() {
351         let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
352         let not_classes = &[L, RLI, AL, LRI, PDI];
353         for x in rem_classes {
354             assert_eq!(removed_by_x9(*x), true);
355         }
356         for x in not_classes {
357             assert_eq!(removed_by_x9(*x), false);
358         }
359     }
360 
361     #[test]
test_not_removed_by_x9()362     fn test_not_removed_by_x9() {
363         let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
364         for x in non_x9_classes {
365             assert_eq!(not_removed_by_x9(&x), true);
366         }
367     }
368 }
369