1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 //! 3.3.3 Preparations for Implicit Processing
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
13
14 use std::cmp::max;
15 use std::ops::Range;
16 use matches::matches;
17
18 use super::BidiClass::{self, *};
19 use super::level::Level;
20
21 /// A maximal substring of characters with the same embedding level.
22 ///
23 /// Represented as a range of byte indices.
24 pub type LevelRun = Range<usize>;
25
26
27 /// Output of `isolating_run_sequences` (steps X9-X10)
28 #[derive(Debug, PartialEq)]
29 pub struct IsolatingRunSequence {
30 pub runs: Vec<LevelRun>,
31 pub sos: BidiClass, // Start-of-sequence type.
32 pub eos: BidiClass, // End-of-sequence type.
33 }
34
35
36 /// Compute the set of isolating run sequences.
37 ///
38 /// An isolating run sequence is a maximal sequence of level runs such that for all level runs
39 /// except the last one in the sequence, the last character of the run is an isolate initiator
40 /// whose matching PDI is the first character of the next level run in the sequence.
41 ///
42 /// Note: This function does *not* return the sequences in order by their first characters.
43 #[cfg_attr(feature = "flame_it", flamer::flame)]
isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec<IsolatingRunSequence>44 pub fn isolating_run_sequences(
45 para_level: Level,
46 original_classes: &[BidiClass],
47 levels: &[Level],
48 ) -> Vec<IsolatingRunSequence> {
49 let runs = level_runs(levels, original_classes);
50
51 // Compute the set of isolating run sequences.
52 // <http://www.unicode.org/reports/tr9/#BD13>
53 let mut sequences = Vec::with_capacity(runs.len());
54
55 // When we encounter an isolate initiator, we push the current sequence onto the
56 // stack so we can resume it after the matching PDI.
57 let mut stack = vec![Vec::new()];
58
59 for run in runs {
60 assert!(run.len() > 0);
61 assert!(!stack.is_empty());
62
63 let start_class = original_classes[run.start];
64 let end_class = original_classes[run.end - 1];
65
66 let mut sequence = if start_class == PDI && stack.len() > 1 {
67 // Continue a previous sequence interrupted by an isolate.
68 stack.pop().unwrap()
69 } else {
70 // Start a new sequence.
71 Vec::new()
72 };
73
74 sequence.push(run);
75
76 if matches!(end_class, RLI | LRI | FSI) {
77 // Resume this sequence after the isolate.
78 stack.push(sequence);
79 } else {
80 // This sequence is finished.
81 sequences.push(sequence);
82 }
83 }
84 // Pop any remaning sequences off the stack.
85 sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
86
87 // Determine the `sos` and `eos` class for each sequence.
88 // <http://www.unicode.org/reports/tr9/#X10>
89 sequences
90 .into_iter()
91 .map(|sequence: Vec<LevelRun>| {
92 assert!(!sequence.is_empty());
93
94 let start_of_seq = sequence[0].start;
95 let end_of_seq = sequence[sequence.len() - 1].end;
96 let seq_level = levels[start_of_seq];
97
98 #[cfg(test)]
99 for run in sequence.clone() {
100 for idx in run {
101 if not_removed_by_x9(&original_classes[idx]) {
102 assert_eq!(seq_level, levels[idx]);
103 }
104 }
105 }
106
107 // Get the level of the last non-removed char before the runs.
108 let pred_level = match original_classes[..start_of_seq].iter().rposition(
109 not_removed_by_x9,
110 ) {
111 Some(idx) => levels[idx],
112 None => para_level,
113 };
114
115 // Get the level of the next non-removed char after the runs.
116 let succ_level = if matches!(original_classes[end_of_seq - 1], RLI | LRI | FSI) {
117 para_level
118 } else {
119 match original_classes[end_of_seq..].iter().position(
120 not_removed_by_x9,
121 ) {
122 Some(idx) => levels[end_of_seq + idx],
123 None => para_level,
124 }
125 };
126
127 IsolatingRunSequence {
128 runs: sequence,
129 sos: max(seq_level, pred_level).bidi_class(),
130 eos: max(seq_level, succ_level).bidi_class(),
131 }
132 })
133 .collect()
134 }
135
136 /// Finds the level runs in a paragraph.
137 ///
138 /// <http://www.unicode.org/reports/tr9/#BD7>
level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun>139 fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
140 assert_eq!(levels.len(), original_classes.len());
141
142 let mut runs = Vec::new();
143 if levels.is_empty() {
144 return runs;
145 }
146
147 let mut current_run_level = levels[0];
148 let mut current_run_start = 0;
149 for i in 1..levels.len() {
150 if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
151 // End the last run and start a new one.
152 runs.push(current_run_start..i);
153 current_run_level = levels[i];
154 current_run_start = i;
155 }
156 }
157 runs.push(current_run_start..levels.len());
158
159 runs
160 }
161
162 /// Should this character be ignored in steps after X9?
163 ///
164 /// <http://www.unicode.org/reports/tr9/#X9>
removed_by_x9(class: BidiClass) -> bool165 pub fn removed_by_x9(class: BidiClass) -> bool {
166 matches!(class, RLE | LRE | RLO | LRO | PDF | BN)
167 }
168
169 // For use as a predicate for `position` / `rposition`
not_removed_by_x9(class: &BidiClass) -> bool170 pub fn not_removed_by_x9(class: &BidiClass) -> bool {
171 !removed_by_x9(*class)
172 }
173
174 #[cfg(test)]
175 mod tests {
176 use super::*;
177
178 #[test]
test_level_runs()179 fn test_level_runs() {
180 assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);
181 assert_eq!(
182 level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]),
183 &[0..3, 3..5, 5..6, 6..8]
184 );
185 }
186
187 // From <http://www.unicode.org/reports/tr9/#BD13>
188 #[rustfmt::skip]
189 #[test]
test_isolating_run_sequences()190 fn test_isolating_run_sequences() {
191
192 // == Example 1 ==
193 // text1·RLE·text2·PDF·RLE·text3·PDF·text4
194 // index 0 1 2 3 4 5 6 7
195 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
196 let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
197 let para_level = Level::ltr();
198 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
199 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
200 assert_eq!(
201 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
202 vec![vec![0..2], vec![2..7], vec![7..8]]
203 );
204
205 // == Example 2 ==
206 // text1·RLI·text2·PDI·RLI·text3·PDI·text4
207 // index 0 1 2 3 4 5 6 7
208 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
209 let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
210 let para_level = Level::ltr();
211 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
212 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
213 assert_eq!(
214 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
215 vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
216 );
217
218 // == Example 3 ==
219 // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
220 // index 0 1 2 3 4 5 6 7 8 9 10 11 12
221 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
222 let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
223 let para_level = Level::ltr();
224 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
225 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
226 assert_eq!(
227 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
228 vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
229 );
230 }
231
232 // From <http://www.unicode.org/reports/tr9/#X10>
233 #[rustfmt::skip]
234 #[test]
test_isolating_run_sequences_sos_and_eos()235 fn test_isolating_run_sequences_sos_and_eos() {
236
237 // == Example 1 ==
238 // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
239 // index 0 1 2 3 4 5 6 7 8 9 10 11
240 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
241 let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
242 let para_level = Level::ltr();
243 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
244 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
245
246 // text1
247 assert_eq!(
248 &sequences[0],
249 &IsolatingRunSequence {
250 runs: vec![0..2],
251 sos: L,
252 eos: R,
253 }
254 );
255
256 // text2
257 assert_eq!(
258 &sequences[1],
259 &IsolatingRunSequence {
260 runs: vec![2..4],
261 sos: R,
262 eos: L,
263 }
264 );
265
266 // text3
267 assert_eq!(
268 &sequences[2],
269 &IsolatingRunSequence {
270 runs: vec![4..6],
271 sos: L,
272 eos: L,
273 }
274 );
275
276 // text4 text5
277 assert_eq!(
278 &sequences[3],
279 &IsolatingRunSequence {
280 runs: vec![6..11],
281 sos: L,
282 eos: R,
283 }
284 );
285
286 // text6
287 assert_eq!(
288 &sequences[4],
289 &IsolatingRunSequence {
290 runs: vec![11..12],
291 sos: R,
292 eos: L,
293 }
294 );
295
296 // == Example 2 ==
297 // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
298 // index 0 1 2 3 4 5 6 7 8 9 10 11
299 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
300 let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
301 let para_level = Level::ltr();
302 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
303 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
304
305 // text1·RLI·PDI·RLI·PDI·text6
306 assert_eq!(
307 &sequences[0],
308 &IsolatingRunSequence {
309 runs: vec![0..2, 7..9, 10..12],
310 sos: L,
311 eos: L,
312 }
313 );
314
315 // text2·LRI·PDI·text4
316 assert_eq!(
317 &sequences[1],
318 &IsolatingRunSequence {
319 runs: vec![2..4, 5..7],
320 sos: R,
321 eos: R,
322 }
323 );
324
325 // text3
326 assert_eq!(
327 &sequences[2],
328 &IsolatingRunSequence {
329 runs: vec![4..5],
330 sos: L,
331 eos: L,
332 }
333 );
334
335 // text5
336 assert_eq!(
337 &sequences[3],
338 &IsolatingRunSequence {
339 runs: vec![9..10],
340 sos: R,
341 eos: R,
342 }
343 );
344 }
345
346 #[test]
test_removed_by_x9()347 fn test_removed_by_x9() {
348 let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
349 let not_classes = &[L, RLI, AL, LRI, PDI];
350 for x in rem_classes {
351 assert_eq!(removed_by_x9(*x), true);
352 }
353 for x in not_classes {
354 assert_eq!(removed_by_x9(*x), false);
355 }
356 }
357
358 #[test]
test_not_removed_by_x9()359 fn test_not_removed_by_x9() {
360 let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
361 for x in non_x9_classes {
362 assert_eq!(not_removed_by_x9(&x), true);
363 }
364 }
365 }
366