1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 //! 3.3.3 Preparations for Implicit Processing
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
13
14 use std::cmp::max;
15 use std::ops::Range;
16
17 use super::char_data::BidiClass;
18 use super::level::Level;
19
20 use BidiClass::*;
21
22 /// A maximal substring of characters with the same embedding level.
23 ///
24 /// Represented as a range of byte indices.
25 pub type LevelRun = Range<usize>;
26
27
28 /// Output of `isolating_run_sequences` (steps X9-X10)
29 #[derive(Debug, PartialEq)]
30 pub struct IsolatingRunSequence {
31 pub runs: Vec<LevelRun>,
32 pub sos: BidiClass, // Start-of-sequence type.
33 pub eos: BidiClass, // End-of-sequence type.
34 }
35
36
37 /// Compute the set of isolating run sequences.
38 ///
39 /// An isolating run sequence is a maximal sequence of level runs such that for all level runs
40 /// except the last one in the sequence, the last character of the run is an isolate initiator
41 /// whose matching PDI is the first character of the next level run in the sequence.
42 ///
43 /// Note: This function does *not* return the sequences in order by their first characters.
44 #[cfg_attr(feature = "flame_it", flame)]
isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec<IsolatingRunSequence>45 pub fn isolating_run_sequences(
46 para_level: Level,
47 original_classes: &[BidiClass],
48 levels: &[Level],
49 ) -> Vec<IsolatingRunSequence> {
50 let runs = level_runs(levels, original_classes);
51
52 // Compute the set of isolating run sequences.
53 // <http://www.unicode.org/reports/tr9/#BD13>
54 let mut sequences = Vec::with_capacity(runs.len());
55
56 // When we encounter an isolate initiator, we push the current sequence onto the
57 // stack so we can resume it after the matching PDI.
58 let mut stack = vec![Vec::new()];
59
60 for run in runs {
61 assert!(run.len() > 0);
62 assert!(!stack.is_empty());
63
64 let start_class = original_classes[run.start];
65 let end_class = original_classes[run.end - 1];
66
67 let mut sequence = if start_class == PDI && stack.len() > 1 {
68 // Continue a previous sequence interrupted by an isolate.
69 stack.pop().unwrap()
70 } else {
71 // Start a new sequence.
72 Vec::new()
73 };
74
75 sequence.push(run);
76
77 if matches!(end_class, RLI | LRI | FSI) {
78 // Resume this sequence after the isolate.
79 stack.push(sequence);
80 } else {
81 // This sequence is finished.
82 sequences.push(sequence);
83 }
84 }
85 // Pop any remaning sequences off the stack.
86 sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
87
88 // Determine the `sos` and `eos` class for each sequence.
89 // <http://www.unicode.org/reports/tr9/#X10>
90 sequences
91 .into_iter()
92 .map(|sequence: Vec<LevelRun>| {
93 assert!(!sequence.is_empty());
94
95 let start_of_seq = sequence[0].start;
96 let end_of_seq = sequence[sequence.len() - 1].end;
97 let seq_level = levels[start_of_seq];
98
99 #[cfg(test)]
100 for run in sequence.clone() {
101 for idx in run {
102 if not_removed_by_x9(&original_classes[idx]) {
103 assert_eq!(seq_level, levels[idx]);
104 }
105 }
106 }
107
108 // Get the level of the last non-removed char before the runs.
109 let pred_level = match original_classes[..start_of_seq].iter().rposition(
110 not_removed_by_x9,
111 ) {
112 Some(idx) => levels[idx],
113 None => para_level,
114 };
115
116 // Get the level of the next non-removed char after the runs.
117 let succ_level = if matches!(original_classes[end_of_seq - 1], RLI | LRI | FSI) {
118 para_level
119 } else {
120 match original_classes[end_of_seq..].iter().position(
121 not_removed_by_x9,
122 ) {
123 Some(idx) => levels[end_of_seq + idx],
124 None => para_level,
125 }
126 };
127
128 IsolatingRunSequence {
129 runs: sequence,
130 sos: max(seq_level, pred_level).bidi_class(),
131 eos: max(seq_level, succ_level).bidi_class(),
132 }
133 })
134 .collect()
135 }
136
137 /// Finds the level runs in a paragraph.
138 ///
139 /// <http://www.unicode.org/reports/tr9/#BD7>
level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun>140 fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
141 assert_eq!(levels.len(), original_classes.len());
142
143 let mut runs = Vec::new();
144 if levels.is_empty() {
145 return runs;
146 }
147
148 let mut current_run_level = levels[0];
149 let mut current_run_start = 0;
150 for i in 1..levels.len() {
151 if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
152 // End the last run and start a new one.
153 runs.push(current_run_start..i);
154 current_run_level = levels[i];
155 current_run_start = i;
156 }
157 }
158 runs.push(current_run_start..levels.len());
159
160 runs
161 }
162
163 /// Should this character be ignored in steps after X9?
164 ///
165 /// <http://www.unicode.org/reports/tr9/#X9>
removed_by_x9(class: BidiClass) -> bool166 pub fn removed_by_x9(class: BidiClass) -> bool {
167 matches!(class, RLE | LRE | RLO | LRO | PDF | BN)
168 }
169
170 // For use as a predicate for `position` / `rposition`
not_removed_by_x9(class: &BidiClass) -> bool171 pub fn not_removed_by_x9(class: &BidiClass) -> bool {
172 !removed_by_x9(*class)
173 }
174
175 #[cfg(test)]
176 mod tests {
177 use super::*;
178
179 #[test]
test_level_runs()180 fn test_level_runs() {
181 assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);
182 assert_eq!(
183 level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]),
184 &[0..3, 3..5, 5..6, 6..8]
185 );
186 }
187
188 // From <http://www.unicode.org/reports/tr9/#BD13>
189 #[cfg_attr(rustfmt, rustfmt_skip)]
190 #[test]
test_isolating_run_sequences()191 fn test_isolating_run_sequences() {
192
193 // == Example 1 ==
194 // text1·RLE·text2·PDF·RLE·text3·PDF·text4
195 // index 0 1 2 3 4 5 6 7
196 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
197 let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
198 let para_level = Level::ltr();
199 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
200 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
201 assert_eq!(
202 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
203 vec![vec![0..2], vec![2..7], vec![7..8]]
204 );
205
206 // == Example 2 ==
207 // text1·RLI·text2·PDI·RLI·text3·PDI·text4
208 // index 0 1 2 3 4 5 6 7
209 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
210 let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
211 let para_level = Level::ltr();
212 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
213 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
214 assert_eq!(
215 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
216 vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
217 );
218
219 // == Example 3 ==
220 // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
221 // index 0 1 2 3 4 5 6 7 8 9 10 11 12
222 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
223 let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
224 let para_level = Level::ltr();
225 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
226 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
227 assert_eq!(
228 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
229 vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
230 );
231 }
232
233 // From <http://www.unicode.org/reports/tr9/#X10>
234 #[cfg_attr(rustfmt, rustfmt_skip)]
235 #[test]
test_isolating_run_sequences_sos_and_eos()236 fn test_isolating_run_sequences_sos_and_eos() {
237
238 // == Example 1 ==
239 // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
240 // index 0 1 2 3 4 5 6 7 8 9 10 11
241 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
242 let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
243 let para_level = Level::ltr();
244 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
245 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
246
247 // text1
248 assert_eq!(
249 &sequences[0],
250 &IsolatingRunSequence {
251 runs: vec![0..2],
252 sos: L,
253 eos: R,
254 }
255 );
256
257 // text2
258 assert_eq!(
259 &sequences[1],
260 &IsolatingRunSequence {
261 runs: vec![2..4],
262 sos: R,
263 eos: L,
264 }
265 );
266
267 // text3
268 assert_eq!(
269 &sequences[2],
270 &IsolatingRunSequence {
271 runs: vec![4..6],
272 sos: L,
273 eos: L,
274 }
275 );
276
277 // text4 text5
278 assert_eq!(
279 &sequences[3],
280 &IsolatingRunSequence {
281 runs: vec![6..11],
282 sos: L,
283 eos: R,
284 }
285 );
286
287 // text6
288 assert_eq!(
289 &sequences[4],
290 &IsolatingRunSequence {
291 runs: vec![11..12],
292 sos: R,
293 eos: L,
294 }
295 );
296
297 // == Example 2 ==
298 // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
299 // index 0 1 2 3 4 5 6 7 8 9 10 11
300 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
301 let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
302 let para_level = Level::ltr();
303 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
304 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
305
306 // text1·RLI·PDI·RLI·PDI·text6
307 assert_eq!(
308 &sequences[0],
309 &IsolatingRunSequence {
310 runs: vec![0..2, 7..9, 10..12],
311 sos: L,
312 eos: L,
313 }
314 );
315
316 // text2·LRI·PDI·text4
317 assert_eq!(
318 &sequences[1],
319 &IsolatingRunSequence {
320 runs: vec![2..4, 5..7],
321 sos: R,
322 eos: R,
323 }
324 );
325
326 // text3
327 assert_eq!(
328 &sequences[2],
329 &IsolatingRunSequence {
330 runs: vec![4..5],
331 sos: L,
332 eos: L,
333 }
334 );
335
336 // text5
337 assert_eq!(
338 &sequences[3],
339 &IsolatingRunSequence {
340 runs: vec![9..10],
341 sos: R,
342 eos: R,
343 }
344 );
345 }
346
347 #[test]
test_removed_by_x9()348 fn test_removed_by_x9() {
349 let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
350 let not_classes = &[L, RLI, AL, LRI, PDI];
351 for x in rem_classes {
352 assert_eq!(removed_by_x9(*x), true);
353 }
354 for x in not_classes {
355 assert_eq!(removed_by_x9(*x), false);
356 }
357 }
358
359 #[test]
test_not_removed_by_x9()360 fn test_not_removed_by_x9() {
361 let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
362 for x in non_x9_classes {
363 assert_eq!(not_removed_by_x9(&x), true);
364 }
365 }
366 }
367