1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11
12 // Helper functions used for Unicode normalization
/// Puts combining marks into canonical order, in place.
///
/// Each element of `comb` pairs a character with its canonical combining
/// class. Entries whose class is 0 never move and act as barriers: only the
/// maximal runs of non-zero-class entries between them are reordered, each
/// into ascending class order. The ordering is stable, so entries that share
/// a class keep their original relative order.
fn canonical_sort(comb: &mut [(char, u8)]) {
    // Splitting on class 0 yields exactly the runs whose entries may be
    // reordered; the class-0 separators themselves stay where they are.
    for run in comb.split_mut(|&(_, class)| class == 0) {
        // `sort_by_key` is a stable sort, which is what keeps entries of
        // equal class in their original order.
        run.sort_by_key(|&(_, class)| class);
    }
}
28
/// Selects which decomposition mapping a `Decompositions` iterator applies
/// to each input character.
#[derive(Clone)]
enum DecompositionType {
    /// Characters are expanded with `decompose_canonical`.
    Canonical,
    /// Characters are expanded with `decompose_compatible`.
    Compatible,
}
34
35 /// External iterator for a string decomposition's characters.
36 #[derive(Clone)]
37 pub struct Decompositions<I> {
38 kind: DecompositionType,
39 iter: I,
40 buffer: Vec<(char, u8)>,
41 sorted: bool
42 }
43
44 #[inline]
new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>45 pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
46 Decompositions {
47 iter: iter,
48 buffer: Vec::new(),
49 sorted: false,
50 kind: self::DecompositionType::Canonical,
51 }
52 }
53
54 #[inline]
new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>55 pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
56 Decompositions {
57 iter: iter,
58 buffer: Vec::new(),
59 sorted: false,
60 kind: self::DecompositionType::Compatible,
61 }
62 }
63
64 impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
65 type Item = char;
66
67 #[inline]
next(&mut self) -> Option<char>68 fn next(&mut self) -> Option<char> {
69 use self::DecompositionType::*;
70
71 match self.buffer.first() {
72 Some(&(c, 0)) => {
73 self.sorted = false;
74 self.buffer.remove(0);
75 return Some(c);
76 }
77 Some(&(c, _)) if self.sorted => {
78 self.buffer.remove(0);
79 return Some(c);
80 }
81 _ => self.sorted = false
82 }
83
84 if !self.sorted {
85 for ch in self.iter.by_ref() {
86 let buffer = &mut self.buffer;
87 let sorted = &mut self.sorted;
88 {
89 let callback = |d| {
90 let class =
91 super::char::canonical_combining_class(d);
92 if class == 0 && !*sorted {
93 canonical_sort(buffer);
94 *sorted = true;
95 }
96 buffer.push((d, class));
97 };
98 match self.kind {
99 Canonical => {
100 super::char::decompose_canonical(ch, callback)
101 }
102 Compatible => {
103 super::char::decompose_compatible(ch, callback)
104 }
105 }
106 }
107 if *sorted {
108 break
109 }
110 }
111 }
112
113 if !self.sorted {
114 canonical_sort(&mut self.buffer);
115 self.sorted = true;
116 }
117
118 if self.buffer.is_empty() {
119 None
120 } else {
121 match self.buffer.remove(0) {
122 (c, 0) => {
123 self.sorted = false;
124 Some(c)
125 }
126 (c, _) => Some(c),
127 }
128 }
129 }
130
size_hint(&self) -> (usize, Option<usize>)131 fn size_hint(&self) -> (usize, Option<usize>) {
132 let (lower, _) = self.iter.size_hint();
133 (lower, None)
134 }
135 }
136