1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use std::fmt::{self, Write};
12
13 // Helper functions used for Unicode normalization
/// Puts `comb` into canonical order in place.
///
/// Each element is a character paired with its canonical combining class.
/// Elements whose class is `0` never move and act as barriers: only the
/// maximal runs of non-zero-class elements between them are reordered, each
/// run by ascending class.
///
/// The sort is stable, so elements with equal classes keep their relative
/// order. An empty slice is left untouched.
fn canonical_sort(comb: &mut [(char, u8)]) {
    // Splitting on class-0 elements yields exactly the runs that may be
    // reordered; the class-0 separators themselves stay where they are.
    for run in comb.split_mut(|&(_, class)| class == 0) {
        // `sort_by_key` is a stable sort, which preserves the original order
        // of marks that share a combining class.
        run.sort_by_key(|&(_, class)| class);
    }
}
29
/// Selects which decomposition a `Decompositions` iterator applies to each
/// input character.
#[derive(Clone)]
enum DecompositionType {
    /// Characters are expanded with the canonical decomposition routine.
    Canonical,
    /// Characters are expanded with the compatibility decomposition routine.
    Compatible,
}
35
36 /// External iterator for a string decomposition's characters.
37 #[derive(Clone)]
38 pub struct Decompositions<I> {
39 kind: DecompositionType,
40 iter: I,
41 buffer: Vec<(char, u8)>,
42 sorted: bool
43 }
44
45 #[inline]
new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>46 pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
47 Decompositions {
48 iter: iter,
49 buffer: Vec::new(),
50 sorted: false,
51 kind: self::DecompositionType::Canonical,
52 }
53 }
54
55 #[inline]
new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>56 pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
57 Decompositions {
58 iter: iter,
59 buffer: Vec::new(),
60 sorted: false,
61 kind: self::DecompositionType::Compatible,
62 }
63 }
64
65 impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
66 type Item = char;
67
68 #[inline]
next(&mut self) -> Option<char>69 fn next(&mut self) -> Option<char> {
70 use self::DecompositionType::*;
71
72 match self.buffer.first() {
73 Some(&(c, 0)) => {
74 self.sorted = false;
75 self.buffer.remove(0);
76 return Some(c);
77 }
78 Some(&(c, _)) if self.sorted => {
79 self.buffer.remove(0);
80 return Some(c);
81 }
82 _ => self.sorted = false
83 }
84
85 if !self.sorted {
86 for ch in self.iter.by_ref() {
87 let buffer = &mut self.buffer;
88 let sorted = &mut self.sorted;
89 {
90 let callback = |d| {
91 let class =
92 super::char::canonical_combining_class(d);
93 if class == 0 && !*sorted {
94 canonical_sort(buffer);
95 *sorted = true;
96 }
97 buffer.push((d, class));
98 };
99 match self.kind {
100 Canonical => {
101 super::char::decompose_canonical(ch, callback)
102 }
103 Compatible => {
104 super::char::decompose_compatible(ch, callback)
105 }
106 }
107 }
108 if *sorted {
109 break
110 }
111 }
112 }
113
114 if !self.sorted {
115 canonical_sort(&mut self.buffer);
116 self.sorted = true;
117 }
118
119 if self.buffer.is_empty() {
120 None
121 } else {
122 match self.buffer.remove(0) {
123 (c, 0) => {
124 self.sorted = false;
125 Some(c)
126 }
127 (c, _) => Some(c),
128 }
129 }
130 }
131
size_hint(&self) -> (usize, Option<usize>)132 fn size_hint(&self) -> (usize, Option<usize>) {
133 let (lower, _) = self.iter.size_hint();
134 (lower, None)
135 }
136 }
137
138 impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result139 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
140 for c in self.clone() {
141 f.write_char(c)?;
142 }
143 Ok(())
144 }
145 }
146