1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 use std::fmt::{self, Write};
12 
13 // Helper functions used for Unicode normalization
/// Reorders `comb` in place so that combining classes are ascending within
/// each run of non-zero classes.
///
/// Every element pairs a character with its combining class. Elements whose
/// class is 0 act as barriers: they never move and nothing is moved across
/// them. Inside each maximal run of non-zero classes the elements are sorted
/// by ascending class, and elements with equal class keep their relative
/// order.
fn canonical_sort(comb: &mut [(char, u8)]) {
    // Splitting on the class-0 elements yields exactly the runs that may be
    // reordered; the separators themselves are not part of any run and so
    // stay where they are.
    for run in comb.split_mut(|&(_, class)| class == 0) {
        // `sort_by_key` is a stable sort, which preserves the original order
        // of marks that share a combining class.
        run.sort_by_key(|&(_, class)| class);
    }
}
29 
/// Selects which decomposition routine a `Decompositions` iterator applies
/// to each input character.
#[derive(Clone)]
enum DecompositionType {
    /// Decompose with the canonical mapping.
    Canonical,
    /// Decompose with the compatibility mapping.
    Compatible,
}
35 
36 /// External iterator for a string decomposition's characters.
37 #[derive(Clone)]
38 pub struct Decompositions<I> {
39     kind: DecompositionType,
40     iter: I,
41     buffer: Vec<(char, u8)>,
42     sorted: bool
43 }
44 
45 #[inline]
new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>46 pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
47     Decompositions {
48         iter: iter,
49         buffer: Vec::new(),
50         sorted: false,
51         kind: self::DecompositionType::Canonical,
52     }
53 }
54 
55 #[inline]
new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I>56 pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
57     Decompositions {
58         iter: iter,
59         buffer: Vec::new(),
60         sorted: false,
61         kind: self::DecompositionType::Compatible,
62     }
63 }
64 
65 impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
66     type Item = char;
67 
68     #[inline]
next(&mut self) -> Option<char>69     fn next(&mut self) -> Option<char> {
70         use self::DecompositionType::*;
71 
72         match self.buffer.first() {
73             Some(&(c, 0)) => {
74                 self.sorted = false;
75                 self.buffer.remove(0);
76                 return Some(c);
77             }
78             Some(&(c, _)) if self.sorted => {
79                 self.buffer.remove(0);
80                 return Some(c);
81             }
82             _ => self.sorted = false
83         }
84 
85         if !self.sorted {
86             for ch in self.iter.by_ref() {
87                 let buffer = &mut self.buffer;
88                 let sorted = &mut self.sorted;
89                 {
90                     let callback = |d| {
91                         let class =
92                             super::char::canonical_combining_class(d);
93                         if class == 0 && !*sorted {
94                             canonical_sort(buffer);
95                             *sorted = true;
96                         }
97                         buffer.push((d, class));
98                     };
99                     match self.kind {
100                         Canonical => {
101                             super::char::decompose_canonical(ch, callback)
102                         }
103                         Compatible => {
104                             super::char::decompose_compatible(ch, callback)
105                         }
106                     }
107                 }
108                 if *sorted {
109                     break
110                 }
111             }
112         }
113 
114         if !self.sorted {
115             canonical_sort(&mut self.buffer);
116             self.sorted = true;
117         }
118 
119         if self.buffer.is_empty() {
120             None
121         } else {
122             match self.buffer.remove(0) {
123                 (c, 0) => {
124                     self.sorted = false;
125                     Some(c)
126                 }
127                 (c, _) => Some(c),
128             }
129         }
130     }
131 
size_hint(&self) -> (usize, Option<usize>)132     fn size_hint(&self) -> (usize, Option<usize>) {
133         let (lower, _) = self.iter.size_hint();
134         (lower, None)
135     }
136 }
137 
138 impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result139     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
140         for c in self.clone() {
141             f.write_char(c)?;
142         }
143         Ok(())
144     }
145 }
146