1 #[cfg(__unicase__iter_cmp)]
2 use core::cmp::Ordering;
3 use core::hash::{Hash, Hasher};
4 
5 use self::map::lookup;
6 mod map;
7 
/// Newtype wrapper around a string-like value `S`.
///
/// The trait implementations in this file (`PartialEq`, `Hash`, and the
/// cfg-gated ordering impls) do not look at the raw text: they first expand
/// every `char` of the wrapped string through `map::lookup` and operate on
/// the resulting character sequence.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10 
11 impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12     #[inline]
eq(&self, other: &Unicode<S2>) -> bool13     fn eq(&self, other: &Unicode<S2>) -> bool {
14         let mut left = self.0.as_ref().chars().flat_map(lookup);
15         let mut right = other.0.as_ref().chars().flat_map(lookup);
16 
17         // inline Iterator::eq since not added until Rust 1.5
18         loop {
19             let x = match left.next() {
20                 None => return right.next().is_none(),
21                 Some(val) => val,
22             };
23 
24             let y = match right.next() {
25                 None => return false,
26                 Some(val) => val,
27             };
28 
29             if x != y {
30                 return false;
31             }
32         }
33     }
34 }
35 
36 impl<S: AsRef<str>> Eq for Unicode<S> {}
37 
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    /// Always returns `Some`: the ordering is total and simply delegates to
    /// the `Ord` implementation for `Unicode<T>`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
45 
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    /// Lexicographically compares the two strings after expanding every
    /// `char` of each side through `lookup`.
    ///
    /// Only compiled when the `__unicase__iter_cmp` cfg is set, since it
    /// relies on `Iterator::cmp`.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self.0.as_ref().chars().flat_map(lookup);
        let rhs = other.0.as_ref().chars().flat_map(lookup);
        Iterator::cmp(lhs, rhs)
    }
}
55 
56 impl<S: AsRef<str>> Hash for Unicode<S> {
57     #[inline]
hash<H: Hasher>(&self, hasher: &mut H)58     fn hash<H: Hasher>(&self, hasher: &mut H) {
59         let mut buf = [0; 4];
60         for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
61             let len = char_to_utf8(c, &mut buf);
62             hasher.write(&buf[..len])
63         }
64     }
65 }
66 
/// Encodes `c` as UTF-8 into the front of `dst` and returns the number of
/// bytes written (1 to 4). Bytes of `dst` past the returned length are left
/// untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Leading-bit tags for continuation bytes and for the first byte of
    // two-, three- and four-byte sequences.
    const TAG_CONT: u8 = 0x80;
    const TAG_TWO_B: u8 = 0xC0;
    const TAG_THREE_B: u8 = 0xE0;
    const TAG_FOUR_B: u8 = 0xF0;

    let code = c as u32;
    // Continuation byte carrying the six payload bits found at `shift`.
    let cont = |shift: u32| ((code >> shift) & 0x3F) as u8 | TAG_CONT;

    if code < 0x80 {
        // ASCII: the code point is the byte.
        dst[0] = code as u8;
        return 1;
    }
    if code < 0x800 {
        dst[0] = ((code >> 6) & 0x1F) as u8 | TAG_TWO_B;
        dst[1] = cont(0);
        return 2;
    }
    if code < 0x1_0000 {
        dst[0] = ((code >> 12) & 0x0F) as u8 | TAG_THREE_B;
        dst[1] = cont(6);
        dst[2] = cont(0);
        return 3;
    }
    dst[0] = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
    dst[1] = cont(12);
    dst[2] = cont(6);
    dst[3] = cont(0);
    4
}
95 
96 // internal mod so that the enum can be 'pub'
97 // thanks privacy-checker :___(
mod fold {
    /// A fixed-capacity sequence of zero to three `char`s that is its own
    /// iterator.
    ///
    /// The variant records how many chars are still pending; chars are
    /// yielded front to back, in the order they appear in the variant.
    /// NOTE(review): presumably this is the value `map::lookup` returns for
    /// each input char — confirm against `mod map`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the first pending char and shrinks `self` to the variant
        /// holding the remaining ones, preserving their order.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Emit the FIRST char and keep the last two in order.
                    // Returning `three` here would scramble every
                    // three-char expansion into (three, one, two).
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of chars still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            let remaining = match *self {
                Fold::Zero => 0,
                Fold::One(..) => 1,
                Fold::Two(..) => 2,
                Fold::Three(..) => 3,
            };
            (remaining, Some(remaining))
        }
    }

    /// Builds a one-char sequence from a 1-tuple.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// Builds a two-char sequence from a pair, keeping the pair's order.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// Builds a three-char sequence from a triple, keeping the triple's order.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
160 
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two string literals compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => ({
            assert_eq!(Unicode($left), Unicode($right));
        });
    }

    // ASCII letters differing only in case must compare equal.
    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    // One-to-one folding: the two strings differ only in the form of the
    // last sigma.
    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    // One-to-many folding: a single char on one side matches several chars
    // on the other.
    #[test]
    fn test_full_case_folding() {
        // U+FB02 LATIN SMALL LIGATURE FL must match the two letters "fl".
        // Written as an escape so the ligature cannot be silently normalised
        // to plain "fl", which would make this assertion compare a string
        // with itself.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    // Throughput of comparing two short ASCII strings (nightly only).
    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    // Throughput of comparing two short non-ASCII strings (nightly only).
    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
202