1 #[cfg(__unicase__iter_cmp)]
2 use std::cmp::Ordering;
3 use std::hash::{Hash, Hasher};
4 
5 use self::map::lookup;
6 mod map;
7 
/// Wrapper around a string-like value whose comparison and hashing operate
/// on the per-`char` results of `lookup` rather than on the raw characters.
///
/// The wrapped value is public and can be read back through field `.0`.
#[derive(Clone, Copy, Debug, Default)]
pub struct Unicode<S>(pub S);
10 
11 impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12     #[inline]
eq(&self, other: &Unicode<S2>) -> bool13     fn eq(&self, other: &Unicode<S2>) -> bool {
14         self.0.as_ref().chars().flat_map(lookup)
15             .zip(other.0.as_ref().chars().flat_map(lookup))
16             .all(|(a, b)| a == b)
17     }
18 }
19 
20 impl<S: AsRef<str>> Eq for Unicode<S> {}
21 
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    /// Always returns `Some`: the result is whatever `Ord::cmp` reports for
    /// the same pair of values.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
29 
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    /// Orders two values by comparing, element by element, the `char`
    /// sequences obtained by passing every character through `lookup`.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.0
            .as_ref()
            .chars()
            .flat_map(lookup)
            .cmp(other.0.as_ref().chars().flat_map(lookup))
    }
}
39 
40 impl<S: AsRef<str>> Hash for Unicode<S> {
41     #[inline]
hash<H: Hasher>(&self, hasher: &mut H)42     fn hash<H: Hasher>(&self, hasher: &mut H) {
43         let mut buf = [0; 4];
44         for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
45             let len = char_to_utf8(c, &mut buf);
46             hasher.write(&buf[..len])
47         }
48     }
49 }
50 
/// Encodes `c` as UTF-8 into the front of `dst` and returns how many bytes
/// (1 to 4) were written. Bytes of `dst` past the returned length are left
/// untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Marker bits for continuation bytes and for the lead byte of each
    // multi-byte width.
    const CONT_TAG: u8 = 0b1000_0000;
    const LEAD_2: u8 = 0b1100_0000;
    const LEAD_3: u8 = 0b1110_0000;
    const LEAD_4: u8 = 0b1111_0000;

    let cp = c as u32;
    // Six payload bits taken from `cp` at `shift`, tagged as a continuation byte.
    let cont = |shift: u32| ((cp >> shift) & 0x3F) as u8 | CONT_TAG;

    if cp < 0x80 {
        dst[0] = cp as u8;
        return 1;
    }
    if cp < 0x800 {
        dst[0] = ((cp >> 6) & 0x1F) as u8 | LEAD_2;
        dst[1] = cont(0);
        return 2;
    }
    if cp < 0x1_0000 {
        dst[0] = ((cp >> 12) & 0x0F) as u8 | LEAD_3;
        dst[1] = cont(6);
        dst[2] = cont(0);
        return 3;
    }

    dst[0] = ((cp >> 18) & 0x07) as u8 | LEAD_4;
    dst[1] = cont(12);
    dst[2] = cont(6);
    dst[3] = cont(0);
    4
}
79 
80 // internal mod so that the enum can be 'pub'
81 // thanks privacy-checker :___(
mod fold {
    /// A small inline sequence of zero to three `char`s.
    ///
    /// Iterating yields the stored characters in the order they appear in
    /// the variant, shrinking the value by one character per step until it
    /// reaches `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        /// Nothing left to yield.
        Zero,
        /// One character remaining.
        One(char),
        /// Two characters remaining.
        Two(char, char),
        /// Three characters remaining.
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the first remaining character and keeps the rest, in order.
        #[inline]
        fn next(&mut self) -> Option<char> {
            let (head, rest) = match *self {
                Fold::Zero => return None,
                Fold::One(one) => (one, Fold::Zero),
                Fold::Two(one, two) => (one, Fold::One(two)),
                // Yield the first character and keep the last two; every
                // variant must produce its characters front to back so a
                // three-char sequence lines up with the same characters
                // supplied one at a time.
                Fold::Three(one, two, three) => (one, Fold::Two(two, three)),
            };
            *self = rest;
            Some(head)
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            let remaining = match *self {
                Fold::Zero => 0,
                Fold::One(..) => 1,
                Fold::Two(..) => 2,
                Fold::Three(..) => 3,
            };
            (remaining, Some(remaining))
        }
    }

    impl From<(char,)> for Fold {
        /// Builds a one-character sequence.
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        /// Builds a two-character sequence, preserving tuple order.
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        /// Builds a three-character sequence, preserving tuple order.
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
144 
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => ({
            assert_eq!(Unicode($left), Unicode($right));
        });
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 (LATIN SMALL LIGATURE FL) full-case-folds to the two chars
        // "fl". It is written as an escape so the ligature cannot be silently
        // normalised to plain "fl", which would make this assertion trivial.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
186