1 #[cfg(__unicase__iter_cmp)]
2 use std::cmp::Ordering;
3 use std::hash::{Hash, Hasher};
4
5 use self::map::lookup;
6 mod map;
7
/// Wrapper that gives a string-like value case-insensitive comparison semantics.
///
/// The equality, ordering and hashing impls in this file never look at the
/// wrapped value directly: they pass every `char` of it through `lookup`
/// first and work on the resulting character sequence. The inner value is
/// stored unchanged and remains reachable through the public field.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10
11 impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12 #[inline]
eq(&self, other: &Unicode<S2>) -> bool13 fn eq(&self, other: &Unicode<S2>) -> bool {
14 self.0.as_ref().chars().flat_map(lookup)
15 .zip(other.0.as_ref().chars().flat_map(lookup))
16 .all(|(a, b)| a == b)
17 }
18 }
19
20 impl<S: AsRef<str>> Eq for Unicode<S> {}
21
/// Partial ordering for `Unicode`; only compiled when the
/// `__unicase__iter_cmp` cfg flag is set.
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    /// Always yields `Some`: the answer is delegated to the total order
    /// provided by `Ord::cmp`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
29
/// Total ordering for `Unicode`; only compiled when the
/// `__unicase__iter_cmp` cfg flag is set.
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    /// Compares the two values lexicographically, character by character,
    /// after each `char` has been expanded through `lookup`.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self.0.as_ref();
        let rhs = other.0.as_ref();
        lhs.chars()
            .flat_map(lookup)
            .cmp(rhs.chars().flat_map(lookup))
    }
}
39
40 impl<S: AsRef<str>> Hash for Unicode<S> {
41 #[inline]
hash<H: Hasher>(&self, hasher: &mut H)42 fn hash<H: Hasher>(&self, hasher: &mut H) {
43 let mut buf = [0; 4];
44 for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
45 let len = char_to_utf8(c, &mut buf);
46 hasher.write(&buf[..len])
47 }
48 }
49 }
50
/// Encodes `c` as UTF-8 into the front of `dst` and returns how many bytes
/// were written (between 1 and 4).
///
/// Bytes of `dst` past the returned length are left untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Marker bits: one for continuation bytes, one per multi-byte lead byte.
    const CONT_MARK: u8 = 0b1000_0000;
    const LEAD_2: u8 = 0b1100_0000;
    const LEAD_3: u8 = 0b1110_0000;
    const LEAD_4: u8 = 0b1111_0000;

    let scalar = c as u32;
    // Continuation byte carrying the six payload bits that start `shift` bits up.
    let cont = |shift: u32| ((scalar >> shift) & 0x3F) as u8 | CONT_MARK;

    if scalar < 0x80 {
        // Single byte: the value itself.
        dst[0] = scalar as u8;
        return 1;
    }
    if scalar < 0x800 {
        dst[0] = ((scalar >> 6) & 0x1F) as u8 | LEAD_2;
        dst[1] = cont(0);
        return 2;
    }
    if scalar < 0x1_0000 {
        dst[0] = ((scalar >> 12) & 0x0F) as u8 | LEAD_3;
        dst[1] = cont(6);
        dst[2] = cont(0);
        return 3;
    }

    dst[0] = ((scalar >> 18) & 0x07) as u8 | LEAD_4;
    dst[1] = cont(12);
    dst[2] = cont(6);
    dst[3] = cont(0);
    4
}
79
// Private helper module: it exists so that the enum inside can be declared
// `pub`, which the privacy checker insists on.
mod fold {
    /// A short inline sequence of zero to three `char`s.
    ///
    /// `Fold` is its own iterator: it yields its characters in the order they
    /// are stored and shrinks by one variant on every call to `next`, ending
    /// at `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Pops the first stored character, or returns `None` once empty.
        #[inline]
        fn next(&mut self) -> Option<char> {
            let (head, rest) = match *self {
                Fold::Zero => return None,
                Fold::One(one) => (one, Fold::Zero),
                Fold::Two(one, two) => (one, Fold::One(two)),
                // Emit the first character and keep (second, third), exactly
                // like `Two` does. Yielding the third one first would rotate
                // the sequence and break matching against the same characters
                // written out individually.
                Fold::Three(one, two, three) => (one, Fold::Two(two, three)),
            };
            *self = rest;
            Some(head)
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            let remaining = match *self {
                Fold::Zero => 0,
                Fold::One(..) => 1,
                Fold::Two(..) => 2,
                Fold::Three(..) => 3,
            };
            (remaining, Some(remaining))
        }
    }

    /// Builds a one-character fold from a 1-tuple.
    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    /// Builds a two-character fold; iteration order is tuple order.
    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    /// Builds a three-character fold; iteration order is tuple order.
    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
144
#[cfg(test)]
mod tests {
    use super::Unicode;

    /// Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    /// ASCII letters match regardless of case.
    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    /// One-to-one folding outside ASCII: final sigma against regular sigma.
    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    /// Foldings that expand one character into several.
    #[test]
    fn test_full_case_folding() {
        // U+FB02 LATIN SMALL LIGATURE FL against plain "fl". The ligature is
        // written as an escape so that text normalisation cannot silently
        // flatten it and leave the assertion comparing a string with itself.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}
186