1 extern crate packed_simd;
2 
3 #[cfg(not(feature = "runtime-dispatch-simd"))]
4 use core::mem;
5 #[cfg(feature = "runtime-dispatch-simd")]
6 use std::mem;
7 
8 use self::packed_simd::{u8x32, u8x64, FromCast};
9 
/// Sliding lane mask used for the final partial chunk in `chunk_count` and
/// `chunk_num_chars`: 32 zero bytes followed by 32 `0xFF` bytes.
///
/// Loading a 32-byte window starting at index `r` (with `1 <= r <= 31`)
/// yields `32 - r` zero lanes followed by `r` all-ones lanes, i.e. a mask that
/// keeps only the last `r` lanes of a vector.
const MASK: [u8; 64] = [
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
];
16 
17 unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 {
18     u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
19 }
20 unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 {
21     u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
22 }
23 
24 fn sum_x64(u8s: &u8x64) -> usize {
25     let mut store = [0; mem::size_of::<u8x64>()];
26     u8s.write_to_slice_unaligned(&mut store);
27     store.iter().map(|&e| e as usize).sum()
28 }
29 fn sum_x32(u8s: &u8x32) -> usize {
30     let mut store = [0; mem::size_of::<u8x32>()];
31     u8s.write_to_slice_unaligned(&mut store);
32     store.iter().map(|&e| e as usize).sum()
33 }
34 
35 pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
36     assert!(haystack.len() >= 32);
37 
38     unsafe {
39         let mut offset = 0;
40         let mut count = 0;
41 
42         let needles_x64 = u8x64::splat(needle);
43 
44         // 16320
45         while haystack.len() >= offset + 64 * 255 {
46             let mut counts = u8x64::splat(0);
47             for _ in 0..255 {
48                 counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
49                 offset += 64;
50             }
51             count += sum_x64(&counts);
52         }
53 
54         // 8192
55         if haystack.len() >= offset + 64 * 128 {
56             let mut counts = u8x64::splat(0);
57             for _ in 0..128 {
58                 counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
59                 offset += 64;
60             }
61             count += sum_x64(&counts);
count(haystack: &[u8], needle: u8) -> usize62         }
63 
64         let needles_x32 = u8x32::splat(needle);
65 
66         // 32
67         let mut counts = u8x32::splat(0);
68         for i in 0..(haystack.len() - offset) / 32 {
69             counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32));
70         }
71         count += sum_x32(&counts);
72 
73         // Straggler; need to reset counts because prior loop can run 255 times
74         counts = u8x32::splat(0);
75         if haystack.len() % 32 != 0 {
76             counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) &
77                       u8x32_from_offset(&MASK, haystack.len() % 32);
78         }
79         count += sum_x32(&counts);
80 
81         count
82     }
83 }
84 
85 fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 {
86     u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000)))
87 }
88 
89 fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 {
90     u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000)))
91 }
92 
93 pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
94     assert!(utf8_chars.len() >= 32);
95 
96     unsafe {
97         let mut offset = 0;
98         let mut count = 0;
99 
100         // 16320
101         while utf8_chars.len() >= offset + 64 * 255 {
102             let mut counts = u8x64::splat(0);
103             for _ in 0..255 {
104                 counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset));
105                 offset += 64;
106             }
num_chars(utf8_chars: &[u8]) -> usize107             count += sum_x64(&counts);
108         }
109 
110         // 8192
111         if utf8_chars.len() >= offset + 64 * 128 {
112             let mut counts = u8x64::splat(0);
113             for _ in 0..128 {
114                 counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset));
115                 offset += 64;
116             }
117             count += sum_x64(&counts);
118         }
119 
120         // 32
121         let mut counts = u8x32::splat(0);
122         for i in 0..(utf8_chars.len() - offset) / 32 {
123             counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, offset + i * 32));
124         }
125         count += sum_x32(&counts);
126 
127         // Straggler; need to reset counts because prior loop can run 255 times
128         counts = u8x32::splat(0);
129         if utf8_chars.len() % 32 != 0 {
130             counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) &
131                       u8x32_from_offset(&MASK, utf8_chars.len() % 32);
132         }
133         count += sum_x32(&counts);
134 
135         count
136     }
137 }
138