1 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
2 
/// Combined "and-not" bitwise operation on two values of the same type.
///
/// The trait only fixes the shape of the operation: it consumes `self` and `rhs` and
/// yields [`AndNot::Output`].
// NOTE(review): which operand is complemented is decided by the implementations, which
// are not visible here — confirm before relying on operand order.
pub trait AndNot {
    /// Result type of [`AndNot::andnot`].
    type Output;
    /// Applies the and-not operation to `self` and `rhs`.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation that returns a value of the same type.
// NOTE(review): presumably reverses the byte order within each word of the value; the
// granularity is defined by the implementations — confirm.
pub trait BSwap {
    /// Consumes `self` and returns its byte-swapped form.
    fn bswap(self) -> Self;
}
10 /// Ops that depend on word size
11 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
12 /// Ops that are independent of word size and endian
13 pub trait BitOps0:
14     BitAnd<Output = Self>
15     + BitOr<Output = Self>
16     + BitXor<Output = Self>
17     + BitXorAssign
18     + Not<Output = Self>
19     + AndNot<Output = Self>
20     + Sized
21     + Copy
22     + Clone
23 {
24 }
25 
26 pub trait BitOps32: BitOps0 + RotateEachWord32 {}
27 pub trait BitOps64: BitOps32 + RotateEachWord64 {}
28 pub trait BitOps128: BitOps64 + RotateEachWord128 {}
29 
/// Fixed-amount rotations required by `BitOps32`.
///
/// Every method consumes `self` and returns a value of the same type.
// NOTE(review): going by the method names, each one rotates every word of the value to
// the right by the stated number of bits — confirm against an implementation.
pub trait RotateEachWord32 {
    /// Rotation by 7 (see the trait-level note).
    fn rotate_each_word_right7(self) -> Self;
    /// Rotation by 8.
    fn rotate_each_word_right8(self) -> Self;
    /// Rotation by 11.
    fn rotate_each_word_right11(self) -> Self;
    /// Rotation by 12.
    fn rotate_each_word_right12(self) -> Self;
    /// Rotation by 16.
    fn rotate_each_word_right16(self) -> Self;
    /// Rotation by 20.
    fn rotate_each_word_right20(self) -> Self;
    /// Rotation by 24.
    fn rotate_each_word_right24(self) -> Self;
    /// Rotation by 25.
    fn rotate_each_word_right25(self) -> Self;
}
40 
/// Fixed-amount rotation required by `BitOps64`.
// NOTE(review): going by the method name, this rotates every word of the value to the
// right by 32 bits — confirm against an implementation.
pub trait RotateEachWord64 {
    /// Consumes `self` and returns the rotated value.
    fn rotate_each_word_right32(self) -> Self;
}
44 
/// Marker trait required by `BitOps128`; it currently declares no methods.
pub trait RotateEachWord128 {}
46 
47 #[allow(non_camel_case_types)]
48 mod types {
    //! Vector type naming scheme:
    //! uN[xP]xL
    //! Unsigned; N-bit words * P words per lane * L lanes
    //!
    //! A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
    //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
    //! slow inter-lane operations.
56 
57     use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
58     use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
59 
60     pub trait UnsafeFrom<T> {
unsafe_from(t: T) -> Self61         unsafe fn unsafe_from(t: T) -> Self;
62     }
63 
64     /// A vector composed of two elements, which may be words or themselves vectors.
65     pub trait Vec2<W> {
extract(self, i: u32) -> W66         fn extract(self, i: u32) -> W;
insert(self, w: W, i: u32) -> Self67         fn insert(self, w: W, i: u32) -> Self;
68     }
69 
70     /// A vector composed of four elements, which may be words or themselves vectors.
71     pub trait Vec4<W> {
extract(self, i: u32) -> W72         fn extract(self, i: u32) -> W;
insert(self, w: W, i: u32) -> Self73         fn insert(self, w: W, i: u32) -> Self;
74     }
75 
76     // TODO: multiples of 4 should inherit this
77     /// A vector composed of four words; depending on their size, operations may cross lanes.
78     pub trait Words4 {
shuffle1230(self) -> Self79         fn shuffle1230(self) -> Self;
shuffle2301(self) -> Self80         fn shuffle2301(self) -> Self;
shuffle3012(self) -> Self81         fn shuffle3012(self) -> Self;
82     }
83 
84     /// A vector composed one or more lanes each composed of four words.
85     pub trait LaneWords4 {
shuffle_lane_words1230(self) -> Self86         fn shuffle_lane_words1230(self) -> Self;
shuffle_lane_words2301(self) -> Self87         fn shuffle_lane_words2301(self) -> Self;
shuffle_lane_words3012(self) -> Self88         fn shuffle_lane_words3012(self) -> Self;
89     }
90 
91     // TODO: make this a part of BitOps
92     /// Exchange neigboring ranges of bits of the specified size
93     pub trait Swap64 {
swap1(self) -> Self94         fn swap1(self) -> Self;
swap2(self) -> Self95         fn swap2(self) -> Self;
swap4(self) -> Self96         fn swap4(self) -> Self;
swap8(self) -> Self97         fn swap8(self) -> Self;
swap16(self) -> Self98         fn swap16(self) -> Self;
swap32(self) -> Self99         fn swap32(self) -> Self;
swap64(self) -> Self100         fn swap64(self) -> Self;
101     }
102 
103     pub trait u32x4<M: Machine>:
104         BitOps32
105         + Store<vec128_storage>
106         + ArithOps
107         + Vec4<u32>
108         + Words4
109         + LaneWords4
110         + StoreBytes
111         + MultiLane<[u32; 4]>
112         + Into<vec128_storage>
113     {
114 }
115     pub trait u64x2<M: Machine>:
116         BitOps64
117         + Store<vec128_storage>
118         + ArithOps
119         + Vec2<u64>
120         + MultiLane<[u64; 2]>
121         + Into<vec128_storage>
122     {
123 }
124     pub trait u128x1<M: Machine>:
125         BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
126     {
127 }
128 
129     pub trait u32x4x2<M: Machine>:
130         BitOps32
131         + Store<vec256_storage>
132         + Vec2<M::u32x4>
133         + MultiLane<[M::u32x4; 2]>
134         + ArithOps
135         + Into<vec256_storage>
136     {
137 }
138     pub trait u64x2x2<M: Machine>:
139         BitOps64
140         + Store<vec256_storage>
141         + Vec2<M::u64x2>
142         + MultiLane<[M::u64x2; 2]>
143         + ArithOps
144         + StoreBytes
145         + Into<vec256_storage>
146     {
147 }
148     pub trait u64x4<M: Machine>:
149         BitOps64
150         + Store<vec256_storage>
151         + Vec4<u64>
152         + MultiLane<[u64; 4]>
153         + ArithOps
154         + Words4
155         + StoreBytes
156         + Into<vec256_storage>
157     {
158 }
159     pub trait u128x2<M: Machine>:
160         BitOps128
161         + Store<vec256_storage>
162         + Vec2<M::u128x1>
163         + MultiLane<[M::u128x1; 2]>
164         + Swap64
165         + Into<vec256_storage>
166     {
167 }
168 
169     pub trait u32x4x4<M: Machine>:
170         BitOps32
171         + Store<vec512_storage>
172         + Vec4<M::u32x4>
173         + MultiLane<[M::u32x4; 4]>
174         + ArithOps
175         + LaneWords4
176         + Into<vec512_storage>
177     {
178 }
179     pub trait u64x2x4<M: Machine>:
180         BitOps64
181         + Store<vec512_storage>
182         + Vec4<M::u64x2>
183         + MultiLane<[M::u64x2; 4]>
184         + ArithOps
185         + Into<vec512_storage>
186     {
187 }
188     // TODO: Words4
189     pub trait u128x4<M: Machine>:
190         BitOps128
191         + Store<vec512_storage>
192         + Vec4<M::u128x1>
193         + MultiLane<[M::u128x1; 4]>
194         + Swap64
195         + Into<vec512_storage>
196     {
197 }
198 
199     /// A vector composed of multiple 128-bit lanes.
200     pub trait MultiLane<Lanes> {
201         /// Split a multi-lane vector into single-lane vectors.
to_lanes(self) -> Lanes202         fn to_lanes(self) -> Lanes;
203         /// Build a multi-lane vector from individual lanes.
from_lanes(lanes: Lanes) -> Self204         fn from_lanes(lanes: Lanes) -> Self;
205     }
206 
207     /// Combine single vectors into a multi-lane vector.
208     pub trait VZip<V> {
vzip(self) -> V209         fn vzip(self) -> V;
210     }
211 
212     impl<V, T> VZip<V> for T
213     where
214         V: MultiLane<T>,
215     {
216         #[inline(always)]
vzip(self) -> V217         fn vzip(self) -> V {
218             V::from_lanes(self)
219         }
220     }
221 }
222 pub use self::types::*;
223 
224 pub trait Machine: Sized + Copy {
225     type u32x4: u32x4<Self>;
226     type u64x2: u64x2<Self>;
227     type u128x1: u128x1<Self>;
228 
229     type u32x4x2: u32x4x2<Self>;
230     type u64x2x2: u64x2x2<Self>;
231     type u64x4: u64x4<Self>;
232     type u128x2: u128x2<Self>;
233 
234     type u32x4x4: u32x4x4<Self>;
235     type u64x2x4: u64x2x4<Self>;
236     type u128x4: u128x4<Self>;
237 
238     #[inline(always)]
unpack<S, V: Store<S>>(self, s: S) -> V239     fn unpack<S, V: Store<S>>(self, s: S) -> V {
240         unsafe { V::unpack(s) }
241     }
242 
243     #[inline(always)]
vec<V, A>(self, a: A) -> V where V: MultiLane<A>,244     fn vec<V, A>(self, a: A) -> V
245     where
246         V: MultiLane<A>,
247     {
248         V::from_lanes(a)
249     }
250 
251     #[inline(always)]
read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,252     fn read_le<V>(self, input: &[u8]) -> V
253     where
254         V: StoreBytes,
255     {
256         unsafe { V::unsafe_read_le(input) }
257     }
258 
259     #[inline(always)]
read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,260     fn read_be<V>(self, input: &[u8]) -> V
261     where
262         V: StoreBytes,
263     {
264         unsafe { V::unsafe_read_be(input) }
265     }
266 
instance() -> Self267     unsafe fn instance() -> Self;
268 }
269 
/// Construction of a vector from a storage value of type `S`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    ///
    /// The preconditions are defined by each implementation; none are stated at the
    /// trait level.
    unsafe fn unpack(p: S) -> Self;
}
273 
/// Reading a value from, and writing it to, a byte slice in little- or big-endian form.
pub trait StoreBytes {
    /// Reads a value from `input`, little-endian.
    ///
    /// # Safety
    ///
    /// The preconditions are defined by each implementation. NOTE(review): presumably
    /// `input` must hold at least as many bytes as the value occupies — confirm.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// Reads a value from `input`, big-endian.
    ///
    /// # Safety
    ///
    /// The preconditions are defined by each implementation. NOTE(review): presumably
    /// `input` must hold at least as many bytes as the value occupies — confirm.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    /// Writes the value to `out`, little-endian.
    fn write_le(self, out: &mut [u8]);
    /// Writes the value to `out`, big-endian.
    fn write_be(self, out: &mut [u8]);
}
280