1 #![allow(non_camel_case_types)]
2 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
3 
/// Binary "and-not" combination of two values of the same type.
///
/// NOTE(review): this declaration fixes only the signature; which operand is
/// complemented is decided by each implementor — confirm there before relying
/// on a particular order.
pub trait AndNot {
    /// Result type of [`AndNot::andnot`].
    type Output;

    /// Combines `self` with `rhs`, consuming both operands.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// By-value swap operation returning a value of the same type.
///
/// NOTE(review): "byte swap" is read off the name; the granularity at which
/// bytes are reversed is defined by each implementor.
pub trait BSwap {
    /// Consumes `self` and returns the swapped value.
    fn bswap(self) -> Self;
}
11 /// Ops that depend on word size
12 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
13 /// Ops that are independent of word size and endian
14 pub trait BitOps0:
15     BitAnd<Output = Self>
16     + BitOr<Output = Self>
17     + BitXor<Output = Self>
18     + BitXorAssign
19     + Not<Output = Self>
20     + AndNot<Output = Self>
21     + Sized
22     + Copy
23     + Clone
24 {
25 }
26 
27 pub trait BitOps32: BitOps0 + RotateEachWord32 {}
28 pub trait BitOps64: BitOps32 + RotateEachWord64 {}
29 pub trait BitOps128: BitOps64 + RotateEachWord128 {}
30 
/// Fixed-distance rotations; the distance is encoded in each method name.
///
/// Every method consumes `self` and returns a value of the same type.
/// NOTE(review): "rotate each word right by N bits" is read off the method
/// names; the exact per-word behavior is defined by each implementor.
pub trait RotateEachWord32 {
    /// Rotation by 7.
    fn rotate_each_word_right7(self) -> Self;

    /// Rotation by 8.
    fn rotate_each_word_right8(self) -> Self;

    /// Rotation by 11.
    fn rotate_each_word_right11(self) -> Self;

    /// Rotation by 12.
    fn rotate_each_word_right12(self) -> Self;

    /// Rotation by 16.
    fn rotate_each_word_right16(self) -> Self;

    /// Rotation by 20.
    fn rotate_each_word_right20(self) -> Self;

    /// Rotation by 24.
    fn rotate_each_word_right24(self) -> Self;

    /// Rotation by 25.
    fn rotate_each_word_right25(self) -> Self;
}
41 
/// Additional fixed-distance rotation, named for a distance of 32.
///
/// NOTE(review): per-word right-rotation semantics are inferred from the
/// method name; the exact behavior is defined by each implementor.
pub trait RotateEachWord64 {
    /// Rotation by 32; consumes `self` and returns the same type.
    fn rotate_each_word_right32(self) -> Self;
}
45 
/// Marker trait with no methods; it is required by [`BitOps128`].
pub trait RotateEachWord128 {
}
47 
48 // Vector type naming scheme:
49 // uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
51 //
// A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
53 // wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
54 // slow inter-lane operations.
55 
56 use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
57 
/// Conversion from `T` whose constructor is `unsafe` to call.
///
/// NOTE(review): the preconditions a caller must uphold are not stated here;
/// they are presumably defined by each implementor, which is why the
/// missing-safety-doc lint is silenced below.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Builds `Self` from `t`.
    unsafe fn unsafe_from(t: T) -> Self;
}
62 
/// A vector composed of two elements, which may be words or themselves vectors.
///
/// Both methods take the vector by value and address an element by index `i`.
/// NOTE(review): the valid range of `i` and the behavior outside it are not
/// specified here — check the implementor.
pub trait Vec2<W> {
    /// Returns the element selected by `i`.
    fn extract(self, i: u32) -> W;

    /// Returns the vector with the element selected by `i` set to `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
68 
/// A vector composed of four elements, which may be words or themselves vectors.
///
/// Both methods take the vector by value and address an element by index `i`.
/// NOTE(review): the valid range of `i` and the behavior outside it are not
/// specified here — check the implementor.
pub trait Vec4<W> {
    /// Returns the element selected by `i`.
    fn extract(self, i: u32) -> W;

    /// Returns the vector with the element selected by `i` set to `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
74 
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// Each method consumes the vector and returns a permuted vector of the same
/// type. NOTE(review): the digits in each name presumably spell the word
/// order of the result; the exact index convention is up to the implementor.
pub trait Words4 {
    /// Permutation named `1230`.
    fn shuffle1230(self) -> Self;

    /// Permutation named `2301`.
    fn shuffle2301(self) -> Self;

    /// Permutation named `3012`.
    fn shuffle3012(self) -> Self;
}
82 
/// A vector composed of one or more lanes, each composed of four words.
///
/// Each method consumes the vector and returns a permuted vector of the same
/// type. NOTE(review): the names suggest the permutation is applied within
/// every lane independently, with the digits spelling the resulting word
/// order — confirm against the implementor.
pub trait LaneWords4 {
    /// Per-lane permutation named `1230`.
    fn shuffle_lane_words1230(self) -> Self;

    /// Per-lane permutation named `2301`.
    fn shuffle_lane_words2301(self) -> Self;

    /// Per-lane permutation named `3012`.
    fn shuffle_lane_words3012(self) -> Self;
}
89 
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The number in each method name is the range size; every method consumes
/// `self` and returns a value of the same type.
pub trait Swap64 {
    /// Range size 1.
    fn swap1(self) -> Self;

    /// Range size 2.
    fn swap2(self) -> Self;

    /// Range size 4.
    fn swap4(self) -> Self;

    /// Range size 8.
    fn swap8(self) -> Self;

    /// Range size 16.
    fn swap16(self) -> Self;

    /// Range size 32.
    fn swap32(self) -> Self;

    /// Range size 64.
    fn swap64(self) -> Self;
}
101 
102 pub trait u32x4<M: Machine>:
103     BitOps32
104     + Store<vec128_storage>
105     + ArithOps
106     + Vec4<u32>
107     + Words4
108     + LaneWords4
109     + StoreBytes
110     + MultiLane<[u32; 4]>
111     + Into<vec128_storage>
112 {
113 }
114 pub trait u64x2<M: Machine>:
115     BitOps64
116     + Store<vec128_storage>
117     + ArithOps
118     + Vec2<u64>
119     + MultiLane<[u64; 2]>
120     + Into<vec128_storage>
121 {
122 }
123 pub trait u128x1<M: Machine>:
124     BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
125 {
126 }
127 
128 pub trait u32x4x2<M: Machine>:
129     BitOps32
130     + Store<vec256_storage>
131     + Vec2<M::u32x4>
132     + MultiLane<[M::u32x4; 2]>
133     + ArithOps
134     + Into<vec256_storage>
135 {
136 }
137 pub trait u64x2x2<M: Machine>:
138     BitOps64
139     + Store<vec256_storage>
140     + Vec2<M::u64x2>
141     + MultiLane<[M::u64x2; 2]>
142     + ArithOps
143     + StoreBytes
144     + Into<vec256_storage>
145 {
146 }
147 pub trait u64x4<M: Machine>:
148     BitOps64
149     + Store<vec256_storage>
150     + Vec4<u64>
151     + MultiLane<[u64; 4]>
152     + ArithOps
153     + Words4
154     + StoreBytes
155     + Into<vec256_storage>
156 {
157 }
158 pub trait u128x2<M: Machine>:
159     BitOps128
160     + Store<vec256_storage>
161     + Vec2<M::u128x1>
162     + MultiLane<[M::u128x1; 2]>
163     + Swap64
164     + Into<vec256_storage>
165 {
166 }
167 
168 pub trait u32x4x4<M: Machine>:
169     BitOps32
170     + Store<vec512_storage>
171     + Vec4<M::u32x4>
172     + MultiLane<[M::u32x4; 4]>
173     + ArithOps
174     + LaneWords4
175     + Into<vec512_storage>
176 {
177 }
178 pub trait u64x2x4<M: Machine>:
179     BitOps64
180     + Store<vec512_storage>
181     + Vec4<M::u64x2>
182     + MultiLane<[M::u64x2; 4]>
183     + ArithOps
184     + Into<vec512_storage>
185 {
186 }
187 // TODO: Words4
188 pub trait u128x4<M: Machine>:
189     BitOps128
190     + Store<vec512_storage>
191     + Vec4<M::u128x1>
192     + MultiLane<[M::u128x1; 4]>
193     + Swap64
194     + Into<vec512_storage>
195 {
196 }
197 
/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the type holding the individual lanes; the vector traits in
/// this file instantiate it with fixed-size arrays.
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    ///
    /// Consumes the vector.
    fn to_lanes(self) -> Lanes;

    /// Build a multi-lane vector from individual lanes.
    ///
    /// Consumes `lanes`.
    fn from_lanes(lanes: Lanes) -> Self;
}
205 
/// Combine single vectors into a multi-lane vector.
///
/// Implemented on the collection of lanes; `V` is the vector being produced.
pub trait VZip<V> {
    /// Consumes `self` and returns the combined vector.
    fn vzip(self) -> V;
}
210 
211 impl<V, T> VZip<V> for T
212 where
213     V: MultiLane<T>,
214 {
215     #[inline(always)]
vzip(self) -> V216     fn vzip(self) -> V {
217         V::from_lanes(self)
218     }
219 }
220 
221 pub trait Machine: Sized + Copy {
222     type u32x4: u32x4<Self>;
223     type u64x2: u64x2<Self>;
224     type u128x1: u128x1<Self>;
225 
226     type u32x4x2: u32x4x2<Self>;
227     type u64x2x2: u64x2x2<Self>;
228     type u64x4: u64x4<Self>;
229     type u128x2: u128x2<Self>;
230 
231     type u32x4x4: u32x4x4<Self>;
232     type u64x2x4: u64x2x4<Self>;
233     type u128x4: u128x4<Self>;
234 
235     #[inline(always)]
unpack<S, V: Store<S>>(self, s: S) -> V236     fn unpack<S, V: Store<S>>(self, s: S) -> V {
237         unsafe { V::unpack(s) }
238     }
239 
240     #[inline(always)]
vec<V, A>(self, a: A) -> V where V: MultiLane<A>,241     fn vec<V, A>(self, a: A) -> V
242     where
243         V: MultiLane<A>,
244     {
245         V::from_lanes(a)
246     }
247 
248     #[inline(always)]
read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,249     fn read_le<V>(self, input: &[u8]) -> V
250     where
251         V: StoreBytes,
252     {
253         unsafe { V::unsafe_read_le(input) }
254     }
255 
256     #[inline(always)]
read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,257     fn read_be<V>(self, input: &[u8]) -> V
258     where
259         V: StoreBytes,
260     {
261         unsafe { V::unsafe_read_be(input) }
262     }
263 
264     /// # Safety
265     /// Caller must ensure the type of Self is appropriate for the hardware of the execution
266     /// environment.
instance() -> Self267     unsafe fn instance() -> Self;
268 }
269 
/// Construction of a vector from a storage representation `S`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`, consuming it.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
276 
/// Conversion between a vector and a byte slice, in two variants named
/// `le` and `be`.
///
/// NOTE(review): `le`/`be` presumably stand for little- and big-endian, and
/// the slice-length requirements are defined by each implementor — neither
/// is spelled out in this declaration.
pub trait StoreBytes {
    /// Builds `Self` from `input` (`le` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;

    /// Builds `Self` from `input` (`be` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;

    /// Consumes `self`, using `out` as the destination (`le` variant).
    fn write_le(self, out: &mut [u8]);

    /// Consumes `self`, using `out` as the destination (`be` variant).
    fn write_be(self, out: &mut [u8]);
}
289