1 #![allow(non_camel_case_types)]
2 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
3 
/// Combined "and-not" operation on two values of the same type.
///
/// NOTE(review): which operand is complemented is not visible from this declaration — confirm
/// against the implementations before relying on a particular operand order.
pub trait AndNot {
    /// Type produced by [`AndNot::andnot`].
    type Output;
    /// Combines `self` with `rhs`, consuming both operands.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation; a supertrait of `ArithOps`.
///
/// NOTE(review): presumably reverses the byte order of each word — the granularity is defined
/// by the implementations, not by this declaration.
pub trait BSwap {
    /// Returns the byte-swapped form of `self`.
    fn bswap(self) -> Self;
}
11 /// Ops that depend on word size
12 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
13 /// Ops that are independent of word size and endian
14 pub trait BitOps0:
15     BitAnd<Output = Self>
16     + BitOr<Output = Self>
17     + BitXor<Output = Self>
18     + BitXorAssign
19     + Not<Output = Self>
20     + AndNot<Output = Self>
21     + Sized
22     + Copy
23     + Clone
24 {
25 }
26 
27 pub trait BitOps32: BitOps0 + RotateEachWord32 {}
28 pub trait BitOps64: BitOps32 + RotateEachWord64 {}
29 pub trait BitOps128: BitOps64 + RotateEachWord128 {}
30 
/// Fixed-amount right rotations applied to every word of a vector.
///
/// Each method consumes `self` and returns the rotated value; the rotation amount (in bits,
/// going by the method names) is the numeric suffix.
/// NOTE(review): the `32` in the trait name presumably means these amounts are valid for words
/// of at least 32 bits — confirm against the implementations.
pub trait RotateEachWord32 {
    /// Rotate each word right by 7.
    fn rotate_each_word_right7(self) -> Self;
    /// Rotate each word right by 8.
    fn rotate_each_word_right8(self) -> Self;
    /// Rotate each word right by 11.
    fn rotate_each_word_right11(self) -> Self;
    /// Rotate each word right by 12.
    fn rotate_each_word_right12(self) -> Self;
    /// Rotate each word right by 16.
    fn rotate_each_word_right16(self) -> Self;
    /// Rotate each word right by 20.
    fn rotate_each_word_right20(self) -> Self;
    /// Rotate each word right by 24.
    fn rotate_each_word_right24(self) -> Self;
    /// Rotate each word right by 25.
    fn rotate_each_word_right25(self) -> Self;
}
41 
/// Additional per-word right rotation required by `BitOps64`.
pub trait RotateEachWord64 {
    /// Rotate each word right by 32 (amount taken from the method name).
    fn rotate_each_word_right32(self) -> Self;
}
45 
/// Marker required by `BitOps128`; it currently declares no rotation methods.
pub trait RotateEachWord128 {}
47 
// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
// (e.g. u32x4x2 is two lanes of four 32-bit words each, 256 bits in total).
//
// A lane is always 128 bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.
55 
56 use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
57 
/// Unchecked conversion from `T` into `Self`.
///
/// NOTE(review): the safety contract is not stated in this file (hence the lint allowance
/// below); each implementation defines what the caller must guarantee.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Converts `t` into `Self` without the checks a safe conversion would perform.
    unsafe fn unsafe_from(t: T) -> Self;
}
62 
/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behaviour for `i >= 2` is implementation-defined as far as this
    /// declaration shows.
    fn extract(self, i: u32) -> W;
    /// Returns a copy of `self` with the element at index `i` replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
68 
/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
    /// Returns the element at index `i`.
    ///
    /// NOTE(review): behaviour for `i >= 4` is implementation-defined as far as this
    /// declaration shows.
    fn extract(self, i: u32) -> W;
    /// Returns a copy of `self` with the element at index `i` replaced by `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Vec4 functions which may not be implemented yet for all Vec4 types.
/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
/// import Vec4Ext only together with Vec4, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Transposes the four vectors `a`, `b`, `c`, `d`, returning the four resulting vectors.
    ///
    /// NOTE(review): presumably treats the inputs as rows of a 4x4 matrix of `W` elements —
    /// confirm against the implementations.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// Conversion of a vector into its scalar representation `T`.
pub trait Vector<T> {
    /// Consumes the vector and returns its contents as `T`.
    fn to_scalars(self) -> T;
}
83 
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// NOTE(review): the digit suffix of each method presumably spells the word permutation
/// applied; the exact index convention is defined by the implementations.
pub trait Words4 {
    /// Permutes the four words according to the pattern `1230`.
    fn shuffle1230(self) -> Self;
    /// Permutes the four words according to the pattern `2301`.
    fn shuffle2301(self) -> Self;
    /// Permutes the four words according to the pattern `3012`.
    fn shuffle3012(self) -> Self;
}
91 
/// A vector composed of one or more lanes, each composed of four words.
///
/// NOTE(review): the digit suffix of each method presumably spells the word permutation
/// applied within every lane; the exact index convention is defined by the implementations.
pub trait LaneWords4 {
    /// Permutes the words of each lane according to the pattern `1230`.
    fn shuffle_lane_words1230(self) -> Self;
    /// Permutes the words of each lane according to the pattern `2301`.
    fn shuffle_lane_words2301(self) -> Self;
    /// Permutes the words of each lane according to the pattern `3012`.
    fn shuffle_lane_words3012(self) -> Self;
}
98 
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size
///
/// The numeric suffix of each method is the size, in bits, of the ranges being exchanged.
pub trait Swap64 {
    /// Exchange neighboring 1-bit ranges.
    fn swap1(self) -> Self;
    /// Exchange neighboring 2-bit ranges.
    fn swap2(self) -> Self;
    /// Exchange neighboring 4-bit ranges.
    fn swap4(self) -> Self;
    /// Exchange neighboring 8-bit ranges.
    fn swap8(self) -> Self;
    /// Exchange neighboring 16-bit ranges.
    fn swap16(self) -> Self;
    /// Exchange neighboring 32-bit ranges.
    fn swap32(self) -> Self;
    /// Exchange neighboring 64-bit ranges.
    fn swap64(self) -> Self;
}
110 
111 pub trait u32x4<M: Machine>:
112     BitOps32
113     + Store<vec128_storage>
114     + ArithOps
115     + Vec4<u32>
116     + Words4
117     + LaneWords4
118     + StoreBytes
119     + MultiLane<[u32; 4]>
120     + Into<vec128_storage>
121 {
122 }
123 pub trait u64x2<M: Machine>:
124     BitOps64
125     + Store<vec128_storage>
126     + ArithOps
127     + Vec2<u64>
128     + MultiLane<[u64; 2]>
129     + Into<vec128_storage>
130 {
131 }
132 pub trait u128x1<M: Machine>:
133     BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
134 {
135 }
136 
137 pub trait u32x4x2<M: Machine>:
138     BitOps32
139     + Store<vec256_storage>
140     + Vec2<M::u32x4>
141     + MultiLane<[M::u32x4; 2]>
142     + ArithOps
143     + Into<vec256_storage>
144     + StoreBytes
145 {
146 }
147 pub trait u64x2x2<M: Machine>:
148     BitOps64
149     + Store<vec256_storage>
150     + Vec2<M::u64x2>
151     + MultiLane<[M::u64x2; 2]>
152     + ArithOps
153     + StoreBytes
154     + Into<vec256_storage>
155 {
156 }
157 pub trait u64x4<M: Machine>:
158     BitOps64
159     + Store<vec256_storage>
160     + Vec4<u64>
161     + MultiLane<[u64; 4]>
162     + ArithOps
163     + Words4
164     + StoreBytes
165     + Into<vec256_storage>
166 {
167 }
168 pub trait u128x2<M: Machine>:
169     BitOps128
170     + Store<vec256_storage>
171     + Vec2<M::u128x1>
172     + MultiLane<[M::u128x1; 2]>
173     + Swap64
174     + Into<vec256_storage>
175 {
176 }
177 
178 pub trait u32x4x4<M: Machine>:
179     BitOps32
180     + Store<vec512_storage>
181     + Vec4<M::u32x4>
182     + Vec4Ext<M::u32x4>
183     + Vector<[u32; 16]>
184     + MultiLane<[M::u32x4; 4]>
185     + ArithOps
186     + LaneWords4
187     + Into<vec512_storage>
188     + StoreBytes
189 {
190 }
191 pub trait u64x2x4<M: Machine>:
192     BitOps64
193     + Store<vec512_storage>
194     + Vec4<M::u64x2>
195     + MultiLane<[M::u64x2; 4]>
196     + ArithOps
197     + Into<vec512_storage>
198 {
199 }
200 // TODO: Words4
201 pub trait u128x4<M: Machine>:
202     BitOps128
203     + Store<vec512_storage>
204     + Vec4<M::u128x1>
205     + MultiLane<[M::u128x1; 4]>
206     + Swap64
207     + Into<vec512_storage>
208 {
209 }
210 
/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the container holding the individual lanes; the vector traits in this file use
/// fixed-size arrays for it.
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;
    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}
218 
/// Combine single vectors into a multi-lane vector.
///
/// `V` is the resulting multi-lane vector type.
pub trait VZip<V> {
    /// Consumes `self` (the individual lanes) and returns the combined vector.
    fn vzip(self) -> V;
}
223 
224 impl<V, T> VZip<V> for T
225 where
226     V: MultiLane<T>,
227 {
228     #[inline(always)]
vzip(self) -> V229     fn vzip(self) -> V {
230         V::from_lanes(self)
231     }
232 }
233 
234 pub trait Machine: Sized + Copy {
235     type u32x4: u32x4<Self>;
236     type u64x2: u64x2<Self>;
237     type u128x1: u128x1<Self>;
238 
239     type u32x4x2: u32x4x2<Self>;
240     type u64x2x2: u64x2x2<Self>;
241     type u64x4: u64x4<Self>;
242     type u128x2: u128x2<Self>;
243 
244     type u32x4x4: u32x4x4<Self>;
245     type u64x2x4: u64x2x4<Self>;
246     type u128x4: u128x4<Self>;
247 
248     #[inline(always)]
unpack<S, V: Store<S>>(self, s: S) -> V249     fn unpack<S, V: Store<S>>(self, s: S) -> V {
250         unsafe { V::unpack(s) }
251     }
252 
253     #[inline(always)]
vec<V, A>(self, a: A) -> V where V: MultiLane<A>,254     fn vec<V, A>(self, a: A) -> V
255     where
256         V: MultiLane<A>,
257     {
258         V::from_lanes(a)
259     }
260 
261     #[inline(always)]
read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,262     fn read_le<V>(self, input: &[u8]) -> V
263     where
264         V: StoreBytes,
265     {
266         unsafe { V::unsafe_read_le(input) }
267     }
268 
269     #[inline(always)]
read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,270     fn read_be<V>(self, input: &[u8]) -> V
271     where
272         V: StoreBytes,
273     {
274         unsafe { V::unsafe_read_be(input) }
275     }
276 
277     /// # Safety
278     /// Caller must ensure the type of Self is appropriate for the hardware of the execution
279     /// environment.
instance() -> Self280     unsafe fn instance() -> Self;
281 }
282 
/// Construction of a vector from a storage representation `S`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
289 
/// Reading a vector from, and writing it to, byte slices in little- or big-endian order.
///
/// NOTE(review): the required slice lengths, and the behaviour when a slice is too short, are
/// not specified by this declaration — they are up to each implementation.
pub trait StoreBytes {
    /// Reads `Self` from `input`, interpreting the bytes as little-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// Reads `Self` from `input`, interpreting the bytes as big-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    /// Writes `self` into `out` in little-endian byte order.
    fn write_le(self, out: &mut [u8]);
    /// Writes `self` into `out` in big-endian byte order.
    fn write_be(self, out: &mut [u8]);
}
302