1 #![allow(non_camel_case_types)] 2 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not}; 3 4 pub trait AndNot { 5 type Output; andnot(self, rhs: Self) -> Self::Output6 fn andnot(self, rhs: Self) -> Self::Output; 7 } 8 pub trait BSwap { bswap(self) -> Self9 fn bswap(self) -> Self; 10 } 11 /// Ops that depend on word size 12 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {} 13 /// Ops that are independent of word size and endian 14 pub trait BitOps0: 15 BitAnd<Output = Self> 16 + BitOr<Output = Self> 17 + BitXor<Output = Self> 18 + BitXorAssign 19 + Not<Output = Self> 20 + AndNot<Output = Self> 21 + Sized 22 + Copy 23 + Clone 24 { 25 } 26 27 pub trait BitOps32: BitOps0 + RotateEachWord32 {} 28 pub trait BitOps64: BitOps32 + RotateEachWord64 {} 29 pub trait BitOps128: BitOps64 + RotateEachWord128 {} 30 31 pub trait RotateEachWord32 { rotate_each_word_right7(self) -> Self32 fn rotate_each_word_right7(self) -> Self; rotate_each_word_right8(self) -> Self33 fn rotate_each_word_right8(self) -> Self; rotate_each_word_right11(self) -> Self34 fn rotate_each_word_right11(self) -> Self; rotate_each_word_right12(self) -> Self35 fn rotate_each_word_right12(self) -> Self; rotate_each_word_right16(self) -> Self36 fn rotate_each_word_right16(self) -> Self; rotate_each_word_right20(self) -> Self37 fn rotate_each_word_right20(self) -> Self; rotate_each_word_right24(self) -> Self38 fn rotate_each_word_right24(self) -> Self; rotate_each_word_right25(self) -> Self39 fn rotate_each_word_right25(self) -> Self; 40 } 41 42 pub trait RotateEachWord64 { rotate_each_word_right32(self) -> Self43 fn rotate_each_word_right32(self) -> Self; 44 } 45 46 pub trait RotateEachWord128 {} 47 48 // Vector type naming scheme: 49 // uN[xP]xL 50 // Unsigned; N-bit words * P bits per lane * L lanes 51 // 52 // A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of 53 // wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and 54 // slow inter-lane operations. 55 56 use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; 57 58 #[allow(clippy::missing_safety_doc)] 59 pub trait UnsafeFrom<T> { unsafe_from(t: T) -> Self60 unsafe fn unsafe_from(t: T) -> Self; 61 } 62 63 /// A vector composed of two elements, which may be words or themselves vectors. 64 pub trait Vec2<W> { extract(self, i: u32) -> W65 fn extract(self, i: u32) -> W; insert(self, w: W, i: u32) -> Self66 fn insert(self, w: W, i: u32) -> Self; 67 } 68 69 /// A vector composed of four elements, which may be words or themselves vectors. 70 pub trait Vec4<W> { extract(self, i: u32) -> W71 fn extract(self, i: u32) -> W; insert(self, w: W, i: u32) -> Self72 fn insert(self, w: W, i: u32) -> Self; 73 } 74 /// Vec4 functions which may not be implemented yet for all Vec4 types. 75 /// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage, 76 /// import Vec4Ext only together with Vec4, and don't qualify its methods. 77 pub trait Vec4Ext<W> { transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) where Self: Sized78 fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) where Self: Sized; 79 } 80 pub trait Vector<T> { to_scalars(self) -> T81 fn to_scalars(self) -> T; 82 } 83 84 // TODO: multiples of 4 should inherit this 85 /// A vector composed of four words; depending on their size, operations may cross lanes. 86 pub trait Words4 { shuffle1230(self) -> Self87 fn shuffle1230(self) -> Self; shuffle2301(self) -> Self88 fn shuffle2301(self) -> Self; shuffle3012(self) -> Self89 fn shuffle3012(self) -> Self; 90 } 91 92 /// A vector composed one or more lanes each composed of four words. 93 pub trait LaneWords4 { shuffle_lane_words1230(self) -> Self94 fn shuffle_lane_words1230(self) -> Self; shuffle_lane_words2301(self) -> Self95 fn shuffle_lane_words2301(self) -> Self; shuffle_lane_words3012(self) -> Self96 fn shuffle_lane_words3012(self) -> Self; 97 } 98 99 // TODO: make this a part of BitOps 100 /// Exchange neigboring ranges of bits of the specified size 101 pub trait Swap64 { swap1(self) -> Self102 fn swap1(self) -> Self; swap2(self) -> Self103 fn swap2(self) -> Self; swap4(self) -> Self104 fn swap4(self) -> Self; swap8(self) -> Self105 fn swap8(self) -> Self; swap16(self) -> Self106 fn swap16(self) -> Self; swap32(self) -> Self107 fn swap32(self) -> Self; swap64(self) -> Self108 fn swap64(self) -> Self; 109 } 110 111 pub trait u32x4<M: Machine>: 112 BitOps32 113 + Store<vec128_storage> 114 + ArithOps 115 + Vec4<u32> 116 + Words4 117 + LaneWords4 118 + StoreBytes 119 + MultiLane<[u32; 4]> 120 + Into<vec128_storage> 121 { 122 } 123 pub trait u64x2<M: Machine>: 124 BitOps64 125 + Store<vec128_storage> 126 + ArithOps 127 + Vec2<u64> 128 + MultiLane<[u64; 2]> 129 + Into<vec128_storage> 130 { 131 } 132 pub trait u128x1<M: Machine>: 133 BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage> 134 { 135 } 136 137 pub trait u32x4x2<M: Machine>: 138 BitOps32 139 + Store<vec256_storage> 140 + Vec2<M::u32x4> 141 + MultiLane<[M::u32x4; 2]> 142 + ArithOps 143 + Into<vec256_storage> 144 + StoreBytes 145 { 146 } 147 pub trait u64x2x2<M: Machine>: 148 BitOps64 149 + Store<vec256_storage> 150 + Vec2<M::u64x2> 151 + MultiLane<[M::u64x2; 2]> 152 + ArithOps 153 + StoreBytes 154 + Into<vec256_storage> 155 { 156 } 157 pub trait u64x4<M: Machine>: 158 BitOps64 159 + Store<vec256_storage> 160 + Vec4<u64> 161 + MultiLane<[u64; 4]> 162 + ArithOps 163 + Words4 164 + StoreBytes 165 + Into<vec256_storage> 166 { 167 } 168 pub trait u128x2<M: Machine>: 169 BitOps128 170 + Store<vec256_storage> 171 + Vec2<M::u128x1> 172 + MultiLane<[M::u128x1; 2]> 173 + Swap64 174 + Into<vec256_storage> 175 { 176 } 177 178 pub trait u32x4x4<M: Machine>: 179 BitOps32 180 + Store<vec512_storage> 181 + Vec4<M::u32x4> 182 + Vec4Ext<M::u32x4> 183 + Vector<[u32; 16]> 184 + MultiLane<[M::u32x4; 4]> 185 + ArithOps 186 + LaneWords4 187 + Into<vec512_storage> 188 + StoreBytes 189 { 190 } 191 pub trait u64x2x4<M: Machine>: 192 BitOps64 193 + Store<vec512_storage> 194 + Vec4<M::u64x2> 195 + MultiLane<[M::u64x2; 4]> 196 + ArithOps 197 + Into<vec512_storage> 198 { 199 } 200 // TODO: Words4 201 pub trait u128x4<M: Machine>: 202 BitOps128 203 + Store<vec512_storage> 204 + Vec4<M::u128x1> 205 + MultiLane<[M::u128x1; 4]> 206 + Swap64 207 + Into<vec512_storage> 208 { 209 } 210 211 /// A vector composed of multiple 128-bit lanes. 212 pub trait MultiLane<Lanes> { 213 /// Split a multi-lane vector into single-lane vectors. to_lanes(self) -> Lanes214 fn to_lanes(self) -> Lanes; 215 /// Build a multi-lane vector from individual lanes. from_lanes(lanes: Lanes) -> Self216 fn from_lanes(lanes: Lanes) -> Self; 217 } 218 219 /// Combine single vectors into a multi-lane vector. 220 pub trait VZip<V> { vzip(self) -> V221 fn vzip(self) -> V; 222 } 223 224 impl<V, T> VZip<V> for T 225 where 226 V: MultiLane<T>, 227 { 228 #[inline(always)] vzip(self) -> V229 fn vzip(self) -> V { 230 V::from_lanes(self) 231 } 232 } 233 234 pub trait Machine: Sized + Copy { 235 type u32x4: u32x4<Self>; 236 type u64x2: u64x2<Self>; 237 type u128x1: u128x1<Self>; 238 239 type u32x4x2: u32x4x2<Self>; 240 type u64x2x2: u64x2x2<Self>; 241 type u64x4: u64x4<Self>; 242 type u128x2: u128x2<Self>; 243 244 type u32x4x4: u32x4x4<Self>; 245 type u64x2x4: u64x2x4<Self>; 246 type u128x4: u128x4<Self>; 247 248 #[inline(always)] unpack<S, V: Store<S>>(self, s: S) -> V249 fn unpack<S, V: Store<S>>(self, s: S) -> V { 250 unsafe { V::unpack(s) } 251 } 252 253 #[inline(always)] vec<V, A>(self, a: A) -> V where V: MultiLane<A>,254 fn vec<V, A>(self, a: A) -> V 255 where 256 V: MultiLane<A>, 257 { 258 V::from_lanes(a) 259 } 260 261 #[inline(always)] read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,262 fn read_le<V>(self, input: &[u8]) -> V 263 where 264 V: StoreBytes, 265 { 266 unsafe { V::unsafe_read_le(input) } 267 } 268 269 #[inline(always)] read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,270 fn read_be<V>(self, input: &[u8]) -> V 271 where 272 V: StoreBytes, 273 { 274 unsafe { V::unsafe_read_be(input) } 275 } 276 277 /// # Safety 278 /// Caller must ensure the type of Self is appropriate for the hardware of the execution 279 /// environment. instance() -> Self280 unsafe fn instance() -> Self; 281 } 282 283 pub trait Store<S> { 284 /// # Safety 285 /// Caller must ensure the type of Self is appropriate for the hardware of the execution 286 /// environment. unpack(p: S) -> Self287 unsafe fn unpack(p: S) -> Self; 288 } 289 290 pub trait StoreBytes { 291 /// # Safety 292 /// Caller must ensure the type of Self is appropriate for the hardware of the execution 293 /// environment. unsafe_read_le(input: &[u8]) -> Self294 unsafe fn unsafe_read_le(input: &[u8]) -> Self; 295 /// # Safety 296 /// Caller must ensure the type of Self is appropriate for the hardware of the execution 297 /// environment. unsafe_read_be(input: &[u8]) -> Self298 unsafe fn unsafe_read_be(input: &[u8]) -> Self; write_le(self, out: &mut [u8])299 fn write_le(self, out: &mut [u8]); write_be(self, out: &mut [u8])300 fn write_be(self, out: &mut [u8]); 301 } 302