1 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not}; 2 3 pub trait AndNot { 4 type Output; andnot(self, rhs: Self) -> Self::Output5 fn andnot(self, rhs: Self) -> Self::Output; 6 } 7 pub trait BSwap { bswap(self) -> Self8 fn bswap(self) -> Self; 9 } 10 /// Ops that depend on word size 11 pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {} 12 /// Ops that are independent of word size and endian 13 pub trait BitOps0: 14 BitAnd<Output = Self> 15 + BitOr<Output = Self> 16 + BitXor<Output = Self> 17 + BitXorAssign 18 + Not<Output = Self> 19 + AndNot<Output = Self> 20 + Sized 21 + Copy 22 + Clone 23 { 24 } 25 26 pub trait BitOps32: BitOps0 + RotateEachWord32 {} 27 pub trait BitOps64: BitOps32 + RotateEachWord64 {} 28 pub trait BitOps128: BitOps64 + RotateEachWord128 {} 29 30 pub trait RotateEachWord32 { rotate_each_word_right7(self) -> Self31 fn rotate_each_word_right7(self) -> Self; rotate_each_word_right8(self) -> Self32 fn rotate_each_word_right8(self) -> Self; rotate_each_word_right11(self) -> Self33 fn rotate_each_word_right11(self) -> Self; rotate_each_word_right12(self) -> Self34 fn rotate_each_word_right12(self) -> Self; rotate_each_word_right16(self) -> Self35 fn rotate_each_word_right16(self) -> Self; rotate_each_word_right20(self) -> Self36 fn rotate_each_word_right20(self) -> Self; rotate_each_word_right24(self) -> Self37 fn rotate_each_word_right24(self) -> Self; rotate_each_word_right25(self) -> Self38 fn rotate_each_word_right25(self) -> Self; 39 } 40 41 pub trait RotateEachWord64 { rotate_each_word_right32(self) -> Self42 fn rotate_each_word_right32(self) -> Self; 43 } 44 45 pub trait RotateEachWord128 {} 46 47 #[allow(non_camel_case_types)] 48 mod types { 49 //! Vector type naming scheme: 50 //! uN[xP]xL 51 //! Unsigned; N-bit words * P bits per lane * L lanes 52 //! 53 //! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of 54 //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and 55 //! slow inter-lane operations. 56 57 use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; 58 use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes}; 59 60 pub trait UnsafeFrom<T> { unsafe_from(t: T) -> Self61 unsafe fn unsafe_from(t: T) -> Self; 62 } 63 64 /// A vector composed of two elements, which may be words or themselves vectors. 65 pub trait Vec2<W> { extract(self, i: u32) -> W66 fn extract(self, i: u32) -> W; insert(self, w: W, i: u32) -> Self67 fn insert(self, w: W, i: u32) -> Self; 68 } 69 70 /// A vector composed of four elements, which may be words or themselves vectors. 71 pub trait Vec4<W> { extract(self, i: u32) -> W72 fn extract(self, i: u32) -> W; insert(self, w: W, i: u32) -> Self73 fn insert(self, w: W, i: u32) -> Self; 74 } 75 76 // TODO: multiples of 4 should inherit this 77 /// A vector composed of four words; depending on their size, operations may cross lanes. 78 pub trait Words4 { shuffle1230(self) -> Self79 fn shuffle1230(self) -> Self; shuffle2301(self) -> Self80 fn shuffle2301(self) -> Self; shuffle3012(self) -> Self81 fn shuffle3012(self) -> Self; 82 } 83 84 /// A vector composed one or more lanes each composed of four words. 85 pub trait LaneWords4 { shuffle_lane_words1230(self) -> Self86 fn shuffle_lane_words1230(self) -> Self; shuffle_lane_words2301(self) -> Self87 fn shuffle_lane_words2301(self) -> Self; shuffle_lane_words3012(self) -> Self88 fn shuffle_lane_words3012(self) -> Self; 89 } 90 91 // TODO: make this a part of BitOps 92 /// Exchange neigboring ranges of bits of the specified size 93 pub trait Swap64 { swap1(self) -> Self94 fn swap1(self) -> Self; swap2(self) -> Self95 fn swap2(self) -> Self; swap4(self) -> Self96 fn swap4(self) -> Self; swap8(self) -> Self97 fn swap8(self) -> Self; swap16(self) -> Self98 fn swap16(self) -> Self; swap32(self) -> Self99 fn swap32(self) -> Self; swap64(self) -> Self100 fn swap64(self) -> Self; 101 } 102 103 pub trait u32x4<M: Machine>: 104 BitOps32 105 + Store<vec128_storage> 106 + ArithOps 107 + Vec4<u32> 108 + Words4 109 + LaneWords4 110 + StoreBytes 111 + MultiLane<[u32; 4]> 112 + Into<vec128_storage> 113 { 114 } 115 pub trait u64x2<M: Machine>: 116 BitOps64 117 + Store<vec128_storage> 118 + ArithOps 119 + Vec2<u64> 120 + MultiLane<[u64; 2]> 121 + Into<vec128_storage> 122 { 123 } 124 pub trait u128x1<M: Machine>: 125 BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage> 126 { 127 } 128 129 pub trait u32x4x2<M: Machine>: 130 BitOps32 131 + Store<vec256_storage> 132 + Vec2<M::u32x4> 133 + MultiLane<[M::u32x4; 2]> 134 + ArithOps 135 + Into<vec256_storage> 136 { 137 } 138 pub trait u64x2x2<M: Machine>: 139 BitOps64 140 + Store<vec256_storage> 141 + Vec2<M::u64x2> 142 + MultiLane<[M::u64x2; 2]> 143 + ArithOps 144 + StoreBytes 145 + Into<vec256_storage> 146 { 147 } 148 pub trait u64x4<M: Machine>: 149 BitOps64 150 + Store<vec256_storage> 151 + Vec4<u64> 152 + MultiLane<[u64; 4]> 153 + ArithOps 154 + Words4 155 + StoreBytes 156 + Into<vec256_storage> 157 { 158 } 159 pub trait u128x2<M: Machine>: 160 BitOps128 161 + Store<vec256_storage> 162 + Vec2<M::u128x1> 163 + MultiLane<[M::u128x1; 2]> 164 + Swap64 165 + Into<vec256_storage> 166 { 167 } 168 169 pub trait u32x4x4<M: Machine>: 170 BitOps32 171 + Store<vec512_storage> 172 + Vec4<M::u32x4> 173 + MultiLane<[M::u32x4; 4]> 174 + ArithOps 175 + LaneWords4 176 + Into<vec512_storage> 177 { 178 } 179 pub trait u64x2x4<M: Machine>: 180 BitOps64 181 + Store<vec512_storage> 182 + Vec4<M::u64x2> 183 + MultiLane<[M::u64x2; 4]> 184 + ArithOps 185 + Into<vec512_storage> 186 { 187 } 188 // TODO: Words4 189 pub trait u128x4<M: Machine>: 190 BitOps128 191 + Store<vec512_storage> 192 + Vec4<M::u128x1> 193 + MultiLane<[M::u128x1; 4]> 194 + Swap64 195 + Into<vec512_storage> 196 { 197 } 198 199 /// A vector composed of multiple 128-bit lanes. 200 pub trait MultiLane<Lanes> { 201 /// Split a multi-lane vector into single-lane vectors. to_lanes(self) -> Lanes202 fn to_lanes(self) -> Lanes; 203 /// Build a multi-lane vector from individual lanes. from_lanes(lanes: Lanes) -> Self204 fn from_lanes(lanes: Lanes) -> Self; 205 } 206 207 /// Combine single vectors into a multi-lane vector. 208 pub trait VZip<V> { vzip(self) -> V209 fn vzip(self) -> V; 210 } 211 212 impl<V, T> VZip<V> for T 213 where 214 V: MultiLane<T>, 215 { 216 #[inline(always)] vzip(self) -> V217 fn vzip(self) -> V { 218 V::from_lanes(self) 219 } 220 } 221 } 222 pub use self::types::*; 223 224 pub trait Machine: Sized + Copy { 225 type u32x4: u32x4<Self>; 226 type u64x2: u64x2<Self>; 227 type u128x1: u128x1<Self>; 228 229 type u32x4x2: u32x4x2<Self>; 230 type u64x2x2: u64x2x2<Self>; 231 type u64x4: u64x4<Self>; 232 type u128x2: u128x2<Self>; 233 234 type u32x4x4: u32x4x4<Self>; 235 type u64x2x4: u64x2x4<Self>; 236 type u128x4: u128x4<Self>; 237 238 #[inline(always)] unpack<S, V: Store<S>>(self, s: S) -> V239 fn unpack<S, V: Store<S>>(self, s: S) -> V { 240 unsafe { V::unpack(s) } 241 } 242 243 #[inline(always)] vec<V, A>(self, a: A) -> V where V: MultiLane<A>,244 fn vec<V, A>(self, a: A) -> V 245 where 246 V: MultiLane<A>, 247 { 248 V::from_lanes(a) 249 } 250 251 #[inline(always)] read_le<V>(self, input: &[u8]) -> V where V: StoreBytes,252 fn read_le<V>(self, input: &[u8]) -> V 253 where 254 V: StoreBytes, 255 { 256 unsafe { V::unsafe_read_le(input) } 257 } 258 259 #[inline(always)] read_be<V>(self, input: &[u8]) -> V where V: StoreBytes,260 fn read_be<V>(self, input: &[u8]) -> V 261 where 262 V: StoreBytes, 263 { 264 unsafe { V::unsafe_read_be(input) } 265 } 266 instance() -> Self267 unsafe fn instance() -> Self; 268 } 269 270 pub trait Store<S> { unpack(p: S) -> Self271 unsafe fn unpack(p: S) -> Self; 272 } 273 274 pub trait StoreBytes { unsafe_read_le(input: &[u8]) -> Self275 unsafe fn unsafe_read_le(input: &[u8]) -> Self; unsafe_read_be(input: &[u8]) -> Self276 unsafe fn unsafe_read_be(input: &[u8]) -> Self; write_le(self, out: &mut [u8])277 fn write_le(self, out: &mut [u8]); write_be(self, out: &mut [u8])278 fn write_be(self, out: &mut [u8]); 279 } 280