1 //
2 // arch/x86_64/rsp/vmul.h
3 //
4 // This file is subject to the terms and conditions defined in
5 // 'LICENSE', which is part of this source code package.
6 //
7
8 //
9 // TODO: CHECK ME.
10 //
11
//
// Signed 16-bit fractional multiply with rounding, applied independently
// to each of the eight 16-bit lanes of vs and vt.
//
// For every lane the accumulator value 2 * vs * vt + 0x8000 is formed and
// written out as three 16-bit slices:
//
//   *acc_lo  bits 15..0
//   *acc_md  bits 31..16
//   *acc_hi  0xFFFF when the accumulator is negative, 0x0000 otherwise
//
// The returned vector is the middle slice after clamping:
//
//   VMULU == false (VMULF)  middle slice, with the single overflowing
//                           lane value replaced by 0x7FFF.
//   VMULU == true  (VMULU)  0x0000 when the accumulator is negative,
//                           0xFFFF for the overflowing lane value,
//                           the middle slice otherwise.
//
// The doubled product leaves the signed 32-bit range only when
// vs == vt == -32768. In that lane the middle slice has its top bit set
// even though the accumulator is positive. Any other lane with vs == vt
// is a square small enough to keep the middle slice non-negative, so
// "vs == vt and middle slice negative" identifies exactly the overflow.
//
// zero is used only as both operands of a compare-equal to build an
// all-ones vector, so its contents do not influence the result.
//
template <bool VMULU>
static inline __m128i rsp_vmulf_vmulu(__m128i vs, __m128i vt, __m128i zero, __m128i *acc_lo, __m128i *acc_md,
                                      __m128i *acc_hi)
{
    // 0x8000 in every lane: all ones shifted left by 15.
    const __m128i round = _mm_slli_epi16(_mm_cmpeq_epi16(zero, zero), 15);

    // Low and high 16-bit halves of the signed 32-bit product.
    const __m128i prod_lo = _mm_mullo_epi16(vs, vt);
    const __m128i prod_hi = _mm_mulhi_epi16(vs, vt);

    // Low slice: double the low half, then add the rounding constant.
    // Each of the two steps can carry a single 1 into the middle slice;
    // the carry is the top bit of the value before the step.
    const __m128i carry_dbl = _mm_srli_epi16(prod_lo, 15);
    const __m128i lo_dbl = _mm_add_epi16(prod_lo, prod_lo);
    const __m128i carry_rnd = _mm_srli_epi16(lo_dbl, 15);
    const __m128i lo = _mm_add_epi16(round, lo_dbl);

    // Middle slice: doubled high half plus both carries.
    const __m128i carries = _mm_add_epi16(carry_dbl, carry_rnd);
    const __m128i md = _mm_add_epi16(_mm_slli_epi16(prod_hi, 1), carries);

    // All ones in lanes whose middle slice has its top bit set.
    const __m128i md_neg = _mm_srai_epi16(md, 15);

    // All ones in lanes where vs == vt; together with md_neg this marks
    // the overflow lane described above.
    const __m128i same = _mm_cmpeq_epi16(vs, vt);

    // High slice: sign extension, suppressed in the overflow lane where
    // the accumulator is actually positive.
    const __m128i hi = _mm_andnot_si128(same, md_neg);

    *acc_lo = lo;
    *acc_md = md;
    *acc_hi = hi;

    // VMULU: negative accumulators clamp to 0x0000, overflow to 0xFFFF.
    if (VMULU)
    {
        const __m128i saturated = _mm_or_si128(md, md_neg);
        return _mm_andnot_si128(hi, saturated);
    }

    // VMULF: adding 0xFFFF (-1) turns the overflow value 0x8000 into 0x7FFF.
    return _mm_add_epi16(md, _mm_and_si128(same, md_neg));
}
50