/*
  This file is for functions related to 256-bit vectors,
  including functions for bitsliced field operations
*/
5
#include "vec256.h"
7
8 extern void PQCLEAN_MCELIECE348864F_AVX_vec256_mul_asm(vec256 *, vec256 *, const vec256 *);
9
PQCLEAN_MCELIECE348864F_AVX_vec256_set1_16b(uint16_t a)10 vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_set1_16b(uint16_t a) {
11 return _mm256_set1_epi16(a);
12 }
13
PQCLEAN_MCELIECE348864F_AVX_vec256_setzero(void)14 vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_setzero(void) {
15 return _mm256_setzero_si256();
16 }
17
PQCLEAN_MCELIECE348864F_AVX_vec256_set4x(uint64_t a0,uint64_t a1,uint64_t a2,uint64_t a3)18 vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_set4x(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) {
19 return _mm256_set_epi64x(a3, a2, a1, a0);
20 }
21
/*
 * Test whether a 256-bit vector is entirely zero.
 *
 * Returns 1 when every bit of a is clear and 0 otherwise: the intrinsic
 * reports whether (a AND a), i.e. a itself, is all zeros.
 */
int PQCLEAN_MCELIECE348864F_AVX_vec256_testz(vec256 a) {
    return _mm256_testz_si256(a, a);
}
25
/*
 * Bitwise AND of two 256-bit vectors.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_and(vec256 a, vec256 b) {
    return _mm256_and_si256(a, b);
}
29
/*
 * Bitwise XOR of two 256-bit vectors.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_xor(vec256 a, vec256 b) {
    return _mm256_xor_si256(a, b);
}
33
/*
 * Bitwise OR of two 256-bit vectors.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_or(vec256 a, vec256 b) {
    return _mm256_or_si256(a, b);
}
37
/*
 * Logical left shift of each of the four 64-bit lanes of a by s bits,
 * shifting in zeros. Bits do not cross lane boundaries.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_sll_4x(vec256 a, int s) {
    return _mm256_slli_epi64(a, s);
}
41
/*
 * Logical right shift of each of the four 64-bit lanes of a by s bits,
 * shifting in zeros. Bits do not cross lane boundaries.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_srl_4x(vec256 a, int s) {
    return _mm256_srli_epi64(a, s);
}
45
/*
 * Combine the low 128-bit halves of a and b.
 *
 * The result holds the low half of a in its low 128 bits and the low half
 * of b in its high 128 bits.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_unpack_low(vec256 a, vec256 b) {
    /* selector 0x20: low nibble 0 picks a's low half for the low result half,
       high nibble 2 picks b's low half for the high result half */
    return _mm256_permute2x128_si256 (a, b, 0x20);
}
49
/*
 * Combine the high 128-bit halves of a and b.
 *
 * The result holds the high half of a in its low 128 bits and the high half
 * of b in its high 128 bits.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_unpack_high(vec256 a, vec256 b) {
    /* selector 0x31: low nibble 1 picks a's high half for the low result half,
       high nibble 3 picks b's high half for the high result half */
    return _mm256_permute2x128_si256 (a, b, 0x31);
}
53
/*
 * Interleave the low 64-bit word of each 128-bit half of a and b.
 *
 * With 64-bit lanes numbered from the least significant one, the result is
 * { a[0], b[0], a[2], b[2] }.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_unpack_low_2x(vec256 a, vec256 b) {
    return _mm256_unpacklo_epi64 (a, b);
}
57
/*
 * Interleave the high 64-bit word of each 128-bit half of a and b.
 *
 * With 64-bit lanes numbered from the least significant one, the result is
 * { a[1], b[1], a[3], b[3] }.
 */
vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_unpack_high_2x(vec256 a, vec256 b) {
    return _mm256_unpackhi_epi64 (a, b);
}
61
PQCLEAN_MCELIECE348864F_AVX_vec256_or_reduce(const vec256 * a)62 vec256 PQCLEAN_MCELIECE348864F_AVX_vec256_or_reduce(const vec256 *a) {
63 int i;
64 vec256 ret;
65
66 ret = a[0];
67 for (i = 1; i < GFBITS; i++) {
68 ret = PQCLEAN_MCELIECE348864F_AVX_vec256_or(ret, a[i]);
69 }
70
71 return ret;
72 }
73
/*
 * Copy GFBITS vectors from src to dest, element by element in ascending
 * index order.
 *
 * Both pointers must refer to arrays of at least GFBITS vectors.
 */
void PQCLEAN_MCELIECE348864F_AVX_vec256_copy(vec256 *dest, const vec256 *src) {
    int i;

    for (i = 0; i < GFBITS; i++) {
        dest[i] = src[i];
    }
}
81
82
/*
 * Thin C entry point for the external multiplication routine: forwards
 * (h, f, g) unchanged to PQCLEAN_MCELIECE348864F_AVX_vec256_mul_asm.
 *
 * NOTE(review): from its use in vec256_inv, h presumably receives the
 * bitsliced field product of f and g, and h may be the same array as f;
 * confirm against the assembly source.
 */
void PQCLEAN_MCELIECE348864F_AVX_vec256_mul(vec256 *h, vec256 *f, const vec256 *g) {
    PQCLEAN_MCELIECE348864F_AVX_vec256_mul_asm(h, f, g);
}
86
87 /* bitsliced field squarings */
PQCLEAN_MCELIECE348864F_AVX_vec256_sq(vec256 * out,const vec256 * in)88 void PQCLEAN_MCELIECE348864F_AVX_vec256_sq(vec256 *out, const vec256 *in) {
89 int i;
90 vec256 result[GFBITS];
91
92 result[0] = in[0] ^ in[6];
93 result[1] = in[11];
94 result[2] = in[1] ^ in[7];
95 result[3] = in[6];
96 result[4] = in[2] ^ in[11] ^ in[8];
97 result[5] = in[7];
98 result[6] = in[3] ^ in[9];
99 result[7] = in[8];
100 result[8] = in[4] ^ in[10];
101 result[9] = in[9];
102 result[10] = in[5] ^ in[11];
103 result[11] = in[10];
104
105 for (i = 0; i < GFBITS; i++) {
106 out[i] = result[i];
107 }
108 }
109
110 /* bitsliced field inverses */
PQCLEAN_MCELIECE348864F_AVX_vec256_inv(vec256 * out,const vec256 * in)111 void PQCLEAN_MCELIECE348864F_AVX_vec256_inv(vec256 *out, const vec256 *in) {
112 vec256 tmp_11[ GFBITS ];
113 vec256 tmp_1111[ GFBITS ];
114
115 PQCLEAN_MCELIECE348864F_AVX_vec256_copy(out, in);
116
117 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
118 PQCLEAN_MCELIECE348864F_AVX_vec256_mul(tmp_11, out, in); // ^11
119
120 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, tmp_11);
121 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
122 PQCLEAN_MCELIECE348864F_AVX_vec256_mul(tmp_1111, out, tmp_11); // ^1111
123
124 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, tmp_1111);
125 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
126 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
127 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
128 PQCLEAN_MCELIECE348864F_AVX_vec256_mul(out, out, tmp_1111); // ^11111111
129
130 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
131 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
132 PQCLEAN_MCELIECE348864F_AVX_vec256_mul(out, out, tmp_11); // ^1111111111
133 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out);
134 PQCLEAN_MCELIECE348864F_AVX_vec256_mul(out, out, in); // ^11111111111
135
136 PQCLEAN_MCELIECE348864F_AVX_vec256_sq(out, out); // ^111111111110
137 }
138