1 /*
2   This file is for functions related to 256-bit vectors
3   including functions for bitsliced field operations
4 */
5 
6 #include "vec256.h"
7 
8 extern void PQCLEAN_MCELIECE6960119_AVX_vec256_mul_asm(vec256 *, vec256 *, const vec256 *);
9 
PQCLEAN_MCELIECE6960119_AVX_vec256_set1_16b(uint16_t a)10 vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_set1_16b(uint16_t a) {
11     return _mm256_set1_epi16(a);
12 }
13 
PQCLEAN_MCELIECE6960119_AVX_vec256_setzero(void)14 vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_setzero(void) {
15     return  _mm256_setzero_si256();
16 }
17 
PQCLEAN_MCELIECE6960119_AVX_vec256_set4x(uint64_t a0,uint64_t a1,uint64_t a2,uint64_t a3)18 vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_set4x(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) {
19     return  _mm256_set_epi64x(a3, a2, a1, a0);
20 }
21 
/*
  Test whether a 256-bit vector is entirely zero.

  Returns 1 when every bit of a is 0, and 0 otherwise (the intrinsic reports
  whether a AND a is all-zero).
*/
int PQCLEAN_MCELIECE6960119_AVX_vec256_testz(vec256 a) {
    const int all_zero = _mm256_testz_si256(a, a);

    return all_zero;
}
25 
/*
  Bitwise AND of two 256-bit vectors.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_and(vec256 a, vec256 b) {
    const vec256 conj = _mm256_and_si256(a, b);

    return conj;
}
29 
/*
  Bitwise XOR of two 256-bit vectors.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_xor(vec256 a, vec256 b) {
    const vec256 sum = _mm256_xor_si256(a, b);

    return sum;
}
33 
/*
  Bitwise OR of two 256-bit vectors.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_or(vec256 a, vec256 b) {
    const vec256 disj = _mm256_or_si256(a, b);

    return disj;
}
37 
/*
  Logical left shift of each of the four 64-bit lanes of a by s bits,
  shifting in zeros.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_sll_4x(vec256 a, int s) {
    const vec256 shifted = _mm256_slli_epi64(a, s);

    return shifted;
}
41 
/*
  Logical right shift of each of the four 64-bit lanes of a by s bits,
  shifting in zeros.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_srl_4x(vec256 a, int s) {
    const vec256 shifted = _mm256_srli_epi64(a, s);

    return shifted;
}
45 
/*
  Combine the low 128-bit halves of a and b.

  The result holds the low half of a in its low 128 bits and the low half
  of b in its high 128 bits.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_unpack_low(vec256 a, vec256 b) {
    /* selector: bits 1:0 = 0 (low half of a), bits 5:4 = 2 (low half of b) */
    enum { SEL_LOW_A_LOW_B = 0x20 };

    return _mm256_permute2x128_si256(a, b, SEL_LOW_A_LOW_B);
}
49 
/*
  Combine the high 128-bit halves of a and b.

  The result holds the high half of a in its low 128 bits and the high half
  of b in its high 128 bits.
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_unpack_high(vec256 a, vec256 b) {
    /* selector: bits 1:0 = 1 (high half of a), bits 5:4 = 3 (high half of b) */
    enum { SEL_HIGH_A_HIGH_B = 0x31 };

    return _mm256_permute2x128_si256(a, b, SEL_HIGH_A_HIGH_B);
}
53 
/*
  Interleave the low 64-bit words of a and b within each 128-bit half.

  With 64-bit words numbered from least significant, the result is
  (a[0], b[0], a[2], b[2]).
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_unpack_low_2x(vec256 a, vec256 b) {
    const vec256 interleaved = _mm256_unpacklo_epi64(a, b);

    return interleaved;
}
57 
/*
  Interleave the high 64-bit words of a and b within each 128-bit half.

  With 64-bit words numbered from least significant, the result is
  (a[1], b[1], a[3], b[3]).
*/
vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_unpack_high_2x(vec256 a, vec256 b) {
    const vec256 interleaved = _mm256_unpackhi_epi64(a, b);

    return interleaved;
}
61 
PQCLEAN_MCELIECE6960119_AVX_vec256_or_reduce(const vec256 * a)62 vec256 PQCLEAN_MCELIECE6960119_AVX_vec256_or_reduce(const vec256 *a) {
63     int i;
64     vec256 ret;
65 
66     ret = a[0];
67     for (i = 1; i < GFBITS; i++) {
68         ret = PQCLEAN_MCELIECE6960119_AVX_vec256_or(ret, a[i]);
69     }
70 
71     return ret;
72 }
73 
/*
  Copy GFBITS vectors from src to dest, element by element in ascending
  index order.
*/
void PQCLEAN_MCELIECE6960119_AVX_vec256_copy(vec256 *dest, const vec256 *src) {
    int idx = 0;

    while (idx < GFBITS) {
        dest[idx] = src[idx];
        idx++;
    }
}
81 
82 
/*
  Thin wrapper that forwards h, f and g unchanged to the external routine
  PQCLEAN_MCELIECE6960119_AVX_vec256_mul_asm.

  The inversion routine in this file uses it as the bitsliced field
  multiplication h = f * g, including calls where h and f are the same
  array.
*/
void PQCLEAN_MCELIECE6960119_AVX_vec256_mul(vec256 *h, vec256 *f, const vec256 *g) {
    PQCLEAN_MCELIECE6960119_AVX_vec256_mul_asm(h, f, g);
}
86 
87 /* bitsliced field squarings */
PQCLEAN_MCELIECE6960119_AVX_vec256_sq(vec256 * out,const vec256 * in)88 void PQCLEAN_MCELIECE6960119_AVX_vec256_sq(vec256 *out, const vec256 *in) {
89     int i;
90     vec256 result[GFBITS], t;
91 
92     t = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[11], in[12]);
93 
94     result[0] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[0], in[11]);
95     result[1] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[7], t);
96     result[2] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[1], in[7]);
97     result[3] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[8], t);
98     result[4] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[2], in[7]);
99     result[4] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[4], in[8]);
100     result[4] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[4], t);
101     result[5] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[7], in[9]);
102     result[6] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[3], in[8]);
103     result[6] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[6], in[9]);
104     result[6] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[6], in[12]);
105     result[7] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[8], in[10]);
106     result[8] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[4], in[9]);
107     result[8] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[8], in[10]);
108     result[9] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[9], in[11]);
109     result[10] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[5], in[10]);
110     result[10] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(result[10], in[11]);
111     result[11] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[10], in[12]);
112     result[12] = PQCLEAN_MCELIECE6960119_AVX_vec256_xor(in[6], t);
113 
114     for (i = 0; i < GFBITS; i++) {
115         out[i] = result[i];
116     }
117 }
118 
119 /* bitsliced field inverses */
PQCLEAN_MCELIECE6960119_AVX_vec256_inv(vec256 * out,const vec256 * in)120 void PQCLEAN_MCELIECE6960119_AVX_vec256_inv(vec256 *out, const vec256 *in) {
121     vec256 tmp_11[ GFBITS ];
122     vec256 tmp_1111[ GFBITS ];
123 
124     PQCLEAN_MCELIECE6960119_AVX_vec256_copy(out, in);
125 
126     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
127     PQCLEAN_MCELIECE6960119_AVX_vec256_mul(tmp_11, out, in); // ^11
128 
129     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, tmp_11);
130     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
131     PQCLEAN_MCELIECE6960119_AVX_vec256_mul(tmp_1111, out, tmp_11); // ^1111
132 
133     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, tmp_1111);
134     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
135     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
136     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
137     PQCLEAN_MCELIECE6960119_AVX_vec256_mul(out, out, tmp_1111); // ^11111111
138 
139     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
140     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
141     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
142     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out);
143     PQCLEAN_MCELIECE6960119_AVX_vec256_mul(out, out, tmp_1111); // ^111111111111
144 
145     PQCLEAN_MCELIECE6960119_AVX_vec256_sq(out, out); // ^1111111111110
146 }
147