1 #include "gf.h"
2 #include "parameters.h"
3 #include <stdint.h>
4 /**
5 * @file gf.c
6 * Galois field implementation with multiplication using the pclmulqdq instruction
7 */
8
9
10 static uint16_t gf_reduce(uint64_t x, size_t deg_x);
11
12
13
14 /**
15 * Reduces polynomial x modulo primitive polynomial GF_POLY.
16 * @returns x mod GF_POLY
17 * @param[in] x Polynomial of degree less than 64
18 * @param[in] deg_x The degree of polynomial x
19 */
gf_reduce(uint64_t x,size_t deg_x)20 static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
21 uint16_t z1, z2, rmdr, dist;
22 uint64_t mod;
23 size_t steps, i, j;
24
25 // Deduce the number of steps of reduction
26 steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);
27
28 // Reduce
29 for (i = 0; i < steps; ++i) {
30 mod = x >> PARAM_M;
31 x &= (1 << PARAM_M) - 1;
32 x ^= mod;
33
34 z1 = 0;
35 rmdr = PARAM_GF_POLY ^ 1;
36 for (j = PARAM_GF_POLY_WT - 2; j; --j) {
37 z2 = __tzcnt_u16(rmdr);
38 dist = (uint16_t) (z2 - z1);
39 mod <<= dist;
40 x ^= mod;
41 rmdr ^= 1 << z2;
42 z1 = z2;
43 }
44 }
45
46 return x;
47 }
48
49
50
51 /**
52 * Multiplies two elements of GF(2^GF_M).
53 * @returns the product a*b
54 * @param[in] a Element of GF(2^GF_M)
55 * @param[in] b Element of GF(2^GF_M)
56 */
PQCLEAN_HQCRMRS256_AVX2_gf_mul(uint16_t a,uint16_t b)57 uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mul(uint16_t a, uint16_t b) {
58 __m128i va = _mm_cvtsi32_si128(a);
59 __m128i vb = _mm_cvtsi32_si128(b);
60 __m128i vab = _mm_clmulepi64_si128(va, vb, 0);
61 uint32_t ab = _mm_cvtsi128_si32(vab);
62
63 return gf_reduce(ab, 2 * (PARAM_M - 1));
64 }
65
66
67
68 /**
69 * Compute 16 products in GF(2^GF_M).
70 * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
71 * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
72 * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
73 *
74 */
PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(__m256i a,__m256i b)75 __m256i PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(__m256i a, __m256i b) {
76 __m128i al = _mm256_extractf128_si256(a, 0);
77 __m128i ah = _mm256_extractf128_si256(a, 1);
78 __m128i bl = _mm256_extractf128_si256(b, 0);
79 __m128i bh = _mm256_extractf128_si256(b, 1);
80
81 __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
82 abl0 &= CONST128_MIDDLEMASKL;
83 abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);
84
85 __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
86 abh0 &= CONST128_MIDDLEMASKL;
87 abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);
88
89 abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
90 abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);
91
92 __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
93 abl1 &= CONST128_MIDDLEMASKL;
94 abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);
95
96 __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
97 abh1 &= CONST128_MIDDLEMASKL;
98 abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);
99
100 abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
101 abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);
102
103 __m256i ret = _mm256_set_m128i(abl1, abl0);
104
105 __m256i aux = CONST256_MR0;
106
107 for (int32_t i = 0; i < 7; i++) {
108 ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
109 aux = aux << 1;
110 }
111
112 ret &= CONST256_LASTMASK;
113 return ret;
114 }
115
116
117
118 /**
119 * Squares an element of GF(2^GF_M).
120 * @returns a^2
121 * @param[in] a Element of GF(2^GF_M)
122 */
PQCLEAN_HQCRMRS256_AVX2_gf_square(uint16_t a)123 uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_square(uint16_t a) {
124 uint32_t b = a;
125 uint32_t s = b & 1;
126 for (size_t i = 1; i < PARAM_M; ++i) {
127 b <<= 1;
128 s ^= b & (1 << 2 * i);
129 }
130
131 return gf_reduce(s, 2 * (PARAM_M - 1));
132 }
133
134
135
136 /**
137 * Computes the inverse of an element of GF(2^8),
138 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254
139 * @returns the inverse of a
140 * @param[in] a Element of GF(2^GF_M)
141 */
PQCLEAN_HQCRMRS256_AVX2_gf_inverse(uint16_t a)142 uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_inverse(uint16_t a) {
143 uint16_t inv = a;
144 uint16_t tmp1, tmp2;
145
146 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(a); /* a^2 */
147 tmp1 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, a); /* a^3 */
148 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^4 */
149 tmp2 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp1); /* a^7 */
150 tmp1 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp2); /* a^11 */
151 inv = PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp1, inv); /* a^15 */
152 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^30 */
153 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^60 */
154 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^120 */
155 inv = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp2); /* a^127 */
156 inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^254 */
157 return inv;
158 }
159
160
161
162 /**
163 * Returns i modulo 2^GF_M-1.
164 * i must be less than 2*(2^GF_M-1).
165 * Therefore, the return value is either i or i-2^GF_M+1.
166 * @returns i mod (2^GF_M-1)
167 * @param[in] i The integer whose modulo is taken
168 */
PQCLEAN_HQCRMRS256_AVX2_gf_mod(uint16_t i)169 uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mod(uint16_t i) {
170 uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER);
171
172 // mask = 0xffff if (i < GF_MUL_ORDER)
173 uint16_t mask = -(tmp >> 15);
174
175 return tmp + (mask & PARAM_GF_MUL_ORDER);
176 }
177