1 #include "gf.h" 2 #include "parameters.h" 3 #include <stdint.h> 4 /** 5 * @file gf.c 6 * Galois field implementation with multiplication using the pclmulqdq instruction 7 */ 8 9 10 static uint16_t gf_reduce(uint64_t x, size_t deg_x); 11 12 13 14 /** 15 * Reduces polynomial x modulo primitive polynomial GF_POLY. 16 * @returns x mod GF_POLY 17 * @param[in] x Polynomial of degree less than 64 18 * @param[in] deg_x The degree of polynomial x 19 */ 20 static uint16_t gf_reduce(uint64_t x, size_t deg_x) { 21 uint16_t z1, z2, rmdr, dist; 22 uint64_t mod; 23 size_t steps, i, j; 24 25 // Deduce the number of steps of reduction 26 steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); 27 28 // Reduce 29 for (i = 0; i < steps; ++i) { 30 mod = x >> PARAM_M; 31 x &= (1 << PARAM_M) - 1; 32 x ^= mod; 33 34 z1 = 0; 35 rmdr = PARAM_GF_POLY ^ 1; 36 for (j = PARAM_GF_POLY_WT - 2; j; --j) { 37 z2 = __tzcnt_u16(rmdr); 38 dist = (uint16_t) (z2 - z1); 39 mod <<= dist; 40 x ^= mod; 41 rmdr ^= 1 << z2; 42 z1 = z2; 43 } 44 } 45 46 return x; 47 } 48 49 50 51 /** 52 * Multiplies two elements of GF(2^GF_M). 53 * @returns the product a*b 54 * @param[in] a Element of GF(2^GF_M) 55 * @param[in] b Element of GF(2^GF_M) 56 */ 57 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b) { 58 __m128i va = _mm_cvtsi32_si128(a); 59 __m128i vb = _mm_cvtsi32_si128(b); 60 __m128i vab = _mm_clmulepi64_si128(va, vb, 0); 61 uint32_t ab = _mm_cvtsi128_si32(vab); 62 63 return gf_reduce(ab, 2 * (PARAM_M - 1)); 64 } 65 66 67 68 /** 69 * Compute 16 products in GF(2^GF_M). 70 * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M) 71 * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers 72 * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer 73 * 74 */ 75 __m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b) { 76 __m128i al = _mm256_extractf128_si256(a, 0); 77 __m128i ah = _mm256_extractf128_si256(a, 1); 78 __m128i bl = _mm256_extractf128_si256(b, 0); 79 __m128i bh = _mm256_extractf128_si256(b, 1); 80 81 __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0); 82 abl0 &= CONST128_MIDDLEMASKL; 83 abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); 84 85 __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11); 86 abh0 &= CONST128_MIDDLEMASKL; 87 abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); 88 89 abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL); 90 abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH); 91 92 __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0); 93 abl1 &= CONST128_MIDDLEMASKL; 94 abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); 95 96 __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11); 97 abh1 &= CONST128_MIDDLEMASKL; 98 abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); 99 100 abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL); 101 abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH); 102 103 __m256i ret = _mm256_set_m128i(abl1, abl0); 104 105 __m256i aux = CONST256_MR0; 106 107 for (int32_t i = 0; i < 7; i++) { 108 ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux); 109 aux = aux << 1; 110 } 111 112 ret &= CONST256_LASTMASK; 113 return ret; 114 } 115 116 117 118 /** 119 * Squares an element of GF(2^GF_M). 120 * @returns a^2 121 * @param[in] a Element of GF(2^GF_M) 122 */ 123 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a) { 124 uint32_t b = a; 125 uint32_t s = b & 1; 126 for (size_t i = 1; i < PARAM_M; ++i) { 127 b <<= 1; 128 s ^= b & (1 << 2 * i); 129 } 130 131 return gf_reduce(s, 2 * (PARAM_M - 1)); 132 } 133 134 135 136 /** 137 * Computes the inverse of an element of GF(2^8), 138 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254 139 * @returns the inverse of a 140 * @param[in] a Element of GF(2^GF_M) 141 */ 142 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a) { 143 uint16_t inv = a; 144 uint16_t tmp1, tmp2; 145 146 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(a); /* a^2 */ 147 tmp1 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, a); /* a^3 */ 148 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^4 */ 149 tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp1); /* a^7 */ 150 tmp1 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp2); /* a^11 */ 151 inv = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, inv); /* a^15 */ 152 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^30 */ 153 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^60 */ 154 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^120 */ 155 inv = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp2); /* a^127 */ 156 inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^254 */ 157 return inv; 158 } 159 160 161 162 /** 163 * Returns i modulo 2^GF_M-1. 164 * i must be less than 2*(2^GF_M-1). 165 * Therefore, the return value is either i or i-2^GF_M+1. 166 * @returns i mod (2^GF_M-1) 167 * @param[in] i The integer whose modulo is taken 168 */ 169 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i) { 170 uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); 171 172 // mask = 0xffff if (i < GF_MUL_ORDER) 173 uint16_t mask = -(tmp >> 15); 174 175 return tmp + (mask & PARAM_GF_MUL_ORDER); 176 } 177