1 #include "inner.h"
2
3 /*
4 * Support functions for signatures (hash-to-point, norm).
5 *
6 * ==========================(LICENSE BEGIN)============================
7 *
8 * Copyright (c) 2017-2019 Falcon Project
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining
11 * a copy of this software and associated documentation files (the
12 * "Software"), to deal in the Software without restriction, including
13 * without limitation the rights to use, copy, modify, merge, publish,
14 * distribute, sublicense, and/or sell copies of the Software, and to
15 * permit persons to whom the Software is furnished to do so, subject to
16 * the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 * ===========================(LICENSE END)=============================
30 *
31 * @author Thomas Pornin <thomas.pornin@nccgroup.com>
32 */
33
34
35 /* see inner.h */
36 void
PQCLEAN_FALCON512_AVX2_hash_to_point_vartime(inner_shake256_context * sc,uint16_t * x,unsigned logn)37 PQCLEAN_FALCON512_AVX2_hash_to_point_vartime(
38 inner_shake256_context *sc,
39 uint16_t *x, unsigned logn) {
40 /*
41 * This is the straightforward per-the-spec implementation. It
42 * is not constant-time, thus it might reveal information on the
43 * plaintext (at least, enough to check the plaintext against a
44 * list of potential plaintexts) in a scenario where the
45 * attacker does not have access to the signature value or to
46 * the public key, but knows the nonce (without knowledge of the
47 * nonce, the hashed output cannot be matched against potential
48 * plaintexts).
49 */
50 size_t n;
51
52 n = (size_t)1 << logn;
53 while (n > 0) {
54 uint8_t buf[2];
55 uint32_t w;
56
57 inner_shake256_extract(sc, (void *)buf, sizeof buf);
58 w = ((unsigned)buf[0] << 8) | (unsigned)buf[1];
59 if (w < 61445) {
60 while (w >= 12289) {
61 w -= 12289;
62 }
63 *x ++ = (uint16_t)w;
64 n --;
65 }
66 }
67 }
68
69 /* see inner.h */
70 void
PQCLEAN_FALCON512_AVX2_hash_to_point_ct(inner_shake256_context * sc,uint16_t * x,unsigned logn,uint8_t * tmp)71 PQCLEAN_FALCON512_AVX2_hash_to_point_ct(
72 inner_shake256_context *sc,
73 uint16_t *x, unsigned logn, uint8_t *tmp) {
74 /*
75 * Each 16-bit sample is a value in 0..65535. The value is
76 * kept if it falls in 0..61444 (because 61445 = 5*12289)
77 * and rejected otherwise; thus, each sample has probability
78 * about 0.93758 of being selected.
79 *
80 * We want to oversample enough to be sure that we will
81 * have enough values with probability at least 1 - 2^(-256).
82 * Depending on degree N, this leads to the following
83 * required oversampling:
84 *
85 * logn n oversampling
86 * 1 2 65
87 * 2 4 67
88 * 3 8 71
89 * 4 16 77
90 * 5 32 86
91 * 6 64 100
92 * 7 128 122
93 * 8 256 154
94 * 9 512 205
95 * 10 1024 287
96 *
97 * If logn >= 7, then the provided temporary buffer is large
98 * enough. Otherwise, we use a stack buffer of 63 entries
99 * (i.e. 126 bytes) for the values that do not fit in tmp[].
100 */
101
102 static const uint16_t overtab[] = {
103 0, /* unused */
104 65,
105 67,
106 71,
107 77,
108 86,
109 100,
110 122,
111 154,
112 205,
113 287
114 };
115
116 unsigned n, n2, u, m, p, over;
117 uint16_t *tt1, tt2[63];
118
119 /*
120 * We first generate m 16-bit value. Values 0..n-1 go to x[].
121 * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
122 * We also reduce modulo q the values; rejected values are set
123 * to 0xFFFF.
124 */
125 n = 1U << logn;
126 n2 = n << 1;
127 over = overtab[logn];
128 m = n + over;
129 tt1 = (uint16_t *)tmp;
130 for (u = 0; u < m; u ++) {
131 uint8_t buf[2];
132 uint32_t w, wr;
133
134 inner_shake256_extract(sc, buf, sizeof buf);
135 w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
136 wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
137 wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
138 wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
139 wr |= ((w - 61445) >> 31) - 1;
140 if (u < n) {
141 x[u] = (uint16_t)wr;
142 } else if (u < n2) {
143 tt1[u - n] = (uint16_t)wr;
144 } else {
145 tt2[u - n2] = (uint16_t)wr;
146 }
147 }
148
149 /*
150 * Now we must "squeeze out" the invalid values. We do this in
151 * a logarithmic sequence of passes; each pass computes where a
152 * value should go, and moves it down by 'p' slots if necessary,
153 * where 'p' uses an increasing powers-of-two scale. It can be
154 * shown that in all cases where the loop decides that a value
155 * has to be moved down by p slots, the destination slot is
156 * "free" (i.e. contains an invalid value).
157 */
158 for (p = 1; p <= over; p <<= 1) {
159 unsigned v;
160
161 /*
162 * In the loop below:
163 *
164 * - v contains the index of the final destination of
165 * the value; it is recomputed dynamically based on
166 * whether values are valid or not.
167 *
168 * - u is the index of the value we consider ("source");
169 * its address is s.
170 *
171 * - The loop may swap the value with the one at index
172 * u-p. The address of the swap destination is d.
173 */
174 v = 0;
175 for (u = 0; u < m; u ++) {
176 uint16_t *s, *d;
177 unsigned j, sv, dv, mk;
178
179 if (u < n) {
180 s = &x[u];
181 } else if (u < n2) {
182 s = &tt1[u - n];
183 } else {
184 s = &tt2[u - n2];
185 }
186 sv = *s;
187
188 /*
189 * The value in sv should ultimately go to
190 * address v, i.e. jump back by u-v slots.
191 */
192 j = u - v;
193
194 /*
195 * We increment v for the next iteration, but
196 * only if the source value is valid. The mask
197 * 'mk' is -1 if the value is valid, 0 otherwise,
198 * so we _subtract_ mk.
199 */
200 mk = (sv >> 15) - 1U;
201 v -= mk;
202
203 /*
204 * In this loop we consider jumps by p slots; if
205 * u < p then there is nothing more to do.
206 */
207 if (u < p) {
208 continue;
209 }
210
211 /*
212 * Destination for the swap: value at address u-p.
213 */
214 if ((u - p) < n) {
215 d = &x[u - p];
216 } else if ((u - p) < n2) {
217 d = &tt1[(u - p) - n];
218 } else {
219 d = &tt2[(u - p) - n2];
220 }
221 dv = *d;
222
223 /*
224 * The swap should be performed only if the source
225 * is valid AND the jump j has its 'p' bit set.
226 */
227 mk &= -(((j & p) + 0x1FF) >> 9);
228
229 *s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
230 *d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
231 }
232 }
233 }
234
235 /* see inner.h */
236 int
PQCLEAN_FALCON512_AVX2_is_short(const int16_t * s1,const int16_t * s2,unsigned logn)237 PQCLEAN_FALCON512_AVX2_is_short(
238 const int16_t *s1, const int16_t *s2, unsigned logn) {
239 /*
240 * We use the l2-norm. Code below uses only 32-bit operations to
241 * compute the square of the norm with saturation to 2^32-1 if
242 * the value exceeds 2^31-1.
243 */
244 size_t n, u;
245 uint32_t s, ng;
246
247 n = (size_t)1 << logn;
248 s = 0;
249 ng = 0;
250 for (u = 0; u < n; u ++) {
251 int32_t z;
252
253 z = s1[u];
254 s += (uint32_t)(z * z);
255 ng |= s;
256 z = s2[u];
257 s += (uint32_t)(z * z);
258 ng |= s;
259 }
260 s |= -(ng >> 31);
261
262 /*
263 * Acceptance bound on the l2-norm is:
264 * 1.2*1.55*sqrt(q)*sqrt(2*N)
265 * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
266 */
267 return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
268 }
269
270 /* see inner.h */
271 int
PQCLEAN_FALCON512_AVX2_is_short_half(uint32_t sqn,const int16_t * s2,unsigned logn)272 PQCLEAN_FALCON512_AVX2_is_short_half(
273 uint32_t sqn, const int16_t *s2, unsigned logn) {
274 size_t n, u;
275 uint32_t ng;
276
277 n = (size_t)1 << logn;
278 ng = -(sqn >> 31);
279 for (u = 0; u < n; u ++) {
280 int32_t z;
281
282 z = s2[u];
283 sqn += (uint32_t)(z * z);
284 ng |= sqn;
285 }
286 sqn |= -(ng >> 31);
287
288 /*
289 * Acceptance bound on the l2-norm is:
290 * 1.2*1.55*sqrt(q)*sqrt(2*N)
291 * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
292 */
293 return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
294 }
295