1 /**
2 * @file ed448goldilocks/decaf.c
3 * @author Mike Hamburg
4 *
5 * @copyright
6 * Copyright (c) 2015-2016 Cryptography Research, Inc. \n
7 * Released under the MIT License. See LICENSE.txt for license information.
8 *
9 * @brief Decaf high-level functions.
10 *
11 * @warning This file was automatically generated in Python.
12 * Please do not edit it.
13 */
14 #define _XOPEN_SOURCE 600 /* for posix_memalign */
15 #include "word.h"
16 #include "field.h"
17
18 #include <decaf.h>
19 #include <decaf/ed448.h>
20
/* MSVC has no builtin ctz; this is a fallback as in
   https://stackoverflow.com/questions/355967/how-to-use-msvc-intrinsics-to-get-the-equivalent-of-this-gcc-code/5468852#5468852
*/
24 #ifdef _MSC_VER
25 #include <intrin.h>
26
/* Portable stand-in for GCC's __builtin_ctz: count trailing zero bits of value. */
uint32_t __inline ctz(uint32_t value)
{
    DWORD trailing_zero = 0;
    if ( _BitScanForward( &trailing_zero, value ) )
        return trailing_zero;
    else
        return 32; // ctz(0) is undefined behavior upstream; 32 is a safer sentinel than 0
}
#define __builtin_ctz(x) ctz(x)
36 #endif
37
38 /* Template stuff */
39 #define API_NS(_id) decaf_448_##_id
40 #define SCALAR_BITS DECAF_448_SCALAR_BITS
41 #define SCALAR_SER_BYTES DECAF_448_SCALAR_BYTES
42 #define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
43 #define scalar_t API_NS(scalar_t)
44 #define point_t API_NS(point_t)
45 #define precomputed_s API_NS(precomputed_s)
46 #define IMAGINE_TWIST 0
47 #define COFACTOR 4
48
49 /* Comb config: number of combs, n, t, s. */
50 #define COMBS_N 5
51 #define COMBS_T 5
52 #define COMBS_S 18
53 #define DECAF_WINDOW_BITS 5
54 #define DECAF_WNAF_FIXED_TABLE_BITS 5
55 #define DECAF_WNAF_VAR_TABLE_BITS 3
56
57 #define EDDSA_USE_SIGMA_ISOGENY 0
58
/* Edwards curve d parameter for Ed448. */
static const int EDWARDS_D = -39081;

/* Constants added to the scalar before halving it in point_scalarmul /
 * precomputed_scalarmul below; values come from the code generator
 * (see the file header warning). */
static const scalar_t point_scalarmul_adjustment = {{{
    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
}}}, precomputed_scalarmul_adjustment = {{{
    SC_LIMB(0xc873d6d54a7bb0cf), SC_LIMB(0xe933d8d723a70aad), SC_LIMB(0xbb124b65129c96fd), SC_LIMB(0x00000008335dc163)
}}};
65
/* X448 base point: u = 5, little-endian; remaining bytes zero-initialized. */
const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES] = { 0x05 };

#define RISTRETTO_FACTOR DECAF_448_RISTRETTO_FACTOR
/* Field constant used by the encode/decode sign adjustments below. */
const gf RISTRETTO_FACTOR = {FIELD_LITERAL(
    0x42ef0f45572736, 0x7bf6aa20ce5296, 0xf4fd6eded26033, 0x968c14ba839a66, 0xb8d54b64a2d780, 0x6aa0a1f1a7b8a5, 0x683bf68d722fa2, 0x22d962fbeb24f7
)};
72
73 #if IMAGINE_TWIST
74 #define TWISTED_D (-(EDWARDS_D))
75 #else
76 #define TWISTED_D ((EDWARDS_D)-1)
77 #endif
78
79 #if TWISTED_D < 0
80 #define EFF_D (-(TWISTED_D))
81 #define NEG_D 1
82 #else
83 #define EFF_D TWISTED_D
84 #define NEG_D 0
85 #endif
86
87 /* End of template stuff */
88
89 /* Sanity */
90 #if (COFACTOR == 8) && !IMAGINE_TWIST && !UNSAFE_CURVE_HAS_POINTS_AT_INFINITY
91 /* FUTURE MAGIC: Curve41417 doesn't have these properties. */
92 #error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
/* OK, but why?
 * Two reasons: #1: There are bugs when COFACTOR == 8 && !IMAGINE_TWIST
 * #2: (second reason not recorded upstream)
 */
97 #endif
98
99 #if IMAGINE_TWIST && (P_MOD_8 != 5)
100 #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
101 #endif
102
103 #if (COFACTOR != 8) && (COFACTOR != 4)
104 #error "COFACTOR must be 4 or 8"
105 #endif
106
107 #if IMAGINE_TWIST
108 extern const gf SQRT_MINUS_ONE;
109 #endif
110
111 #define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
112
113 extern const point_t API_NS(point_base);
114
115 /* Projective Niels coordinates */
116 typedef struct { gf a, b, c; } niels_s, niels_t[1];
117 typedef struct { niels_t n; gf z; } VECTOR_ALIGNED pniels_s, pniels_t[1];
118
119 /* Precomputed base */
120 struct precomputed_s { niels_t table [COMBS_N<<(COMBS_T-1)]; };
121
122 extern const gf API_NS(precomputed_base_as_fe)[];
123 const precomputed_s *API_NS(precomputed_base) =
124 (const precomputed_s *) &API_NS(precomputed_base_as_fe);
125
126 const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
127 const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t);
128
/** Inverse: y = 1/x, computed as x * (1/sqrt(x^2))^2.
 * If assert_nonzero is set, asserts (in debug builds) that gf_isr succeeded,
 * i.e. that x was invertible.  Safe when y aliases x. */
static void
gf_invert(gf y, const gf x, int assert_nonzero) {
    gf t1, t2;
    gf_sqr(t1, x); // x^2
    mask_t ret = gf_isr(t2, t1); // +-1/sqrt(x^2) = +-1/x
    (void)ret;
    if (assert_nonzero) assert(ret);
    gf_sqr(t1, t2); // 1/x^2 (sign squared away)
    gf_mul(t2, t1, x); // not direct to y in case of alias.
    gf_copy(y, t2);
}
141
/** Identity element = (x,y,z,t) = (0,1,1,0), i.e. affine (0,1). */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
144
/* Predeclare because not static: called by elligator.
 * Maps a point to its Ristretto-style s coordinate (serialized by
 * point_encode); inv_el_sum / inv_el_m1 are presumably auxiliary outputs for
 * the elligator inverse map -- confirm against the elligator caller.  The
 * toggle_* masks select among equivalent representatives. */
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
);
155
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
) {
#if COFACTOR == 4 && !IMAGINE_TWIST
    (void)toggle_rotation; /* Only applies to cofactor 8 */
    gf t1;
    /* Alias the outputs as scratch space; they are only written, never read,
     * before their final assignment. */
    gf_s *t2 = s, *t3=inv_el_sum, *t4=inv_el_m1;

    gf_add(t1,p->x,p->t);
    gf_sub(t2,p->x,p->t);
    gf_mul(t3,t1,t2); /* t3 = num = (x+t)(x-t) */
    gf_sqr(t2,p->x);
    gf_mul(t1,t2,t3);
    gf_mulw(t2,t1,-1-TWISTED_D); /* -x^2 * (a-d) * num */
    gf_isr(t1,t2); /* t1 = isr = 1/sqrt(-x^2*(a-d)*num) */
    gf_mul(t2,t1,t3); /* t2 = ratio */
    gf_mul(t4,t2,RISTRETTO_FACTOR);
    /* Choose the representative whose "altx" has the requested low bit. */
    mask_t negx = gf_lobit(t4) ^ toggle_altx;
    gf_cond_neg(t2, negx);
    gf_mul(t3,t2,p->z);
    gf_sub(t3,t3,p->t);
    gf_mul(t2,t3,p->x);
    gf_mulw(t4,t2,-1-TWISTED_D);
    gf_mul(s,t4,t1);
    /* Canonicalize: s is taken nonnegative (low bit clear). */
    mask_t lobs = gf_lobit(s);
    gf_cond_neg(s,lobs);
    gf_copy(inv_el_m1,p->x);
    gf_cond_neg(inv_el_m1,~lobs^negx^toggle_s);
    gf_add(inv_el_m1,inv_el_m1,p->t);

#elif COFACTOR == 8 && IMAGINE_TWIST
    /* More complicated because of rotation */
    gf t1,t2,t3,t4,t5;
    gf_add(t1,p->z,p->y);
    gf_sub(t2,p->z,p->y);
    gf_mul(t3,t1,t2); /* t3 = num = (z+y)(z-y) */
    gf_mul(t2,p->x,p->y); /* t2 = den */
    gf_sqr(t1,t2);
    gf_mul(t4,t1,t3);
    gf_mulw(t1,t4,-1-TWISTED_D);
    gf_isr(t4,t1); /* isqrt(num*(a-d)*den^2) */
    gf_mul(t1,t2,t4);
    gf_mul(t2,t1,RISTRETTO_FACTOR); /* t2 = "iden" in ristretto.sage */
    gf_mul(t1,t3,t4); /* t1 = "inum" in ristretto.sage */

    /* Calculate altxy = iden*inum*i*t^2*(d-a) */
    gf_mul(t3,t1,t2);
    gf_mul_i(t4,t3);
    gf_mul(t3,t4,p->t);
    gf_mul(t4,t3,p->t);
    gf_mulw(t3,t4,TWISTED_D+1); /* iden*inum*i*t^2*(d-1) */
    mask_t rotate = toggle_rotation ^ gf_lobit(t3);

    /* Rotate if altxy is negative */
    gf_cond_swap(t1,t2,rotate);
    gf_mul_i(t4,p->x);
    gf_cond_sel(t4,p->y,t4,rotate); /* t4 = "fac" = ix if rotate, else y */

    gf_mul_i(t5,RISTRETTO_FACTOR); /* t5 = imi */
    gf_mul(t3,t5,t2); /* iden * imi */
    gf_mul(t2,t5,t1);
    gf_mul(t5,t2,p->t); /* "altx" = iden*imi*t */
    mask_t negx = gf_lobit(t5) ^ toggle_altx;

    gf_cond_neg(t1,negx^rotate);
    gf_mul(t2,t1,p->z);
    gf_add(t2,t2,ONE);
    gf_mul(inv_el_sum,t2,t4);
    gf_mul(s,inv_el_sum,t3);

    /* Canonicalize: s is taken nonnegative (low bit clear). */
    mask_t negs = gf_lobit(s);
    gf_cond_neg(s,negs);

    mask_t negz = ~negs ^ toggle_s ^ negx;
    gf_copy(inv_el_m1,p->z);
    gf_cond_neg(inv_el_m1,negz);
    gf_sub(inv_el_m1,inv_el_m1,t4);
#else
#error "Cofactor must be 4 (with no IMAGINE_TWIST) or 8 (with IMAGINE_TWIST)"
#endif
}
243
API_NS(point_encode)244 void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
245 gf s,ie1,ie2;
246 API_NS(deisogenize)(s,ie1,ie2,p,0,0,0);
247 gf_serialize(ser,s,1);
248 }
249
/* Deserialize and validate a point from its canonical encoding.
 * Returns DECAF_SUCCESS iff ser is a canonical encoding of a valid point
 * (and, unless allow_identity, not the identity).  Constant time: all field
 * work is done regardless of intermediate failures; succ accumulates them. */
decaf_error_t API_NS(point_decode) (
    point_t p,
    const unsigned char ser[SER_BYTES],
    decaf_bool_t allow_identity
) {
    gf s, s2, num, tmp;
    /* Alias parts of the output point as scratch; filled properly below. */
    gf_s *tmp2=s2, *ynum=p->z, *isr=p->x, *den=p->t;

    mask_t succ = gf_deserialize(s, ser, 1, 0);
    succ &= bool_to_mask(allow_identity) | ~gf_eq(s, ZERO);
    succ &= ~gf_lobit(s); /* canonical encodings have s nonnegative */

    gf_sqr(s2,s); /* s^2 = -as^2 */
#if IMAGINE_TWIST
    gf_sub(s2,ZERO,s2); /* -as^2 */
#endif
    gf_sub(den,ONE,s2); /* 1+as^2 */
    gf_add(ynum,ONE,s2); /* 1-as^2 */
    gf_mulw(num,s2,-4*TWISTED_D);
    gf_sqr(tmp,den); /* tmp = den^2 */
    gf_add(num,tmp,num); /* num = den^2 - 4*d*s^2 */
    gf_mul(tmp2,num,tmp); /* tmp2 = num*den^2 */
    succ &= gf_isr(isr,tmp2); /* isr = 1/sqrt(num*den^2); fails if not square */
    gf_mul(tmp,isr,den); /* isr*den */
    gf_mul(p->y,tmp,ynum); /* isr*den*(1-as^2) */
    gf_mul(tmp2,tmp,s); /* s*isr*den */
    gf_add(tmp2,tmp2,tmp2); /* 2*s*isr*den */
    gf_mul(tmp,tmp2,isr); /* 2*s*isr^2*den */
    gf_mul(p->x,tmp,num); /* 2*s*isr^2*den*num */
    gf_mul(tmp,tmp2,RISTRETTO_FACTOR); /* 2*s*isr*den*magic */
    gf_cond_neg(p->x,gf_lobit(tmp)); /* flip x to the canonical sign */

#if COFACTOR==8
    /* Additionally check y != 0 and x*y*isomagic nonnegative */
    succ &= ~gf_eq(p->y,ZERO);
    gf_mul(tmp,p->x,p->y);
    gf_mul(tmp2,tmp,RISTRETTO_FACTOR);
    succ &= ~gf_lobit(tmp2);
#endif

#if IMAGINE_TWIST
    gf_copy(tmp,p->x);
    gf_mul_i(p->x,tmp); /* move to the twisted curve */
#endif

    /* Fill in z and t */
    gf_copy(p->z,ONE);
    gf_mul(p->t,p->x,p->y);

    assert(API_NS(point_valid)(p) | ~succ);
    return decaf_succeed_if(mask_to_bool(succ));
}
302
/* p = q - r, extended twisted-Edwards addition with r negated implicitly
 * (note b/d swapped relative to point_add).  The "N+e" comments track the
 * maximum unreduced magnitude after each lazy (_nr) operation -- do not
 * reorder statements without re-deriving them. */
void API_NS(point_sub) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( d, r->y, r->x ); /* 3+e */
    gf_add_nr ( c, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y );    /* 2+e */
    gf_sub_nr ( c, p->y, a );    /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a );       /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
#else
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
334
/* p = q + r, extended twisted-Edwards addition (Hisil et al. style).
 * The "N+e" comments track the maximum unreduced magnitude after each lazy
 * (_nr) operation -- do not reorder statements without re-deriving them. */
void API_NS(point_add) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( c, r->y, r->x ); /* 3+e */
    gf_add_nr ( d, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y );    /* 2+e */
    gf_sub_nr ( c, p->y, a );    /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a );       /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
#else
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
366
/* p = 2q.  If before_double is nonzero, the t coordinate is left stale
 * (it is not needed as input to a following doubling), saving one gf_mul.
 * The "N+e" comments track unreduced magnitudes of the lazy (_nr) ops. */
static DECAF_NOINLINE void
point_double_internal (
    point_t p,
    const point_t q,
    int before_double
) {
    gf a, b, c, d;
    gf_sqr ( c, q->x );
    gf_sqr ( a, q->y );
    gf_add_nr ( d, c, a );             /* 2+e */
    gf_add_nr ( p->t, q->y, q->x );    /* 2+e */
    gf_sqr ( b, p->t );
    gf_subx_nr ( b, b, d, 3 );         /* 4+e */
    gf_sub_nr ( p->t, a, c );          /* 3+e */
    gf_sqr ( p->x, q->z );
    gf_add_nr ( p->z, p->x, p->x );    /* 2+e */
    gf_subx_nr ( a, p->z, p->t, 4 );   /* 6+e */
    if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
    gf_mul ( p->x, a, b );
    gf_mul ( p->z, p->t, a );
    gf_mul ( p->y, p->t, d );
    if (!before_double) gf_mul ( p->t, b, d );
}
390
/* Public doubling: p = 2q, with the t coordinate fully computed. */
void API_NS(point_double)(point_t p, const point_t q) {
    point_double_internal(p,q,0);
}
394
API_NS(point_negate)395 void API_NS(point_negate) (
396 point_t nega,
397 const point_t a
398 ) {
399 gf_sub(nega->x, ZERO, a->x);
400 gf_copy(nega->y, a->y);
401 gf_copy(nega->z, a->z);
402 gf_sub(nega->t, ZERO, a->t);
403 }
404
/* Operations on [p]niels */

/* Conditionally (constant-time, on mask neg) negate a niels point:
 * swapping a <-> b and negating c yields the negation in niels form. */
static DECAF_INLINE void
cond_neg_niels (
    niels_t n,
    mask_t neg
) {
    gf_cond_swap(n->a, n->b, neg);
    gf_cond_neg(n->c, neg);
}
414
/* Convert an extended point to projective niels form:
 * (a,b,c,z) = (y-x, y+x, 2d*t, 2z). */
static DECAF_NOINLINE void pt_to_pniels (
    pniels_t b,
    const point_t a
) {
    gf_sub ( b->n->a, a->y, a->x );
    gf_add ( b->n->b, a->x, a->y );
    gf_mulw ( b->n->c, a->t, 2*TWISTED_D );
    gf_add ( b->z, a->z, a->z );
}
424
/* Convert projective niels back to an extended point.
 * With eu = b+a = 2y' and e->y = b-a = 2x' (scaled), rebuild (x,y,z,t). */
static DECAF_NOINLINE void pniels_to_pt (
    point_t e,
    const pniels_t d
) {
    gf eu;
    gf_add ( eu, d->n->b, d->n->a );
    gf_sub ( e->y, d->n->b, d->n->a );
    gf_mul ( e->t, e->y, eu);
    gf_mul ( e->x, d->z, e->y );
    gf_mul ( e->y, d->z, eu );
    gf_sqr ( e->z, d->z );
}
437
/* Convert an (affine-denominator) niels point to an extended point with z=1. */
static DECAF_NOINLINE void
niels_to_pt (
    point_t e,
    const niels_t n
) {
    gf_add ( e->y, n->b, n->a );
    gf_sub ( e->x, n->b, n->a );
    gf_mul ( e->t, e->y, e->x );
    gf_copy ( e->z, ONE );
}
448
/* d += e (niels addition).  If before_double is nonzero, skip computing
 * d->t since the next operation is a doubling that ignores it.
 * "N+e" comments track unreduced magnitudes; statement order is critical. */
static DECAF_NOINLINE void
add_niels_to_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
    gf_mul ( a, e->a, b );
    gf_add_nr ( b, d->x, d->y ); /* 2+e */
    gf_mul ( d->y, e->b, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y );    /* 2+e */
    gf_sub_nr ( b, d->y, a );    /* 3+e */
    gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
    gf_add_nr ( a, d->x, d->z );    /* 2+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
470
/* d -= e (niels subtraction): same as add_niels_to_pt but with e->a/e->b
 * exchanged and the z +/- terms swapped, which negates e implicitly.
 * If before_double is nonzero, skip computing d->t. */
static DECAF_NOINLINE void
sub_niels_from_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
    gf_mul ( a, e->b, b );
    gf_add_nr ( b, d->x, d->y ); /* 2+e */
    gf_mul ( d->y, e->a, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y );    /* 2+e */
    gf_sub_nr ( b, d->y, a );    /* 3+e */
    gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
    gf_sub_nr ( a, d->z, d->x );    /* 3+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
492
493 static void
add_pniels_to_pt(point_t p,const pniels_t pn,int before_double)494 add_pniels_to_pt (
495 point_t p,
496 const pniels_t pn,
497 int before_double
498 ) {
499 gf L0;
500 gf_mul ( L0, p->z, pn->z );
501 gf_copy ( p->z, L0 );
502 add_niels_to_pt( p, pn->n, before_double );
503 }
504
505 static void
sub_pniels_from_pt(point_t p,const pniels_t pn,int before_double)506 sub_pniels_from_pt (
507 point_t p,
508 const pniels_t pn,
509 int before_double
510 ) {
511 gf L0;
512 gf_mul ( L0, p->z, pn->z );
513 gf_copy ( p->z, L0 );
514 sub_niels_from_pt( p, pn->n, before_double );
515 }
516
/* Fill multiples[i] = (2i+1)*b for i in [0, ntable): the odd multiples used
 * by the signed fixed-window ladders below.  Scratch is zeroized on exit. */
static DECAF_NOINLINE void
prepare_fixed_window(
    pniels_t *multiples,
    const point_t b,
    int ntable
) {
    point_t tmp;
    pniels_t pn;
    int i;

    point_double_internal(tmp, b, 0);
    pt_to_pniels(pn, tmp);           /* pn = 2b, the stride */
    pt_to_pniels(multiples[0], b);   /* 1*b */
    API_NS(point_copy)(tmp, b);
    for (i=1; i<ntable; i++) {
        add_pniels_to_pt(tmp, pn, 0);        /* tmp += 2b */
        pt_to_pniels(multiples[i], tmp);     /* (2i+1)*b */
    }

    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(tmp,sizeof(tmp));
}
539
/* a = scalar * b, constant time.
 * Signed fixed-window method: the scalar is adjusted and halved so every
 * window digit is odd, then processed top-down with WINDOW doublings per
 * digit and one constant-time table lookup (+ conditional negation). */
void API_NS(point_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalar
) {

    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);

    /* Make all window digits odd: (scalar + adjustment)/2. */
    scalar_t scalar1x;
    API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);

    /* Set up a precomputed table with odd multiples of b. */
    pniels_t pn, multiples[1<<((int)(DECAF_WINDOW_BITS)-1)]; // == NTABLE (MSVC compatibility issue)
    point_t tmp;
    prepare_fixed_window(multiples, b, NTABLE);

    /* Initialize: start at the highest window boundary <= SCALAR_BITS. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;

    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits (may straddle a limb boundary) */
        word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits &= WINDOW_MASK;
        /* Top bit selects sign; fold negative digits onto the table range. */
        mask_t inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        /* Add in from table.  Compute t only on last iteration. */
        constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason.  Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, i ? -1 : 0);
        }
    }

    /* Write out the answer */
    API_NS(point_copy)(a,tmp);

    /* Zeroize secret-dependent scratch. */
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples,sizeof(multiples));
    decaf_bzero(tmp,sizeof(tmp));
}
600
/* a = scalarb * b + scalarc * c, constant time.
 * Shamir's-trick variant of point_scalarmul: one shared doubling ladder,
 * two tables and two constant-time lookups per window. */
void API_NS(point_double_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalarb,
    const point_t c,
    const scalar_t scalarc
) {

    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);

    /* Make all window digits odd: (scalar + adjustment)/2, for each scalar. */
    scalar_t scalar1x, scalar2x;
    API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);

    /* Set up precomputed tables with odd multiples of b and c. */
    pniels_t pn, multiples1[1<<((int)(DECAF_WINDOW_BITS)-1)], multiples2[1<<((int)(DECAF_WINDOW_BITS)-1)];
    // Array size above equal NTABLE (MSVC compatibility issue)
    point_t tmp;
    prepare_fixed_window(multiples1, b, NTABLE);
    prepare_fixed_window(multiples2, c, NTABLE);

    /* Initialize: start at the highest window boundary <= SCALAR_BITS. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;

    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits from each scalar */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        /* Top bit of each digit selects its sign. */
        mask_t inv1 = (bits1>>(WINDOW-1))-1;
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;

        /* Add in from table.  Compute t only on last iteration. */
        constant_time_lookup(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason.  Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, 0);
        }
        constant_time_lookup(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv2);
        add_pniels_to_pt(tmp, pn, i?-1:0);
    }

    /* Write out the answer */
    API_NS(point_copy)(a,tmp);


    /* Zeroize secret-dependent scratch. */
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
}
678
/* a1 = scalar1 * b and a2 = scalar2 * b, constant time, sharing the
 * doublings of b.  Works bottom-up: running multiples of b are accumulated
 * into per-digit buckets (constant-time insert), then the buckets are
 * combined at the end. */
void API_NS(point_dual_scalarmul) (
    point_t a1,
    point_t a2,
    const point_t b,
    const scalar_t scalar1,
    const scalar_t scalar2
) {

    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);


    /* Make all window digits odd: (scalar + adjustment)/2, for each scalar. */
    scalar_t scalar1x, scalar2x;
    API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);

    /* Bucket accumulators, one set per scalar. */
    point_t multiples1[1<<((int)(DECAF_WINDOW_BITS)-1)], multiples2[1<<((int)(DECAF_WINDOW_BITS)-1)], working, tmp;
    // Array sizes above equal NTABLE (MSVC compatibility issue)

    pniels_t pn;

    API_NS(point_copy)(working, b);

    /* Initialize all buckets to the identity. */
    int i,j;

    for (i=0; i<NTABLE; i++) {
        API_NS(point_copy)(multiples1[i], API_NS(point_identity));
        API_NS(point_copy)(multiples2[i], API_NS(point_identity));
    }

    for (i=0; i<SCALAR_BITS; i+=WINDOW) {
        if (i) {
            /* working = 2^WINDOW * previous working */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(working, working, -1);
            point_double_internal(working, working, 0);
        }

        /* Fetch another block of bits from each scalar */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        /* Top bit of each digit selects its sign. */
        mask_t inv1 = (bits1>>(WINDOW-1))-1;
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;

        pt_to_pniels(pn, working);

        constant_time_lookup(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        /* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);


        constant_time_lookup(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
        /* pn currently carries sign inv1; inv1^inv2 flips it to net sign inv2. */
        cond_neg_niels(pn->n, inv1^inv2);
        /* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
    }

    if (NTABLE > 1) {
        /* Combine buckets: sum_k (2k+1) * bucket[k], via suffix sums. */
        API_NS(point_copy)(working, multiples1[NTABLE-1]);
        API_NS(point_copy)(tmp    , multiples2[NTABLE-1]);

        for (i=NTABLE-1; i>1; i--) {
            API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
            API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
            API_NS(point_add)(working, working, multiples1[i-1]);
            API_NS(point_add)(tmp, tmp, multiples2[i-1]);
        }

        API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
        API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
        point_double_internal(working, working, 0);
        point_double_internal(tmp, tmp, 0);
        API_NS(point_add)(a1, working, multiples1[0]);
        API_NS(point_add)(a2, tmp, multiples2[0]);
    } else {
        API_NS(point_copy)(a1, multiples1[0]);
        API_NS(point_copy)(a2, multiples2[0]);
    }

    /* Zeroize secret-dependent scratch. */
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
    decaf_bzero(working,sizeof(working));
}
782
/* Constant-time equality test modulo the small torsion subgroup:
 * two representatives of the same Decaf element compare equal. */
decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
    /* equality mod 2-torsion compares x/y (cross-multiplied to avoid inversion) */
    gf a, b;
    gf_mul ( a, p->y, q->x );
    gf_mul ( b, q->y, p->x );
    mask_t succ = gf_eq(a,b);

#if (COFACTOR == 8)
    /* Also accept the 4-torsion-rotated representative. */
    gf_mul ( a, p->y, q->y );
    gf_mul ( b, q->x, p->x );
#if !(IMAGINE_TWIST)
    gf_sub ( a, ZERO, a );
#else
    /* Interesting note: the 4tor would normally be rotation.
     * But because of the *i twist, it's actually
     * (x,y) <-> (iy,ix)
     */

    /* No code, just a comment. */
#endif
    succ |= gf_eq(a,b);
#endif

    return mask_to_bool(succ);
}
808
/* Constant-time validity check: the extended coordinates must satisfy
 * t*z == x*y, the curve equation y^2 - x^2 == z^2 + d*t^2, and z != 0. */
decaf_bool_t API_NS(point_valid) (
    const point_t p
) {
    gf a,b,c;
    gf_mul(a,p->x,p->y);
    gf_mul(b,p->z,p->t);
    mask_t out = gf_eq(a,b);       /* t*z == x*y */
    gf_sqr(a,p->x);
    gf_sqr(b,p->y);
    gf_sub(a,b,a);                 /* y^2 - x^2 */
    gf_sqr(b,p->t);
    gf_mulw(c,b,TWISTED_D);
    gf_sqr(b,p->z);
    gf_add(b,b,c);                 /* z^2 + d*t^2 */
    out &= gf_eq(a,b);
    out &= ~gf_eq(p->z,ZERO);
    return mask_to_bool(out);
}
827
/* Debugging helper: replace p by a different representative of the same
 * Decaf element (apply a small-torsion "torque").  Not for production use. */
void API_NS(point_debugging_torque) (
    point_t q,
    const point_t p
) {
#if COFACTOR == 8 && IMAGINE_TWIST
    /* Cofactor-8 twist: (x,y) -> (iy,ix), t -> -t. */
    gf tmp;
    gf_mul(tmp,p->x,SQRT_MINUS_ONE);
    gf_mul(q->x,p->y,SQRT_MINUS_ONE);
    gf_copy(q->y,tmp);
    gf_copy(q->z,p->z);
    gf_sub(q->t,ZERO,p->t);
#else
    /* Cofactor-4: (x,y) -> (-x,-y). */
    gf_sub(q->x,ZERO,p->x);
    gf_sub(q->y,ZERO,p->y);
    gf_copy(q->z,p->z);
    gf_copy(q->t,p->t);
#endif
}
846
/* Debugging helper: multiply all projective coordinates of p by a factor
 * deserialized from `factor` (replaced by 1 if it deserializes to zero),
 * yielding another representative of the same point.  Not for production. */
void API_NS(point_debugging_pscale) (
    point_t q,
    const point_t p,
    const uint8_t factor[SER_BYTES]
) {
    gf gfac,tmp;
    /* NB this means you'll never pscale by negative numbers for p521 */
    ignore_result(gf_deserialize(gfac,factor,0,0));
    gf_cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
    gf_mul(tmp,p->x,gfac);
    gf_copy(q->x,tmp);
    gf_mul(tmp,p->y,gfac);
    gf_copy(q->y,tmp);
    gf_mul(tmp,p->z,gfac);
    gf_copy(q->z,tmp);
    gf_mul(tmp,p->t,gfac);
    gf_copy(q->t,tmp);
}
865
/* Batch inversion (Montgomery's trick): out[i] = 1/in[i] for i in [0,n),
 * at the cost of one gf_invert plus ~3n multiplications.
 * Requires n > 1; out and in must not overlap (out is restrict-qualified). */
static void gf_batch_invert (
    gf *__restrict__ out,
    const gf *in,
    unsigned int n
) {
    gf t1;
    assert(n>1);

    /* Forward pass: out[i+1] = in[0]*...*in[i]. */
    gf_copy(out[1], in[0]);
    int i;
    for (i=1; i<(int) (n-1); i++) {
        gf_mul(out[i+1], out[i], in[i]);
    }
    gf_mul(out[0], out[n-1], in[n-1]); /* product of all inputs */

    gf_invert(out[0], out[0], 1);

    /* Backward pass: peel off one input at a time.
     * Invariant: out[0] = 1/(in[0]*...*in[i]) at the top of each iteration. */
    for (i=n-1; i>0; i--) {
        gf_mul(t1, out[i], out[0]);   /* = 1/in[i] */
        gf_copy(out[i], t1);
        gf_mul(t1, out[0], in[i]);    /* drop in[i] from the running inverse */
        gf_copy(out[0], t1);
    }
}
890
/* Normalize a niels table: divide each entry's (a,b,c) by its projective
 * denominator zs[i] (via batch inversion into zis) and strongly reduce,
 * so the table entries become affine-denominator and canonical. */
static void batch_normalize_niels (
    niels_t *table,
    const gf *zs,
    gf *__restrict__ zis,
    int n
) {
    int i;
    gf product;
    gf_batch_invert(zis, zs, n);

    for (i=0; i<n; i++) {
        gf_mul(product, table[i]->a, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->a, product);

        gf_mul(product, table[i]->b, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->b, product);

        gf_mul(product, table[i]->c, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->c, product);
    }

    decaf_bzero(product,sizeof(product));
}
917
/* Build the comb precomputation table for `base`, for use with
 * precomputed_scalarmul.  Comb parameters (n combs, t teeth, s spacing)
 * must cover the scalar: n*t*s >= SCALAR_BITS.  The table is filled in
 * Gray-code order so each entry differs from the previous by one add/sub,
 * then batch-normalized. */
void API_NS(precompute) (
    precomputed_s *table,
    const point_t base
) {
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
    assert(n*t*s >= SCALAR_BITS);

    point_t working, start, doubles[COMBS_T-1];
    API_NS(point_copy)(working, base);
    pniels_t pn_tmp;

    gf zs[(unsigned int)(COMBS_N)<<(unsigned int)(COMBS_T-1)], zis[(unsigned int)(COMBS_N)<<(unsigned int)(COMBS_T-1)];

    unsigned int i,j,k;

    /* Compute n tables */
    for (i=0; i<n; i++) {

        /* Doubling phase: accumulate the sum of this comb's teeth into start,
         * remembering each tooth point in doubles[] for the Gray-code phase. */
        for (j=0; j<t; j++) {
            if (j) API_NS(point_add)(start, start, working);
            else API_NS(point_copy)(start, working);

            if (j==t-1 && i==n-1) break;

            point_double_internal(working, working,0);
            if (j<t-1) API_NS(point_copy)(doubles[j], working);

            /* Advance working by s doublings to the next tooth. */
            for (k=0; k<s-1; k++)
                point_double_internal(working, working, k<s-2);
        }

        /* Gray-code phase: visit all 2^(t-1) sign patterns, one add/sub apart. */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;

            pt_to_pniels(pn_tmp, start);
            memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
            gf_copy(zs[idx], pn_tmp->z);

            if (j >= (1u<<(t-1)) - 1) break;
            /* Find the single bit that changes in the next Gray code. */
            int delta = (j+1) ^ ((j+1)>>1) ^ gray;

            for (k=0; delta>1; k++)
                delta >>=1;

            if (gray & (1<<k)) {
                API_NS(point_add)(start, start, doubles[k]);
            } else {
                API_NS(point_sub)(start, start, doubles[k]);
            }
        }
    }

    batch_normalize_niels(table->table,(const gf *)zs,zis,n<<(t-1));

    /* Zeroize scratch (base may be secret-derived). */
    decaf_bzero(zs,sizeof(zs));
    decaf_bzero(zis,sizeof(zis));
    decaf_bzero(pn_tmp,sizeof(pn_tmp));
    decaf_bzero(working,sizeof(working));
    decaf_bzero(start,sizeof(start));
    decaf_bzero(doubles,sizeof(doubles));
}
982
/* Typed wrapper: constant-time lookup of table[idx] into ni, touching all
 * nelts entries so the access pattern is independent of idx. */
static DECAF_INLINE void
constant_time_lookup_niels (
    niels_s *__restrict__ ni,
    const niels_t *table,
    int nelts,
    int idx
) {
    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
}
992
/* out = scalar * (the base precomputed into `table`), constant time.
 * Signed comb method: for each of s spacing positions (high to low), each of
 * the n combs contributes one t-bit signed digit looked up in its subtable. */
void API_NS(precomputed_scalarmul) (
    point_t out,
    const precomputed_s *table,
    const scalar_t scalar
) {
    int i;
    unsigned j,k;
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;

    /* Make the comb digits odd: (scalar + adjustment)/2. */
    scalar_t scalar1x;
    API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);

    niels_t ni;

    for (i=s-1; i>=0; i--) {
        if (i != (int)s-1) point_double_internal(out,out,0);

        for (j=0; j<n; j++) {
            /* Gather this comb's t teeth into a digit. */
            int tab = 0;

            for (k=0; k<t; k++) {
                unsigned int bit = i + s*(k + j*t);
                if (bit < SCALAR_BITS) {
                    tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
                }
            }

            /* Top tooth selects the sign; fold onto the table range. */
            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;

            constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);

            cond_neg_niels(ni, invert);
            if ((i!=(int)s-1)||j) {
                add_niels_to_pt(out, ni, j==n-1 && i);
            } else {
                niels_to_pt(out, ni); /* first contribution initializes out */
            }
        }
    }

    /* Zeroize secret-dependent scratch. */
    decaf_bzero(ni,sizeof(ni));
    decaf_bzero(scalar1x,sizeof(scalar1x));
}
1039
/* Constant-time select: out = pick_b ? b : a. */
void API_NS(point_cond_sel) (
    point_t out,
    const point_t a,
    const point_t b,
    decaf_bool_t pick_b
) {
    constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
}
1048
/* FUTURE: restore Curve25519 Montgomery ladder? */
/* scaled = encode(scalar * decode(base)).  If decode fails:
 * with short_circuit, return immediately (leaks validity through timing);
 * without it, silently substitute the curve base point so the multiply and
 * encode still run, and return the original failure code at the end. */
decaf_error_t API_NS(direct_scalarmul) (
    uint8_t scaled[SER_BYTES],
    const uint8_t base[SER_BYTES],
    const scalar_t scalar,
    decaf_bool_t allow_identity,
    decaf_bool_t short_circuit
) {
    point_t basep;
    decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity);
    if (short_circuit && succ != DECAF_SUCCESS) return succ;
    API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
    API_NS(point_scalarmul)(basep, basep, scalar);
    API_NS(point_encode)(scaled, basep);
    API_NS(point_destroy)(basep);
    return succ;
}
1066
/**
 * Multiply a point by the EdDSA encoding ratio and serialize it in
 * Ed448 wire format.
 *
 * Internally points live on the (possibly twisted) curve; this maps
 * the point through a 4-isogeny to the EdDSA curve, affinizes it, and
 * encodes the affine y-coordinate with the low bit of the affine
 * x-coordinate stored in the top bit of the trailing byte.
 */
void API_NS(point_mul_by_ratio_and_encode_like_eddsa) (
    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
    const point_t p
) {
    
    /* The point is now on the twisted curve.  Move it to untwisted. */
    gf x, y, z, t;
    point_t q;
    /* Cofactor-dependent part of the encoding ratio. */
    #if COFACTOR == 8
        API_NS(point_double)(q,p);
    #else
        API_NS(point_copy)(q,p);
    #endif
    
    #if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y*sqrt(d/a-1)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         * with a = -1, d = -EDWARDS_D:
         *   -2xysqrt(EDWARDS_D-1)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf u;
        gf_sqr ( x, q->x ); // x^2
        gf_sqr ( t, q->y ); // y^2
        gf_add( u, x, t ); // x^2 + y^2
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, u, y ); // -2xy
        gf_sub ( z, t, x ); // y^2 - x^2
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z); // 2z^2 - y^2 + x^2
        gf_mul ( x, y, z ); // 2xy(y^2-x^2)
        gf_mul ( y, u, t ); // (x^2+y^2)(2z^2-y^2+x^2)
        gf_mul ( u, z, t );
        gf_copy( z, u );
        gf_mul ( u, x, RISTRETTO_FACTOR );
        #if IMAGINE_TWIST
            gf_mul_i( x, u );
        #else
            #error "... probably wrong"
            gf_copy( x, u );
        #endif
        decaf_bzero(u,sizeof(u));
    }
    #elif IMAGINE_TWIST
    {
        API_NS(point_double)(q,q);
        API_NS(point_double)(q,q);
        gf_mul_i(x, q->x);
        gf_copy(y, q->y);
        gf_copy(z, q->z);
    }
    #else
    {
        /* 4-isogeny: 2xy/(y^+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
        gf u;
        gf_sqr ( x, q->x );                 // x^2
        gf_sqr ( t, q->y );                 // y^2
        gf_add( u, x, t );                  // x^2 + y^2
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, y, u );                 // 2xy
        gf_sub ( z, t, x );                 // y^2 - x^2
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z);                  // 2z^2 - y^2 + x^2
        gf_mul ( x, t, y );
        gf_mul ( y, z, u );
        gf_mul ( z, u, t );
        decaf_bzero(u,sizeof(u));
    }
    #endif
    /* Affinize */
    gf_invert(z,z,1);
    gf_mul(t,x,z);  /* t = affine x */
    gf_mul(x,y,z);  /* x = affine y */
    
    /* Encode: y-coordinate, with sign of x in the top bit. */
    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] = 0;
    gf_serialize(enc, x, 1);
    enc[DECAF_EDDSA_448_PRIVATE_BYTES-1] |= 0x80 & gf_lobit(t);

    /* Scrub temporaries that may hold secret-derived values. */
    decaf_bzero(x,sizeof(x));
    decaf_bzero(y,sizeof(y));
    decaf_bzero(z,sizeof(z));
    decaf_bzero(t,sizeof(t));
    API_NS(point_destroy)(q);
}
1158
1159
/**
 * Decode an EdDSA (Ed448) public key and multiply by the inverse of
 * the encoding ratio, producing a point in the internal (twisted)
 * representation.
 *
 * Returns DECAF_FAILURE if the y-coordinate fails to deserialize, if
 * the trailing byte contains anything other than the sign bit, or if
 * the curve equation has no square root for this y (gf_isr fails).
 */
decaf_error_t API_NS(point_decode_like_eddsa_and_mul_by_ratio) (
    point_t p,
    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
) {
    uint8_t enc2[DECAF_EDDSA_448_PUBLIC_BYTES];
    memcpy(enc2,enc,sizeof(enc2));

    /* Extract the sign bit of x from the trailing byte, then clear it. */
    mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80);
    enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80;
    
    mask_t succ = gf_deserialize(p->y, enc2, 1, 0);
    #if 0 == 0
        /* The rest of the trailing byte must be zero. */
        succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
    #endif
    
    /* Recover x^2 = (1-y^2)/(1-dy^2) from the curve equation. */
    gf_sqr(p->x,p->y);
    gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
    #if EDDSA_USE_SIGMA_ISOGENY
        gf_mulw(p->t,p->z,EDWARDS_D); /* d-dy^2 */
        gf_mulw(p->x,p->z,EDWARDS_D-1); /* num = (1-y^2)(d-1) */
        gf_copy(p->z,p->x);
    #else
        gf_mulw(p->t,p->x,EDWARDS_D); /* dy^2 */
    #endif
    gf_sub(p->t,ONE,p->t); /* denom = 1-dy^2 or 1-d + dy^2 */
    
    gf_mul(p->x,p->z,p->t);
    succ &= gf_isr(p->t,p->x); /* 1/sqrt(num * denom) */
    
    gf_mul(p->x,p->t,p->z); /* sqrt(num / denom) */
    /* Pick the root whose low bit matches the transmitted sign bit. */
    gf_cond_neg(p->x,gf_lobit(p->x)^low);
    gf_copy(p->z,ONE);
  
    #if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y/sqrt(1-d/a)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         * (MAGIC: above formula may be off by a factor of -a
         * or something somewhere; check it for other a)
         *
         * with a = -1, d = -EDWARDS_D:
         *   -2xy/sqrt(1-EDWARDS_D)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a ); // x^2 + y^2
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d ); // 2xy
        gf_sub ( p->t, a, c ); // y^2 - x^2
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( c, p->z, p->t ); // 2z^2 - y^2 + x^2
        gf_div_i ( a, c );
        gf_mul ( c, a, RISTRETTO_FACTOR );
        gf_mul ( p->x, b, p->t); // (2xy)(y^2-x^2)
        gf_mul ( p->z, p->t, c ); // (y^2-x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->y, d, c ); // (y^2+x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->t, d, b );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
    #elif IMAGINE_TWIST
    {
        gf_mul(p->t,p->x,SQRT_MINUS_ONE);
        gf_copy(p->x,p->t);
        gf_mul(p->t,p->x,p->y);
    }
    #else
    {
        /* 4-isogeny 2xy/(y^2-ax^2), (y^2+ax^2)/(2-y^2-ax^2) */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a );
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d );
        gf_sub ( p->t, a, c );
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( a, p->z, d );
        gf_mul ( p->x, a, b );
        gf_mul ( p->z, p->t, a );
        gf_mul ( p->y, p->t, d );
        gf_mul ( p->t, b, d );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
    #endif
    
    decaf_bzero(enc2,sizeof(enc2));
    assert(API_NS(point_valid)(p) || ~succ);
    
    return decaf_succeed_if(mask_to_bool(succ));
}
1263
/**
 * X448 Diffie-Hellman scalar multiplication (Montgomery ladder).
 *
 * Computes the u-coordinate of scalar*base in constant time.  The
 * scalar bytes are clamped on the fly: the low cofactor bits of the
 * first byte are cleared and the top bit is treated as set.  Returns
 * DECAF_FAILURE if the result is zero (small-order input), otherwise
 * DECAF_SUCCESS.
 */
decaf_error_t decaf_x448 (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t base[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    gf x1, x2, z2, x3, z3, t1, t2;
    ignore_result(gf_deserialize(x1,base,1,0));
    /* Ladder state: (x2:z2) = "0*P" = infinity, (x3:z3) = 1*P. */
    gf_copy(x2,ONE);
    gf_copy(z2,ZERO);
    gf_copy(x3,x1);
    gf_copy(z3,ONE);
    
    int t;
    mask_t swap = 0;
    
    for (t = X_PRIVATE_BITS-1; t>=0; t--) {
        uint8_t sb = scalar[t/8];
        
        /* Scalar conditioning */
        if (t/8==0) sb &= -(uint8_t)COFACTOR;       /* clear low (cofactor) bits */
        else if (t == X_PRIVATE_BITS-1) sb = -1;    /* treat the top bit as set */
        
        mask_t k_t = (sb>>(t%8)) & 1;
        k_t = -k_t; /* set to all 0s or all 1s */
        
        /* Constant-time conditional swap driven by successive key bits. */
        swap ^= k_t;
        gf_cond_swap(x2,x3,swap);
        gf_cond_swap(z2,z3,swap);
        swap = k_t;
        
        /* Combined Montgomery double-and-add step. */
        gf_add_nr(t1,x2,z2); /* A = x2 + z2 */        /* 2+e */
        gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */        /* 3+e */
        gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */        /* 3+e */
        gf_mul(x2,t1,z2);    /* DA */
        gf_add_nr(z2,z3,x3); /* C = x3 + z3 */        /* 2+e */
        gf_mul(x3,t2,z2);    /* CB */
        gf_sub_nr(z3,x2,x3); /* DA-CB */              /* 3+e */
        gf_sqr(z2,z3);       /* (DA-CB)^2 */
        gf_mul(z3,x1,z2);    /* z3 = x1(DA-CB)^2 */
        gf_add_nr(z2,x2,x3); /* (DA+CB) */            /* 2+e */
        gf_sqr(x3,z2);       /* x3 = (DA+CB)^2 */
        
        gf_sqr(z2,t1);       /* AA = A^2 */
        gf_sqr(t1,t2);       /* BB = B^2 */
        gf_mul(x2,z2,t1);    /* x2 = AA*BB */
        gf_sub_nr(t2,z2,t1); /* E = AA-BB */          /* 3+e */
        
        gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
        gf_add_nr(t1,t1,z2);       /* AA + a24*E */   /* 2+e */
        gf_mul(z2,t2,t1);          /* z2 = E(AA+a24*E) */
    }
    
    /* Finish */
    gf_cond_swap(x2,x3,swap);
    gf_cond_swap(z2,z3,swap);
    gf_invert(z2,z2,0);
    gf_mul(x1,x2,z2);
    gf_serialize(out,x1,1);
    /* Reject the all-zero output (identity / small-order input). */
    mask_t nz = ~gf_eq(x1,ZERO);
    
    /* Scrub secret-dependent temporaries. */
    decaf_bzero(x1,sizeof(x1));
    decaf_bzero(x2,sizeof(x2));
    decaf_bzero(z2,sizeof(z2));
    decaf_bzero(x3,sizeof(x3));
    decaf_bzero(z3,sizeof(z3));
    decaf_bzero(t1,sizeof(t1));
    decaf_bzero(t2,sizeof(t2));
    
    return decaf_succeed_if(mask_to_bool(nz));
}
1334
/* Thanks Johan Pascal */
/**
 * Convert an Ed448 public key to an X448 public key.
 *
 * Deserializes the Edwards y-coordinate and maps it to the Montgomery
 * u-coordinate via u = y^2 * (1-dy^2) / (1-y^2) (or (1+y)/(1-y) for
 * the sigma-isogeny variant), then serializes u.
 */
void decaf_ed448_convert_public_key_to_x448 (
    uint8_t x[DECAF_X448_PUBLIC_BYTES],
    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
) {
    gf y;
    /* NOTE(review): (uint8_t)(0xFE<<7) evaluates to 0, so no bits of
     * the trailing byte are masked off here — presumably intentional
     * for this curve's encoding; confirm against gf_deserialize. */
    const uint8_t mask = (uint8_t)(0xFE<<(7));
    ignore_result(gf_deserialize(y, ed, 1, mask));
    
    {
        gf n,d;
        
        #if EDDSA_USE_SIGMA_ISOGENY
            /* u = (1+y)/(1-y)*/
            gf_add(n, y, ONE);   /* n = y+1 */
            gf_sub(d, ONE, y);   /* d = 1-y */
            gf_invert(d, d, 0);  /* d = 1/(1-y) */
            gf_mul(y, n, d);     /* u = (y+1)/(1-y) */
            gf_serialize(x,y,1);
        #else /* EDDSA_USE_SIGMA_ISOGENY */
            /* u = y^2 * (1-dy^2) / (1-y^2) */
            gf_sqr(n,y);             /* y^2*/
            gf_sub(d,ONE,n);         /* 1-y^2*/
            gf_invert(d,d,0);        /* 1/(1-y^2)*/
            gf_mul(y,n,d);           /* y^2 / (1-y^2) */
            gf_mulw(d,n,EDWARDS_D);  /* dy^2*/
            gf_sub(d, ONE, d);       /* 1-dy^2*/
            gf_mul(n, y, d);         /* y^2 * (1-dy^2) / (1-y^2) */
            gf_serialize(x,n,1);
        #endif /* EDDSA_USE_SIGMA_ISOGENY */
        
        decaf_bzero(y,sizeof(y));
        decaf_bzero(n,sizeof(n));
        decaf_bzero(d,sizeof(d));
    }
}
1371
/**
 * Compute the X448 public key for a private scalar.
 * Thin alias for decaf_x448_derive_public_key().
 */
void decaf_x448_generate_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    decaf_x448_derive_public_key(out,scalar);
}
1378
API_NS(point_mul_by_ratio_and_encode_like_x448)1379 void API_NS(point_mul_by_ratio_and_encode_like_x448) (
1380 uint8_t out[X_PUBLIC_BYTES],
1381 const point_t p
1382 ) {
1383 point_t q;
1384 #if COFACTOR == 8
1385 point_double_internal(q,p,1);
1386 #else
1387 API_NS(point_copy)(q,p);
1388 #endif
1389 gf_invert(q->t,q->x,0); /* 1/x */
1390 gf_mul(q->z,q->t,q->y); /* y/x */
1391 gf_sqr(q->y,q->z); /* (y/x)^2 */
1392 #if IMAGINE_TWIST
1393 gf_sub(q->y,ZERO,q->y);
1394 #endif
1395 gf_serialize(out,q->y,1);
1396 API_NS(point_destroy(q));
1397 }
1398
/**
 * Derive the X448 public key for a private scalar using the
 * precomputed-base scalar multiplication.
 *
 * The scalar is clamped (low cofactor bits cleared, bits above
 * X_PRIVATE_BITS cleared, top bit set), decoded, and halved
 * log2(DECAF_X448_ENCODE_RATIO) times to compensate for the ratio
 * applied by the encoder.
 */
void decaf_x448_derive_public_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    /* Scalar conditioning */
    uint8_t scalar2[X_PRIVATE_BYTES];
    memcpy(scalar2,scalar,sizeof(scalar2));
    scalar2[0] &= -(uint8_t)COFACTOR; /* clear the low (cofactor) bits */
    
    /* Clear any bits above X_PRIVATE_BITS, then set the top bit. */
    scalar2[X_PRIVATE_BYTES-1] &= ~(-1u<<((X_PRIVATE_BITS+7)%8));
    scalar2[X_PRIVATE_BYTES-1] |= 1<<((X_PRIVATE_BITS+7)%8);
    
    scalar_t the_scalar;
    API_NS(scalar_decode_long)(the_scalar,scalar2,sizeof(scalar2));
    
    /* Compensate for the encoding ratio: halve log2(ratio) times. */
    for (unsigned i=1; i<DECAF_X448_ENCODE_RATIO; i<<=1) {
        API_NS(scalar_halve)(the_scalar,the_scalar);
    }
    point_t p;
    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);
    API_NS(point_mul_by_ratio_and_encode_like_x448)(out,p);
    API_NS(point_destroy)(p);
}
1423
1424 /**
1425 * @cond internal
1426 * Control for variable-time scalar multiply algorithms.
1427 */
struct smvt_control {
    /* power: bit position of this wNAF digit (-1 marks the end of the
     * list); addend: the signed odd digit value at that position. */
    int power, addend;
};
1431
/**
 * Recode a scalar into signed sliding-window (wNAF) form.
 *
 * Fills control[] with (power, addend) digits, most significant first,
 * terminated by a sentinel entry with power = -1.  Each addend is an
 * odd signed value whose magnitude fits the window.  Returns the
 * number of real digits, excluding the sentinel.
 */
static int recode_wnaf (
    struct smvt_control *control, /* [nbits/(table_bits+1) + 3] */
    const scalar_t scalar,
    unsigned int table_bits
) {
    unsigned int table_size = SCALAR_BITS/(table_bits+1) + 3;
    int position = table_size - 1; /* at the end */
    
    /* place the end marker */
    control[position].power = -1;
    control[position].addend = 0;
    position--;

    /* PERF: Could negate scalar if it's large.  But then would need more cases
     * in the actual code that uses it, all for an expected reduction of like 1/5 op.
     * Probably not worth it.
     */
    
    uint64_t current = scalar->limb[0] & 0xFFFF;
    uint32_t mask = (1<<(table_bits+1))-1;

    /* Walk the scalar 16 bits at a time, keeping the next 16 bits of
     * lookahead in the high half of `current`. */
    unsigned int w;
    const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2;
    for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
        if (w < (SCALAR_BITS-1)/16+1) {
            /* Refill the 16 high bits of current */
            current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
        }
        
        while (current & 0xFFFF) {
            assert(position >= 0);
            /* Skip to the next set bit, then take a signed digit there;
             * the bit just above the window decides the sign fold. */
            uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
            int32_t delta = odd & mask;
            if (odd & 1<<(table_bits+1)) delta -= (1<<(table_bits+1));
            current -= delta << pos;
            control[position].power = pos + 16*(w-1);
            control[position].addend = delta;
            position--;
        }
        current >>= 16;
    }
    assert(current==0);
    
    /* Slide the recorded digits down to the start of the array. */
    position++;
    unsigned int n = table_size - position;
    unsigned int i;
    for (i=0; i<n; i++) {
        control[i] = control[i+position];
    }
    return n-1;
}
1483
1484 static void
prepare_wnaf_table(pniels_t * output,const point_t working,unsigned int tbits)1485 prepare_wnaf_table(
1486 pniels_t *output,
1487 const point_t working,
1488 unsigned int tbits
1489 ) {
1490 point_t tmp;
1491 int i;
1492 pt_to_pniels(output[0], working);
1493
1494 if (tbits == 0) return;
1495
1496 API_NS(point_double)(tmp,working);
1497 pniels_t twop;
1498 pt_to_pniels(twop, tmp);
1499
1500 add_pniels_to_pt(tmp, output[0],0);
1501 pt_to_pniels(output[1], tmp);
1502
1503 for (i=2; i < 1<<tbits; i++) {
1504 add_pniels_to_pt(tmp, twop,0);
1505 pt_to_pniels(output[i], tmp);
1506 }
1507
1508 API_NS(point_destroy)(tmp);
1509 decaf_bzero(twop,sizeof(twop));
1510 }
1511
/* Fixed wNAF table for the base point, stored as raw field elements
 * and reinterpreted as niels points. */
extern const gf API_NS(precomputed_wnaf_as_fe)[];
static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
/* Byte size of the precomputed wNAF table: 2^FIXED_TABLE_BITS entries. */
const size_t API_NS(sizeof_precomputed_wnafs) __attribute((visibility("hidden")))
    = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;

void API_NS(precompute_wnafs) (
    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
    const point_t base
) __attribute__ ((visibility ("hidden")));
1521
API_NS(precompute_wnafs)1522 void API_NS(precompute_wnafs) (
1523 niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
1524 const point_t base
1525 ) {
1526 pniels_t tmp[1<<DECAF_WNAF_FIXED_TABLE_BITS];
1527 gf zs[1<<DECAF_WNAF_FIXED_TABLE_BITS], zis[1<<DECAF_WNAF_FIXED_TABLE_BITS];
1528 int i;
1529 prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);
1530 for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
1531 memcpy(out[i], tmp[i]->n, sizeof(niels_t));
1532 gf_copy(zs[i], tmp[i]->z);
1533 }
1534 batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);
1535
1536 decaf_bzero(tmp,sizeof(tmp));
1537 decaf_bzero(zs,sizeof(zs));
1538 decaf_bzero(zis,sizeof(zis));
1539 }
1540
/**
 * Variable-time combined scalar multiplication:
 *   combo = scalar1 * precomputed_base + scalar2 * base2.
 *
 * Both scalars are recoded to wNAF; scalar1's digits index the fixed
 * precomputed table, scalar2's digits index a table built here from
 * base2.  NOT constant time — use only with non-secret scalars.
 *
 * NOTE(review): if scalar2 recodes to no digits (control_var[0].power
 * < 0), the early return below yields the identity without applying
 * scalar1, and skips the bzero cleanup.  Verify callers never hit
 * that case with a nonzero scalar1.
 */
void API_NS(base_double_scalarmul_non_secret) (
    point_t combo,
    const scalar_t scalar1,
    const point_t base2,
    const scalar_t scalar2
) {
    const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
        table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
    struct smvt_control control_var[SCALAR_BITS/((int)(DECAF_WNAF_VAR_TABLE_BITS)+1)+3];
    struct smvt_control control_pre[SCALAR_BITS/((int)(DECAF_WNAF_FIXED_TABLE_BITS)+1)+3];
    
    int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
    int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);

    pniels_t precmp_var[1<<(int)(DECAF_WNAF_VAR_TABLE_BITS)];
    prepare_wnaf_table(precmp_var, base2, table_bits_var);

    int contp=0, contv=0, i = control_var[0].power;

    /* Seed the accumulator from whichever scalar has the highest digit. */
    if (i < 0) {
        API_NS(point_copy)(combo, API_NS(point_identity));
        return;
    } else if (i > control_pre[0].power) {
        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
        contv++;
    } else if (i == control_pre[0].power && i >=0 ) {
        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
        add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
        contv++; contp++;
    } else {
        i = control_pre[0].power;
        niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
        contp++;
    }
    
    /* Shared double-and-add ladder over the remaining bit positions. */
    for (i--; i >= 0; i--) {
        int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
        point_double_internal(combo,combo,i && !(cv||cp));

        if (cv) {
            assert(control_var[contv].addend);

            if (control_var[contv].addend > 0) {
                add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
            } else {
                sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
            }
            contv++;
        }

        if (cp) {
            assert(control_pre[contp].addend);

            if (control_pre[contp].addend > 0) {
                add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
            } else {
                sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
            }
            contp++;
        }
    }
    
    /* This function is non-secret, but whatever this is cheap. */
    decaf_bzero(control_var,sizeof(control_var));
    decaf_bzero(control_pre,sizeof(control_pre));
    decaf_bzero(precmp_var,sizeof(precmp_var));

    assert(contv == ncb_var); (void)ncb_var;
    assert(contp == ncb_pre); (void)ncb_pre;
}
1611
/** Securely erase a point. */
void API_NS(point_destroy) (
    point_t point
) {
    decaf_bzero(point, sizeof(point_t));
}
1617
/** Securely erase a precomputed-base table. */
void API_NS(precomputed_destroy) (
    precomputed_s *pre
) {
    decaf_bzero(pre, API_NS(sizeof_precomputed_s));
}
1623