xref: /freebsd/contrib/bearssl/src/ec/ec_prime_i31.c (revision 61e21613)
1 /*
2  * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
27 /*
28  * Parameters for supported curves (field modulus, and 'b' equation
29  * parameter; both values use the 'i31' format, and 'b' is in Montgomery
30  * representation).
31  */
32 
33 static const uint32_t P256_P[] = {
34 	0x00000108,
35 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
36 	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
37 	0x000000FF
38 };
39 
40 static const uint32_t P256_R2[] = {
41 	0x00000108,
42 	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
43 	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
44 	0x00000000
45 };
46 
47 static const uint32_t P256_B[] = {
48 	0x00000108,
49 	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
50 	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
51 	0x0000000E
52 };
53 
54 static const uint32_t P384_P[] = {
55 	0x0000018C,
56 	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
57 	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
58 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
59 	0x00000FFF
60 };
61 
62 static const uint32_t P384_R2[] = {
63 	0x0000018C,
64 	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
65 	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
66 	0x00008000, 0x00008000, 0x00000000, 0x00000000,
67 	0x00000000
68 };
69 
70 static const uint32_t P384_B[] = {
71 	0x0000018C,
72 	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
73 	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
74 	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
75 	0x000008A5
76 };
77 
78 static const uint32_t P521_P[] = {
79 	0x00000219,
80 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
81 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
82 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
83 	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
84 	0x01FFFFFF
85 };
86 
87 static const uint32_t P521_R2[] = {
88 	0x00000219,
89 	0x00001000, 0x00000000, 0x00000000, 0x00000000,
90 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
91 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
92 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
93 	0x00000000
94 };
95 
96 static const uint32_t P521_B[] = {
97 	0x00000219,
98 	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
99 	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
100 	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
101 	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
102 	0x00654FAE
103 };
104 
105 typedef struct {
106 	const uint32_t *p;
107 	const uint32_t *b;
108 	const uint32_t *R2;
109 	uint32_t p0i;
110 	size_t point_len;
111 } curve_params;
112 
113 static inline const curve_params *
114 id_to_curve(int curve)
115 {
116 	static const curve_params pp[] = {
117 		{ P256_P, P256_B, P256_R2, 0x00000001,  65 },
118 		{ P384_P, P384_B, P384_R2, 0x00000001,  97 },
119 		{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
120 	};
121 
122 	return &pp[curve - BR_EC_secp256r1];
123 }
124 
125 #define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
126 
127 /*
128  * Type for a point in Jacobian coordinates:
129  * -- three values, x, y and z, in Montgomery representation
130  * -- affine coordinates are X = x / z^2 and Y = y / z^3
131  * -- for the point at infinity, z = 0
132  */
133 typedef struct {
134 	uint32_t c[3][I31_LEN];
135 } jacobian;
136 
137 /*
138  * We use a custom interpreter that uses a dozen registers, and
139  * only six operations:
140  *    MSET(d, a)       copy a into d
141  *    MADD(d, a)       d = d+a (modular)
142  *    MSUB(d, a)       d = d-a (modular)
143  *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
144  *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
145  *    MTZ(d)           clear return value if d = 0
146  * Destination of MMUL (d) must be distinct from operands (a and b).
147  * There is no such constraint for MSUB and MADD.
148  *
149  * Registers include the operand coordinates, and temporaries.
150  */
151 #define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
152 #define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
153 #define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
154 #define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
155 #define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
156 #define MTZ(d)          (0x5000 + ((d) << 8))
157 #define ENDCODE         0
158 
159 /*
160  * Registers for the input operands.
161  */
162 #define P1x    0
163 #define P1y    1
164 #define P1z    2
165 #define P2x    3
166 #define P2y    4
167 #define P2z    5
168 
169 /*
170  * Alternate names for the first input operand.
171  */
172 #define Px     0
173 #define Py     1
174 #define Pz     2
175 
176 /*
177  * Temporaries.
178  */
179 #define t1     6
180 #define t2     7
181 #define t3     8
182 #define t4     9
183 #define t5    10
184 #define t6    11
185 #define t7    12
186 
187 /*
188  * Extra scratch registers available when there is no second operand (e.g.
189  * for "double" and "affine").
190  */
191 #define t8     3
192 #define t9     4
193 #define t10    5
194 
195 /*
196  * Doubling formulas are:
197  *
198  *   s = 4*x*y^2
199  *   m = 3*(x + z^2)*(x - z^2)
200  *   x' = m^2 - 2*s
201  *   y' = m*(s - x') - 8*y^4
202  *   z' = 2*y*z
203  *
204  * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
205  * should. This case should not happen anyway, because our curves have
206  * prime order, and thus do not contain any point of order 2.
207  *
208  * If P is infinity (z = 0), then again the formulas yield infinity,
209  * which is correct. Thus, this code works for all points.
210  *
211  * Cost: 8 multiplications
212  */
213 static const uint16_t code_double[] = {
214 	/*
215 	 * Compute z^2 (in t1).
216 	 */
217 	MMUL(t1, Pz, Pz),
218 
219 	/*
220 	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
221 	 */
222 	MSET(t2, Px),
223 	MSUB(t2, t1),
224 	MADD(t1, Px),
225 
226 	/*
227 	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
228 	 */
229 	MMUL(t3, t1, t2),
230 	MSET(t1, t3),
231 	MADD(t1, t3),
232 	MADD(t1, t3),
233 
234 	/*
235 	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
236 	 */
237 	MMUL(t3, Py, Py),
238 	MADD(t3, t3),
239 	MMUL(t2, Px, t3),
240 	MADD(t2, t2),
241 
242 	/*
243 	 * Compute x' = m^2 - 2*s.
244 	 */
245 	MMUL(Px, t1, t1),
246 	MSUB(Px, t2),
247 	MSUB(Px, t2),
248 
249 	/*
250 	 * Compute z' = 2*y*z.
251 	 */
252 	MMUL(t4, Py, Pz),
253 	MSET(Pz, t4),
254 	MADD(Pz, t4),
255 
256 	/*
257 	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
258 	 * 2*y^2 in t3.
259 	 */
260 	MSUB(t2, Px),
261 	MMUL(Py, t1, t2),
262 	MMUL(t4, t3, t3),
263 	MSUB(Py, t4),
264 	MSUB(Py, t4),
265 
266 	ENDCODE
267 };
268 
269 /*
270  * Addtions formulas are:
271  *
272  *   u1 = x1 * z2^2
273  *   u2 = x2 * z1^2
274  *   s1 = y1 * z2^3
275  *   s2 = y2 * z1^3
276  *   h = u2 - u1
277  *   r = s2 - s1
278  *   x3 = r^2 - h^3 - 2 * u1 * h^2
279  *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
280  *   z3 = h * z1 * z2
281  *
282  * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
283  * z3 == 0, so the result is correct.
284  * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
285  * not correct.
286  * h == 0 only if u1 == u2; this happens in two cases:
287  * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
288  * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
289  *
290  * Thus, the following situations are not handled correctly:
291  * -- P1 = 0 and P2 != 0
292  * -- P1 != 0 and P2 = 0
293  * -- P1 = P2
294  * All other cases are properly computed. However, even in "incorrect"
295  * situations, the three coordinates still are properly formed field
296  * elements.
297  *
298  * The returned flag is cleared if r == 0. This happens in the following
299  * cases:
300  * -- Both points are on the same horizontal line (same Y coordinate).
301  * -- Both points are infinity.
302  * -- One point is infinity and the other is on line Y = 0.
303  * The third case cannot happen with our curves (there is no valid point
304  * on line Y = 0 since that would be a point of order 2). If the two
305  * source points are non-infinity, then remains only the case where the
306  * two points are on the same horizontal line.
307  *
308  * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
309  * P2 != 0:
310  * -- If the returned value is not the point at infinity, then it was properly
311  * computed.
312  * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
313  * is indeed the point at infinity.
314  * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
315  * use the 'double' code.
316  *
317  * Cost: 16 multiplications
318  */
319 static const uint16_t code_add[] = {
320 	/*
321 	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
322 	 */
323 	MMUL(t3, P2z, P2z),
324 	MMUL(t1, P1x, t3),
325 	MMUL(t4, P2z, t3),
326 	MMUL(t3, P1y, t4),
327 
328 	/*
329 	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
330 	 */
331 	MMUL(t4, P1z, P1z),
332 	MMUL(t2, P2x, t4),
333 	MMUL(t5, P1z, t4),
334 	MMUL(t4, P2y, t5),
335 
336 	/*
337 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
338 	 */
339 	MSUB(t2, t1),
340 	MSUB(t4, t3),
341 
342 	/*
343 	 * Report cases where r = 0 through the returned flag.
344 	 */
345 	MTZ(t4),
346 
347 	/*
348 	 * Compute u1*h^2 (in t6) and h^3 (in t5).
349 	 */
350 	MMUL(t7, t2, t2),
351 	MMUL(t6, t1, t7),
352 	MMUL(t5, t7, t2),
353 
354 	/*
355 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
356 	 * t1 and t7 can be used as scratch registers.
357 	 */
358 	MMUL(P1x, t4, t4),
359 	MSUB(P1x, t5),
360 	MSUB(P1x, t6),
361 	MSUB(P1x, t6),
362 
363 	/*
364 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
365 	 */
366 	MSUB(t6, P1x),
367 	MMUL(P1y, t4, t6),
368 	MMUL(t1, t5, t3),
369 	MSUB(P1y, t1),
370 
371 	/*
372 	 * Compute z3 = h*z1*z2.
373 	 */
374 	MMUL(t1, P1z, P2z),
375 	MMUL(P1z, t1, t2),
376 
377 	ENDCODE
378 };
379 
380 /*
381  * Check that the point is on the curve. This code snippet assumes the
382  * following conventions:
383  * -- Coordinates x and y have been freshly decoded in P1 (but not
384  * converted to Montgomery coordinates yet).
385  * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
386  */
387 static const uint16_t code_check[] = {
388 
389 	/* Convert x and y to Montgomery representation. */
390 	MMUL(t1, P1x, P2x),
391 	MMUL(t2, P1y, P2x),
392 	MSET(P1x, t1),
393 	MSET(P1y, t2),
394 
395 	/* Compute x^3 in t1. */
396 	MMUL(t2, P1x, P1x),
397 	MMUL(t1, P1x, t2),
398 
399 	/* Subtract 3*x from t1. */
400 	MSUB(t1, P1x),
401 	MSUB(t1, P1x),
402 	MSUB(t1, P1x),
403 
404 	/* Add b. */
405 	MADD(t1, P2y),
406 
407 	/* Compute y^2 in t2. */
408 	MMUL(t2, P1y, P1y),
409 
410 	/* Compare y^2 with x^3 - 3*x + b; they must match. */
411 	MSUB(t1, t2),
412 	MTZ(t1),
413 
414 	/* Set z to 1 (in Montgomery representation). */
415 	MMUL(P1z, P2x, P2z),
416 
417 	ENDCODE
418 };
419 
420 /*
421  * Conversion back to affine coordinates. This code snippet assumes that
422  * the z coordinate of P2 is set to 1 (not in Montgomery representation).
423  */
424 static const uint16_t code_affine[] = {
425 
426 	/* Save z*R in t1. */
427 	MSET(t1, P1z),
428 
429 	/* Compute z^3 in t2. */
430 	MMUL(t2, P1z, P1z),
431 	MMUL(t3, P1z, t2),
432 	MMUL(t2, t3, P2z),
433 
434 	/* Invert to (1/z^3) in t2. */
435 	MINV(t2, t3, t4),
436 
437 	/* Compute y. */
438 	MSET(t3, P1y),
439 	MMUL(P1y, t2, t3),
440 
441 	/* Compute (1/z^2) in t3. */
442 	MMUL(t3, t2, t1),
443 
444 	/* Compute x. */
445 	MSET(t2, P1x),
446 	MMUL(P1x, t2, t3),
447 
448 	ENDCODE
449 };
450 
451 static uint32_t
452 run_code(jacobian *P1, const jacobian *P2,
453 	const curve_params *cc, const uint16_t *code)
454 {
455 	uint32_t r;
456 	uint32_t t[13][I31_LEN];
457 	size_t u;
458 
459 	r = 1;
460 
461 	/*
462 	 * Copy the two operands in the dedicated registers.
463 	 */
464 	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
465 	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
466 
467 	/*
468 	 * Run formulas.
469 	 */
470 	for (u = 0;; u ++) {
471 		unsigned op, d, a, b;
472 
473 		op = code[u];
474 		if (op == 0) {
475 			break;
476 		}
477 		d = (op >> 8) & 0x0F;
478 		a = (op >> 4) & 0x0F;
479 		b = op & 0x0F;
480 		op >>= 12;
481 		switch (op) {
482 			uint32_t ctl;
483 			size_t plen;
484 			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
485 
486 		case 0:
487 			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
488 			break;
489 		case 1:
490 			ctl = br_i31_add(t[d], t[a], 1);
491 			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
492 			br_i31_sub(t[d], cc->p, ctl);
493 			break;
494 		case 2:
495 			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
496 			break;
497 		case 3:
498 			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
499 			break;
500 		case 4:
501 			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
502 			br_i31_encode(tp, plen, cc->p);
503 			tp[plen - 1] -= 2;
504 			br_i31_modpow(t[d], tp, plen,
505 				cc->p, cc->p0i, t[a], t[b]);
506 			break;
507 		default:
508 			r &= ~br_i31_iszero(t[d]);
509 			break;
510 		}
511 	}
512 
513 	/*
514 	 * Copy back result.
515 	 */
516 	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
517 	return r;
518 }
519 
520 static void
521 set_one(uint32_t *x, const uint32_t *p)
522 {
523 	size_t plen;
524 
525 	plen = (p[0] + 63) >> 5;
526 	memset(x, 0, plen * sizeof *x);
527 	x[0] = p[0];
528 	x[1] = 0x00000001;
529 }
530 
531 static void
532 point_zero(jacobian *P, const curve_params *cc)
533 {
534 	memset(P, 0, sizeof *P);
535 	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
536 }
537 
538 static inline void
539 point_double(jacobian *P, const curve_params *cc)
540 {
541 	run_code(P, P, cc, code_double);
542 }
543 
544 static inline uint32_t
545 point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
546 {
547 	return run_code(P1, P2, cc, code_add);
548 }
549 
550 static void
551 point_mul(jacobian *P, const unsigned char *x, size_t xlen,
552 	const curve_params *cc)
553 {
554 	/*
555 	 * We do a simple double-and-add ladder with a 2-bit window
556 	 * to make only one add every two doublings. We thus first
557 	 * precompute 2P and 3P in some local buffers.
558 	 *
559 	 * We always perform two doublings and one addition; the
560 	 * addition is with P, 2P and 3P and is done in a temporary
561 	 * array.
562 	 *
563 	 * The addition code cannot handle cases where one of the
564 	 * operands is infinity, which is the case at the start of the
565 	 * ladder. We therefore need to maintain a flag that controls
566 	 * this situation.
567 	 */
568 	uint32_t qz;
569 	jacobian P2, P3, Q, T, U;
570 
571 	memcpy(&P2, P, sizeof P2);
572 	point_double(&P2, cc);
573 	memcpy(&P3, P, sizeof P3);
574 	point_add(&P3, &P2, cc);
575 
576 	point_zero(&Q, cc);
577 	qz = 1;
578 	while (xlen -- > 0) {
579 		int k;
580 
581 		for (k = 6; k >= 0; k -= 2) {
582 			uint32_t bits;
583 			uint32_t bnz;
584 
585 			point_double(&Q, cc);
586 			point_double(&Q, cc);
587 			memcpy(&T, P, sizeof T);
588 			memcpy(&U, &Q, sizeof U);
589 			bits = (*x >> k) & (uint32_t)3;
590 			bnz = NEQ(bits, 0);
591 			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
592 			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
593 			point_add(&U, &T, cc);
594 			CCOPY(bnz & qz, &Q, &T, sizeof Q);
595 			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
596 			qz &= ~bnz;
597 		}
598 		x ++;
599 	}
600 	memcpy(P, &Q, sizeof Q);
601 }
602 
603 /*
604  * Decode point into Jacobian coordinates. This function does not support
605  * the point at infinity. If the point is invalid then this returns 0, but
606  * the coordinates are still set to properly formed field elements.
607  */
608 static uint32_t
609 point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
610 {
611 	/*
612 	 * Points must use uncompressed format:
613 	 * -- first byte is 0x04;
614 	 * -- coordinates X and Y use unsigned big-endian, with the same
615 	 *    length as the field modulus.
616 	 *
617 	 * We don't support hybrid format (uncompressed, but first byte
618 	 * has value 0x06 or 0x07, depending on the least significant bit
619 	 * of Y) because it is rather useless, and explicitly forbidden
620 	 * by PKIX (RFC 5480, section 2.2).
621 	 *
622 	 * We don't support compressed format either, because it is not
623 	 * much used in practice (there are or were patent-related
624 	 * concerns about point compression, which explains the lack of
625 	 * generalised support). Also, point compression support would
626 	 * need a bit more code.
627 	 */
628 	const unsigned char *buf;
629 	size_t plen, zlen;
630 	uint32_t r;
631 	jacobian Q;
632 
633 	buf = src;
634 	point_zero(P, cc);
635 	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
636 	if (len != 1 + (plen << 1)) {
637 		return 0;
638 	}
639 	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
640 	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
641 
642 	/*
643 	 * Check first byte.
644 	 */
645 	r &= EQ(buf[0], 0x04);
646 	/* obsolete
647 	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
648 		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
649 	*/
650 
651 	/*
652 	 * Convert coordinates and check that the point is valid.
653 	 */
654 	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
655 	memcpy(Q.c[0], cc->R2, zlen);
656 	memcpy(Q.c[1], cc->b, zlen);
657 	set_one(Q.c[2], cc->p);
658 	r &= ~run_code(P, &Q, cc, code_check);
659 	return r;
660 }
661 
662 /*
663  * Encode a point. This method assumes that the point is correct and is
664  * not the point at infinity. Encoded size is always 1+2*plen, where
665  * plen is the field modulus length, in bytes.
666  */
667 static void
668 point_encode(void *dst, const jacobian *P, const curve_params *cc)
669 {
670 	unsigned char *buf;
671 	uint32_t xbl;
672 	size_t plen;
673 	jacobian Q, T;
674 
675 	buf = dst;
676 	xbl = cc->p[0];
677 	xbl -= (xbl >> 5);
678 	plen = (xbl + 7) >> 3;
679 	buf[0] = 0x04;
680 	memcpy(&Q, P, sizeof *P);
681 	set_one(T.c[2], cc->p);
682 	run_code(&Q, &T, cc, code_affine);
683 	br_i31_encode(buf + 1, plen, Q.c[0]);
684 	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
685 }
686 
687 static const br_ec_curve_def *
688 id_to_curve_def(int curve)
689 {
690 	switch (curve) {
691 	case BR_EC_secp256r1:
692 		return &br_secp256r1;
693 	case BR_EC_secp384r1:
694 		return &br_secp384r1;
695 	case BR_EC_secp521r1:
696 		return &br_secp521r1;
697 	}
698 	return NULL;
699 }
700 
701 static const unsigned char *
702 api_generator(int curve, size_t *len)
703 {
704 	const br_ec_curve_def *cd;
705 
706 	cd = id_to_curve_def(curve);
707 	*len = cd->generator_len;
708 	return cd->generator;
709 }
710 
711 static const unsigned char *
712 api_order(int curve, size_t *len)
713 {
714 	const br_ec_curve_def *cd;
715 
716 	cd = id_to_curve_def(curve);
717 	*len = cd->order_len;
718 	return cd->order;
719 }
720 
721 static size_t
722 api_xoff(int curve, size_t *len)
723 {
724 	api_generator(curve, len);
725 	*len >>= 1;
726 	return 1;
727 }
728 
729 static uint32_t
730 api_mul(unsigned char *G, size_t Glen,
731 	const unsigned char *x, size_t xlen, int curve)
732 {
733 	uint32_t r;
734 	const curve_params *cc;
735 	jacobian P;
736 
737 	cc = id_to_curve(curve);
738 	if (Glen != cc->point_len) {
739 		return 0;
740 	}
741 	r = point_decode(&P, G, Glen, cc);
742 	point_mul(&P, x, xlen, cc);
743 	point_encode(G, &P, cc);
744 	return r;
745 }
746 
747 static size_t
748 api_mulgen(unsigned char *R,
749 	const unsigned char *x, size_t xlen, int curve)
750 {
751 	const unsigned char *G;
752 	size_t Glen;
753 
754 	G = api_generator(curve, &Glen);
755 	memcpy(R, G, Glen);
756 	api_mul(R, Glen, x, xlen, curve);
757 	return Glen;
758 }
759 
760 static uint32_t
761 api_muladd(unsigned char *A, const unsigned char *B, size_t len,
762 	const unsigned char *x, size_t xlen,
763 	const unsigned char *y, size_t ylen, int curve)
764 {
765 	uint32_t r, t, z;
766 	const curve_params *cc;
767 	jacobian P, Q;
768 
769 	/*
770 	 * TODO: see about merging the two ladders. Right now, we do
771 	 * two independent point multiplications, which is a bit
772 	 * wasteful of CPU resources (but yields short code).
773 	 */
774 
775 	cc = id_to_curve(curve);
776 	if (len != cc->point_len) {
777 		return 0;
778 	}
779 	r = point_decode(&P, A, len, cc);
780 	if (B == NULL) {
781 		size_t Glen;
782 
783 		B = api_generator(curve, &Glen);
784 	}
785 	r &= point_decode(&Q, B, len, cc);
786 	point_mul(&P, x, xlen, cc);
787 	point_mul(&Q, y, ylen, cc);
788 
789 	/*
790 	 * We want to compute P+Q. Since the base points A and B are distinct
791 	 * from infinity, and the multipliers are non-zero and lower than the
792 	 * curve order, then we know that P and Q are non-infinity. This
793 	 * leaves two special situations to test for:
794 	 * -- If P = Q then we must use point_double().
795 	 * -- If P+Q = 0 then we must report an error.
796 	 */
797 	t = point_add(&P, &Q, cc);
798 	point_double(&Q, cc);
799 	z = br_i31_iszero(P.c[2]);
800 
801 	/*
802 	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
803 	 * have the following:
804 	 *
805 	 *   z = 0, t = 0   return P (normal addition)
806 	 *   z = 0, t = 1   return P (normal addition)
807 	 *   z = 1, t = 0   return Q (a 'double' case)
808 	 *   z = 1, t = 1   report an error (P+Q = 0)
809 	 */
810 	CCOPY(z & ~t, &P, &Q, sizeof Q);
811 	point_encode(A, &P, cc);
812 	r &= ~(z & t);
813 
814 	return r;
815 }
816 
817 /* see bearssl_ec.h */
818 const br_ec_impl br_ec_prime_i31 = {
819 	(uint32_t)0x03800000,
820 	&api_generator,
821 	&api_order,
822 	&api_xoff,
823 	&api_mul,
824 	&api_mulgen,
825 	&api_muladd
826 };
827