1 /* mpi-inv.c - MPI functions
2 * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
3 *
4 * This file is part of Libgcrypt.
5 *
6 * Libgcrypt is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as
8 * published by the Free Software Foundation; either version 2.1 of
9 * the License, or (at your option) any later version.
10 *
11 * Libgcrypt is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <config.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include "mpi-internal.h"
24 #include "g10lib.h"
25
26 /*
27 * This uses a modular inversion algorithm designed by Niels Möller
28 * which was implemented in Nettle. The same algorithm was later also
29 * adapted to GMP in mpn_sec_invert.
30 *
31 * For the description of the algorithm, see Algorithm 5 in Appendix A
32 * of "Fast Software Polynomial Multiplication on ARM Processors using
33 * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio
34 * López, and Ricardo Dahab:
35 * https://hal.inria.fr/hal-01506572/document
36 *
37 * Note that in the reference above, at the line 2 of Algorithm 5,
38 * initial value of V was described as V:=1 wrongly. It must be V:=0.
39 */
40 static mpi_ptr_t
mpih_invm_odd(mpi_ptr_t ap,mpi_ptr_t np,mpi_size_t nsize)41 mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
42 {
43 int secure;
44 unsigned int iterations;
45 mpi_ptr_t n1hp;
46 mpi_ptr_t bp;
47 mpi_ptr_t up, vp;
48
49 secure = _gcry_is_secure (ap);
50 up = mpi_alloc_limb_space (nsize, secure);
51 MPN_ZERO (up, nsize);
52 up[0] = 1;
53
54 vp = mpi_alloc_limb_space (nsize, secure);
55 MPN_ZERO (vp, nsize);
56
57 secure = _gcry_is_secure (np);
58 bp = mpi_alloc_limb_space (nsize, secure);
59 MPN_COPY (bp, np, nsize);
60
61 n1hp = mpi_alloc_limb_space (nsize, secure);
62 MPN_COPY (n1hp, np, nsize);
63 _gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
64 _gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);
65
66 iterations = 2 * nsize * BITS_PER_MPI_LIMB;
67
68 while (iterations-- > 0)
69 {
70 mpi_limb_t odd_a, odd_u, underflow, borrow;
71
72 odd_a = ap[0] & 1;
73
74 underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
75 mpih_add_n_cond (bp, bp, ap, nsize, underflow);
76 mpih_abs_cond (ap, ap, nsize, underflow);
77 mpih_swap_cond (up, vp, nsize, underflow);
78
79 _gcry_mpih_rshift (ap, ap, nsize, 1);
80
81 borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
82 mpih_add_n_cond (up, up, np, nsize, borrow);
83
84 odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
85 mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
86 }
87
88 _gcry_mpi_free_limb_space (n1hp, nsize);
89 _gcry_mpi_free_limb_space (up, nsize);
90
91 if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
92 {
93 /* Inverse exists. */
94 _gcry_mpi_free_limb_space (bp, nsize);
95 return vp;
96 }
97 else
98 {
99 _gcry_mpi_free_limb_space (bp, nsize);
100 _gcry_mpi_free_limb_space (vp, nsize);
101 return NULL;
102 }
103 }
104
105
106 /*
107 * Calculate the multiplicative inverse X of A mod 2^K
108 * A must be positive.
109 *
110 * See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
111 * Kaya Koç: https://eprint.iacr.org/2017/411.pdf
112 */
113 static mpi_ptr_t
mpih_invm_pow2(mpi_ptr_t ap,mpi_size_t asize,unsigned int k)114 mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
115 {
116 int secure = _gcry_is_secure (ap);
117 mpi_size_t i;
118 unsigned int iterations;
119 mpi_ptr_t xp, wp, up, vp;
120 mpi_size_t usize;
121
122 if (!(ap[0] & 1))
123 return NULL;
124
125 iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
126 * BITS_PER_MPI_LIMB;
127 usize = iterations / BITS_PER_MPI_LIMB;
128
129 up = mpi_alloc_limb_space (usize, secure);
130 MPN_ZERO (up, usize);
131 up[0] = 1;
132
133 vp = mpi_alloc_limb_space (usize, secure);
134 for (i = 0; i < (usize < asize ? usize : asize); i++)
135 vp[i] = ap[i];
136 for (; i < usize; i++)
137 vp[i] = 0;
138 if ((k % BITS_PER_MPI_LIMB))
139 for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
140 vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
141
142 wp = mpi_alloc_limb_space (usize, secure);
143 MPN_COPY (wp, up, usize);
144
145 xp = mpi_alloc_limb_space (usize, secure);
146 MPN_ZERO (xp, usize);
147
148 /*
149 * It can be considered that overflow at _gcry_mpih_sub_n results
150 * adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
151 * 2^K computation.
152 */
153 for (i = 0; i < iterations; i++)
154 {
155 int b0 = (up[0] & 1);
156
157 xp[i/BITS_PER_MPI_LIMB] |= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
158 _gcry_mpih_sub_n (wp, up, vp, usize);
159 mpih_set_cond (up, wp, usize, b0);
160 _gcry_mpih_rshift (up, up, usize, 1);
161 }
162
163 if ((k % BITS_PER_MPI_LIMB))
164 for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
165 xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
166
167 _gcry_mpi_free_limb_space (up, usize);
168 _gcry_mpi_free_limb_space (vp, usize);
169 _gcry_mpi_free_limb_space (wp, usize);
170
171 return xp;
172 }
173
174
175 /****************
176 * Calculate the multiplicative inverse X of A mod N
177 * That is: Find the solution x for
178 * 1 = (a*x) mod n
179 */
180 static int
mpi_invm_generic(gcry_mpi_t x,gcry_mpi_t a,gcry_mpi_t n)181 mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
182 {
183 int is_gcd_one;
184 #if 0
185 /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
186 gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
187
188 u = mpi_copy(a);
189 v = mpi_copy(n);
190 u1 = mpi_alloc_set_ui(1);
191 u2 = mpi_alloc_set_ui(0);
192 u3 = mpi_copy(u);
193 v1 = mpi_alloc_set_ui(0);
194 v2 = mpi_alloc_set_ui(1);
195 v3 = mpi_copy(v);
196 q = mpi_alloc( mpi_get_nlimbs(u)+1 );
197 t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
198 t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
199 t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
200 while( mpi_cmp_ui( v3, 0 ) ) {
201 mpi_fdiv_q( q, u3, v3 );
202 mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
203 mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
204 mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
205 mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
206 }
207 /* log_debug("result:\n");
208 log_mpidump("q =", q );
209 log_mpidump("u1=", u1);
210 log_mpidump("u2=", u2);
211 log_mpidump("u3=", u3);
212 log_mpidump("v1=", v1);
213 log_mpidump("v2=", v2); */
214 mpi_set(x, u1);
215
216 is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);
217
218 mpi_free(u1);
219 mpi_free(u2);
220 mpi_free(u3);
221 mpi_free(v1);
222 mpi_free(v2);
223 mpi_free(v3);
224 mpi_free(q);
225 mpi_free(t1);
226 mpi_free(t2);
227 mpi_free(t3);
228 mpi_free(u);
229 mpi_free(v);
230 #elif 0
231 /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
232 * modified according to Michael Penk's solution for Exercise 35
233 * (in the first edition)
234 * In the third edition, it's Exercise 39, and it is described in
235 * page 646 of ANSWERS TO EXERCISES chapter.
236 */
237
238 /* FIXME: we can simplify this in most cases (see Knuth) */
239 gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
240 unsigned k;
241 int sign;
242
243 u = mpi_copy(a);
244 v = mpi_copy(n);
245 for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
246 mpi_rshift(u, u, 1);
247 mpi_rshift(v, v, 1);
248 }
249
250
251 u1 = mpi_alloc_set_ui(1);
252 u2 = mpi_alloc_set_ui(0);
253 u3 = mpi_copy(u);
254 v1 = mpi_copy(v); /* !-- used as const 1 */
255 v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
256 v3 = mpi_copy(v);
257 if( mpi_test_bit(u, 0) ) { /* u is odd */
258 t1 = mpi_alloc_set_ui(0);
259 t2 = mpi_alloc_set_ui(1); t2->sign = 1;
260 t3 = mpi_copy(v); t3->sign = !t3->sign;
261 goto Y4;
262 }
263 else {
264 t1 = mpi_alloc_set_ui(1);
265 t2 = mpi_alloc_set_ui(0);
266 t3 = mpi_copy(u);
267 }
268 do {
269 do {
270 if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
271 mpi_add(t1, t1, v);
272 mpi_sub(t2, t2, u);
273 }
274 mpi_rshift(t1, t1, 1);
275 mpi_rshift(t2, t2, 1);
276 mpi_rshift(t3, t3, 1);
277 Y4:
278 ;
279 } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
280
281 if( !t3->sign ) {
282 mpi_set(u1, t1);
283 mpi_set(u2, t2);
284 mpi_set(u3, t3);
285 }
286 else {
287 mpi_sub(v1, v, t1);
288 sign = u->sign; u->sign = !u->sign;
289 mpi_sub(v2, u, t2);
290 u->sign = sign;
291 sign = t3->sign; t3->sign = !t3->sign;
292 mpi_set(v3, t3);
293 t3->sign = sign;
294 }
295 mpi_sub(t1, u1, v1);
296 mpi_sub(t2, u2, v2);
297 mpi_sub(t3, u3, v3);
298 if( t1->sign ) {
299 mpi_add(t1, t1, v);
300 mpi_sub(t2, t2, u);
301 }
302 } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
303 /* mpi_lshift( u3, u3, k ); */
304 is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
305 mpi_set(x, u1);
306
307 mpi_free(u1);
308 mpi_free(u2);
309 mpi_free(u3);
310 mpi_free(v1);
311 mpi_free(v2);
312 mpi_free(v3);
313 mpi_free(t1);
314 mpi_free(t2);
315 mpi_free(t3);
316 #else
317 /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
318 * modified according to Michael Penk's solution for Exercise 35
319 * with further enhancement */
320 /* The reference in the comment above is for the first edition.
321 * In the third edition, it's Exercise 39, and it is described in
322 * page 646 of ANSWERS TO EXERCISES chapter.
323 */
324 gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
325 unsigned k;
326 int sign;
327 int odd ;
328
329 u = mpi_copy(a);
330 v = mpi_copy(n);
331
332 for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
333 mpi_rshift(u, u, 1);
334 mpi_rshift(v, v, 1);
335 }
336 odd = mpi_test_bit(v,0);
337
338 u1 = mpi_alloc_set_ui(1);
339 if( !odd )
340 u2 = mpi_alloc_set_ui(0);
341 u3 = mpi_copy(u);
342 v1 = mpi_copy(v);
343 if( !odd ) {
344 v2 = mpi_alloc( mpi_get_nlimbs(u) );
345 mpi_sub( v2, u1, u ); /* U is used as const 1 */
346 }
347 v3 = mpi_copy(v);
348 if( mpi_test_bit(u, 0) ) { /* u is odd */
349 t1 = mpi_alloc_set_ui(0);
350 if( !odd ) {
351 t2 = mpi_alloc_set_ui(1); t2->sign = 1;
352 }
353 t3 = mpi_copy(v); t3->sign = !t3->sign;
354 goto Y4;
355 }
356 else {
357 t1 = mpi_alloc_set_ui(1);
358 if( !odd )
359 t2 = mpi_alloc_set_ui(0);
360 t3 = mpi_copy(u);
361 }
362 do {
363 do {
364 if( !odd ) {
365 if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
366 mpi_add(t1, t1, v);
367 mpi_sub(t2, t2, u);
368 }
369 mpi_rshift(t1, t1, 1);
370 mpi_rshift(t2, t2, 1);
371 mpi_rshift(t3, t3, 1);
372 }
373 else {
374 if( mpi_test_bit(t1, 0) )
375 mpi_add(t1, t1, v);
376 mpi_rshift(t1, t1, 1);
377 mpi_rshift(t3, t3, 1);
378 }
379 Y4:
380 ;
381 } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
382
383 if( !t3->sign ) {
384 mpi_set(u1, t1);
385 if( !odd )
386 mpi_set(u2, t2);
387 mpi_set(u3, t3);
388 }
389 else {
390 mpi_sub(v1, v, t1);
391 sign = u->sign; u->sign = !u->sign;
392 if( !odd )
393 mpi_sub(v2, u, t2);
394 u->sign = sign;
395 sign = t3->sign; t3->sign = !t3->sign;
396 mpi_set(v3, t3);
397 t3->sign = sign;
398 }
399 mpi_sub(t1, u1, v1);
400 if( !odd )
401 mpi_sub(t2, u2, v2);
402 mpi_sub(t3, u3, v3);
403 if( t1->sign ) {
404 mpi_add(t1, t1, v);
405 if( !odd )
406 mpi_sub(t2, t2, u);
407 }
408 } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
409 /* mpi_lshift( u3, u3, k ); */
410 is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
411 mpi_set(x, u1);
412
413 mpi_free(u1);
414 mpi_free(v1);
415 mpi_free(t1);
416 if( !odd ) {
417 mpi_free(u2);
418 mpi_free(v2);
419 mpi_free(t2);
420 }
421 mpi_free(u3);
422 mpi_free(v3);
423 mpi_free(t3);
424
425 mpi_free(u);
426 mpi_free(v);
427 #endif
428 return is_gcd_one;
429 }
430
431
432 /*
433 * Set X to the multiplicative inverse of A mod M. Return true if the
434 * inverse exists.
435 */
436 int
_gcry_mpi_invm(gcry_mpi_t x,gcry_mpi_t a,gcry_mpi_t n)437 _gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
438 {
439 mpi_ptr_t ap, xp;
440
441 if (!mpi_cmp_ui (a, 0))
442 return 0; /* Inverse does not exists. */
443 if (!mpi_cmp_ui (n, 1))
444 return 0; /* Inverse does not exists. */
445
446 if (mpi_test_bit (n, 0))
447 {
448 if (a->nlimbs <= n->nlimbs)
449 {
450 ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
451 MPN_ZERO (ap, n->nlimbs);
452 MPN_COPY (ap, a->d, a->nlimbs);
453 }
454 else
455 ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);
456
457 xp = mpih_invm_odd (ap, n->d, n->nlimbs);
458 _gcry_mpi_free_limb_space (ap, n->nlimbs);
459
460 if (xp)
461 {
462 _gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
463 x->nlimbs = n->nlimbs;
464 return 1;
465 }
466 else
467 return 0; /* Inverse does not exists. */
468 }
469 else if (!a->sign && !n->sign)
470 {
471 unsigned int k = mpi_trailing_zeros (n);
472 mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
473 mpi_size_t hsize;
474 gcry_mpi_t q;
475 mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
476 mpi_size_t i;
477
478 if (k == _gcry_mpi_get_nbits (n) - 1)
479 {
480 x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
481
482 if (x1p)
483 {
484 _gcry_mpi_assign_limb_space (x, x1p, x1size);
485 x->nlimbs = x1size;
486 return 1;
487 }
488 else
489 return 0; /* Inverse does not exists. */
490 }
491
492 /* N can be expressed as P * Q, where P = 2^K. P and Q are coprime. */
493 /*
494 * Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
495 * them by Garner's formula, to get X = invm (A, P*Q).
496 * A special case of Chinese Remainder Theorem.
497 */
498
499 /* X1 = invm (A, P) */
500 x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
501 if (!x1p)
502 return 0; /* Inverse does not exists. */
503
504 /* Q = N / P */
505 q = mpi_new (0);
506 mpi_rshift (q, n, k);
507
508 /* X2 = invm (A%Q, Q) */
509 ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
510 x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
511 _gcry_mpi_free_limb_space (ap, q->nlimbs);
512 if (!x2p)
513 {
514 _gcry_mpi_free_limb_space (x1p, x1size);
515 mpi_free (q);
516 return 0; /* Inverse does not exists. */
517 }
518
519 /* Q_inv = Q^(-1) = invm (Q, P) */
520 q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);
521
522 /* H = (X1 - X2) * Q_inv % P */
523 diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
524 if (x1size >= q->nlimbs)
525 _gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
526 else
527 _gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
528 _gcry_mpi_free_limb_space (x1p, x1size);
529 if ((k % BITS_PER_MPI_LIMB))
530 for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
531 diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
532
533 hsize = x1size * 2;
534 hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
535 _gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
536 _gcry_mpi_free_limb_space (diffp, x1size);
537 _gcry_mpi_free_limb_space (q_invp, x1size);
538
539 for (i = x1size; i < hsize; i++)
540 hp[i] = 0;
541 if ((k % BITS_PER_MPI_LIMB))
542 for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
543 hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
544
545 xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
546 if (x1size >= q->nlimbs)
547 _gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
548 else
549 _gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);
550
551 _gcry_mpi_free_limb_space (hp, hsize);
552
553 _gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
554 _gcry_mpi_free_limb_space (x2p, q->nlimbs);
555
556 _gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
557 x->nlimbs = x1size + q->nlimbs;
558
559 mpi_free (q);
560
561 return 1;
562 }
563 else
564 return mpi_invm_generic (x, a, n);
565 }
566