1 /* mpi-inv.c  -  MPI functions
2  *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
3  *
4  * This file is part of Libgcrypt.
5  *
6  * Libgcrypt is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2.1 of
9  * the License, or (at your option) any later version.
10  *
11  * Libgcrypt is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <config.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include "mpi-internal.h"
24 #include "g10lib.h"
25 
26 /*
27  * This uses a modular inversion algorithm designed by Niels Möller
28  * which was implemented in Nettle.  The same algorithm was later also
29  * adapted to GMP in mpn_sec_invert.
30  *
31  * For the description of the algorithm, see Algorithm 5 in Appendix A
32  * of "Fast Software Polynomial Multiplication on ARM Processors using
33  * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio
34  * López, and Ricardo Dahab:
35  *   https://hal.inria.fr/hal-01506572/document
36  *
37  * Note that in the reference above, at the line 2 of Algorithm 5,
38  * initial value of V was described as V:=1 wrongly.  It must be V:=0.
39  */
40 static mpi_ptr_t
mpih_invm_odd(mpi_ptr_t ap,mpi_ptr_t np,mpi_size_t nsize)41 mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
42 {
43   int secure;
44   unsigned int iterations;
45   mpi_ptr_t n1hp;
46   mpi_ptr_t bp;
47   mpi_ptr_t up, vp;
48 
49   secure = _gcry_is_secure (ap);
50   up = mpi_alloc_limb_space (nsize, secure);
51   MPN_ZERO (up, nsize);
52   up[0] = 1;
53 
54   vp = mpi_alloc_limb_space (nsize, secure);
55   MPN_ZERO (vp, nsize);
56 
57   secure = _gcry_is_secure (np);
58   bp = mpi_alloc_limb_space (nsize, secure);
59   MPN_COPY (bp, np, nsize);
60 
61   n1hp = mpi_alloc_limb_space (nsize, secure);
62   MPN_COPY (n1hp, np, nsize);
63   _gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
64   _gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);
65 
66   iterations = 2 * nsize * BITS_PER_MPI_LIMB;
67 
68   while (iterations-- > 0)
69     {
70       mpi_limb_t odd_a, odd_u, underflow, borrow;
71 
72       odd_a = ap[0] & 1;
73 
74       underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
75       mpih_add_n_cond (bp, bp, ap, nsize, underflow);
76       mpih_abs_cond (ap, ap, nsize, underflow);
77       mpih_swap_cond (up, vp, nsize, underflow);
78 
79       _gcry_mpih_rshift (ap, ap, nsize, 1);
80 
81       borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
82       mpih_add_n_cond (up, up, np, nsize, borrow);
83 
84       odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
85       mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
86     }
87 
88   _gcry_mpi_free_limb_space (n1hp, nsize);
89   _gcry_mpi_free_limb_space (up, nsize);
90 
91   if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
92     {
93       /* Inverse exists.  */
94       _gcry_mpi_free_limb_space (bp, nsize);
95       return vp;
96     }
97   else
98     {
99       _gcry_mpi_free_limb_space (bp, nsize);
100       _gcry_mpi_free_limb_space (vp, nsize);
101       return NULL;
102     }
103 }
104 
105 
106 /*
107  * Calculate the multiplicative inverse X of A mod 2^K
108  * A must be positive.
109  *
110  * See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
111  * Kaya Koç: https://eprint.iacr.org/2017/411.pdf
112  */
113 static mpi_ptr_t
mpih_invm_pow2(mpi_ptr_t ap,mpi_size_t asize,unsigned int k)114 mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
115 {
116   int secure = _gcry_is_secure (ap);
117   mpi_size_t i;
118   unsigned int iterations;
119   mpi_ptr_t xp, wp, up, vp;
120   mpi_size_t usize;
121 
122   if (!(ap[0] & 1))
123     return NULL;
124 
125   iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
126     * BITS_PER_MPI_LIMB;
127   usize = iterations / BITS_PER_MPI_LIMB;
128 
129   up = mpi_alloc_limb_space (usize, secure);
130   MPN_ZERO (up, usize);
131   up[0] = 1;
132 
133   vp = mpi_alloc_limb_space (usize, secure);
134   for (i = 0; i < (usize < asize ? usize : asize); i++)
135     vp[i] = ap[i];
136   for (; i < usize; i++)
137     vp[i] = 0;
138   if ((k % BITS_PER_MPI_LIMB))
139     for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
140       vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
141 
142   wp = mpi_alloc_limb_space (usize, secure);
143   MPN_COPY (wp, up, usize);
144 
145   xp = mpi_alloc_limb_space (usize, secure);
146   MPN_ZERO (xp, usize);
147 
148   /*
149    * It can be considered that overflow at _gcry_mpih_sub_n results
150    * adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
151    * 2^K computation.
152    */
153   for (i = 0; i < iterations; i++)
154     {
155       int b0 = (up[0] & 1);
156 
157       xp[i/BITS_PER_MPI_LIMB] |= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
158       _gcry_mpih_sub_n (wp, up, vp, usize);
159       mpih_set_cond (up, wp, usize, b0);
160       _gcry_mpih_rshift (up, up, usize, 1);
161     }
162 
163   if ((k % BITS_PER_MPI_LIMB))
164     for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
165       xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
166 
167   _gcry_mpi_free_limb_space (up, usize);
168   _gcry_mpi_free_limb_space (vp, usize);
169   _gcry_mpi_free_limb_space (wp, usize);
170 
171   return xp;
172 }
173 
174 
175 /****************
176  * Calculate the multiplicative inverse X of A mod N
177  * That is: Find the solution x for
178  *		1 = (a*x) mod n
179  */
180 static int
mpi_invm_generic(gcry_mpi_t x,gcry_mpi_t a,gcry_mpi_t n)181 mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
182 {
183     int is_gcd_one;
184 #if 0
185     /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
186     gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
187 
188     u = mpi_copy(a);
189     v = mpi_copy(n);
190     u1 = mpi_alloc_set_ui(1);
191     u2 = mpi_alloc_set_ui(0);
192     u3 = mpi_copy(u);
193     v1 = mpi_alloc_set_ui(0);
194     v2 = mpi_alloc_set_ui(1);
195     v3 = mpi_copy(v);
196     q  = mpi_alloc( mpi_get_nlimbs(u)+1 );
197     t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
198     t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
199     t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
200     while( mpi_cmp_ui( v3, 0 ) ) {
201 	mpi_fdiv_q( q, u3, v3 );
202 	mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
203 	mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
204 	mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
205 	mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
206     }
207     /*	log_debug("result:\n");
208 	log_mpidump("q =", q );
209 	log_mpidump("u1=", u1);
210 	log_mpidump("u2=", u2);
211 	log_mpidump("u3=", u3);
212 	log_mpidump("v1=", v1);
213 	log_mpidump("v2=", v2); */
214     mpi_set(x, u1);
215 
216     is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);
217 
218     mpi_free(u1);
219     mpi_free(u2);
220     mpi_free(u3);
221     mpi_free(v1);
222     mpi_free(v2);
223     mpi_free(v3);
224     mpi_free(q);
225     mpi_free(t1);
226     mpi_free(t2);
227     mpi_free(t3);
228     mpi_free(u);
229     mpi_free(v);
230 #elif 0
231     /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
232      * modified according to Michael Penk's solution for Exercise 35
233      * (in the first edition)
234      * In the third edition, it's Exercise 39, and it is described in
235      * page 646 of ANSWERS TO EXERCISES chapter.
236      */
237 
238     /* FIXME: we can simplify this in most cases (see Knuth) */
239     gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
240     unsigned k;
241     int sign;
242 
243     u = mpi_copy(a);
244     v = mpi_copy(n);
245     for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
246 	mpi_rshift(u, u, 1);
247 	mpi_rshift(v, v, 1);
248     }
249 
250 
251     u1 = mpi_alloc_set_ui(1);
252     u2 = mpi_alloc_set_ui(0);
253     u3 = mpi_copy(u);
254     v1 = mpi_copy(v);				   /* !-- used as const 1 */
255     v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
256     v3 = mpi_copy(v);
257     if( mpi_test_bit(u, 0) ) { /* u is odd */
258 	t1 = mpi_alloc_set_ui(0);
259 	t2 = mpi_alloc_set_ui(1); t2->sign = 1;
260 	t3 = mpi_copy(v); t3->sign = !t3->sign;
261 	goto Y4;
262     }
263     else {
264 	t1 = mpi_alloc_set_ui(1);
265 	t2 = mpi_alloc_set_ui(0);
266 	t3 = mpi_copy(u);
267     }
268     do {
269 	do {
270 	    if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
271 		mpi_add(t1, t1, v);
272 		mpi_sub(t2, t2, u);
273 	    }
274 	    mpi_rshift(t1, t1, 1);
275 	    mpi_rshift(t2, t2, 1);
276 	    mpi_rshift(t3, t3, 1);
277 	  Y4:
278 	    ;
279 	} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
280 
281 	if( !t3->sign ) {
282 	    mpi_set(u1, t1);
283 	    mpi_set(u2, t2);
284 	    mpi_set(u3, t3);
285 	}
286 	else {
287 	    mpi_sub(v1, v, t1);
288 	    sign = u->sign; u->sign = !u->sign;
289 	    mpi_sub(v2, u, t2);
290 	    u->sign = sign;
291 	    sign = t3->sign; t3->sign = !t3->sign;
292 	    mpi_set(v3, t3);
293 	    t3->sign = sign;
294 	}
295 	mpi_sub(t1, u1, v1);
296 	mpi_sub(t2, u2, v2);
297 	mpi_sub(t3, u3, v3);
298 	if( t1->sign ) {
299 	    mpi_add(t1, t1, v);
300 	    mpi_sub(t2, t2, u);
301 	}
302     } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
303     /* mpi_lshift( u3, u3, k ); */
304     is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
305     mpi_set(x, u1);
306 
307     mpi_free(u1);
308     mpi_free(u2);
309     mpi_free(u3);
310     mpi_free(v1);
311     mpi_free(v2);
312     mpi_free(v3);
313     mpi_free(t1);
314     mpi_free(t2);
315     mpi_free(t3);
316 #else
317     /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
318      * modified according to Michael Penk's solution for Exercise 35
319      * with further enhancement */
320     /* The reference in the comment above is for the first edition.
321      * In the third edition, it's Exercise 39, and it is described in
322      * page 646 of ANSWERS TO EXERCISES chapter.
323      */
324     gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
325     unsigned k;
326     int sign;
327     int odd ;
328 
329     u = mpi_copy(a);
330     v = mpi_copy(n);
331 
332     for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
333 	mpi_rshift(u, u, 1);
334 	mpi_rshift(v, v, 1);
335     }
336     odd = mpi_test_bit(v,0);
337 
338     u1 = mpi_alloc_set_ui(1);
339     if( !odd )
340 	u2 = mpi_alloc_set_ui(0);
341     u3 = mpi_copy(u);
342     v1 = mpi_copy(v);
343     if( !odd ) {
344 	v2 = mpi_alloc( mpi_get_nlimbs(u) );
345 	mpi_sub( v2, u1, u ); /* U is used as const 1 */
346     }
347     v3 = mpi_copy(v);
348     if( mpi_test_bit(u, 0) ) { /* u is odd */
349 	t1 = mpi_alloc_set_ui(0);
350 	if( !odd ) {
351 	    t2 = mpi_alloc_set_ui(1); t2->sign = 1;
352 	}
353 	t3 = mpi_copy(v); t3->sign = !t3->sign;
354 	goto Y4;
355     }
356     else {
357 	t1 = mpi_alloc_set_ui(1);
358 	if( !odd )
359 	    t2 = mpi_alloc_set_ui(0);
360 	t3 = mpi_copy(u);
361     }
362     do {
363 	do {
364 	    if( !odd ) {
365 		if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
366 		    mpi_add(t1, t1, v);
367 		    mpi_sub(t2, t2, u);
368 		}
369 		mpi_rshift(t1, t1, 1);
370 		mpi_rshift(t2, t2, 1);
371 		mpi_rshift(t3, t3, 1);
372 	    }
373 	    else {
374 		if( mpi_test_bit(t1, 0) )
375 		    mpi_add(t1, t1, v);
376 		mpi_rshift(t1, t1, 1);
377 		mpi_rshift(t3, t3, 1);
378 	    }
379 	  Y4:
380 	    ;
381 	} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
382 
383 	if( !t3->sign ) {
384 	    mpi_set(u1, t1);
385 	    if( !odd )
386 		mpi_set(u2, t2);
387 	    mpi_set(u3, t3);
388 	}
389 	else {
390 	    mpi_sub(v1, v, t1);
391 	    sign = u->sign; u->sign = !u->sign;
392 	    if( !odd )
393 		mpi_sub(v2, u, t2);
394 	    u->sign = sign;
395 	    sign = t3->sign; t3->sign = !t3->sign;
396 	    mpi_set(v3, t3);
397 	    t3->sign = sign;
398 	}
399 	mpi_sub(t1, u1, v1);
400 	if( !odd )
401 	    mpi_sub(t2, u2, v2);
402 	mpi_sub(t3, u3, v3);
403 	if( t1->sign ) {
404 	    mpi_add(t1, t1, v);
405 	    if( !odd )
406 		mpi_sub(t2, t2, u);
407 	}
408     } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
409     /* mpi_lshift( u3, u3, k ); */
410     is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
411     mpi_set(x, u1);
412 
413     mpi_free(u1);
414     mpi_free(v1);
415     mpi_free(t1);
416     if( !odd ) {
417 	mpi_free(u2);
418 	mpi_free(v2);
419 	mpi_free(t2);
420     }
421     mpi_free(u3);
422     mpi_free(v3);
423     mpi_free(t3);
424 
425     mpi_free(u);
426     mpi_free(v);
427 #endif
428     return is_gcd_one;
429 }
430 
431 
432 /*
433  * Set X to the multiplicative inverse of A mod M.  Return true if the
434  * inverse exists.
435  */
436 int
_gcry_mpi_invm(gcry_mpi_t x,gcry_mpi_t a,gcry_mpi_t n)437 _gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
438 {
439   mpi_ptr_t ap, xp;
440 
441   if (!mpi_cmp_ui (a, 0))
442     return 0; /* Inverse does not exists.  */
443   if (!mpi_cmp_ui (n, 1))
444     return 0; /* Inverse does not exists.  */
445 
446   if (mpi_test_bit (n, 0))
447     {
448       if (a->nlimbs <= n->nlimbs)
449         {
450           ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
451           MPN_ZERO (ap, n->nlimbs);
452           MPN_COPY (ap, a->d, a->nlimbs);
453         }
454       else
455         ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);
456 
457       xp = mpih_invm_odd (ap, n->d, n->nlimbs);
458       _gcry_mpi_free_limb_space (ap, n->nlimbs);
459 
460       if (xp)
461         {
462           _gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
463           x->nlimbs = n->nlimbs;
464           return 1;
465         }
466       else
467         return 0; /* Inverse does not exists.  */
468     }
469   else if (!a->sign && !n->sign)
470     {
471       unsigned int k = mpi_trailing_zeros (n);
472       mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
473       mpi_size_t hsize;
474       gcry_mpi_t q;
475       mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
476       mpi_size_t i;
477 
478       if (k == _gcry_mpi_get_nbits (n) - 1)
479         {
480           x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
481 
482           if (x1p)
483             {
484               _gcry_mpi_assign_limb_space (x, x1p, x1size);
485               x->nlimbs = x1size;
486               return 1;
487             }
488           else
489             return 0; /* Inverse does not exists.  */
490         }
491 
492       /* N can be expressed as P * Q, where P = 2^K.  P and Q are coprime.  */
493       /*
494        * Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
495        * them by Garner's formula, to get X = invm (A, P*Q).
496        * A special case of Chinese Remainder Theorem.
497        */
498 
499       /* X1 = invm (A, P) */
500       x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
501       if (!x1p)
502         return 0;               /* Inverse does not exists.  */
503 
504       /* Q = N / P          */
505       q = mpi_new (0);
506       mpi_rshift (q, n, k);
507 
508       /* X2 = invm (A%Q, Q) */
509       ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
510       x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
511       _gcry_mpi_free_limb_space (ap, q->nlimbs);
512       if (!x2p)
513         {
514           _gcry_mpi_free_limb_space (x1p, x1size);
515           mpi_free (q);
516           return 0;             /* Inverse does not exists.  */
517         }
518 
519       /* Q_inv = Q^(-1) = invm (Q, P) */
520       q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);
521 
522       /* H = (X1 - X2) * Q_inv % P */
523       diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
524       if (x1size >= q->nlimbs)
525         _gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
526       else
527 	_gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
528       _gcry_mpi_free_limb_space (x1p, x1size);
529       if ((k % BITS_PER_MPI_LIMB))
530         for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
531           diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
532 
533       hsize = x1size * 2;
534       hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
535       _gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
536       _gcry_mpi_free_limb_space (diffp, x1size);
537       _gcry_mpi_free_limb_space (q_invp, x1size);
538 
539       for (i = x1size; i < hsize; i++)
540         hp[i] = 0;
541       if ((k % BITS_PER_MPI_LIMB))
542         for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
543           hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
544 
545       xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
546       if (x1size >= q->nlimbs)
547         _gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
548       else
549         _gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);
550 
551       _gcry_mpi_free_limb_space (hp, hsize);
552 
553       _gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
554       _gcry_mpi_free_limb_space (x2p, q->nlimbs);
555 
556       _gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
557       x->nlimbs = x1size + q->nlimbs;
558 
559       mpi_free (q);
560 
561       return 1;
562     }
563   else
564     return mpi_invm_generic (x, a, n);
565 }
566