xref: /dragonfly/crypto/libressl/crypto/bn/bn_lcl.h (revision f5b1c8a1)
1 /* $OpenBSD: bn_lcl.h,v 1.21 2014/10/28 07:35:58 jsg Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2000 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 #ifndef HEADER_BN_LCL_H
113 #define HEADER_BN_LCL_H
114 
115 #include <openssl/opensslconf.h>
116 
117 #include <openssl/bn.h>
118 
119 #ifdef  __cplusplus
120 extern "C" {
121 #endif
122 
123 
124 /*
125  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
126  *
127  *
128  * For window size 'w' (w >= 2) and a random 'b' bits exponent,
129  * the number of multiplications is a constant plus on average
130  *
131  *    2^(w-1) + (b-w)/(w+1);
132  *
133  * here  2^(w-1)  is for precomputing the table (we actually need
134  * entries only for windows that have the lowest bit set), and
135  * (b-w)/(w+1)  is an approximation for the expected number of
136  * w-bit windows, not counting the first one.
137  *
138  * Thus we should use
139  *
140  *    w >= 6  if        b > 671
141  *     w = 5  if  671 > b > 239
142  *     w = 4  if  239 > b >  79
143  *     w = 3  if   79 > b >  23
144  *    w <= 2  if   23 > b
145  *
146  * (with draws in between).  Very small exponents are often selected
147  * with low Hamming weight, so we use  w = 1  for b <= 23.
148  */
#define BN_window_bits_for_exponent_size(b) \
		((b) <=  23 ? 1 : /* small exponents: no sliding window */ \
		 (b) <=  79 ? 3 : \
		 (b) <= 239 ? 4 : \
		 (b) <= 671 ? 5 : 6)
154 
155 
/* BN_mod_exp_mont_consttime is based on the assumption that the
 * L1 data cache line width of the target processor is at least
 * the following value.
 */
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH	( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK	(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)

/* Window sizes optimized for fixed window size modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime).
 *
 * To achieve the security goals of BN_mod_exp_mont_consttime, the
 * maximum size of the window must not exceed
 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
 *
 * Window size thresholds are defined for cache line sizes of 32 and 64,
 * cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
 * window size of 7 should only be used on processors that have a 128
 * byte or greater cache line size.
 *
 * Any other MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH is rejected at compile
 * time: without a matching table neither
 * BN_window_bits_for_ctime_exponent_size nor
 * BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE would be defined, and the
 * failure would only surface later at their point of use.
 */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#  define BN_window_bits_for_ctime_exponent_size(b) \
		((b) > 937 ? 6 : \
		 (b) > 306 ? 5 : \
		 (b) >  89 ? 4 : \
		 (b) >  22 ? 3 : 1)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#  define BN_window_bits_for_ctime_exponent_size(b) \
		((b) > 306 ? 5 : \
		 (b) >  89 ? 4 : \
		 (b) >  22 ? 3 : 1)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(5)

#else
#  error "MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH must be 32 or 64: no constant-time window table exists for other widths"
#endif
193 
194 
/*
 * Size tunables for the bignum routines.
 * NOTE(review): presumably word-count thresholds that select between the
 * normal and recursive code paths -- confirm against the users in bn_*.c.
 *
 * Settings recorded in the original source:
 *   Pentium pro 16,16,16,32,64
 *   Alpha       16,16,16,16.64
 *
 * The number in the trailing comment of each line is carried over from
 * the original source unchanged.
 */
#define BN_MULL_SIZE_NORMAL              (16)	/* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL     (16)	/* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL     (16)	/* 32 */
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)	/* 32 */
#define BN_MONT_CTX_SET_SIZE_WORD        (64)	/* 32 */
202 
203 #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
204 /*
205  * BN_UMULT_HIGH section.
206  *
207  * No, I'm not trying to overwhelm you when stating that the
208  * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
209  * you to be impressed when I say that if the compiler doesn't
210  * support 2*N integer type, then you have to replace every N*N
211  * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
212  * and additions which unavoidably results in severe performance
213  * penalties. Of course provided that the hardware is capable of
214  * producing 2*N result... That's when you normally start
215  * considering assembler implementation. However! It should be
216  * pointed out that some CPUs (most notably Alpha, PowerPC and
217  * upcoming IA-64 family:-) provide *separate* instruction
218  * calculating the upper half of the product placing the result
219  * into a general purpose register. Now *if* the compiler supports
220  * inline assembler, then it's not impossible to implement the
221  * "bignum" routines (and have the compiler optimize 'em)
222  * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
223  * macro is about:-)
224  *
225  *					<appro@fy.chalmers.se>
226  */
/*
 * CPU-specific BN_UMULT_HIGH(a,b) and, where provided,
 * BN_UMULT_LOHI(low,high,a,b) for GCC-compatible compilers (__GNUC__ >= 2).
 *
 *   BN_UMULT_HIGH(a,b)           evaluates to the upper word of a*b
 *   BN_UMULT_LOHI(low,high,a,b)  stores both words of a*b in low and high
 *
 * On any other CPU/compiler combination neither macro is defined here.
 *
 * The __asm__ spelling is used instead of asm so that the macros keep
 * working when the including file is compiled in a strict ISO mode (for
 * example -std=c99 or -std=c11), where GCC does not recognise asm.
 *
 * The statement-form BN_UMULT_LOHI definitions already end in ';', so
 * "BN_UMULT_LOHI(l,h,a,b);" expands to that statement plus an empty one.
 */
# if defined(__alpha)
#  if defined(__GNUC__) && __GNUC__>=2
#   define BN_UMULT_HIGH(a,b)	({	\
	BN_ULONG ret;		\
	__asm__ ("umulh	%1,%2,%0"	\
	     : "=r"(ret)		\
	     : "r"(a), "r"(b));		\
	ret;			})
#  endif	/* compiler */
# elif defined(_ARCH_PPC) && defined(_LP64)
#  if defined(__GNUC__) && __GNUC__>=2
#   define BN_UMULT_HIGH(a,b)	({	\
	BN_ULONG ret;		\
	__asm__ ("mulhdu	%0,%1,%2"	\
	     : "=r"(ret)		\
	     : "r"(a), "r"(b));		\
	ret;			})
#  endif	/* compiler */
# elif defined(__x86_64) || defined(__x86_64__)
#  if defined(__GNUC__) && __GNUC__>=2
/*
 * mulq multiplies the "a" register by its single explicit operand and
 * leaves the product in the "d":"a" register pair.  That operand must be a
 * register or memory location, hence "rm": the broader "g" constraint
 * would also let the compiler substitute an immediate, which mulq cannot
 * encode.
 */
#   define BN_UMULT_HIGH(a,b)	({	\
	BN_ULONG ret,discard;	\
	__asm__ ("mulq	%3"		\
	     : "=a"(discard),"=d"(ret)	\
	     : "a"(a), "rm"(b)		\
	     : "cc");			\
	ret;			})
#   define BN_UMULT_LOHI(low,high,a,b)	\
	__asm__ ("mulq	%3"		\
		: "=a"(low),"=d"(high)	\
		: "a"(a),"rm"(b)	\
		: "cc");
#  endif
# elif defined(__mips) && defined(_LP64)
#  if defined(__GNUC__) && __GNUC__>=2
#   if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) /* "h" constraint is no more since 4.4 */
#     define BN_UMULT_HIGH(a,b)		 (((__uint128_t)(a)*(b))>>64)
#     define BN_UMULT_LOHI(low,high,a,b) ({	\
	__uint128_t ret=(__uint128_t)(a)*(b);	\
	(high)=ret>>64; (low)=ret;	 })
#   else
#     define BN_UMULT_HIGH(a,b)	({	\
	BN_ULONG ret;		\
	__asm__ ("dmultu	%1,%2"		\
	     : "=h"(ret)		\
	     : "r"(a), "r"(b) : "l");	\
	ret;			})
#     define BN_UMULT_LOHI(low,high,a,b)\
	__asm__ ("dmultu	%2,%3"		\
	     : "=l"(low),"=h"(high)	\
	     : "r"(a), "r"(b));
#   endif
#  endif
# endif		/* cpu */
281 #endif		/* OPENSSL_NO_ASM */
282 
/*
 * Word extraction for the double-width ("long long") code path.
 *
 *   Lw(t)  t converted to BN_ULONG and masked with BN_MASK2
 *   Hw(t)  t shifted right by BN_BITS2, converted and masked the same way
 *
 * NOTE(review): BN_BITS2 and BN_MASK2 come from outside this header;
 * presumably the word width in bits and the all-ones word mask, which
 * makes these the low and high words of a two-word value -- confirm.
 */
#define Lw(t)	((BN_ULONG)(t) & BN_MASK2)
#define Hw(t)	((BN_ULONG)((t) >> BN_BITS2) & BN_MASK2)
288 
#ifdef BN_DEBUG_RAND
/*
 * bn_clear_top2max(a): debug-only helper that zeroes the words
 * (a)->d[(a)->top] .. (a)->d[(a)->dmax - 1].
 *
 * Written as an index loop so that no pointer below (a)->d is ever formed
 * when (a)->top is 0, and so that nothing is written at all when
 * (a)->top >= (a)->dmax.  The argument is evaluated more than once; pass a
 * side-effect-free expression.
 */
#define bn_clear_top2max(a) \
	{ \
	int ind; \
	for (ind = (a)->top; ind < (a)->dmax; ind++) \
		(a)->d[ind] = 0x0; \
	}
#else
/* Without BN_DEBUG_RAND the helper expands to nothing. */
#define bn_clear_top2max(a)
#endif
300 
301 #ifdef BN_LLONG
/*
 * Double-width (BN_ULLONG) word primitives.  r, c, r0 and r1 must be
 * lvalues; every macro expands to a brace block.
 *
 * mul_add(r,a,w,c): t = w*a + r + c computed in BN_ULLONG;
 *                   r = Lw(t), c = Hw(t).
 *
 * w is parenthesized before the cast so that an expression argument such
 * as "x + y" is widened and multiplied as a whole rather than having the
 * cast bind to its first operand only.
 */
#define mul_add(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (r) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

/* mul(r,a,w,c): t = w*a + c computed in BN_ULLONG; r = Lw(t), c = Hw(t). */
#define mul(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

/* sqr(r0,r1,a): t = a*a computed in BN_ULLONG; r0 = Lw(t), r1 = Hw(t). */
#define sqr(r0,r1,a) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(a)*(a); \
	(r0)=Lw(t); \
	(r1)=Hw(t); \
	}
322 
323 #elif defined(BN_UMULT_LOHI)
/*
 * Word primitives built on BN_UMULT_LOHI(low,high,a,b).  r, c, r0 and r1
 * must be lvalues; each macro expands to a brace block.
 *
 * mul_add(r,a,w,c): (c:r) = w*a + r + c.  The carries out of the two
 * single-word additions are folded into the high product word.
 */
#define mul_add(r,a,w,c) {				\
	BN_ULONG prod_hi, prod_lo, acc, a_word = (a);	\
	BN_UMULT_LOHI(prod_lo, prod_hi, w, a_word);	\
	acc = (r) + (c);				\
	(c) = prod_hi + ((acc < (c)) ? 1 : 0);		\
	acc += prod_lo;					\
	if (acc < prod_lo)				\
		(c)++;					\
	(r) = acc;					\
	}

/* mul(r,a,w,c): (c:r) = w*a + c. */
#define mul(r,a,w,c)	{				\
	BN_ULONG prod_hi, prod_lo, sum, a_word = (a);	\
	BN_UMULT_LOHI(prod_lo, prod_hi, w, a_word);	\
	sum = prod_lo + (c);				\
	(c) = prod_hi + ((sum < prod_lo) ? 1 : 0);	\
	(r) = sum;					\
	}

/* sqr(r0,r1,a): (r1:r0) = a*a; a is evaluated once. */
#define sqr(r0,r1,a)	{				\
	BN_ULONG a_word = (a);				\
	BN_UMULT_LOHI(r0, r1, a_word, a_word);		\
	}
349 
350 #elif defined(BN_UMULT_HIGH)
/*
 * Word primitives built on BN_UMULT_HIGH(a,b): the high product word comes
 * from BN_UMULT_HIGH and the low word from an ordinary (truncating)
 * BN_ULONG multiplication.  r, c, r0 and r1 must be lvalues; each macro
 * expands to a brace block.
 *
 * mul_add(r,a,w,c): (c:r) = w*a + r + c.
 */
#define mul_add(r,a,w,c) {				\
	BN_ULONG prod_hi, prod_lo, acc, a_word = (a);	\
	prod_hi = BN_UMULT_HIGH(w, a_word);		\
	prod_lo = (w) * a_word;				\
	acc = (r) + (c);				\
	(c) = prod_hi + ((acc < (c)) ? 1 : 0);		\
	acc += prod_lo;					\
	if (acc < prod_lo)				\
		(c)++;					\
	(r) = acc;					\
	}

/* mul(r,a,w,c): (c:r) = w*a + c. */
#define mul(r,a,w,c)	{				\
	BN_ULONG prod_hi, prod_lo, sum, a_word = (a);	\
	prod_lo = (w) * a_word;				\
	prod_hi = BN_UMULT_HIGH(w, a_word);		\
	sum = prod_lo + (c);				\
	(c) = prod_hi + ((sum < prod_lo) ? 1 : 0);	\
	(r) = sum;					\
	}

/* sqr(r0,r1,a): (r1:r0) = a*a; a is evaluated once. */
#define sqr(r0,r1,a)	{				\
	BN_ULONG a_word = (a);				\
	(r0) = a_word * a_word;				\
	(r1) = BN_UMULT_HIGH(a_word, a_word);		\
	}
379 
380 #else
/*************************************************************
 * No long long type
 *
 * Fallback used when neither BN_LLONG, BN_UMULT_LOHI nor BN_UMULT_HIGH is
 * available: every word product is assembled from half-word pieces.
 * Note that mul_add() and mul() take FIVE arguments in this branch (the
 * multiplier is passed pre-split as bl/bh), unlike the four-argument
 * forms in the other branches.
 *
 * NOTE(review): BN_BITS4, BN_MASK2, BN_MASK2l and BN_MASK2h1 are defined
 * outside this header; presumably BN_BITS4 is half the word width and
 * BN_MASK2l selects the low half of a word -- confirm.
 */

/* LBITS(a): a masked with BN_MASK2l. */
#define LBITS(a)	((a)&BN_MASK2l)
/* HBITS(a): a shifted right by BN_BITS4, then masked with BN_MASK2l. */
#define HBITS(a)	(((a)>>BN_BITS4)&BN_MASK2l)
/* L2HBITS(a): a shifted left by BN_BITS4, then masked with BN_MASK2. */
#define	L2HBITS(a)	(((a)<<BN_BITS4)&BN_MASK2)

/*
 * mul64(l,h,bl,bh): on entry l and h hold the two halves of one operand
 * and bl/bh the two halves of the other; on exit l and h hold the low and
 * high words of the product.  The four partial products are formed first,
 * the two cross terms are summed (a carry out of that sum is added to the
 * high word via L2HBITS(1)), and the cross sum is then split across the
 * high and low words.  l and h must be lvalues; bl and bh are each
 * evaluated twice.
 */
#define mul64(l,h,bl,bh) \
	{ \
	BN_ULONG m,m1,lt,ht; \
 \
	lt=l; \
	ht=h; \
	m =(bh)*(lt); \
	lt=(bl)*(lt); \
	m1=(bl)*(ht); \
	ht =(bh)*(ht); \
	m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
	ht+=HBITS(m); \
	m1=L2HBITS(m); \
	lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
	(l)=lt; \
	(h)=ht; \
	}

/*
 * sqr64(lo,ho,in): squares the word in, storing the low word of the
 * result in lo and the high word in ho.  The single cross product m
 * (low half times high half) is counted twice by shifting it one extra
 * bit when it is split across the two result words.
 */
#define sqr64(lo,ho,in) \
	{ \
	BN_ULONG l,h,m; \
 \
	h=(in); \
	l=LBITS(h); \
	h=HBITS(h); \
	m =(l)*(h); \
	l*=l; \
	h*=h; \
	h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
	m =(m&BN_MASK2l)<<(BN_BITS4+1); \
	l=(l+m)&BN_MASK2; if (l < m) h++; \
	(lo)=l; \
	(ho)=h; \
	}

/*
 * mul_add(r,a,bl,bh,c): multiplies a by the word whose halves are bl/bh
 * (via mul64), adds c and then r to the product, and stores the low word
 * of the result in r and the high word in c.  r and c must be lvalues; c
 * is reused as a scratch copy of r before receiving the final high word.
 */
#define mul_add(r,a,bl,bh,c) { \
	BN_ULONG l,h; \
 \
	h= (a); \
	l=LBITS(h); \
	h=HBITS(h); \
	mul64(l,h,(bl),(bh)); \
 \
	/* non-multiply part */ \
	l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
	(c)=(r); \
	l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
	(c)=h&BN_MASK2; \
	(r)=l; \
	}

/*
 * mul(r,a,bl,bh,c): multiplies a by the word whose halves are bl/bh (via
 * mul64), adds c, and stores the low word of the result in r and the high
 * word in c.  r and c must be lvalues.
 */
#define mul(r,a,bl,bh,c) { \
	BN_ULONG l,h; \
 \
	h= (a); \
	l=LBITS(h); \
	h=HBITS(h); \
	mul64(l,h,(bl),(bh)); \
 \
	/* non-multiply part */ \
	l+=(c); if ((l&BN_MASK2) < (c)) h++; \
	(c)=h&BN_MASK2; \
	(r)=l&BN_MASK2; \
	}
453 #endif /* !BN_LLONG */
454 
455 	void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
456 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
457 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
458 void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
459 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
460 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
461 int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
462 int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
463     int cl, int dl);
464 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
465     int dna, int dnb, BN_ULONG *t);
466 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
467     int n, int tna, int tnb, BN_ULONG *t);
468 void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
469 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
470 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
471     BN_ULONG *t);
472 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
473     BN_ULONG *t);
474 BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
475     int cl, int dl);
476 BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
477     int cl, int dl);
478 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
479 
480 #ifdef  __cplusplus
481 }
482 #endif
483 
484 #endif
485