1 /* Run some tests on various mpn routines.
2
3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6 Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
7
8 This file is part of the GNU MP Library.
9
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 2.1 of the License, or (at your
13 option) any later version.
14
15 The GNU MP Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18 License for more details.
19
20 You should have received a copy of the GNU Lesser General Public License
21 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 MA 02110-1301, USA. */
24
25
26 /* Usage: try [options] <function>...
27
28 For example, "./try mpn_add_n" to run tests of that function.
29
30 Combinations of alignments and overlaps are tested, with redzones above
31 or below the destinations, and with the sources write-protected.
32
33 The number of tests performed becomes ridiculously large with all the
34 combinations, and for that reason this can't be a part of a "make check",
35 it's meant only for development. The code isn't very pretty either.
36
37 During development it can help to disable the redzones, since seeing the
38 rest of the destination written can show where the wrong part is, or if
39 the dst pointers are off by 1 or whatever. The magic DEADVAL initial
40 fill (see below) will show locations never written.
41
42 The -s option can be used to test only certain size operands, which is
43 useful if some new code doesn't yet support say sizes less than the
44 unrolling, or whatever.
45
46 When a problem occurs it'll of course be necessary to run the program
47 under gdb to find out quite where, how and why it's going wrong. Disable
48 the spinner with the -W option when doing this, or single stepping won't
49 work. Using the "-1" option to run with simple data can be useful.
50
51 New functions to test can be added in try_array[]. If a new TYPE is
52 required then add it to the existing constants, set up its parameters in
53 param_init(), and add it to the call() function. Extra parameter fields
54 can be added if necessary, or further interpretations given to existing
55 fields.
56
57
58 Enhancements:
59
60 umul_ppmm support is not very good, lots of source data is generated
61 whereas only two limbs are needed.
62
63 Make a little scheme for interpreting the "SIZE" selections uniformly.
64
65 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
66 source limbs. Possibly increase the default repetitions in that case.
67
68 Automatically detect gdb and disable the spinner (use -W for now).
69
70 Make a way to re-run a failing case in the debugger. Have an option to
71 snapshot each test case before it's run so the data is available if a
72 segv occurs. (This should be more reliable than the current print_all()
73 in the signal handler.)
74
75 When alignment means a dst isn't hard against the redzone, check the
76 space in between remains unchanged.
77
78 When a source overlaps a destination, don't run both s[i].high 0 and 1,
79 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
80
81 When partial overlaps aren't done, don't loop over source alignments
82 during overlaps.
83
84 Try to make the looping code a bit less horrible. Right now it's pretty
85 hard to see what iterations are actually done.
86
87 Perhaps specific setups and loops for each style of function under test
88 would be clearer than a parameterized general loop. There's lots of
89 stuff common to all functions, but the exceptions get messy.
90
91 When there's no overlap, run with both src>dst and src<dst. A subtle
   calling-conventions violation occurred in a P6 copy which depended on the
93 relative location of src and dst.
94
95 multiplier_N is more or less a third source region for the addmul_N
96 routines, and could be done with the redzoned region scheme.
97
98 */
99
100 /* always do assertion checking */
101 #define WANT_ASSERT 1
102
103 #include "config.h"
104
105 #include <errno.h>
106 #include <limits.h>
107 #include <signal.h>
108 #include <stdio.h>
109 #include <stdlib.h>
110 #include <string.h>
111 #include <time.h>
112
113 #if defined( _MSC_VER )
114 #define WINDOWS_LEAN_AND_MEAN
115 #include <windows.h>
116 #endif
117
118 #if HAVE_UNISTD_H
119 #include <unistd.h>
120 #endif
121
122 #if HAVE_SYS_MMAN_H
123 #include <sys/mman.h>
124 #endif
125
126 #include "mpir.h"
127 #include "gmp-impl.h"
128 #include "longlong.h"
129 #include "tests.h"
130
131
132 #if !HAVE_DECL_OPTARG
133 extern char *optarg;
134 extern int optind, opterr;
135 #endif
136
137 #if ! HAVE_DECL_SYS_NERR
138 extern int sys_nerr;
139 #endif
140
141 #if ! HAVE_DECL_SYS_ERRLIST && !defined( _MSC_VER )
142 extern char *sys_errlist[];
143 #endif
144
145 #if ! HAVE_STRERROR
146 char *
strerror(int n)147 strerror (int n)
148 {
149 if (n < 0 || n >= sys_nerr)
150 return "errno out of range";
151 else
152 return sys_errlist[n];
153 }
154 #endif
155
156 /* Rumour has it some systems lack a define of PROT_NONE. */
157 #ifndef PROT_NONE
158 #define PROT_NONE 0
159 #endif
160
161 /* Dummy defines for when mprotect doesn't exist. */
162 #ifndef PROT_READ
163 #define PROT_READ 0
164 #endif
165 #ifndef PROT_WRITE
166 #define PROT_WRITE 0
167 #endif
168
169 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
170 _SC_PAGE_SIZE instead. */
171 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
172 #define _SC_PAGESIZE _SC_PAGE_SIZE
173 #endif
174
175
176 #ifdef EXTRA_PROTOS
177 EXTRA_PROTOS
178 #endif
179 #ifdef EXTRA_PROTOS2
180 EXTRA_PROTOS2
181 #endif
182
183
184 #define DEFAULT_REPETITIONS 10
185
186 int option_repetitions = DEFAULT_REPETITIONS;
187 int option_spinner = 1;
188 int option_redzones = 1;
189 int option_firstsize = 0;
190 int option_lastsize = 500;
191 int option_firstsize2 = 0;
192
193 #define ALIGNMENTS 4
194 #define OVERLAPS 4
195 #define CARRY_RANDOMS 5
196 #define MULTIPLIER_RANDOMS 5
197 #define DIVISOR_RANDOMS 5
198 #define FRACTION_COUNT 4
199
200 int option_print = 0;
201
202 #define DATA_TRAND 0
203 #define DATA_ZEROS 1
204 #define DATA_SEQ 2
205 #define DATA_FFS 3
206 #define DATA_2FD 4
207 int option_data = DATA_TRAND;
208
209
210 mp_size_t pagesize;
211 #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
212
213 /* must be a multiple of the page size */
214 #define REDZONE_BYTES (pagesize * 16)
215 #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
216
217
218 #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
219
220 #if BITS_PER_MP_LIMB == 32
221 #define DEADVAL CNST_LIMB(0xDEADBEEF)
222 #else
223 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
224 #endif
225
226
/* A stretch of memory described by its start pointer and its size.
   NOTE(review): size is presumably a count of limbs rather than bytes --
   confirm against the code that allocates regions.  */
struct region_t {
  mp_ptr     ptr;
  mp_size_t  size;
};
231
232
233 #define TRAP_NOWHERE 0
234 #define TRAP_REF 1
235 #define TRAP_FUN 2
236 #define TRAP_SETUPS 3
237 int trap_location = TRAP_NOWHERE;
238
239
240 #define NUM_SOURCES 3
241 #define NUM_DESTS 2
242
/* State kept for one source operand; the global s[] array holds
   NUM_SOURCES of these.  */
struct source_t {
  struct region_t  region;  /* memory belonging to this source */
  int        high;   /* NOTE(review): presumably selects the high or low
                        end of region for the operand -- confirm */
  mp_size_t  align;  /* NOTE(review): presumably an alignment offset in
                        limbs -- confirm */
  mp_ptr     p;      /* the operand data; validate routines read the
                        source through s[i].p */
};
249
250 struct source_t s[NUM_SOURCES];
251
/* Settings kept for one destination operand; the global d[] array holds
   NUM_DESTS of these.  Unlike struct source_t there is no region or data
   pointer here, those are in the per-call struct dest_each_t.  */
struct dest_t {
  int        high;   /* NOTE(review): presumably selects the high or low
                        end of the destination region -- confirm */
  mp_size_t  align;  /* NOTE(review): presumably an alignment offset in
                        limbs -- confirm */
  mp_size_t  size;   /* size of this destination */
};
257
258 struct dest_t d[NUM_DESTS];
259
/* Per-call ("Ref" or "Fun") view of one source operand, an element of
   each_t.s[].  */
struct source_each_t {
  mp_ptr  p;  /* source operand pointer for this call */
};
263
/* Per-call ("Ref" or "Fun") view of one destination operand, an element
   of each_t.d[].  */
struct dest_each_t {
  struct region_t  region;  /* memory belonging to this destination */
  mp_ptr     p;             /* the destination data; validate routines read
                               results through fun.d[i].p */
};
268
269 mp_size_t size;
270 mp_size_t size2;
271 unsigned long shift;
272 mp_limb_t carry;
273 mp_limb_t divisor;
274 mp_limb_t altdiv;
275 mp_limb_t multiplier;
276 mp_limb_t multiplier_N[8];
277
/* Everything belonging to one side of a comparison.  Two instances exist,
   "ref" and "fun", with name initialized to "Ref" and "Fun"
   respectively.  */
struct each_t {
  const char  *name;                      /* "Ref" or "Fun" */
  struct dest_each_t    d[NUM_DESTS];     /* destination operands */
  struct source_each_t  s[NUM_SOURCES];   /* source operands */
  mp_limb_t  retval;                      /* return value; the validate
                                             routines read fun.retval */
};
284
285 struct each_t ref = { "Ref" };
286 struct each_t fun = { "Fun" };
287
288 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
289
290 void validate_fail _PROTO ((void));
291
292
293 #if HAVE_TRY_NEW_C
294 #include "try-new.c"
295 #endif
296
297
298 typedef mp_limb_t (*tryfun_t) _PROTO ((ANYARGS));
299
/* Description of how one type of function is to be tested.  The param[]
   array holds one of these for each TYPE_xxx constant, filled in by
   param_init(); the global "tr" points to a struct try_t.  Fields left
   untouched by param_init() stay zero since param[] is a global.  */
struct try_t {
  /* Set to 1 by param_init() for function types with a return value.
     NOTE(review): presumably this makes the return values get compared --
     confirm in the comparison code.  */
  char  retval;

  /* Set to 1 by param_init() for each source and destination operand a
     function type uses.  */
  char  src[NUM_SOURCES];
  char  dst[NUM_DESTS];

  /* Values for the size, size2 and dst_size[] fields.  NOTE(review): which
     values are meaningful in which field is decided by code outside this
     view -- confirm before adding new combinations.  */
#define SIZE_YES          1
#define SIZE_ALLOW_ZERO   2
#define SIZE_1            3  /* 1 limb  */
#define SIZE_2            4  /* 2 limbs */
#define SIZE_3            5  /* 3 limbs */
#define SIZE_4            20 /* 4 limbs */
#define SIZE_FRACTION     6  /* size2 is fraction for divrem etc */
#define SIZE_SIZE2        7
#define SIZE_PLUS_1       8
#define SIZE_PLUS_2       9
#define SIZE_SUM         10
#define SIZE_DIFF        11
#define SIZE_DIFF_PLUS_1 12
#define SIZE_DIFF_PLUS_3 13
#define SIZE_RETVAL      14
#define SIZE_CEIL_HALF   15
#define SIZE_GET_STR     16
#define SIZE_PLUS_MSIZE_SUB_1 17  /* size+msize-1 */
#define SIZE_DOUBLE      18
#define SIZE_DOUBLE_MINUS_1      19
  char  size;
  /* When non-zero, SRC_SIZE() gives source 1 the global size2 rather than
     size.  */
  char  size2;
  char  dst_size[NUM_DESTS];

  /* multiplier_N size in limbs */
  mp_size_t  msize;

  char  dst_bytes[NUM_DESTS];

  char  dst0_from_src1;

#define CARRY_BIT     1  /* single bit 0 or 1 */
#define CARRY_3       2  /* 0, 1, 2 */
#define CARRY_4       3  /* 0 to 3 */
#define CARRY_LIMB    4  /* any limb value */
#define CARRY_DIVISOR 5  /* carry<divisor */
  char  carry;

  /* a fudge to tell the output when to print negatives */
  char  carry_sign;

  /* Set to 1 by param_init() for function types taking a single limb
     multiplier or a shift count, respectively.  */
  char  multiplier;
  char  shift;

  /* Kind of divisor a function type wants, or 0 for none.  */
#define DIVISOR_LIMB  1
#define DIVISOR_NORM  2
#define DIVISOR_ODD   3
#define DIVISOR_DIVBM1 4
  char  divisor;

  /* Special requirements on the generated source data, or 0 for none.  */
#define DATA_NON_ZERO         1
#define DATA_GCD              2
#define DATA_SRC1_ODD         3
#define DATA_SRC1_HIGHBIT     4
#define DATA_MULTIPLE_DIVISOR 5
#define DATA_UDIV_QRNND       6
#define DATA_SRC0_ODD         7
  char  data;

/* Default is allow full overlap. */
#define OVERLAP_NONE         1
#define OVERLAP_LOW_TO_HIGH  2
#define OVERLAP_HIGH_TO_LOW  3
#define OVERLAP_NOT_SRCS     4
#define OVERLAP_NOT_SRC2     8
  char  overlap;

  /* Reference implementation and its name, as set by the REFERENCE()
     macro in param_init().  */
  tryfun_t    reference;
  const char  *reference_name;

  /* Validation routine and its name, as set by the VALIDATE() macro in
     param_init().  */
  void        (*validate) _PROTO ((void));
  const char  *validate_name;
};
379
380 struct try_t *tr;
381
382 void
validate_mod_34lsub1(void)383 validate_mod_34lsub1 (void)
384 {
385 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
386
387 mp_srcptr ptr = s[0].p;
388 int error = 0;
389 mp_limb_t got, got_mod, want, want_mod;
390
391 ASSERT (size >= 1);
392
393 got = fun.retval;
394 got_mod = got % CNST_34LSUB1;
395
396 want = refmpn_mod_34lsub1 (ptr, size);
397 want_mod = want % CNST_34LSUB1;
398
399 if (got_mod != want_mod)
400 {
401 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got);
402 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want);
403 error = 1;
404 }
405
406 if (error)
407 validate_fail ();
408 }
409
410 void
validate_divexact_1(void)411 validate_divexact_1 (void)
412 {
413 mp_srcptr src = s[0].p;
414 mp_srcptr dst = fun.d[0].p;
415 int error = 0;
416
417 ASSERT (size >= 1);
418
419 {
420 mp_ptr tp = refmpn_malloc_limbs (size);
421 mp_limb_t rem;
422
423 rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
424 if (rem != 0)
425 {
426 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
427 error = 1;
428 }
429 if (! refmpn_equal_anynail (tp, dst, size))
430 {
431 printf ("Quotient a/d wrong\n");
432 mpn_trace ("fun ", dst, size);
433 mpn_trace ("want", tp, size);
434 error = 1;
435 }
436 free (tp);
437 }
438
439 if (error)
440 validate_fail ();
441 }
442
443
444 void
validate_modexact_1c_odd(void)445 validate_modexact_1c_odd (void)
446 {
447 mp_srcptr ptr = s[0].p;
448 mp_limb_t r = fun.retval;
449 int error = 0;
450
451 ASSERT (size >= 1);
452 ASSERT (divisor & 1);
453
454 if ((r & GMP_NAIL_MASK) != 0)
455 printf ("r has non-zero nail\n");
456
457 if (carry < divisor)
458 {
459 if (! (r < divisor))
460 {
461 printf ("Don't have r < divisor\n");
462 error = 1;
463 }
464 }
465 else /* carry >= divisor */
466 {
467 if (! (r <= divisor))
468 {
469 printf ("Don't have r <= divisor\n");
470 error = 1;
471 }
472 }
473
474 {
475 mp_limb_t c = carry % divisor;
476 mp_ptr tp = refmpn_malloc_limbs (size+1);
477 mp_size_t k;
478
479 for (k = size-1; k <= size; k++)
480 {
481 /* set {tp,size+1} to r*b^k + a - c */
482 refmpn_copyi (tp, ptr, size);
483 tp[size] = 0;
484 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
485 if (refmpn_sub_1 (tp, tp, size+1, c))
486 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
487
488 if (refmpn_mod_1 (tp, size+1, divisor) == 0)
489 goto good_remainder;
490 }
491 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
492 error = 1;
493
494 good_remainder:
495 free (tp);
496 }
497
498 if (error)
499 validate_fail ();
500 }
501
502 void
validate_modexact_1_odd(void)503 validate_modexact_1_odd (void)
504 {
505 carry = 0;
506 validate_modexact_1c_odd ();
507 }
508
509
510 void
validate_sqrtrem(void)511 validate_sqrtrem (void)
512 {
513 mp_srcptr orig_ptr = s[0].p;
514 mp_size_t orig_size = size;
515 mp_size_t root_size = (size+1)/2;
516 mp_srcptr root_ptr = fun.d[0].p;
517 mp_size_t rem_size = fun.retval;
518 mp_srcptr rem_ptr = fun.d[1].p;
519 mp_size_t prod_size = 2*root_size;
520 mp_ptr p;
521 int error = 0;
522
523 if (rem_size < 0 || rem_size > size)
524 {
525 printf ("Bad remainder size retval %ld\n", (long) rem_size);
526 validate_fail ();
527 }
528
529 p = refmpn_malloc_limbs (prod_size);
530
531 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
532 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
533 {
534 printf ("Remainder bigger than 2*root\n");
535 error = 1;
536 }
537
538 refmpn_sqr (p, root_ptr, root_size);
539 if (rem_size != 0)
540 refmpn_add (p, p, prod_size, rem_ptr, rem_size);
541 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
542 {
543 printf ("root^2+rem != original\n");
544 mpn_trace ("prod", p, prod_size);
545 error = 1;
546 }
547 free (p);
548
549 if (error)
550 validate_fail ();
551 }
552
553
554 /* These types are indexes into the param[] array and are arbitrary so long
555 as they're all distinct and within the size of param[]. Renumber
556 whenever necessary or desired. */
557
558 #define TYPE_ADD 1
559 #define TYPE_ADD_N 2
560 #define TYPE_ADD_NC 3
561 #define TYPE_SUB 4
562 #define TYPE_SUB_N 5
563 #define TYPE_SUB_NC 6
564
565 #define TYPE_MUL_1 7
566 #define TYPE_MUL_1C 8
567
568 #define TYPE_MUL_2 9
569
570 #define TYPE_ADDMUL_1 10
571 #define TYPE_ADDMUL_1C 11
572 #define TYPE_SUBMUL_1 12
573 #define TYPE_SUBMUL_1C 13
574
575 #define TYPE_ADDMUL_2 14
576 #define TYPE_ADDMUL_3 15
577 #define TYPE_ADDMUL_4 16
578 #define TYPE_ADDMUL_5 17
579 #define TYPE_ADDMUL_6 18
580 #define TYPE_ADDMUL_7 19
581 #define TYPE_ADDMUL_8 20
582
583 #define TYPE_SUMDIFF_N 21
584 #define TYPE_NSUMDIFF_N 141
585 #define TYPE_SUMDIFF_NC 22
586
587 #define TYPE_RSHIFT 23
588 #define TYPE_LSHIFT 24
589
590 #define TYPE_COPY 25
591 #define TYPE_COPYI 26
592 #define TYPE_COPYD 27
593 #define TYPE_COM_N 28
594
595 #define TYPE_ADDLSH1_N 30
596 #define TYPE_SUBLSH1_N 31
597 #define TYPE_RSH1ADD_N 32
598 #define TYPE_RSH1SUB_N 33
599
600 #define TYPE_MOD_1 35
601 #define TYPE_MOD_1C 36
602 #define TYPE_DIVMOD_1 37
603 #define TYPE_DIVMOD_1C 38
604 #define TYPE_DIVREM_1 39
605 #define TYPE_DIVREM_1C 40
606 #define TYPE_PREINV_DIVREM_1 41
607 #define TYPE_PREINV_MOD_1 42
608 #define TYPE_MOD_34LSUB1 43
609 #define TYPE_UDIV_QRNND 44
610 #define TYPE_UDIV_QRNND_R 45
611
612 #define TYPE_DIVEXACT_1 50
613 #define TYPE_DIVEXACT_BY3 51
614 #define TYPE_DIVEXACT_BY3C 52
615 #define TYPE_MODEXACT_1_ODD 53
616 #define TYPE_MODEXACT_1C_ODD 54
617
618 #define TYPE_GCD 60
619 #define TYPE_GCD_1 61
620 #define TYPE_GCD_FINDA 62
621 #define TYPE_MPZ_JACOBI 63
622 #define TYPE_MPZ_KRONECKER 64
623 #define TYPE_MPZ_KRONECKER_UI 65
624 #define TYPE_MPZ_KRONECKER_SI 66
625 #define TYPE_MPZ_UI_KRONECKER 67
626 #define TYPE_MPZ_SI_KRONECKER 68
627
628 #define TYPE_AND_N 70
629 #define TYPE_NAND_N 71
630 #define TYPE_ANDN_N 72
631 #define TYPE_IOR_N 73
632 #define TYPE_IORN_N 74
633 #define TYPE_NIOR_N 75
634 #define TYPE_XOR_N 76
635 #define TYPE_XNOR_N 77
636
637 #define TYPE_MUL_BASECASE 80
638 #define TYPE_MUL_N 81
639 #define TYPE_MULMID_BASECASE 82
640 #define TYPE_MULMID 83
641 #define TYPE_MULMID_N 84
642 #define TYPE_SQR 85
643 #define TYPE_UMUL_PPMM 86
644 #define TYPE_UMUL_PPMM_R 87
645
646 #define TYPE_SB_DIVREM_MN 90
647 #define TYPE_TDIV_QR 91
648 #define TYPE_TDIV_Q 92
649
650 #define TYPE_SQRTREM 100
651 #define TYPE_ZERO 101
652 #define TYPE_GET_STR 102
653 #define TYPE_POPCOUNT 103
654 #define TYPE_HAMDIST 104
655
656 #define TYPE_DIVEXACT_BYFF 105
657 #define TYPE_LSHIFT1 106
658 #define TYPE_RSHIFT1 107
659
660 #define TYPE_ADDADD_N 108
661 #define TYPE_ADDSUB_N 109
662 #define TYPE_SUBADD_N 110
663
664 #define TYPE_REDC_BASECASE 111
665 #define TYPE_DIVREM_EUCLIDEAN_QR_1 112
666 #define TYPE_DIVREM_EUCLIDEAN_R_1 113
667 #define TYPE_DIVEXACT_BYFOBM1 114
668
669 #define TYPE_LSHIFT2 115
670 #define TYPE_RSHIFT2 116
671 #define TYPE_STORE 117
672 #define TYPE_LSHIFTC 118
673 //#define TYPE_DIVREM_EUCLIDEAN_QR_2 118
674 #define TYPE_ADDLSH_N 120
675 #define TYPE_SUBLSH_N 121
676
677 #define TYPE_INCLSH_N 122
678 #define TYPE_DECLSH_N 123
679 #define TYPE_ADDERR1_N 124
680 #define TYPE_SUBERR1_N 125
681 #define TYPE_ADDERR2_N 126
682 #define TYPE_SUBERR2_N 127
683 #define TYPE_ADDLSH_NC 128
684 #define TYPE_SUBLSH_NC 129
685
686 #define TYPE_DIVREM_HENSEL_QR_1 130
687 #define TYPE_DIVREM_HENSEL_QR_1_1 131
688 #define TYPE_DIVREM_HENSEL_QR_1_2 132
689 #define TYPE_DIVREM_HENSEL_R_1 133
690 #define TYPE_RSH_DIVREM_HENSEL_QR_1 134
691 #define TYPE_RSH_DIVREM_HENSEL_QR_1_1 135
692 #define TYPE_RSH_DIVREM_HENSEL_QR_1_2 136
693 #define TYPE_DIVREM_HENSEL_RSH_QR_1 137
694 #define TYPE_NOT 138
695
696 #define TYPE_DOUBLE 139
697 #define TYPE_HALF 140
698
699 #define TYPE_EXTRA 150
700
701 struct try_t param[150];
702
703
704 void
param_init(void)705 param_init (void)
706 {
707 struct try_t *p;
708
709 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
710
711 #if HAVE_STRINGIZE
712 #define REFERENCE(fun) \
713 p->reference = (tryfun_t) fun; \
714 p->reference_name = #fun
715 #define VALIDATE(fun) \
716 p->validate = fun; \
717 p->validate_name = #fun
718 #else
719 #define REFERENCE(fun) \
720 p->reference = (tryfun_t) fun; \
721 p->reference_name = "fun"
722 #define VALIDATE(fun) \
723 p->validate = fun; \
724 p->validate_name = "fun"
725 #endif
726
727
728 p = ¶m[TYPE_ADD_N];
729 p->retval = 1;
730 p->dst[0] = 1;
731 p->src[0] = 1;
732 p->src[1] = 1;
733 REFERENCE (refmpn_add_n);
734
735 p = ¶m[TYPE_ADD_NC];
736 COPY (TYPE_ADD_N);
737 p->carry = CARRY_BIT;
738 REFERENCE (refmpn_add_nc);
739
740 p = ¶m[TYPE_SUB_N];
741 COPY (TYPE_ADD_N);
742 REFERENCE (refmpn_sub_n);
743
744 p = ¶m[TYPE_SUB_NC];
745 COPY (TYPE_ADD_NC);
746 REFERENCE (refmpn_sub_nc);
747
748 p = ¶m[TYPE_ADD];
749 COPY (TYPE_ADD_N);
750 p->size = SIZE_ALLOW_ZERO;
751 p->size2 = 1;
752 REFERENCE (refmpn_add);
753
754 p = ¶m[TYPE_SUB];
755 COPY (TYPE_ADD);
756 REFERENCE (refmpn_sub);
757
758
759 p = ¶m[TYPE_MUL_1];
760 p->retval = 1;
761 p->dst[0] = 1;
762 p->src[0] = 1;
763 p->multiplier = 1;
764 p->overlap = OVERLAP_LOW_TO_HIGH;
765 REFERENCE (refmpn_mul_1);
766
767 p = ¶m[TYPE_MUL_1C];
768 COPY (TYPE_MUL_1);
769 p->carry = CARRY_LIMB;
770 REFERENCE (refmpn_mul_1c);
771
772
773 p = ¶m[TYPE_MUL_2];
774 p->retval = 1;
775 p->dst[0] = 1;
776 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
777 p->src[0] = 1;
778 p->src[1] = 1;
779 p->msize = 2;
780 p->overlap = OVERLAP_NOT_SRC2;
781 REFERENCE (refmpn_mul_2);
782
783
784 p = ¶m[TYPE_ADDMUL_1];
785 p->retval = 1;
786 p->dst[0] = 1;
787 p->src[0] = 1;
788 p->multiplier = 1;
789 p->dst0_from_src1 = 1;
790 REFERENCE (refmpn_addmul_1);
791
792 p = ¶m[TYPE_ADDMUL_1C];
793 COPY (TYPE_ADDMUL_1);
794 p->carry = CARRY_LIMB;
795 REFERENCE (refmpn_addmul_1c);
796
797 p = ¶m[TYPE_SUBMUL_1];
798 COPY (TYPE_ADDMUL_1);
799 REFERENCE (refmpn_submul_1);
800
801 p = ¶m[TYPE_SUBMUL_1C];
802 COPY (TYPE_ADDMUL_1C);
803 REFERENCE (refmpn_submul_1c);
804
805
806 p = ¶m[TYPE_ADDMUL_2];
807 p->retval = 1;
808 p->dst[0] = 1;
809 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
810 p->src[0] = 1;
811 p->src[1] = 1;
812 p->msize = 2;
813 p->dst0_from_src1 = 1;
814 p->overlap = OVERLAP_NOT_SRC2;
815 REFERENCE (refmpn_addmul_2);
816
817 p = ¶m[TYPE_ADDMUL_3];
818 COPY (TYPE_ADDMUL_2);
819 p->msize = 3;
820 REFERENCE (refmpn_addmul_3);
821
822 p = ¶m[TYPE_ADDMUL_4];
823 COPY (TYPE_ADDMUL_2);
824 p->msize = 4;
825 REFERENCE (refmpn_addmul_4);
826
827 p = ¶m[TYPE_ADDMUL_5];
828 COPY (TYPE_ADDMUL_2);
829 p->msize = 5;
830 REFERENCE (refmpn_addmul_5);
831
832 p = ¶m[TYPE_ADDMUL_6];
833 COPY (TYPE_ADDMUL_2);
834 p->msize = 6;
835 REFERENCE (refmpn_addmul_6);
836
837 p = ¶m[TYPE_ADDMUL_7];
838 COPY (TYPE_ADDMUL_2);
839 p->msize = 7;
840 REFERENCE (refmpn_addmul_7);
841
842 p = ¶m[TYPE_ADDMUL_8];
843 COPY (TYPE_ADDMUL_2);
844 p->msize = 8;
845 REFERENCE (refmpn_addmul_8);
846
847
848 p = ¶m[TYPE_AND_N];
849 p->dst[0] = 1;
850 p->src[0] = 1;
851 p->src[1] = 1;
852 REFERENCE (refmpn_and_n);
853
854 p = ¶m[TYPE_ANDN_N];
855 COPY (TYPE_AND_N);
856 REFERENCE (refmpn_andn_n);
857
858 p = ¶m[TYPE_NAND_N];
859 COPY (TYPE_AND_N);
860 REFERENCE (refmpn_nand_n);
861
862 p = ¶m[TYPE_IOR_N];
863 COPY (TYPE_AND_N);
864 REFERENCE (refmpn_ior_n);
865
866 p = ¶m[TYPE_IORN_N];
867 COPY (TYPE_AND_N);
868 REFERENCE (refmpn_iorn_n);
869
870 p = ¶m[TYPE_NIOR_N];
871 COPY (TYPE_AND_N);
872 REFERENCE (refmpn_nior_n);
873
874 p = ¶m[TYPE_XOR_N];
875 COPY (TYPE_AND_N);
876 REFERENCE (refmpn_xor_n);
877
878 p = ¶m[TYPE_XNOR_N];
879 COPY (TYPE_AND_N);
880 REFERENCE (refmpn_xnor_n);
881
882 p = ¶m[TYPE_SUMDIFF_N];
883 p->retval = 1;
884 p->dst[0] = 1;
885 p->dst[1] = 1;
886 p->src[0] = 1;
887 p->src[1] = 1;
888 REFERENCE (refmpn_sumdiff_n);
889
890 p = ¶m[TYPE_NSUMDIFF_N];
891 COPY (TYPE_SUMDIFF_N);
892 REFERENCE (refmpn_nsumdiff_n);
893
894 p = ¶m[TYPE_ADDERR1_N];
895 p->retval=1;
896 p->dst[0]=1;
897 p->dst[1]=1;
898 p->dst_size[1]=SIZE_2;
899 p->src[0]=1;
900 p->src[1]=1;
901 p->src[2]=1;
902 p->carry=CARRY_BIT;
903 p->overlap=OVERLAP_NONE;
904 REFERENCE (refmpn_add_err1_n);
905
906 p = ¶m[TYPE_SUBERR1_N];
907 p->retval=1;
908 p->dst[0]=1;
909 p->dst[1]=1;
910 p->dst_size[1]=SIZE_2;
911 p->src[0]=1;
912 p->src[1]=1;
913 p->src[2]=1;
914 p->carry=CARRY_BIT;
915 p->overlap=OVERLAP_NONE;
916 REFERENCE (refmpn_sub_err1_n);
917
918 p = ¶m[TYPE_ADDERR2_N];
919 p->retval=1;
920 p->dst[0]=1;
921 p->dst[1]=1;
922 p->dst_size[1]=SIZE_4;
923 p->src[0]=1;
924 p->src[1]=1;
925 p->src[2]=1;
926 p->src[3]=1;//FIXME
927 p->carry=CARRY_BIT;
928 p->overlap=OVERLAP_NONE;
929 REFERENCE (refmpn_add_err2_n);
930
931 p = ¶m[TYPE_SUBERR2_N];
932 p->retval=1;
933 p->dst[0]=1;
934 p->dst[1]=1;
935 p->dst_size[1]=SIZE_4;
936 p->src[0]=1;
937 p->src[1]=1;
938 p->src[2]=1;
939 p->src[3]=1;//FIXME
940 p->carry=CARRY_BIT;
941 p->overlap=OVERLAP_NONE;
942 REFERENCE (refmpn_sub_err2_n);
943
944 p = ¶m[TYPE_SUMDIFF_NC];
945 COPY (TYPE_SUMDIFF_N);
946 p->carry = CARRY_4;
947 REFERENCE (refmpn_sumdiff_nc);
948
949 p = ¶m[TYPE_ADDADD_N];
950 p->retval = 1;
951 p->dst[0] = 1;
952 p->src[0] = 1;
953 p->src[1] = 1;
954 p->src[2] = 1;
955 REFERENCE (refmpn_addadd_n);
956
957 p = ¶m[TYPE_ADDSUB_N];
958 p->retval = 1;
959 p->dst[0] = 1;
960 p->src[0] = 1;
961 p->src[1] = 1;
962 p->src[2] = 1;
963 REFERENCE (refmpn_addsub_n);
964
965 p = ¶m[TYPE_SUBADD_N];
966 p->retval = 1;
967 p->dst[0] = 1;
968 p->src[0] = 1;
969 p->src[1] = 1;
970 p->src[2] = 1;
971 REFERENCE (refmpn_subadd_n);
972
973 p = ¶m[TYPE_COPY];
974 p->dst[0] = 1;
975 p->src[0] = 1;
976 p->overlap = OVERLAP_NONE;
977 p->size = SIZE_ALLOW_ZERO;
978 REFERENCE (refmpn_copy);
979
980 p = ¶m[TYPE_COPYI];
981 p->dst[0] = 1;
982 p->src[0] = 1;
983 p->overlap = OVERLAP_LOW_TO_HIGH;
984 p->size = SIZE_ALLOW_ZERO;
985 REFERENCE (refmpn_copyi);
986
987 p = ¶m[TYPE_COPYD];
988 p->dst[0] = 1;
989 p->src[0] = 1;
990 p->overlap = OVERLAP_HIGH_TO_LOW;
991 p->size = SIZE_ALLOW_ZERO;
992 REFERENCE (refmpn_copyd);
993
994 p = ¶m[TYPE_COM_N];
995 p->dst[0] = 1;
996 p->src[0] = 1;
997 REFERENCE (refmpn_com_n);
998
999 p = ¶m[TYPE_ADDLSH_N];
1000 p->dst[0]=1;
1001 p->src[0]=1;
1002 p->src[1]=1;
1003 p->shift=1;
1004 REFERENCE (refmpn_addlsh_n);
1005
1006 p = ¶m[TYPE_SUBLSH_N];
1007 p->dst[0]=1;
1008 p->src[0]=1;
1009 p->src[1]=1;
1010 p->shift=1;
1011 REFERENCE (refmpn_sublsh_n);
1012
1013 p = ¶m[TYPE_ADDLSH_NC];
1014 p->dst[0]=1;
1015 p->src[0]=1;
1016 p->src[1]=1;
1017 p->shift=1;
1018 p->carry=CARRY_LIMB;
1019 REFERENCE (refmpn_addlsh_nc);
1020
1021 p = ¶m[TYPE_SUBLSH_NC];
1022 p->dst[0]=1;
1023 p->src[0]=1;
1024 p->src[1]=1;
1025 p->shift=1;
1026 p->carry=CARRY_LIMB;
1027 REFERENCE (refmpn_sublsh_nc);
1028
1029 p = ¶m[TYPE_INCLSH_N];
1030 p->dst[0]=1;
1031 p->src[0]=1;
1032 p->shift=1;
1033 REFERENCE (refmpn_inclsh_n);
1034
1035 p = ¶m[TYPE_DECLSH_N];
1036 p->dst[0]=1;
1037 p->src[0]=1;
1038 p->shift=1;
1039 REFERENCE (refmpn_declsh_n);
1040
1041 p = ¶m[TYPE_ADDLSH1_N];
1042 COPY (TYPE_ADD_N);
1043 REFERENCE (refmpn_addlsh1_n);
1044
1045 p = ¶m[TYPE_SUBLSH1_N];
1046 COPY (TYPE_ADD_N);
1047 REFERENCE (refmpn_sublsh1_n);
1048
1049 p = ¶m[TYPE_RSH1ADD_N];
1050 COPY (TYPE_ADD_N);
1051 REFERENCE (refmpn_rsh1add_n);
1052
1053 p = ¶m[TYPE_RSH1SUB_N];
1054 COPY (TYPE_ADD_N);
1055 REFERENCE (refmpn_rsh1sub_n);
1056
1057
1058 p = ¶m[TYPE_MOD_1];
1059 p->retval = 1;
1060 p->src[0] = 1;
1061 p->size = SIZE_ALLOW_ZERO;
1062 p->divisor = DIVISOR_LIMB;
1063 REFERENCE (refmpn_mod_1);
1064
1065 p = ¶m[TYPE_MOD_1C];
1066 COPY (TYPE_MOD_1);
1067 p->carry = CARRY_DIVISOR;
1068 REFERENCE (refmpn_mod_1c);
1069
1070 p = ¶m[TYPE_DIVMOD_1];
1071 COPY (TYPE_MOD_1);
1072 p->dst[0] = 1;
1073 REFERENCE (refmpn_divmod_1);
1074
1075 p = ¶m[TYPE_DIVREM_EUCLIDEAN_QR_1];
1076 p->retval = 1;
1077 p->src[0] = 1;
1078 p->divisor = DIVISOR_LIMB;
1079 p->dst[0] = 1;
1080 REFERENCE (refmpn_divrem_1);
1081
1082 p = ¶m[TYPE_DIVREM_EUCLIDEAN_R_1];
1083 p->retval = 1;
1084 p->src[0] = 1;
1085 p->divisor = DIVISOR_LIMB;
1086 REFERENCE (refmpn_divrem_euclidean_r_1);
1087
1088 p = ¶m[TYPE_DIVREM_HENSEL_QR_1];
1089 p->retval = 1;
1090 p->src[0] = 1;
1091 p->divisor = DIVISOR_ODD;
1092 p->dst[0] = 1;
1093 REFERENCE (refmpn_divrem_hensel_qr_1);
1094
1095 p = ¶m[TYPE_DIVREM_HENSEL_QR_1_1];
1096 p->retval = 1;
1097 p->src[0] = 1;
1098 p->divisor = DIVISOR_ODD;
1099 p->dst[0] = 1;
1100 REFERENCE (refmpn_divrem_hensel_qr_1);
1101
1102 p = ¶m[TYPE_DIVREM_HENSEL_QR_1_2];
1103 p->retval = 1;
1104 p->src[0] = 1;
1105 p->divisor = DIVISOR_ODD;
1106 p->dst[0] = 1;
1107 REFERENCE (refmpn_divrem_hensel_qr_1);
1108
1109 p = ¶m[TYPE_DIVREM_HENSEL_R_1];
1110 p->retval = 1;
1111 p->src[0] = 1;
1112 p->divisor = DIVISOR_ODD;
1113 REFERENCE (refmpn_divrem_hensel_r_1);
1114
1115 p = ¶m[TYPE_DIVREM_HENSEL_RSH_QR_1];
1116 p->retval = 1;
1117 p->src[0] = 1;
1118 p->divisor = DIVISOR_ODD;
1119 p->dst[0] = 1;
1120 p->shift=1;
1121 REFERENCE (refmpn_divrem_hensel_rsh_qr_1);
1122
1123 p = ¶m[TYPE_RSH_DIVREM_HENSEL_QR_1];
1124 p->retval = 1;
1125 p->src[0] = 1;
1126 p->divisor = DIVISOR_ODD;
1127 p->dst[0] = 1;
1128 p->shift=1;
1129 p->carry=CARRY_LIMB;
1130 REFERENCE (refmpn_rsh_divrem_hensel_qr_1);
1131
1132 p = ¶m[TYPE_RSH_DIVREM_HENSEL_QR_1_1];
1133 p->retval = 1;
1134 p->src[0] = 1;
1135 p->divisor = DIVISOR_ODD;
1136 p->dst[0] = 1;
1137 p->shift=1;
1138 p->carry=CARRY_LIMB;
1139 REFERENCE (refmpn_rsh_divrem_hensel_qr_1);
1140
1141 p = ¶m[TYPE_RSH_DIVREM_HENSEL_QR_1_2];
1142 p->retval = 1;
1143 p->src[0] = 1;
1144 p->divisor = DIVISOR_ODD;
1145 p->dst[0] = 1;
1146 p->shift=1;
1147 p->carry=CARRY_LIMB;
1148 REFERENCE (refmpn_rsh_divrem_hensel_qr_1);
1149
1150 p = ¶m[TYPE_DIVEXACT_BYFOBM1];
1151 p->retval = 1;
1152 p->src[0] = 1;
1153 p->divisor = DIVISOR_DIVBM1;
1154 p->dst[0] = 1;
1155 REFERENCE (refmpn_divexact_byfobm1);
1156
1157 p = ¶m[TYPE_DIVMOD_1C];
1158 COPY (TYPE_DIVMOD_1);
1159 p->carry = CARRY_DIVISOR;
1160 REFERENCE (refmpn_divmod_1c);
1161
1162 p = ¶m[TYPE_DIVREM_1];
1163 COPY (TYPE_DIVMOD_1);
1164 p->size2 = SIZE_FRACTION;
1165 p->dst_size[0] = SIZE_SUM;
1166 REFERENCE (refmpn_divrem_1);
1167
1168 p = ¶m[TYPE_DIVREM_1C];
1169 COPY (TYPE_DIVREM_1);
1170 p->carry = CARRY_DIVISOR;
1171 REFERENCE (refmpn_divrem_1c);
1172
1173 p = ¶m[TYPE_PREINV_DIVREM_1];
1174 COPY (TYPE_DIVREM_1);
1175 p->size = SIZE_YES; /* ie. no size==0 */
1176 REFERENCE (refmpn_preinv_divrem_1);
1177
1178 p = ¶m[TYPE_PREINV_MOD_1];
1179 p->retval = 1;
1180 p->src[0] = 1;
1181 p->divisor = DIVISOR_NORM;
1182 REFERENCE (refmpn_preinv_mod_1);
1183
1184 p = ¶m[TYPE_MOD_34LSUB1];
1185 p->retval = 1;
1186 p->src[0] = 1;
1187 VALIDATE (validate_mod_34lsub1);
1188
1189 p = ¶m[TYPE_UDIV_QRNND];
1190 p->retval = 1;
1191 p->src[0] = 1;
1192 p->dst[0] = 1;
1193 p->dst_size[0] = SIZE_1;
1194 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
1195 p->data = DATA_UDIV_QRNND;
1196 p->overlap = OVERLAP_NONE;
1197 REFERENCE (refmpn_udiv_qrnnd);
1198
1199 p = ¶m[TYPE_UDIV_QRNND_R];
1200 COPY (TYPE_UDIV_QRNND);
1201 REFERENCE (refmpn_udiv_qrnnd_r);
1202
1203
1204 p = ¶m[TYPE_DIVEXACT_1];
1205 p->dst[0] = 1;
1206 p->src[0] = 1;
1207 p->divisor = DIVISOR_LIMB;
1208 p->data = DATA_MULTIPLE_DIVISOR;
1209 VALIDATE (validate_divexact_1);
1210 REFERENCE (refmpn_divmod_1);
1211
1212
1213 p = ¶m[TYPE_DIVEXACT_BY3];
1214 p->retval = 1;
1215 p->dst[0] = 1;
1216 p->src[0] = 1;
1217 REFERENCE (refmpn_divexact_by3);
1218
1219 p = ¶m[TYPE_DIVEXACT_BYFF];
1220 p->retval = 1;
1221 p->dst[0] = 1;
1222 p->src[0] = 1;
1223 REFERENCE (refmpn_divexact_byff);
1224
1225 p = ¶m[TYPE_LSHIFT1];
1226 p->retval = 1;
1227 p->dst[0] = 1;
1228 p->src[0] = 1;
1229 REFERENCE (refmpn_lshift1);
1230
1231 p = ¶m[TYPE_RSHIFT1];
1232 p->retval = 1;
1233 p->dst[0] = 1;
1234 p->src[0] = 1;
1235 REFERENCE (refmpn_rshift1);
1236
1237 p = ¶m[TYPE_LSHIFT2];
1238 p->retval = 1;
1239 p->dst[0] = 1;
1240 p->src[0] = 1;
1241 REFERENCE (refmpn_lshift2);
1242
1243 p = ¶m[TYPE_RSHIFT2];
1244 p->retval = 1;
1245 p->dst[0] = 1;
1246 p->src[0] = 1;
1247 REFERENCE (refmpn_rshift2);
1248
1249 p = ¶m[TYPE_DIVEXACT_BY3C];
1250 COPY (TYPE_DIVEXACT_BY3);
1251 p->carry = CARRY_3;
1252 REFERENCE (refmpn_divexact_by3c);
1253
1254
1255 p = ¶m[TYPE_MODEXACT_1_ODD];
1256 p->retval = 1;
1257 p->src[0] = 1;
1258 p->divisor = DIVISOR_ODD;
1259 VALIDATE (validate_modexact_1_odd);
1260
1261 p = ¶m[TYPE_MODEXACT_1C_ODD];
1262 COPY (TYPE_MODEXACT_1_ODD);
1263 p->carry = CARRY_LIMB;
1264 VALIDATE (validate_modexact_1c_odd);
1265
1266
1267 p = ¶m[TYPE_GCD_1];
1268 p->retval = 1;
1269 p->src[0] = 1;
1270 p->data = DATA_NON_ZERO;
1271 p->divisor = DIVISOR_LIMB;
1272 REFERENCE (refmpn_gcd_1);
1273
1274 p = ¶m[TYPE_GCD];
1275 p->retval = 1;
1276 p->dst[0] = 1;
1277 p->src[0] = 1;
1278 p->src[1] = 1;
1279 p->size2 = 1;
1280 p->dst_size[0] = SIZE_RETVAL;
1281 p->overlap = OVERLAP_NOT_SRCS;
1282 p->data = DATA_GCD;
1283 REFERENCE (refmpn_gcd);
1284
1285 p = ¶m[TYPE_MPZ_JACOBI];
1286 p->retval = 1;
1287 p->src[0] = 1;
1288 p->size = SIZE_ALLOW_ZERO;
1289 p->src[1] = 1;
1290 p->data = DATA_SRC1_ODD;
1291 p->size2 = 1;
1292 p->carry = CARRY_4;
1293 p->carry_sign = 1;
1294 REFERENCE (refmpz_jacobi);
1295
1296 p = ¶m[TYPE_MPZ_KRONECKER];
1297 COPY (TYPE_MPZ_JACOBI);
1298 p->data = 0; /* clear inherited DATA_SRC1_ODD */
1299 REFERENCE (refmpz_kronecker);
1300
1301
1302 p = ¶m[TYPE_MPZ_KRONECKER_UI];
1303 p->retval = 1;
1304 p->src[0] = 1;
1305 p->size = SIZE_ALLOW_ZERO;
1306 p->multiplier = 1;
1307 p->carry = CARRY_BIT;
1308 REFERENCE (refmpz_kronecker_ui);
1309
1310 p = ¶m[TYPE_MPZ_KRONECKER_SI];
1311 COPY (TYPE_MPZ_KRONECKER_UI);
1312 REFERENCE (refmpz_kronecker_si);
1313
1314 p = ¶m[TYPE_MPZ_UI_KRONECKER];
1315 COPY (TYPE_MPZ_KRONECKER_UI);
1316 REFERENCE (refmpz_ui_kronecker);
1317
1318 p = ¶m[TYPE_MPZ_SI_KRONECKER];
1319 COPY (TYPE_MPZ_KRONECKER_UI);
1320 REFERENCE (refmpz_si_kronecker);
1321
1322 p = ¶m[TYPE_REDC_BASECASE];
1323 p->dst[0] = 1;
1324 p->src[0] = 1;
1325 p->src[1] = 1;
1326 p->data = DATA_SRC0_ODD ;
1327 p->size2 = SIZE_DOUBLE;
1328 p->overlap = OVERLAP_NONE;
1329 REFERENCE (refmpn_redc_1);
1330
1331 p = ¶m[TYPE_SQR];
1332 p->dst[0] = 1;
1333 p->src[0] = 1;
1334 p->dst_size[0] = SIZE_SUM;
1335 p->overlap = OVERLAP_NONE;
1336 REFERENCE (refmpn_sqr);
1337
1338 p = ¶m[TYPE_MUL_N];
1339 COPY (TYPE_SQR);
1340 p->src[1] = 1;
1341 REFERENCE (refmpn_mul_n);
1342
1343 p = ¶m[TYPE_MULMID_BASECASE];
1344 COPY (TYPE_MUL_BASECASE);
1345 p->dst_size[0] = SIZE_DIFF_PLUS_3;
1346 p->size2 = 1;
1347 p->overlap = OVERLAP_NONE;
1348 REFERENCE (refmpn_mulmid_basecase);
1349
1350 p = ¶m[TYPE_MULMID];
1351 COPY (TYPE_MULMID_BASECASE);
1352 REFERENCE (refmpn_mulmid);
1353
1354 p = ¶m[TYPE_MULMID_N];
1355 p->dst[0] = 1;
1356 p->src[0] = 1;
1357 p->src[1] = 1;
1358 p->dst_size[0] = SIZE_PLUS_2;
1359 p->size2 = SIZE_DOUBLE_MINUS_1;
1360 p->overlap = OVERLAP_NONE;
1361 REFERENCE (refmpn_mulmid_n);
1362
1363 p = ¶m[TYPE_MUL_BASECASE];
1364 COPY (TYPE_MUL_N);
1365 p->dst_size[0] = SIZE_SUM;
1366 p->size2 = 1;
1367 REFERENCE (refmpn_mul_basecase);
1368
1369 p = ¶m[TYPE_UMUL_PPMM];
1370 p->retval = 1;
1371 p->src[0] = 1;
1372 p->dst[0] = 1;
1373 p->dst_size[0] = SIZE_1;
1374 p->overlap = OVERLAP_NONE;
1375 REFERENCE (refmpn_umul_ppmm);
1376
1377 p = ¶m[TYPE_UMUL_PPMM_R];
1378 COPY (TYPE_UMUL_PPMM);
1379 REFERENCE (refmpn_umul_ppmm_r);
1380
1381
1382 p = ¶m[TYPE_RSHIFT];
1383 p->retval = 1;
1384 p->dst[0] = 1;
1385 p->src[0] = 1;
1386 p->shift = 1;
1387 p->overlap = OVERLAP_LOW_TO_HIGH;
1388 REFERENCE (refmpn_rshift);
1389
1390 p = ¶m[TYPE_LSHIFT];
1391 COPY (TYPE_RSHIFT);
1392 p->overlap = OVERLAP_HIGH_TO_LOW;
1393 REFERENCE (refmpn_lshift);
1394
1395 p = ¶m[TYPE_LSHIFTC];
1396 COPY (TYPE_LSHIFT);
1397 REFERENCE (refmpn_lshiftc);
1398
1399 p = ¶m[TYPE_POPCOUNT];
1400 p->retval = 1;
1401 p->src[0] = 1;
1402 REFERENCE (refmpn_popcount);
1403
1404 p = ¶m[TYPE_NOT];
1405 //p->src[0] = 1;
1406 p->dst[0] = 1;
1407 REFERENCE (refmpn_not);
1408
1409 p = ¶m[TYPE_DOUBLE];
1410 p->retval = 1;
1411 p->dst[0] = 1;
1412 REFERENCE (refmpn_double);
1413
1414 p = ¶m[TYPE_HALF];
1415 p->retval = 1;
1416 p->dst[0] = 1;
1417 REFERENCE (refmpn_half);
1418
1419 p = ¶m[TYPE_HAMDIST];
1420 COPY (TYPE_POPCOUNT);
1421 p->src[1] = 1;
1422 REFERENCE (refmpn_hamdist);
1423
1424 p = ¶m[TYPE_TDIV_QR];
1425 p->dst[0] = 1;
1426 p->dst[1] = 1;
1427 p->src[0] = 1;
1428 p->src[1] = 1;
1429 p->size2 = 1;
1430 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1431 p->dst_size[1] = SIZE_SIZE2;
1432 p->overlap = OVERLAP_NONE;
1433 REFERENCE (refmpn_tdiv_qr);
1434
1435 p = ¶m[TYPE_TDIV_Q];
1436 p->dst[0] = 1;
1437 p->src[0] = 1;
1438 p->src[1] = 1;
1439 p->size2 = 1;
1440 p->dst_size[0] = SIZE_DIFF_PLUS_1;
1441 p->overlap = OVERLAP_NONE;
1442 REFERENCE (refmpn_tdiv_q);
1443
1444 p = ¶m[TYPE_SQRTREM];
1445 p->retval = 1;
1446 p->dst[0] = 1;
1447 p->dst[1] = 1;
1448 p->src[0] = 1;
1449 p->dst_size[0] = SIZE_CEIL_HALF;
1450 p->dst_size[1] = SIZE_RETVAL;
1451 p->overlap = OVERLAP_NONE;
1452 VALIDATE (validate_sqrtrem);
1453 REFERENCE (refmpn_sqrtrem);
1454
1455 p = ¶m[TYPE_ZERO];
1456 p->dst[0] = 1;
1457 p->size = SIZE_ALLOW_ZERO;
1458 REFERENCE (refmpn_zero);
1459
1460 p = ¶m[TYPE_STORE];
1461 p->dst[0] = 1;
1462 p->size = SIZE_ALLOW_ZERO;
1463 REFERENCE (refmpn_store);
1464
1465 p = ¶m[TYPE_GET_STR];
1466 p->retval = 1;
1467 p->src[0] = 1;
1468 p->size = SIZE_ALLOW_ZERO;
1469 p->dst[0] = 1;
1470 p->dst[1] = 1;
1471 p->dst_size[0] = SIZE_GET_STR;
1472 p->dst_bytes[0] = 1;
1473 p->overlap = OVERLAP_NONE;
1474 REFERENCE (refmpn_get_str);
1475
1476 #ifdef EXTRA_PARAM_INIT
1477 EXTRA_PARAM_INIT
1478 #endif
1479 }
1480
1481
/* The following are macros if there are no native versions, so wrap them in
   functions that can be placed in choice_array[] (see TRY_FUNFUN). */
1484
1485 void
MPN_COPY_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1486 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1487 { MPN_COPY (rp, sp, size); }
1488
1489 void
MPN_COPY_INCR_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1490 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1491 { MPN_COPY_INCR (rp, sp, size); }
1492
1493 void
MPN_COPY_DECR_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1494 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1495 { MPN_COPY_DECR (rp, sp, size); }
1496
1497 void
__GMPN_COPY_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1498 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1499 { __GMPN_COPY (rp, sp, size); }
1500
#ifdef __GMPN_COPY_INCR
/* Callable function form of __GMPN_COPY_INCR; only built when that macro
   is defined. */
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  __GMPN_COPY_INCR (rp, sp, size);
}
#endif
1506
1507 void
mpn_com_n_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1508 mpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1509 { mpn_com_n (rp, sp, size); }
1510
1511 void
mpn_and_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1512 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1513 { mpn_and_n (rp, s1, s2, size); }
1514
1515 void
mpn_andn_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1516 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1517 { mpn_andn_n (rp, s1, s2, size); }
1518
1519 void
mpn_nand_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1520 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1521 { mpn_nand_n (rp, s1, s2, size); }
1522
1523 void
mpn_ior_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1524 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1525 { mpn_ior_n (rp, s1, s2, size); }
1526
1527 void
mpn_iorn_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1528 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1529 { mpn_iorn_n (rp, s1, s2, size); }
1530
1531 void
mpn_nior_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1532 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1533 { mpn_nior_n (rp, s1, s2, size); }
1534
1535 void
mpn_xor_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1536 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1537 { mpn_xor_n (rp, s1, s2, size); }
1538
1539 void
mpn_xnor_n_fun(mp_ptr rp,mp_srcptr s1,mp_srcptr s2,mp_size_t size)1540 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1541 { mpn_xnor_n (rp, s1, s2, size); }
1542
1543 mp_limb_t
udiv_qrnnd_fun(mp_limb_t * remptr,mp_limb_t n1,mp_limb_t n0,mp_limb_t d)1544 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1545 {
1546 mp_limb_t q;
1547 udiv_qrnnd (q, *remptr, n1, n0, d);
1548 return q;
1549 }
1550
1551 mp_limb_t
mpn_divexact_by3_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1552 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1553 {
1554 return mpn_divexact_by3 (rp, sp, size);
1555 }
1556
1557 void
mpn_not_fun(mp_ptr rp,mp_size_t size)1558 mpn_not_fun (mp_ptr rp, mp_size_t size)
1559 {
1560 mpn_not (rp, size);
1561 }
1562
1563 mp_limb_t
mpn_double_fun(mp_ptr rp,mp_size_t size)1564 mpn_double_fun (mp_ptr rp, mp_size_t size)
1565 {
1566 return mpn_double (rp, size);
1567 }
1568
1569 mp_limb_t
mpn_half_fun(mp_ptr rp,mp_size_t size)1570 mpn_half_fun (mp_ptr rp, mp_size_t size)
1571 {
1572 return mpn_half (rp, size);
1573 }
1574
1575 mp_limb_t
mpn_lshift1_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1576 mpn_lshift1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1577 {
1578 return mpn_lshift1 (rp, sp, size);
1579 }
1580
1581 mp_limb_t
mpn_rshift1_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1582 mpn_rshift1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1583 {
1584 return mpn_rshift1 (rp, sp, size);
1585 }
1586
1587 mp_limb_t
mpn_lshift2_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1588 mpn_lshift2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1589 {
1590 return mpn_lshift2 (rp, sp, size);
1591 }
1592
1593 mp_limb_t
mpn_rshift2_fun(mp_ptr rp,mp_srcptr sp,mp_size_t size)1594 mpn_rshift2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1595 {
1596 return mpn_rshift2 (rp, sp, size);
1597 }
1598
#if HAVE_NATIVE_mpn_addlsh1_n
/* Callable function form of mpn_addlsh1_n; only built when a native
   version exists. */
mp_limb_t
mpn_addlsh1_n_fun (mp_ptr rp, mp_srcptr sp, mp_srcptr sp1, mp_size_t size)
{
  mp_limb_t  result = mpn_addlsh1_n (rp, sp, sp1, size);

  return result;
}
#endif
1606
#if HAVE_NATIVE_mpn_sublsh1_n
/* Callable function form of mpn_sublsh1_n; only built when a native
   version exists. */
mp_limb_t
mpn_sublsh1_n_fun (mp_ptr rp, mp_srcptr sp, mp_srcptr sp1, mp_size_t size)
{
  mp_limb_t  result = mpn_sublsh1_n (rp, sp, sp1, size);

  return result;
}
#endif
1614
#if HAVE_NATIVE_mpn_inclsh_n
/* Callable function form of mpn_inclsh_n; only built when a native
   version exists. */
mp_limb_t
mpn_inclsh_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int c)
{
  mp_limb_t  result = mpn_inclsh_n (rp, sp, size, c);

  return result;
}
#endif
1622
#if HAVE_NATIVE_mpn_declsh_n
/* Callable function form of mpn_declsh_n; only built when a native
   version exists. */
mp_limb_t
mpn_declsh_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int c)
{
  mp_limb_t  result = mpn_declsh_n (rp, sp, size, c);

  return result;
}
#endif
1630
1631 mp_limb_t
mpn_modexact_1_odd_fun(mp_srcptr ptr,mp_size_t size,mp_limb_t divisor)1632 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1633 {
1634 return mpn_modexact_1_odd (ptr, size, divisor);
1635 }
1636
1637 void
mpn_kara_mul_n_fun(mp_ptr dst,mp_srcptr src1,mp_srcptr src2,mp_size_t size)1638 mpn_kara_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1639 {
1640 mp_ptr tspace;
1641 TMP_DECL;
1642 TMP_MARK;
1643 tspace = TMP_ALLOC_LIMBS (MPN_KARA_MUL_N_TSIZE (size));
1644 mpn_kara_mul_n (dst, src1, src2, size, tspace);
1645 }
1646 void
mpn_kara_sqr_n_fun(mp_ptr dst,mp_srcptr src,mp_size_t size)1647 mpn_kara_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1648 {
1649 mp_ptr tspace;
1650 TMP_DECL;
1651 TMP_MARK;
1652 tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (size));
1653 mpn_kara_sqr_n (dst, src, size, tspace);
1654 TMP_FREE;
1655 }
1656 void
mpn_toom3_mul_n_fun(mp_ptr dst,mp_srcptr src1,mp_srcptr src2,mp_size_t size)1657 mpn_toom3_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1658 {
1659 mp_ptr tspace;
1660 TMP_DECL;
1661 TMP_MARK;
1662 tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (size));
1663 mpn_toom3_mul_n (dst, src1, src2, size, tspace);
1664 TMP_FREE;
1665 }
1666 void
mpn_toom3_sqr_n_fun(mp_ptr dst,mp_srcptr src1,mp_size_t size)1667 mpn_toom3_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)
1668 {
1669 mp_ptr tspace;
1670 TMP_DECL;
1671 TMP_MARK;
1672 tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (size));
1673 mpn_toom3_sqr_n (dst, src1, size, tspace);
1674 TMP_FREE;
1675 }
1676 void
mpn_toom4_mul_n_fun(mp_ptr dst,mp_srcptr src1,mp_srcptr src2,mp_size_t size)1677 mpn_toom4_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1678 {
1679 mpn_toom4_mul_n (dst, src1, src2, size);
1680 }
1681 void
mpn_toom4_sqr_n_fun(mp_ptr dst,mp_srcptr src1,mp_size_t size)1682 mpn_toom4_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)
1683 {
1684 mpn_toom4_sqr_n (dst, src1, size);
1685 }
1686 void
mpn_toom8h_mul_fun(mp_ptr dst,mp_srcptr src1,mp_size_t size1,mp_srcptr src2,mp_size_t size2)1687 mpn_toom8h_mul_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size1, mp_srcptr src2, mp_size_t size2)
1688 {
1689 mpn_toom8h_mul (dst, src1, size1, src2, size2);
1690 }
1691 void
mpn_toom8_sqr_n_fun(mp_ptr dst,mp_srcptr src1,mp_size_t size)1692 mpn_toom8_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size)
1693 {
1694 mpn_toom8_sqr_n (dst, src1, size);
1695 }
1696
1697 mp_limb_t
umul_ppmm_fun(mp_limb_t * lowptr,mp_limb_t m1,mp_limb_t m2)1698 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1699 {
1700 mp_limb_t high;
1701 umul_ppmm (high, *lowptr, m1, m2);
1702 return high;
1703 }
1704
1705 void
MPN_ZERO_fun(mp_ptr ptr,mp_size_t size)1706 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1707 { MPN_ZERO (ptr, size); }
1708
1709 void
mpn_store_fun(mp_ptr ptr,mp_size_t size,mp_limb_t val)1710 mpn_store_fun (mp_ptr ptr, mp_size_t size,mp_limb_t val)
1711 { mpn_store (ptr, size,val); }
1712
1713 struct choice_t {
1714 const char *name;
1715 tryfun_t function;
1716 int type;
1717 mp_size_t minsize;
1718 };
1719
/* TRY(fun) supplies the first two initializers of a choice_t entry: the
   text of "fun" and the function itself cast to tryfun_t.  TRY_FUNFUN(fun)
   is the same but uses the wrapper named fun_fun as the function.

   Without HAVE_STRINGIZE the definitions rely on the preprocessor
   substituting a macro argument inside a string literal and on an empty
   comment joining two tokens. */
#if HAVE_STRINGIZE
#define TRY(fun)          #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun)   #fun, (tryfun_t) fun##_fun
#else
#define TRY(fun)          "fun", (tryfun_t) fun
#define TRY_FUNFUN(fun)   "fun", (tryfun_t) fun/**/_fun
#endif
1727
1728 const struct choice_t choice_array[] = {
1729 { TRY(mpn_add), TYPE_ADD },
1730 { TRY(mpn_sub), TYPE_SUB },
1731
1732 { TRY(mpn_add_n), TYPE_ADD_N },
1733 { TRY(mpn_sub_n), TYPE_SUB_N },
1734
1735 #if HAVE_NATIVE_mpn_add_nc
1736 { TRY(mpn_add_nc), TYPE_ADD_NC },
1737 #endif
1738 #if HAVE_NATIVE_mpn_sub_nc
1739 { TRY(mpn_sub_nc), TYPE_SUB_NC },
1740 #endif
1741
1742 { TRY(mpn_sumdiff_n), TYPE_SUMDIFF_N },
1743 #if HAVE_NATIVE_mpn_nsumdiff_n
1744 { TRY(mpn_nsumdiff_n), TYPE_NSUMDIFF_N },
1745 #endif
1746 #if HAVE_NATIVE_mpn_sumdiff_nc
1747 { TRY(mpn_sumdiff_nc), TYPE_SUMDIFF_NC },
1748 #endif
1749
1750 { TRY(mpn_addadd_n), TYPE_ADDADD_N },
1751 { TRY(mpn_addsub_n), TYPE_ADDSUB_N },
1752 { TRY(mpn_subadd_n), TYPE_SUBADD_N },
1753
1754 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
1755 { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
1756 #if HAVE_NATIVE_mpn_addmul_1c
1757 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
1758 #endif
1759 #if HAVE_NATIVE_mpn_submul_1c
1760 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
1761 #endif
1762
1763 #if HAVE_NATIVE_mpn_addmul_2
1764 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
1765 #endif
1766 #if HAVE_NATIVE_mpn_addmul_3
1767 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
1768 #endif
1769 #if HAVE_NATIVE_mpn_addmul_4
1770 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
1771 #endif
1772 #if HAVE_NATIVE_mpn_addmul_5
1773 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
1774 #endif
1775 #if HAVE_NATIVE_mpn_addmul_6
1776 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
1777 #endif
1778 #if HAVE_NATIVE_mpn_addmul_7
1779 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
1780 #endif
1781 #if HAVE_NATIVE_mpn_addmul_8
1782 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
1783 #endif
1784
1785 { TRY_FUNFUN(mpn_com_n), TYPE_COM_N },
1786
1787 { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
1788 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
1789 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
1790
1791 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
1792 #ifdef __GMPN_COPY_INCR
1793 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
1794 #endif
1795
1796 #if HAVE_NATIVE_mpn_copyi
1797 { TRY(mpn_copyi), TYPE_COPYI },
1798 #endif
1799 #if HAVE_NATIVE_mpn_copyd
1800 { TRY(mpn_copyd), TYPE_COPYD },
1801 #endif
1802
1803 #if HAVE_NATIVE_mpn_addlsh1_n
1804 { TRY_FUNFUN(mpn_addlsh1_n), TYPE_ADDLSH1_N },
1805 #endif
1806 #if HAVE_NATIVE_mpn_sublsh1_n
1807 { TRY_FUNFUN(mpn_sublsh1_n), TYPE_SUBLSH1_N },
1808 #endif
1809 #if HAVE_NATIVE_mpn_addlsh_n
1810 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
1811 #endif
1812 #if HAVE_NATIVE_mpn_sublsh_n
1813 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
1814 #endif
1815 #if HAVE_NATIVE_mpn_addlsh_nc
1816 { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
1817 #endif
1818 #if HAVE_NATIVE_mpn_sublsh_nc
1819 { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
1820 #endif
1821 #if HAVE_NATIVE_mpn_inclsh_n
1822 { TRY_FUNFUN(mpn_inclsh_n), TYPE_INCLSH_N },
1823 #endif
1824 #if HAVE_NATIVE_mpn_declsh_n
1825 { TRY_FUNFUN(mpn_declsh_n), TYPE_DECLSH_N },
1826 #endif
1827
1828 #if HAVE_NATIVE_mpn_rsh1add_n
1829 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
1830 #endif
1831 #if HAVE_NATIVE_mpn_rsh1sub_n
1832 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
1833 #endif
1834
1835 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
1836 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
1837 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
1838 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
1839 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
1840 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
1841 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
1842 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
1843
1844 { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
1845 { TRY(mpn_divrem_euclidean_qr_1), TYPE_DIVREM_EUCLIDEAN_QR_1 },
1846 { TRY(mpn_divrem_euclidean_r_1), TYPE_DIVREM_EUCLIDEAN_R_1 },
1847 { TRY(mpn_divrem_hensel_qr_1), TYPE_DIVREM_HENSEL_QR_1 },
1848 { TRY(mpn_divrem_hensel_qr_1_1), TYPE_DIVREM_HENSEL_QR_1_1 },
1849 { TRY(mpn_divrem_hensel_qr_1_2), TYPE_DIVREM_HENSEL_QR_1_2 ,2},
1850 { TRY(mpn_divrem_hensel_r_1), TYPE_DIVREM_HENSEL_R_1 },
1851 { TRY(mpn_rsh_divrem_hensel_qr_1), TYPE_RSH_DIVREM_HENSEL_QR_1 },
1852 { TRY(mpn_rsh_divrem_hensel_qr_1_1), TYPE_RSH_DIVREM_HENSEL_QR_1_1 },
1853 { TRY(mpn_rsh_divrem_hensel_qr_1_2), TYPE_RSH_DIVREM_HENSEL_QR_1_2 ,3},
1854
1855 { TRY(mpn_divrem_hensel_rsh_qr_1), TYPE_DIVREM_HENSEL_RSH_QR_1 },
1856
1857 { TRY(mpn_add_err1_n), TYPE_ADDERR1_N},
1858 { TRY(mpn_sub_err1_n), TYPE_SUBERR1_N},
1859 { TRY(mpn_add_err2_n), TYPE_ADDERR2_N},
1860 { TRY(mpn_sub_err2_n), TYPE_SUBERR2_N},
1861 #if USE_PREINV_DIVREM_1
1862 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
1863 #endif
1864 { TRY(mpn_mod_1), TYPE_MOD_1 },
1865 #if USE_PREINV_MOD_1
1866 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
1867 #endif
1868 #if HAVE_NATIVE_mpn_divrem_1c
1869 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
1870 #endif
1871 #if HAVE_NATIVE_mpn_mod_1c
1872 { TRY(mpn_mod_1c), TYPE_MOD_1C },
1873 #endif
1874 #if GMP_NUMB_BITS % 4 == 0
1875 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
1876 #endif
1877
1878 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1879 #if HAVE_NATIVE_mpn_udiv_qrnnd
1880 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
1881 #endif
1882 #if HAVE_NATIVE_mpn_udiv_qrnnd_r
1883 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
1884 #endif
1885
1886 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
1887 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
1888 { TRY(mpn_divexact_byff), TYPE_DIVEXACT_BYFF },
1889 { TRY(mpn_divexact_byfobm1), TYPE_DIVEXACT_BYFOBM1 },
1890
1891 { TRY_FUNFUN(mpn_lshift1), TYPE_LSHIFT1 },
1892 { TRY_FUNFUN(mpn_rshift1), TYPE_RSHIFT1 },
1893 { TRY_FUNFUN(mpn_lshift2), TYPE_LSHIFT2 },
1894 { TRY_FUNFUN(mpn_rshift2), TYPE_RSHIFT2 },
1895 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
1896
1897 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
1898 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
1899
1900 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
1901 { TRY(mpn_tdiv_q), TYPE_TDIV_Q },
1902
1903 { TRY(mpn_mul_1), TYPE_MUL_1 },
1904 #if HAVE_NATIVE_mpn_mul_1c
1905 { TRY(mpn_mul_1c), TYPE_MUL_1C },
1906 #endif
1907 #if HAVE_NATIVE_mpn_mul_2
1908 { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
1909 #endif
1910
1911 { TRY(mpn_rshift), TYPE_RSHIFT },
1912 { TRY(mpn_lshift), TYPE_LSHIFT },
1913 #if HAVE_NATIVE_mpn_lshiftc
1914 { TRY(mpn_lshiftc), TYPE_LSHIFTC },
1915 #endif
1916
1917 { TRY(mpn_mul_basecase), TYPE_MUL_BASECASE },
1918 { TRY(mpn_redc_1), TYPE_REDC_BASECASE },
1919 #if SQR_KARATSUBA_THRESHOLD > 0
1920 { TRY(mpn_sqr_basecase), TYPE_SQR },
1921 #endif
1922
1923 { TRY(mpn_mul), TYPE_MUL_BASECASE },
1924 { TRY(mpn_mul_n), TYPE_MUL_N },
1925 { TRY(mpn_sqr), TYPE_SQR },
1926 { TRY(mpn_mulmid_basecase), TYPE_MULMID_BASECASE },
1927 { TRY(mpn_mulmid), TYPE_MULMID },
1928 { TRY(mpn_mulmid_n), TYPE_MULMID_N },
1929
1930 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
1931 #if HAVE_NATIVE_mpn_umul_ppmm
1932 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
1933 #endif
1934 #if HAVE_NATIVE_mpn_umul_ppmm_r
1935 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
1936 #endif
1937
1938 { TRY_FUNFUN(mpn_kara_mul_n), TYPE_MUL_N, MPN_KARA_MUL_N_MINSIZE },
1939 { TRY_FUNFUN(mpn_kara_sqr_n), TYPE_SQR, MPN_KARA_SQR_N_MINSIZE },
1940 { TRY_FUNFUN(mpn_toom3_mul_n), TYPE_MUL_N, MPN_TOOM3_MUL_N_MINSIZE },
1941 { TRY_FUNFUN(mpn_toom4_mul_n), TYPE_MUL_N, MPN_TOOM4_MUL_N_MINSIZE },
1942 { TRY_FUNFUN(mpn_toom8h_mul), TYPE_MUL_BASECASE, MPN_TOOM8H_MUL_MINSIZE },
1943 { TRY_FUNFUN(mpn_toom3_sqr_n), TYPE_SQR, MPN_TOOM3_SQR_N_MINSIZE },
1944 { TRY_FUNFUN(mpn_toom4_sqr_n), TYPE_SQR, MPN_TOOM4_SQR_N_MINSIZE },
1945 { TRY_FUNFUN(mpn_toom8_sqr_n), TYPE_SQR, MPN_TOOM8_SQR_N_MINSIZE },
1946
1947 { TRY(mpn_gcd_1), TYPE_GCD_1 },
1948 { TRY(mpn_gcd), TYPE_GCD },
1949 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
1950 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
1951 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
1952 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
1953 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
1954
1955 { TRY_FUNFUN(mpn_not), TYPE_NOT },
1956 { TRY_FUNFUN(mpn_double), TYPE_DOUBLE },
1957 { TRY_FUNFUN(mpn_half), TYPE_HALF },
1958
1959 { TRY(mpn_popcount), TYPE_POPCOUNT },
1960 { TRY(mpn_hamdist), TYPE_HAMDIST },
1961
1962 { TRY(mpn_sqrtrem), TYPE_SQRTREM },
1963
1964 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
1965 { TRY_FUNFUN(mpn_store), TYPE_STORE },
1966
1967 { TRY(mpn_get_str), TYPE_GET_STR },
1968
1969 #ifdef EXTRA_ROUTINES
1970 EXTRA_ROUTINES
1971 #endif
1972 };
1973
1974 const struct choice_t *choice = NULL;
1975
1976
1977 void
mprotect_maybe(void * addr,size_t len,int prot)1978 mprotect_maybe (void *addr, size_t len, int prot)
1979 {
1980 if (!option_redzones)
1981 return;
1982
1983 #if HAVE_MPROTECT
1984 if (mprotect (addr, len, prot) != 0)
1985 {
1986 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1987 addr, len, prot, strerror (errno));
1988 exit (1);
1989 }
1990 #else
1991 {
1992 static int warned = 0;
1993 if (!warned)
1994 {
1995 fprintf (stderr,
1996 "mprotect not available, bounds testing not performed\n");
1997 warned = 1;
1998 }
1999 }
2000 #endif
2001 }
2002
/* Round "a" up to a multiple of "m"; "a" is returned unchanged when it is
   already a multiple.  "m" must be non-zero.

   The remainder is kept in a size_t, the same type as the operands, so it
   cannot be truncated on systems where unsigned long is narrower than
   size_t. */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
2015
2016
2017 /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
2018 for instance HP-UX 10.
2019
2020 mmap will almost certainly return a pointer already aligned to a page
2021 boundary, but it's easy enough to share the alignment handling with the
2022 malloc case. */
2023
2024 void
malloc_region(struct region_t * r,mp_size_t n)2025 malloc_region (struct region_t *r, mp_size_t n)
2026 {
2027 mp_ptr p;
2028 size_t nbytes;
2029
2030 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
2031
2032 n = round_up_multiple (n, PAGESIZE_LIMBS);
2033 r->size = n;
2034
2035 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
2036
2037 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
2038 #define MAP_ANON MAP_ANONYMOUS
2039 #endif
2040
2041 #if HAVE_MMAP && defined (MAP_ANON)
2042 /* note must pass fd=-1 for MAP_ANON on BSD */
2043 p = (mp_ptr)mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
2044 if (p == (void *) -1)
2045 {
2046 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
2047 nbytes, strerror (errno));
2048 exit (1);
2049 }
2050 #else
2051 p = (mp_ptr) malloc (nbytes);
2052 ASSERT_ALWAYS (p != NULL);
2053 #endif
2054
2055 p = align_pointer (p, pagesize);
2056
2057 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
2058 p += REDZONE_LIMBS;
2059 r->ptr = p;
2060
2061 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
2062 }
2063
2064 void
mprotect_region(const struct region_t * r,int prot)2065 mprotect_region (const struct region_t *r, int prot)
2066 {
2067 mprotect_maybe (r->ptr, r->size, prot);
2068 }
2069
2070
2071 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
2072 and CARRY_4 */
2073 mp_limb_t carry_array[] = {
2074 0, 1, 2, 3,
2075 4,
2076 CNST_LIMB(1) << 8,
2077 CNST_LIMB(1) << 16,
2078 GMP_NUMB_MAX
2079 };
2080 int carry_index;
2081
/* Number of carry values to try for the current "tr": 2, 3 or 4 for
   CARRY_BIT, CARRY_3 and CARRY_4; the whole table plus CARRY_RANDOMS extra
   values for CARRY_LIMB and CARRY_DIVISOR; otherwise a single pass. */
#define CARRY_COUNT                                                     \
  ((tr->carry == CARRY_BIT) ? 2                                         \
   : tr->carry == CARRY_3   ? 3                                         \
   : tr->carry == CARRY_4   ? 4                                         \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)            \
     ? numberof(carry_array) + CARRY_RANDOMS                            \
   : 1)
2089
/* Fill {dst,size} using refmpn_random when "index" is odd and
   refmpn_random2 when it is even. */
#define MPN_RANDOM_ALT(index,dst,size)          \
  (((index) & 1)                                \
   ? refmpn_random (dst, size)                  \
   : refmpn_random2 (dst, size))
2092
/* Loop header stepping "carry" through CARRY_COUNT values: first the
   entries of carry_array, then values from MPN_RANDOM_ALT.  For
   CARRY_DIVISOR the value is reduced modulo "divisor".  Note "carry" is
   assigned before the limit test, so it is also set on the final, failing
   evaluation of the condition.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
        ? (carry = carry_array[carry_index])                            \
        : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
         (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
         carry_index < CARRY_COUNT;                                     \
       carry_index++)
2103
2104
2105 mp_limb_t multiplier_array[] = {
2106 0, 1, 2, 3,
2107 CNST_LIMB(1) << 8,
2108 CNST_LIMB(1) << 16,
2109 GMP_NUMB_MAX - 2,
2110 GMP_NUMB_MAX - 1,
2111 GMP_NUMB_MAX
2112 };
2113 int multiplier_index;
2114
2115 mp_limb_t divisor_array[] = {
2116 1, 2, 3,
2117 CNST_LIMB(1) << 8,
2118 CNST_LIMB(1) << 16,
2119 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
2120 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
2121 GMP_NUMB_HIGHBIT,
2122 GMP_NUMB_HIGHBIT + 1,
2123 GMP_NUMB_MAX - 2,
2124 GMP_NUMB_MAX - 1,
2125 GMP_NUMB_MAX
2126 };
2127
2128 int divisor_index;
2129
2130 mp_limb_t altdiv_array[]={1,3,5,15,17,51,85,255,65535,
2131 GMP_NUMB_MAX/1,GMP_NUMB_MAX/3,GMP_NUMB_MAX/5,GMP_NUMB_MAX/15,
2132 GMP_NUMB_MAX/17,GMP_NUMB_MAX/51,GMP_NUMB_MAX/85,GMP_NUMB_MAX/255,GMP_NUMB_MAX/65535};
2133
2134 int altdiv_index;
2135
/* Generic loop header: "var" takes each entry of "array" in turn and then,
   while "index" is still below "limit", values from MPN_RANDOM_ALT.  The
   "randoms" and "cond" arguments are accepted but do not appear in the
   expansion.  "var" is assigned before the limit test, so it is also set
   on the final, failing evaluation of the condition.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
        ? (var = array[index])                                          \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
         index < limit;                                                 \
       index++)
2145
/* Number of multiplier values to try: the whole table plus
   MULTIPLIER_RANDOMS extra values when tr->multiplier is set, otherwise a
   single pass. */
#define MULTIPLIER_COUNT                                        \
  (tr->multiplier                                               \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS           \
   : 1)

/* Loop header stepping "multiplier" through MULTIPLIER_COUNT values. */
#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
                  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
2154
/* Number of divisor values to try: a single pass when tr->divisor is 0 or
   DIVISOR_DIVBM1, otherwise the whole table plus DIVISOR_RANDOMS extra
   values. */
#define DIVISOR_COUNT                                   \
  (tr->divisor == 0 ? 1                                 \
   : tr->divisor == DIVISOR_DIVBM1 ? 1                  \
   : numberof (divisor_array) + DIVISOR_RANDOMS )
2160
/* Number of altdiv values to try: every entry of altdiv_array when
   tr->divisor is DIVISOR_DIVBM1, otherwise a single pass.

   The limit must be the length of altdiv_array, the table that
   ALTDIV_ITERATION actually walks.  Using the length of divisor_array here
   would stop early and leave the trailing altdiv_array entries untested. */
#define ALTDIV_COUNT                                    \
  (tr->divisor == 0 ? 1 :                               \
   tr->divisor == DIVISOR_DIVBM1                        \
   ? numberof (altdiv_array) : 1 )
2165
/* Loop header stepping "divisor" through DIVISOR_COUNT values. */
#define DIVISOR_ITERATION                                       \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT,        \
                  divisor_array, DIVISOR_RANDOMS, TRY_DIVISOR)


/* Loop header stepping "altdiv" through ALTDIV_COUNT values. */
#define ALTDIV_ITERATION                                        \
  ARRAY_ITERATION(altdiv, altdiv_index, ALTDIV_COUNT,           \
                  altdiv_array, 0, TRY_DIVISOR)
2174
2175
2176 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
2177 d[0] or d[1] respectively, -1 means a separate (write-protected)
2178 location. */
2179
2180 struct overlap_t {
2181 int s[NUM_SOURCES];
2182 } overlap_array[] = {
2183 { { -1, -1, -1 } },
2184 { { 0, -1, -1 } },
2185 { { -1, 0, -1 } },
2186 { { 0, 0, -1 } },
2187 { { 1, -1, -1 } },
2188 { { -1, 1, -1 } },
2189 { { 1, 1, -1 } },
2190 { { 0, 1, -1 } },
2191 { { 1, 0, -1 } },
2192 { { -1, -1, 0 } },
2193 { { 0, -1, 0 } },
2194 { { -1, 0, 0 } },
2195 { { 0, 0, 0 } },
2196 { { 1, -1, 0 } },
2197 { { -1, 1, 0 } },
2198 { { 1, 1, 0 } },
2199 { { 0, 1, 0 } },
2200 { { 1, 0, 0 } },
2201 { { -1, -1, 1 } },
2202 { { 0, -1, 1 } },
2203 { { -1, 0, 1 } },
2204 { { 0, 0, 1 } },
2205 { { 1, -1, 1 } },
2206 { { -1, 1, 1 } },
2207 { { 1, 1, 1 } },
2208 { { 0, 1, 1 } },
2209 { { 1, 0, 1 } },
2210 };
2211
2212 struct overlap_t *overlap, *overlap_limit;
2213
/*
   This is a count of the number of overlaps from the above table to try.
   Each source operand can be overlapped with each destination operand
   (which are fixed and cannot be overlapped) or put in a non-overlapping
   block all to itself.  Some functions require that source operands don't
   overlap.  They can't go beyond the first three entries of the table, as
   after that, this starts to happen.

   Three source operands are available, but only those which are used by
   the function are actually filled with data and made part of the test.
   The rest are ignored.

   The tests below are tried in the order written and the first that holds
   decides the count.
*/

#define OVERLAP_COUNT                           \
  (tr->overlap & OVERLAP_NONE       ? 1         \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3         \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2         \
   : tr->dst[1]                     ? 9         \
   : tr->src[2]                     ? 27        \
   : tr->src[1]                     ? 4         \
   : tr->dst[0]                     ? 2         \
   : 1)

/* Loop header stepping "overlap" over the first OVERLAP_COUNT entries of
   overlap_array. */
#define OVERLAP_ITERATION                               \
  for (overlap = &overlap_array[0],                     \
         overlap_limit = &overlap_array[OVERLAP_COUNT]; \
       overlap < overlap_limit;                         \
       overlap++)
2242
2243
/* Numeric base, 10 unless changed elsewhere (presumably for the get_str
   tests -- confirm against its users). */
int  base = 10;

/* t_rand picks which generator t_random uses under DATA_TRAND: 0 for
   refmpn_random, 1 for refmpn_random2.  T_RAND_COUNT is the number of
   such choices. */
#define T_RAND_COUNT  2
int  t_rand;
2248
2249 void
t_random(mp_ptr ptr,mp_size_t n)2250 t_random (mp_ptr ptr, mp_size_t n)
2251 {
2252 if (n == 0)
2253 return;
2254
2255 switch (option_data) {
2256 case DATA_TRAND:
2257 switch (t_rand) {
2258 case 0: refmpn_random (ptr, n); break;
2259 case 1: refmpn_random2 (ptr, n); break;
2260 default: abort();
2261 }
2262 break;
2263 case DATA_SEQ:
2264 {
2265 static mp_limb_t counter = 0;
2266 mp_size_t i;
2267 for (i = 0; i < n; i++)
2268 ptr[i] = ++counter;
2269 }
2270 break;
2271 case DATA_ZEROS:
2272 refmpn_zero (ptr, n);
2273 break;
2274 case DATA_FFS:
2275 refmpn_fill (ptr, n, GMP_NUMB_MAX);
2276 break;
2277 case DATA_2FD:
2278 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
2279 inducing the q1_ff special case in the mul-by-inverse part of some
2280 versions of divrem_1 and mod_1. */
2281 refmpn_fill (ptr, n, (mp_limb_t) -1);
2282 ptr[n-1] = 2;
2283 ptr[0] -= 2;
2284 break;
2285
2286 default:
2287 abort();
2288 }
2289 }
/* Loop header running t_rand over 0 .. T_RAND_COUNT-1. */
#define T_RAND_ITERATION                        \
  for (t_rand = 0;                              \
       t_rand < T_RAND_COUNT;                   \
       t_rand++)
2292
2293
2294 void
print_each(const struct each_t * e)2295 print_each (const struct each_t *e)
2296 {
2297 int i;
2298
2299 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
2300 if (tr->retval)
2301 mpn_trace (" retval", &e->retval, 1);
2302
2303 for (i = 0; i < NUM_DESTS; i++)
2304 {
2305 if (tr->dst[i])
2306 {
2307 if (tr->dst_bytes[i])
2308 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2309 else
2310 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
2311 printf (" located %p\n", e->d[i].p);
2312 }
2313 }
2314
2315 for (i = 0; i < NUM_SOURCES; i++)
2316 if (tr->src[i])
2317 printf (" s[%d] located %p\n", i, e->s[i].p);
2318 }
2319
2320
2321 void
print_all(void)2322 print_all (void)
2323 {
2324 int i;
2325
2326 printf ("\n");
2327 printf ("size %ld\n", (long) size);
2328 if (tr->size2)
2329 printf ("size2 %ld\n", (long) size2);
2330
2331 for (i = 0; i < NUM_DESTS; i++)
2332 if (d[i].size != size)
2333 printf ("d[%d].size %ld\n", i, (long) d[i].size);
2334
2335 if (tr->multiplier)
2336 mpn_trace (" multiplier", &multiplier, 1);
2337 if (tr->divisor)
2338 mpn_trace (" divisor", &divisor, 1);
2339 if (tr->shift)
2340 printf (" shift %lu\n", shift);
2341 if (tr->carry)
2342 mpn_trace (" carry", &carry, 1);
2343 if (tr->msize)
2344 mpn_trace (" multiplier_N", multiplier_N, tr->msize);
2345
2346 for (i = 0; i < NUM_DESTS; i++)
2347 if (tr->dst[i])
2348 printf (" d[%d] %s, align %ld, size %ld\n",
2349 i, d[i].high ? "high" : "low",
2350 (long) d[i].align, (long) d[i].size);
2351
2352 for (i = 0; i < NUM_SOURCES; i++)
2353 {
2354 if (tr->src[i])
2355 {
2356 printf (" s[%d] %s, align %ld, ",
2357 i, s[i].high ? "high" : "low", (long) s[i].align);
2358 switch (overlap->s[i]) {
2359 case -1:
2360 printf ("no overlap\n");
2361 break;
2362 default:
2363 printf ("==d[%d]%s\n",
2364 overlap->s[i],
2365 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
2366 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
2367 : "");
2368 break;
2369 }
2370 printf (" s[%d]=", i);
2371 if (tr->carry_sign && (carry & (1 << i)))
2372 printf ("-");
2373 mpn_trace (NULL, s[i].p, SRC_SIZE(i));
2374 }
2375 }
2376
2377 if (tr->dst0_from_src1)
2378 mpn_trace (" d[0]", s[1].region.ptr, size);
2379
2380 if (tr->reference)
2381 print_each (&ref);
2382 print_each (&fun);
2383 }
2384
2385 void
compare(void)2386 compare (void)
2387 {
2388 int error = 0;
2389 int i;
2390
2391 if (tr->retval && ref.retval != fun.retval)
2392 {
2393 gmp_printf ("Different return values (%Mu, %Mu)\n",
2394 ref.retval, fun.retval);
2395 error = 1;
2396 }
2397
2398 for (i = 0; i < NUM_DESTS; i++)
2399 {
2400 switch (tr->dst_size[i]) {
2401 case SIZE_RETVAL:
2402 case SIZE_GET_STR:
2403 d[i].size = ref.retval;
2404 break;
2405 }
2406 }
2407
2408 for (i = 0; i < NUM_DESTS; i++)
2409 {
2410 if (! tr->dst[i])
2411 continue;
2412
2413 if (tr->dst_bytes[i])
2414 {
2415 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2416 {
2417 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2418 i,
2419 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2420 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2421 error = 1;
2422 }
2423 }
2424 else
2425 {
2426 if (d[i].size != 0
2427 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2428 {
2429 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2430 i,
2431 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2432 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2433 error = 1;
2434 }
2435 }
2436 }
2437
2438 if (error)
2439 {
2440 print_all();
2441 abort();
2442 }
2443 }
2444
2445
2446 /* The functions are cast if the return value should be a long rather than
2447 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
2448 might not be enough if some actual calling conventions checking is
2449 implemented on a long long limb system. */
2450
2451 void
call(struct each_t * e,tryfun_t function)2452 call (struct each_t *e, tryfun_t function)
2453 {
2454 switch (choice->type) {
2455 case TYPE_ADD:
2456 case TYPE_SUB:
2457 e->retval = CALLING_CONVENTIONS (function)
2458 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2459 break;
2460
2461 case TYPE_ADDERR1_N:
2462 case TYPE_SUBERR1_N:
2463 e->retval =CALLING_CONVENTIONS(function)
2464 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p , size,carry);
2465 break;
2466 case TYPE_ADDERR2_N:
2467 case TYPE_SUBERR2_N:
2468 e->retval =CALLING_CONVENTIONS(function)
2469 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p ,e->s[3].p, size,carry);
2470 break;
2471
2472 case TYPE_ADD_N:
2473 case TYPE_SUB_N:
2474 case TYPE_ADDLSH1_N:
2475 case TYPE_SUBLSH1_N:
2476 case TYPE_RSH1ADD_N:
2477 case TYPE_RSH1SUB_N:
2478 e->retval = CALLING_CONVENTIONS (function)
2479 (e->d[0].p, e->s[0].p, e->s[1].p, size);
2480 break;
2481 case TYPE_ADDLSH_N:
2482 case TYPE_SUBLSH_N:
2483 e->retval = CALLING_CONVENTIONS (function)
2484 (e->d[0].p, e->s[0].p, e->s[1].p, size,shift);
2485 break;
2486 case TYPE_ADDLSH_NC:
2487 case TYPE_SUBLSH_NC:
2488 e->retval = CALLING_CONVENTIONS (function)
2489 (e->d[0].p, e->s[0].p, e->s[1].p, size,shift,carry);
2490 break;
2491 case TYPE_INCLSH_N:
2492 case TYPE_DECLSH_N:
2493 e->retval = CALLING_CONVENTIONS (function)
2494 (e->d[0].p, e->s[0].p, size,shift);
2495 break;
2496 case TYPE_ADD_NC:
2497 case TYPE_SUB_NC:
2498 e->retval = CALLING_CONVENTIONS (function)
2499 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2500 break;
2501
2502 case TYPE_MUL_1:
2503 case TYPE_ADDMUL_1:
2504 case TYPE_SUBMUL_1:
2505 e->retval = CALLING_CONVENTIONS (function)
2506 (e->d[0].p, e->s[0].p, size, multiplier);
2507 break;
2508 case TYPE_MUL_1C:
2509 case TYPE_ADDMUL_1C:
2510 case TYPE_SUBMUL_1C:
2511 e->retval = CALLING_CONVENTIONS (function)
2512 (e->d[0].p, e->s[0].p, size, multiplier, carry);
2513 break;
2514
2515 case TYPE_MUL_2:
2516 if (size == 1)
2517 abort ();
2518 e->retval = CALLING_CONVENTIONS (function)
2519 (e->d[0].p, e->s[0].p, size, e->s[1].p);
2520 break;
2521
2522 case TYPE_ADDMUL_2:
2523 case TYPE_ADDMUL_3:
2524 case TYPE_ADDMUL_4:
2525 case TYPE_ADDMUL_5:
2526 case TYPE_ADDMUL_6:
2527 case TYPE_ADDMUL_7:
2528 case TYPE_ADDMUL_8:
2529 if (size == 1)
2530 abort ();
2531 e->retval = CALLING_CONVENTIONS (function)
2532 (e->d[0].p, e->s[0].p, size, multiplier_N);
2533 break;
2534
2535 case TYPE_AND_N:
2536 case TYPE_ANDN_N:
2537 case TYPE_NAND_N:
2538 case TYPE_IOR_N:
2539 case TYPE_IORN_N:
2540 case TYPE_NIOR_N:
2541 case TYPE_XOR_N:
2542 case TYPE_XNOR_N:
2543 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2544 break;
2545
2546 case TYPE_SUMDIFF_N:
2547 e->retval = CALLING_CONVENTIONS (function)
2548 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2549 break;
2550 case TYPE_NSUMDIFF_N:
2551 e->retval = CALLING_CONVENTIONS (function)
2552 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2553 break;
2554 case TYPE_SUMDIFF_NC:
2555 e->retval = CALLING_CONVENTIONS (function)
2556 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2557 break;
2558
2559 case TYPE_ADDSUB_N:
2560 e->retval = (int)CALLING_CONVENTIONS (function)
2561 (e->d[0].p, e->s[0].p, e->s[1].p, e->s[2].p,size);
2562 break;
2563 case TYPE_ADDADD_N:
2564 e->retval = CALLING_CONVENTIONS (function)
2565 (e->d[0].p, e->s[0].p, e->s[1].p, e->s[2].p,size);
2566 break;
2567 case TYPE_SUBADD_N:
2568 e->retval = CALLING_CONVENTIONS (function)
2569 (e->d[0].p, e->s[0].p, e->s[1].p, e->s[2].p,size);
2570 break;
2571
2572
2573 case TYPE_COPY:
2574 case TYPE_COPYI:
2575 case TYPE_COPYD:
2576 case TYPE_COM_N:
2577 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2578 break;
2579
2580
2581 case TYPE_DIVEXACT_BY3:
2582 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2583 break;
2584 case TYPE_DIVEXACT_BYFF:
2585 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2586 break;
2587
2588 case TYPE_LSHIFT1:
2589 case TYPE_RSHIFT1:
2590 case TYPE_LSHIFT2:
2591 case TYPE_RSHIFT2:
2592 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2593 break;
2594
2595 case TYPE_DIVEXACT_BY3C:
2596 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2597 carry);
2598 break;
2599
2600
2601 case TYPE_DIVREM_HENSEL_QR_1:
2602 case TYPE_DIVREM_HENSEL_QR_1_1:
2603 case TYPE_DIVREM_HENSEL_QR_1_2:
2604 case TYPE_DIVMOD_1:
2605 case TYPE_DIVEXACT_1:
2606 e->retval = CALLING_CONVENTIONS (function)
2607 (e->d[0].p, e->s[0].p, size, divisor);
2608 break;
2609
2610 case TYPE_RSH_DIVREM_HENSEL_QR_1:
2611 case TYPE_RSH_DIVREM_HENSEL_QR_1_1:
2612 case TYPE_RSH_DIVREM_HENSEL_QR_1_2:
2613 e->retval = CALLING_CONVENTIONS (function)
2614 (e->d[0].p, e->s[0].p, size, divisor,shift,carry);
2615 break;
2616
2617 case TYPE_DIVREM_HENSEL_RSH_QR_1:
2618 e->retval = CALLING_CONVENTIONS (function)
2619 (e->d[0].p, e->s[0].p, size, divisor,shift);
2620 break;
2621 case TYPE_DIVEXACT_BYFOBM1:
2622 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, altdiv,GMP_NUMB_MAX/altdiv);
2623 break;
2624 case TYPE_DIVMOD_1C:
2625 e->retval = CALLING_CONVENTIONS (function)
2626 (e->d[0].p, e->s[0].p, size, divisor, carry);
2627 break;
2628 case TYPE_DIVREM_EUCLIDEAN_QR_1:
2629 case TYPE_DIVREM_1:
2630 e->retval = CALLING_CONVENTIONS (function)
2631 (e->d[0].p, size2, e->s[0].p, size, divisor);
2632 break;
2633 case TYPE_DIVREM_1C:
2634 e->retval = CALLING_CONVENTIONS (function)
2635 (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2636 break;
2637 case TYPE_PREINV_DIVREM_1:
2638 {
2639 mp_limb_t dinv;
2640 unsigned shift;
2641 shift = refmpn_count_leading_zeros (divisor);
2642 dinv = refmpn_invert_limb (divisor << shift);
2643 e->retval = CALLING_CONVENTIONS (function)
2644 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2645 }
2646 break;
2647 case TYPE_DIVREM_HENSEL_R_1:
2648 case TYPE_DIVREM_EUCLIDEAN_R_1:
2649 case TYPE_MOD_1:
2650 case TYPE_MODEXACT_1_ODD:
2651 e->retval = CALLING_CONVENTIONS (function)
2652 (e->s[0].p, size, divisor);
2653 break;
2654 case TYPE_MOD_1C:
2655 case TYPE_MODEXACT_1C_ODD:
2656 e->retval = CALLING_CONVENTIONS (function)
2657 (e->s[0].p, size, divisor, carry);
2658 break;
2659 case TYPE_PREINV_MOD_1:
2660 e->retval = CALLING_CONVENTIONS (function)
2661 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2662 break;
2663 case TYPE_MOD_34LSUB1:
2664 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2665 break;
2666
2667 case TYPE_UDIV_QRNND:
2668 e->retval = CALLING_CONVENTIONS (function)
2669 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2670 break;
2671 case TYPE_UDIV_QRNND_R:
2672 e->retval = CALLING_CONVENTIONS (function)
2673 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2674 break;
2675
2676 case TYPE_SB_DIVREM_MN:
2677 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
2678 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
2679 e->retval = CALLING_CONVENTIONS (function)
2680 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2);
2681 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
2682 break;
2683 case TYPE_TDIV_QR:
2684 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2685 e->s[0].p, size, e->s[1].p, size2);
2686 break;
2687 case TYPE_TDIV_Q:
2688 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p,
2689 size, e->s[1].p, size2);
2690 break;
2691
2692 case TYPE_GCD_1:
2693 /* Must have a non-zero src, but this probably isn't the best way to do
2694 it. */
2695 if (refmpn_zero_p (e->s[0].p, size))
2696 e->retval = 0;
2697 else
2698 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2699 break;
2700
2701 case TYPE_GCD:
2702 /* Sources are destroyed, so they're saved and replaced, but a general
2703 approach to this might be better. Note that it's still e->s[0].p and
2704 e->s[1].p that are passed, to get the desired alignments. */
2705 {
2706 mp_ptr s0 = refmpn_malloc_limbs (size);
2707 mp_ptr s1 = refmpn_malloc_limbs (size2);
2708 refmpn_copyi (s0, e->s[0].p, size);
2709 refmpn_copyi (s1, e->s[1].p, size2);
2710
2711 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2712 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2713 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2714 e->s[0].p, size,
2715 e->s[1].p, size2);
2716 refmpn_copyi (e->s[0].p, s0, size);
2717 refmpn_copyi (e->s[1].p, s1, size2);
2718 free (s0);
2719 free (s1);
2720 }
2721 break;
2722
2723 case TYPE_GCD_FINDA:
2724 {
2725 /* FIXME: do this with a flag */
2726 mp_limb_t c[2];
2727 c[0] = e->s[0].p[0];
2728 c[0] += (c[0] == 0);
2729 c[1] = e->s[0].p[0];
2730 c[1] += (c[1] == 0);
2731 e->retval = CALLING_CONVENTIONS (function) (c);
2732 }
2733 break;
2734
2735 case TYPE_MPZ_JACOBI:
2736 case TYPE_MPZ_KRONECKER:
2737 {
2738 mpz_t a, b;
2739 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2740 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2741 e->retval = CALLING_CONVENTIONS (function) (a, b);
2742 }
2743 break;
2744 case TYPE_MPZ_KRONECKER_UI:
2745 {
2746 mpz_t a;
2747 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2748 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2749 }
2750 break;
2751 case TYPE_MPZ_KRONECKER_SI:
2752 {
2753 mpz_t a;
2754 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2755 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2756 }
2757 break;
2758 case TYPE_MPZ_UI_KRONECKER:
2759 {
2760 mpz_t b;
2761 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2762 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2763 }
2764 break;
2765 case TYPE_MPZ_SI_KRONECKER:
2766 {
2767 mpz_t b;
2768 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2769 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2770 }
2771 break;
2772
2773 case TYPE_MUL_BASECASE:
2774 CALLING_CONVENTIONS (function)
2775 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2776 break;
2777 case TYPE_MULMID_BASECASE:
2778 CALLING_CONVENTIONS (function)
2779 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2780 break;
2781 case TYPE_MULMID:
2782 CALLING_CONVENTIONS (function)
2783 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2784 break;
2785 case TYPE_MULMID_N:
2786 CALLING_CONVENTIONS (function)
2787 (e->d[0].p, e->s[1].p, e->s[0].p, size);
2788 break;
2789 case TYPE_REDC_BASECASE:
2790 /* Sources are destroyed, so they're saved and replaced, but a general
2791 approach to this might be better. Note that it's still e->s[0].p and
2792 e->s[1].p that are passed, to get the desired alignments. */
2793 {
2794 mp_limb_t Np;
2795 mp_ptr s0 = refmpn_malloc_limbs (size);
2796 mp_ptr s1 = refmpn_malloc_limbs (size2);
2797 modlimb_invert(Np,e->s[0].p[0]);
2798 Np=-Np;
2799 refmpn_copyi (s0, e->s[0].p, size);
2800 refmpn_copyi (s1, e->s[1].p, size2);
2801
2802 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2803 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2804 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,e->s[1].p,
2805 e->s[0].p, size,Np);
2806 refmpn_copyi (e->s[0].p, s0, size);
2807 refmpn_copyi (e->s[1].p, s1, size2);
2808 free (s0);
2809 free (s1);
2810 }
2811
2812 break;
2813 case TYPE_MUL_N:
2814 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2815 break;
2816 case TYPE_SQR:
2817 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2818 break;
2819
2820 case TYPE_UMUL_PPMM:
2821 e->retval = CALLING_CONVENTIONS (function)
2822 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2823 break;
2824 case TYPE_UMUL_PPMM_R:
2825 e->retval = CALLING_CONVENTIONS (function)
2826 (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2827 break;
2828
2829 case TYPE_LSHIFTC:
2830 case TYPE_LSHIFT:
2831 case TYPE_RSHIFT:
2832 e->retval = CALLING_CONVENTIONS (function)
2833 (e->d[0].p, e->s[0].p, size, shift);
2834 break;
2835
2836 case TYPE_NOT:
2837 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2838 break;
2839 case TYPE_HALF:
2840 case TYPE_DOUBLE:
2841 e->retval=CALLING_CONVENTIONS (function) (e->d[0].p, size);
2842 break;
2843 case TYPE_POPCOUNT:
2844 e->retval = (* (unsigned long (*)(ANYARGS))
2845 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2846 break;
2847 case TYPE_HAMDIST:
2848 e->retval = (* (unsigned long (*)(ANYARGS))
2849 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2850 break;
2851
2852 case TYPE_SQRTREM:
2853 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2854 (e->d[0].p, e->d[1].p, e->s[0].p, size);
2855 break;
2856
2857 case TYPE_ZERO:
2858 CALLING_CONVENTIONS (function) (e->d[0].p, size);
2859 break;
2860 case TYPE_STORE:
2861 CALLING_CONVENTIONS (function) (e->d[0].p, size,4354);
2862 break;
2863
2864 case TYPE_GET_STR:
2865 {
2866 size_t sizeinbase, fill;
2867 char *dst;
2868 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2869 ASSERT_ALWAYS (sizeinbase <= d[0].size);
2870 fill = d[0].size - sizeinbase;
2871 if (d[0].high)
2872 {
2873 memset (e->d[0].p, 0xBA, fill);
2874 dst = (char *) e->d[0].p + fill;
2875 }
2876 else
2877 {
2878 dst = (char *) e->d[0].p;
2879 memset (dst + sizeinbase, 0xBA, fill);
2880 }
2881 if (POW2_P (base))
2882 {
2883 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2884 e->s[0].p, size);
2885 }
2886 else
2887 {
2888 refmpn_copy (e->d[1].p, e->s[0].p, size);
2889 e->retval = CALLING_CONVENTIONS (function) (dst, base,
2890 e->d[1].p, size);
2891 }
2892 refmpn_zero (e->d[1].p, size); /* cloberred or unused */
2893 }
2894 break;
2895
2896 #ifdef EXTRA_CALL
2897 EXTRA_CALL
2898 #endif
2899
2900 default:
2901 printf ("Unknown routine type %d\n", choice->type);
2902 abort ();
2903 break;
2904 }
2905 }
2906
2907
2908 void
pointer_setup(struct each_t * e)2909 pointer_setup (struct each_t *e)
2910 {
2911 int i, j;
2912
2913 for (i = 0; i < NUM_DESTS; i++)
2914 {
2915 switch (tr->dst_size[i]) {
2916 case 0:
2917 case SIZE_RETVAL: /* will be adjusted later */
2918 d[i].size = size;
2919 break;
2920
2921 case SIZE_1:
2922 d[i].size = 1;
2923 break;
2924 case SIZE_2:
2925 d[i].size = 2;
2926 break;
2927 case SIZE_3:
2928 d[i].size = 3;
2929 break;
2930 case SIZE_4:
2931 d[i].size = 4;
2932 break;
2933
2934 case SIZE_PLUS_1:
2935 d[i].size = size+1;
2936 break;
2937 case SIZE_PLUS_2:
2938 d[i].size = size+2;
2939 break;
2940 case SIZE_PLUS_MSIZE_SUB_1:
2941 d[i].size = size + tr->msize - 1;
2942 break;
2943
2944 case SIZE_SUM:
2945 if (tr->size2)
2946 d[i].size = size + size2;
2947 else
2948 d[i].size = 2*size;
2949 break;
2950
2951 case SIZE_SIZE2:
2952 d[i].size = size2;
2953 break;
2954
2955 case SIZE_DIFF:
2956 d[i].size = size - size2;
2957 break;
2958
2959 case SIZE_DIFF_PLUS_1:
2960 d[i].size = size - size2 + 1;
2961 break;
2962
2963 case SIZE_DIFF_PLUS_3:
2964 d[i].size = size - size2 + 3;
2965 break;
2966
2967 case SIZE_CEIL_HALF:
2968 d[i].size = (size+1)/2;
2969 break;
2970
2971 case SIZE_GET_STR:
2972 {
2973 mp_limb_t ff = GMP_NUMB_MAX;
2974 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2975 }
2976 break;
2977
2978 default:
2979 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2980 abort ();
2981 }
2982 }
2983
2984 /* establish e->d[].p destinations */
2985 for (i = 0; i < NUM_DESTS; i++)
2986 {
2987 mp_size_t offset = 0;
2988
2989 /* possible room for overlapping sources */
2990 for (j = 0; j < numberof (overlap->s); j++)
2991 if (overlap->s[j] == i)
2992 offset = MAX (offset, s[j].align);
2993
2994 if (d[i].high)
2995 {
2996 if (tr->dst_bytes[i])
2997 {
2998 e->d[i].p = (mp_ptr)
2999 ((char *) (e->d[i].region.ptr + e->d[i].region.size)
3000 - d[i].size - d[i].align);
3001 }
3002 else
3003 {
3004 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
3005 - d[i].size - d[i].align;
3006 if (tr->overlap == OVERLAP_LOW_TO_HIGH)
3007 e->d[i].p -= offset;
3008 }
3009 }
3010 else
3011 {
3012 if (tr->dst_bytes[i])
3013 {
3014 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
3015 }
3016 else
3017 {
3018 e->d[i].p = e->d[i].region.ptr + d[i].align;
3019 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3020 e->d[i].p += offset;
3021 }
3022 }
3023 }
3024
3025 /* establish e->s[].p sources */
3026 for (i = 0; i < NUM_SOURCES; i++)
3027 {
3028 int o = overlap->s[i];
3029 switch (o) {
3030 case -1:
3031 /* no overlap */
3032 e->s[i].p = s[i].p;
3033 break;
3034 case 0:
3035 case 1:
3036 /* overlap with d[o] */
3037 if (tr->overlap == OVERLAP_HIGH_TO_LOW)
3038 e->s[i].p = e->d[o].p - s[i].align;
3039 else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
3040 e->s[i].p = e->d[o].p + s[i].align;
3041 else if (tr->size2 == SIZE_FRACTION)
3042 e->s[i].p = e->d[o].p + size2;
3043 else
3044 e->s[i].p = e->d[o].p;
3045 break;
3046 default:
3047 abort();
3048 break;
3049 }
3050 }
3051 }
3052
3053
3054 void
validate_fail(void)3055 validate_fail (void)
3056 {
3057 if (tr->reference)
3058 {
3059 trap_location = TRAP_REF;
3060 call (&ref, tr->reference);
3061 trap_location = TRAP_NOWHERE;
3062 }
3063
3064 print_all();
3065 abort();
3066 }
3067
3068
3069 void
try_one(void)3070 try_one (void)
3071 {
3072 int i;
3073
3074 if (option_spinner)
3075 spinner();
3076 spinner_count++;
3077
3078 trap_location = TRAP_SETUPS;
3079
3080 if (tr->divisor == DIVISOR_NORM)
3081 divisor |= GMP_NUMB_HIGHBIT;
3082 if (tr->divisor == DIVISOR_ODD)
3083 divisor |= 1;
3084
3085 for (i = 0; i < NUM_SOURCES; i++)
3086 {
3087 if (s[i].high)
3088 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
3089 else
3090 s[i].p = s[i].region.ptr + s[i].align;
3091 }
3092
3093 pointer_setup (&ref);
3094 pointer_setup (&fun);
3095
3096 ref.retval = 0x04152637;
3097 fun.retval = 0x8C9DAEBF;
3098
3099 t_random (multiplier_N, tr->msize);
3100
3101 for (i = 0; i < NUM_SOURCES; i++)
3102 {
3103 if (! tr->src[i])
3104 continue;
3105
3106 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
3107 t_random (s[i].p, SRC_SIZE(i));
3108
3109 switch (tr->data) {
3110 case DATA_NON_ZERO:
3111 if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
3112 s[i].p[0] = 1;
3113 break;
3114
3115 case DATA_MULTIPLE_DIVISOR:
3116 /* same number of low zero bits as divisor */
3117 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
3118 refmpn_sub_1 (s[i].p, s[i].p, size,
3119 refmpn_mod_1 (s[i].p, size, divisor));
3120 break;
3121
3122 case DATA_GCD:
3123 /* s[1] no more bits than s[0] */
3124 if (i == 1 && size2 == size)
3125 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
3126
3127 /* high limb non-zero */
3128 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
3129
3130 /* odd */
3131 s[i].p[0] |= 1;
3132 break;
3133
3134 case DATA_SRC1_ODD:
3135 if (i == 1)
3136 s[i].p[0] |= 1;
3137 break;
3138
3139 case DATA_SRC0_ODD:
3140 if (i == 0)
3141 s[i].p[0] |= 1;
3142 break;
3143
3144 case DATA_SRC1_HIGHBIT:
3145 if (i == 1)
3146 {
3147 if (tr->size2)
3148 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
3149 else
3150 s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
3151 }
3152 break;
3153
3154 case DATA_UDIV_QRNND:
3155 s[i].p[1] %= divisor;
3156 break;
3157 }
3158
3159 mprotect_region (&s[i].region, PROT_READ);
3160 }
3161
3162 for (i = 0; i < NUM_DESTS; i++)
3163 {
3164 if (! tr->dst[i])
3165 continue;
3166
3167 if (tr->dst0_from_src1 && i==0)
3168 {
3169 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1));
3170 mp_size_t fill = MAX (0, d[0].size - copy);
3171 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
3172 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
3173 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
3174 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
3175 }
3176 else if (tr->dst_bytes[i])
3177 {
3178 memset (ref.d[i].p, 0xBA, d[i].size);
3179 memset (fun.d[i].p, 0xBA, d[i].size);
3180 }
3181 else
3182 {
3183 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
3184 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
3185 }
3186 }
3187
3188 for (i = 0; i < NUM_SOURCES; i++)
3189 {
3190 if (! tr->src[i])
3191 continue;
3192
3193 if (ref.s[i].p != s[i].p)
3194 {
3195 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
3196 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
3197 }
3198 }
3199
3200 if (option_print)
3201 print_all();
3202
3203 if (tr->validate != NULL)
3204 {
3205 trap_location = TRAP_FUN;
3206 call (&fun, choice->function);
3207 trap_location = TRAP_NOWHERE;
3208
3209 if (! CALLING_CONVENTIONS_CHECK ())
3210 {
3211 print_all();
3212 abort();
3213 }
3214
3215 (*tr->validate) ();
3216 }
3217 else
3218 {
3219 trap_location = TRAP_REF;
3220 call (&ref, tr->reference);
3221 trap_location = TRAP_FUN;
3222 call (&fun, choice->function);
3223 trap_location = TRAP_NOWHERE;
3224
3225 if (! CALLING_CONVENTIONS_CHECK ())
3226 {
3227 print_all();
3228 abort();
3229 }
3230
3231 compare ();
3232 }
3233 }
3234
3235
/* Loop header stepping the global `size' over the operand sizes to test.
   It starts at the largest of option_firstsize, the routine's
   choice->minsize and the smallest size the routine type allows (0 when
   tr->size is SIZE_ALLOW_ZERO, otherwise 1), and runs up to and including
   option_lastsize.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
       size <= option_lastsize;                                 \
       size++)

/* First value of size2 for the current `size', selected by tr->size2:
   SIZE_2 gives 2, SIZE_FRACTION gives option_firstsize2, SIZE_DOUBLE gives
   size*2, SIZE_DOUBLE_MINUS_1 gives size*2-1, any other non-zero setting
   gives the larger of choice->minsize and option_firstsize2 (or 1 when
   option_firstsize2 is zero), and zero (no second size) gives 0.  */
#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
   : tr->size2 == SIZE_DOUBLE ? size*2                  \
   : tr->size2 == SIZE_DOUBLE_MINUS_1 ? size*2-1        \
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
                          ? option_firstsize2 : 1))     \
   : 0)

/* Last value of size2 (inclusive) for the current `size', selected by
   tr->size2 in the same way: the fixed settings give a single value equal
   to SIZE2_FIRST, SIZE_FRACTION runs to FRACTION_COUNT-1, any other
   non-zero setting runs up to size, and zero gives 0.  */
#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 == SIZE_DOUBLE ? size*2                  \
   : tr->size2 == SIZE_DOUBLE_MINUS_1 ? size*2-1        \
   : tr->size2 ? size                                   \
   : 0)

/* Loop header stepping the global `size2' from SIZE2_FIRST to SIZE2_LAST.
   Both limits are macros and SIZE2_LAST is re-evaluated on every pass.  */
#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Number of alignments to try for an operand: ALIGNMENTS when COND says the
   operand is in use, otherwise just one.  ALIGN_ITERATION is the matching
   loop header over w[n].align, from 0 to that count minus one.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* High/low placement of an operand: HIGH_LIMIT is 1 when COND is non-zero
   and 0 otherwise, HIGH_COUNT is the number of placements this gives, and
   HIGH_ITERATION is the loop header over w[n].high from 0 to HIGH_LIMIT
   inclusive.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)

/* Largest shift count to try: GMP_NUMB_BITS-1 for routine types that take a
   shift, otherwise 1 so that the shift loop runs exactly once.  */
#define SHIFT_LIMIT                                     \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

/* Loop header stepping the global `shift' from 1 to SHIFT_LIMIT.  */
#define SHIFT_ITERATION \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
3278
3279
3280 void
try_many(void)3281 try_many (void)
3282 {
3283 int i;
3284
3285 {
3286 unsigned long total = 1;
3287
3288 total *= option_repetitions;
3289 total *= option_lastsize;
3290 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
3291 else if (tr->size2 == SIZE_DOUBLE) total *= 1;
3292 else if (tr->size2) total *= (option_lastsize+1)/2;
3293
3294 total *= SHIFT_LIMIT;
3295 total *= MULTIPLIER_COUNT;
3296 total *= DIVISOR_COUNT;
3297 total *= ALTDIV_COUNT;
3298 total *= CARRY_COUNT;
3299 total *= T_RAND_COUNT;
3300
3301 total *= HIGH_COUNT (tr->dst[0]);
3302 total *= HIGH_COUNT (tr->dst[1]);
3303 total *= HIGH_COUNT (tr->src[0]);
3304 total *= HIGH_COUNT (tr->src[1]);
3305 total *= HIGH_COUNT (tr->src[2]);
3306
3307 total *= ALIGN_COUNT (tr->dst[0]);
3308 total *= ALIGN_COUNT (tr->dst[1]);
3309 total *= ALIGN_COUNT (tr->src[0]);
3310 total *= ALIGN_COUNT (tr->src[1]);
3311 total *= ALIGN_COUNT (tr->src[2]);
3312
3313 #if NUM_SOURCES > 3 || NUM_DESTS > 2
3314 #error Need to adjust high_count and align_count above
3315 #endif
3316
3317 total *= OVERLAP_COUNT;
3318
3319 printf ("%s %lu\n", choice->name, total);
3320 }
3321
3322 spinner_count = 0;
3323
3324 for (i = 0; i < option_repetitions; i++)
3325 SIZE_ITERATION
3326 SIZE2_ITERATION
3327
3328 SHIFT_ITERATION
3329 MULTIPLIER_ITERATION
3330 ALTDIV_ITERATION
3331 DIVISOR_ITERATION
3332 CARRY_ITERATION /* must be after divisor */
3333 T_RAND_ITERATION
3334
3335 HIGH_ITERATION(d,0, tr->dst[0])
3336 HIGH_ITERATION(d,1, tr->dst[1])
3337 HIGH_ITERATION(s,0, tr->src[0])
3338 HIGH_ITERATION(s,1, tr->src[1])
3339 HIGH_ITERATION(s,2, tr->src[2])
3340
3341 ALIGN_ITERATION(d,0, tr->dst[0])
3342 ALIGN_ITERATION(d,1, tr->dst[1])
3343 ALIGN_ITERATION(s,0, tr->src[0])
3344 ALIGN_ITERATION(s,1, tr->src[1])
3345 ALIGN_ITERATION(s,2, tr->src[2])
3346
3347 #if NUM_SOURCES > 3 || NUM_DESTS > 2
3348 #error Need to adjust high_iteration and align_iteration above
3349 #endif
3350
3351 OVERLAP_ITERATION
3352 try_one();
3353
3354 printf("\n");
3355 }
3356
3357
3358 /* Usually print_all() doesn't show much, but it might give a hint as to
3359 where the function was up to when it died. */
3360 void
trap(int sig)3361 trap (int sig)
3362 {
3363 const char *name = "noname";
3364
3365 switch (sig) {
3366 case SIGILL: name = "SIGILL"; break;
3367 #ifdef SIGBUS
3368 case SIGBUS: name = "SIGBUS"; break;
3369 #endif
3370 case SIGSEGV: name = "SIGSEGV"; break;
3371 case SIGFPE: name = "SIGFPE"; break;
3372 }
3373
3374 printf ("\n\nSIGNAL TRAP: %s\n", name);
3375
3376 switch (trap_location) {
3377 case TRAP_REF:
3378 printf (" in reference function: %s\n", tr->reference_name);
3379 break;
3380 case TRAP_FUN:
3381 printf (" in test function: %s\n", choice->name);
3382 print_all ();
3383 break;
3384 case TRAP_SETUPS:
3385 printf (" in parameter setups\n");
3386 print_all ();
3387 break;
3388 default:
3389 printf (" somewhere unknown\n");
3390 break;
3391 }
3392 exit (1);
3393 }
3394
3395
3396 void
try_init(void)3397 try_init (void)
3398 {
3399 #if HAVE_GETPAGESIZE
3400 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
3401 know _SC_PAGESIZE. */
3402 pagesize = getpagesize ();
3403 #elif HAVE_SYSCONF
3404 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
3405 {
3406 /* According to the linux man page, sysconf doesn't set errno */
3407 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
3408 exit (1);
3409 }
3410 #elif defined( _MSC_VER )
3411 SYSTEM_INFO si;
3412 GetSystemInfo(&si);
3413 pagesize = si.dwPageSize;
3414 #else
3415 #error Error, error, cannot get page size
3416 #endif
3417
3418 printf ("pagesize is 0x%lX bytes\n", pagesize);
3419
3420 signal (SIGILL, trap);
3421 #ifdef SIGBUS
3422 signal (SIGBUS, trap);
3423 #endif
3424 signal (SIGSEGV, trap);
3425 signal (SIGFPE, trap);
3426
3427 {
3428 int i;
3429
3430 for (i = 0; i < NUM_SOURCES; i++)
3431 {
3432 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
3433 printf ("s[%d] %p to %p (0x%lX bytes)\n",
3434 i, s[i].region.ptr,
3435 s[i].region.ptr + s[i].region.size,
3436 (long) s[i].region.size * BYTES_PER_MP_LIMB);
3437 }
3438
3439 #define INIT_EACH(e,es) \
3440 for (i = 0; i < NUM_DESTS; i++) \
3441 { \
3442 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
3443 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
3444 es, i, e.d[i].region.ptr, \
3445 e.d[i].region.ptr + e.d[i].region.size, \
3446 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \
3447 }
3448
3449 INIT_EACH(ref, "ref");
3450 INIT_EACH(fun, "fun");
3451 }
3452 }
3453
/* Return non-zero if STR matches PATTERN, zero otherwise.  PATTERN may have
   a single '*' wildcard, either as its first character (the rest must then
   match the end of STR) or as its last character (the rest must then match
   the start of STR).  A leading '*' is checked first, so a pattern with
   '*' at both ends is treated as a suffix match that includes the trailing
   '*' literally.  With no wildcard the strings must be equal.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end; strncmp rather than memcmp so that a str shorter than
     the prefix is never read beyond its terminating null */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
3477
3478 void
try_name(const char * name)3479 try_name (const char *name)
3480 {
3481 int found = 0;
3482 int i;
3483
3484 for (i = 0; i < numberof (choice_array); i++)
3485 {
3486 if (strmatch_wild (name, choice_array[i].name))
3487 {
3488 choice = &choice_array[i];
3489 tr = ¶m[choice->type];
3490 try_many ();
3491 found = 1;
3492 }
3493 }
3494
3495 if (!found)
3496 {
3497 printf ("%s unknown\n", name);
3498 /* exit (1); */
3499 }
3500 }
3501
3502
3503 void
usage(const char * prog)3504 usage (const char *prog)
3505 {
3506 int col = 0;
3507 int i;
3508
3509 printf ("Usage: %s [options] function...\n", prog);
3510 printf (" -1 use limb data 1,2,3,etc\n");
3511 printf (" -9 use limb data all 0xFF..FFs\n");
3512 printf (" -a zeros use limb data all zeros\n");
3513 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n");
3514 printf (" -a 2fd use data 0x2FFF...FFFD\n");
3515 printf (" -p print each case tried (try this if seg faulting)\n");
3516 printf (" -R seed random numbers from time()\n");
3517 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS);
3518 printf (" -s size starting size to test\n");
3519 printf (" -S size2 starting size2 to test\n");
3520 printf (" -s s1-s2 range of sizes to test\n");
3521 printf (" -W don't show the spinner (use this in gdb)\n");
3522 printf (" -z disable mprotect() redzones\n");
3523 printf ("Default data is refmpn_random() and refmpn_random2().\n");
3524 printf ("\n");
3525 printf ("Functions that can be tested:\n");
3526
3527 for (i = 0; i < numberof (choice_array); i++)
3528 {
3529 if (col + 1 + strlen (choice_array[i].name) > 79)
3530 {
3531 printf ("\n");
3532 col = 0;
3533 }
3534 printf (" %s", choice_array[i].name);
3535 col += 1 + strlen (choice_array[i].name);
3536 }
3537 printf ("\n");
3538
3539 exit(1);
3540 }
3541
3542
3543 int
main(int argc,char * argv[])3544 main (int argc, char *argv[])
3545 {
3546 int i;
3547
3548 /* unbuffered output */
3549 setbuf (stdout, NULL);
3550 setbuf (stderr, NULL);
3551
3552 /* default trace in hex, and in upper-case so can paste into bc */
3553 mp_trace_base = -16;
3554
3555 param_init ();
3556
3557 {
3558 unsigned long seed = 123;
3559 int opt;
3560
3561 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3562 {
3563 switch (opt) {
3564 case '1':
3565 /* use limb data values 1, 2, 3, ... etc */
3566 option_data = DATA_SEQ;
3567 break;
3568 case '9':
3569 /* use limb data values 0xFFF...FFF always */
3570 option_data = DATA_FFS;
3571 break;
3572 case 'a':
3573 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
3574 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
3575 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
3576 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
3577 else
3578 {
3579 fprintf (stderr, "unrecognised data option: %s\n", optarg);
3580 exit (1);
3581 }
3582 break;
3583 case 'b':
3584 mp_trace_base = atoi (optarg);
3585 break;
3586 case 'E':
3587 /* re-seed */
3588 sscanf (optarg, "%lu", &seed);
3589 printf ("Re-seeding with %lu\n", seed);
3590 break;
3591 case 'p':
3592 option_print = 1;
3593 break;
3594 case 'R':
3595 /* randomize */
3596 seed = time (NULL);
3597 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3598 break;
3599 case 'r':
3600 option_repetitions = atoi (optarg);
3601 break;
3602 case 's':
3603 {
3604 char *p;
3605 option_firstsize = atoi (optarg);
3606 if ((p = strchr (optarg, '-')) != NULL)
3607 option_lastsize = atoi (p+1);
3608 }
3609 break;
3610 case 'S':
3611 /* -S <size> sets the starting size for the second of a two size
3612 routine (like mpn_mul_basecase) */
3613 option_firstsize2 = atoi (optarg);
3614 break;
3615 case 'W':
3616 /* use this when running in the debugger */
3617 option_spinner = 0;
3618 break;
3619 case 'z':
3620 /* disable redzones */
3621 option_redzones = 0;
3622 break;
3623 case '?':
3624 usage (argv[0]);
3625 break;
3626 }
3627 }
3628
3629 gmp_randinit_default (__gmp_rands);
3630 __gmp_rands_initialized = 1;
3631 gmp_randseed_ui (__gmp_rands, seed);
3632 }
3633
3634 try_init();
3635
3636 if (argc <= optind)
3637 usage (argv[0]);
3638
3639 for (i = optind; i < argc; i++)
3640 try_name (argv[i]);
3641
3642 return 0;
3643 }
3644