xref: /netbsd/external/lgpl3/gmp/dist/tune/modlinv.c (revision 671ea119)
1 /* Alternate implementations of binvert_limb to compare speeds. */
2 
3 /*
4 Copyright 2000, 2002 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10 
11   * the GNU Lesser General Public License as published by the Free
12     Software Foundation; either version 3 of the License, or (at your
13     option) any later version.
14 
15 or
16 
17   * the GNU General Public License as published by the Free Software
18     Foundation; either version 2 of the License, or (at your option) any
19     later version.
20 
21 or both in parallel, as here.
22 
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26 for more details.
27 
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library.  If not,
30 see https://www.gnu.org/licenses/.  */
31 
32 #include <stdio.h>
33 #include "gmp-impl.h"
34 #include "longlong.h"
35 #include "speed.h"
36 
37 
38 /* Like the standard version in gmp-impl.h, but with the expressions using a
39    "1-" form.  This has the same number of steps, but "1-" is on the
40    dependent chain, whereas the "2*" in the standard version isn't.
41    Depending on the CPU this should be the same or a touch slower.  */
42 
43 #if GMP_LIMB_BITS <= 32
44 #define binvert_limb_mul1(inv,n)                                \
45   do {                                                          \
46     mp_limb_t  __n = (n);                                       \
47     mp_limb_t  __inv;                                           \
48     ASSERT ((__n & 1) == 1);                                    \
49     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
50     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
51     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
52     ASSERT (__inv * __n == 1);                                  \
53     (inv) = __inv;                                              \
54   } while (0)
55 #endif
56 
57 #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
58 #define binvert_limb_mul1(inv,n)                                \
59   do {                                                          \
60     mp_limb_t  __n = (n);                                       \
61     mp_limb_t  __inv;                                           \
62     ASSERT ((__n & 1) == 1);                                    \
63     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
64     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
65     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
66     __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
67     ASSERT (__inv * __n == 1);                                  \
68     (inv) = __inv;                                              \
69   } while (0)
70 #endif
71 
72 
73 /* The loop based version used in GMP 3.0 and earlier.  Usually slower than
74    multiplying, due to the number of steps that must be performed.  Much
75    slower when the processor has a good multiply.  */
76 
77 #define binvert_limb_loop(inv,n)                \
78   do {                                          \
79     mp_limb_t  __v = (n);                       \
80     mp_limb_t  __v_orig = __v;                  \
81     mp_limb_t  __make_zero = 1;                 \
82     mp_limb_t  __two_i = 1;                     \
83     mp_limb_t  __v_inv = 0;                     \
84                                                 \
85     ASSERT ((__v & 1) == 1);                    \
86                                                 \
87     do                                          \
88       {                                         \
89         while ((__two_i & __make_zero) == 0)    \
90           __two_i <<= 1, __v <<= 1;             \
91         __v_inv += __two_i;                     \
92         __make_zero -= __v;                     \
93       }                                         \
94     while (__make_zero);                        \
95                                                 \
96     ASSERT (__v_orig * __v_inv == 1);           \
97     (inv) = __v_inv;                            \
98   } while (0)
99 
100 
101 /* Another loop based version with conditionals, but doing a fixed number of
102    steps. */
103 
104 #define binvert_limb_cond(inv,n)                \
105   do {                                          \
106     mp_limb_t  __n = (n);                       \
107     mp_limb_t  __rem = (1 - __n) >> 1;          \
108     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
109     int        __count;                         \
110                                                 \
111     ASSERT ((__n & 1) == 1);                    \
112                                                 \
113     __count = GMP_LIMB_BITS-1;               \
114     do                                          \
115       {                                         \
116         __inv >>= 1;                            \
117         if (__rem & 1)                          \
118           {                                     \
119             __inv |= GMP_LIMB_HIGHBIT;          \
120             __rem -= __n;                       \
121           }                                     \
122         __rem >>= 1;                            \
123       }                                         \
124     while (-- __count);                         \
125                                                 \
126     ASSERT (__inv * __n == 1);                  \
127     (inv) = __inv;                              \
128   } while (0)
129 
130 
131 /* Another loop based bitwise version, but purely arithmetic, no
132    conditionals. */
133 
134 #define binvert_limb_arith(inv,n)                                       \
135   do {                                                                  \
136     mp_limb_t  __n = (n);                                               \
137     mp_limb_t  __rem = (1 - __n) >> 1;                                  \
138     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
139     mp_limb_t  __lowbit;                                                \
140     int        __count;                                                 \
141                                                                         \
142     ASSERT ((__n & 1) == 1);                                            \
143                                                                         \
144     __count = GMP_LIMB_BITS-1;                                       \
145     do                                                                  \
146       {                                                                 \
147         __lowbit = __rem & 1;                                           \
148         __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
149         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
150       }                                                                 \
151     while (-- __count);                                                 \
152                                                                         \
153     ASSERT (__inv * __n == 1);                                          \
154     (inv) = __inv;                                                      \
155   } while (0)
156 
157 
158 double
speed_binvert_limb_mul1(struct speed_params * s)159 speed_binvert_limb_mul1 (struct speed_params *s)
160 {
161   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
162 }
163 double
speed_binvert_limb_loop(struct speed_params * s)164 speed_binvert_limb_loop (struct speed_params *s)
165 {
166   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
167 }
168 double
speed_binvert_limb_cond(struct speed_params * s)169 speed_binvert_limb_cond (struct speed_params *s)
170 {
171   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
172 }
173 double
speed_binvert_limb_arith(struct speed_params * s)174 speed_binvert_limb_arith (struct speed_params *s)
175 {
176   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
177 }
178