1 /* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
2 
3    THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
4    INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
5 
6 Copyright 1999-2002, 2010 Free Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of either:
12 
13   * the GNU Lesser General Public License as published by the Free
14     Software Foundation; either version 3 of the License, or (at your
15     option) any later version.
16 
17 or
18 
19   * the GNU General Public License as published by the Free Software
20     Foundation; either version 2 of the License, or (at your option) any
21     later version.
22 
23 or both in parallel, as here.
24 
25 The GNU MP Library is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28 for more details.
29 
30 You should have received copies of the GNU General Public License and the
31 GNU Lesser General Public License along with the GNU MP Library.  If not,
32 see https://www.gnu.org/licenses/.  */
33 
34 #include "gmp-impl.h"
35 #include "longlong.h"
36 
37 
38 /* Use the simple loop by default.  The generic count_trailing_zeros is not
39    very fast, and the extra trickery of method 3 has proven to be less use
40    than might have been though.  */
41 #ifndef JACOBI_BASE_METHOD
42 #define JACOBI_BASE_METHOD  2
43 #endif
44 
45 
46 /* Use count_trailing_zeros.  */
47 #if JACOBI_BASE_METHOD == 1
48 #define PROCESS_TWOS_ANY                                \
49   {                                                     \
50     mp_limb_t  twos;                                    \
51     count_trailing_zeros (twos, a);                     \
52     result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \
53     a >>= twos;                                         \
54   }
55 #define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY
56 #endif
57 
58 /* Use a simple loop.  A disadvantage of this is that there's a branch on a
59    50/50 chance of a 0 or 1 low bit.  */
60 #if JACOBI_BASE_METHOD == 2
61 #define PROCESS_TWOS_EVEN               \
62   {                                     \
63     int  two;                           \
64     two = JACOBI_TWO_U_BIT1 (b);        \
65     do                                  \
66       {                                 \
67 	a >>= 1;                        \
68 	result_bit1 ^= two;             \
69 	ASSERT (a != 0);                \
70       }                                 \
71     while ((a & 1) == 0);               \
72   }
73 #define PROCESS_TWOS_ANY        \
74   if ((a & 1) == 0)             \
75     PROCESS_TWOS_EVEN;
76 #endif
77 
78 /* Process one bit arithmetically, then a simple loop.  This cuts the loop
79    condition down to a 25/75 chance, which should branch predict better.
80    The CPU will need a reasonable variable left shift.  */
81 #if JACOBI_BASE_METHOD == 3
82 #define PROCESS_TWOS_EVEN               \
83   {                                     \
84     int  two, mask, shift;              \
85 					\
86     two = JACOBI_TWO_U_BIT1 (b);        \
87     mask = (~a & 2);                    \
88     a >>= 1;                            \
89 					\
90     shift = (~a & 1);                   \
91     a >>= shift;                        \
92     result_bit1 ^= two ^ (two & mask);  \
93 					\
94     while ((a & 1) == 0)                \
95       {                                 \
96 	a >>= 1;                        \
97 	result_bit1 ^= two;             \
98 	ASSERT (a != 0);                \
99       }                                 \
100   }
101 #define PROCESS_TWOS_ANY                \
102   {                                     \
103     int  two, mask, shift;              \
104 					\
105     two = JACOBI_TWO_U_BIT1 (b);        \
106     shift = (~a & 1);                   \
107     a >>= shift;                        \
108 					\
109     mask = shift << 1;                  \
110     result_bit1 ^= (two & mask);        \
111 					\
112     while ((a & 1) == 0)                \
113       {                                 \
114 	a >>= 1;                        \
115 	result_bit1 ^= two;             \
116 	ASSERT (a != 0);                \
117       }                                 \
118   }
119 #endif
120 
121 #if JACOBI_BASE_METHOD < 4
122 /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
123    with a restricted range of inputs accepted, namely b>1, b odd.
124 
125    The initial result_bit1 is taken as a parameter for the convenience of
126    mpz_kronecker_ui() et al.  The sign changes both here and in those
127    routines accumulate nicely in bit 1, see the JACOBI macros.
128 
129    The return value here is the normal +1, 0, or -1.  Note that +1 and -1
130    have bit 1 in the "BIT1" sense, which could be useful if the caller is
131    accumulating it into some extended calculation.
132 
133    Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
134    possible, but a couple of tests suggest it's not a significant speedup,
135    and may even be a slowdown, so what's here is good enough for now. */
136 
137 int
mpn_jacobi_base(mp_limb_t a,mp_limb_t b,int result_bit1)138 mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
139 {
140   ASSERT (b & 1);  /* b odd */
141   ASSERT (b != 1);
142 
143   if (a == 0)
144     return 0;
145 
146   PROCESS_TWOS_ANY;
147   if (a == 1)
148     goto done;
149 
150   if (a >= b)
151     goto a_gt_b;
152 
153   for (;;)
154     {
155       result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
156       MP_LIMB_T_SWAP (a, b);
157 
158     a_gt_b:
159       do
160 	{
161 	  /* working on (a/b), a,b odd, a>=b */
162 	  ASSERT (a & 1);
163 	  ASSERT (b & 1);
164 	  ASSERT (a >= b);
165 
166 	  if ((a -= b) == 0)
167 	    return 0;
168 
169 	  PROCESS_TWOS_EVEN;
170 	  if (a == 1)
171 	    goto done;
172 	}
173       while (a >= b);
174     }
175 
176  done:
177   return JACOBI_BIT1_TO_PN (result_bit1);
178 }
179 #endif
180 
181 #if JACOBI_BASE_METHOD == 4
182 /* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
183  * parameter. We have no need for the convention that the sign is in
184  * bit 1, internally we use bit 0. */
185 
186 /* FIXME: Could try table-based count_trailing_zeros. */
187 int
mpn_jacobi_base(mp_limb_t a,mp_limb_t b,int bit)188 mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
189 {
190   int c;
191 
192   ASSERT (b & 1);
193   ASSERT (b > 1);
194 
195   if (a == 0)
196     /* This is the only line which depends on b > 1 */
197     return 0;
198 
199   bit >>= 1;
200 
201   /* Below, we represent a and b shifted right so that the least
202      significant one bit is implicit. */
203 
204   b >>= 1;
205 
206   count_trailing_zeros (c, a);
207   bit ^= c & (b ^ (b >> 1));
208 
209   /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
210   a >>= c;
211   a >>= 1;
212 
213   do
214     {
215       mp_limb_t t = a - b;
216       mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
217 
218       if (t == 0)
219 	return 0;
220 
221       /* If b > a, invoke reciprocity */
222       bit ^= (bgta & a & b);
223 
224       /* b <-- min (a, b) */
225       b += (bgta & t);
226 
227       /* a <-- |a - b| */
228       a = (t ^ bgta) - bgta;
229 
230       /* Number of trailing zeros is the same no matter if we look at
231        * t or a, but using t gives more parallelism. */
232       count_trailing_zeros (c, t);
233       c ++;
234       /* (2/b) = -1 if b = 3 or 5 mod 8 */
235       bit ^= c & (b ^ (b >> 1));
236       a >>= c;
237     }
238   while (b > 0);
239 
240   return 1-2*(bit & 1);
241 }
242 #endif /* JACOBI_BASE_METHOD == 4 */
243