1 /* mpz_fac_ui(result, n) -- Set RESULT to N!.
2
3 Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
4 Foundation, Inc.
5
6 This file is part of the GNU MP Library.
7
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
20
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24
25 #include "fac_ui.h"
26
27
28 static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
29 static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
30
31
32 /* must be >=2 */
33 #define APCONST 5
34
35 /* for single non-zero limb */
36 #define MPZ_SET_1_NZ(z,n) \
37 do { \
38 mpz_ptr __z = (z); \
39 ASSERT ((n) != 0); \
40 PTR(__z)[0] = (n); \
41 SIZ(__z) = 1; \
42 } while (0)
43
44 /* for src>0 and n>0 */
45 #define MPZ_MUL_1_POS(dst,src,n) \
46 do { \
47 mpz_ptr __dst = (dst); \
48 mpz_srcptr __src = (src); \
49 mp_size_t __size = SIZ(__src); \
50 mp_ptr __dst_p; \
51 mp_limb_t __c; \
52 \
53 ASSERT (__size > 0); \
54 ASSERT ((n) != 0); \
55 \
56 MPZ_REALLOC (__dst, __size+1); \
57 __dst_p = PTR(__dst); \
58 \
59 __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n); \
60 __dst_p[__size] = __c; \
61 SIZ(__dst) = __size + (__c != 0); \
62 } while (0)
63
64
65 #if BITS_PER_ULONG == GMP_LIMB_BITS
66 #define BSWAP_ULONG(x,y) BSWAP_LIMB(x,y)
67 #endif
68
69 /* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
70 by a shift down to get the high part. But it provoked incorrect code
71 from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode. This
72 case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
73 can get that directly muxing a 4-byte ulong if it matters enough. */
74
75 #if ! defined (BSWAP_ULONG)
76 #define BSWAP_ULONG(dst, src) \
77 do { \
78 unsigned long __bswapl_src = (src); \
79 unsigned long __bswapl_dst = 0; \
80 int __i; \
81 for (__i = 0; __i < sizeof(unsigned long); __i++) \
82 { \
83 __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF); \
84 __bswapl_src >>= 8; \
85 } \
86 (dst) = __bswapl_dst; \
87 } while (0)
88 #endif
89
90 /* x is bit reverse of y */
91 /* Note the divides below are all exact */
92 #define BITREV_ULONG(x,y) \
93 do { \
94 unsigned long __dst; \
95 BSWAP_ULONG(__dst,y); \
96 __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
97 __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4) ); \
98 __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2) ); \
99 (x) = __dst; \
100 } while(0)
101 /* above could be improved if cpu has a nibble/bit swap/muxing instruction */
102 /* above code is serialized, possible to write as a big parallel expression */
103
104
105
106 void
mpz_fac_ui(mpz_ptr x,unsigned long n)107 mpz_fac_ui (mpz_ptr x, unsigned long n)
108 {
109 unsigned long z, stt;
110 int i, j;
111 mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
112 mp_limb_t d[4];
113
114 static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
115
116 if (n < numberof (table))
117 {
118 MPZ_SET_1_NZ (x, table[n]);
119 return;
120 }
121
122 /* NOTE : MUST have n>=3 here */
123 ASSERT (n >= 3);
124 /* for estimating the alloc sizes the calculation of these formula's is not
125 exact and also the formulas are only approximations, also we ignore
126 the few "side" calculations, correct allocation seems to speed up the
127 small sizes better, having very little effect on the large sizes */
128
129 /* estimate space for stack entries see below
130 number of bits for n! is
131 (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
132 2.325748065-n*1.442695041+(n+0.5)*log_2(n) */
133 umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
134 /* d[1] is 2n/e, d[0] ignored */
135 count_leading_zeros (z, d[1]);
136 z = GMP_LIMB_BITS - z - 1; /* z=floor(log_2(2n/e)) */
137 umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
138 /* d=n*floor(log_2(2n/e)) */
139 d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
140 d[1] >>= 2;
141 /* d=n*floor(log_2(2n/e))/4 */
142 z = d[0] + 1; /* have to ignore any overflow */
143 /* so z is the number of bits wanted for st[0] */
144
145
146 if (n <= ((unsigned long) 1) << (APCONST))
147 {
148 mpz_realloc2 (x, 4 * z);
149 ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
150 return;
151 }
152 if (n <= ((unsigned long) 1) << (APCONST + 1))
153 { /* use n!=odd(1,n)*(n/2)!*2^(n/2) */
154 mpz_init2 (t1, 2 * z);
155 mpz_realloc2 (x, 4 * z);
156 ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
157 ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
158 mpz_mul (x, x, t1);
159 mpz_clear (t1);
160 mpz_mul_2exp (x, x, n / 2);
161 return;
162 }
163 if (n <= ((unsigned long) 1) << (APCONST + 2))
164 {
165 /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
166 so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
167 mpz_init2 (t1, 2 * z);
168 mpz_realloc2 (x, 4 * z);
169 for (i = 0; i < 4; i++)
170 {
171 mpz_init2 (st[i], z);
172 z >>= 1;
173 }
174 odd_product (1, n / 2, st);
175 mpz_set (x, st[0]);
176 odd_product (n / 2, n, st);
177 mpz_mul (x, x, x);
178 ASSERT (n / 4 <= FACMUL4 + 6);
179 ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
180 /* must have 2^APCONST odd numbers max */
181 mpz_mul (t1, t1, st[0]);
182 for (i = 0; i < 4; i++)
183 mpz_clear (st[i]);
184 mpz_mul (x, x, t1);
185 mpz_clear (t1);
186 mpz_mul_2exp (x, x, n / 2 + n / 4);
187 return;
188 }
189
190 count_leading_zeros (stt, (mp_limb_t) n);
191 stt = GMP_LIMB_BITS - stt + 1 - APCONST;
192
193 for (i = 0; i < (signed long) stt; i++)
194 {
195 mpz_init2 (st[i], z);
196 z >>= 1;
197 }
198
199 count_leading_zeros (z, (mp_limb_t) (n / 3));
200 /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
201 z = GMP_LIMB_BITS - z;
202
203 /*
204 n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
205 where 2^e || n! 3.2^z>n C_2(a,b)=PRODUCT of odd z such that a<z<=b
206 */
207
208
209 mpz_init_set_ui (t1, 1);
210 for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
211 {
212 MPZ_SET_1_NZ (x, 1);
213 for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
214 if ((signed long) z >= i)
215 {
216 odd_product (n >> i, n >> (i - 1), st);
217 /* largest odd product when j=i=1 then we have
218 odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
219 so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
220 number of bits is n*log_base2(2n/e)/4+1 */
221 if (i != j)
222 mpz_pow_ui (st[0], st[0], i / j);
223 mpz_mul (x, x, st[0]);
224 }
225 if ((signed long) z >= j && j != 1)
226 {
227 mpz_mul (t1, t1, x);
228 mpz_mul (t1, t1, t1);
229 }
230 }
231 for (i = 0; i < (signed long) stt; i++)
232 mpz_clear (st[i]);
233 mpz_mul (x, x, t1);
234 mpz_clear (t1);
235 popc_limb (i, (mp_limb_t) n);
236 mpz_mul_2exp (x, x, n - i);
237 return;
238 }
239
240 /* start,step are mp_limb_t although they will fit in unsigned long */
241 static void
ap_product_small(mpz_t ret,mp_limb_t start,mp_limb_t step,unsigned long count,unsigned long nm)242 ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
243 unsigned long count, unsigned long nm)
244 {
245 unsigned long a;
246 mp_limb_t b;
247
248 ASSERT (count <= (((unsigned long) 1) << APCONST));
249 /* count can never be zero ? check this and remove test below */
250 if (count == 0)
251 {
252 MPZ_SET_1_NZ (ret, 1);
253 return;
254 }
255 if (count == 1)
256 {
257 MPZ_SET_1_NZ (ret, start);
258 return;
259 }
260 switch (nm)
261 {
262 case 1:
263 MPZ_SET_1_NZ (ret, start);
264 b = start + step;
265 for (a = 0; a < count - 1; b += step, a++)
266 MPZ_MUL_1_POS (ret, ret, b);
267 return;
268 case 2:
269 MPZ_SET_1_NZ (ret, start * (start + step));
270 if (count == 2)
271 return;
272 for (b = start + 2 * step, a = count / 2 - 1; a != 0;
273 a--, b += 2 * step)
274 MPZ_MUL_1_POS (ret, ret, b * (b + step));
275 if (count % 2 == 1)
276 MPZ_MUL_1_POS (ret, ret, b);
277 return;
278 case 3:
279 if (count == 2)
280 {
281 MPZ_SET_1_NZ (ret, start * (start + step));
282 return;
283 }
284 MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
285 if (count == 3)
286 return;
287 for (b = start + 3 * step, a = count / 3 - 1; a != 0;
288 a--, b += 3 * step)
289 MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
290 if (count % 3 == 2)
291 b = b * (b + step);
292 if (count % 3 != 0)
293 MPZ_MUL_1_POS (ret, ret, b);
294 return;
295 default: /* ie nm=4 */
296 if (count == 2)
297 {
298 MPZ_SET_1_NZ (ret, start * (start + step));
299 return;
300 }
301 if (count == 3)
302 {
303 MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
304 return;
305 }
306 MPZ_SET_1_NZ (ret,
307 start * (start + step) * (start + 2 * step) * (start +
308 3 * step));
309 if (count == 4)
310 return;
311 for (b = start + 4 * step, a = count / 4 - 1; a != 0;
312 a--, b += 4 * step)
313 MPZ_MUL_1_POS (ret, ret,
314 b * (b + step) * (b + 2 * step) * (b + 3 * step));
315 if (count % 4 == 2)
316 b = b * (b + step);
317 if (count % 4 == 3)
318 b = b * (b + step) * (b + 2 * step);
319 if (count % 4 != 0)
320 MPZ_MUL_1_POS (ret, ret, b);
321 return;
322 }
323 }
324
325 /* return value in st[0]
326 odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
327 so st[0] needs enough bits for above, st[1] needs half these bits and
328 st[2] needs 1/4 of these bits etc */
329 static void
odd_product(unsigned long low,unsigned long high,mpz_t * st)330 odd_product (unsigned long low, unsigned long high, mpz_t * st)
331 {
332 unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;
333 signed long z;
334
335 low++;
336 if (low % 2 == 0)
337 low++;
338 if (high == 0)
339 high = 1;
340 if (high % 2 == 0)
341 high--;
342 /* must have high>=low ? check this and remove test below */
343 if (high < low)
344 {
345 MPZ_SET_1_NZ (st[0], 1);
346 return;
347 }
348 if (high == low)
349 {
350 MPZ_SET_1_NZ (st[0], low);
351 return;
352 }
353 if (high <= FACMUL2 + 2)
354 {
355 nm = 2;
356 if (high <= FACMUL3 + 4)
357 {
358 nm = 3;
359 if (high <= FACMUL4 + 6)
360 nm = 4;
361 }
362 }
363 high = (high - low) / 2 + 1; /* high is now count,high<=2^(BITS_PER_ULONG-1) */
364 if (high <= (((unsigned long) 1) << APCONST))
365 {
366 ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
367 return;
368 }
369 count_leading_zeros (n, (mp_limb_t) high);
370 /* assumes clz above is LIMB based not NUMB based */
371 n = GMP_LIMB_BITS - n - APCONST;
372 mask = (((unsigned long) 1) << n);
373 a = mask << 1;
374 mask--;
375 /* have 2^(BITS_IN_N-APCONST) iterations so need
376 (BITS_IN_N-APCONST+1) stack entries */
377 for (z = mask; z >= 0; z--)
378 {
379 BITREV_ULONG (y, z);
380 y >>= (BITS_PER_ULONG - n);
381 ap_product_small (st[stn],
382 (mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
383 (high + y) >> n, nm);
384 ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
385 stn++;
386 y = stc++;
387 while ((y & 1) == 0)
388 {
389 mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
390 stn--;
391 y >>= 1;
392 }
393 }
394 ASSERT (stn == 1);
395 return;
396 }
397