xref: /dragonfly/contrib/gmp/mpz/fac_ui.c (revision 25a2db75)
1 /* mpz_fac_ui(result, n) -- Set RESULT to N!.
2 
3 Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24 
25 #include "fac_ui.h"
26 
27 
28 static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
29 static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
30 
31 
/* Base-2 logarithm of the longest run of terms that ap_product_small()
   multiplies out directly: that function asserts count <= 2^APCONST, and
   mpz_fac_ui() / odd_product() compare against 1 << APCONST (and the next two
   powers of two) to choose a strategy.  It also enters the size of the st[]
   array declared in mpz_fac_ui().  Must be >= 2.  */
#define APCONST	5
34 
/* Make Z the positive one-limb value N, for a single non-zero limb N.
   Nothing is reallocated here, so Z must already have room for one limb.
   N appears twice in the expansion (once inside ASSERT), so it should be
   free of side effects.  */
#define MPZ_SET_1_NZ(z,n)                       \
  do {                                          \
    mpz_ptr  __set1_z = (z);                    \
    ASSERT ((n) != 0);                          \
    PTR(__set1_z)[0] = (n);                     \
    SIZ(__set1_z) = 1;                          \
  } while (0)
43 
/* DST = SRC * N, for SRC > 0 and a non-zero limb N.
   MPZ_REALLOC is asked for SIZ(SRC)+1 limbs before the multiply and PTR(SRC)
   is only read afterwards, so DST and SRC may be the same variable (they are
   at every use in this file).  The carry limb returned by mpn_mul_1 is always
   stored; it only counts towards the size when it is non-zero.
   N appears twice in the expansion (once inside ASSERT).  */
#define MPZ_MUL_1_POS(dst,src,n)                                        \
  do {                                                                  \
    mpz_ptr    __mul1_w = (dst);                                        \
    mpz_srcptr __mul1_u = (src);                                        \
    mp_size_t  __mul1_un = SIZ(__mul1_u);                               \
    mp_ptr     __mul1_wp;                                               \
    mp_limb_t  __mul1_cy;                                               \
                                                                        \
    ASSERT (__mul1_un > 0);                                             \
    ASSERT ((n) != 0);                                                  \
                                                                        \
    MPZ_REALLOC (__mul1_w, __mul1_un + 1);                              \
    __mul1_wp = PTR(__mul1_w);                                          \
                                                                        \
    __mul1_cy = mpn_mul_1 (__mul1_wp, PTR(__mul1_u), __mul1_un, (n));   \
    __mul1_wp[__mul1_un] = __mul1_cy;                                   \
    SIZ(__mul1_w) = __mul1_un + (__mul1_cy != 0);                       \
  } while (0)
63 
64 
/* When BITS_PER_ULONG equals GMP_LIMB_BITS, byte-swapping an unsigned long
   is delegated to the limb byte swap.  */
#if BITS_PER_ULONG == GMP_LIMB_BITS
#define BSWAP_ULONG(dst, src)	BSWAP_LIMB (dst, src)
#endif
68 
69 /* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
70    by a shift down to get the high part.  But it provoked incorrect code
71    from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode.  This
72    case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
73    can get that directly muxing a 4-byte ulong if it matters enough.  */
74 
#if ! defined (BSWAP_ULONG)
/* Portable fallback: set DST to SRC with the order of its bytes reversed.
   SRC is read once, before any temporary other than its own copy exists,
   and DST is written once at the end, so DST and SRC may be the same
   variable.  Bytes are moved one at a time: the low byte of the source is
   shifted into the bottom of the result on each of the sizeof (unsigned long)
   rounds.

   Every temporary carries the __bswapl_ prefix so that the caller's DST
   expression cannot be captured by a local of the expansion, and the loop
   counter is unsigned because it is compared against a sizeof value.  */
#define BSWAP_ULONG(dst, src)						\
  do {									\
    unsigned long  __bswapl_src = (src);				\
    unsigned long  __bswapl_dst = 0;					\
    unsigned int   __bswapl_i;						\
    for (__bswapl_i = 0;						\
	 __bswapl_i < sizeof (unsigned long);				\
	 __bswapl_i++)							\
      {									\
	__bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF);	\
	__bswapl_src >>= 8;						\
      }									\
    (dst) = __bswapl_dst;						\
  } while (0)
#endif
89 
/* BITREV_ULONG(x,y): set x to the bit reverse of y.

   BSWAP_ULONG first reverses the byte order; the bits inside each byte are
   then reversed by swapping nibbles, then bit pairs, then single bits.  The
   masks come from divisions of ULONG_MAX that are all exact:
     ULONG_MAX/17 = 0x0F0F..0F,  ULONG_MAX/5 = 0x3333..33,
     ULONG_MAX/3  = 0x5555..55.
   Each step keeps the masked low group, shifted up, and the high group,
   shifted down.

   The steps are serialized; they could be written as one big parallel
   expression, and could be improved if the cpu has a nibble/bit swap/muxing
   instruction.  */
#define BITREV_ULONG(x,y)                                               \
  do {                                                                  \
    unsigned long __bitrev_v;                                           \
    BSWAP_ULONG (__bitrev_v, y);                                        \
    __bitrev_v = ((__bitrev_v >> 4) & (ULONG_MAX / 17))                 \
               | ((__bitrev_v & (ULONG_MAX / 17)) << 4);                \
    __bitrev_v = ((__bitrev_v >> 2) & (ULONG_MAX / 5))                  \
               | ((__bitrev_v & (ULONG_MAX / 5)) << 2);                 \
    __bitrev_v = ((__bitrev_v >> 1) & (ULONG_MAX / 3))                  \
               | ((__bitrev_v & (ULONG_MAX / 3)) << 1);                 \
    (x) = __bitrev_v;                                                   \
  } while (0)
103 
104 
105 
106 void
107 mpz_fac_ui (mpz_ptr x, unsigned long n)
108 {
109   unsigned long z, stt;
110   int i, j;
111   mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
112   mp_limb_t d[4];
113 
114   static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
115 
116   if (n < numberof (table))
117     {
118       MPZ_SET_1_NZ (x, table[n]);
119       return;
120     }
121 
122   /*  NOTE : MUST have n>=3 here */
123   ASSERT (n >= 3);
124   /* for estimating the alloc sizes the calculation of these formula's is not
125      exact and also the formulas are only approximations, also we ignore
126      the few "side" calculations, correct allocation seems to speed up the
127      small sizes better, having very little effect on the large sizes */
128 
129   /* estimate space for stack entries see below
130      number of bits for n! is
131      (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
132      2.325748065-n*1.442695041+(n+0.5)*log_2(n)  */
133   umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
134   /* d[1] is 2n/e, d[0] ignored        */
135   count_leading_zeros (z, d[1]);
136   z = GMP_LIMB_BITS - z - 1;	/* z=floor(log_2(2n/e))   */
137   umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
138   /* d=n*floor(log_2(2n/e))   */
139   d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
140   d[1] >>= 2;
141   /* d=n*floor(log_2(2n/e))/4   */
142   z = d[0] + 1;			/* have to ignore any overflow */
143   /* so z is the number of bits wanted for st[0]    */
144 
145 
146   if (n <= ((unsigned long) 1) << (APCONST))
147     {
148       mpz_realloc2 (x, 4 * z);
149       ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
150       return;
151     }
152   if (n <= ((unsigned long) 1) << (APCONST + 1))
153     {				/*  use n!=odd(1,n)*(n/2)!*2^(n/2)         */
154       mpz_init2 (t1, 2 * z);
155       mpz_realloc2 (x, 4 * z);
156       ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
157       ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
158       mpz_mul (x, x, t1);
159       mpz_clear (t1);
160       mpz_mul_2exp (x, x, n / 2);
161       return;
162     }
163   if (n <= ((unsigned long) 1) << (APCONST + 2))
164     {
165       /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
166 	 so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
167       mpz_init2 (t1, 2 * z);
168       mpz_realloc2 (x, 4 * z);
169       for (i = 0; i < 4; i++)
170 	{
171 	  mpz_init2 (st[i], z);
172 	  z >>= 1;
173 	}
174       odd_product (1, n / 2, st);
175       mpz_set (x, st[0]);
176       odd_product (n / 2, n, st);
177       mpz_mul (x, x, x);
178       ASSERT (n / 4 <= FACMUL4 + 6);
179       ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
180       /* must have 2^APCONST odd numbers max */
181       mpz_mul (t1, t1, st[0]);
182       for (i = 0; i < 4; i++)
183 	mpz_clear (st[i]);
184       mpz_mul (x, x, t1);
185       mpz_clear (t1);
186       mpz_mul_2exp (x, x, n / 2 + n / 4);
187       return;
188     }
189 
190   count_leading_zeros (stt, (mp_limb_t) n);
191   stt = GMP_LIMB_BITS - stt + 1 - APCONST;
192 
193   for (i = 0; i < (signed long) stt; i++)
194     {
195       mpz_init2 (st[i], z);
196       z >>= 1;
197     }
198 
199   count_leading_zeros (z, (mp_limb_t) (n / 3));
200   /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
201   z = GMP_LIMB_BITS - z;
202 
203   /*
204      n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
205      where 2^e || n!   3.2^z>n   C_2(a,b)=PRODUCT of odd z such that a<z<=b
206    */
207 
208 
209   mpz_init_set_ui (t1, 1);
210   for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
211     {
212       MPZ_SET_1_NZ (x, 1);
213       for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
214 	if ((signed long) z >= i)
215 	  {
216 	    odd_product (n >> i, n >> (i - 1), st);
217 	    /* largest odd product when j=i=1 then we have
218 	       odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
219 	       so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
220 	       number of bits is n*log_base2(2n/e)/4+1  */
221 	    if (i != j)
222 	      mpz_pow_ui (st[0], st[0], i / j);
223 	    mpz_mul (x, x, st[0]);
224 	  }
225       if ((signed long) z >= j && j != 1)
226 	{
227 	  mpz_mul (t1, t1, x);
228 	  mpz_mul (t1, t1, t1);
229 	}
230     }
231   for (i = 0; i < (signed long) stt; i++)
232     mpz_clear (st[i]);
233   mpz_mul (x, x, t1);
234   mpz_clear (t1);
235   popc_limb (i, (mp_limb_t) n);
236   mpz_mul_2exp (x, x, n - i);
237   return;
238 }
239 
240 /* start,step are mp_limb_t although they will fit in unsigned long	*/
241 static void
242 ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
243 		  unsigned long count, unsigned long nm)
244 {
245   unsigned long a;
246   mp_limb_t b;
247 
248   ASSERT (count <= (((unsigned long) 1) << APCONST));
249 /* count can never be zero ? check this and remove test below */
250   if (count == 0)
251     {
252       MPZ_SET_1_NZ (ret, 1);
253       return;
254     }
255   if (count == 1)
256     {
257       MPZ_SET_1_NZ (ret, start);
258       return;
259     }
260   switch (nm)
261     {
262     case 1:
263       MPZ_SET_1_NZ (ret, start);
264       b = start + step;
265       for (a = 0; a < count - 1; b += step, a++)
266 	MPZ_MUL_1_POS (ret, ret, b);
267       return;
268     case 2:
269       MPZ_SET_1_NZ (ret, start * (start + step));
270       if (count == 2)
271 	return;
272       for (b = start + 2 * step, a = count / 2 - 1; a != 0;
273 	   a--, b += 2 * step)
274 	MPZ_MUL_1_POS (ret, ret, b * (b + step));
275       if (count % 2 == 1)
276 	MPZ_MUL_1_POS (ret, ret, b);
277       return;
278     case 3:
279       if (count == 2)
280 	{
281 	  MPZ_SET_1_NZ (ret, start * (start + step));
282 	  return;
283 	}
284       MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
285       if (count == 3)
286 	return;
287       for (b = start + 3 * step, a = count / 3 - 1; a != 0;
288 	   a--, b += 3 * step)
289 	MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
290       if (count % 3 == 2)
291 	b = b * (b + step);
292       if (count % 3 != 0)
293 	MPZ_MUL_1_POS (ret, ret, b);
294       return;
295     default:			/* ie nm=4      */
296       if (count == 2)
297 	{
298 	  MPZ_SET_1_NZ (ret, start * (start + step));
299 	  return;
300 	}
301       if (count == 3)
302 	{
303 	  MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
304 	  return;
305 	}
306       MPZ_SET_1_NZ (ret,
307 		    start * (start + step) * (start + 2 * step) * (start +
308 								   3 * step));
309       if (count == 4)
310 	return;
311       for (b = start + 4 * step, a = count / 4 - 1; a != 0;
312 	   a--, b += 4 * step)
313 	MPZ_MUL_1_POS (ret, ret,
314 		       b * (b + step) * (b + 2 * step) * (b + 3 * step));
315       if (count % 4 == 2)
316 	b = b * (b + step);
317       if (count % 4 == 3)
318 	b = b * (b + step) * (b + 2 * step);
319       if (count % 4 != 0)
320 	MPZ_MUL_1_POS (ret, ret, b);
321       return;
322     }
323 }
324 
325 /* return value in st[0]
326    odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
327    so st[0] needs enough bits for above, st[1] needs half these bits and
328    st[2] needs 1/4 of these bits etc	*/
329 static void
330 odd_product (unsigned long low, unsigned long high, mpz_t * st)
331 {
332   unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;
333   signed long z;
334 
335   low++;
336   if (low % 2 == 0)
337     low++;
338   if (high == 0)
339     high = 1;
340   if (high % 2 == 0)
341     high--;
342 /* must have high>=low ? check this and remove test below */
343   if (high < low)
344     {
345       MPZ_SET_1_NZ (st[0], 1);
346       return;
347     }
348   if (high == low)
349     {
350       MPZ_SET_1_NZ (st[0], low);
351       return;
352     }
353   if (high <= FACMUL2 + 2)
354     {
355       nm = 2;
356       if (high <= FACMUL3 + 4)
357 	{
358 	  nm = 3;
359 	  if (high <= FACMUL4 + 6)
360 	    nm = 4;
361 	}
362     }
363   high = (high - low) / 2 + 1;	/* high is now count,high<=2^(BITS_PER_ULONG-1) */
364   if (high <= (((unsigned long) 1) << APCONST))
365     {
366       ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
367       return;
368     }
369   count_leading_zeros (n, (mp_limb_t) high);
370 /* assumes clz above is LIMB based not NUMB based */
371   n = GMP_LIMB_BITS - n - APCONST;
372   mask = (((unsigned long) 1) << n);
373   a = mask << 1;
374   mask--;
375 /* have 2^(BITS_IN_N-APCONST) iterations so need
376    (BITS_IN_N-APCONST+1) stack entries	*/
377   for (z = mask; z >= 0; z--)
378     {
379       BITREV_ULONG (y, z);
380       y >>= (BITS_PER_ULONG - n);
381       ap_product_small (st[stn],
382 			(mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
383 			(high + y) >> n, nm);
384       ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
385       stn++;
386       y = stc++;
387       while ((y & 1) == 0)
388 	{
389 	  mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
390 	  stn--;
391 	  y >>= 1;
392 	}
393     }
394   ASSERT (stn == 1);
395   return;
396 }
397