/* mpn_mod_1s_2p (ap, n, b, cps)
   Divide {ap, n} by b.  Return the single-limb remainder.
   Requires that b < B / 2.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
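
/* Usage sketch (illustrative only; these functions are internal and are
   normally reached through mpn_mod_1).  A caller fills in the cps[] table
   once per divisor, then reuses it for any number of reductions.  `up'
   and `un' stand for hypothetical operand data.  As the n == 1 path
   below implies, mpn_mod_1s_2p expects the pre-shifted divisor
   b << cps[1]:

     mp_limb_t cps[5], r;
     mpn_mod_1s_2p_cps (cps, b);                        requires b < B / 2
     r = mpn_mod_1s_2p (up, un, b << cps[1], cps);      r = {up, un} mod b
*/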

void
mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 2);

  count_leading_zeros (cnt, b);

  b <<= cnt;
  invert_limb (bi, b);

  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
  udiv_rnd_preinv (B2modb, B1modb, b, bi);
  udiv_rnd_preinv (B3modb, B2modb, b, bi);

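  /* Save the precomputed constants: the inverse of the normalized
     divisor, the normalization shift, and B^k mod b for k = 1, 2, 3
     (shifted back down by cnt; like B1modb above, these need not be
     fully reduced).  */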
  cps[0] = bi;
  cps[1] = cnt;
  cps[2] = B1modb >> cnt;
  cps[3] = B2modb >> cnt;
  cps[4] = B3modb >> cnt;

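  /* Check that the stored B^k mod b values sum without wrapping a limb,
     the condition the accumulation bounds in the main loop rely on.  */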
#if WANT_ASSERT
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 4; i++)
      {
	b += cps[i];
	ASSERT (b >= cps[i]);
      }
  }
#endif
}

mp_limb_t
mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
{
  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];

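  /* Fold high limbs so that an even number of limbs remains for the
     two-limb-per-iteration loop below: three limbs when n is odd (with
     n == 1 handled directly by a single division), two when n is even.  */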
  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  rl = ap[n - 1];
	  bi = cps[0];
	  cnt = cps[1];
	  udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
			     rl << cnt, b, bi);
	  return r >> cnt;
	}

      umul_ppmm (ph, pl, ap[n - 2], B1modb);
      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
      umul_ppmm (rh, rl, ap[n - 1], B2modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
      n--;
    }
  else
    {
      umul_ppmm (rh, rl, ap[n - 1], B1modb);
      add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
    }

  for (i = n - 4; i >= 0; i -= 2)
    {
      /* rr = ap[i]				< B
	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
	    + LO(rr)  * (B^2 mod b)		<= (B-1)(b-1)
	    + HI(rr)  * (B^3 mod b)		<= (B-1)(b-1)
      */
      umul_ppmm (ph, pl, ap[i + 1], B1modb);
      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);

      umul_ppmm (ch, cl, rl, B2modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (rh, rl, rh, B3modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
    }

  bi = cps[0];
  cnt = cps[1];

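  /* Reduce the two-limb residue rh:rl to a single normalized limb: fold
     rh through B mod b, then let the final udiv_qrnnd_preinv produce the
     remainder.  (The disabled #else branch reaches the same state with
     an extra division step.)  */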
#if 1
  umul_ppmm (rh, cl, rh, B1modb);
  add_ssaaaa (rh, rl, rh, rl, 0, cl);
  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
#else
  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
  ASSERT (q <= 2);	/* optimize for small quotient? */
#endif

  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);

  return r >> cnt;
}