1 /* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
2    inverse, returning quotient and remainder.
3 
4    Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
5 
6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 
41 
42 /* Computes Hensel binary division of {np, 2*n} by {dp, n}.
43 
44    Output:
45 
46       q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
47 
48       r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
49 
50    Stores q at qp. Stores the n least significant limbs of r at the high half
51    of np, and returns the borrow from the subtraction n - q*d.
52 
53    d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
54 
55 mp_size_t
mpn_dcpi1_bdiv_qr_n_itch(mp_size_t n)56 mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
57 {
58   return n;
59 }
60 
61 mp_limb_t
mpn_dcpi1_bdiv_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,mp_limb_t dinv,mp_ptr tp)62 mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
63 		     mp_limb_t dinv, mp_ptr tp)
64 {
65   mp_size_t lo, hi;
66   mp_limb_t cy;
67   mp_limb_t rh;
68 
69   lo = n >> 1;			/* floor(n/2) */
70   hi = n - lo;			/* ceil(n/2) */
71 
72   if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
73     cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
74   else
75     cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
76 
77   mpn_mul (tp, dp + lo, hi, qp, lo);
78 
79   mpn_incr_u (tp + lo, cy);
80   rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
81 
82   if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
83     cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
84   else
85     cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
86 
87   mpn_mul (tp, qp + lo, hi, dp + hi, lo);
88 
89   mpn_incr_u (tp + hi, cy);
90   rh += mpn_sub_n (np + n, np + n, tp, n);
91 
92   return rh;
93 }
94 
95 mp_limb_t
mpn_dcpi1_bdiv_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)96 mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
97 		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
98 {
99   mp_size_t qn;
100   mp_limb_t rr, cy;
101   mp_ptr tp;
102   TMP_DECL;
103 
104   TMP_MARK;
105 
106   ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
107   ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
108   ASSERT (dp[0] & 1);
109 
110   tp = TMP_SALLOC_LIMBS (dn);
111 
112   qn = nn - dn;
113 
114   if (qn > dn)
115     {
116       /* Reduce qn mod dn without division, optimizing small operations.  */
117       do
118 	qn -= dn;
119       while (qn > dn);
120 
121       /* Perform the typically smaller block first.  */
122       if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
123 	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
124       else
125 	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
126 
127       rr = 0;
128       if (qn != dn)
129 	{
130 	  if (qn > dn - qn)
131 	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
132 	  else
133 	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
134 	  mpn_incr_u (tp + qn, cy);
135 
136 	  rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
137 	  cy = 0;
138 	}
139 
140       np += qn;
141       qp += qn;
142 
143       qn = nn - dn - qn;
144       do
145 	{
146 	  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
147 	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
148 	  qp += dn;
149 	  np += dn;
150 	  qn -= dn;
151 	}
152       while (qn > 0);
153       TMP_FREE;
154       return rr + cy;
155     }
156 
157   if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
158     cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
159   else
160     cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
161 
162   rr = 0;
163   if (qn != dn)
164     {
165       if (qn > dn - qn)
166 	mpn_mul (tp, qp, qn, dp + qn, dn - qn);
167       else
168 	mpn_mul (tp, dp + qn, dn - qn, qp, qn);
169       mpn_incr_u (tp + qn, cy);
170 
171       rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
172       cy = 0;
173     }
174 
175   TMP_FREE;
176   return rr + cy;
177 }
178