/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that tp needs an extra limb.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective, it
   cannot put requirements on callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us from
   reasonably asking for all the scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It is
   tricky for the first code block, since the mpn_*_div_q calls will typically
   generate all nn-dn+1 limbs and return 0 or 1.  I don't see how to fix that
   unless we generate the most significant quotient limb here, before calling
   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
   critical division case (the SB sub-case in particular), copying is not a
   good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE
   >= dn) blocks into separate functions, since we could promise quite
   different things to callers in these two cases.  The 'then' case
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
   saving some headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
   operands, we do not reuse the huge scratch for adjustments.  This can be a
   serious waste of memory for the largest operands.
*/

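/* Usage sketch (illustrative only, not part of the library; the values are
   arbitrary).  Divide a 4-limb numerator by a 2-limb divisor; per the rules
   above the quotient needs nn-dn+1 limbs and the scratch area nn+1 limbs:

     mp_limb_t np[4] = {5, 6, 7, 8};
     mp_limb_t dp[2] = {1, 3};
     mp_limb_t qp[4 - 2 + 1];
     mp_limb_t scratch[4 + 1];
     mpn_div_q (qp, np, 4, dp, 2, scratch);

   Passing np itself as the scratch argument is also allowed when the caller
   no longer needs N, but np must then have room for nn+1 limbs.  */
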
/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */

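/* Illustration of the FUDGE condition (hypothetical sizes): with nn = 1000
   and dn = 990 we get qn = 11, so qn + FUDGE = 16 < dn and only the top
   2*qn+1 = 23 dividend limbs and qn+1 = 12 divisor limbs enter an
   approximate division that is adjusted afterwards.  With nn = 1000 and
   dn = 500 we get qn = 501, qn + FUDGE >= dn, and a full-length division
   is used instead.  */
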
#define DC_DIV_Q_THRESHOLD    DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD    MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif

void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
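	  /* The slow condition below can be read as an interpolation between
	     the two MU thresholds: 2*(MU-MUPI)*dn + MUPI*nn > dn*nn reduces
	     to dn < MU_DIV_Q_THRESHOLD when nn == 2*dn, and approaches
	     dn < MUPI_DIV_Q_THRESHOLD as nn/dn grows, so the effective dn
	     cutoff for preferring dcpi1 over mu slides between the two.  */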
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 the mpn_*_div_q call returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* |________________________|
                |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

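      /* Compute a (qn+1)-limb approximate quotient from just the top 2*qn+1
	 limbs of N and the top qn+1 limbs of D; the extra low quotient limb
	 absorbs most of the truncation error, and the final multiply-and-
	 compare adjustment below handles the rest.  */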
      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
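	  /* Bring in the bits shifted out of the next lower divisor limb, so
	     new_dp holds the top qn+1 limbs of the normalised divisor.  */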
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

	  new_dp = (mp_ptr) dp + dn - (qn + 1);

	  if (qn == 2 - 1)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  tp[qn] = qh;
	}
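      /* The approximate quotient is in tp with one extra low limb; drop that
	 limb to form Q.  If the dropped limb exceeds the small divappr error
	 (the test below assumes at most 4 units in the low limb), the error
	 cannot reach Q.  Otherwise multiply Q back by D and compare against
	 N, decrementing Q if it is one too large.  */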
      MPN_COPY (qp, tp + 1, qn);
      if (tp[0] <= 4)
	{
	  mp_size_t rn;

	  rp = TMP_ALLOC_LIMBS (dn + qn);
	  mpn_mul (rp, dp, dn, tp + 1, qn);
	  rn = dn + qn;
	  rn -= rp[rn - 1] == 0;

	  if (rn > nn || mpn_cmp (np, rp, nn) < 0)
	    mpn_decr_u (qp, 1);
	}
    }

  TMP_FREE;
}
313