1 /* mpn_tdiv_q -- division for arbitrary size operands.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5 Copyright 2009, 2010 Free Software Foundation, Inc.
6 
7 Copyright 2010 William Hart (modified to work with MPIR functions).
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15 
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19 License for more details.
20 
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23 
24 #include "mpir.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27 
28 /* The DIV_QR_THRESHOLDs should be replaced with DIV_Q_THRESHOLDs */
29 
30 /* Compute Q = N/D with truncation.
31      N = {np,nn}
32      D = {dp,dn}
     Q = {qp,nn-dn+1}
   N and D are both untouched by the computation.
   NOTE: unlike the GNU MP original of this file, this MPIR variant takes no
   scratch argument; the nn+1 limbs of scratch space are allocated internally
   with TMP_ALLOC_LIMBS (and freed before returning).
38 
39    Operand requirements:
40      N >= D > 0
41      dp[dn-1] != 0
42      No overlap between the N, D, and Q areas.
43 
44    This division function does not clobber its input operands, since it is
45    intended to support average-O(qn) division, and for that to be effective, it
46    cannot put requirements on callers to copy a O(nn) operand.
47 
   (Historical note: the GNU MP original took an explicit scratch argument,
   and a caller not needing {np,nn+1} afterwards could pass np for it to
   avoid an allocation and a copy.  In this MPIR variant scratch is always
   TMP-allocated internally, so that optimisation is not available here.)
54 
55    We write nn-dn+1 limbs for the quotient, but return void.  Why not return
56    the most significant quotient limb?  Look at the 4 main code blocks below
57    (consisting of an outer if-else where each arm contains an if-else). It is
58    tricky for the first code block, since the mpn_*_div_q calls will typically
59    generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
60    we generate the most significant quotient limb here, before calling
61    mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
62    critical division case (the SB sub-case in particular) copying is not a good
63    idea.
64 
65    It might make sense to split the if-else parts of the (qn + FUDGE
66    >= dn) blocks into separate functions, since we could promise quite
67    different things to callers in these two cases.  The 'then' case
68    benefits from np=scratch, and it could perhaps even tolerate qp=np,
69    saving some headache for many callers.
70 
71    FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
72    operands, we do not reuse the huge scratch for adjustments.  This can be a
73    serious waste of memory for the largest operands.
74 */
75 
76 /* FUDGE determines when to try getting an approximate quotient from the upper
77    parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
78    for the code to be correct.  */
79 #define FUDGE 5			/* FIXME: tune this */
80 
void
mpn_tdiv_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn)
{
  mp_ptr new_dp, new_np, tp, rp, scratch;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  mp_limb_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  /* Operand requirements (see block comment above): N >= D > 0, non-zero
     divisor top limb, and Q must not overlap N or D.  */
  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));

  ASSERT_ALWAYS (FUDGE >= 2);

  /* Single-limb divisor: mpn_divrem_1 handles everything.  */
  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  /* Working copy of the (possibly shifted) dividend; one extra limb for a
     left-shift carry.  */
  scratch = TMP_ALLOC_LIMBS(nn + 1);

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* Quotient is comparable in size to (or larger than) the divisor:
	 compute the exact quotient with a full division.
	 |________________________|
	                 |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Divisor not normalised: shift N and D left so D's top bit is
	     set, as the mpn_*_div_q routines below require.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  /* Dispatch on operand size: divrem_2, schoolbook, divide-and-
	     conquer, or precomputed-inverse division.  */
	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sb_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
	    }
	  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, new_dp[dn - 1], new_dp[dn - 2]);
          qh = mpn_dc_div_q (qp, new_np, new_nn, new_dp, dn, dinv);
	    }
	  else
	    {
           /* Largest operands: full dn-limb inverse of the divisor.  */
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, new_dp, dn);
           qh = mpn_inv_div_q (qp, new_np, new_nn, new_dp, dn, inv);
	    }
	  /* cy == 0: the shift produced no extra limb, so the division wrote
	     qn-1 low quotient limbs and returned the top limb in qh.  */
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  Saturate the quotient limbs.  */
	      mp_size_t i, n;
	      n = new_nn - dn;
	      for (i = 0; i < n; i++)
		qp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  /* Copy the dividend so {np,nn} stays untouched (the division
	     routines clobber their dividend argument).  */
	  MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
           mpir_invert_pi1(dinv, dh, dp[dn - 2]);
           qh = mpn_sb_div_q (qp, new_np, nn, dp, dn, dinv);
	    }
	  else if (BELOW_THRESHOLD (dn, INV_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn, 2 * INV_DIV_Q_THRESHOLD))
	    {
           mpir_invert_pi1(dinv, dh, dp[dn - 2]);
           qh = mpn_dc_div_q (qp, new_np, nn, dp, dn, dinv);
	    }
	  else
	    {
           mp_ptr inv = TMP_ALLOC_LIMBS(dn);
           mpn_invert(inv, dp, dn);
           qh = mpn_inv_div_q (qp, new_np, nn, dp, dn, inv);
	    }
	  /* No shift happened, so qh is always the top quotient limb.  */
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* Divisor much larger than the quotient: only the high limbs of N
	 and D influence Q (up to a small error).  Compute an approximate
	 quotient from the top 2*qn+1 limbs of N and top qn+1 limbs of D,
	 then correct it below.
	 |________________________|
	           |_________________|  */
      /* tp holds the approximate quotient with one extra low limb (tp[0])
	 that absorbs the approximation error.  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Normalise the truncated operands by a left shift.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  /* Bring in the bits shifted up from the next lower limb of D, so
	     the truncated divisor matches the true high bits of D.  (Safe:
	     this branch has dn > qn + FUDGE >= qn + 2.)  */
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  /* Dispatch as above, but using the *approximate* quotient
	     routines on the (qn+1)-limb truncated divisor.  */
	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else
	    {
           mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);
           mpn_invert(inv, new_dp, qn + 1);
           qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv);
	    }
	  /* As in the first branch: place or saturate the top limb.  */
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  MPN_COPY (new_np, np + nn - new_nn, new_nn);

	  /* Truncated divisor is used read-only; cast away const.  */
	  new_dp = (mp_ptr) dp + dn - (qn + 1);

	  if (qn == 2 - 1)	/* i.e. qn + 1 == 2, the divrem_2 case */
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, DC_DIVAPPR_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, dh, new_dp[qn - 1]);
          qh = mpn_sb_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else if (BELOW_THRESHOLD (qn - 1, INV_DIVAPPR_Q_THRESHOLD))
	    {
          mpir_invert_pi1(dinv, dh, new_dp[qn - 1]);
          qh = mpn_dc_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv);
	    }
	  else
	    {
          mp_ptr inv = TMP_ALLOC_LIMBS(qn + 1);
	       mpn_invert(inv, new_dp, qn + 1);
          qh = mpn_inv_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, inv);
	    }
	  tp[qn] = qh;
	}

      /* Drop the error-absorbing low limb; tp[1..qn] is the candidate
	 quotient.  */
      MPN_COPY (qp, tp + 1, qn);
      /* If the discarded low limb is tiny, the candidate may be one too
	 large (presumably 4 bounds the divappr error -- see the divappr_q
	 routines).  Multiply back and compare against N to decide.  */
      if (UNLIKELY(tp[0] <= 4))
        {
	  mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
	  rn = dn + qn;
	  rn -= rp[rn - 1] == 0;

	  /* Q*D > N means the candidate was too big; fix up by one.  */
          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}
296