xref: /dragonfly/contrib/gmp/mpn/generic/tdiv_qr.c (revision 36a3d1d6)
1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3    qxn is non-zero, generate that many fraction limbs and append them after the
4    other quotient limbs, and update the remainder accordningly.  The input
5    operands are unaffected.
6 
7    Preconditions:
8    1. The most significant limb of of the divisor must be non-zero.
9    2. No argument overlap is permitted.  (??? relax this ???)
10    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
11 
12    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
13    complexity of multiplication.
14 
15 Copyright 1997, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
16 
17 This file is part of the GNU MP Library.
18 
19 The GNU MP Library is free software; you can redistribute it and/or modify
20 it under the terms of the GNU Lesser General Public License as published by
21 the Free Software Foundation; either version 3 of the License, or (at your
22 option) any later version.
23 
24 The GNU MP Library is distributed in the hope that it will be useful, but
25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
27 License for more details.
28 
29 You should have received a copy of the GNU Lesser General Public License
30 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
31 
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
35 
36 
37 void
38 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
39 	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
40 {
41   /* FIXME:
42      1. qxn
43      2. pass allocated storage in additional parameter?
44   */
45   ASSERT_ALWAYS (qxn == 0);
46 
47   ASSERT (qxn >= 0);
48   ASSERT (nn >= 0);
49   ASSERT (dn >= 0);
50   ASSERT (dn == 0 || dp[dn - 1] != 0);
51   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
52   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
53 
54   switch (dn)
55     {
56     case 0:
57       DIVIDE_BY_ZERO;
58 
59     case 1:
60       {
61 	rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
62 	return;
63       }
64 
65     case 2:
66       {
67 	mp_ptr n2p, d2p;
68 	mp_limb_t qhl, cy;
69 	TMP_DECL;
70 	TMP_MARK;
71 	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
72 	  {
73 	    int cnt;
74 	    mp_limb_t dtmp[2];
75 	    count_leading_zeros (cnt, dp[1]);
76 	    cnt -= GMP_NAIL_BITS;
77 	    d2p = dtmp;
78 	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
79 	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
80 	    n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
81 	    cy = mpn_lshift (n2p, np, nn, cnt);
82 	    n2p[nn] = cy;
83 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
84 	    if (cy == 0)
85 	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
86 	    rp[0] = (n2p[0] >> cnt)
87 	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
88 	    rp[1] = (n2p[1] >> cnt);
89 	  }
90 	else
91 	  {
92 	    d2p = (mp_ptr) dp;
93 	    n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
94 	    MPN_COPY (n2p, np, nn);
95 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
96 	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
97 	    rp[0] = n2p[0];
98 	    rp[1] = n2p[1];
99 	  }
100 	TMP_FREE;
101 	return;
102       }
103 
104     default:
105       {
106 	int adjust;
107 	TMP_DECL;
108 	TMP_MARK;
109 	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
110 	if (nn + adjust >= 2 * dn)
111 	  {
112 	    mp_ptr n2p, d2p, q2p;
113 	    mp_limb_t cy;
114 	    int cnt;
115 
116 	    qp[nn - dn] = 0;			  /* zero high quotient limb */
117 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
118 	      {
119 		count_leading_zeros (cnt, dp[dn - 1]);
120 		cnt -= GMP_NAIL_BITS;
121 		d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
122 		mpn_lshift (d2p, dp, dn, cnt);
123 		n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
124 		cy = mpn_lshift (n2p, np, nn, cnt);
125 		n2p[nn] = cy;
126 		nn += adjust;
127 	      }
128 	    else
129 	      {
130 		cnt = 0;
131 		d2p = (mp_ptr) dp;
132 		n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
133 		MPN_COPY (n2p, np, nn);
134 		n2p[nn] = 0;
135 		nn += adjust;
136 	      }
137 
138 	    if (dn < DIV_DC_THRESHOLD)
139 	      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
140 	    else
141 	      {
142 		/* Divide 2*dn / dn limbs as long as the limbs in np last.  */
143 		q2p = qp + nn - dn;
144 		n2p += nn - dn;
145 		do
146 		  {
147 		    q2p -= dn;  n2p -= dn;
148 		    mpn_dc_divrem_n (q2p, n2p, d2p, dn);
149 		    nn -= dn;
150 		  }
151 		while (nn >= 2 * dn);
152 
153 		if (nn != dn)
154 		  {
155 		    mp_limb_t ql;
156 		    n2p -= nn - dn;
157 
158 		    /* We have now dn < nn - dn < 2dn.  Make a recursive call,
159 		       since falling out to the code below isn't pretty.
160 		       Unfortunately, mpn_tdiv_qr returns nn-dn+1 quotient
161 		       limbs, which would overwrite one already generated
162 		       quotient limbs.  Preserve it with an ugly hack.  */
163 		    /* FIXME: This suggests that we should have an
164 		       mpn_tdiv_qr_internal that instead returns the most
165 		       significant quotient limb and move the meat of this
166 		       function there.  */
167 		    /* FIXME: Perhaps call mpn_sb_divrem_mn here for certain
168 		       operand ranges, to decrease overhead for small
169 		       operands?  */
170 		    ql = qp[nn - dn]; /* preserve quotient limb... */
171 		    mpn_tdiv_qr (qp, n2p, 0L, n2p, nn, d2p, dn);
172 		    qp[nn - dn] = ql; /* ...restore it again */
173 		  }
174 	      }
175 
176 
177 	    if (cnt != 0)
178 	      mpn_rshift (rp, n2p, dn, cnt);
179 	    else
180 	      MPN_COPY (rp, n2p, dn);
181 	    TMP_FREE;
182 	    return;
183 	  }
184 
185 	/* When we come here, the numerator/partial remainder is less
186 	   than twice the size of the denominator.  */
187 
188 	  {
189 	    /* Problem:
190 
191 	       Divide a numerator N with nn limbs by a denominator D with dn
192 	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
193 	       compared to dn, conventional division algorithms perform poorly.
194 	       We want an algorithm that has an expected running time that is
195 	       dependent only on qn.
196 
197 	       Algorithm (very informally stated):
198 
199 	       1) Divide the 2 x qn most significant limbs from the numerator
200 		  by the qn most significant limbs from the denominator.  Call
201 		  the result qest.  This is either the correct quotient, but
202 		  might be 1 or 2 too large.  Compute the remainder from the
203 		  division.  (This step is implemented by a mpn_divrem call.)
204 
205 	       2) Is the most significant limb from the remainder < p, where p
206 		  is the product of the most significant limb from the quotient
207 		  and the next(d)?  (Next(d) denotes the next ignored limb from
208 		  the denominator.)  If it is, decrement qest, and adjust the
209 		  remainder accordingly.
210 
211 	       3) Is the remainder >= qest?  If it is, qest is the desired
212 		  quotient.  The algorithm terminates.
213 
214 	       4) Subtract qest x next(d) from the remainder.  If there is
215 		  borrow out, decrement qest, and adjust the remainder
216 		  accordingly.
217 
218 	       5) Skip one word from the denominator (i.e., let next(d) denote
219 		  the next less significant limb.  */
220 
221 	    mp_size_t qn;
222 	    mp_ptr n2p, d2p;
223 	    mp_ptr tp;
224 	    mp_limb_t cy;
225 	    mp_size_t in, rn;
226 	    mp_limb_t quotient_too_large;
227 	    unsigned int cnt;
228 
229 	    qn = nn - dn;
230 	    qp[qn] = 0;				/* zero high quotient limb */
231 	    qn += adjust;			/* qn cannot become bigger */
232 
233 	    if (qn == 0)
234 	      {
235 		MPN_COPY (rp, np, dn);
236 		TMP_FREE;
237 		return;
238 	      }
239 
240 	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
241 	    /* Normalize denominator by shifting it to the left such that its
242 	       most significant bit is set.  Then shift the numerator the same
243 	       amount, to mathematically preserve quotient.  */
244 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
245 	      {
246 		count_leading_zeros (cnt, dp[dn - 1]);
247 		cnt -= GMP_NAIL_BITS;
248 
249 		d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
250 		mpn_lshift (d2p, dp + in, qn, cnt);
251 		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
252 
253 		n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
254 		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
255 		if (adjust)
256 		  {
257 		    n2p[2 * qn] = cy;
258 		    n2p++;
259 		  }
260 		else
261 		  {
262 		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
263 		  }
264 	      }
265 	    else
266 	      {
267 		cnt = 0;
268 		d2p = (mp_ptr) dp + in;
269 
270 		n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
271 		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
272 		if (adjust)
273 		  {
274 		    n2p[2 * qn] = 0;
275 		    n2p++;
276 		  }
277 	      }
278 
279 	    /* Get an approximate quotient using the extracted operands.  */
280 	    if (qn == 1)
281 	      {
282 		mp_limb_t q0, r0;
283 		mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
284 		/* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
285 		   temps here.  This doesn't hurt code quality on any machines
286 		   so we do it unconditionally.  */
287 		gcc272bug_n1 = n2p[1];
288 		gcc272bug_n0 = n2p[0];
289 		gcc272bug_d0 = d2p[0];
290 		udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS,
291 			    gcc272bug_d0 << GMP_NAIL_BITS);
292 		r0 >>= GMP_NAIL_BITS;
293 		n2p[0] = r0;
294 		qp[0] = q0;
295 	      }
296 	    else if (qn == 2)
297 	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
298 	    else if (qn < DIV_DC_THRESHOLD)
299 	      mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn);
300 	    else
301 	      mpn_dc_divrem_n (qp, n2p, d2p, qn);
302 
303 	    rn = qn;
304 	    /* Multiply the first ignored divisor limb by the most significant
305 	       quotient limb.  If that product is > the partial remainder's
306 	       most significant limb, we know the quotient is too large.  This
307 	       test quickly catches most cases where the quotient is too large;
308 	       it catches all cases where the quotient is 2 too large.  */
309 	    {
310 	      mp_limb_t dl, x;
311 	      mp_limb_t h, dummy;
312 
313 	      if (in - 2 < 0)
314 		dl = 0;
315 	      else
316 		dl = dp[in - 2];
317 
318 #if GMP_NAIL_BITS == 0
319 	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB));
320 #else
321 	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
322 	      if (cnt != 0)
323 		x |= dl >> (GMP_NUMB_BITS - cnt);
324 #endif
325 	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
326 
327 	      if (n2p[qn - 1] < h)
328 		{
329 		  mp_limb_t cy;
330 
331 		  mpn_decr_u (qp, (mp_limb_t) 1);
332 		  cy = mpn_add_n (n2p, n2p, d2p, qn);
333 		  if (cy)
334 		    {
335 		      /* The partial remainder is safely large.  */
336 		      n2p[qn] = cy;
337 		      ++rn;
338 		    }
339 		}
340 	    }
341 
342 	    quotient_too_large = 0;
343 	    if (cnt != 0)
344 	      {
345 		mp_limb_t cy1, cy2;
346 
347 		/* Append partially used numerator limb to partial remainder.  */
348 		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
349 		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
350 
351 		/* Update partial remainder with partially used divisor limb.  */
352 		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
353 		if (qn != rn)
354 		  {
355 		    ASSERT_ALWAYS (n2p[qn] >= cy2);
356 		    n2p[qn] -= cy2;
357 		  }
358 		else
359 		  {
360 		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
361 
362 		    quotient_too_large = (cy1 < cy2);
363 		    ++rn;
364 		  }
365 		--in;
366 	      }
367 	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
368 
369 	    tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
370 
371 	    if (in < qn)
372 	      {
373 		if (in == 0)
374 		  {
375 		    MPN_COPY (rp, n2p, rn);
376 		    ASSERT_ALWAYS (rn == dn);
377 		    goto foo;
378 		  }
379 		mpn_mul (tp, qp, qn, dp, in);
380 	      }
381 	    else
382 	      mpn_mul (tp, dp, in, qp, qn);
383 
384 	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
385 	    MPN_COPY (rp + in, n2p, dn - in);
386 	    quotient_too_large |= cy;
387 	    cy = mpn_sub_n (rp, np, tp, in);
388 	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
389 	    quotient_too_large |= cy;
390 	  foo:
391 	    if (quotient_too_large)
392 	      {
393 		mpn_decr_u (qp, (mp_limb_t) 1);
394 		mpn_add_n (rp, rp, dp, dn);
395 	      }
396 	  }
397 	TMP_FREE;
398 	return;
399       }
400     }
401 }
402