xref: /dragonfly/contrib/gmp/mpn/generic/tdiv_qr.c (revision 6e278935)
1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3    qxn is non-zero, generate that many fraction limbs and append them after the
4    other quotient limbs, and update the remainder accordingly.  The input
5    operands are unaffected.
6 
7    Preconditions:
8    1. The most significant limb of of the divisor must be non-zero.
9    2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10 
11    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12    complexity of multiplication.
13 
14 Copyright 1997, 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
15 
16 This file is part of the GNU MP Library.
17 
18 The GNU MP Library is free software; you can redistribute it and/or modify
19 it under the terms of the GNU Lesser General Public License as published by
20 the Free Software Foundation; either version 3 of the License, or (at your
21 option) any later version.
22 
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
26 License for more details.
27 
28 You should have received a copy of the GNU Lesser General Public License
29 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
30 
31 #include "gmp.h"
32 #include "gmp-impl.h"
33 #include "longlong.h"
34 
35 
36 void
37 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
38 	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
39 {
40   ASSERT_ALWAYS (qxn == 0);
41 
42   ASSERT (nn >= 0);
43   ASSERT (dn >= 0);
44   ASSERT (dn == 0 || dp[dn - 1] != 0);
45   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
46   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
47 
48   switch (dn)
49     {
50     case 0:
51       DIVIDE_BY_ZERO;
52 
53     case 1:
54       {
55 	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
56 	return;
57       }
58 
59     case 2:
60       {
61 	mp_ptr n2p, d2p;
62 	mp_limb_t qhl, cy;
63 	TMP_DECL;
64 	TMP_MARK;
65 	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
66 	  {
67 	    int cnt;
68 	    mp_limb_t dtmp[2];
69 	    count_leading_zeros (cnt, dp[1]);
70 	    cnt -= GMP_NAIL_BITS;
71 	    d2p = dtmp;
72 	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
73 	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
74 	    n2p = TMP_ALLOC_LIMBS (nn + 1);
75 	    cy = mpn_lshift (n2p, np, nn, cnt);
76 	    n2p[nn] = cy;
77 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
78 	    if (cy == 0)
79 	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
80 	    rp[0] = (n2p[0] >> cnt)
81 	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
82 	    rp[1] = (n2p[1] >> cnt);
83 	  }
84 	else
85 	  {
86 	    d2p = (mp_ptr) dp;
87 	    n2p = TMP_ALLOC_LIMBS (nn);
88 	    MPN_COPY (n2p, np, nn);
89 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
90 	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
91 	    rp[0] = n2p[0];
92 	    rp[1] = n2p[1];
93 	  }
94 	TMP_FREE;
95 	return;
96       }
97 
98     default:
99       {
100 	int adjust;
101 	gmp_pi1_t dinv;
102 	TMP_DECL;
103 	TMP_MARK;
104 	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
105 	if (nn + adjust >= 2 * dn)
106 	  {
107 	    mp_ptr n2p, d2p;
108 	    mp_limb_t cy;
109 	    int cnt;
110 
111 	    qp[nn - dn] = 0;			  /* zero high quotient limb */
112 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
113 	      {
114 		count_leading_zeros (cnt, dp[dn - 1]);
115 		cnt -= GMP_NAIL_BITS;
116 		d2p = TMP_ALLOC_LIMBS (dn);
117 		mpn_lshift (d2p, dp, dn, cnt);
118 		n2p = TMP_ALLOC_LIMBS (nn + 1);
119 		cy = mpn_lshift (n2p, np, nn, cnt);
120 		n2p[nn] = cy;
121 		nn += adjust;
122 	      }
123 	    else
124 	      {
125 		cnt = 0;
126 		d2p = (mp_ptr) dp;
127 		n2p = TMP_ALLOC_LIMBS (nn + 1);
128 		MPN_COPY (n2p, np, nn);
129 		n2p[nn] = 0;
130 		nn += adjust;
131 	      }
132 
133 	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
134 	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
135 	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
136 	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
137 		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
138 		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
139 		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
140 	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
141 	    else
142 	      {
143 		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
144 		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
145 		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
146 		n2p = rp;
147 	      }
148 
149 	    if (cnt != 0)
150 	      mpn_rshift (rp, n2p, dn, cnt);
151 	    else
152 	      MPN_COPY (rp, n2p, dn);
153 	    TMP_FREE;
154 	    return;
155 	  }
156 
157 	/* When we come here, the numerator/partial remainder is less
158 	   than twice the size of the denominator.  */
159 
160 	  {
161 	    /* Problem:
162 
163 	       Divide a numerator N with nn limbs by a denominator D with dn
164 	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
165 	       compared to dn, conventional division algorithms perform poorly.
166 	       We want an algorithm that has an expected running time that is
167 	       dependent only on qn.
168 
169 	       Algorithm (very informally stated):
170 
171 	       1) Divide the 2 x qn most significant limbs from the numerator
172 		  by the qn most significant limbs from the denominator.  Call
173 		  the result qest.  This is either the correct quotient, but
174 		  might be 1 or 2 too large.  Compute the remainder from the
175 		  division.  (This step is implemented by a mpn_divrem call.)
176 
177 	       2) Is the most significant limb from the remainder < p, where p
178 		  is the product of the most significant limb from the quotient
179 		  and the next(d)?  (Next(d) denotes the next ignored limb from
180 		  the denominator.)  If it is, decrement qest, and adjust the
181 		  remainder accordingly.
182 
183 	       3) Is the remainder >= qest?  If it is, qest is the desired
184 		  quotient.  The algorithm terminates.
185 
186 	       4) Subtract qest x next(d) from the remainder.  If there is
187 		  borrow out, decrement qest, and adjust the remainder
188 		  accordingly.
189 
190 	       5) Skip one word from the denominator (i.e., let next(d) denote
191 		  the next less significant limb.  */
192 
193 	    mp_size_t qn;
194 	    mp_ptr n2p, d2p;
195 	    mp_ptr tp;
196 	    mp_limb_t cy;
197 	    mp_size_t in, rn;
198 	    mp_limb_t quotient_too_large;
199 	    unsigned int cnt;
200 
201 	    qn = nn - dn;
202 	    qp[qn] = 0;				/* zero high quotient limb */
203 	    qn += adjust;			/* qn cannot become bigger */
204 
205 	    if (qn == 0)
206 	      {
207 		MPN_COPY (rp, np, dn);
208 		TMP_FREE;
209 		return;
210 	      }
211 
212 	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
213 	    /* Normalize denominator by shifting it to the left such that its
214 	       most significant bit is set.  Then shift the numerator the same
215 	       amount, to mathematically preserve quotient.  */
216 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
217 	      {
218 		count_leading_zeros (cnt, dp[dn - 1]);
219 		cnt -= GMP_NAIL_BITS;
220 
221 		d2p = TMP_ALLOC_LIMBS (qn);
222 		mpn_lshift (d2p, dp + in, qn, cnt);
223 		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
224 
225 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
226 		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
227 		if (adjust)
228 		  {
229 		    n2p[2 * qn] = cy;
230 		    n2p++;
231 		  }
232 		else
233 		  {
234 		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
235 		  }
236 	      }
237 	    else
238 	      {
239 		cnt = 0;
240 		d2p = (mp_ptr) dp + in;
241 
242 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
243 		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
244 		if (adjust)
245 		  {
246 		    n2p[2 * qn] = 0;
247 		    n2p++;
248 		  }
249 	      }
250 
251 	    /* Get an approximate quotient using the extracted operands.  */
252 	    if (qn == 1)
253 	      {
254 		mp_limb_t q0, r0;
255 		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
256 		n2p[0] = r0 >> GMP_NAIL_BITS;
257 		qp[0] = q0;
258 	      }
259 	    else if (qn == 2)
260 	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
261 	    else
262 	      {
263 		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
264 		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
265 		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
266 		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
267 		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
268 		else
269 		  {
270 		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
271 		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
272 		    mp_ptr r2p = rp;
273 		    if (np == r2p)	/* If N and R share space, put ... */
274 		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
275 		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
276 		    MPN_COPY (n2p, r2p, qn);
277 		  }
278 	      }
279 
280 	    rn = qn;
281 	    /* Multiply the first ignored divisor limb by the most significant
282 	       quotient limb.  If that product is > the partial remainder's
283 	       most significant limb, we know the quotient is too large.  This
284 	       test quickly catches most cases where the quotient is too large;
285 	       it catches all cases where the quotient is 2 too large.  */
286 	    {
287 	      mp_limb_t dl, x;
288 	      mp_limb_t h, dummy;
289 
290 	      if (in - 2 < 0)
291 		dl = 0;
292 	      else
293 		dl = dp[in - 2];
294 
295 #if GMP_NAIL_BITS == 0
296 	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
297 #else
298 	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
299 	      if (cnt != 0)
300 		x |= dl >> (GMP_NUMB_BITS - cnt);
301 #endif
302 	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
303 
304 	      if (n2p[qn - 1] < h)
305 		{
306 		  mp_limb_t cy;
307 
308 		  mpn_decr_u (qp, (mp_limb_t) 1);
309 		  cy = mpn_add_n (n2p, n2p, d2p, qn);
310 		  if (cy)
311 		    {
312 		      /* The partial remainder is safely large.  */
313 		      n2p[qn] = cy;
314 		      ++rn;
315 		    }
316 		}
317 	    }
318 
319 	    quotient_too_large = 0;
320 	    if (cnt != 0)
321 	      {
322 		mp_limb_t cy1, cy2;
323 
324 		/* Append partially used numerator limb to partial remainder.  */
325 		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
326 		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
327 
328 		/* Update partial remainder with partially used divisor limb.  */
329 		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
330 		if (qn != rn)
331 		  {
332 		    ASSERT_ALWAYS (n2p[qn] >= cy2);
333 		    n2p[qn] -= cy2;
334 		  }
335 		else
336 		  {
337 		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
338 
339 		    quotient_too_large = (cy1 < cy2);
340 		    ++rn;
341 		  }
342 		--in;
343 	      }
344 	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
345 
346 	    tp = TMP_ALLOC_LIMBS (dn);
347 
348 	    if (in < qn)
349 	      {
350 		if (in == 0)
351 		  {
352 		    MPN_COPY (rp, n2p, rn);
353 		    ASSERT_ALWAYS (rn == dn);
354 		    goto foo;
355 		  }
356 		mpn_mul (tp, qp, qn, dp, in);
357 	      }
358 	    else
359 	      mpn_mul (tp, dp, in, qp, qn);
360 
361 	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
362 	    MPN_COPY (rp + in, n2p, dn - in);
363 	    quotient_too_large |= cy;
364 	    cy = mpn_sub_n (rp, np, tp, in);
365 	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
366 	    quotient_too_large |= cy;
367 	  foo:
368 	    if (quotient_too_large)
369 	      {
370 		mpn_decr_u (qp, (mp_limb_t) 1);
371 		mpn_add_n (rp, rp, dp, dn);
372 	      }
373 	  }
374 	TMP_FREE;
375 	return;
376       }
377     }
378 }
379