1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3    qxn is non-zero, generate that many fraction limbs and append them after the
4    other quotient limbs, and update the remainder accordingly.  The input
5    operands are unaffected.
6 
7    Preconditions:
8    1. The most significant limb of of the divisor must be non-zero.
9    2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10 
11    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12    complexity of multiplication.
13 
14 Copyright 1997, 2000-2002, 2005, 2009 Free Software Foundation, Inc.
15 
16 This file is part of the GNU MP Library.
17 
18 The GNU MP Library is free software; you can redistribute it and/or modify
19 it under the terms of either:
20 
21   * the GNU Lesser General Public License as published by the Free
22     Software Foundation; either version 3 of the License, or (at your
23     option) any later version.
24 
25 or
26 
27   * the GNU General Public License as published by the Free Software
28     Foundation; either version 2 of the License, or (at your option) any
29     later version.
30 
31 or both in parallel, as here.
32 
33 The GNU MP Library is distributed in the hope that it will be useful, but
34 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36 for more details.
37 
38 You should have received copies of the GNU General Public License and the
39 GNU Lesser General Public License along with the GNU MP Library.  If not,
40 see https://www.gnu.org/licenses/.  */
41 
42 #include "gmp.h"
43 #include "gmp-impl.h"
44 #include "longlong.h"
45 
46 
47 void
mpn_tdiv_qr(mp_ptr qp,mp_ptr rp,mp_size_t qxn,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn)48 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
49 	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
50 {
51   ASSERT_ALWAYS (qxn == 0);
52 
53   ASSERT (nn >= 0);
54   ASSERT (dn >= 0);
55   ASSERT (dn == 0 || dp[dn - 1] != 0);
56   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
57   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
58 
59   switch (dn)
60     {
61     case 0:
62       DIVIDE_BY_ZERO;
63 
64     case 1:
65       {
66 	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
67 	return;
68       }
69 
70     case 2:
71       {
72 	mp_ptr n2p, d2p;
73 	mp_limb_t qhl, cy;
74 	TMP_DECL;
75 	TMP_MARK;
76 	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
77 	  {
78 	    int cnt;
79 	    mp_limb_t dtmp[2];
80 	    count_leading_zeros (cnt, dp[1]);
81 	    cnt -= GMP_NAIL_BITS;
82 	    d2p = dtmp;
83 	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
84 	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
85 	    n2p = TMP_ALLOC_LIMBS (nn + 1);
86 	    cy = mpn_lshift (n2p, np, nn, cnt);
87 	    n2p[nn] = cy;
88 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
89 	    if (cy == 0)
90 	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
91 	    rp[0] = (n2p[0] >> cnt)
92 	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
93 	    rp[1] = (n2p[1] >> cnt);
94 	  }
95 	else
96 	  {
97 	    d2p = (mp_ptr) dp;
98 	    n2p = TMP_ALLOC_LIMBS (nn);
99 	    MPN_COPY (n2p, np, nn);
100 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
101 	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
102 	    rp[0] = n2p[0];
103 	    rp[1] = n2p[1];
104 	  }
105 	TMP_FREE;
106 	return;
107       }
108 
109     default:
110       {
111 	int adjust;
112 	gmp_pi1_t dinv;
113 	TMP_DECL;
114 	TMP_MARK;
115 	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
116 	if (nn + adjust >= 2 * dn)
117 	  {
118 	    mp_ptr n2p, d2p;
119 	    mp_limb_t cy;
120 	    int cnt;
121 
122 	    qp[nn - dn] = 0;			  /* zero high quotient limb */
123 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
124 	      {
125 		count_leading_zeros (cnt, dp[dn - 1]);
126 		cnt -= GMP_NAIL_BITS;
127 		d2p = TMP_ALLOC_LIMBS (dn);
128 		mpn_lshift (d2p, dp, dn, cnt);
129 		n2p = TMP_ALLOC_LIMBS (nn + 1);
130 		cy = mpn_lshift (n2p, np, nn, cnt);
131 		n2p[nn] = cy;
132 		nn += adjust;
133 	      }
134 	    else
135 	      {
136 		cnt = 0;
137 		d2p = (mp_ptr) dp;
138 		n2p = TMP_ALLOC_LIMBS (nn + 1);
139 		MPN_COPY (n2p, np, nn);
140 		n2p[nn] = 0;
141 		nn += adjust;
142 	      }
143 
144 	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
145 	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
146 	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
147 	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
148 		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
149 		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
150 		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
151 	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
152 	    else
153 	      {
154 		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
155 		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
156 		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
157 		n2p = rp;
158 	      }
159 
160 	    if (cnt != 0)
161 	      mpn_rshift (rp, n2p, dn, cnt);
162 	    else
163 	      MPN_COPY (rp, n2p, dn);
164 	    TMP_FREE;
165 	    return;
166 	  }
167 
168 	/* When we come here, the numerator/partial remainder is less
169 	   than twice the size of the denominator.  */
170 
171 	  {
172 	    /* Problem:
173 
174 	       Divide a numerator N with nn limbs by a denominator D with dn
175 	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
176 	       compared to dn, conventional division algorithms perform poorly.
177 	       We want an algorithm that has an expected running time that is
178 	       dependent only on qn.
179 
180 	       Algorithm (very informally stated):
181 
182 	       1) Divide the 2 x qn most significant limbs from the numerator
183 		  by the qn most significant limbs from the denominator.  Call
184 		  the result qest.  This is either the correct quotient, but
185 		  might be 1 or 2 too large.  Compute the remainder from the
186 		  division.  (This step is implemented by a mpn_divrem call.)
187 
188 	       2) Is the most significant limb from the remainder < p, where p
189 		  is the product of the most significant limb from the quotient
190 		  and the next(d)?  (Next(d) denotes the next ignored limb from
191 		  the denominator.)  If it is, decrement qest, and adjust the
192 		  remainder accordingly.
193 
194 	       3) Is the remainder >= qest?  If it is, qest is the desired
195 		  quotient.  The algorithm terminates.
196 
197 	       4) Subtract qest x next(d) from the remainder.  If there is
198 		  borrow out, decrement qest, and adjust the remainder
199 		  accordingly.
200 
201 	       5) Skip one word from the denominator (i.e., let next(d) denote
202 		  the next less significant limb.  */
203 
204 	    mp_size_t qn;
205 	    mp_ptr n2p, d2p;
206 	    mp_ptr tp;
207 	    mp_limb_t cy;
208 	    mp_size_t in, rn;
209 	    mp_limb_t quotient_too_large;
210 	    unsigned int cnt;
211 
212 	    qn = nn - dn;
213 	    qp[qn] = 0;				/* zero high quotient limb */
214 	    qn += adjust;			/* qn cannot become bigger */
215 
216 	    if (qn == 0)
217 	      {
218 		MPN_COPY (rp, np, dn);
219 		TMP_FREE;
220 		return;
221 	      }
222 
223 	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
224 	    /* Normalize denominator by shifting it to the left such that its
225 	       most significant bit is set.  Then shift the numerator the same
226 	       amount, to mathematically preserve quotient.  */
227 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
228 	      {
229 		count_leading_zeros (cnt, dp[dn - 1]);
230 		cnt -= GMP_NAIL_BITS;
231 
232 		d2p = TMP_ALLOC_LIMBS (qn);
233 		mpn_lshift (d2p, dp + in, qn, cnt);
234 		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
235 
236 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
237 		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
238 		if (adjust)
239 		  {
240 		    n2p[2 * qn] = cy;
241 		    n2p++;
242 		  }
243 		else
244 		  {
245 		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
246 		  }
247 	      }
248 	    else
249 	      {
250 		cnt = 0;
251 		d2p = (mp_ptr) dp + in;
252 
253 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
254 		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
255 		if (adjust)
256 		  {
257 		    n2p[2 * qn] = 0;
258 		    n2p++;
259 		  }
260 	      }
261 
262 	    /* Get an approximate quotient using the extracted operands.  */
263 	    if (qn == 1)
264 	      {
265 		mp_limb_t q0, r0;
266 		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
267 		n2p[0] = r0 >> GMP_NAIL_BITS;
268 		qp[0] = q0;
269 	      }
270 	    else if (qn == 2)
271 	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
272 	    else
273 	      {
274 		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
275 		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
276 		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
277 		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
278 		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
279 		else
280 		  {
281 		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
282 		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
283 		    mp_ptr r2p = rp;
284 		    if (np == r2p)	/* If N and R share space, put ... */
285 		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
286 		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
287 		    MPN_COPY (n2p, r2p, qn);
288 		  }
289 	      }
290 
291 	    rn = qn;
292 	    /* Multiply the first ignored divisor limb by the most significant
293 	       quotient limb.  If that product is > the partial remainder's
294 	       most significant limb, we know the quotient is too large.  This
295 	       test quickly catches most cases where the quotient is too large;
296 	       it catches all cases where the quotient is 2 too large.  */
297 	    {
298 	      mp_limb_t dl, x;
299 	      mp_limb_t h, dummy;
300 
301 	      if (in - 2 < 0)
302 		dl = 0;
303 	      else
304 		dl = dp[in - 2];
305 
306 #if GMP_NAIL_BITS == 0
307 	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
308 #else
309 	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
310 	      if (cnt != 0)
311 		x |= dl >> (GMP_NUMB_BITS - cnt);
312 #endif
313 	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
314 
315 	      if (n2p[qn - 1] < h)
316 		{
317 		  mp_limb_t cy;
318 
319 		  mpn_decr_u (qp, (mp_limb_t) 1);
320 		  cy = mpn_add_n (n2p, n2p, d2p, qn);
321 		  if (cy)
322 		    {
323 		      /* The partial remainder is safely large.  */
324 		      n2p[qn] = cy;
325 		      ++rn;
326 		    }
327 		}
328 	    }
329 
330 	    quotient_too_large = 0;
331 	    if (cnt != 0)
332 	      {
333 		mp_limb_t cy1, cy2;
334 
335 		/* Append partially used numerator limb to partial remainder.  */
336 		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
337 		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
338 
339 		/* Update partial remainder with partially used divisor limb.  */
340 		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
341 		if (qn != rn)
342 		  {
343 		    ASSERT_ALWAYS (n2p[qn] >= cy2);
344 		    n2p[qn] -= cy2;
345 		  }
346 		else
347 		  {
348 		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
349 
350 		    quotient_too_large = (cy1 < cy2);
351 		    ++rn;
352 		  }
353 		--in;
354 	      }
355 	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
356 
357 	    tp = TMP_ALLOC_LIMBS (dn);
358 
359 	    if (in < qn)
360 	      {
361 		if (in == 0)
362 		  {
363 		    MPN_COPY (rp, n2p, rn);
364 		    ASSERT_ALWAYS (rn == dn);
365 		    goto foo;
366 		  }
367 		mpn_mul (tp, qp, qn, dp, in);
368 	      }
369 	    else
370 	      mpn_mul (tp, dp, in, qp, qn);
371 
372 	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
373 	    MPN_COPY (rp + in, n2p, dn - in);
374 	    quotient_too_large |= cy;
375 	    cy = mpn_sub_n (rp, np, tp, in);
376 	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
377 	    quotient_too_large |= cy;
378 	  foo:
379 	    if (quotient_too_large)
380 	      {
381 		mpn_decr_u (qp, (mp_limb_t) 1);
382 		mpn_add_n (rp, rp, dp, dn);
383 	      }
384 	  }
385 	TMP_FREE;
386 	return;
387       }
388     }
389 }
390