1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
3    qxn is non-zero, generate that many fraction limbs and append them after the
4    other quotient limbs, and update the remainder accordingly.  The input
5    operands are unaffected.
6 
7    Preconditions:
8    1. The most significant limb of the divisor must be non-zero.
9    2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
10 
11    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12    complexity of multiplication.
13 
14 Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15 
16 This file is part of the GNU MP Library.
17 
18 The GNU MP Library is free software; you can redistribute it and/or modify
19 it under the terms of either:
20 
21   * the GNU Lesser General Public License as published by the Free
22     Software Foundation; either version 3 of the License, or (at your
23     option) any later version.
24 
25 or
26 
27   * the GNU General Public License as published by the Free Software
28     Foundation; either version 2 of the License, or (at your option) any
29     later version.
30 
31 or both in parallel, as here.
32 
33 The GNU MP Library is distributed in the hope that it will be useful, but
34 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
36 for more details.
37 
38 You should have received copies of the GNU General Public License and the
39 GNU Lesser General Public License along with the GNU MP Library.  If not,
40 see https://www.gnu.org/licenses/.  */
41 
42 #include "gmp-impl.h"
43 #include "longlong.h"
44 
45 
46 void
mpn_tdiv_qr(mp_ptr qp,mp_ptr rp,mp_size_t qxn,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn)47 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48 	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49 {
50   ASSERT_ALWAYS (qxn == 0);
51 
52   ASSERT (nn >= 0);
53   ASSERT (dn >= 0);
54   ASSERT (dn == 0 || dp[dn - 1] != 0);
55   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57 
58   switch (dn)
59     {
60     case 0:
61       DIVIDE_BY_ZERO;
62 
63     case 1:
64       {
65 	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66 	return;
67       }
68 
69     case 2:
70       {
71 	mp_ptr n2p;
72 	mp_limb_t qhl, cy;
73 	TMP_DECL;
74 	TMP_MARK;
75 	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76 	  {
77 	    int cnt;
78 	    mp_limb_t d2p[2];
79 	    count_leading_zeros (cnt, dp[1]);
80 	    cnt -= GMP_NAIL_BITS;
81 	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82 	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83 	    n2p = TMP_ALLOC_LIMBS (nn + 1);
84 	    cy = mpn_lshift (n2p, np, nn, cnt);
85 	    n2p[nn] = cy;
86 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87 	    if (cy == 0)
88 	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
89 	    rp[0] = (n2p[0] >> cnt)
90 	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91 	    rp[1] = (n2p[1] >> cnt);
92 	  }
93 	else
94 	  {
95 	    n2p = TMP_ALLOC_LIMBS (nn);
96 	    MPN_COPY (n2p, np, nn);
97 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98 	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
99 	    rp[0] = n2p[0];
100 	    rp[1] = n2p[1];
101 	  }
102 	TMP_FREE;
103 	return;
104       }
105 
106     default:
107       {
108 	int adjust;
109 	gmp_pi1_t dinv;
110 	TMP_DECL;
111 	TMP_MARK;
112 	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
113 	if (nn + adjust >= 2 * dn)
114 	  {
115 	    mp_ptr n2p, d2p;
116 	    mp_limb_t cy;
117 	    int cnt;
118 
119 	    qp[nn - dn] = 0;			  /* zero high quotient limb */
120 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121 	      {
122 		count_leading_zeros (cnt, dp[dn - 1]);
123 		cnt -= GMP_NAIL_BITS;
124 		d2p = TMP_ALLOC_LIMBS (dn);
125 		mpn_lshift (d2p, dp, dn, cnt);
126 		n2p = TMP_ALLOC_LIMBS (nn + 1);
127 		cy = mpn_lshift (n2p, np, nn, cnt);
128 		n2p[nn] = cy;
129 		nn += adjust;
130 	      }
131 	    else
132 	      {
133 		cnt = 0;
134 		d2p = (mp_ptr) dp;
135 		n2p = TMP_ALLOC_LIMBS (nn + 1);
136 		MPN_COPY (n2p, np, nn);
137 		n2p[nn] = 0;
138 		nn += adjust;
139 	      }
140 
141 	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142 	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143 	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144 	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
145 		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146 		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147 		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
148 	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149 	    else
150 	      {
151 		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152 		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153 		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154 		n2p = rp;
155 	      }
156 
157 	    if (cnt != 0)
158 	      mpn_rshift (rp, n2p, dn, cnt);
159 	    else
160 	      MPN_COPY (rp, n2p, dn);
161 	    TMP_FREE;
162 	    return;
163 	  }
164 
165 	/* When we come here, the numerator/partial remainder is less
166 	   than twice the size of the denominator.  */
167 
168 	  {
169 	    /* Problem:
170 
171 	       Divide a numerator N with nn limbs by a denominator D with dn
172 	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
173 	       compared to dn, conventional division algorithms perform poorly.
174 	       We want an algorithm that has an expected running time that is
175 	       dependent only on qn.
176 
177 	       Algorithm (very informally stated):
178 
179 	       1) Divide the 2 x qn most significant limbs from the numerator
180 		  by the qn most significant limbs from the denominator.  Call
181 		  the result qest.  This is either the correct quotient, but
182 		  might be 1 or 2 too large.  Compute the remainder from the
183 		  division.  (This step is implemented by an mpn_divrem call.)
184 
185 	       2) Is the most significant limb from the remainder < p, where p
186 		  is the product of the most significant limb from the quotient
187 		  and the next(d)?  (Next(d) denotes the next ignored limb from
188 		  the denominator.)  If it is, decrement qest, and adjust the
189 		  remainder accordingly.
190 
191 	       3) Is the remainder >= qest?  If it is, qest is the desired
192 		  quotient.  The algorithm terminates.
193 
194 	       4) Subtract qest x next(d) from the remainder.  If there is
195 		  borrow out, decrement qest, and adjust the remainder
196 		  accordingly.
197 
198 	       5) Skip one word from the denominator (i.e., let next(d) denote
199 		  the next less significant limb.  */
200 
201 	    mp_size_t qn;
202 	    mp_ptr n2p, d2p;
203 	    mp_ptr tp;
204 	    mp_limb_t cy;
205 	    mp_size_t in, rn;
206 	    mp_limb_t quotient_too_large;
207 	    unsigned int cnt;
208 
209 	    qn = nn - dn;
210 	    qp[qn] = 0;				/* zero high quotient limb */
211 	    qn += adjust;			/* qn cannot become bigger */
212 
213 	    if (qn == 0)
214 	      {
215 		MPN_COPY (rp, np, dn);
216 		TMP_FREE;
217 		return;
218 	      }
219 
220 	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
221 	    /* Normalize denominator by shifting it to the left such that its
222 	       most significant bit is set.  Then shift the numerator the same
223 	       amount, to mathematically preserve quotient.  */
224 	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225 	      {
226 		count_leading_zeros (cnt, dp[dn - 1]);
227 		cnt -= GMP_NAIL_BITS;
228 
229 		d2p = TMP_ALLOC_LIMBS (qn);
230 		mpn_lshift (d2p, dp + in, qn, cnt);
231 		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232 
233 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234 		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235 		if (adjust)
236 		  {
237 		    n2p[2 * qn] = cy;
238 		    n2p++;
239 		  }
240 		else
241 		  {
242 		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243 		  }
244 	      }
245 	    else
246 	      {
247 		cnt = 0;
248 		d2p = (mp_ptr) dp + in;
249 
250 		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251 		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252 		if (adjust)
253 		  {
254 		    n2p[2 * qn] = 0;
255 		    n2p++;
256 		  }
257 	      }
258 
259 	    /* Get an approximate quotient using the extracted operands.  */
260 	    if (qn == 1)
261 	      {
262 		mp_limb_t q0, r0;
263 		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264 		n2p[0] = r0 >> GMP_NAIL_BITS;
265 		qp[0] = q0;
266 	      }
267 	    else if (qn == 2)
268 	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269 	    else
270 	      {
271 		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272 		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273 		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274 		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275 		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276 		else
277 		  {
278 		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279 		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280 		    mp_ptr r2p = rp;
281 		    if (np == r2p)	/* If N and R share space, put ... */
282 		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
283 		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284 		    MPN_COPY (n2p, r2p, qn);
285 		  }
286 	      }
287 
288 	    rn = qn;
289 	    /* Multiply the first ignored divisor limb by the most significant
290 	       quotient limb.  If that product is > the partial remainder's
291 	       most significant limb, we know the quotient is too large.  This
292 	       test quickly catches most cases where the quotient is too large;
293 	       it catches all cases where the quotient is 2 too large.  */
294 	    {
295 	      mp_limb_t dl, x;
296 	      mp_limb_t h, dummy;
297 
298 	      if (in - 2 < 0)
299 		dl = 0;
300 	      else
301 		dl = dp[in - 2];
302 
303 #if GMP_NAIL_BITS == 0
304 	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305 #else
306 	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307 	      if (cnt != 0)
308 		x |= dl >> (GMP_NUMB_BITS - cnt);
309 #endif
310 	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311 
312 	      if (n2p[qn - 1] < h)
313 		{
314 		  mp_limb_t cy;
315 
316 		  mpn_decr_u (qp, (mp_limb_t) 1);
317 		  cy = mpn_add_n (n2p, n2p, d2p, qn);
318 		  if (cy)
319 		    {
320 		      /* The partial remainder is safely large.  */
321 		      n2p[qn] = cy;
322 		      ++rn;
323 		    }
324 		}
325 	    }
326 
327 	    quotient_too_large = 0;
328 	    if (cnt != 0)
329 	      {
330 		mp_limb_t cy1, cy2;
331 
332 		/* Append partially used numerator limb to partial remainder.  */
333 		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334 		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335 
336 		/* Update partial remainder with partially used divisor limb.  */
337 		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338 		if (qn != rn)
339 		  {
340 		    ASSERT_ALWAYS (n2p[qn] >= cy2);
341 		    n2p[qn] -= cy2;
342 		  }
343 		else
344 		  {
345 		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346 
347 		    quotient_too_large = (cy1 < cy2);
348 		    ++rn;
349 		  }
350 		--in;
351 	      }
352 	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
353 
354 	    tp = TMP_ALLOC_LIMBS (dn);
355 
356 	    if (in < qn)
357 	      {
358 		if (in == 0)
359 		  {
360 		    MPN_COPY (rp, n2p, rn);
361 		    ASSERT_ALWAYS (rn == dn);
362 		    goto foo;
363 		  }
364 		mpn_mul (tp, qp, qn, dp, in);
365 	      }
366 	    else
367 	      mpn_mul (tp, dp, in, qp, qn);
368 
369 	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370 	    MPN_COPY (rp + in, n2p, dn - in);
371 	    quotient_too_large |= cy;
372 	    cy = mpn_sub_n (rp, np, tp, in);
373 	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374 	    quotient_too_large |= cy;
375 	  foo:
376 	    if (quotient_too_large)
377 	      {
378 		mpn_decr_u (qp, (mp_limb_t) 1);
379 		mpn_add_n (rp, rp, dp, dn);
380 	      }
381 	  }
382 	TMP_FREE;
383 	return;
384       }
385     }
386 }
387