/* mpn_divexact(qp,np,nn,dp,dn) -- Divide N = {np,nn} by D = {dp,dn} storing
   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
   between Q and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

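/* A minimal caller sketch, for illustration only: mpn_divexact is an
   internal interface, and application code should reach this functionality
   through the documented mpz/mpn entry points.  It computes
   {qp,1} = 91/7 = 13 with single-limb operands.  */
#if 0
static void
example_divexact_call (void)
{
  mp_limb_t np[1] = { 91 };	/* dividend N, nn = 1 limb */
  mp_limb_t dp[1] = { 7 };	/* divisor D, dn = 1 limb, divides N exactly */
  mp_limb_t qp[1];		/* quotient, nn - dn + 1 = 1 limb */

  mpn_divexact (qp, np, 1, dp, 1);
  ASSERT_ALWAYS (qp[0] == 13);
}
#endif
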
#if 1
void
mpn_divexact (mp_ptr qp,
	      mp_srcptr np, mp_size_t nn,
	      mp_srcptr dp, mp_size_t dn)
{
  unsigned shift;
  mp_size_t qn;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] > 0);

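  /* Since the division is exact, each low zero limb of D must be matched by
     a zero limb of N; strip such limbs pairwise.  This leaves dp[0] nonzero
     and does not change the quotient.  */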
  while (dp[0] == 0)
    {
      ASSERT (np[0] == 0);
      dp++;
      np++;
      dn--;
      nn--;
    }

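  /* A single-limb divisor is handled by the dedicated 1-limb code.  */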
  if (dn == 1)
    {
      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
      return;
    }

  TMP_MARK;

  qn = nn + 1 - dn;
  count_trailing_zeros (shift, dp[0]);

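  /* D may still be even.  Since the division is exact, N has at least as
     many low zero bits as D, so both operands can be shifted right by
     dp[0]'s trailing zeros, making D odd without changing the quotient.  */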
  if (shift > 0)
    {
      mp_ptr wp;
      mp_size_t ss;
      ss = (dn > qn) ? qn + 1 : dn;

      tp = TMP_ALLOC_LIMBS (ss);
      mpn_rshift (tp, dp, ss, shift);
      dp = tp;

      /* Since we have excluded dn == 1, we have nn > qn, and we need
	 to shift one limb beyond qn. */
      wp = TMP_ALLOC_LIMBS (qn + 1);
      mpn_rshift (wp, np, qn + 1, shift);
      np = wp;
    }

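  /* Viewed 2-adically, Q = Q mod B^qn depends only on the qn low limbs of N
     and D, so a divisor longer than the quotient may be truncated.  */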
  if (dn > qn)
    dn = qn;

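  /* Hensel (2-adic) division: with D odd and no remainder, the quotient is
     Q = N * D^{-1} mod B^qn, which is what mpn_bdiv_q computes.  */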
  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
  TMP_FREE;
}

#else

/* We use Jebelean's bidirectional exact division algorithm.  This is
   somewhat naively implemented, with the quotient split into two equal
   parts, one computed by 2-adic division and one by truncating division.
   Since 2-adic division is faster, it should be used for a larger chunk.

   This code is horrendously ugly, in all sorts of ways.

   * It was hacked without much care or thought, but with a testing program.
   * It handles scratch space frivolously, and furthermore the itch function
     is broken.
   * It doesn't provide any measures to deal with mu_divappr_q's +3 error.
     We have yet to provoke an error due to this, though.
   * Algorithm selection leaves a lot to be desired.  In particular, the
     crossover between DC and MU is not a single point, yet we treat it as
     one.
   * It makes the msb part 1 or 2 limbs larger than the lsb part, even
     though the latter is faster.  We should at least reverse this, but
     perhaps we should make the lsb part considerably larger.  (How do we
     tune this?)
*/
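
/* A toy word-size illustration of the bidirectional idea (a sketch under
   simplified assumptions, not GMP code): for an exact division, the lsb
   half of the quotient depends only on the low bits of N and D and can be
   computed 2-adically, while the msb half comes from truncating division
   of the high bits.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* 2-adic inverse of odd d modulo 2^16 by Newton iteration; each step
   doubles the number of correct low bits (3 -> 6 -> 12 -> 24).  */
static uint32_t
toy_binvert16 (uint32_t d)
{
  uint32_t i = d;		/* d*d == 1 (mod 8) for odd d */
  i *= 2 - d * i;
  i *= 2 - d * i;
  i *= 2 - d * i;
  return i & 0xffff;
}

static void
toy_bidirectional (void)
{
  uint32_t d = 12345;			/* odd divisor */
  uint32_t q = 0xabcd1234;		/* quotient we expect to recover */
  uint64_t n = (uint64_t) q * d;	/* dividend, exactly divisible */

  /* lsb half, 2-adically: q mod 2^16 = n * d^{-1} mod 2^16.  */
  uint32_t q0 = ((uint32_t) n * toy_binvert16 (d)) & 0xffff;
  assert (q0 == (q & 0xffff));

  /* msb half, by truncating division; the real code uses only the high
     limbs plus some overlap, then fixes up the border limb.  */
  uint32_t q1 = (uint32_t) (n / d) >> 16;
  assert (q1 == q >> 16);
}
#endif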

mp_size_t
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
{
  return nn + dn;		/* FIXME this is not right */
}

void
mpn_divexact (mp_ptr qp,
	      mp_srcptr np, mp_size_t nn,
	      mp_srcptr dp, mp_size_t dn,
	      mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t nn0, qn0;
  mp_size_t nn1, qn1;
  mp_ptr tp;
  mp_limb_t qml;
  mp_limb_t qh;
  int cnt;
  mp_ptr xdp;
  mp_limb_t di;
  mp_limb_t cy;
  gmp_pi1_t dinv;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn + 1;

  /* For small divisors and small quotients, don't use Jebelean's algorithm. */
  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
    {
      tp = scratch;
      MPN_COPY (tp, np, qn);
      binvert_limb (di, dp[0]);  di = -di;
      dn = MIN (dn, qn);
      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
      TMP_FREE;
      return;
    }

  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */

  /* If quotient is much larger than the divisor, the bidirectional algorithm
     does not work as currently implemented.  Fall back to plain bdiv.  */
  if (qn0 > dn)
    {
      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  dn = MIN (dn, qn);
	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else
	{
	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
	}
      TMP_FREE;
      return;
    }

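  /* Sizes for the two halves: the lsb part computes qn0 quotient limbs
     2-adically from the low limbs of N, the msb part computes qn1 quotient
     limbs by truncating division of the nn1 high limbs of N.  The parts
     overlap at qp[qn0 - 1]; that limb is used below to detect a carry
     across the border.  */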
  nn0 = qn0 + qn0;

  nn1 = nn0 - 1 + ((nn-dn) & 1);
  qn1 = qn0;
  if (LIKELY (qn0 != dn))
    {
      nn1 = nn1 + 1;
      qn1 = qn1 + 1;
      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
	{
	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
	     to include an extra limb in order to get the needed overlap.  */
	  /* FIXME: Now with the mu_divappr_q function, we should really need
	     more overlap. That indicates one of two things: (1) The test code
	     is not good. (2) We actually overlap too much by default.  */
	  nn1 = nn1 + 1;
	  qn1 = qn1 + 1;
	}
    }

  tp = TMP_ALLOC_LIMBS (nn1 + 1);

  count_leading_zeros (cnt, dp[dn - 1]);

  /* Normalize divisor, store into tmp area.  */
  if (cnt != 0)
    {
      xdp = TMP_ALLOC_LIMBS (qn1);
      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
    }
  else
    {
      xdp = (mp_ptr) dp + dn - qn1;
    }

  /* Shift dividend according to the divisor normalization.  */
  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
     interfaces want a pointer to the imaginative least significant limb, not
     to the least significant *used* limb.  Of course, we could leave nn1-qn1
     rubbish limbs in the low part, to save some time.  */
  if (cnt != 0)
    {
      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
      if (cy != 0)
	{
	  tp[nn1] = cy;
	  nn1++;
	}
    }
  else
    {
      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
	 mpn_sub_n right before is executed.  */
      MPN_COPY (tp, np + nn - nn1, nn1);
    }

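  /* Compute the approximate msb quotient part with a truncating division,
     choosing schoolbook, divide-and-conquer, or MU divappr by size.  */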
  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
    }
  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
    }
  else
    {
      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
	 conditional subtraction here.  */
      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
      if (qh)
	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
      qp[qn0 - 1 + nn1 - qn1] = qh;
    }
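  /* Save the border limb of the approximate msb quotient; the exact lsb
     computation below overwrites it.  */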
  qml = qp[qn0 - 1];

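  /* The sbpi1/dcpi1 bdiv functions want the negated 2-adic inverse
     -1/dp[0] mod B.  */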
  binvert_limb (di, dp[0]);  di = -di;

  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else
    {
      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
    }

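  /* The lsb part has recomputed the border limb qp[qn0 - 1] exactly.  If
     the approximate value saved in qml is smaller, the msb approximation
     wrapped past B at that position and carried into the limbs above, so
     compensate by decrementing the msb part.  */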
  if (qml < qp[qn0 - 1])
    mpn_decr_u (qp + qn0, 1);

  TMP_FREE;
}
#endif