1 /* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
2    size operands.
3 
4    Contributed to the GNU project by Torbjorn Granlund.
5 
6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 #include "longlong.h"
41 
42 
43 mp_limb_t
mpn_dcpi1_div_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,gmp_pi1_t * dinv,mp_ptr tp)44 mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
45 		    gmp_pi1_t *dinv, mp_ptr tp)
46 {
47   mp_size_t lo, hi;
48   mp_limb_t cy, qh, ql;
49 
50   lo = n >> 1;			/* floor(n/2) */
51   hi = n - lo;			/* ceil(n/2) */
52 
53   if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
54     qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
55   else
56     qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
57 
58   mpn_mul (tp, qp + lo, hi, dp, lo);
59 
60   cy = mpn_sub_n (np + lo, np + lo, tp, n);
61   if (qh != 0)
62     cy += mpn_sub_n (np + n, np + n, dp, lo);
63 
64   while (cy != 0)
65     {
66       qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
67       cy -= mpn_add_n (np + lo, np + lo, dp, n);
68     }
69 
70   if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
71     ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
72   else
73     ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
74 
75   mpn_mul (tp, dp, hi, qp, lo);
76 
77   cy = mpn_sub_n (np, np, tp, n);
78   if (ql != 0)
79     cy += mpn_sub_n (np + lo, np + lo, dp, hi);
80 
81   while (cy != 0)
82     {
83       mpn_sub_1 (qp, qp, lo, 1);
84       cy -= mpn_add_n (np, np, dp, n);
85     }
86 
87   return qh;
88 }
89 
90 mp_limb_t
mpn_dcpi1_div_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,gmp_pi1_t * dinv)91 mpn_dcpi1_div_qr (mp_ptr qp,
92 		  mp_ptr np, mp_size_t nn,
93 		  mp_srcptr dp, mp_size_t dn,
94 		  gmp_pi1_t *dinv)
95 {
96   mp_size_t qn;
97   mp_limb_t qh, cy;
98   mp_ptr tp;
99   TMP_DECL;
100 
101   TMP_MARK;
102 
103   ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
104   ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
105   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
106 
107   tp = TMP_SALLOC_LIMBS (dn);
108 
109   qn = nn - dn;
110   qp += qn;
111   np += nn;
112   dp += dn;
113 
114   if (qn > dn)
115     {
116       /* Reduce qn mod dn without division, optimizing small operations.  */
117       do
118 	qn -= dn;
119       while (qn > dn);
120 
121       qp -= qn;			/* point at low limb of next quotient block */
122       np -= qn;			/* point in the middle of partial remainder */
123 
124       /* Perform the typically smaller block first.  */
125       if (qn == 1)
126 	{
127 	  mp_limb_t q, n2, n1, n0, d1, d0;
128 
129 	  /* Handle qh up front, for simplicity. */
130 	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
131 	  if (qh)
132 	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
133 
134 	  /* A single iteration of schoolbook: One 3/2 division,
135 	     followed by the bignum update and adjustment. */
136 	  n2 = np[0];
137 	  n1 = np[-1];
138 	  n0 = np[-2];
139 	  d1 = dp[-1];
140 	  d0 = dp[-2];
141 
142 	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
143 
144 	  if (UNLIKELY (n2 == d1) && n1 == d0)
145 	    {
146 	      q = GMP_NUMB_MASK;
147 	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
148 	      ASSERT (cy == n2);
149 	    }
150 	  else
151 	    {
152 	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
153 
154 	      if (dn > 2)
155 		{
156 		  mp_limb_t cy, cy1;
157 		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
158 
159 		  cy1 = n0 < cy;
160 		  n0 = (n0 - cy) & GMP_NUMB_MASK;
161 		  cy = n1 < cy1;
162 		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
163 		  np[-2] = n0;
164 
165 		  if (UNLIKELY (cy != 0))
166 		    {
167 		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
168 		      qh -= (q == 0);
169 		      q = (q - 1) & GMP_NUMB_MASK;
170 		    }
171 		}
172 	      else
173 		np[-2] = n0;
174 
175 	      np[-1] = n1;
176 	    }
177 	  qp[0] = q;
178 	}
179       else
180 	{
181 	  /* Do a 2qn / qn division */
182 	  if (qn == 2)
183 	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
184 	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
185 	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
186 	  else
187 	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
188 
189 	  if (qn != dn)
190 	    {
191 	      if (qn > dn - qn)
192 		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
193 	      else
194 		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
195 
196 	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
197 	      if (qh != 0)
198 		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
199 
200 	      while (cy != 0)
201 		{
202 		  qh -= mpn_sub_1 (qp, qp, qn, 1);
203 		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
204 		}
205 	    }
206 	}
207 
208       qn = nn - dn - qn;
209       do
210 	{
211 	  qp -= dn;
212 	  np -= dn;
213 	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
214 	  qn -= dn;
215 	}
216       while (qn > 0);
217     }
218   else
219     {
220       qp -= qn;			/* point at low limb of next quotient block */
221       np -= qn;			/* point in the middle of partial remainder */
222 
223       if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
224 	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
225       else
226 	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
227 
228       if (qn != dn)
229 	{
230 	  if (qn > dn - qn)
231 	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
232 	  else
233 	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
234 
235 	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
236 	  if (qh != 0)
237 	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
238 
239 	  while (cy != 0)
240 	    {
241 	      qh -= mpn_sub_1 (qp, qp, qn, 1);
242 	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
243 	    }
244 	}
245     }
246 
247   TMP_FREE;
248   return qh;
249 }
250