1 /* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
2    division algorithm.
3 
4    Contributed to the GNU project by Torbjorn Granlund.
5 
6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2007, 2009 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 
39 #include "gmp.h"
40 #include "gmp-impl.h"
41 #include "longlong.h"
42 
43 mp_limb_t
mpn_sbpi1_div_q(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)44 mpn_sbpi1_div_q (mp_ptr qp,
45 		 mp_ptr np, mp_size_t nn,
46 		 mp_srcptr dp, mp_size_t dn,
47 		 mp_limb_t dinv)
48 {
49   mp_limb_t qh;
50   mp_size_t qn, i;
51   mp_limb_t n1, n0;
52   mp_limb_t d1, d0;
53   mp_limb_t cy, cy1;
54   mp_limb_t q;
55   mp_limb_t flag;
56 
57   mp_size_t dn_orig = dn;
58   mp_srcptr dp_orig = dp;
59   mp_ptr np_orig = np;
60 
61   ASSERT (dn > 2);
62   ASSERT (nn >= dn);
63   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
64 
65   np += nn;
66 
67   qn = nn - dn;
68   if (qn + 1 < dn)
69     {
70       dp += dn - (qn + 1);
71       dn = qn + 1;
72     }
73 
74   qh = mpn_cmp (np - dn, dp, dn) >= 0;
75   if (qh != 0)
76     mpn_sub_n (np - dn, np - dn, dp, dn);
77 
78   qp += qn;
79 
80   dn -= 2;			/* offset dn by 2 for main division loops,
81 				   saving two iterations in mpn_submul_1.  */
82   d1 = dp[dn + 1];
83   d0 = dp[dn + 0];
84 
85   np -= 2;
86 
87   n1 = np[1];
88 
89   for (i = qn - (dn + 2); i >= 0; i--)
90     {
91       np--;
92       if (UNLIKELY (n1 == d1) && np[1] == d0)
93 	{
94 	  q = GMP_NUMB_MASK;
95 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
96 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
97 	}
98       else
99 	{
100 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
101 
102 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
103 
104 	  cy1 = n0 < cy;
105 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
106 	  cy = n1 < cy1;
107 	  n1 -= cy1;
108 	  np[0] = n0;
109 
110 	  if (UNLIKELY (cy != 0))
111 	    {
112 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
113 	      q--;
114 	    }
115 	}
116 
117       *--qp = q;
118     }
119 
120   flag = ~CNST_LIMB(0);
121 
122   if (dn >= 0)
123     {
124       for (i = dn; i > 0; i--)
125 	{
126 	  np--;
127 	  if (UNLIKELY (n1 >= (d1 & flag)))
128 	    {
129 	      q = GMP_NUMB_MASK;
130 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
131 
132 	      if (UNLIKELY (n1 != cy))
133 		{
134 		  if (n1 < (cy & flag))
135 		    {
136 		      q--;
137 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
138 		    }
139 		  else
140 		    flag = 0;
141 		}
142 	      n1 = np[1];
143 	    }
144 	  else
145 	    {
146 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
147 
148 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
149 
150 	      cy1 = n0 < cy;
151 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
152 	      cy = n1 < cy1;
153 	      n1 -= cy1;
154 	      np[0] = n0;
155 
156 	      if (UNLIKELY (cy != 0))
157 		{
158 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
159 		  q--;
160 		}
161 	    }
162 
163 	  *--qp = q;
164 
165 	  /* Truncate operands.  */
166 	  dn--;
167 	  dp++;
168 	}
169 
170       np--;
171       if (UNLIKELY (n1 >= (d1 & flag)))
172 	{
173 	  q = GMP_NUMB_MASK;
174 	  cy = mpn_submul_1 (np, dp, 2, q);
175 
176 	  if (UNLIKELY (n1 != cy))
177 	    {
178 	      if (n1 < (cy & flag))
179 		{
180 		  q--;
181 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
182 		}
183 	      else
184 		flag = 0;
185 	    }
186 	  n1 = np[1];
187 	}
188       else
189 	{
190 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
191 
192 	  np[0] = n0;
193 	  np[1] = n1;
194 	}
195 
196       *--qp = q;
197     }
198   ASSERT_ALWAYS (np[1] == n1);
199   np += 2;
200 
201 
202   dn = dn_orig;
203   if (UNLIKELY (n1 < (dn & flag)))
204     {
205       mp_limb_t q, x;
206 
207       /* The quotient may be too large if the remainder is small.  Recompute
208 	 for above ignored operand parts, until the remainder spills.
209 
210 	 FIXME: The quality of this code isn't the same as the code above.
211 	 1. We don't compute things in an optimal order, high-to-low, in order
212 	    to terminate as quickly as possible.
213 	 2. We mess with pointers and sizes, adding and subtracting and
214 	    adjusting to get things right.  It surely could be streamlined.
215 	 3. The only termination criteria are that we determine that the
216 	    quotient needs to be adjusted, or that we have recomputed
217 	    everything.  We should stop when the remainder is so large
218 	    that no additional subtracting could make it spill.
219 	 4. If nothing else, we should not do two loops of submul_1 over the
220 	    data, instead handle both the triangularization and chopping at
221 	    once.  */
222 
223       x = n1;
224 
225       if (dn > 2)
226 	{
227 	  /* Compensate for triangularization.  */
228 	  mp_limb_t y;
229 
230 	  dp = dp_orig;
231 	  if (qn + 1 < dn)
232 	    {
233 	      dp += dn - (qn + 1);
234 	      dn = qn + 1;
235 	    }
236 
237 	  y = np[-2];
238 
239 	  for (i = dn - 3; i >= 0; i--)
240 	    {
241 	      q = qp[i];
242 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
243 
244 	      if (y < cy)
245 		{
246 		  if (x == 0)
247 		    {
248 		      cy = mpn_sub_1 (qp, qp, qn, 1);
249 		      ASSERT_ALWAYS (cy == 0);
250 		      return qh - cy;
251 		    }
252 		  x--;
253 		}
254 	      y -= cy;
255 	    }
256 	  np[-2] = y;
257 	}
258 
259       dn = dn_orig;
260       if (qn + 1 < dn)
261 	{
262 	  /* Compensate for ignored dividend and divisor tails.  */
263 
264 	  dp = dp_orig;
265 	  np = np_orig;
266 
267 	  if (qh != 0)
268 	    {
269 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
270 	      if (cy != 0)
271 		{
272 		  if (x == 0)
273 		    {
274 		      if (qn != 0)
275 			cy = mpn_sub_1 (qp, qp, qn, 1);
276 		      return qh - cy;
277 		    }
278 		  x--;
279 		}
280 	    }
281 
282 	  if (qn == 0)
283 	    return qh;
284 
285 	  for (i = dn - qn - 2; i >= 0; i--)
286 	    {
287 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
288 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
289 	      if (cy != 0)
290 		{
291 		  if (x == 0)
292 		    {
293 		      cy = mpn_sub_1 (qp, qp, qn, 1);
294 		      return qh;
295 		    }
296 		  x--;
297 		}
298 	    }
299 	}
300     }
301 
302   return qh;
303 }
304