1 /* mpn_sbpi1_div_q -- Schoolbook division using the M�ller-Granlund 3/2
2    division algorithm.
3 
4    Contributed to the GNU project by Torbjorn Granlund.
5 
6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
9 
10 Copyright 2007, 2009 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18 
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22 License for more details.
23 
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
26 
27 
28 #include "gmp.h"
29 #include "gmp-impl.h"
30 #include "longlong.h"
31 
32 mp_limb_t
33 mpn_sbpi1_div_q (mp_ptr qp,
34 		 mp_ptr np, mp_size_t nn,
35 		 mp_srcptr dp, mp_size_t dn,
36 		 mp_limb_t dinv)
37 {
38   mp_limb_t qh;
39   mp_size_t qn, i;
40   mp_limb_t n1, n0;
41   mp_limb_t d1, d0;
42   mp_limb_t cy, cy1;
43   mp_limb_t q;
44   mp_limb_t flag;
45 
46   mp_size_t dn_orig = dn;
47   mp_srcptr dp_orig = dp;
48   mp_ptr np_orig = np;
49 
50   ASSERT (dn > 2);
51   ASSERT (nn >= dn);
52   ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
53 
54   np += nn;
55 
56   qn = nn - dn;
57   if (qn + 1 < dn)
58     {
59       dp += dn - (qn + 1);
60       dn = qn + 1;
61     }
62 
63   qh = mpn_cmp (np - dn, dp, dn) >= 0;
64   if (qh != 0)
65     mpn_sub_n (np - dn, np - dn, dp, dn);
66 
67   qp += qn;
68 
69   dn -= 2;			/* offset dn by 2 for main division loops,
70 				   saving two iterations in mpn_submul_1.  */
71   d1 = dp[dn + 1];
72   d0 = dp[dn + 0];
73 
74   np -= 2;
75 
76   n1 = np[1];
77 
78   for (i = qn - (dn + 2); i >= 0; i--)
79     {
80       np--;
81       if (UNLIKELY (n1 == d1) && np[1] == d0)
82 	{
83 	  q = GMP_NUMB_MASK;
84 	  mpn_submul_1 (np - dn, dp, dn + 2, q);
85 	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
86 	}
87       else
88 	{
89 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
90 
91 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
92 
93 	  cy1 = n0 < cy;
94 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
95 	  cy = n1 < cy1;
96 	  n1 -= cy1;
97 	  np[0] = n0;
98 
99 	  if (UNLIKELY (cy != 0))
100 	    {
101 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
102 	      q--;
103 	    }
104 	}
105 
106       *--qp = q;
107     }
108 
109   flag = ~CNST_LIMB(0);
110 
111   if (dn >= 0)
112     {
113       for (i = dn; i > 0; i--)
114 	{
115 	  np--;
116 	  if (UNLIKELY (n1 >= (d1 & flag)))
117 	    {
118 	      q = GMP_NUMB_MASK;
119 	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
120 
121 	      if (UNLIKELY (n1 != cy))
122 		{
123 		  if (n1 < (cy & flag))
124 		    {
125 		      q--;
126 		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
127 		    }
128 		  else
129 		    flag = 0;
130 		}
131 	      n1 = np[1];
132 	    }
133 	  else
134 	    {
135 	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
136 
137 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
138 
139 	      cy1 = n0 < cy;
140 	      n0 = (n0 - cy) & GMP_NUMB_MASK;
141 	      cy = n1 < cy1;
142 	      n1 -= cy1;
143 	      np[0] = n0;
144 
145 	      if (UNLIKELY (cy != 0))
146 		{
147 		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
148 		  q--;
149 		}
150 	    }
151 
152 	  *--qp = q;
153 
154 	  /* Truncate operands.  */
155 	  dn--;
156 	  dp++;
157 	}
158 
159       np--;
160       if (UNLIKELY (n1 >= (d1 & flag)))
161 	{
162 	  q = GMP_NUMB_MASK;
163 	  cy = mpn_submul_1 (np, dp, 2, q);
164 
165 	  if (UNLIKELY (n1 != cy))
166 	    {
167 	      if (n1 < (cy & flag))
168 		{
169 		  q--;
170 		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
171 		}
172 	      else
173 		flag = 0;
174 	    }
175 	  n1 = np[1];
176 	}
177       else
178 	{
179 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
180 
181 	  np[0] = n0;
182 	  np[1] = n1;
183 	}
184 
185       *--qp = q;
186     }
187   ASSERT_ALWAYS (np[1] == n1);
188   np += 2;
189 
190 
191   dn = dn_orig;
192   if (UNLIKELY (n1 < (dn & flag)))
193     {
194       mp_limb_t q, x;
195 
196       /* The quotient may be too large if the remainder is small.  Recompute
197 	 for above ignored operand parts, until the remainder spills.
198 
199 	 FIXME: The quality of this code isn't the same as the code above.
200 	 1. We don't compute things in an optimal order, high-to-low, in order
201 	    to terminate as quickly as possible.
202 	 2. We mess with pointers and sizes, adding and subtracting and
203 	    adjusting to get things right.  It surely could be streamlined.
204 	 3. The only termination criteria are that we determine that the
205 	    quotient needs to be adjusted, or that we have recomputed
206 	    everything.  We should stop when the remainder is so large
207 	    that no additional subtracting could make it spill.
208 	 4. If nothing else, we should not do two loops of submul_1 over the
209 	    data, instead handle both the triangularization and chopping at
210 	    once.  */
211 
212       x = n1;
213 
214       if (dn > 2)
215 	{
216 	  /* Compensate for triangularization.  */
217 	  mp_limb_t y;
218 
219 	  dp = dp_orig;
220 	  if (qn + 1 < dn)
221 	    {
222 	      dp += dn - (qn + 1);
223 	      dn = qn + 1;
224 	    }
225 
226 	  y = np[-2];
227 
228 	  for (i = dn - 3; i >= 0; i--)
229 	    {
230 	      q = qp[i];
231 	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
232 
233 	      if (y < cy)
234 		{
235 		  if (x == 0)
236 		    {
237 		      cy = mpn_sub_1 (qp, qp, qn, 1);
238 		      ASSERT_ALWAYS (cy == 0);
239 		      return qh - cy;
240 		    }
241 		  x--;
242 		}
243 	      y -= cy;
244 	    }
245 	  np[-2] = y;
246 	}
247 
248       dn = dn_orig;
249       if (qn + 1 < dn)
250 	{
251 	  /* Compensate for ignored dividend and divisor tails.  */
252 
253 	  dp = dp_orig;
254 	  np = np_orig;
255 
256 	  if (qh != 0)
257 	    {
258 	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
259 	      if (cy != 0)
260 		{
261 		  if (x == 0)
262 		    {
263 		      if (qn != 0)
264 			cy = mpn_sub_1 (qp, qp, qn, 1);
265 		      return qh - cy;
266 		    }
267 		  x--;
268 		}
269 	    }
270 
271 	  if (qn == 0)
272 	    return qh;
273 
274 	  for (i = dn - qn - 2; i >= 0; i--)
275 	    {
276 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
277 	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
278 	      if (cy != 0)
279 		{
280 		  if (x == 0)
281 		    {
282 		      cy = mpn_sub_1 (qp, qp, qn, 1);
283 		      return qh;
284 		    }
285 		  x--;
286 		}
287 	    }
288 	}
289     }
290 
291   return qh;
292 }
293