1 /*-
2 * This file is dual licensed under the MIT and the University of Illinois
3 * Open Source Licenses.
4 *
5 *-
6 * University of Illinois/NCSA
7 * Open Source License
8 *
9 * Copyright (c) 2009-2014 by the contributors listed below
10 *
11 * All rights reserved.
12 *
13 * Developed by:
14 *
15 * LLVM Team
16 *
17 * University of Illinois at Urbana-Champaign
18 *
19 * http://llvm.org
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a
22 * copy of this software and associated documentation files (the "Software"),
23 * to deal with the Software without restriction, including without limitation
24 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
25 * and/or sell copies of the Software, and to permit persons to whom the
26 * Software is furnished to do so, subject to the following conditions:
27 *
28 * * Redistributions of source code must retain the above copyright notice,
29 * this list of conditions and the following disclaimers.
30 *
31 * * Redistributions in binary form must reproduce the above copyright notice,
32 * this list of conditions and the following disclaimers in the
33 * documentation and/or other materials provided with the distribution.
34 *
35 * * Neither the names of the LLVM Team, University of Illinois at
36 * Urbana-Champaign, nor the names of its contributors may be used to
37 * endorse or promote products derived from this Software without specific
38 * prior written permission.
39 *
40 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
45 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
46 * WITH THE SOFTWARE.
47 *
48 *-
49 * Copyright (c) 2009-2014 by the contributors listed below
50 *
51 * Permission is hereby granted, free of charge, to any person obtaining a copy
52 * of this software and associated documentation files (the "Software"), to
53 * deal in the Software without restriction, including without limitation the
54 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
55 * sell copies of the Software, and to permit persons to whom the Software is
56 * furnished to do so, subject to the following conditions:
57 *
58 * The above copyright notice and this permission notice shall be included in
59 * all copies or substantial portions of the Software.
60 *
61 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
62 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
63 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
64 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
65 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
66 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
67 * DEALINGS IN THE SOFTWARE.
68 *
69 * -
70 * List of contributors:
71 *
72 * Craig van Vliet <cvanvliet@auroraux.org>
73 * Edward O'Callaghan <eocallaghan@auroraux.org>
74 * Howard Hinnant <hhinnant@apple.com>
75 * Matt Thomas <matt@NetBSD.org>
76 * Joerg Sonnenberger <joerg@NetBSD.org>
77 */
78
79 #include <machine/limits.h>
80 #include <machine/stdint.h>
81
82 typedef union
83 {
84 _uint128_t all;
85 struct {
86 unsigned long low;
87 unsigned long high;
88 } s;
89 } utwords;
90
91 /* Effects: if rem != 0, *rem = a % b
92 * Returns: a / b
93 */
94
95 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
96
97 static _uint128_t
__udivmodti4(_uint128_t a,_uint128_t b,_uint128_t * rem)98 __udivmodti4(_uint128_t a, _uint128_t b, _uint128_t *rem)
99 {
100 const unsigned n_udword_bits = sizeof(unsigned long) * CHAR_BIT;
101 const unsigned n_utword_bits = sizeof(_uint128_t) * CHAR_BIT;
102 utwords n, d, q, r;
103 unsigned sr;
104
105 n.all = a;
106 d.all = b;
107
108 /* special cases, X is unknown, K != 0 */
109 if (n.s.high == 0) {
110 if (d.s.high == 0) {
111 /* 0 X
112 * ---
113 * 0 X
114 */
115 if (rem)
116 *rem = n.s.low % d.s.low;
117 return n.s.low / d.s.low;
118 }
119 /* 0 X
120 * ---
121 * K X
122 */
123 if (rem)
124 *rem = n.s.low;
125 return 0;
126 }
127 /* n.s.high != 0 */
128 if (d.s.low == 0) {
129 if (d.s.high == 0) {
130 /* K X
131 * ---
132 * 0 0
133 */
134 if (rem)
135 *rem = n.s.high % d.s.low;
136 return n.s.high / d.s.low;
137 }
138 /* d.s.high != 0 */
139 if (n.s.low == 0) {
140 /* K 0
141 * ---
142 * K 0
143 */
144 if (rem) {
145 r.s.high = n.s.high % d.s.high;
146 r.s.low = 0;
147 *rem = r.all;
148 }
149 return n.s.high / d.s.high;
150 }
151 /* K K
152 * ---
153 * K 0
154 */
155 if ((d.s.high & (d.s.high - 1)) == 0) { /* if d is a power of 2 */
156 if (rem) {
157 r.s.low = n.s.low;
158 r.s.high = n.s.high & (d.s.high - 1);
159 *rem = r.all;
160 }
161 return n.s.high >> __builtin_ctzl(d.s.high);
162 }
163 /* K K
164 * ---
165 * K 0
166 */
167 sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high);
168 /* 0 <= sr <= n_udword_bits - 2 or sr large */
169 if (sr > n_udword_bits - 2) {
170 if (rem)
171 *rem = n.all;
172 return 0;
173 }
174 ++sr;
175 /* 1 <= sr <= n_udword_bits - 1 */
176 /* q.all = n.all << (n_utword_bits - sr); */
177 q.s.low = 0;
178 q.s.high = n.s.low << (n_udword_bits - sr);
179 /* r.all = n.all >> sr; */
180 r.s.high = n.s.high >> sr;
181 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
182 } else { /* d.s.low != 0 */
183 if (d.s.high == 0) {
184 /* K X
185 * ---
186 * 0 K
187 */
188 if ((d.s.low & (d.s.low - 1)) == 0) { /* if d is a power of 2 */
189 if (rem)
190 *rem = n.s.low & (d.s.low - 1);
191 if (d.s.low == 1)
192 return n.all;
193 sr = __builtin_ctzl(d.s.low);
194 q.s.high = n.s.high >> sr;
195 q.s.low = (n.s.high << (n_udword_bits - sr)) |
196 (n.s.low >> sr);
197 return q.all;
198 }
199 /* K X
200 * ---
201 * 0 K
202 */
203 sr = 1 + n_udword_bits + __builtin_clzl(d.s.low) -
204 __builtin_clzl(n.s.high);
205 /* 2 <= sr <= n_utword_bits - 1
206 * q.all = n.all << (n_utword_bits - sr);
207 * r.all = n.all >> sr;
208 * if (sr == n_udword_bits)
209 * {
210 * q.s.low = 0;
211 * q.s.high = n.s.low;
212 * r.s.high = 0;
213 * r.s.low = n.s.high;
214 * }
215 * else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
216 * {
217 * q.s.low = 0;
218 * q.s.high = n.s.low << (n_udword_bits - sr);
219 * r.s.high = n.s.high >> sr;
220 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
221 * }
222 * else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
223 * {
224 * q.s.low = n.s.low << (n_utword_bits - sr);
225 * q.s.high = (n.s.high << (n_utword_bits - sr)) |
226 * (n.s.low >> (sr - n_udword_bits));
227 * r.s.high = 0;
228 * r.s.low = n.s.high >> (sr - n_udword_bits);
229 * }
230 */
231 q.s.low = (n.s.low << (n_utword_bits - sr)) &
232 ((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
233 q.s.high = ((n.s.low << ( n_udword_bits - sr)) &
234 ((long)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
235 (((n.s.high << (n_utword_bits - sr)) |
236 (n.s.low >> (sr - n_udword_bits))) &
237 ((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
238 r.s.high = (n.s.high >> sr) &
239 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
240 r.s.low = ((n.s.high >> (sr - n_udword_bits)) &
241 ((long)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
242 (((n.s.high << (n_udword_bits - sr)) |
243 (n.s.low >> sr)) &
244 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
245 } else {
246 /* K X
247 * ---
248 * K K
249 */
250 sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high);
251 /*0 <= sr <= n_udword_bits - 1 or sr large */
252 if (sr > n_udword_bits - 1) {
253 if (rem)
254 *rem = n.all;
255 return 0;
256 }
257 ++sr;
258 /* 1 <= sr <= n_udword_bits */
259 /* q.all = n.all << (n_utword_bits - sr); */
260 q.s.low = 0;
261 q.s.high = n.s.low << (n_udword_bits - sr);
262 /* r.all = n.all >> sr;
263 * if (sr < n_udword_bits)
264 * {
265 * r.s.high = n.s.high >> sr;
266 * r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
267 * }
268 * else
269 * {
270 * r.s.high = 0;
271 * r.s.low = n.s.high;
272 * }
273 */
274 r.s.high = (n.s.high >> sr) &
275 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
276 r.s.low = (n.s.high << (n_udword_bits - sr)) |
277 ((n.s.low >> sr) &
278 ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
279 }
280 }
281 /* Not a special case
282 * q and r are initialized with:
283 * q.all = n.all << (n_utword_bits - sr);
284 * r.all = n.all >> sr;
285 * 1 <= sr <= n_utword_bits - 1
286 */
287 unsigned carry = 0;
288 for (; sr > 0; --sr) {
289 /* r:q = ((r:q) << 1) | carry */
290 r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
291 r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
292 q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
293 q.s.low = (q.s.low << 1) | carry;
294 /* carry = 0;
295 * if (r.all >= d.all)
296 * {
297 * r.all -= d.all;
298 * carry = 1;
299 * }
300 */
301 const _int128_t s =
302 (_int128_t)(d.all - r.all - 1) >> (n_utword_bits - 1);
303 carry = s & 1;
304 r.all -= d.all & s;
305 }
306 q.all = (q.all << 1) | carry;
307 if (rem)
308 *rem = r.all;
309 return q.all;
310 }
311
312 _uint128_t __udivti3(_uint128_t a, _uint128_t b);
313
314 _uint128_t
__udivti3(_uint128_t a,_uint128_t b)315 __udivti3(_uint128_t a, _uint128_t b)
316 {
317 return __udivmodti4(a, b, 0);
318 }
319