xref: /dragonfly/sys/platform/pc64/x86_64/__udivti3.c (revision 37de577a)
1 /*-
2  * This file is dual licensed under the MIT and the University of Illinois
3  * Open Source Licenses.
4  *
5  *-
6  * University of Illinois/NCSA
7  * Open Source License
8  *
9  * Copyright (c) 2009-2014 by the contributors listed below
10  *
11  * All rights reserved.
12  *
13  * Developed by:
14  *
15  * LLVM Team
16  *
17  * University of Illinois at Urbana-Champaign
18  *
19  * http://llvm.org
20  *
21  * Permission is hereby granted, free of charge, to any person obtaining a
22  * copy of this software and associated documentation files (the "Software"),
23  * to deal with the Software without restriction, including without limitation
24  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
25  * and/or sell copies of the Software, and to permit persons to whom the
26  * Software is furnished to do so, subject to the following conditions:
27  *
28  * * Redistributions of source code must retain the above copyright notice,
29  *   this list of conditions and the following disclaimers.
30  *
31  * * Redistributions in binary form must reproduce the above copyright notice,
32  *   this list of conditions and the following disclaimers in the
33  *   documentation and/or other materials provided with the distribution.
34  *
35  * * Neither the names of the LLVM Team, University of Illinois at
36  *   Urbana-Champaign, nor the names of its contributors may be used to
37  *   endorse or promote products derived from this Software without specific
38  *   prior written permission.
39  *
40  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
43  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
45  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
46  * WITH THE SOFTWARE.
47  *
48  *-
49  * Copyright (c) 2009-2014 by the contributors listed below
50  *
51  * Permission is hereby granted, free of charge, to any person obtaining a copy
52  * of this software and associated documentation files (the "Software"), to
53  * deal in the Software without restriction, including without limitation the
54  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
55  * sell copies of the Software, and to permit persons to whom the Software is
56  * furnished to do so, subject to the following conditions:
57  *
58  * The above copyright notice and this permission notice shall be included in
59  * all copies or substantial portions of the Software.
60  *
61  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
62  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
63  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
64  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
65  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
66  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
67  * DEALINGS IN THE SOFTWARE.
68  *
69  * -
70  * List of contributors:
71  *
72  * Craig van Vliet <cvanvliet@auroraux.org>
73  * Edward O'Callaghan <eocallaghan@auroraux.org>
74  * Howard Hinnant <hhinnant@apple.com>
75  * Matt Thomas <matt@NetBSD.org>
76  * Joerg Sonnenberger <joerg@NetBSD.org>
77  */
78 
79 #include <machine/limits.h>
80 #include <machine/stdint.h>
81 
82 typedef union
83 {
84 	_uint128_t all;
85 	struct {
86 		unsigned long low;
87 		unsigned long high;
88 	} s;
89 } utwords;
90 
91 /* Effects: if rem != 0, *rem = a % b
92  * Returns: a / b
93  */
94 
95 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
96 
97 static _uint128_t
98 __udivmodti4(_uint128_t a, _uint128_t b, _uint128_t *rem)
99 {
100 	const unsigned n_udword_bits = sizeof(unsigned long) * CHAR_BIT;
101 	const unsigned n_utword_bits = sizeof(_uint128_t) * CHAR_BIT;
102 	utwords n, d, q, r;
103 	unsigned sr;
104 
105 	n.all = a;
106 	d.all = b;
107 
108 	/* special cases, X is unknown, K != 0 */
109 	if (n.s.high == 0) {
110 		if (d.s.high == 0) {
111 			/* 0 X
112 			 * ---
113 			 * 0 X
114 			 */
115 			if (rem)
116 				*rem = n.s.low % d.s.low;
117 			return n.s.low / d.s.low;
118 		}
119 		/* 0 X
120 		 * ---
121 		 * K X
122 		 */
123 		if (rem)
124 			*rem = n.s.low;
125 		return 0;
126 	}
127 	/* n.s.high != 0 */
128 	if (d.s.low == 0) {
129 		if (d.s.high == 0) {
130 			/* K X
131 			 * ---
132 			 * 0 0
133 			 */
134 			if (rem)
135 				*rem = n.s.high % d.s.low;
136 			return n.s.high / d.s.low;
137 		}
138 		/* d.s.high != 0 */
139 		if (n.s.low == 0) {
140 			/* K 0
141 			 * ---
142 			 * K 0
143 			 */
144 			if (rem) {
145 				r.s.high = n.s.high % d.s.high;
146 				r.s.low = 0;
147 				*rem = r.all;
148 			}
149 			return n.s.high / d.s.high;
150 		}
151 		/* K K
152 		 * ---
153 		 * K 0
154 		 */
155 		if ((d.s.high & (d.s.high - 1)) == 0) { /* if d is a power of 2 */
156 			if (rem) {
157 				r.s.low = n.s.low;
158 				r.s.high = n.s.high & (d.s.high - 1);
159 				*rem = r.all;
160 			}
161 			return n.s.high >> __builtin_ctzl(d.s.high);
162 		}
163 		/* K K
164 		 * ---
165 		 * K 0
166 		 */
167 		sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high);
168 		/* 0 <= sr <= n_udword_bits - 2 or sr large */
169 		if (sr > n_udword_bits - 2) {
170 			if (rem)
171 				*rem = n.all;
172 			return 0;
173 		}
174 		++sr;
175 		/* 1 <= sr <= n_udword_bits - 1 */
176 		/* q.all = n.all << (n_utword_bits - sr); */
177 		q.s.low = 0;
178 		q.s.high = n.s.low << (n_udword_bits - sr);
179 		/* r.all = n.all >> sr; */
180 		r.s.high = n.s.high >> sr;
181 		r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
182 	} else {  /* d.s.low != 0 */
183 		if (d.s.high == 0) {
184 			/* K X
185 			 * ---
186 			 * 0 K
187 			 */
188 			if ((d.s.low & (d.s.low - 1)) == 0) { /* if d is a power of 2 */
189 				if (rem)
190 					*rem = n.s.low & (d.s.low - 1);
191 				if (d.s.low == 1)
192 					return n.all;
193 				sr = __builtin_ctzl(d.s.low);
194 				q.s.high = n.s.high >> sr;
195 				q.s.low = (n.s.high << (n_udword_bits - sr)) |
196 				    (n.s.low >> sr);
197 				return q.all;
198 			}
199 			/* K X
200 			 * ---
201 			 * 0 K
202 			 */
203 			sr = 1 + n_udword_bits + __builtin_clzl(d.s.low) -
204 			    __builtin_clzl(n.s.high);
205 			/* 2 <= sr <= n_utword_bits - 1
206 			 * q.all = n.all << (n_utword_bits - sr);
207 			 * r.all = n.all >> sr;
208 			 * if (sr == n_udword_bits)
209 			 * {
210 			 *     q.s.low = 0;
211 			 *     q.s.high = n.s.low;
212 			 *     r.s.high = 0;
213 			 *     r.s.low = n.s.high;
214 			 * }
215 			 * else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
216 			 * {
217 			 *     q.s.low = 0;
218 			 *     q.s.high = n.s.low << (n_udword_bits - sr);
219 			 *     r.s.high = n.s.high >> sr;
220 			 *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
221 			 * }
222 			 * else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
223 			 * {
224 			 *     q.s.low = n.s.low << (n_utword_bits - sr);
225 			 *     q.s.high = (n.s.high << (n_utword_bits - sr)) |
226 			 *              (n.s.low >> (sr - n_udword_bits));
227 			 *     r.s.high = 0;
228 			 *     r.s.low = n.s.high >> (sr - n_udword_bits);
229 			 * }
230 			 */
231 			q.s.low =  (n.s.low << (n_utword_bits - sr)) &
232 			    ((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
233 			q.s.high = ((n.s.low << ( n_udword_bits - sr)) &
234 			    ((long)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
235 			    (((n.s.high << (n_utword_bits - sr)) |
236 				(n.s.low >> (sr - n_udword_bits))) &
237 				((long)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
238 			r.s.high = (n.s.high >> sr) &
239 			    ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
240 			r.s.low =  ((n.s.high >> (sr - n_udword_bits)) &
241 			    ((long)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
242 			    (((n.s.high << (n_udword_bits - sr)) |
243 				(n.s.low >> sr)) &
244 				((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
245 		} else {
246 			/* K X
247 			 * ---
248 			 * K K
249 			 */
250 			sr = __builtin_clzl(d.s.high) - __builtin_clzl(n.s.high);
251 			/*0 <= sr <= n_udword_bits - 1 or sr large */
252 			if (sr > n_udword_bits - 1) {
253 				if (rem)
254 					*rem = n.all;
255 				return 0;
256 			}
257 			++sr;
258 			/* 1 <= sr <= n_udword_bits */
259 			/* q.all = n.all << (n_utword_bits - sr); */
260 			q.s.low = 0;
261 			q.s.high = n.s.low << (n_udword_bits - sr);
262 			/* r.all = n.all >> sr;
263 			 * if (sr < n_udword_bits)
264 			 * {
265 			 *     r.s.high = n.s.high >> sr;
266 			 *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
267 			 * }
268 			 * else
269 			 * {
270 			 *     r.s.high = 0;
271 			 *     r.s.low = n.s.high;
272 			 * }
273 			 */
274 			r.s.high = (n.s.high >> sr) &
275 			    ((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
276 			r.s.low = (n.s.high << (n_udword_bits - sr)) |
277 			    ((n.s.low >> sr) &
278 				((long)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
279 		}
280 	}
281 	/* Not a special case
282 	 * q and r are initialized with:
283 	 * q.all = n.all << (n_utword_bits - sr);
284 	 * r.all = n.all >> sr;
285 	 * 1 <= sr <= n_utword_bits - 1
286 	 */
287 	unsigned carry = 0;
288 	for (; sr > 0; --sr) {
289 		/* r:q = ((r:q)  << 1) | carry */
290 		r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
291 		r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
292 		q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
293 		q.s.low  = (q.s.low  << 1) | carry;
294 		/* carry = 0;
295 		 * if (r.all >= d.all)
296 		 * {
297 		 *     r.all -= d.all;
298 		 *      carry = 1;
299 		 * }
300 		 */
301 		const _int128_t s =
302 		    (_int128_t)(d.all - r.all - 1) >> (n_utword_bits - 1);
303 		carry = s & 1;
304 		r.all -= d.all & s;
305 	}
306 	q.all = (q.all << 1) | carry;
307 	if (rem)
308 		*rem = r.all;
309 	return q.all;
310 }
311 
312 _uint128_t __udivti3(_uint128_t a, _uint128_t b);
313 
314 _uint128_t
315 __udivti3(_uint128_t a, _uint128_t b)
316 {
317 	return __udivmodti4(a, b, 0);
318 }
319