/* cairo - a vector graphics library with display and print output
 *
 * Copyright © 2004 Keith Packard
 *
 * This library is free software; you can redistribute it and/or
 * modify it either under the terms of the GNU Lesser General Public
 * License version 2.1 as published by the Free Software Foundation
 * (the "LGPL") or, at your option, under the terms of the Mozilla
 * Public License Version 1.1 (the "MPL"). If you do not alter this
 * notice, a recipient may use your version of this file under either
 * the MPL or the LGPL.
 *
 * You should have received a copy of the LGPL along with this library
 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
 * You should have received a copy of the MPL along with this library
 * in the file COPYING-MPL-1.1
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
 * the specific language governing rights and limitations.
 *
 * The Original Code is the cairo graphics library.
 *
 * The Initial Developer of the Original Code is Keith Packard
 *
 * Contributor(s):
 *	Keith R. Packard <keithp@keithp.com>
 */

36 #include "cairoint.h"
37 
38 #if HAVE_UINT64_T
39 
40 #define uint64_lo32(i)	((i) & 0xffffffff)
41 #define uint64_hi32(i)	((i) >> 32)
42 #define uint64_lo(i)	((i) & 0xffffffff)
43 #define uint64_hi(i)	((i) >> 32)
44 #define uint64_shift32(i)   ((i) << 32)
45 #define uint64_carry32	(((uint64_t) 1) << 32)
46 
47 #define _cairo_uint32s_to_uint64(h,l) ((uint64_t) (h) << 32 | (l))
48 
49 #else
50 
51 #define uint64_lo32(i)	((i).lo)
52 #define uint64_hi32(i)	((i).hi)
53 
54 static cairo_uint64_t
uint64_lo(cairo_uint64_t i)55 uint64_lo (cairo_uint64_t i)
56 {
57     cairo_uint64_t  s;
58 
59     s.lo = i.lo;
60     s.hi = 0;
61     return s;
62 }
63 
64 static cairo_uint64_t
uint64_hi(cairo_uint64_t i)65 uint64_hi (cairo_uint64_t i)
66 {
67     cairo_uint64_t  s;
68 
69     s.lo = i.hi;
70     s.hi = 0;
71     return s;
72 }
73 
74 static cairo_uint64_t
uint64_shift32(cairo_uint64_t i)75 uint64_shift32 (cairo_uint64_t i)
76 {
77     cairo_uint64_t  s;
78 
79     s.lo = 0;
80     s.hi = i.lo;
81     return s;
82 }
83 
84 static const cairo_uint64_t uint64_carry32 = { 0, 1 };
85 
86 cairo_uint64_t
_cairo_uint32_to_uint64(uint32_t i)87 _cairo_uint32_to_uint64 (uint32_t i)
88 {
89     cairo_uint64_t	q;
90 
91     q.lo = i;
92     q.hi = 0;
93     return q;
94 }
95 
96 cairo_int64_t
_cairo_int32_to_int64(int32_t i)97 _cairo_int32_to_int64 (int32_t i)
98 {
99     cairo_uint64_t	q;
100 
101     q.lo = i;
102     q.hi = i < 0 ? -1 : 0;
103     return q;
104 }
105 
106 static cairo_uint64_t
_cairo_uint32s_to_uint64(uint32_t h,uint32_t l)107 _cairo_uint32s_to_uint64 (uint32_t h, uint32_t l)
108 {
109     cairo_uint64_t	q;
110 
111     q.lo = l;
112     q.hi = h;
113     return q;
114 }
115 
116 cairo_uint64_t
_cairo_uint64_add(cairo_uint64_t a,cairo_uint64_t b)117 _cairo_uint64_add (cairo_uint64_t a, cairo_uint64_t b)
118 {
119     cairo_uint64_t	s;
120 
121     s.hi = a.hi + b.hi;
122     s.lo = a.lo + b.lo;
123     if (s.lo < a.lo)
124 	s.hi++;
125     return s;
126 }
127 
128 cairo_uint64_t
_cairo_uint64_sub(cairo_uint64_t a,cairo_uint64_t b)129 _cairo_uint64_sub (cairo_uint64_t a, cairo_uint64_t b)
130 {
131     cairo_uint64_t	s;
132 
133     s.hi = a.hi - b.hi;
134     s.lo = a.lo - b.lo;
135     if (s.lo > a.lo)
136 	s.hi--;
137     return s;
138 }
139 
140 #define uint32_lo(i)	((i) & 0xffff)
141 #define uint32_hi(i)	((i) >> 16)
142 #define uint32_carry16	((1) << 16)
143 
144 cairo_uint64_t
_cairo_uint32x32_64_mul(uint32_t a,uint32_t b)145 _cairo_uint32x32_64_mul (uint32_t a, uint32_t b)
146 {
147     cairo_uint64_t  s;
148 
149     uint16_t	ah, al, bh, bl;
150     uint32_t	r0, r1, r2, r3;
151 
152     al = uint32_lo (a);
153     ah = uint32_hi (a);
154     bl = uint32_lo (b);
155     bh = uint32_hi (b);
156 
157     r0 = (uint32_t) al * bl;
158     r1 = (uint32_t) al * bh;
159     r2 = (uint32_t) ah * bl;
160     r3 = (uint32_t) ah * bh;
161 
162     r1 += uint32_hi(r0);    /* no carry possible */
163     r1 += r2;		    /* but this can carry */
164     if (r1 < r2)	    /* check */
165 	r3 += uint32_carry16;
166 
167     s.hi = r3 + uint32_hi(r1);
168     s.lo = (uint32_lo (r1) << 16) + uint32_lo (r0);
169     return s;
170 }
171 
172 cairo_int64_t
_cairo_int32x32_64_mul(int32_t a,int32_t b)173 _cairo_int32x32_64_mul (int32_t a, int32_t b)
174 {
175     cairo_int64_t s;
176     s = _cairo_uint32x32_64_mul ((uint32_t) a, (uint32_t) b);
177     if (a < 0)
178 	s.hi -= b;
179     if (b < 0)
180 	s.hi -= a;
181     return s;
182 }
183 
184 cairo_uint64_t
_cairo_uint64_mul(cairo_uint64_t a,cairo_uint64_t b)185 _cairo_uint64_mul (cairo_uint64_t a, cairo_uint64_t b)
186 {
187     cairo_uint64_t	s;
188 
189     s = _cairo_uint32x32_64_mul (a.lo, b.lo);
190     s.hi += a.lo * b.hi + a.hi * b.lo;
191     return s;
192 }
193 
194 cairo_uint64_t
_cairo_uint64_lsl(cairo_uint64_t a,int shift)195 _cairo_uint64_lsl (cairo_uint64_t a, int shift)
196 {
197     if (shift >= 32)
198     {
199 	a.hi = a.lo;
200 	a.lo = 0;
201 	shift -= 32;
202     }
203     if (shift)
204     {
205 	a.hi = a.hi << shift | a.lo >> (32 - shift);
206 	a.lo = a.lo << shift;
207     }
208     return a;
209 }
210 
211 cairo_uint64_t
_cairo_uint64_rsl(cairo_uint64_t a,int shift)212 _cairo_uint64_rsl (cairo_uint64_t a, int shift)
213 {
214     if (shift >= 32)
215     {
216 	a.lo = a.hi;
217 	a.hi = 0;
218 	shift -= 32;
219     }
220     if (shift)
221     {
222 	a.lo = a.lo >> shift | a.hi << (32 - shift);
223 	a.hi = a.hi >> shift;
224     }
225     return a;
226 }
227 
228 #define _cairo_uint32_rsa(a,n)	((uint32_t) (((int32_t) (a)) >> (n)))
229 
230 cairo_int64_t
_cairo_uint64_rsa(cairo_int64_t a,int shift)231 _cairo_uint64_rsa (cairo_int64_t a, int shift)
232 {
233     if (shift >= 32)
234     {
235 	a.lo = a.hi;
236 	a.hi = _cairo_uint32_rsa (a.hi, 31);
237 	shift -= 32;
238     }
239     if (shift)
240     {
241 	a.lo = a.lo >> shift | a.hi << (32 - shift);
242 	a.hi = _cairo_uint32_rsa (a.hi, shift);
243     }
244     return a;
245 }
246 
247 int
_cairo_uint64_lt(cairo_uint64_t a,cairo_uint64_t b)248 _cairo_uint64_lt (cairo_uint64_t a, cairo_uint64_t b)
249 {
250     return (a.hi < b.hi ||
251 	    (a.hi == b.hi && a.lo < b.lo));
252 }
253 
254 int
_cairo_uint64_eq(cairo_uint64_t a,cairo_uint64_t b)255 _cairo_uint64_eq (cairo_uint64_t a, cairo_uint64_t b)
256 {
257     return a.hi == b.hi && a.lo == b.lo;
258 }
259 
260 int
_cairo_int64_lt(cairo_int64_t a,cairo_int64_t b)261 _cairo_int64_lt (cairo_int64_t a, cairo_int64_t b)
262 {
263     if (_cairo_int64_negative (a) && !_cairo_int64_negative (b))
264 	return 1;
265     if (!_cairo_int64_negative (a) && _cairo_int64_negative (b))
266 	return 0;
267     return _cairo_uint64_lt (a, b);
268 }
269 
270 int
_cairo_uint64_cmp(cairo_uint64_t a,cairo_uint64_t b)271 _cairo_uint64_cmp (cairo_uint64_t a, cairo_uint64_t b)
272 {
273     if (a.hi < b.hi)
274 	return -1;
275     else if (a.hi > b.hi)
276 	return 1;
277     else if (a.lo < b.lo)
278 	return -1;
279     else if (a.lo > b.lo)
280 	return 1;
281     else
282 	return 0;
283 }
284 
285 int
_cairo_int64_cmp(cairo_int64_t a,cairo_int64_t b)286 _cairo_int64_cmp (cairo_int64_t a, cairo_int64_t b)
287 {
288     if (_cairo_int64_negative (a) && !_cairo_int64_negative (b))
289 	return -1;
290     if (!_cairo_int64_negative (a) && _cairo_int64_negative (b))
291 	return 1;
292 
293     return _cairo_uint64_cmp (a, b);
294 }
295 
296 cairo_uint64_t
_cairo_uint64_not(cairo_uint64_t a)297 _cairo_uint64_not (cairo_uint64_t a)
298 {
299     a.lo = ~a.lo;
300     a.hi = ~a.hi;
301     return a;
302 }
303 
304 cairo_uint64_t
_cairo_uint64_negate(cairo_uint64_t a)305 _cairo_uint64_negate (cairo_uint64_t a)
306 {
307     a.lo = ~a.lo;
308     a.hi = ~a.hi;
309     if (++a.lo == 0)
310 	++a.hi;
311     return a;
312 }
313 
314 /*
315  * Simple bit-at-a-time divide.
316  */
317 cairo_uquorem64_t
_cairo_uint64_divrem(cairo_uint64_t num,cairo_uint64_t den)318 _cairo_uint64_divrem (cairo_uint64_t num, cairo_uint64_t den)
319 {
320     cairo_uquorem64_t	qr;
321     cairo_uint64_t	bit;
322     cairo_uint64_t	quo;
323 
324     bit = _cairo_uint32_to_uint64 (1);
325 
326     /* normalize to make den >= num, but not overflow */
327     while (_cairo_uint64_lt (den, num) && (den.hi & 0x80000000) == 0)
328     {
329 	bit = _cairo_uint64_lsl (bit, 1);
330 	den = _cairo_uint64_lsl (den, 1);
331     }
332     quo = _cairo_uint32_to_uint64 (0);
333 
334     /* generate quotient, one bit at a time */
335     while (bit.hi | bit.lo)
336     {
337 	if (_cairo_uint64_le (den, num))
338 	{
339 	    num = _cairo_uint64_sub (num, den);
340 	    quo = _cairo_uint64_add (quo, bit);
341 	}
342 	bit = _cairo_uint64_rsl (bit, 1);
343 	den = _cairo_uint64_rsl (den, 1);
344     }
345     qr.quo = quo;
346     qr.rem = num;
347     return qr;
348 }
349 
350 #endif /* !HAVE_UINT64_T */
351 
352 #if HAVE_UINT128_T
353 cairo_uquorem128_t
_cairo_uint128_divrem(cairo_uint128_t num,cairo_uint128_t den)354 _cairo_uint128_divrem (cairo_uint128_t num, cairo_uint128_t den)
355 {
356     cairo_uquorem128_t	qr;
357 
358     qr.quo = num / den;
359     qr.rem = num % den;
360     return qr;
361 }
362 
363 #else
364 
365 cairo_uint128_t
_cairo_uint32_to_uint128(uint32_t i)366 _cairo_uint32_to_uint128 (uint32_t i)
367 {
368     cairo_uint128_t	q;
369 
370     q.lo = _cairo_uint32_to_uint64 (i);
371     q.hi = _cairo_uint32_to_uint64 (0);
372     return q;
373 }
374 
375 cairo_int128_t
_cairo_int32_to_int128(int32_t i)376 _cairo_int32_to_int128 (int32_t i)
377 {
378     cairo_int128_t	q;
379 
380     q.lo = _cairo_int32_to_int64 (i);
381     q.hi = _cairo_int32_to_int64 (i < 0 ? -1 : 0);
382     return q;
383 }
384 
385 cairo_uint128_t
_cairo_uint64_to_uint128(cairo_uint64_t i)386 _cairo_uint64_to_uint128 (cairo_uint64_t i)
387 {
388     cairo_uint128_t	q;
389 
390     q.lo = i;
391     q.hi = _cairo_uint32_to_uint64 (0);
392     return q;
393 }
394 
395 cairo_int128_t
_cairo_int64_to_int128(cairo_int64_t i)396 _cairo_int64_to_int128 (cairo_int64_t i)
397 {
398     cairo_int128_t	q;
399 
400     q.lo = i;
401     q.hi = _cairo_int32_to_int64 (_cairo_int64_negative(i) ? -1 : 0);
402     return q;
403 }
404 
405 cairo_uint128_t
_cairo_uint128_add(cairo_uint128_t a,cairo_uint128_t b)406 _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b)
407 {
408     cairo_uint128_t	s;
409 
410     s.hi = _cairo_uint64_add (a.hi, b.hi);
411     s.lo = _cairo_uint64_add (a.lo, b.lo);
412     if (_cairo_uint64_lt (s.lo, a.lo))
413 	s.hi = _cairo_uint64_add (s.hi, _cairo_uint32_to_uint64 (1));
414     return s;
415 }
416 
417 cairo_uint128_t
_cairo_uint128_sub(cairo_uint128_t a,cairo_uint128_t b)418 _cairo_uint128_sub (cairo_uint128_t a, cairo_uint128_t b)
419 {
420     cairo_uint128_t	s;
421 
422     s.hi = _cairo_uint64_sub (a.hi, b.hi);
423     s.lo = _cairo_uint64_sub (a.lo, b.lo);
424     if (_cairo_uint64_gt (s.lo, a.lo))
425 	s.hi = _cairo_uint64_sub (s.hi, _cairo_uint32_to_uint64(1));
426     return s;
427 }
428 
429 cairo_uint128_t
_cairo_uint64x64_128_mul(cairo_uint64_t a,cairo_uint64_t b)430 _cairo_uint64x64_128_mul (cairo_uint64_t a, cairo_uint64_t b)
431 {
432     cairo_uint128_t	s;
433     uint32_t		ah, al, bh, bl;
434     cairo_uint64_t	r0, r1, r2, r3;
435 
436     al = uint64_lo32 (a);
437     ah = uint64_hi32 (a);
438     bl = uint64_lo32 (b);
439     bh = uint64_hi32 (b);
440 
441     r0 = _cairo_uint32x32_64_mul (al, bl);
442     r1 = _cairo_uint32x32_64_mul (al, bh);
443     r2 = _cairo_uint32x32_64_mul (ah, bl);
444     r3 = _cairo_uint32x32_64_mul (ah, bh);
445 
446     r1 = _cairo_uint64_add (r1, uint64_hi (r0));    /* no carry possible */
447     r1 = _cairo_uint64_add (r1, r2);	    	    /* but this can carry */
448     if (_cairo_uint64_lt (r1, r2))		    /* check */
449 	r3 = _cairo_uint64_add (r3, uint64_carry32);
450 
451     s.hi = _cairo_uint64_add (r3, uint64_hi(r1));
452     s.lo = _cairo_uint64_add (uint64_shift32 (r1),
453 				uint64_lo (r0));
454     return s;
455 }
456 
457 cairo_int128_t
_cairo_int64x64_128_mul(cairo_int64_t a,cairo_int64_t b)458 _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b)
459 {
460     cairo_int128_t  s;
461     s = _cairo_uint64x64_128_mul (_cairo_int64_to_uint64(a),
462 				  _cairo_int64_to_uint64(b));
463     if (_cairo_int64_negative (a))
464 	s.hi = _cairo_uint64_sub (s.hi,
465 				  _cairo_int64_to_uint64 (b));
466     if (_cairo_int64_negative (b))
467 	s.hi = _cairo_uint64_sub (s.hi,
468 				  _cairo_int64_to_uint64 (a));
469     return s;
470 }
471 
472 cairo_uint128_t
_cairo_uint128_mul(cairo_uint128_t a,cairo_uint128_t b)473 _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b)
474 {
475     cairo_uint128_t	s;
476 
477     s = _cairo_uint64x64_128_mul (a.lo, b.lo);
478     s.hi = _cairo_uint64_add (s.hi,
479 				_cairo_uint64_mul (a.lo, b.hi));
480     s.hi = _cairo_uint64_add (s.hi,
481 				_cairo_uint64_mul (a.hi, b.lo));
482     return s;
483 }
484 
485 cairo_uint128_t
_cairo_uint128_lsl(cairo_uint128_t a,int shift)486 _cairo_uint128_lsl (cairo_uint128_t a, int shift)
487 {
488     if (shift >= 64)
489     {
490 	a.hi = a.lo;
491 	a.lo = _cairo_uint32_to_uint64 (0);
492 	shift -= 64;
493     }
494     if (shift)
495     {
496 	a.hi = _cairo_uint64_add (_cairo_uint64_lsl (a.hi, shift),
497 				    _cairo_uint64_rsl (a.lo, (64 - shift)));
498 	a.lo = _cairo_uint64_lsl (a.lo, shift);
499     }
500     return a;
501 }
502 
503 cairo_uint128_t
_cairo_uint128_rsl(cairo_uint128_t a,int shift)504 _cairo_uint128_rsl (cairo_uint128_t a, int shift)
505 {
506     if (shift >= 64)
507     {
508 	a.lo = a.hi;
509 	a.hi = _cairo_uint32_to_uint64 (0);
510 	shift -= 64;
511     }
512     if (shift)
513     {
514 	a.lo = _cairo_uint64_add (_cairo_uint64_rsl (a.lo, shift),
515 				    _cairo_uint64_lsl (a.hi, (64 - shift)));
516 	a.hi = _cairo_uint64_rsl (a.hi, shift);
517     }
518     return a;
519 }
520 
521 cairo_uint128_t
_cairo_uint128_rsa(cairo_int128_t a,int shift)522 _cairo_uint128_rsa (cairo_int128_t a, int shift)
523 {
524     if (shift >= 64)
525     {
526 	a.lo = a.hi;
527 	a.hi = _cairo_uint64_rsa (a.hi, 64-1);
528 	shift -= 64;
529     }
530     if (shift)
531     {
532 	a.lo = _cairo_uint64_add (_cairo_uint64_rsl (a.lo, shift),
533 				    _cairo_uint64_lsl (a.hi, (64 - shift)));
534 	a.hi = _cairo_uint64_rsa (a.hi, shift);
535     }
536     return a;
537 }
538 
539 int
_cairo_uint128_lt(cairo_uint128_t a,cairo_uint128_t b)540 _cairo_uint128_lt (cairo_uint128_t a, cairo_uint128_t b)
541 {
542     return (_cairo_uint64_lt (a.hi, b.hi) ||
543 	    (_cairo_uint64_eq (a.hi, b.hi) &&
544 	     _cairo_uint64_lt (a.lo, b.lo)));
545 }
546 
547 int
_cairo_int128_lt(cairo_int128_t a,cairo_int128_t b)548 _cairo_int128_lt (cairo_int128_t a, cairo_int128_t b)
549 {
550     if (_cairo_int128_negative (a) && !_cairo_int128_negative (b))
551 	return 1;
552     if (!_cairo_int128_negative (a) && _cairo_int128_negative (b))
553 	return 0;
554     return _cairo_uint128_lt (a, b);
555 }
556 
557 int
_cairo_uint128_cmp(cairo_uint128_t a,cairo_uint128_t b)558 _cairo_uint128_cmp (cairo_uint128_t a, cairo_uint128_t b)
559 {
560     int cmp;
561 
562     cmp = _cairo_uint64_cmp (a.hi, b.hi);
563     if (cmp)
564 	return cmp;
565     return _cairo_uint64_cmp (a.lo, b.lo);
566 }
567 
568 int
_cairo_int128_cmp(cairo_int128_t a,cairo_int128_t b)569 _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b)
570 {
571     if (_cairo_int128_negative (a) && !_cairo_int128_negative (b))
572 	return -1;
573     if (!_cairo_int128_negative (a) && _cairo_int128_negative (b))
574 	return 1;
575 
576     return _cairo_uint128_cmp (a, b);
577 }
578 
579 int
_cairo_uint128_eq(cairo_uint128_t a,cairo_uint128_t b)580 _cairo_uint128_eq (cairo_uint128_t a, cairo_uint128_t b)
581 {
582     return (_cairo_uint64_eq (a.hi, b.hi) &&
583 	    _cairo_uint64_eq (a.lo, b.lo));
584 }
585 
586 #if HAVE_UINT64_T
587 #define _cairo_msbset64(q)  (q & ((uint64_t) 1 << 63))
588 #else
589 #define _cairo_msbset64(q)  (q.hi & ((uint32_t) 1 << 31))
590 #endif
591 
592 cairo_uquorem128_t
_cairo_uint128_divrem(cairo_uint128_t num,cairo_uint128_t den)593 _cairo_uint128_divrem (cairo_uint128_t num, cairo_uint128_t den)
594 {
595     cairo_uquorem128_t	qr;
596     cairo_uint128_t	bit;
597     cairo_uint128_t	quo;
598 
599     bit = _cairo_uint32_to_uint128 (1);
600 
601     /* normalize to make den >= num, but not overflow */
602     while (_cairo_uint128_lt (den, num) && !_cairo_msbset64(den.hi))
603     {
604 	bit = _cairo_uint128_lsl (bit, 1);
605 	den = _cairo_uint128_lsl (den, 1);
606     }
607     quo = _cairo_uint32_to_uint128 (0);
608 
609     /* generate quotient, one bit at a time */
610     while (_cairo_uint128_ne (bit, _cairo_uint32_to_uint128(0)))
611     {
612 	if (_cairo_uint128_le (den, num))
613 	{
614 	    num = _cairo_uint128_sub (num, den);
615 	    quo = _cairo_uint128_add (quo, bit);
616 	}
617 	bit = _cairo_uint128_rsl (bit, 1);
618 	den = _cairo_uint128_rsl (den, 1);
619     }
620     qr.quo = quo;
621     qr.rem = num;
622     return qr;
623 }
624 
625 cairo_int128_t
_cairo_int128_negate(cairo_int128_t a)626 _cairo_int128_negate (cairo_int128_t a)
627 {
628     a.lo = _cairo_uint64_not (a.lo);
629     a.hi = _cairo_uint64_not (a.hi);
630     return _cairo_uint128_add (a, _cairo_uint32_to_uint128 (1));
631 }
632 
633 cairo_int128_t
_cairo_int128_not(cairo_int128_t a)634 _cairo_int128_not (cairo_int128_t a)
635 {
636     a.lo = _cairo_uint64_not (a.lo);
637     a.hi = _cairo_uint64_not (a.hi);
638     return a;
639 }
640 
641 #endif /* !HAVE_UINT128_T */
642 
643 cairo_quorem128_t
_cairo_int128_divrem(cairo_int128_t num,cairo_int128_t den)644 _cairo_int128_divrem (cairo_int128_t num, cairo_int128_t den)
645 {
646     int			num_neg = _cairo_int128_negative (num);
647     int			den_neg = _cairo_int128_negative (den);
648     cairo_uquorem128_t	uqr;
649     cairo_quorem128_t	qr;
650 
651     if (num_neg)
652 	num = _cairo_int128_negate (num);
653     if (den_neg)
654 	den = _cairo_int128_negate (den);
655     uqr = _cairo_uint128_divrem (num, den);
656     if (num_neg)
657 	qr.rem = _cairo_int128_negate (uqr.rem);
658     else
659 	qr.rem = uqr.rem;
660     if (num_neg != den_neg)
661 	qr.quo = _cairo_int128_negate (uqr.quo);
662     else
663 	qr.quo = uqr.quo;
664     return qr;
665 }
666 
667 /**
668  * _cairo_uint_96by64_32x64_divrem:
669  *
670  * Compute a 32 bit quotient and 64 bit remainder of a 96 bit unsigned
671  * dividend and 64 bit divisor.  If the quotient doesn't fit into 32
672  * bits then the returned remainder is equal to the divisor, and the
673  * quotient is the largest representable 64 bit integer.  It is an
674  * error to call this function with the high 32 bits of @num being
675  * non-zero. */
676 cairo_uquorem64_t
_cairo_uint_96by64_32x64_divrem(cairo_uint128_t num,cairo_uint64_t den)677 _cairo_uint_96by64_32x64_divrem (cairo_uint128_t num,
678 				 cairo_uint64_t den)
679 {
680     cairo_uquorem64_t result;
681     cairo_uint64_t B = _cairo_uint32s_to_uint64 (1, 0);
682 
683     /* These are the high 64 bits of the *96* bit numerator.  We're
684      * going to represent the numerator as xB + y, where x is a 64,
685      * and y is a 32 bit number. */
686     cairo_uint64_t x = _cairo_uint128_to_uint64 (_cairo_uint128_rsl(num, 32));
687 
688     /* Initialise the result to indicate overflow. */
689     result.quo = _cairo_uint32s_to_uint64 (-1U, -1U);
690     result.rem = den;
691 
692     /* Don't bother if the quotient is going to overflow. */
693     if (_cairo_uint64_ge (x, den)) {
694 	return /* overflow */ result;
695     }
696 
697     if (_cairo_uint64_lt (x, B)) {
698 	/* When the final quotient is known to fit in 32 bits, then
699 	 * num < 2^64 if and only if den < 2^32. */
700 	return _cairo_uint64_divrem (_cairo_uint128_to_uint64 (num), den);
701     }
702     else {
703 	/* Denominator is >= 2^32. the numerator is >= 2^64, and the
704 	 * division won't overflow: need two divrems.  Write the
705 	 * numerator and denominator as
706 	 *
707 	 *	num = xB + y		x : 64 bits, y : 32 bits
708 	 *	den = uB + v		u, v : 32 bits
709 	 */
710 	uint32_t y = _cairo_uint128_to_uint32 (num);
711 	uint32_t u = uint64_hi32 (den);
712 	uint32_t v = _cairo_uint64_to_uint32 (den);
713 
714 	/* Compute a lower bound approximate quotient of num/den
715 	 * from x/(u+1).  Then we have
716 	 *
717 	 * x	= q(u+1) + r	; q : 32 bits, r <= u : 32 bits.
718 	 *
719 	 * xB + y	= q(u+1)B	+ (rB+y)
720 	 *		= q(uB + B + v - v) + (rB+y)
721 	 *		= q(uB + v)	+ qB - qv + (rB+y)
722 	 *		= q(uB + v)	+ q(B-v) + (rB+y)
723 	 *
724 	 * The true quotient of num/den then is q plus the
725 	 * contribution of q(B-v) + (rB+y).  The main contribution
726 	 * comes from the term q(B-v), with the term (rB+y) only
727 	 * contributing at most one part.
728 	 *
729 	 * The term q(B-v) must fit into 64 bits, since q fits into 32
730 	 * bits on account of being a lower bound to the true
731 	 * quotient, and as B-v <= 2^32, we may safely use a single
732 	 * 64/64 bit division to find its contribution. */
733 
734 	cairo_uquorem64_t quorem;
735 	cairo_uint64_t remainder; /* will contain final remainder */
736 	uint32_t quotient;	/* will contain final quotient. */
737 	uint32_t q;
738 	uint32_t r;
739 
740 	/* Approximate quotient by dividing the high 64 bits of num by
741 	 * u+1. Watch out for overflow of u+1. */
742 	if (u+1) {
743 	    quorem = _cairo_uint64_divrem (x, _cairo_uint32_to_uint64 (u+1));
744 	    q = _cairo_uint64_to_uint32 (quorem.quo);
745 	    r = _cairo_uint64_to_uint32 (quorem.rem);
746 	}
747 	else {
748 	    q = uint64_hi32 (x);
749 	    r = _cairo_uint64_to_uint32 (x);
750 	}
751 	quotient = q;
752 
753 	/* Add the main term's contribution to quotient.  Note B-v =
754 	 * -v as an uint32 (unless v = 0) */
755 	if (v)
756 	    quorem = _cairo_uint64_divrem (_cairo_uint32x32_64_mul (q, -v), den);
757 	else
758 	    quorem = _cairo_uint64_divrem (_cairo_uint32s_to_uint64 (q, 0), den);
759 	quotient += _cairo_uint64_to_uint32 (quorem.quo);
760 
761 	/* Add the contribution of the subterm and start computing the
762 	 * true remainder. */
763 	remainder = _cairo_uint32s_to_uint64 (r, y);
764 	if (_cairo_uint64_ge (remainder, den)) {
765 	    remainder = _cairo_uint64_sub (remainder, den);
766 	    quotient++;
767 	}
768 
769 	/* Add the contribution of the main term's remainder. The
770 	 * funky test here checks that remainder + main_rem >= den,
771 	 * taking into account overflow of the addition. */
772 	remainder = _cairo_uint64_add (remainder, quorem.rem);
773 	if (_cairo_uint64_ge (remainder, den) ||
774 	    _cairo_uint64_lt (remainder, quorem.rem))
775 	{
776 	    remainder = _cairo_uint64_sub (remainder, den);
777 	    quotient++;
778 	}
779 
780 	result.quo = _cairo_uint32_to_uint64 (quotient);
781 	result.rem = remainder;
782     }
783     return result;
784 }
785 
786 cairo_quorem64_t
_cairo_int_96by64_32x64_divrem(cairo_int128_t num,cairo_int64_t den)787 _cairo_int_96by64_32x64_divrem (cairo_int128_t num, cairo_int64_t den)
788 {
789     int			num_neg = _cairo_int128_negative (num);
790     int			den_neg = _cairo_int64_negative (den);
791     cairo_uint64_t	nonneg_den;
792     cairo_uquorem64_t	uqr;
793     cairo_quorem64_t	qr;
794 
795     if (num_neg)
796 	num = _cairo_int128_negate (num);
797     if (den_neg)
798 	nonneg_den = _cairo_int64_negate (den);
799     else
800 	nonneg_den = den;
801 
802     uqr = _cairo_uint_96by64_32x64_divrem (num, nonneg_den);
803     if (_cairo_uint64_eq (uqr.rem, nonneg_den)) {
804 	/* bail on overflow. */
805 	qr.quo = _cairo_uint32s_to_uint64 (0x7FFFFFFF, -1U);
806 	qr.rem = den;
807 	return qr;
808     }
809 
810     if (num_neg)
811 	qr.rem = _cairo_int64_negate (uqr.rem);
812     else
813 	qr.rem = uqr.rem;
814     if (num_neg != den_neg)
815 	qr.quo = _cairo_int64_negate (uqr.quo);
816     else
817 	qr.quo = uqr.quo;
818     return qr;
819 }
820