/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#ifdef HOST_WORDS_BIGENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif

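/*
 * Illustrative sketch (not part of the original header): scaling a 64-bit
 * value by a 64-bit numerator/denominator with a full 128-bit intermediate,
 * composed from the mulu64() and divu128() primitives above.  The helper
 * name example_scale_u64 is hypothetical; den is assumed non-zero and the
 * final quotient is assumed to fit in 64 bits (the high word is discarded).
 */
static inline uint64_t example_scale_u64(uint64_t value, uint64_t num,
                                         uint64_t den)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, value, num);   /* 128-bit product in hi:lo */
    divu128(&lo, &hi, den);         /* 128-bit quotient back in hi:lo */
    return lo;                      /* assumes hi == 0 after the division */
}
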
/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

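/*
 * Illustrative sketches (not part of the original header): two common uses
 * of the count-leading/trailing-zero helpers above.  The helper names
 * example_log2_floor and example_is_aligned are hypothetical; align_bits is
 * assumed to be at most 64.
 */
static inline int example_log2_floor(uint64_t val)
{
    /* Index of the most significant set bit, or -1 when val == 0. */
    return 63 - clz64(val);
}

static inline bool example_is_aligned(uint64_t addr, unsigned align_bits)
{
    /* addr == 0 gives ctz64() == 64 and therefore counts as aligned. */
    return ctz64(addr) >= (int)align_bits;
}
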
/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}

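/*
 * Illustrative sketch (not part of the original header): clrsb64() counts the
 * redundant copies of the sign bit, so the minimum two's-complement width of
 * a signed value is 64 minus that count.  The helper name
 * example_signed_width is hypothetical.
 */
static inline int example_signed_width(int64_t v)
{
    /* e.g. 0 and -1 need 1 bit, 127 needs 8 bits, -129 needs 9 bits. */
    return 64 - clrsb64(v);
}
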
/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

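/*
 * Illustrative sketch (not part of the original header): with the
 * population-count helpers above, parity is a one-liner.  The helper name
 * example_parity64 is hypothetical.
 */
static inline int example_parity64(uint64_t val)
{
    /* 1 if an odd number of bits are set, 0 otherwise. */
    return ctpop64(val) & 1;
}
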
/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position.  */
    x = bswap16(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position.  */
    x = bswap32(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position.  */
    x = bswap64(x);
    /* Assign the correct nibble position.  */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position.  */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}

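/*
 * Illustrative sketch (not part of the original header): reversing only the
 * low @n bits of a value (e.g. for bit-reversed indexing) by reversing all
 * 64 bits and shifting the result back down.  The helper name
 * example_revbit_n is hypothetical; @n is assumed to be in [1, 64].
 */
static inline uint64_t example_revbit_n(uint64_t x, unsigned n)
{
    return revbit64(x) >> (64 - n);
}
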
/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    return v < 0 ? -v : v;
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

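/*
 * Illustrative sketch (not part of the original header): a saturating signed
 * addition built on sadd32_overflow().  Overflow can only occur when both
 * operands have the same sign, so the sign of @x selects the clamping bound.
 * The helper name example_sadd32_sat is hypothetical.
 */
static inline int32_t example_sadd32_sat(int32_t x, int32_t y)
{
    int32_t r;

    if (sadd32_overflow(x, y, &r)) {
        return x < 0 ? INT32_MIN : INT32_MAX;
    }
    return r;
}
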
/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    int64_t z = (int64_t)x * y;
    *ret = z;
    return *ret != z;
#endif
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi, lo;
    muls64(&lo, &hi, x, y);
    *ret = lo;
    return hi != ((int64_t)lo >> 63);
#endif
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t z = (uint64_t)x * y;
    *ret = z;
    return z > UINT32_MAX;
#endif
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi;
    mulu64(ret, &hi, x, y);
    return hi != 0;
#endif
}

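/*
 * Illustrative sketch (not part of the original header): a saturating
 * unsigned multiplication built on umul64_overflow().  The helper name
 * example_umul64_sat is hypothetical.
 */
static inline uint64_t example_umul64_sat(uint64_t x, uint64_t y)
{
    uint64_t r;

    return umul64_overflow(x, y, &r) ? UINT64_MAX : r;
}
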
/*
 * Unsigned 128x64 multiplication.
 * Returns true if the result was truncated to 128 bits; otherwise
 * returns false and stores the product in *plow and *phigh.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128) && \
    (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: Minuend
 * @y: Subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}

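/*
 * Illustrative sketch (not part of the original header): adding two 128-bit
 * values held as {low, high} word pairs by chaining uadd64_carry(); the
 * final carry-out reports wraparound.  The helper name example_add128 is
 * hypothetical.
 */
static inline bool example_add128(uint64_t *rlo, uint64_t *rhi,
                                  uint64_t alo, uint64_t ahi,
                                  uint64_t blo, uint64_t bhi)
{
    bool carry = false;

    *rlo = uadd64_carry(alo, blo, &carry);
    *rhi = uadd64_carry(ahi, bhi, &carry);
    return carry;
}
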
/* Host type specific sizes of these routines.  */

#if ULONG_MAX == UINT32_MAX
# define clzl   clz32
# define ctzl   ctz32
# define clol   clo32
# define ctol   cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl   clz64
# define ctzl   ctz64
# define clol   clo64
# define ctol   cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}

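/*
 * Illustrative sketch (not part of the original header): rounding a requested
 * size up to a power of two while handling the two pow2ceil() edge cases
 * noted above (0 maps to 1, values above 2^63 wrap to 0).  The helper name
 * example_pow2_size is hypothetical.
 */
static inline uint64_t example_pow2_size(uint64_t size)
{
    uint64_t p = pow2ceil(size);

    /* Fall back to the original size when the round-up would wrap to 0. */
    return p ? p : size;
}
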
static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables.  Shift values outside this range are
 * reduced modulo 128; the caller is responsible for validating
 * the shift amount and the plow/phigh pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables.  Shift values outside this range are
 * reduced modulo 128; the caller is responsible for validating
 * the shift amount and the plow/phigh pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR.  */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu.  */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}

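/*
 * Illustrative sketch (not part of the original header): using udiv_qrnnd()
 * to divide a 128-bit value n1:n0 by a 64-bit divisor.  The helper name
 * example_udiv128 is hypothetical.  As with the GMP original, the caller is
 * assumed to guarantee n1 < d (so the quotient fits in 64 bits); the generic
 * fallback above additionally expects a normalized divisor (top bit set).
 */
static inline uint64_t example_udiv128(uint64_t n1, uint64_t n0, uint64_t d,
                                       uint64_t *rem)
{
    return udiv_qrnnd(rem, n1, n0, d);
}
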
#endif