1 /**************************************************************************
2  *
3  * Copyright 2008 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 
29 /**
30  * Math utilities and approximations for common math functions.
31  * Reduced precision is usually acceptable in shaders...
32  *
33  * "fast" is used in the names of functions which are low-precision,
34  * or at least lower-precision than the normal C lib functions.
35  */
36 
37 
38 #ifndef U_MATH_H
39 #define U_MATH_H
40 
41 
42 #include "c99_math.h"
43 #include <assert.h>
44 #include <float.h>
45 #include <stdarg.h>
46 
47 #include "bitscan.h"
48 #include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */
49 
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53 
54 
/* Square root of two, for C libraries whose <math.h> does not provide it. */
#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif

/*
 * Lookup table consumed by util_fast_exp2().
 *
 * The table has 2^POW2_TABLE_SIZE_LOG2 entries.  util_fast_exp2() indexes it
 * with POW2_TABLE_OFFSET + (int)(frac * POW2_TABLE_SCALE), so the middle
 * entry corresponds to a fractional part of zero and negative fractions
 * select entries below it.
 */
#define POW2_TABLE_SIZE_LOG2 9
#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
extern float pow2_table[POW2_TABLE_SIZE];
64 
65 
/**
 * Initialize math module.  This should be called before using any
 * other functions in this module.
 *
 * NOTE(review): presumably this is what fills pow2_table and log2_table,
 * which the util_fast_*() helpers read -- confirm in the implementation.
 */
extern void
util_init_math(void);
72 
73 
/**
 * View the same 32 bits as a float, a signed integer or an unsigned integer.
 * Used throughout this file to inspect or build float bit patterns.
 */
union fi {
   float f;
   int32_t i;
   uint32_t ui;
};
79 
80 
/**
 * View the same 64 bits as a double, a signed integer or an unsigned integer.
 * Used by the util_is_double_*() / util_double_inf_sign() helpers.
 */
union di {
   double d;
   int64_t i;
   uint64_t ui;
};
86 
87 
88 /**
89  * Extract the IEEE float32 exponent.
90  */
91 static inline signed
util_get_float32_exponent(float x)92 util_get_float32_exponent(float x)
93 {
94    union fi f;
95 
96    f.f = x;
97 
98    return ((f.ui >> 23) & 0xff) - 127;
99 }
100 
101 
102 /**
103  * Fast version of 2^x
104  * Identity: exp2(a + b) = exp2(a) * exp2(b)
105  * Let ipart = int(x)
106  * Let fpart = x - ipart;
107  * So, exp2(x) = exp2(ipart) * exp2(fpart)
108  * Compute exp2(ipart) with i << ipart
109  * Compute exp2(fpart) with lookup table.
110  */
111 static inline float
util_fast_exp2(float x)112 util_fast_exp2(float x)
113 {
114    int32_t ipart;
115    float fpart, mpart;
116    union fi epart;
117 
118    if(x > 129.00000f)
119       return 3.402823466e+38f;
120 
121    if (x < -126.99999f)
122       return 0.0f;
123 
124    ipart = (int32_t) x;
125    fpart = x - (float) ipart;
126 
127    /* same as
128     *   epart.f = (float) (1 << ipart)
129     * but faster and without integer overflow for ipart > 31
130     */
131    epart.i = (ipart + 127 ) << 23;
132 
133    mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
134 
135    return epart.f * mpart;
136 }
137 
138 
/**
 * Fast, low-precision e^x, computed as 2^(x * log2(e)) through
 * util_fast_exp2().
 */
static inline float
util_fast_exp(float x)
{
   const float log2_of_e = 1.44269f;
   const float scaled = log2_of_e * x;

   return util_fast_exp2(scaled);
}
148 
149 
/*
 * Lookup table consumed by util_fast_log2().
 *
 * util_fast_log2() indexes it with the float mantissa rounded to
 * LOG2_TABLE_SIZE_LOG2 bits; the extra entry at index LOG2_TABLE_SCALE is
 * reachable when that rounding carries out of the top mantissa bits.
 */
#define LOG2_TABLE_SIZE_LOG2 16
#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2)
#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1)
extern float log2_table[LOG2_TABLE_SIZE];
154 
155 
156 /**
157  * Fast approximation to log2(x).
158  */
159 static inline float
util_fast_log2(float x)160 util_fast_log2(float x)
161 {
162    union fi num;
163    float epart, mpart;
164    num.f = x;
165    epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
166    /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */
167    mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)];
168    return epart + mpart;
169 }
170 
171 
/**
 * Fast, low-precision x^y, computed as 2^(log2(x) * y) with the fast
 * log2/exp2 helpers.
 */
static inline float
util_fast_pow(float x, float y)
{
   const float exponent = util_fast_log2(x) * y;

   return util_fast_exp2(exponent);
}
180 
181 
/**
 * Floor(x), returned as int.
 *
 * Both branches use the same trick: (3 << 22) is 1.5 * 2^23, a magnitude at
 * which consecutive floats are exactly 1 apart.  Adding it and storing the
 * sum as a float therefore rounds to an integer, and the difference of the
 * two resulting bit patterns, halved, is the floor.
 */
static inline int
util_ifloor(float f)
{
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
   /*
    * IEEE floor for computers that round to nearest or even.
    * 'f' must be between -4194304 and 4194303.
    * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
    * but uses some IEEE specific tricks for better speed.
    * Contributed by Josh Vanderhoof
    */
   int ai, bi;
   double af, bf;
   af = (3 << 22) + 0.5 + (double)f;
   bf = (3 << 22) + 0.5 - (double)f;
   /* GCC generates an extra fstp/fld without this. */
   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
   return (ai - bi) >> 1;
#else
   /* Portable version of the same computation: the float bit patterns are
    * read back through union fi instead of being stored by inline asm.
    */
   int ai, bi;
   double af, bf;
   union fi u;
   af = (3 << 22) + 0.5 + (double) f;
   bf = (3 << 22) + 0.5 - (double) f;
   u.f = (float) af;  ai = u.i;
   u.f = (float) bf;  bi = u.i;
   return (ai - bi) >> 1;
#endif
}
215 
216 
/**
 * Round float to nearest int.
 *
 * The portable branch adds 0.5 with the sign of f and truncates, so ties
 * round away from zero.  The x86 branches convert with the x87 fistp
 * instruction instead.
 * NOTE(review): fistp honors the FPU's current rounding mode, so ties may
 * round differently than in the portable branch -- confirm if it matters.
 */
static inline int
util_iround(float f)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   int r;
   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
   return r;
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
   int r;
   _asm {
      fld f
      fistp r
   }
   return r;
#else
   if (f >= 0.0f)
      return (int) (f + 0.5f);
   else
      return (int) (f - 0.5f);
#endif
}
241 
242 
/**
 * Approximate floating point comparison.
 *
 * \return true when a and b differ by at most tol (absolute difference).
 *         Any NaN operand makes the result false.
 */
static inline bool
util_is_approx(float a, float b, float tol)
{
   const float distance = fabsf(b - a);

   return distance <= tol;
}
251 
252 
253 /**
254  * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
255  * util_is_X_nan        = test if x is NaN
256  * util_X_inf_sign      = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
257  *
258  * NaN can be checked with x != x, however this fails with the fast math flag
259  **/
260 
261 
262 /**
263  * Single-float
264  */
265 static inline bool
util_is_inf_or_nan(float x)266 util_is_inf_or_nan(float x)
267 {
268    union fi tmp;
269    tmp.f = x;
270    return (tmp.ui & 0x7f800000) == 0x7f800000;
271 }
272 
273 
274 static inline bool
util_is_nan(float x)275 util_is_nan(float x)
276 {
277    union fi tmp;
278    tmp.f = x;
279    return (tmp.ui & 0x7fffffff) > 0x7f800000;
280 }
281 
282 
283 static inline int
util_inf_sign(float x)284 util_inf_sign(float x)
285 {
286    union fi tmp;
287    tmp.f = x;
288    if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
289       return 0;
290    }
291 
292    return (x < 0) ? -1 : 1;
293 }
294 
295 
296 /**
297  * Double-float
298  */
299 static inline bool
util_is_double_inf_or_nan(double x)300 util_is_double_inf_or_nan(double x)
301 {
302    union di tmp;
303    tmp.d = x;
304    return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL;
305 }
306 
307 
308 static inline bool
util_is_double_nan(double x)309 util_is_double_nan(double x)
310 {
311    union di tmp;
312    tmp.d = x;
313    return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
314 }
315 
316 
317 static inline int
util_double_inf_sign(double x)318 util_double_inf_sign(double x)
319 {
320    union di tmp;
321    tmp.d = x;
322    if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) {
323       return 0;
324    }
325 
326    return (x < 0) ? -1 : 1;
327 }
328 
329 
330 /**
331  * Half-float
332  */
/**
 * Return true when the half-float bit pattern x is +/-Inf or NaN, i.e. all
 * five exponent bits are set.
 */
static inline bool
util_is_half_inf_or_nan(int16_t x)
{
   const int exponent_mask = 0x7c00;

   return (x & exponent_mask) == exponent_mask;
}
338 
339 
/**
 * Return true when the half-float bit pattern x is NaN: with the sign bit
 * masked off it is strictly greater than the pattern of +Inf.
 */
static inline bool
util_is_half_nan(int16_t x)
{
   const int magnitude = x & 0x7fff;

   return magnitude > 0x7c00;
}
345 
346 
/**
 * Return +1 for a half-float +Inf pattern, -1 for -Inf and 0 otherwise.
 * The sign is taken from the sign of the int16_t itself.
 */
static inline int
util_half_inf_sign(int16_t x)
{
   if ((x & 0x7fff) == 0x7c00)
      return (x < 0) ? -1 : 1;

   return 0;
}
356 
357 
358 /**
359  * Return float bits.
360  */
361 static inline unsigned
fui(float f)362 fui( float f )
363 {
364    union fi fi;
365    fi.f = f;
366    return fi.ui;
367 }
368 
369 static inline float
uif(uint32_t ui)370 uif(uint32_t ui)
371 {
372    union fi fi;
373    fi.ui = ui;
374    return fi.f;
375 }
376 
377 
/**
 * Map an unsigned byte in [0, 255] to a float in [0, 1].
 */
static inline float
ubyte_to_float(uint8_t ub)
{
   const float scale = 1.0f / 255.0f;

   return (float) ub * scale;
}
386 
387 
388 /**
389  * Convert float in [0,1] to uint8_t in [0,255] with clamping.
390  */
391 static inline uint8_t
float_to_ubyte(float f)392 float_to_ubyte(float f)
393 {
394    /* return 0 for NaN too */
395    if (!(f > 0.0f)) {
396       return (uint8_t) 0;
397    }
398    else if (f >= 1.0f) {
399       return (uint8_t) 255;
400    }
401    else {
402       union fi tmp;
403       tmp.f = f;
404       tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
405       return (uint8_t) tmp.i;
406    }
407 }
408 
/**
 * Map an unsigned 16-bit value in [0, 65535] to a float in [0, 1].
 */
static inline float
ushort_to_float(uint16_t us)
{
   const float scale = 1.0f / 65535.0f;

   return (float) us * scale;
}
417 
418 
419 /**
420  * Convert float in [0,1] to uint16_t in [0,65535] with clamping.
421  */
422 static inline uint16_t
float_to_ushort(float f)423 float_to_ushort(float f)
424 {
425    /* return 0 for NaN too */
426    if (!(f > 0.0f)) {
427       return (uint16_t) 0;
428    }
429    else if (f >= 1.0f) {
430       return (uint16_t) 65535;
431    }
432    else {
433       union fi tmp;
434       tmp.f = f;
435       tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
436       return (uint16_t) tmp.i;
437    }
438 }
439 
/**
 * Map a signed byte to a float in [-1, 1].  Both -128 and -127 map to -1.
 */
static inline float
byte_to_float_tex(int8_t b)
{
   if (b == -128)
      return -1.0F;

   return (float) b / 127.0F;
}
445 
/**
 * Scale a float by 127 and truncate it to a signed byte.  No clamping is
 * performed, so the caller must keep f within [-1, 1].
 */
static inline int8_t
float_to_byte_tex(float f)
{
   const float scaled = 127.0F * f;

   return (int8_t) scaled;
}
451 
/**
 * Return floor(log2(n)), i.e. the index of the highest set bit.
 * Both 0 and 1 give 0.
 */
static inline unsigned
util_logbase2(unsigned n)
{
#if defined(HAVE___BUILTIN_CLZ)
   /* OR-ing in bit 0 keeps the argument of clz non-zero. */
   return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1));
#else
   unsigned pos = 0;
   unsigned step;

   /* Binary search for the top bit: test halves of 16, 8, 4, 2, 1 bits. */
   for (step = 16; step != 0; step >>= 1) {
      if (n >> step) {
         n >>= step;
         pos += step;
      }
   }

   return pos;
#endif
}
470 
/**
 * 64-bit variant of util_logbase2(): return floor(log2(n)), with 0 and 1
 * both giving 0.
 */
static inline uint64_t
util_logbase2_64(uint64_t n)
{
#if defined(HAVE___BUILTIN_CLZLL)
   /* OR-ing in bit 0 keeps the argument of clzll non-zero. */
   return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
   uint64_t pos = 0ull;
   unsigned step;

   /* Binary search for the top bit: test halves of 32, 16, ..., 1 bits. */
   for (step = 32; step != 0; step >>= 1) {
      if (n >> step) {
         n >>= step;
         pos += step;
      }
   }

   return pos;
#endif
}
487 
/**
 * Return ceil(log2(n)), or 0 when n is 0.  Put differently: the smallest x
 * for which n <= 2**x holds.
 */
static inline unsigned
util_logbase2_ceil(unsigned n)
{
   return (n <= 1) ? 0 : 1 + util_logbase2(n - 1);
}
500 
/**
 * 64-bit variant of util_logbase2_ceil(): return ceil(log2(n)), or 0 when
 * n is 0 or 1.
 */
static inline uint64_t
util_logbase2_ceil64(uint64_t n)
{
   return (n <= 1) ? 0 : 1ull + util_logbase2_64(n - 1);
}
509 
/**
 * Returns the smallest power of two >= x.
 *
 * Both 0 and 1 map to 1.  The result has to fit in an unsigned, so x must
 * not exceed the largest power of two that an unsigned can hold (2^31 for
 * a 32-bit unsigned); larger values are not supported.
 */
static inline unsigned
util_next_power_of_two(unsigned x)
{
   if (x <= 1)
      return 1;

#if defined(HAVE___BUILTIN_CLZ)
   /* Shift an unsigned 1: with a signed 1 a result of 2^31 would overflow
    * int, which is undefined behavior.  x - 1 is non-zero here, as clz
    * requires.
    */
   return 1u << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1));
#else
   {
      /* Smear the highest set bit of x - 1 into every lower position, then
       * add one.  Starting from x - 1 makes exact powers of two map to
       * themselves, so no separate power-of-two check is needed.
       */
      unsigned val = x - 1;

      val |= val >> 1;
      val |= val >> 2;
      val |= val >> 4;
      val |= val >> 8;
      val |= val >> 16;

      return val + 1;
   }
#endif
}
540 
/**
 * Returns the smallest power of two >= x (64-bit variant).
 *
 * Both 0 and 1 map to 1.  The result has to fit in a uint64_t, so x must
 * not exceed 2^63; larger values are not supported.
 */
static inline uint64_t
util_next_power_of_two64(uint64_t x)
{
   if (x <= 1)
      return 1;

#if defined(HAVE___BUILTIN_CLZLL)
   /* x - 1 is non-zero here, as clzll requires. */
   return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1)));
#else
   {
      /* Smear the highest set bit of x - 1 into every lower position, then
       * add one.  Starting from x - 1 makes exact powers of two map to
       * themselves, so no separate power-of-two check is needed.
       */
      uint64_t val = x - 1;

      val |= val >> 1;
      val |= val >> 2;
      val |= val >> 4;
      val |= val >> 8;
      val |= val >> 16;
      val |= val >> 32;

      return val + 1;
   }
#endif
}
569 
/**
 * Reverse the order of the 32 bits in n.
 *
 * Swaps progressively larger groups: adjacent bits, then pairs, nibbles,
 * bytes and finally the two 16-bit halves.
 * Algorithm taken from:
 * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
 */
static inline unsigned
util_bitreverse(unsigned n)
{
    unsigned v = n;

    v = ((v & 0xaaaaaaaau) >> 1) | ((v & 0x55555555u) << 1);
    v = ((v & 0xccccccccu) >> 2) | ((v & 0x33333333u) << 2);
    v = ((v & 0xf0f0f0f0u) >> 4) | ((v & 0x0f0f0f0fu) << 4);
    v = ((v & 0xff00ff00u) >> 8) | ((v & 0x00ff00ffu) << 8);
    v = ((v & 0xffff0000u) >> 16) | ((v & 0x0000ffffu) << 16);

    return v;
}
585 
/**
 * Convert from little endian to CPU byte order.
 *
 * On big-endian builds (UTIL_ARCH_BIG_ENDIAN non-zero) these expand to the
 * util_bswap*() helpers defined further down in this header; otherwise
 * they expand to their argument unchanged.
 */

#if UTIL_ARCH_BIG_ENDIAN
#define util_le64_to_cpu(x) util_bswap64(x)
#define util_le32_to_cpu(x) util_bswap32(x)
#define util_le16_to_cpu(x) util_bswap16(x)
#else
#define util_le64_to_cpu(x) (x)
#define util_le32_to_cpu(x) (x)
#define util_le16_to_cpu(x) (x)
#endif

/* A byte swap is its own inverse, so CPU -> little endian reuses the
 * little endian -> CPU macros.
 */
#define util_cpu_to_le64(x) util_le64_to_cpu(x)
#define util_cpu_to_le32(x) util_le32_to_cpu(x)
#define util_cpu_to_le16(x) util_le16_to_cpu(x)
603 
/**
 * Reverse the byte order of a 32-bit word (0xAABBCCDD -> 0xDDCCBBAA).
 */
static inline uint32_t
util_bswap32(uint32_t n)
{
#if defined(HAVE___BUILTIN_BSWAP32)
   return __builtin_bswap32(n);
#else
   const uint32_t byte0 = n & 0xffu;
   const uint32_t byte1 = (n >> 8) & 0xffu;
   const uint32_t byte2 = (n >> 16) & 0xffu;
   const uint32_t byte3 = n >> 24;

   return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
#endif
}
619 
/**
 * Reverse the byte order of a 64-bit word.
 *
 * Without the compiler builtin, each 32-bit half is byte-swapped with
 * util_bswap32() and the two halves trade places.
 */
static inline uint64_t
util_bswap64(uint64_t n)
{
#if defined(HAVE___BUILTIN_BSWAP64)
   return __builtin_bswap64(n);
#else
   const uint32_t low_half = (uint32_t) n;
   const uint32_t high_half = (uint32_t) (n >> 32);

   return ((uint64_t) util_bswap32(low_half) << 32) |
          util_bswap32(high_half);
#endif
}
633 
634 
/**
 * Swap the two bytes of a 16-bit word.
 */
static inline uint16_t
util_bswap16(uint16_t n)
{
   const uint16_t upper_moved_down = n >> 8;
   const uint16_t lower_moved_up = (uint16_t) (n << 8);

   return upper_moved_down | lower_moved_up;
}
644 
/**
 * Copy n bytes of 32-bit words from src to dest, storing them in little
 * endian byte order.
 *
 * On big-endian builds each word is byte-swapped and n must be a multiple
 * of 4; otherwise this is a plain memcpy().
 *
 * \return dest
 */
static inline void*
util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
{
#if UTIL_ARCH_BIG_ENDIAN
   uint32_t * restrict out = (uint32_t* restrict)dest;
   const uint32_t * restrict in = (const uint32_t* restrict)src;
   const size_t word_count = n / 4;
   size_t w;

   assert(n % 4 == 0);

   for (w = 0; w < word_count; w++)
      out[w] = util_bswap32(in[w]);

   return dest;
#else
   return memcpy(dest, src, n);
#endif
}
662 
/**
 * Clamp X to [MIN, MAX].
 * This is a macro to allow float, int, uint, etc. types.
 * We arbitrarily turn NaN into MIN.
 *
 * Arguments may be evaluated more than once, so they must not have side
 * effects.
 */
#define CLAMP( X, MIN, MAX )  ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )

/* Minimum / maximum of two values.  Arguments are evaluated more than once. */
#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )

/* Minimum / maximum of three values, built on MIN2 / MAX2. */
#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))

/* Minimum / maximum of four values, built on MIN3 / MAX3. */
#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D))
#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
678 
679 
/**
 * Round a value up to a multiple of a power-of-two alignment.
 *
 * A value that is already aligned is returned unchanged.
 *
 * \param value  Value to be rounded
 * \param alignment  Alignment value to be used.  This must be a power of two.
 *
 * \sa ROUND_DOWN_TO()
 */
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
   uintptr_t low_bits_clear;

   assert(util_is_power_of_two_nonzero(alignment));

   /* ~(alignment - 1) has every bit below the alignment cleared. */
   low_bits_clear = ~(alignment - 1);

   return (value + alignment - 1) & low_bits_clear;
}
697 
/**
 * Round a value up to a multiple of alignment, which may be any positive
 * number (not only a power of two).
 */
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
   uintptr_t multiples;

   assert(alignment > 0);

   /* Number of whole alignment units needed to cover value. */
   multiples = (value + alignment - 1) / alignment;

   return multiples * alignment;
}
707 
/**
 * Round a value down to a multiple of a power-of-two alignment.
 *
 * A value that is already aligned is returned unchanged.
 *
 * \param value  Value to be rounded
 * \param alignment  Alignment value to be used.  This must be a power of two.
 *
 * \sa ALIGN()
 */
static inline uintptr_t
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
{
   uintptr_t low_bits_clear;

   assert(util_is_power_of_two_nonzero(alignment));

   /* ~(alignment - 1) has every bit below the alignment cleared. */
   low_bits_clear = ~(alignment - 1);

   return value & low_bits_clear;
}
725 
/**
 * Round value up to a multiple of alignment.  Only works for power-of-two
 * alignments.
 */
static inline int
align(int value, int alignment)
{
   const int low_mask = alignment - 1;

   return (value + low_mask) & ~low_mask;
}
734 
/**
 * 64-bit variant of align(): round value up to a multiple of alignment.
 * Only works for power-of-two alignments.
 */
static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
   const uint64_t low_mask = (uint64_t)alignment - 1;

   return (value + low_mask) & ~low_mask;
}
740 
/**
 * Works like align(), but accepts alignments that are not powers of two.
 * alignment must be non-zero.
 */
static inline size_t
util_align_npot(size_t value, size_t alignment)
{
   const size_t excess = value % alignment;

   return excess ? value + (alignment - excess) : value;
}
751 
/**
 * Shift value right by levels, never returning less than 1.
 */
static inline unsigned
u_minify(unsigned value, unsigned levels)
{
    const unsigned shrunk = value >> levels;

    return MAX2(1, shrunk);
}
757 
/* Copy four elements from SRC to DST.  Both arguments are evaluated four
 * times.  Only defined here if no earlier header has provided it.
 */
#ifndef COPY_4V
#define COPY_4V( DST, SRC )         \
do {                                \
   (DST)[0] = (SRC)[0];             \
   (DST)[1] = (SRC)[1];             \
   (DST)[2] = (SRC)[2];             \
   (DST)[3] = (SRC)[3];             \
} while (0)
#endif


/* Alias of COPY_4V. */
#ifndef COPY_4FV
#define COPY_4FV( DST, SRC )  COPY_4V(DST, SRC)
#endif


/* Store V0..V3 into the first four elements of DST.  DST is evaluated
 * four times.
 */
#ifndef ASSIGN_4V
#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
do {                                     \
   (DST)[0] = (V0);                      \
   (DST)[1] = (V1);                      \
   (DST)[2] = (V2);                      \
   (DST)[3] = (V3);                      \
} while (0)
#endif
783 
784 
/**
 * Convert a float to unsigned fixed point with frac_bits fractional bits.
 * Negative inputs give 0; the scaled value is truncated.
 */
static inline uint32_t
util_unsigned_fixed(float value, unsigned frac_bits)
{
   if (value < 0)
      return 0;

   return (uint32_t)(value * (1<<frac_bits));
}
790 
/**
 * Convert a float to signed fixed point with frac_bits fractional bits.
 * The scaled value is truncated toward zero.
 */
static inline int32_t
util_signed_fixed(float value, unsigned frac_bits)
{
   const float scaled = value * (1<<frac_bits);

   return (int32_t)scaled;
}
796 
/*
 * Floating-point state helpers; implemented outside this header.
 * NOTE(review): the descriptions below are inferred from the names and
 * signatures only -- confirm against the implementation.
 */

/* Presumably returns the current floating-point control state. */
unsigned
util_fpstate_get(void);
/* Presumably enables denormals-are-zero handling starting from
 * current_fpstate and returns a state value.
 */
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
/* Presumably applies a state previously obtained from the calls above. */
void
util_fpstate_set(unsigned fpstate);
803 
/**
 * For indexed draw calls, return true if the vertex count to be drawn is
 * much lower than the vertex count that has to be uploaded, meaning
 * that the driver should flatten indices instead of trying to upload
 * a too big range.
 *
 * The tolerated upload/draw ratio shrinks as the draw gets larger:
 * 16x up to 32 vertices, 8x up to 1024 vertices and 4x above that.
 *
 * This is used by vertex upload code in u_vbuf and glthread.
 */
static inline bool
util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
                                   unsigned upload_vertex_count)
{
   unsigned max_ratio;

   if (draw_vertex_count > 1024)
      max_ratio = 4;
   else if (draw_vertex_count > 32)
      max_ratio = 8;
   else
      max_ratio = 16;

   return upload_vertex_count > draw_vertex_count * max_ratio;
}
823 
824 #ifdef __cplusplus
825 }
826 #endif
827 
828 #endif /* U_MATH_H */
829