1 //
2 // Copyright (c) 2002-2013 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // mathutil.h: Math and bit manipulation functions.
8 
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11 
12 #include <limits>
13 #include <algorithm>
14 #include <math.h>
15 #include <string.h>
16 #include <stdint.h>
17 #include <stdlib.h>
18 
19 #include <anglebase/numerics/safe_math.h>
20 
21 #include "common/debug.h"
22 #include "common/platform.h"
23 
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 }
29 
30 namespace gl
31 {
32 
// Bit pattern of 1.0 in 32-bit floating point.
constexpr unsigned int Float32One = 0x3F800000u;
// Bit pattern of 1.0 in 16-bit floating point.
constexpr unsigned short Float16One = 0x3C00u;
35 
// Returns true when |x| is a non-zero value with exactly one bit set.
// Only integer types are accepted (enforced at compile time).
template <typename T>
inline bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    if (x == 0)
    {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when it was the sole set bit.
    return (x & (x - 1)) == 0;
}
42 
// Integer base-2 logarithm, rounded down. Any input not greater than 1 yields 0.
inline int log2(int x)
{
    int exponent = 0;
    // Advance until shifting by |exponent| leaves at most 1.
    for (; (x >> exponent) > 1; ++exponent)
    {
    }
    return exponent;
}
49 
// Rounds |x| up to the next power of two (values that already are one are returned unchanged).
// An input of 0 yields 1; inputs above 2^31 wrap around to 0.
inline unsigned int ceilPow2(unsigned int x)
{
    if (x != 0)
    {
        --x;
    }

    // Copy the highest set bit into every lower position, then add one.
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        x |= x >> shift;
    }

    return x + 1;
}
62 
// Converts |value| to DestT, saturating at DestT's limits when the value does not fit.
// The range comparison is done in long double so integer and floating-point types can be mixed.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // numeric_limits::min() is the smallest positive normalized value for floating-point types,
    // so lowest() is used to obtain the true lower bound.
    constexpr long double kDestLowest = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestMax    = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcLowest  = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcMax     = static_cast<long double>(SrcLimits::max());

    // Upper saturation is only needed when the source can exceed the destination maximum.
    if (kDestMax < kSrcMax && value >= static_cast<SrcT>(DestLimits::max()))
    {
        return DestLimits::max();
    }

    // Lower saturation is only needed when the source can go below the destination minimum.
    if (kDestLowest > kSrcLowest && value <= static_cast<SrcT>(DestLimits::lowest()))
    {
        return DestLimits::lowest();
    }

    return static_cast<DestT>(value);
}
96 
97 // Specialize clampCast for bool->int conversion to avoid MSVS 2015 performance warning when the max
98 // value is casted to the source type.
99 template <>
clampCast(bool value)100 inline unsigned int clampCast(bool value)
101 {
102     return static_cast<unsigned int>(value);
103 }
104 
105 template <>
clampCast(bool value)106 inline int clampCast(bool value)
107 {
108     return static_cast<int>(value);
109 }
110 
// Restricts |x| to the interval [min, max] and returns the result as T.
// Every comparison involving NaN is false, so a NaN input produces |min|.
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // Kept as conditional expressions so mixed argument types convert exactly as before.
    return !(x > min) ? min : ((x > max) ? max : x);
}
117 
// Restricts |x| to the range [0, 1]. NaN maps to 0 because it fails the first comparison.
inline float clamp01(float x)
{
    if (x > 0.0f)
    {
        return (x > 1.0f) ? 1.0f : x;
    }
    return 0.0f;
}
122 
// Converts |x| to an n-bit unsigned normalized integer: values above 1 map to the largest
// n-bit value, values below 0 map to 0, and everything in between is scaled and rounded.
template <const int n>
inline unsigned int unorm(float x)
{
    const unsigned int maxValue = 0xFFFFFFFF >> (32 - n);

    if (x > 1)
    {
        return maxValue;
    }
    if (x < 0)
    {
        return 0;
    }
    // Adding 0.5 before truncation rounds to the nearest integer.
    return static_cast<unsigned int>(maxValue * x + 0.5f);
}
141 
// Reports whether SSE2 can be used. Always false unless ANGLE_USE_SSE is defined. When it is
// defined, the answer is computed on the first call and cached in function-local statics;
// the CPU is only queried (via __cpuid) on non-ARM Windows builds.
inline bool supportsSSE2()
{
#if defined(ANGLE_USE_SSE)
    static bool checked  = false;
    static bool supports = false;

    if (!checked)
    {
#if defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        int cpuInfo[4];
        __cpuid(cpuInfo, 0);

        // Function 1 is only queried when the CPU reports that it is available.
        if (cpuInfo[0] >= 1)
        {
            __cpuid(cpuInfo, 1);

            // Bit 26 of the fourth returned register is used as the feature flag.
            supports = ((cpuInfo[3] >> 26) & 1) != 0;
        }
#endif  // defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        checked = true;
    }

    return supports;
#else   // defined(ANGLE_USE_SSE)
    return false;
#endif  // defined(ANGLE_USE_SSE)
}
172 
// Reinterprets the bytes of |source| as a destType by copying them with memcpy.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied. The result is
// value-initialized first so that, when destType is the larger type, the bytes that are not
// overwritten hold zero instead of indeterminate data.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
181 
// Converts a 32-bit float to the bit pattern of a 16-bit float (1 sign, 5 exponent and
// 10 mantissa bits). The sign is preserved.
//  - NaN inputs produce a NaN pattern (all exponent and mantissa bits set).
//  - Infinity and magnitudes too large for a half float produce infinity (0x7C00).
//  - Magnitudes below the smallest normalized half float produce a denormal or zero.
// Discarded mantissa bits are rounded by adding 0xFFF plus the lowest kept bit before the shift.
inline unsigned short float32ToFloat16(float fp32)
{
    static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide.");

    // Read the raw bits of the float; memcpy avoids aliasing problems.
    unsigned int fp32i = 0;
    memcpy(&fp32i, &fp32, sizeof(fp32i));

    const unsigned int sign = (fp32i & 0x80000000) >> 16;
    unsigned int magnitude  = fp32i & 0x7FFFFFFF;

    if (magnitude > 0x7F800000)  // NaN
    {
        return static_cast<unsigned short>(sign | 0x7FFF);
    }
    else if (magnitude > 0x47FFEFFF)  // Infinity, or a value that rounds past the largest half
    {
        return static_cast<unsigned short>(sign | 0x7C00);
    }
    else if (magnitude < 0x38800000)  // Denormal
    {
        const unsigned int mantissa = (magnitude & 0x007FFFFF) | 0x00800000;
        const int e                 = 113 - (magnitude >> 23);

        // Shifting by 24 or more would discard the whole 24-bit significand.
        magnitude = (e < 24) ? (mantissa >> e) : 0;

        return static_cast<unsigned short>(
            sign | (magnitude + 0x00000FFF + ((magnitude >> 13) & 1)) >> 13);
    }
    else
    {
        // Adding 0xC8000000 rebiases the exponent from 127 to 15 (modulo 2^32).
        return static_cast<unsigned short>(
            sign | (magnitude + 0xC8000000 + 0x00000FFF + ((magnitude >> 13) & 1)) >> 13);
    }
}
213 
// Declaration only; the definition is not in this header. Presumably the inverse of
// float32ToFloat16 (16-bit float bit pattern -> 32-bit float) -- confirm against the definition.
214 float float16ToFloat32(unsigned short h);
215 
// Declarations only; the definitions are not in this header. Judging by the names these pack
// three floats into, and unpack them from, a 32-bit "999E5" value -- NOTE(review): confirm the
// exact format against the definitions.
216 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
217 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
218 
// Converts a 32-bit float to the bit pattern of an unsigned 11-bit float (5 exponent and
// 6 mantissa bits, no sign bit).
//  - NaN stays NaN; +INF maps to the float11 infinity pattern.
//  - Negative inputs, including -INF, are clamped to 0 because float11 has no sign.
//  - Values above the largest finite float11 are clamped to that maximum.
//  - Values below the smallest normalized float11 become denormals; values too small even for
//    that become 0.
inline unsigned short float32ToFloat11(float fp32)
{
    static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide.");

    const unsigned int float32MantissaMask     = 0x7FFFFF;
    const unsigned int float32ExponentMask     = 0x7F800000;
    const unsigned int float32SignMask         = 0x80000000;
    const unsigned int float32ValueMask        = ~float32SignMask;
    const unsigned int float32ExponentFirstBit = 23;
    const unsigned int float32ExponentBias     = 127;
    // Number of significand bits including the implicit leading one.
    const unsigned int float32SignificandBits = float32ExponentFirstBit + 1;

    const unsigned short float11Max          = 0x7BF;
    const unsigned short float11MantissaMask = 0x3F;
    const unsigned short float11ExponentMask = 0x7C0;
    const unsigned short float11BitMask      = 0x7FF;
    const unsigned int float11ExponentBias   = 14;

    const unsigned int float32Maxfloat11 = 0x477E0000;
    const unsigned int float32Minfloat11 = 0x38800000;

    // Read the raw bits of the float; memcpy avoids aliasing problems.
    unsigned int float32Bits = 0;
    memcpy(&float32Bits, &fp32, sizeof(float32Bits));
    const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

    unsigned int float32Val = float32Bits & float32ValueMask;

    if ((float32Val & float32ExponentMask) == float32ExponentMask)
    {
        // INF or NAN
        if ((float32Val & float32MantissaMask) != 0)
        {
            // Fold the float32 mantissa down so the float11 mantissa stays non-zero (NaN).
            return static_cast<unsigned short>(
                float11ExponentMask |
                (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
                 float11MantissaMask));
        }
        else if (float32Sign)
        {
            // -INF is clamped to 0 since float11 is positive only
            return 0;
        }
        else
        {
            return float11ExponentMask;
        }
    }
    else if (float32Sign)
    {
        // float11 is positive only, so clamp to zero
        return 0;
    }
    else if (float32Val > float32Maxfloat11)
    {
        // The number is too large to be represented as a float11, set to max
        return float11Max;
    }
    else
    {
        if (float32Val < float32Minfloat11)
        {
            // The number is too small to be represented as a normalized float11
            // Convert it to a denormalized value.
            const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
                                       (float32Val >> float32ExponentFirstBit);
            // The shift can be as large as 113. Shifting a 32-bit value by 32 or more is
            // undefined, and any shift of 24 or more discards the whole significand anyway,
            // so such inputs are flushed to zero explicitly.
            float32Val = (shift < float32SignificandBits)
                             ? (((1u << float32ExponentFirstBit) |
                                 (float32Val & float32MantissaMask)) >>
                                shift)
                             : 0u;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized float11
            float32Val += 0xC8000000;
        }

        // Drop the 17 low mantissa bits, rounding with 0xFFFF plus the lowest kept bit.
        return static_cast<unsigned short>(
            ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask);
    }
}
287 
// Converts a 32-bit float to the bit pattern of an unsigned 10-bit float (5 exponent and
// 5 mantissa bits, no sign bit).
//  - NaN stays NaN; +INF maps to the float10 infinity pattern.
//  - Negative inputs, including -INF, are clamped to 0 because float10 has no sign.
//  - Values above the largest finite float10 are clamped to that maximum.
//  - Values below the smallest normalized float10 become denormals; values too small even for
//    that become 0.
inline unsigned short float32ToFloat10(float fp32)
{
    static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide.");

    const unsigned int float32MantissaMask     = 0x7FFFFF;
    const unsigned int float32ExponentMask     = 0x7F800000;
    const unsigned int float32SignMask         = 0x80000000;
    const unsigned int float32ValueMask        = ~float32SignMask;
    const unsigned int float32ExponentFirstBit = 23;
    const unsigned int float32ExponentBias     = 127;
    // Number of significand bits including the implicit leading one.
    const unsigned int float32SignificandBits = float32ExponentFirstBit + 1;

    const unsigned short float10Max          = 0x3DF;
    const unsigned short float10MantissaMask = 0x1F;
    const unsigned short float10ExponentMask = 0x3E0;
    const unsigned short float10BitMask      = 0x3FF;
    const unsigned int float10ExponentBias   = 14;

    const unsigned int float32Maxfloat10 = 0x477C0000;
    const unsigned int float32Minfloat10 = 0x38800000;

    // Read the raw bits of the float; memcpy avoids aliasing problems.
    unsigned int float32Bits = 0;
    memcpy(&float32Bits, &fp32, sizeof(float32Bits));
    const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

    unsigned int float32Val = float32Bits & float32ValueMask;

    if ((float32Val & float32ExponentMask) == float32ExponentMask)
    {
        // INF or NAN
        if ((float32Val & float32MantissaMask) != 0)
        {
            // Fold the float32 mantissa down so the float10 mantissa stays non-zero (NaN).
            return static_cast<unsigned short>(
                float10ExponentMask |
                (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
                 float10MantissaMask));
        }
        else if (float32Sign)
        {
            // -INF is clamped to 0 since float10 is positive only
            return 0;
        }
        else
        {
            return float10ExponentMask;
        }
    }
    else if (float32Sign)
    {
        // float10 is positive only, so clamp to zero
        return 0;
    }
    else if (float32Val > float32Maxfloat10)
    {
        // The number is too large to be represented as a float10, set to max
        return float10Max;
    }
    else
    {
        if (float32Val < float32Minfloat10)
        {
            // The number is too small to be represented as a normalized float10
            // Convert it to a denormalized value.
            const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
                                       (float32Val >> float32ExponentFirstBit);
            // The shift can be as large as 113. Shifting a 32-bit value by 32 or more is
            // undefined, and any shift of 24 or more discards the whole significand anyway,
            // so such inputs are flushed to zero explicitly.
            float32Val = (shift < float32SignificandBits)
                             ? (((1u << float32ExponentFirstBit) |
                                 (float32Val & float32MantissaMask)) >>
                                shift)
                             : 0u;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized float10
            float32Val += 0xC8000000;
        }

        // Drop the 18 low mantissa bits, rounding with 0x1FFFF plus the lowest kept bit.
        return static_cast<unsigned short>(
            ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask);
    }
}
356 
// Converts the bit pattern of an unsigned 11-bit float (5 exponent and 6 mantissa bits) to a
// 32-bit float. Infinity/NaN, normalized values, denormals and zero are all handled.
// The exponent is tracked as a signed int so that renormalizing a denormal cannot wrap around
// and overflow the shift that builds the float32 exponent field.
inline float float11ToFloat32(unsigned short fp11)
{
    static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide.");

    int exponent          = (fp11 >> 6) & 0x1F;
    unsigned int mantissa = fp11 & 0x3F;
    unsigned int bits     = 0;  // Stays 0 (+0.0f) when the input is zero.

    if (exponent == 0x1F)
    {
        // INF or NAN: all float32 exponent bits set, mantissa moved to the top of the field.
        bits = 0x7f800000u | (mantissa << 17);
    }
    else if (exponent != 0 || mantissa != 0)
    {
        if (exponent == 0)
        {
            // The value is denormalized: shift the mantissa up until its leading one reaches
            // the implicit-bit position, lowering the exponent once per step.
            exponent = 1;

            do
            {
                --exponent;
                mantissa <<= 1;
            } while ((mantissa & 0x40) == 0);

            mantissa &= 0x3F;
        }

        // Rebias the exponent from 15 to 127 (+112); the sum is always positive here.
        bits = (static_cast<unsigned int>(exponent + 112) << 23) | (mantissa << 17);
    }

    float result = 0.0f;
    memcpy(&result, &bits, sizeof(result));
    return result;
}
395 
// Converts the bit pattern of an unsigned 10-bit float (5 exponent and 5 mantissa bits) to a
// 32-bit float. Infinity/NaN, normalized values, denormals and zero are all handled.
// The exponent is tracked as a signed int so that renormalizing a denormal cannot wrap around
// and overflow the shift that builds the float32 exponent field.
// The parameter keeps its historical name |fp11| although it carries a 10-bit float.
inline float float10ToFloat32(unsigned short fp11)
{
    static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide.");

    int exponent          = (fp11 >> 5) & 0x1F;
    unsigned int mantissa = fp11 & 0x1F;
    unsigned int bits     = 0;  // Stays 0 (+0.0f) when the input is zero.

    if (exponent == 0x1F)
    {
        // INF or NAN: any non-zero mantissa yields a NaN, a zero mantissa yields infinity.
        bits = 0x7f800000u | (mantissa << 17);
    }
    else if (exponent != 0 || mantissa != 0)
    {
        if (exponent == 0)
        {
            // The value is denormalized: shift the mantissa up until its leading one reaches
            // the implicit-bit position, lowering the exponent once per step.
            exponent = 1;

            do
            {
                --exponent;
                mantissa <<= 1;
            } while ((mantissa & 0x20) == 0);

            mantissa &= 0x1F;
        }

        // Rebias the exponent from 15 to 127 (+112); the sum is always positive here.
        bits = (static_cast<unsigned int>(exponent + 112) << 23) | (mantissa << 18);
    }

    float result = 0.0f;
    memcpy(&result, &bits, sizeof(result));
    return result;
}
434 
// Maps an integer to a float by dividing by the largest value of its type, so the maximum of T
// maps to (approximately) 1.0.
template <typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");

    // Multiply by the reciprocal of the type's maximum instead of dividing.
    const float scale = 1.0f / std::numeric_limits<T>::max();
    const float value = input * scale;
    return value;
}
443 
// Maps an integer that uses only its low |inputBitCount| bits to a float by dividing by the
// largest value representable in that many bits, so that value maps to (approximately) 1.0.
// The maximum is computed in 64-bit unsigned arithmetic: the previous `1 << inputBitCount`
// overflowed int for bit counts of 31 and above, which the static_assert permits for wide T.
template <unsigned int inputBitCount, typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
    static_assert(inputBitCount < (sizeof(T) * 8), "T must have more bits than inputBitCount.");
    static_assert(inputBitCount < 64, "inputBitCount must be less than 64.");

    const uint64_t maxValue = (static_cast<uint64_t>(1) << inputBitCount) - 1;
    const float inverseMax  = 1.0f / static_cast<float>(maxValue);
    return input * inverseMax;
}
453 
// Scales |input| by the largest value of T and rounds to the nearest integer by adding 0.5
// before the truncating cast. The input is not clamped here.
template <typename T>
inline T floatToNormalized(float input)
{
    const float rounded = std::numeric_limits<T>::max() * input + 0.5f;
    return static_cast<T>(rounded);
}
459 
// Scales |input| by the largest value representable in |outputBitCount| bits and rounds to the
// nearest integer by adding 0.5 before the truncating cast. The input is not clamped here.
// The maximum is computed in 64-bit unsigned arithmetic: the previous `1 << outputBitCount`
// overflowed int for bit counts of 31 and above, which the static_assert permits for wide T.
template <unsigned int outputBitCount, typename T>
inline T floatToNormalized(float input)
{
    static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");
    static_assert(outputBitCount < 64, "outputBitCount must be less than 64.");

    const uint64_t maxValue = (static_cast<uint64_t>(1) << outputBitCount) - 1;
    return static_cast<T>(static_cast<float>(maxValue) * input + 0.5f);
}
466 
// Extracts the |inputBitCount|-bit field that starts at bit |inputBitStart| of |input| and
// returns it shifted down to bit 0.
// The mask is built in 64-bit unsigned arithmetic: the previous `1 << inputBitCount` was
// undefined for fields of 31 bits or more, including the full-width field that the
// static_assert explicitly allows.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // A field of 64 bits keeps every bit; `% 64` only keeps the unused branch's shift in range.
    const uint64_t wideMask =
        (inputBitCount >= 64) ? ~static_cast<uint64_t>(0)
                              : ((static_cast<uint64_t>(1) << (inputBitCount % 64)) - 1);
    const T mask = static_cast<T>(wideMask);
    return (input >> inputBitStart) & mask;
}
475 
// Keeps the low |inputBitCount| bits of |input| and shifts them up so the field starts at bit
// |inputBitStart|.
// The mask is built in 64-bit unsigned arithmetic: the previous `1 << inputBitCount` was
// undefined for fields of 31 bits or more, including the full-width field that the
// static_assert explicitly allows.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // A field of 64 bits keeps every bit; `% 64` only keeps the unused branch's shift in range.
    const uint64_t wideMask =
        (inputBitCount >= 64) ? ~static_cast<uint64_t>(0)
                              : ((static_cast<uint64_t>(1) << (inputBitCount % 64)) - 1);
    const T mask = static_cast<T>(wideMask);
    return (input & mask) << inputBitStart;
}
484 
// Returns the number of zero bits above the most significant set bit of |x| (32 when x is 0).
inline unsigned int CountLeadingZeros(uint32_t x)
{
    unsigned int zeros = 32u;

    // Binary search: whenever the upper part of the remaining window is non-zero, continue
    // inside it and subtract the width of the lower part that was skipped.
    for (unsigned int shift = 16u; shift >= 2u; shift >>= 1u)
    {
        const uint32_t upper = x >> shift;
        if (upper != 0)
        {
            zeros -= shift;
            x = upper;
        }
    }

    // At this point x fits in two bits.
    if ((x >> 1u) != 0)
    {
        return zeros - 2u;
    }
    return zeros - x;
}
522 
// Average of two unsigned 8-bit values, rounded down.
inline unsigned char average(unsigned char a, unsigned char b)
{
    // Both operands are promoted to int, so the sum cannot overflow.
    const int sum = a + b;
    return static_cast<unsigned char>(sum / 2);
}
527 
// Average of two signed 8-bit values; the division truncates toward zero.
inline signed char average(signed char a, signed char b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed char>(sum / 2);
}
532 
// Average of two unsigned 16-bit values, rounded down.
inline unsigned short average(unsigned short a, unsigned short b)
{
    // Halve each operand separately and add back the carry lost when both are odd.
    const unsigned int halves = (a >> 1) + (b >> 1);
    const unsigned int carry  = a & b & 1;
    return static_cast<unsigned short>(halves + carry);
}
537 
// Average of two signed 16-bit values; the division truncates toward zero.
inline signed short average(signed short a, signed short b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed short>(sum / 2);
}
542 
// Average of two unsigned 32-bit values, rounded down, computed without overflowing.
inline unsigned int average(unsigned int a, unsigned int b)
{
    // Halve each operand separately and add back the carry lost when both are odd.
    const unsigned int halves = (a >> 1) + (b >> 1);
    const unsigned int carry  = a & b & 1u;
    return halves + carry;
}
547 
// Average of two ints; the sum is formed in long long so it cannot overflow, and the division
// truncates toward zero.
inline int average(int a, int b)
{
    const long long wideSum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(wideSum / 2ll);
}
553 
// Arithmetic mean of two floats.
inline float average(float a, float b)
{
    const float sum = a + b;
    return 0.5f * sum;
}
558 
averageHalfFloat(unsigned short a,unsigned short b)559 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
560 {
561     return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
562 }
563 
averageFloat11(unsigned int a,unsigned int b)564 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
565 {
566     return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) + float11ToFloat32(static_cast<unsigned short>(b))) * 0.5f);
567 }
568 
averageFloat10(unsigned int a,unsigned int b)569 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
570 {
571     return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) + float10ToFloat32(static_cast<unsigned short>(b))) * 0.5f);
572 }
573 
// Half-open interval [low, high) over values of type T. The range is empty when high <= low.
// Iterating a Range visits every value from low() up to, but not including, high().
template <typename T>
class Range
{
  public:
    // Creates an empty range. Both bounds are value-initialized so that a default-constructed
    // Range never exposes indeterminate values.
    Range() : mLow(), mHigh() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    // Number of values in the range; 0 when the range is empty.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // Returns true when the range that starts first extends past the start of the other one.
    bool intersects(const Range<T> &other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        else
        {
            return mLow < other.mHigh;
        }
    }

    // Grows the range so that it contains |value|.
    // Assumes that end is non-inclusive.. for example, extending to 5 will make "end" 6.
    void extend(T value)
    {
        mLow  = value < mLow ? value : mLow;
        mHigh = value >= mHigh ? (value + 1) : mHigh;
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator over the values of a Range; enough for range-based for loops.
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            ++mCurrent;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

  private:
    T mLow;   // Inclusive lower bound.
    T mHigh;  // Exclusive upper bound.
};
635 
636 typedef Range<int> RangeI;
637 typedef Range<unsigned int> RangeUI;
638 
639 struct IndexRange
640 {
IndexRangeIndexRange641     IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange642     IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
643         : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
644     {
645         ASSERT(start <= end);
646     }
647 
648     // Number of vertices in the range.
vertexCountIndexRange649     size_t vertexCount() const { return (end - start) + 1; }
650 
651     // Inclusive range of indices that are not primitive restart
652     size_t start;
653     size_t end;
654 
655     // Number of non-primitive restart indices
656     size_t vertexIndexCount;
657 };
658 
// Builds a float from a mantissa |x| and an integer exponent |exp|, i.e. x * 2^exp, in the
// manner of the GLSL ldexp() built-in. Exponents above 128 return infinity and exponents below
// -126 return 0 regardless of |x|. The product is formed in double precision.
inline float Ldexp(float x, int exp)
{
    if (exp > 128)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exp < -126)
    {
        return 0.0f;
    }

    const double scale = std::pow(2.0, static_cast<double>(exp));
    return static_cast<float>(static_cast<double>(x) * scale);
}
674 
675 // First, both normalized floating-point values are converted into 16-bit integer values.
676 // Then, the results are packed into the returned 32-bit unsigned integer.
677 // The first float value will be written to the least significant bits of the output;
678 // the last float value will be written to the most significant bits.
679 // The conversion of each value to fixed point is done as follows :
680 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)681 inline uint32_t packSnorm2x16(float f1, float f2)
682 {
683     int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
684     int16_t mostSignificantBits = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
685     return static_cast<uint32_t>(mostSignificantBits) << 16 |
686            (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
687 }
688 
689 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then, each
690 // component is converted to a normalized floating-point value to generate the returned two float values.
691 // The first float value will be extracted from the least significant bits of the input;
692 // the last float value will be extracted from the most-significant bits.
693 // The conversion for unpacked fixed-point value to floating point is done as follows:
694 // unpackSnorm2x16 : clamp(f / 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)695 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
696 {
697     int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
698     int16_t mostSignificantBits = static_cast<int16_t>(u >> 16);
699     *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
700     *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
701 }
702 
703 // First, both normalized floating-point values are converted into 16-bit integer values.
704 // Then, the results are packed into the returned 32-bit unsigned integer.
705 // The first float value will be written to the least significant bits of the output;
706 // the last float value will be written to the most significant bits.
707 // The conversion of each value to fixed point is done as follows:
708 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)709 inline uint32_t packUnorm2x16(float f1, float f2)
710 {
711     uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
712     uint16_t mostSignificantBits = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
713     return static_cast<uint32_t>(mostSignificantBits) << 16 | static_cast<uint32_t>(leastSignificantBits);
714 }
715 
// Unpacks a 32-bit value into two unsigned 16-bit integers and converts each one to a
// normalized float as f / 65535.0.
// |f1| receives the value from the 16 least significant bits and |f2| the value from the
// 16 most significant bits.
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    const uint16_t low  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t high = static_cast<uint16_t>(u >> 16);

    *f1 = static_cast<float>(low) / 65535.0f;
    *f2 = static_cast<float>(high) / 65535.0f;
}
729 
730 // Helper functions intended to be used only here.
731 namespace priv
732 {
733 
ToPackedUnorm8(float f)734 inline uint8_t ToPackedUnorm8(float f)
735 {
736     return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
737 }
738 
ToPackedSnorm8(float f)739 inline int8_t ToPackedSnorm8(float f)
740 {
741     return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
742 }
743 
744 }  // namespace priv
745 
746 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
747 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
748 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)749 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
750 {
751     uint8_t bits[4];
752     bits[0]         = priv::ToPackedUnorm8(f1);
753     bits[1]         = priv::ToPackedUnorm8(f2);
754     bits[2]         = priv::ToPackedUnorm8(f3);
755     bits[3]         = priv::ToPackedUnorm8(f4);
756     uint32_t result = 0u;
757     for (int i = 0; i < 4; ++i)
758     {
759         int shift = i * 8;
760         result |= (static_cast<uint32_t>(bits[i]) << shift);
761     }
762     return result;
763 }
764 
// Unpacks 4 normalized unsigned floating-point values from a single 32-bit unsigned integer
// into the array |f|, which must have room for 4 floats. f[0] comes from the least significant
// byte and f[3] from the most significant byte; each byte is divided by 255.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    uint32_t remaining = u;
    for (int component = 0; component < 4; ++component)
    {
        const uint8_t byte = static_cast<uint8_t>(remaining & 0xFF);
        f[component]       = static_cast<float>(byte) / 255.0f;
        remaining >>= 8;
    }
}
777 
778 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
779 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
780 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)781 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
782 {
783     int8_t bits[4];
784     bits[0]         = priv::ToPackedSnorm8(f1);
785     bits[1]         = priv::ToPackedSnorm8(f2);
786     bits[2]         = priv::ToPackedSnorm8(f3);
787     bits[3]         = priv::ToPackedSnorm8(f4);
788     uint32_t result = 0u;
789     for (int i = 0; i < 4; ++i)
790     {
791         int shift = i * 8;
792         result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
793     }
794     return result;
795 }
796 
797 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
798 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
799 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)800 inline void UnpackSnorm4x8(uint32_t u, float *f)
801 {
802     for (int i = 0; i < 4; ++i)
803     {
804         int shift   = i * 8;
805         int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
806         f[i]        = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
807     }
808 }
809 
810 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
811 // floating-point representation found in the OpenGL ES Specification, and then packing these
812 // two 16-bit integers into a 32-bit unsigned integer.
813 // f1: The 16 least-significant bits of the result;
814 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)815 inline uint32_t packHalf2x16(float f1, float f2)
816 {
817     uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
818     uint16_t mostSignificantBits = static_cast<uint16_t>(float32ToFloat16(f2));
819     return static_cast<uint32_t>(mostSignificantBits) << 16 | static_cast<uint32_t>(leastSignificantBits);
820 }
821 
822 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit values,
823 // interpreting those values as 16-bit floating-point numbers according to the OpenGL ES Specification,
824 // and converting them to 32-bit floating-point values.
825 // The first float value is obtained from the 16 least-significant bits of u;
826 // the second component is obtained from the 16 most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)827 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
828 {
829     uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
830     uint16_t mostSignificantBits = static_cast<uint16_t>(u >> 16);
831 
832     *f1 = float16ToFloat32(leastSignificantBits);
833     *f2 = float16ToFloat32(mostSignificantBits);
834 }
835 
sRGBToLinear(uint8_t srgbValue)836 inline uint8_t sRGBToLinear(uint8_t srgbValue)
837 {
838     float value = srgbValue / 255.0f;
839     if (value <= 0.04045f)
840     {
841         value = value / 12.92f;
842     }
843     else
844     {
845         value = std::pow((value + 0.055f) / 1.055f, 2.4f);
846     }
847     return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
848 }
849 
linearToSRGB(uint8_t linearValue)850 inline uint8_t linearToSRGB(uint8_t linearValue)
851 {
852     float value = linearValue / 255.0f;
853     if (value <= 0.0f)
854     {
855         value = 0.0f;
856     }
857     else if (value < 0.0031308f)
858     {
859         value = value * 12.92f;
860     }
861     else if (value < 1.0f)
862     {
863         value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
864     }
865     else
866     {
867         value = 1.0f;
868     }
869     return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
870 }
871 
// Reverse the order of the bits: bit 0 of |value| becomes bit 31 of the result and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO(oetuaho@nvidia.com): Optimize this if needed. There don't seem to be compiler intrinsics
    // for this, and right now it's not used in performance-critical paths.
    uint32_t reversed = 0u;
    for (unsigned int bit = 0u; bit < 32u; ++bit)
    {
        // Move the lowest remaining input bit into the bottom of the result.
        reversed = (reversed << 1) | (value & 1u);
        value >>= 1;
    }
    return reversed;
}
884 
885 // Count the 1 bits.
886 #if defined(ANGLE_PLATFORM_WINDOWS)
887 #if defined(_M_ARM) || defined(_M_ARM64)
// Software population count, used on Windows ARM builds in place of the __popcnt intrinsic.
// Sums the set bits in parallel within progressively wider fields of the word.
inline int BitCount(uint32_t bits)
{
    // Each 2-bit field now holds the number of set bits it originally contained.
    bits = bits - ((bits >> 1) & 0x55555555u);
    // Combine neighbouring 2-bit counts into 4-bit counts.
    bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
    // Combine into per-byte counts.
    bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;
    // The multiplication accumulates all four byte counts into the top byte.
    return static_cast<int>((bits * 0x01010101u) >> 24);
}

// 64-bit overload. Without it a uint64_t argument would be implicitly narrowed to the 32-bit
// overload above and the upper 32 bits would silently be left out of the count.
inline int BitCount(uint64_t bits)
{
    return BitCount(static_cast<uint32_t>(bits >> 32)) + BitCount(static_cast<uint32_t>(bits));
}
894 #else // _M_ARM || _M_ARM64
BitCount(uint32_t bits)895 inline int BitCount(uint32_t bits)
896 {
897     return static_cast<int>(__popcnt(bits));
898 }
899 #if defined(ANGLE_IS_64_BIT_CPU)
BitCount(uint64_t bits)900 inline int BitCount(uint64_t bits)
901 {
902     return static_cast<int>(__popcnt64(bits));
903 }
#endif  // defined(ANGLE_IS_64_BIT_CPU)
#endif  // defined(_M_ARM) || defined(_M_ARM64)
906 #endif  // defined(ANGLE_PLATFORM_WINDOWS)
907 
908 #if defined(ANGLE_PLATFORM_POSIX)
// Counts the set bits of a 32-bit value using the compiler's popcount builtin.
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}
913 
914 #if defined(ANGLE_IS_64_BIT_CPU)
// Counts the set bits of a 64-bit value using the compiler's 64-bit popcount builtin.
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
919 #endif  // defined(ANGLE_IS_64_BIT_CPU)
920 #endif  // defined(ANGLE_PLATFORM_POSIX)
921 
922 #if defined(ANGLE_PLATFORM_WINDOWS)
923 // Return the index of the least significant bit set. Indexing is such that bit 0 is the least
924 // significant bit. Implemented for different bit widths on different platforms.
ScanForward(uint32_t bits)925 inline unsigned long ScanForward(uint32_t bits)
926 {
927     ASSERT(bits != 0u);
928     unsigned long firstBitIndex = 0ul;
929     unsigned char ret           = _BitScanForward(&firstBitIndex, bits);
930     ASSERT(ret != 0u);
931     return firstBitIndex;
932 }
933 
934 #if defined(ANGLE_IS_64_BIT_CPU)
ScanForward(uint64_t bits)935 inline unsigned long ScanForward(uint64_t bits)
936 {
937     ASSERT(bits != 0u);
938     unsigned long firstBitIndex = 0ul;
939     unsigned char ret           = _BitScanForward64(&firstBitIndex, bits);
940     ASSERT(ret != 0u);
941     return firstBitIndex;
942 }
943 #endif  // defined(ANGLE_IS_64_BIT_CPU)
944 #endif  // defined(ANGLE_PLATFORM_WINDOWS)
945 
946 #if defined(ANGLE_PLATFORM_POSIX)
ScanForward(uint32_t bits)947 inline unsigned long ScanForward(uint32_t bits)
948 {
949     ASSERT(bits != 0u);
950     return static_cast<unsigned long>(__builtin_ctz(bits));
951 }
952 
953 #if defined(ANGLE_IS_64_BIT_CPU)
ScanForward(uint64_t bits)954 inline unsigned long ScanForward(uint64_t bits)
955 {
956     ASSERT(bits != 0u);
957     return static_cast<unsigned long>(__builtin_ctzll(bits));
958 }
959 #endif  // defined(ANGLE_IS_64_BIT_CPU)
960 #endif  // defined(ANGLE_PLATFORM_POSIX)
961 
962 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
963 // significant bit.
ScanReverse(unsigned long bits)964 inline unsigned long ScanReverse(unsigned long bits)
965 {
966     ASSERT(bits != 0u);
967 #if defined(ANGLE_PLATFORM_WINDOWS)
968     unsigned long lastBitIndex = 0ul;
969     unsigned char ret          = _BitScanReverse(&lastBitIndex, bits);
970     ASSERT(ret != 0u);
971     return lastBitIndex;
972 #elif defined(ANGLE_PLATFORM_POSIX)
973     return static_cast<unsigned long>(sizeof(unsigned long) * CHAR_BIT - 1 - __builtin_clzl(bits));
974 #else
975 #error Please implement bit-scan-reverse for your platform!
976 #endif
977 }
978 
// GLSL-style findLSB: yields -1 when |bits| is zero, otherwise the index of the least significant
// set bit as reported by ScanForward.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanForward(bits));
}
993 
// GLSL-style findMSB: yields -1 when |bits| is zero, otherwise the index of the most significant
// set bit as reported by ScanReverse. Note that |bits| is converted to ScanReverse's parameter
// type (unsigned long) for the lookup.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanReverse(bits));
}
1008 
1009 // Returns whether the argument is Not a Number.
1010 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) - non-zero.
isNaN(float f)1011 inline bool isNaN(float f)
1012 {
1013     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1014     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1015     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) && (bitCast<uint32_t>(f) & 0x7fffffu);
1016 }
1017 
1018 // Returns whether the argument is infinity.
1019 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) - zero.
isInf(float f)1020 inline bool isInf(float f)
1021 {
1022     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1023     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1024     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) && !(bitCast<uint32_t>(f) & 0x7fffffu);
1025 }
1026 
namespace priv
{
// Compile-time search for the exact integer square root of N, trying candidate roots R, R + 1, ...
// in turn. |value| is the root when N is a perfect square and 0 otherwise. The search is started
// with R == 1.
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    // Stops with 0 once R * R exceeds N, stops with R when R * R equals N, and otherwise defers to
    // the next candidate.
    static constexpr unsigned int solve()
    {
        return (R * R > N)
                   ? 0
                   : ((R * R == N) ? R : static_cast<unsigned int>(iSquareRoot<N, R + 1>::value));
    }
    enum Result
    {
        value = iSquareRoot::solve()
    };
};

// Terminates the template recursion when the candidate root reaches N itself. N is its own square
// root only for N <= 1; for any larger N the candidate N is not a root, so the result is 0 like
// for every other non-square (previously this yielded N, which made the search for N == 2 report
// 2).
template <unsigned int N>
struct iSquareRoot<N, N>
{
    enum result
    {
        value = (N <= 1u) ? N : 0u
    };
};

}  // namespace priv
1054 
1055 template <unsigned int N>
1056 constexpr unsigned int iSquareRoot()
1057 {
1058     return priv::iSquareRoot<N, 1>::value;
1059 }
1060 
1061 // Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
1062 //
1063 // Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
1064 // behavior is undefined.
1065 
// Adds |lhs| and |rhs| with wraparound at 32 bits. Both operands are converted to uint32_t, whose
// addition is performed modulo 2^32, and the sum is converted back to T.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    const uint32_t wrappedSum = static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs);
    return static_cast<T>(wrappedSum);
}
1073 
// Subtracts |rhs| from |lhs| with wraparound at 32 bits. Both operands are converted to uint32_t,
// whose subtraction is performed modulo 2^32, and the difference is converted back to T.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    const uint32_t wrappedDiff = static_cast<uint32_t>(lhs) - static_cast<uint32_t>(rhs);
    return static_cast<T>(wrappedDiff);
}
1081 
// Multiplies two 32-bit signed integers with wraparound at 32 bits.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // Widening both operands first means the 64-bit product itself cannot overflow.
    const int64_t wideProduct = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
    // Keep only the low-order 32 bits of the product to get the wrapping behavior.
    const uint32_t lowBits = static_cast<uint32_t>(wideProduct);
    // The low bits are converted back to the signed type; values with the top bit set come out
    // negative.
    return static_cast<int32_t>(lowBits);
}
1094 
1095 }  // namespace gl
1096 
1097 namespace rx
1098 {
1099 
// Rounds |value| up to the nearest multiple of |alignment|. |alignment| must be non-zero since it
// is used as a modulus. T only needs to support +, - and %, so this also works for checked
// numeric wrappers.
template <typename T>
T roundUp(const T value, const T alignment)
{
    // Bias the value so that anything past a multiple lands in the next bucket, then drop the
    // remainder.
    auto biased    = value + alignment - static_cast<T>(1);
    auto remainder = biased % alignment;
    return biased - remainder;
}
1106 
1107 template <typename T>
1108 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1109 {
1110     angle::CheckedNumeric<T> checkedValue(value);
1111     angle::CheckedNumeric<T> checkedAlignment(alignment);
1112     return roundUp(checkedValue, checkedAlignment);
1113 }
1114 
// Divides |value| by |divisor|, rounding the quotient up. |divisor| must be non-zero.
inline unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    const unsigned int quotient  = value / divisor;
    const unsigned int remainder = value % divisor;
    // Any leftover remainder means the exact quotient lies above the truncated one.
    return (remainder != 0u) ? (quotient + 1u) : quotient;
}
1120 
// Bit rotation helpers. ANGLE_ROTL(x, y) rotates a 32-bit value left by y bits and
// ANGLE_ROTR16(x, y) rotates a 16-bit value right by y bits. MSVC builds map them to the
// compiler's rotate intrinsics; other compilers use the portable functions below.
#if defined(_MSC_VER)

#define ANGLE_ROTL(x, y) _rotl(x, y)
#define ANGLE_ROTR16(x, y) _rotr16(x, y)

#else

// Rotates the 32-bit value |x| left by |r| bits.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    // Reduce the count modulo 32. Without this, a count of 0 would shift right by the full width
    // of the type and out-of-range counts would shift by invalid amounts, both of which are
    // undefined behavior.
    const uint32_t count = static_cast<uint32_t>(r) & 31u;
    return (x << count) | (x >> ((32u - count) & 31u));
}

// Rotates the 16-bit value |x| right by |r| bits.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    // Reduce the count modulo 16 so that counts above 16 cannot produce a negative shift amount,
    // and do the shifts on an unsigned value so no bits are shifted into a sign bit.
    const unsigned int count = static_cast<unsigned int>(r) & 15u;
    const unsigned int wide  = x;
    return static_cast<uint16_t>((wide >> count) | (wide << ((16u - count) & 15u)));
}

#define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
#define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif  // defined(_MSC_VER)
1142 
}  // namespace rx
1144 
1145 #endif   // COMMON_MATHUTIL_H_
1146