1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "types.h"
12 #include "Debug.h"
13 #include <stdlib.h>
14 
15 #if defined _WIN32
16 #   include <intrin.h>
17 #endif
18 
19 #if !defined(_WIN32)
20 #   include "../../inc/common/secure_mem.h"
21 #   include "../../inc/common/secure_string.h"
22 #endif
23 
24 namespace iSTD
25 {
26 
27 /*****************************************************************************\
28 MACRO: BIT
29 \*****************************************************************************/
#ifndef BIT
// Expands to the literal 1 shifted left by n, i.e. a value with only bit n
// set. The literal is a plain (signed) int, so the result has type int.
#define BIT( n )    ( 1 << (n) )
#endif
33 
34 /*****************************************************************************\
35 MACRO: MASKED_BIT
36 \*****************************************************************************/
#ifndef MASKED_BIT
// Expands to a value with bit (n + 16) always set and bit n set only when
// 'enable' is non-zero.
// 'n' is parenthesized before 16 is added so that expression arguments
// (e.g. a & b, or c ? x : y) are evaluated as a unit.
// Note: 'n' is expanded twice, so avoid arguments with side effects.
#define MASKED_BIT( n, enable ) ( 1 << ((n) + 16) | ((enable) ? 1 : 0) << (n) )
#endif
40 
41 /*****************************************************************************\
42 MACRO: QWBIT
43 \*****************************************************************************/
#ifndef QWBIT
// 64-bit counterpart of BIT: the literal 1ll (signed long long) shifted left
// by n, i.e. a value with only bit n set.
#define QWBIT( n )    ( 1ll << (n) )
#endif
47 
48 /*****************************************************************************\
49 MACRO: BITMASK
50 PURPOSE: Creates a mask of n bits
51 \*****************************************************************************/
#ifndef BITMASK
// Expands to an unsigned 32-bit value with the low n bits set, for n in
// [0, 32]. n == 32 is special-cased because shifting a 32-bit value by its
// full width is undefined; BITMASK_RANGE( x, 31 ) asks for exactly that.
// Note: n is expanded twice, so avoid arguments with side effects.
#define BITMASK( n )    ( ( (n) >= 32 ) ? (0xffffffff) : ~( (0xffffffff) << (n) ) )
#endif
#ifndef BITMASK_RANGE
// Mask with bits startbit..endbit (0-based, inclusive) set: the low
// (endbit + 1) bits with the low startbit bits cleared.
#define BITMASK_RANGE( startbit, endbit )   ( BITMASK( (endbit)+1 ) & ~BITMASK( startbit ) )
#endif
58 
59 /*****************************************************************************\
60 MACRO: QWBITMASK
61 PURPOSE: Creates a mask of n bits
62 \*****************************************************************************/
#ifndef QWBITMASK
// Expands to an unsigned 64-bit value with the low n bits set, for n in
// [0, 64]. n == 64 is special-cased because shifting a 64-bit value by its
// full width is undefined; QWBITMASK_RANGE( x, 63 ) asks for exactly that.
// Note: n is expanded twice, so avoid arguments with side effects.
#define QWBITMASK( n )    ( ( (n) >= 64 ) ? (0xffffffffffffffffull) : ~( (0xffffffffffffffffull) << (n) ) )
#endif
66 
#ifndef QWBITMASK_RANGE
// 64-bit mask with bits startbit..endbit (0-based, inclusive) set: the low
// (endbit + 1) bits with the low startbit bits cleared.
#define QWBITMASK_RANGE( startbit, endbit )  ( QWBITMASK( (endbit)+1 ) & ~QWBITMASK( startbit ) )
#endif
70 
71 /*****************************************************************************\
72 MACRO: BITFIELD_RANGE
73 PURPOSE: Calculates the number of bits between the startbit and the endbit (0 based)
74 \*****************************************************************************/
#ifndef BITFIELD_RANGE
// Width, in bits, of the inclusive range [startbit, endbit];
// e.g. BITFIELD_RANGE( 4, 7 ) evaluates to 4.
#define BITFIELD_RANGE( startbit, endbit )     ((endbit)-(startbit)+1)
#endif
78 
79 /*****************************************************************************\
80 MACRO: BITFIELD_BIT
81 PURPOSE: Definition declared for clarity when creating structs
82 \*****************************************************************************/
#ifndef BITFIELD_BIT
// Always expands to 1 (the width of a single-bit field). The 'bit' argument
// does not appear in the expansion; it only names the bit at the use site.
#define BITFIELD_BIT( bit )                   1
#endif
86 
87 /*****************************************************************************\
88 MACRO: GETMSB
PURPOSE: Returns the index of the most significant set bit of n (-1 if none)
90 \*****************************************************************************/
#ifndef GETMSB
// Evaluates to the 0-based index of the highest set bit among bits 31..0 of
// n, or -1 when none of those bits is set. Bits are tested from 31 downwards
// with a chain of conditional expressions, so the result is a constant
// expression when n is one.
// Note: n is expanded once per tested bit, so it must not have side effects.
#define GETMSB( n ) ( \
    ( (n) & BIT(31) ) ? 31 : \
    ( (n) & BIT(30) ) ? 30 : \
    ( (n) & BIT(29) ) ? 29 : \
    ( (n) & BIT(28) ) ? 28 : \
    ( (n) & BIT(27) ) ? 27 : \
    ( (n) & BIT(26) ) ? 26 : \
    ( (n) & BIT(25) ) ? 25 : \
    ( (n) & BIT(24) ) ? 24 : \
    ( (n) & BIT(23) ) ? 23 : \
    ( (n) & BIT(22) ) ? 22 : \
    ( (n) & BIT(21) ) ? 21 : \
    ( (n) & BIT(20) ) ? 20 : \
    ( (n) & BIT(19) ) ? 19 : \
    ( (n) & BIT(18) ) ? 18 : \
    ( (n) & BIT(17) ) ? 17 : \
    ( (n) & BIT(16) ) ? 16 : \
    ( (n) & BIT(15) ) ? 15 : \
    ( (n) & BIT(14) ) ? 14 : \
    ( (n) & BIT(13) ) ? 13 : \
    ( (n) & BIT(12) ) ? 12 : \
    ( (n) & BIT(11) ) ? 11 : \
    ( (n) & BIT(10) ) ? 10 : \
    ( (n) & BIT(9)  ) ?  9 : \
    ( (n) & BIT(8)  ) ?  8 : \
    ( (n) & BIT(7)  ) ?  7 : \
    ( (n) & BIT(6)  ) ?  6 : \
    ( (n) & BIT(5)  ) ?  5 : \
    ( (n) & BIT(4)  ) ?  4 : \
    ( (n) & BIT(3)  ) ?  3 : \
    ( (n) & BIT(2)  ) ?  2 : \
    ( (n) & BIT(1)  ) ?  1 : \
    ( (n) & BIT(0)  ) ?  0 : \
    (-1) )
#endif
127 
128 /*****************************************************************************\
129 MACRO: BITCOUNT
130 PURPOSE: Determines the number of bits needed in a bitmask, given the number
131 of elements to be stored in the mask
132 \*****************************************************************************/
#ifndef BITCOUNT
// Evaluates to one more than the index of the highest set bit of (n - 1),
// i.e. the number of bits needed to hold the values 0..n-1. When (n - 1) has
// no bit set in 31..0 (n == 1) the result is 0.
// For n == 0 with int arithmetic, (n)-1 is -1, which has bit 31 set, so the
// result is 32.
// Note: n is expanded once per tested bit, so it must not have side effects.
#define BITCOUNT( n ) ( \
    ( ((n)-1) & BIT(31) ) ? 32 : \
    ( ((n)-1) & BIT(30) ) ? 31 : \
    ( ((n)-1) & BIT(29) ) ? 30 : \
    ( ((n)-1) & BIT(28) ) ? 29 : \
    ( ((n)-1) & BIT(27) ) ? 28 : \
    ( ((n)-1) & BIT(26) ) ? 27 : \
    ( ((n)-1) & BIT(25) ) ? 26 : \
    ( ((n)-1) & BIT(24) ) ? 25 : \
    ( ((n)-1) & BIT(23) ) ? 24 : \
    ( ((n)-1) & BIT(22) ) ? 23 : \
    ( ((n)-1) & BIT(21) ) ? 22 : \
    ( ((n)-1) & BIT(20) ) ? 21 : \
    ( ((n)-1) & BIT(19) ) ? 20 : \
    ( ((n)-1) & BIT(18) ) ? 19 : \
    ( ((n)-1) & BIT(17) ) ? 18 : \
    ( ((n)-1) & BIT(16) ) ? 17 : \
    ( ((n)-1) & BIT(15) ) ? 16 : \
    ( ((n)-1) & BIT(14) ) ? 15 : \
    ( ((n)-1) & BIT(13) ) ? 14 : \
    ( ((n)-1) & BIT(12) ) ? 13 : \
    ( ((n)-1) & BIT(11) ) ? 12 : \
    ( ((n)-1) & BIT(10) ) ? 11 : \
    ( ((n)-1) & BIT(9)  ) ? 10 : \
    ( ((n)-1) & BIT(8)  ) ?  9 : \
    ( ((n)-1) & BIT(7)  ) ?  8 : \
    ( ((n)-1) & BIT(6)  ) ?  7 : \
    ( ((n)-1) & BIT(5)  ) ?  6 : \
    ( ((n)-1) & BIT(4)  ) ?  5 : \
    ( ((n)-1) & BIT(3)  ) ?  4 : \
    ( ((n)-1) & BIT(2)  ) ?  3 : \
    ( ((n)-1) & BIT(1)  ) ?  2 : \
    ( ((n)-1) & BIT(0)  ) ?  1 : \
    0 )
#endif
169 
170 /*****************************************************************************\
171 MACRO: MIN
172 \*****************************************************************************/
#ifndef MIN
// Evaluates to the smaller of x and y; x is chosen when they compare equal.
// Note: the selected argument is expanded (and so evaluated) twice; do not
// pass expressions with side effects.
#define MIN( x, y ) (((x)<=(y))?(x):(y))
#endif
176 
177 /*****************************************************************************\
178 MACRO: MAX
179 \*****************************************************************************/
#ifndef MAX
// Evaluates to the larger of x and y; x is chosen when they compare equal.
// Note: the selected argument is expanded (and so evaluated) twice; do not
// pass expressions with side effects.
#define MAX( x, y ) (((x)>=(y))?(x):(y))
#endif
183 
184 /*****************************************************************************\
185 MACRO: CEIL_DIV
186 \*****************************************************************************/
#ifndef CEIL_DIV
// Integer division of x by y rounded up, computed as 1 + (x - 1) / y so that
// no x + y intermediate is formed.
// Only correct for x >= 1: with x == 0 the (x - 1) term is -1 for signed
// operands (result 1) or wraps for unsigned operands.
#define CEIL_DIV( x, y ) ( 1 + ( ( ( x ) - 1 ) / ( y ) ) )
#endif
190 
191 /*****************************************************************************\
192 MACRO: STRCAT
193 \*****************************************************************************/
#ifndef STRCAT
// Appends src to dst via strcat_s, passing 'size' as the destination
// capacity, on every platform.
// NOTE(review): on non-Windows builds strcat_s presumably comes from the
// secure_string.h include at the top of this file - confirm.
#define STRCAT( dst, size, src ) strcat_s( (dst), (size), (src) )
#endif
197 
198 /*****************************************************************************\
199 MACRO: STRNCAT
200 \*****************************************************************************/
#ifndef STRNCAT
// Appends at most 'len' characters of src to dst.
// ISTDLIB_KMD and non-Windows builds expand to plain strncat and ignore
// 'size'; other Windows builds use strncat_s with 'size' as the destination
// capacity.
#if defined(ISTDLIB_KMD) || !defined(_WIN32)
#define STRNCAT( dst, size, src, len ) strncat( (dst), (src), (len) )
#else
#define STRNCAT( dst, size, src, len ) strncat_s( (dst), (size), (src), (len) )
#endif
#endif
208 
209 /*****************************************************************************\
210 MACRO: WCSNCAT
211 \*****************************************************************************/
#ifndef WCSNCAT
// Wide-character counterpart of STRNCAT: appends at most 'len' wide
// characters of src to dst.
// ISTDLIB_KMD and non-Windows builds expand to plain wcsncat and ignore
// 'size'; other Windows builds use wcsncat_s with 'size' as the destination
// capacity.
#if defined(ISTDLIB_KMD) || !defined(_WIN32)
#define WCSNCAT( dst, size, src, len ) wcsncat( (dst), (src), (len) )
#else
#define WCSNCAT( dst, size, src, len ) wcsncat_s( (dst), (size), (src), (len) )
#endif
#endif
219 
220 /*****************************************************************************\
221 MACRO: STRCPY
222 \*****************************************************************************/
#ifndef STRCPY
// Copies src into dst via strcpy_s, passing 'size' as the destination
// capacity, on every platform.
// NOTE(review): on non-Windows builds strcpy_s presumably comes from the
// secure_string.h include at the top of this file - confirm.
#define STRCPY( dst, size, src ) strcpy_s( (dst), (size), (src) )
#endif
226 
227 /*****************************************************************************\
228 MACRO: SPRINTF
229 \*****************************************************************************/
#ifndef SPRINTF
// Formats into dst using 'src' as the format string and exactly one format
// argument ('args' is forwarded as a single parenthesized expression).
// ISTDLIB_KMD and non-Windows builds expand to plain sprintf and ignore
// 'size'; other Windows builds use sprintf_s with 'size' as the destination
// capacity.
#if defined(ISTDLIB_KMD) || !defined(_WIN32)
#define SPRINTF( dst, size, src, args ) sprintf( (dst), (src), (args) )
#else
#define SPRINTF( dst, size, src, args ) sprintf_s( (dst), (size), (src), (args) )
#endif
#endif
237 
238 /*****************************************************************************\
239 MACRO: VSNPRINTF
240 \*****************************************************************************/
#ifndef VSNPRINTF
// Formats a va_list ('args') into dst using 'src' as the format string,
// limited by 'len'.
// ISTDLIB_KMD and non-Windows builds expand to _vsnprintf and ignore 'size';
// other Windows builds use _vsnprintf_s with 'size' as the destination
// capacity.
// NOTE(review): _vsnprintf is not a standard C name; on non-Windows builds it
// is presumably supplied by one of the secure_* headers - confirm.
#if defined(ISTDLIB_KMD) || !defined(_WIN32)
#define VSNPRINTF( dst, size, len, src, args ) _vsnprintf( (dst), (len), (src), (args) )
#else
#define VSNPRINTF( dst, size, len, src, args ) _vsnprintf_s( (dst), (size), (len), (src), (args) )
#endif
#endif
248 
249 /*****************************************************************************\
250 MACRO: VSPRINTF
251 \*****************************************************************************/
#ifndef VSPRINTF
// Formats a va_list ('args') into dst using 'src' as the format string.
// ISTDLIB_KMD and non-Windows builds expand to plain vsprintf and ignore
// 'size'; other Windows builds use vsprintf_s with 'size' as the destination
// capacity.
#if defined(ISTDLIB_KMD) || !defined(_WIN32)
#define VSPRINTF( dst, size, src, args ) vsprintf( (dst), (src), (args) )
#else
#define VSPRINTF( dst, size, src, args ) vsprintf_s( (dst), (size), (src), (args) )
#endif
#endif
259 
260 /*****************************************************************************\
261 MACRO: MEMCPY
262 \*****************************************************************************/
#ifndef MEMCPY
// Copies 'args' bytes from src to dst.
// The 'size' argument (destination capacity) is only used by the memcpy_s
// expansion; Android, ISTDLIB_KMD and non-MSVC builds all expand to plain
// memcpy and ignore it.
#if defined(__ANDROID__) || defined(ISTDLIB_KMD) || !defined(_MSC_VER)
#define MEMCPY( dst, size, src, args ) memcpy( (dst), (src), (args) )
#else
#define MEMCPY( dst, size, src, args ) memcpy_s( (dst), (size), (src), (args) )
#endif
#endif
272 
273 /*****************************************************************************\
274 MACRO: ARRAY_COUNT
275 \*****************************************************************************/
#ifndef ARRAY_COUNT
// Number of elements in the array x: total size divided by the size of its
// first element. x must be an actual array; for a pointer the expansion
// still compiles but yields sizeof(pointer) / sizeof(element).
#define ARRAY_COUNT( x ) ( sizeof( x ) / sizeof( x[ 0 ] ) )
#endif
279 
280 /*****************************************************************************\
281 Inline Template Function:
282     Swap
283 
284 Description:
285     Swaps the values of two variables of the same type
286 \*****************************************************************************/
template <class Type>
inline void Swap( Type &var0, Type &var1 )
{
    // Keep a copy of the first value while it is overwritten by the second
    Type saved = var0;

    var0 = var1;
    var1 = saved;
}
294 
295 /*****************************************************************************\
296 Inline Template Function:
297     Min
298 
299 Description:
300     Returns the min of the two values
301 \*****************************************************************************/
302 template <class Type>
Min(const Type var0,const Type var1)303 __forceinline Type Min( const Type var0, const Type var1 )
304 {
305     return ( var0 <= var1 ) ? var0 : var1;
306 }
307 
308 /*****************************************************************************\
309 Inline Template Function:
310     Max
311 
312 Description:
313     Returns the max of the two values
314 \*****************************************************************************/
315 template <class Type>
Max(const Type var0,const Type var1)316 __forceinline Type Max( const Type var0, const Type var1 )
317 {
318     return ( var0 >= var1 ) ? var0 : var1;
319 }
320 
321 /*****************************************************************************\
322 Inline Template Function:
323     ClampMax
324 
325 Description:
    Checks whether the value is greater than the maximum value.  If the value
    is greater than the maximum then it returns the maximum value.  Otherwise,
    it returns the value.
329 \*****************************************************************************/
330 template<class Type>
ClampMax(const Type value,const Type max)331 __forceinline Type ClampMax( const Type value, const Type max )
332 {
333     return ( ( (value) > (max) ) ? (max) : (value) );
334 }
335 
336 /*****************************************************************************\
337 Inline Template Function:
338     ClampMin
339 
340 Description:
    Checks whether the value is less than the minimum value.  If the value is
    less than the minimum then it returns the minimum value.  Otherwise, it
    returns the value.
344 \*****************************************************************************/
345 template<class Type>
ClampMin(const Type value,const Type min)346 __forceinline Type ClampMin( const Type value, const Type min )
347 {
348     return ( ( (value) < (min) ) ? (min) : (value) );
349 }
350 
351 /*****************************************************************************\
352 Inline Template Function:
353     Clamp
354 
355 Description:
    Checks whether the value is less than the minimum value or greater than
    the maximum value.  If the value is less than the minimum then it returns
    the minimum value.  If the value is greater than the maximum then it
    returns the maximum value. Otherwise, it returns the value.
360 \*****************************************************************************/
361 template<class Type>
Clamp(const Type value,const Type min,const Type max)362 __forceinline Type Clamp( const Type value, const Type min, const Type max )
363 {
364     return ClampMin<Type>( ClampMax<Type>( value, max ), min );
365 }
366 
367 /*****************************************************************************\
368 Inline Template Function:
369     CheckLimits
370 
371 Description:
372     Determines if the value is within the specified range
373 \*****************************************************************************/
374 template <class Type>
CheckLimits(const Type value,const Type min,const Type max)375 __forceinline bool CheckLimits( const Type value, const Type min, const Type max )
376 {
377     if( ( value < min ) || ( value > max ) )
378     {
379         ASSERT(0);
380         return false;
381     }
382 
383     return true;
384 }
385 
386 /*****************************************************************************\
387 Inline Template Function:
388     emul
389 
390 Description:
391     Upconversion Multiply, used for checking overflow.
392 \*****************************************************************************/
393 template <typename t1, typename t2>
emul(t1 a,t1 b)394 __forceinline t2 emul( t1 a, t1 b )
395 {
396     return (t2)a*b;
397 }
398 
399 /*****************************************************************************\
400 Inline Function:
401     bsr64
402 
403 Description:
404     Intrinsic definition of bit scan reverse for 64bit values.
405 \*****************************************************************************/
406 #if defined( _WIN64 ) || defined( __x86_64__ )
bsr64(const unsigned long long int mask)407 __forceinline DWORD bsr64( const unsigned long long int mask )
408 {
409 #if defined _WIN32
410     DWORD index;
411     _BitScanReverse64( &index, static_cast<_int64>( mask ) );
412     return static_cast<DWORD>( index );
413 
414 #elif defined __GNUC__
415     return static_cast<unsigned int>( 63 - __builtin_clzll( mask ) );
416 
417 #else
418     DWORD bit = 0;
419     if( mask != 0 )
420     {
421         bit = 63;
422         while( ( mask & QWBIT(bit) ) == 0 )
423         {
424             --bit;
425         }
426     }
427     return bit;
428 
429 #endif
430 }
431 #endif // defined( _WIN64 ) || defined( __x86_64__ )
432 
433 /*****************************************************************************\
434 Inline Function:
435     bsr
436 
437 Description:
438     Intrinsic definition when not compiler-defined
439 \*****************************************************************************/
bsr(const DWORD mask)440 __forceinline DWORD bsr( const DWORD mask )
441 {
442 #if defined _WIN32
443     DWORD index;
444     _BitScanReverse( &index, mask );
445     return static_cast<DWORD>(index);
446 
447 #elif defined __GNUC__
448     return static_cast<unsigned int>( 31 - __builtin_clz( mask ) );
449 
450 #else
451     DWORD bit = 0;
452     if( mask != 0 )
453     {
454         bit = 31;
455         while( ( mask & BIT(bit) ) == 0 )
456         {
457             --bit;
458         }
459     }
460     return bit;
461 
462 #endif
463 }
464 
465 /*****************************************************************************\
466 Inline Function:
    bsf64

Description:
    Intrinsic definition of bit scan forward for 64bit values.
471 \*****************************************************************************/
472 #if defined( _WIN64 ) || defined( __x86_64__ )
bsf64(const unsigned long long int mask)473 __forceinline DWORD bsf64( const unsigned long long int mask )
474 {
475 #if defined _WIN32
476     DWORD index;
477     _BitScanForward64( &index, static_cast<_int64>( mask ) );
478     return static_cast<DWORD>( index );
479 
480 #elif defined __GNUC__
481     return static_cast<unsigned int>( __builtin_ffsll( mask ) - 1 );
482 
483 #else
484     DWORD bit = 0;
485     if( mask != 0 )
486     {
487         while( ( mask & QWBIT(bit) ) == 0 )
488         {
489             ++bit;
490         }
491     }
492     return bit;
493 
494 #endif
495 }
496 #endif // defined( _WIN64 ) || defined( __x86_64__ )
497 
498 /*****************************************************************************\
499 Inline Function:
500     bsf
501 
502 Description:
503     Intrinsic definition when not compiler-defined
504 \*****************************************************************************/
bsf(const DWORD mask)505 __forceinline DWORD bsf( const DWORD mask )
506 {
507 #if defined _WIN32
508     DWORD index;
509     _BitScanForward( &index, mask );
510     return index;
511 
512 #elif defined __GNUC__
513     return static_cast<unsigned int>( __builtin_ffsl( mask ) - 1 );
514 
515 #else
516     DWORD bit = 0;
517     if( mask != 0 )
518     {
519         while( ( mask & BIT(bit) ) == 0 )
520         {
521             ++bit;
522         }
523     }
524     return bit;
525 
526 #endif
527 }
528 
529 /*****************************************************************************\
530 Description:
531     Find first zero which identifies the index of the least significant zero bit
532     mask - mask to be checked
533 \*****************************************************************************/
#ifndef FIND_FIRST_0_LSB
// Index of the least significant zero bit of mask, found as the lowest set
// bit of the complemented mask. 'mask' is parenthesized so the complement
// applies to the whole argument expression (e.g. a | b), not just its first
// operand.
#define FIND_FIRST_0_LSB( mask )    ( iSTD::bsf(~(mask)) )
#endif
537 
538 /*****************************************************************************\
539 Inline Function:
540     clz
541 
542 Description:
543     Count number of leading zeros of the mask
544 \*****************************************************************************/
clz(const DWORD mask)545 __forceinline DWORD clz( const DWORD mask )
546 {
547     DWORD retValue = 32;
548 
549     // bsr returns 0 if the mask is 0 and sets a the ZF flag so handle
550     // 0 special.
551     if( mask != 0 )
552     {
553         retValue = 31 - bsr( mask );
554     }
555 
556     return retValue;
557 }
558 
559 /*****************************************************************************\
560 Inline Function:
561     IsPowerOfTwo
562 
563 Description:
564     Determines if the given value is a power of two.
565 \*****************************************************************************/
566 template< typename Type >
IsPowerOfTwo(const Type number)567 __forceinline bool IsPowerOfTwo( const Type number )
568 {
569     return ( ( number & ( number - 1 ) ) == 0 );
570 }
571 
572 /*****************************************************************************\
573 Inline Function:
574     Round
575 
576 Description:
577     Rounds an unsigned integer to the next multiple of (power-2) size
578 \*****************************************************************************/
579 template< typename Type1, typename Type2 >
Round(const Type1 value,const Type2 size)580 __forceinline Type1 Round( const Type1 value, const Type2 size )
581 {
582     ASSERT( IsPowerOfTwo(size) );
583     Type1 mask = (Type1)size - 1;
584     Type1 roundedValue = ( value + mask ) & ~( mask );
585     return roundedValue;
586 }
587 
588 /*****************************************************************************\
589 Inline Function:
590     RoundDown
591 
592 Description:
593     Rounds an unsigned integer to the previous multiple of (power-2) size
594 \*****************************************************************************/
595 template< typename Type1, typename Type2 >
RoundDown(const Type1 value,const Type2 size)596 __forceinline DWORD RoundDown( const Type1 value, const Type2 size )
597 {
598     ASSERT( IsPowerOfTwo(size) );
599     Type1 mask = (Type1)size - 1;
600     Type1 roundedValue = value & ~( mask );
601     return roundedValue;
602 }
603 
604 /*****************************************************************************\
605 Inline Function:
606     RoundNonPow2
607 
608 Description:
    Rounds an unsigned integer up to the next multiple of size (nonpow2)
610 \*****************************************************************************/
611 template< typename Type1, typename Type2 >
RoundNonPow2(const Type1 value,const Type2 size)612 __forceinline Type1 RoundNonPow2( const Type1 value, const Type2 size )
613 {
614     const Type1 size1 = (Type1)size;
615     const Type1 remainder = ( value % size1 );
616 
617     Type1 roundedValue = value;
618     if( remainder )
619     {
620         roundedValue += size1 - remainder;
621     }
622     return roundedValue;
623 }
624 
625 /*****************************************************************************\
626 Inline Function:
627     RoundDownNonPow2
628 
629 Description:
630     Rounds an unsigned integer to the previous multiple of size (nonpow2)
631 \*****************************************************************************/
632 template< typename Type1, typename Type2 >
RoundDownNonPow2(const Type1 value,const Type2 size)633 __forceinline DWORD RoundDownNonPow2( const Type1 value, const Type2 size )
634 {
635     const Type1 size1 = (Type1)size;
636     return (DWORD)(( value / size1 ) * size1);
637 }
638 
639 /*****************************************************************************\
640 Inline Function:
641     RoundPower2
642 
643 Description:
644     Rounds an unsigned 32-bit integer to the next power of 2
645 \*****************************************************************************/
inline DWORD RoundPower2( const DWORD value )
{
    // Powers of two are returned unchanged; so is 0, because IsPowerOfTwo
    // reports true for it
    if( IsPowerOfTwo( value ) )
    {
        return value;
    }

    // Next power of two = bit just above the most significant set bit
    return 2ul << bsr( value );
}
650 
651 /*****************************************************************************\
652 Inline Function:
653     RoundPower2
654 
655 Description:
656     Rounds an unsigned 64-bit integer to the next power of 2
657 \*****************************************************************************/
inline QWORD RoundPower2( const QWORD value )
{
    // Split the value into high and low dwords and reuse the 32-bit overload
    VALUE64 v64 = { value };

    // The result fits in the low dword only while the value is <= 2^31.
    // The comparison is strict so that exactly 0x80000000, which is already
    // a power of two, is returned unchanged instead of being bumped to 2^32.
    if( v64.h.u || ( v64.l.u > 0x80000000u ) )
    {
        // Any non-zero low dword pushes the high dword to the next value
        // before it is rounded; the low dword of the result is then zero
        v64.h.u = RoundPower2( (DWORD)(( v64.l.u ) ? v64.h.u + 1 : v64.h.u) );
        v64.l.u = 0;
    }
    else
    {
        v64.l.u = RoundPower2( (DWORD)(v64.l.u) );
    }

    return v64.u;
}
674 
675 /*****************************************************************************\
676 Inline Function:
677     Log2
678 
679 Description:
680     Returns the logarithm base two of the passed in number by returning
681     floor( log2( number ) ).  Also in the case of Log2(0) the function
682     will return 0.
683 \*****************************************************************************/
inline DWORD Log2( const DWORD value )
{
    // Callers are still expected to pass a power of two (checked in debug)
    ASSERT( IsPowerOfTwo(value) );

    // floor( log2( value ) ) is the index of the most significant set bit.
    // Using bsr keeps the result well defined for every non-zero input,
    // including non-powers of two when ASSERT is compiled out. bsr has no
    // meaningful answer for 0, so Log2(0) is answered directly as 0.
    return ( value != 0 ) ? bsr( value ) : 0;
}
696 
697 /*****************************************************************************\
698 Inline Function:
699     IsAligned
700 
701 Description:
702     Determines if the given pointer is aligned to the given size
703 \*****************************************************************************/
704 template< typename Type >
IsAligned(Type * ptr,const size_t alignSize)705 __forceinline bool IsAligned( Type * ptr, const size_t alignSize )
706 {
707     return ( ( (size_t)ptr % alignSize ) == 0 );
708 }
709 
710 /*****************************************************************************\
711 Inline Function:
712     IsAligned
713 
714 Description:
715     Determines if the given size is aligned to the given size
716 \*****************************************************************************/
717 template< typename Type >
IsAligned(Type size,const size_t alignSize)718 __forceinline bool IsAligned( Type size, const size_t alignSize )
719 {
720     return ( ( size % alignSize ) == 0 );
721 }
722 
723 /*****************************************************************************\
724 Inline Function:
725     Align
726 
727 Description:
728     Type-safe (power-2) alignment of a pointer.
729 \*****************************************************************************/
730 template<typename Type>
Align(Type * const ptr,const size_t alignment)731 __forceinline Type* Align( Type* const ptr, const size_t alignment )
732 {
733     ASSERT( IsPowerOfTwo(alignment) );
734 
735     return (Type*)( ( ((size_t)ptr) + alignment-1 ) & ~( alignment-1 ) );
736 }
737 
738 /*****************************************************************************\
739 Inline Function:
740     Align
741 
742 Description:
743     Type-safe (power-2) alignment of a value.
744 \*****************************************************************************/
745 template<typename Type>
Align(const Type value,const size_t alignment)746 __forceinline Type Align( const Type value, const size_t alignment )
747 {
748     ASSERT( IsPowerOfTwo(alignment) );
749 
750     Type mask = static_cast<Type>(alignment) - 1;
751     return (value + mask) & ~mask;
752 }
753 
754 /*****************************************************************************\
755 Inline Function:
756     GetAlignmentOffset
757 
758 Description:
759     Returns the size in bytes needed to align the given pointer to the
760     given alignment size
761 \*****************************************************************************/
762 template<typename Type>
GetAlignmentOffset(Type * const ptr,const size_t alignSize)763 __forceinline DWORD GetAlignmentOffset( Type* const ptr, const size_t alignSize )
764 {
765     ASSERT( alignSize );
766 
767     DWORD offset = 0;
768 
769     if( IsPowerOfTwo(alignSize) )
770     {   // can recast 'ptr' to DWORD, since offset is DWORD
771         offset = DWORD( UINT_PTR( Align(ptr, alignSize) ) - (UINT_PTR)(ptr) );
772     }
773     else
774     {
775         const DWORD modulo = (DWORD)(UINT_PTR(ptr) % alignSize);
776 
777         if( modulo )
778         {
779             offset = (DWORD)alignSize - modulo;
780         }
781     }
782 
783     return offset;
784 }
785 
786 /*****************************************************************************\
787 Inline Function:
788     GetAlignmentOffset
789 
790 Description:
791     Returns the size in bytes needed to align the given size to the
792     given alignment size
793 \*****************************************************************************/
794 template<typename Type>
GetAlignmentOffset(const Type size,const size_t alignSize)795 __forceinline Type GetAlignmentOffset( const Type size, const size_t alignSize )
796 {
797     ASSERT( alignSize );
798 
799     Type offset = 0;
800 
801     if( IsPowerOfTwo(alignSize) )
802     {
803         offset = Align(size, alignSize) - size;
804     }
805     else
806     {
807         const Type modulo = (Type)( size % alignSize );
808 
809         if( modulo )
810         {
811             offset = (Type)alignSize - modulo;
812         }
813     }
814 
815     return offset;
816 }
817 
818 /*****************************************************************************\
819 Inline Function:
820     MemCompare
821 
822 Description:
823     Templated Exception Handler Memory Compare function
824 \*****************************************************************************/
template <size_t size>
inline bool MemCompare( const void* dst, const void* src )
{
    // Compares 'size' bytes (a compile-time constant) at dst and src and
    // returns true when all of them are equal, false at the first mismatch.
    // The bulk is compared in 8-byte steps; the remainder is handled with
    // one 4-byte, one 2-byte and one 1-byte comparison as needed.
    const UINT64*   pSrc    = reinterpret_cast<const UINT64*>(src);
    const UINT64*   pDst    = reinterpret_cast<const UINT64*>(dst);
    size_t          cmpSize = size;     // bytes still to be compared

    // For sizes above DUAL_CACHE_SIZE (128 according to the original note),
    //  compare a short prefix first because of the double clock penalty for
    //  mov when a memory access is not 64 bit aligned. See Intel Programming
    //  manual Volume 1, Section 4.1.1
#ifdef _WIN64
    if( size > DUAL_CACHE_SIZE )
    {
        // misalignment of each pointer relative to an 8-byte boundary (0..7)
        size_t alignSrc = (size_t)( (UINT_PTR)pSrc & ( sizeof(QWORD) - 1 ) );
        size_t alignDst = (size_t)( (UINT_PTR)pDst & ( sizeof(QWORD) - 1 ) );

        // The prefix is only compared when BOTH pointers are misaligned.
        // alignSrc is used for this test only; the prefix length comes from
        // alignDst alone.
        // NOTE(review): the prefix length is alignDst itself rather than
        // 8 - alignDst, so pDst ends up 8-byte aligned only when
        // alignDst == 4 - confirm intent. The comparison result is
        // unaffected.
        if( alignSrc > 0 && alignDst > 0 )
        {
            cmpSize -= alignDst; // take off our alignment

            // The prefix (1..7 bytes) is consumed as at most one 4-byte, one
            // 2-byte and one 1-byte comparison, in that order.
            const UINT32* uSrc = reinterpret_cast<const UINT32*>(pSrc);
            const UINT32* uDst = reinterpret_cast<const UINT32*>(pDst);

            if( alignDst >= sizeof(UINT32) )
            {
                if( (*uSrc - *uDst) != 0 )
                {
                    return false;
                }

                alignDst    -= sizeof(UINT32);
                uSrc        += 1;
                uDst        += 1;
            }

            const WORD* wSrc = reinterpret_cast<const WORD*>(uSrc);
            const WORD* wDst = reinterpret_cast<const WORD*>(uDst);

            if( alignDst >= sizeof(WORD) )
            {

                if( (*wSrc - *wDst) != 0 )
                {
                    return false;
                }

                alignDst    -= sizeof(WORD);
                wSrc        += 1;
                wDst        += 1;
            }

            const BYTE* bSrc = reinterpret_cast<const BYTE*>(wSrc);
            const BYTE* bDst = reinterpret_cast<const BYTE*>(wDst);

            if( alignDst >= sizeof(BYTE) )
            {
                if( (*bSrc - *bDst) != 0 )
                {
                    return false;
                }

                alignDst    -= sizeof(BYTE);
                bSrc        += 1;
                bDst        += 1;
            }

            // continue the 8-byte comparison right after the prefix
            pSrc    = reinterpret_cast<const UINT64*>(bSrc);
            pDst    = reinterpret_cast<const UINT64*>(bDst);
        }
    }
#endif

    // compare memory by tier until we find a difference
    // (cnt = number of whole 8-byte words remaining)
    size_t cnt = cmpSize >> 3;

    for( size_t i = 0; i < cnt; i++ )
    {
        if( (*pSrc - *pDst) != 0 )
        {
            return false;
        }

        pSrc += 1;
        pDst += 1;
    }

    // 0..7 bytes are left after the 8-byte words
    cmpSize -= (cnt * sizeof(UINT64));

    if( cmpSize == 0 )
    {
        return true;
    }

    const UINT32* dSrc   = reinterpret_cast<const UINT32*>(pSrc);
    const UINT32* dDst   = reinterpret_cast<const UINT32*>(pDst);

    // 4-byte tier
    if( cmpSize >= sizeof(UINT32) )
    {
        if( (*dSrc - *dDst) != 0 )
        {
            return false;
        }

        dSrc    += 1;
        dDst    += 1;
        cmpSize -= sizeof(UINT32);
    }

    if( cmpSize == 0 )
    {
        return true;
    }

    const WORD* wSrc  = reinterpret_cast<const WORD*>(dSrc);
    const WORD* wDst  = reinterpret_cast<const WORD*>(dDst);

    // 2-byte tier
    if( cmpSize >= sizeof(WORD) )
    {
        if( (*wSrc - *wDst) != 0 )
        {
            return false;
        }

        wSrc    += 1;
        wDst    += 1;
        cmpSize -= sizeof(WORD);
    }

    if (cmpSize == 0 )
    {
        return true;
    }

    // Exactly one byte is left at this point (the earlier tiers reduced the
    // remainder below 2 and the zero case has already returned)
    const BYTE* bSrc  = reinterpret_cast<const BYTE*>(wSrc);
    const BYTE* bDst  = reinterpret_cast<const BYTE*>(wDst);

    if( (*bSrc - *bDst) != 0 )
    {
        return false;
    }

    return true;
}
970 
971 template <>
972 inline bool MemCompare<1>( const void* dst, const void* src )
973 {
974     return (*(BYTE*)dst == *(BYTE*)src);
975 }
976 
977 template <>
978 inline bool MemCompare<2>( const void* dst, const void* src )
979 {
980     return (*(WORD*)dst == *(WORD*)src);
981 }
982 
983 template <>
984 inline bool MemCompare<4>( const void* dst, const void* src )
985 {
986     return (*(UINT32*)dst == *(UINT32*)src);
987 }
988 
989 template <>
990 inline bool MemCompare<8>( const void* dst, const void* src )
991 {
992     return (*(UINT64*)dst == *(UINT64*)src);
993 }
994 
995 /*****************************************************************************\
996 Inline Function:
997     IsEqual
998 
999 Description:
1000     Compares two values for equality
1001 \*****************************************************************************/
1002 template <class Type>
IsEqual(const Type & a,const Type & b)1003 __forceinline bool IsEqual( const Type& a, const Type& b )
1004 {
1005     return iSTD::MemCompare<sizeof(Type)>( &a, &b );
1006 }
1007 
1008 /*****************************************************************************\
1009 Inline Function:
1010     IsTagComplete
1011 
1012 Description:
1013     Determines is the surface tag has reached completion
1014 \*****************************************************************************/
1015 template <class Type>
IsTagComplete(const Type hwTag,const Type swTag,const Type resTag)1016 __forceinline bool IsTagComplete( const Type hwTag, const Type swTag, const Type resTag )
1017 {
1018     return ( ( resTag == hwTag ) || ( ( resTag - hwTag ) > ( swTag - hwTag ) ) );
1019 }
1020 
1021 /*****************************************************************************\
1022 
1023 Inline Function:
1024     Hash
1025 
1026 Description:
1027     Calculates hash from sequence of 32-bit values.
1028 
1029     Jenkins 96-bit mixing function with 32-bit feedback-loop and 64-bit state.
1030 
1031     All magic values are DWORDs of SHA2-256 mixing data:
1032     0x428a2f98 0x71374491 0xb5c0fbcf 0xe9b5dba5
1033     0x3956c25b 0x59f111f1 0x923f82a4 0xab1c5ed5
1034 
    Could be sped up by processing 2 or 3 DWORDs at a time.
1036 
1037 \*****************************************************************************/
// Mixes the three values a, b and c in place using nine rounds of
// subtract / subtract / xor-with-shift.
//
// Every argument is both read and written and is expanded several times, so
// each one must be a modifiable lvalue without side effects.
//
// The body is wrapped in do/while(0) so that "HASH_JENKINS_MIX(...);" forms
// exactly one statement and can be used safely in an unbraced if/else.
#define HASH_JENKINS_MIX(a,b,c)                      \
    do                                               \
    {                                                \
        (a) -= (b); (a) -= (c); (a) ^= ((c)>>13);    \
        (b) -= (c); (b) -= (a); (b) ^= ((a)<<8);     \
        (c) -= (a); (c) -= (b); (c) ^= ((b)>>13);    \
        (a) -= (b); (a) -= (c); (a) ^= ((c)>>12);    \
        (b) -= (c); (b) -= (a); (b) ^= ((a)<<16);    \
        (c) -= (a); (c) -= (b); (c) ^= ((b)>>5);     \
        (a) -= (b); (a) -= (c); (a) ^= ((c)>>3);     \
        (b) -= (c); (b) -= (a); (b) ^= ((a)<<10);    \
        (c) -= (a); (c) -= (b); (c) ^= ((b)>>15);    \
    } while( 0 )
1050 
Hash(const DWORD * data,DWORD count)1051 inline QWORD Hash( const DWORD *data, DWORD count )
1052 {
1053     DWORD   a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1054     while( count-- )
1055     {
1056         a ^= *(data++);
1057         HASH_JENKINS_MIX( a, hi, lo );
1058     }
1059     return (((QWORD)hi)<<32)|lo;
1060 }
1061 
1062 struct HashJenkinsMixReturnAggregate
1063 {
HashJenkinsMixReturnAggregateHashJenkinsMixReturnAggregate1064     HashJenkinsMixReturnAggregate(DWORD _a, DWORD _hi, DWORD _lo) :
1065         a(_a),
1066         hi(_hi),
1067         lo(_lo)
1068     {}
1069 
1070     DWORD a;
1071     DWORD hi;
1072     DWORD lo;
1073 };
1074 
1075 inline
HashJenkinsMix(DWORD a,DWORD hi,DWORD lo)1076 HashJenkinsMixReturnAggregate HashJenkinsMix(DWORD a, DWORD hi, DWORD lo)
1077 {
1078     HASH_JENKINS_MIX(a, hi, lo);
1079     return HashJenkinsMixReturnAggregate(a, hi, lo);
1080 }
1081 
1082 __forceinline
HashNext(DWORD & a,DWORD & hi,DWORD & lo,DWORD data)1083 void HashNext(DWORD &a, DWORD &hi, DWORD &lo, DWORD data)
1084 {
1085     a ^= data;
1086     HashJenkinsMixReturnAggregate result = HashJenkinsMix(a, hi, lo);
1087     a = result.a;
1088     hi = result.hi;
1089     lo = result.lo;
1090 }
1091 
1092 __forceinline
HashFirst(DWORD & a,DWORD & hi,DWORD & lo,DWORD data)1093 void HashFirst(DWORD &a, DWORD &hi, DWORD &lo, DWORD data)
1094 {
1095     a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1096     HashNext(a, hi, lo, data);
1097 }
1098 
1099 
1100 /*****************************************************************************\
1101 Inline Function:
1102 HashFromBuffer
1103 
1104 Description:
1105     Calculates hash from data buffer.
1106 Input:
1107     data - pointer to the data buffer
1108     count - size of the buffer in bytes
1109 \*****************************************************************************/
HashFromBuffer(const char * data,size_t count)1110 inline QWORD HashFromBuffer(const char *data, size_t count)
1111 {
1112     DWORD a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1113     const DWORD *dataDw = reinterpret_cast<const DWORD*>(data);
1114     size_t countDw = (DWORD)(count / sizeof(DWORD));
1115 
1116     while (countDw--)
1117     {
1118         a ^= *(dataDw++);
1119         HASH_JENKINS_MIX(a, hi, lo);
1120     }
1121     // If buffer size isn't miltiply of DWORD we have to use last bytes to calculate hash
1122     if (count % sizeof(DWORD) != 0)
1123     {
1124         DWORD lastDw = 0;
1125         char *lastBytesBuff = reinterpret_cast<char*>(&lastDw);
1126         const size_t restBytesCount = count % sizeof(DWORD);
1127 
1128         for (unsigned int i = 0; i < restBytesCount; i++)
1129         {
1130             lastBytesBuff[i] = data[count - restBytesCount + i];
1131         }
1132         a ^= lastDw;
1133         HASH_JENKINS_MIX(a, hi, lo);
1134     }
1135     return (((QWORD)hi) << 32) | lo;
1136 }
1137 #undef HASH_JENKINS_MIX
1138 
1139 /*****************************************************************************\
1140 
1141 Inline Function:
1142     Hash32b
1143 
1144 Description:
1145     Calculates 32 bit hash from 32 bit value.
1146 
1147     badc0ded hash - self-reversible, 32->32 mapping, good avalanche
1148     4 asm instructions in x86, 0 maps to 0
1149 
1150 \*****************************************************************************/
Hash32b(const DWORD value)1151 inline DWORD Hash32b( const DWORD value )
1152 {
1153 #if defined _WIN32
1154     return   ( _byteswap_ulong( value * 0xbadc0ded ) ^ 0xfecacafe ) * 0x649c57e5;
1155 #else
1156     return ( __builtin_bswap32( value * 0xbadc0ded ) ^ 0xfecacafe ) * 0x649c57e5;
1157 #endif
1158 }
1159 
1160 /*****************************************************************************\
1161 
1162 Inline Function:
1163     Hash32b
1164 
1165 Description:
1166     Calculates 32 bit hash from sequence of 32 bit values.
1167 
1168     badc0ded hash - self-reversible, 32->32 mapping, good avalanche
1169     4 asm instructions in x86, 0 maps to 0
1170 
1171 \*****************************************************************************/
Hash32b(const DWORD * data,DWORD count)1172 inline DWORD Hash32b( const DWORD *data, DWORD count )
1173 {
1174     DWORD hash = 0xdeadf00d;
1175 
1176     while( count-- )
1177     {
1178         hash ^= Hash32b( *( data + count ) );
1179     }
1180 
1181     return hash;
1182 }
1183 
1184 /*****************************************************************************\
1185 
1186 Inline Function:
1187     BitCount
1188 
1189 Description:
1190     Returns the number of bits set to 1 in the input 32-bit number.
1191 
1192 \*****************************************************************************/
BitCount(DWORD v)1193 inline DWORD BitCount( DWORD v )
1194 {
1195     v = v - ((v >> 1) & 0x55555555);
1196     v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
1197     return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x1010101) >> 24;
1198 }
1199 
1200 /*****************************************************************************\
1201 
1202 Inline Function:
1203     BitCount64
1204 
1205 Description:
1206     Returns the number of bits set to 1 in the input 64-bit number.
1207 
1208 \*****************************************************************************/
BitCount64(unsigned long long v)1209 inline DWORD BitCount64( unsigned long long v )
1210 {
1211     v -= ( v >> 1 ) & 0x5555555555555555ULL;
1212     v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
1213     v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL) * 0x0101010101010101ULL;
1214     return static_cast<DWORD>( v >> 56 );
1215 }
1216 
1217 /*****************************************************************************\
1218 
1219 Inline Function:
1220     BitReverse
1221 
1222 Description:
1223     Reverse a 32-bit bitfield in a number.
1224 
1225 \*****************************************************************************/
BitReverse(DWORD v)1226 inline DWORD BitReverse( DWORD v )
1227 {
1228     // swap odd and even bits
1229     v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
1230     // swap consecutive pairs
1231     v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
1232     // swap nibbles
1233     v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
1234     // swap bytes
1235     v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
1236     // swap words
1237     v = ( v >> 16             ) | ( v               << 16);
1238     return v;
1239 }
1240 
1241 /*****************************************************************************\
1242 Inline Function:
1243     PtrAdd
1244 
1245 Description:
1246     Type-safe addition of a pointer and a scalar (in bytes).
1247 \*****************************************************************************/
1248 template<typename Type>
PtrAdd(Type * ptr,const size_t numBytes)1249 __forceinline Type* PtrAdd( Type* ptr, const size_t numBytes )
1250 {
1251     return (Type*)( ((BYTE*)ptr) + numBytes );
1252 }
1253 
1254 /*****************************************************************************\
1255 Inline Function:
1256     FixedSIntToInt
1257 
1258 Description:
1259     Converts a fixed signed integer value into a native signed int
1260 \*****************************************************************************/
FixedSIntToInt(DWORD value,DWORD size)1261 __forceinline int FixedSIntToInt( DWORD value, DWORD size )
1262 {
1263     if( value & BIT(size+1) )
1264     {
1265         return -1 * (value + 1);
1266     }
1267 
1268     return value;
1269 }
1270 
1271 } // iSTD
1272