1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #pragma once
10
#include "types.h"
#include "Debug.h"
#include <stdlib.h>
#include <string.h>
14
15 #if defined _WIN32
16 # include <intrin.h>
17 #endif
18
19 #if !defined(_WIN32)
20 # include "../../inc/common/secure_mem.h"
21 # include "../../inc/common/secure_string.h"
22 #endif
23
24 namespace iSTD
25 {
26
27 /*****************************************************************************\
28 MACRO: BIT
29 \*****************************************************************************/
// BIT( n ): int value with only bit n set (the literal 1 is an int, so the
// result is a signed 32-bit quantity).
#if !defined( BIT )
#define BIT( n ) ( 1 << ( n ) )
#endif
33
34 /*****************************************************************************\
35 MACRO: MASKED_BIT
36 \*****************************************************************************/
// MASKED_BIT( n, enable ): sets bit (n + 16) unconditionally and bit n when
// 'enable' is non-zero. Both macro arguments are fully parenthesized so that
// operator expressions (e.g. "cond ? 2 : 3") can be passed as 'n'.
#ifndef MASKED_BIT
#define MASKED_BIT( n, enable ) ( 1 << ( (n) + 16 ) | ( (enable) ? 1 : 0 ) << (n) )
#endif
40
41 /*****************************************************************************\
42 MACRO: QWBIT
43 \*****************************************************************************/
// QWBIT( n ): 64-bit counterpart of BIT; a long long with only bit n set.
#if !defined( QWBIT )
#define QWBIT( n ) ( 1ll << ( n ) )
#endif
47
48 /*****************************************************************************\
49 MACRO: BITMASK
50 PURPOSE: Creates a mask of n bits
51 \*****************************************************************************/
// BITMASK( n ): unsigned 32-bit mask with the n lowest bits set.
// The shift is performed on a 32-bit constant, so n must be below 32.
#if !defined( BITMASK )
#define BITMASK( n ) ( ~( 0xffffffff << ( n ) ) )
#endif
// BITMASK_RANGE( startbit, endbit ): unsigned 32-bit mask with bits
// startbit..endbit (inclusive, 0-based) set.
// Built from two in-range shifts so that endbit == 31 is valid; the previous
// form expanded to a shift by 32, which is undefined for a 32-bit operand.
// Both arguments must lie in 0..31.
#ifndef BITMASK_RANGE
#define BITMASK_RANGE( startbit, endbit ) ( ( 0xffffffff >> ( 31 - (endbit) ) ) & ( 0xffffffff << (startbit) ) )
#endif
58
59 /*****************************************************************************\
60 MACRO: QWBITMASK
61 PURPOSE: Creates a mask of n bits
62 \*****************************************************************************/
// QWBITMASK( n ): unsigned 64-bit mask with the n lowest bits set.
// The shift is performed on a 64-bit constant, so n must be below 64.
#if !defined( QWBITMASK )
#define QWBITMASK( n ) ( ~( 0xffffffffffffffffull << ( n ) ) )
#endif
66
// QWBITMASK_RANGE( startbit, endbit ): unsigned 64-bit mask with bits
// startbit..endbit (inclusive, 0-based) set.
// Built from two in-range shifts so that endbit == 63 is valid; the previous
// form expanded to a shift by 64, which is undefined for a 64-bit operand.
// Both arguments must lie in 0..63.
#ifndef QWBITMASK_RANGE
#define QWBITMASK_RANGE( startbit, endbit ) ( ( 0xffffffffffffffffull >> ( 63 - (endbit) ) ) & ( 0xffffffffffffffffull << (startbit) ) )
#endif
70
71 /*****************************************************************************\
72 MACRO: BITFIELD_RANGE
73 PURPOSE: Calculates the number of bits between the startbit and the endbit (0 based)
74 \*****************************************************************************/
// BITFIELD_RANGE( startbit, endbit ): width in bits of the inclusive span
// startbit..endbit.
#if !defined( BITFIELD_RANGE )
#define BITFIELD_RANGE( startbit, endbit ) ( ( endbit ) - ( startbit ) + 1 )
#endif
78
79 /*****************************************************************************\
80 MACRO: BITFIELD_BIT
81 PURPOSE: Definition declared for clarity when creating structs
82 \*****************************************************************************/
// BITFIELD_BIT( bit ): always 1; the argument only names the bit position for
// the reader when declaring a single-bit struct field.
#if !defined( BITFIELD_BIT )
#define BITFIELD_BIT( bit ) 1
#endif
86
87 /*****************************************************************************\
88 MACRO: GETMSB
PURPOSE: Returns the index (31..0) of the most significant set bit of n, or -1 when no bit is set
90 \*****************************************************************************/
// GETMSB( n ): tests bits 31 down to 0 of n in order and evaluates to the
// index of the first one found set, or -1 when none is set. Written as one
// ternary chain over BIT(), four tests per row.
#if !defined( GETMSB )
#define GETMSB( n ) ( \
    ( (n) & BIT(31) ) ? 31 : ( (n) & BIT(30) ) ? 30 : ( (n) & BIT(29) ) ? 29 : ( (n) & BIT(28) ) ? 28 : \
    ( (n) & BIT(27) ) ? 27 : ( (n) & BIT(26) ) ? 26 : ( (n) & BIT(25) ) ? 25 : ( (n) & BIT(24) ) ? 24 : \
    ( (n) & BIT(23) ) ? 23 : ( (n) & BIT(22) ) ? 22 : ( (n) & BIT(21) ) ? 21 : ( (n) & BIT(20) ) ? 20 : \
    ( (n) & BIT(19) ) ? 19 : ( (n) & BIT(18) ) ? 18 : ( (n) & BIT(17) ) ? 17 : ( (n) & BIT(16) ) ? 16 : \
    ( (n) & BIT(15) ) ? 15 : ( (n) & BIT(14) ) ? 14 : ( (n) & BIT(13) ) ? 13 : ( (n) & BIT(12) ) ? 12 : \
    ( (n) & BIT(11) ) ? 11 : ( (n) & BIT(10) ) ? 10 : ( (n) & BIT(9) )  ? 9  : ( (n) & BIT(8) )  ? 8  : \
    ( (n) & BIT(7) )  ? 7  : ( (n) & BIT(6) )  ? 6  : ( (n) & BIT(5) )  ? 5  : ( (n) & BIT(4) )  ? 4  : \
    ( (n) & BIT(3) )  ? 3  : ( (n) & BIT(2) )  ? 2  : ( (n) & BIT(1) )  ? 1  : ( (n) & BIT(0) )  ? 0  : \
    (-1) )
#endif
127
128 /*****************************************************************************\
129 MACRO: BITCOUNT
130 PURPOSE: Determines the number of bits needed in a bitmask, given the number
131 of elements to be stored in the mask
132 \*****************************************************************************/
// BITCOUNT( n ): number of bits needed to index n elements, computed as the
// position of the highest set bit of (n - 1) plus one; evaluates to 0 when
// (n - 1) has no bit set (i.e. n == 1). Written as one ternary chain over
// BIT(), four tests per row.
#if !defined( BITCOUNT )
#define BITCOUNT( n ) ( \
    ( ((n)-1) & BIT(31) ) ? 32 : ( ((n)-1) & BIT(30) ) ? 31 : ( ((n)-1) & BIT(29) ) ? 30 : ( ((n)-1) & BIT(28) ) ? 29 : \
    ( ((n)-1) & BIT(27) ) ? 28 : ( ((n)-1) & BIT(26) ) ? 27 : ( ((n)-1) & BIT(25) ) ? 26 : ( ((n)-1) & BIT(24) ) ? 25 : \
    ( ((n)-1) & BIT(23) ) ? 24 : ( ((n)-1) & BIT(22) ) ? 23 : ( ((n)-1) & BIT(21) ) ? 22 : ( ((n)-1) & BIT(20) ) ? 21 : \
    ( ((n)-1) & BIT(19) ) ? 20 : ( ((n)-1) & BIT(18) ) ? 19 : ( ((n)-1) & BIT(17) ) ? 18 : ( ((n)-1) & BIT(16) ) ? 17 : \
    ( ((n)-1) & BIT(15) ) ? 16 : ( ((n)-1) & BIT(14) ) ? 15 : ( ((n)-1) & BIT(13) ) ? 14 : ( ((n)-1) & BIT(12) ) ? 13 : \
    ( ((n)-1) & BIT(11) ) ? 12 : ( ((n)-1) & BIT(10) ) ? 11 : ( ((n)-1) & BIT(9) )  ? 10 : ( ((n)-1) & BIT(8) )  ? 9  : \
    ( ((n)-1) & BIT(7) )  ? 8  : ( ((n)-1) & BIT(6) )  ? 7  : ( ((n)-1) & BIT(5) )  ? 6  : ( ((n)-1) & BIT(4) )  ? 5  : \
    ( ((n)-1) & BIT(3) )  ? 4  : ( ((n)-1) & BIT(2) )  ? 3  : ( ((n)-1) & BIT(1) )  ? 2  : ( ((n)-1) & BIT(0) )  ? 1  : \
    0 )
#endif
169
170 /*****************************************************************************\
171 MACRO: MIN
172 \*****************************************************************************/
// MIN( x, y ): the smaller operand; x is chosen when the two compare equal.
// Each argument may be evaluated twice.
#if !defined( MIN )
#define MIN( x, y ) ( ( ( x ) <= ( y ) ) ? ( x ) : ( y ) )
#endif
176
177 /*****************************************************************************\
178 MACRO: MAX
179 \*****************************************************************************/
// MAX( x, y ): the larger operand; x is chosen when the two compare equal.
// Each argument may be evaluated twice.
#if !defined( MAX )
#define MAX( x, y ) ( ( ( x ) >= ( y ) ) ? ( x ) : ( y ) )
#endif
183
184 /*****************************************************************************\
185 MACRO: CEIL_DIV
186 \*****************************************************************************/
// CEIL_DIV( x, y ): x divided by y, rounded up, computed as ((x - 1) / y) + 1.
// The formula is meant for x >= 1.
#if !defined( CEIL_DIV )
#define CEIL_DIV( x, y ) ( ( ( ( x ) - 1 ) / ( y ) ) + 1 )
#endif
190
191 /*****************************************************************************\
192 MACRO: STRCAT
193 \*****************************************************************************/
// STRCAT( dst, size, src ): forwards to strcat_s with the destination size.
#if !defined( STRCAT )
#define STRCAT( dst, size, src ) strcat_s( ( dst ), ( size ), ( src ) )
#endif
197
198 /*****************************************************************************\
199 MACRO: STRNCAT
200 \*****************************************************************************/
// STRNCAT( dst, size, src, len ): bounded append. User-mode Windows builds use
// strncat_s; ISTDLIB_KMD and non-Windows builds use plain strncat, which
// ignores 'size'.
#if !defined( STRNCAT )
#if !defined(ISTDLIB_KMD) && defined(_WIN32)
#define STRNCAT( dst, size, src, len ) strncat_s( (dst), (size), (src), (len) )
#else
#define STRNCAT( dst, size, src, len ) strncat( (dst), (src), (len) )
#endif
#endif
208
209 /*****************************************************************************\
210 MACRO: WCSNCAT
211 \*****************************************************************************/
// WCSNCAT( dst, size, src, len ): wide-character bounded append. User-mode
// Windows builds use wcsncat_s; ISTDLIB_KMD and non-Windows builds use plain
// wcsncat, which ignores 'size'.
#if !defined( WCSNCAT )
#if !defined(ISTDLIB_KMD) && defined(_WIN32)
#define WCSNCAT( dst, size, src, len ) wcsncat_s( (dst), (size), (src), (len) )
#else
#define WCSNCAT( dst, size, src, len ) wcsncat( (dst), (src), (len) )
#endif
#endif
219
220 /*****************************************************************************\
221 MACRO: STRCPY
222 \*****************************************************************************/
// STRCPY( dst, size, src ): forwards to strcpy_s with the destination size.
#if !defined( STRCPY )
#define STRCPY( dst, size, src ) strcpy_s( ( dst ), ( size ), ( src ) )
#endif
226
227 /*****************************************************************************\
228 MACRO: SPRINTF
229 \*****************************************************************************/
// SPRINTF( dst, size, src, args ): formats a single argument into dst.
// User-mode Windows builds use sprintf_s; ISTDLIB_KMD and non-Windows builds
// use plain sprintf, which ignores 'size'.
#if !defined( SPRINTF )
#if !defined(ISTDLIB_KMD) && defined(_WIN32)
#define SPRINTF( dst, size, src, args ) sprintf_s( (dst), (size), (src), (args) )
#else
#define SPRINTF( dst, size, src, args ) sprintf( (dst), (src), (args) )
#endif
#endif
237
238 /*****************************************************************************\
239 MACRO: VSNPRINTF
240 \*****************************************************************************/
// VSNPRINTF( dst, size, len, src, args ): length-limited va_list formatting.
// User-mode Windows builds use _vsnprintf_s; ISTDLIB_KMD and non-Windows
// builds use _vsnprintf, which ignores 'size'.
#if !defined( VSNPRINTF )
#if !defined(ISTDLIB_KMD) && defined(_WIN32)
#define VSNPRINTF( dst, size, len, src, args ) _vsnprintf_s( (dst), (size), (len), (src), (args) )
#else
#define VSNPRINTF( dst, size, len, src, args ) _vsnprintf( (dst), (len), (src), (args) )
#endif
#endif
248
249 /*****************************************************************************\
250 MACRO: VSPRINTF
251 \*****************************************************************************/
// VSPRINTF( dst, size, src, args ): va_list formatting. User-mode Windows
// builds use vsprintf_s; ISTDLIB_KMD and non-Windows builds use plain
// vsprintf, which ignores 'size'.
#if !defined( VSPRINTF )
#if !defined(ISTDLIB_KMD) && defined(_WIN32)
#define VSPRINTF( dst, size, src, args ) vsprintf_s( (dst), (size), (src), (args) )
#else
#define VSPRINTF( dst, size, src, args ) vsprintf( (dst), (src), (args) )
#endif
#endif
259
260 /*****************************************************************************\
261 MACRO: MEMCPY
262 \*****************************************************************************/
// MEMCPY( dst, size, src, args ): copies 'args' bytes from src to dst.
// memcpy_s is used only for MSVC user-mode builds that are not Android;
// Android, ISTDLIB_KMD and non-MSVC builds use plain memcpy, which ignores
// 'size'.
#if !defined( MEMCPY )
#if !defined(__ANDROID__) && !defined(ISTDLIB_KMD) && defined(_MSC_VER)
#define MEMCPY( dst, size, src, args ) memcpy_s( (dst), (size), (src), (args) )
#else
#define MEMCPY( dst, size, src, args ) memcpy( (dst), (src), (args) )
#endif
#endif
272
273 /*****************************************************************************\
274 MACRO: ARRAY_COUNT
275 \*****************************************************************************/
// ARRAY_COUNT( x ): number of elements in the array x (total size divided by
// the size of the first element). Only meaningful for true arrays, not for
// pointers. The argument is parenthesized at both uses so that an expression
// can be passed safely.
#ifndef ARRAY_COUNT
#define ARRAY_COUNT( x ) ( sizeof( x ) / sizeof( (x)[ 0 ] ) )
#endif
279
280 /*****************************************************************************\
281 Inline Template Function:
282 Swap
283
284 Description:
285 Swaps the values of two variables of the same type
286 \*****************************************************************************/
// Exchanges the contents of var0 and var1 using one temporary copy.
template <class Type>
inline void Swap( Type &var0, Type &var1 )
{
    Type saved = var0;

    var0 = var1;
    var1 = saved;
}
294
295 /*****************************************************************************\
296 Inline Template Function:
297 Min
298
299 Description:
300 Returns the min of the two values
301 \*****************************************************************************/
// Returns var0 when var0 <= var1, otherwise var1.
template <class Type>
__forceinline Type Min( const Type var0, const Type var1 )
{
    if( var0 <= var1 )
    {
        return var0;
    }

    return var1;
}
307
308 /*****************************************************************************\
309 Inline Template Function:
310 Max
311
312 Description:
313 Returns the max of the two values
314 \*****************************************************************************/
// Returns var0 when var0 >= var1, otherwise var1.
template <class Type>
__forceinline Type Max( const Type var0, const Type var1 )
{
    if( var0 >= var1 )
    {
        return var0;
    }

    return var1;
}
320
321 /*****************************************************************************\
322 Inline Template Function:
323 ClampMax
324
325 Description:
Checks whether the value is greater than the maximum value. If the value
is greater than the maximum, the maximum value is returned. Otherwise, the
value itself is returned.
329 \*****************************************************************************/
// Limits 'value' from above: yields 'max' when value > max, else 'value'.
template<class Type>
__forceinline Type ClampMax( const Type value, const Type max )
{
    if( value > max )
    {
        return max;
    }

    return value;
}
335
336 /*****************************************************************************\
337 Inline Template Function:
338 ClampMin
339
340 Description:
Checks whether the value is less than the minimum value. If the value is
less than the minimum, the minimum value is returned. Otherwise, the value
itself is returned.
344 \*****************************************************************************/
// Limits 'value' from below: yields 'min' when value < min, else 'value'.
template<class Type>
__forceinline Type ClampMin( const Type value, const Type min )
{
    if( value < min )
    {
        return min;
    }

    return value;
}
350
351 /*****************************************************************************\
352 Inline Template Function:
353 Clamp
354
355 Description:
Checks whether the value is less than the minimum value or greater than
the maximum value. If the value is less than the minimum, the minimum
value is returned. If the value is greater than the maximum, the maximum
value is returned. Otherwise, the value itself is returned.
360 \*****************************************************************************/
// Restricts 'value' to [min, max]. The upper bound is applied first and the
// lower bound second, so 'min' wins if the bounds are passed inverted.
template<class Type>
__forceinline Type Clamp( const Type value, const Type min, const Type max )
{
    const Type upperBounded = ( value > max ) ? max : value;

    return ( upperBounded < min ) ? min : upperBounded;
}
366
367 /*****************************************************************************\
368 Inline Template Function:
369 CheckLimits
370
371 Description:
372 Determines if the value is within the specified range
373 \*****************************************************************************/
// Returns true when min <= value <= max. An out-of-range value triggers
// ASSERT(0) before false is returned.
template <class Type>
__forceinline bool CheckLimits( const Type value, const Type min, const Type max )
{
    const bool withinRange = !( value < min ) && !( value > max );

    if( !withinRange )
    {
        ASSERT(0);
    }

    return withinRange;
}
385
386 /*****************************************************************************\
387 Inline Template Function:
388 emul
389
390 Description:
391 Upconversion Multiply, used for checking overflow.
392 \*****************************************************************************/
// Multiplies a by b after converting the first operand to the wider type t2,
// so the product is formed in t2 rather than in t1. t2 cannot be deduced and
// must be supplied explicitly by the caller.
template <typename t1, typename t2>
__forceinline t2 emul( t1 a, t1 b )
{
    const t2 widened = (t2)a;

    return widened * b;
}
398
399 /*****************************************************************************\
400 Inline Function:
401 bsr64
402
403 Description:
404 Intrinsic definition of bit scan reverse for 64bit values.
405 \*****************************************************************************/
#if defined( _WIN64 ) || defined( __x86_64__ )
// Bit scan reverse for 64-bit values: returns the index (0..63) of the most
// significant set bit of 'mask'.
// An empty mask returns 0 on every platform. Previously only the portable
// fallback did so; the GCC builtin is undefined for 0 and the MSVC intrinsic
// leaves its output unspecified.
__forceinline DWORD bsr64( const unsigned long long int mask )
{
    if( mask == 0 )
    {
        return 0;
    }

#if defined _WIN32
    DWORD index = 0;
    _BitScanReverse64( &index, static_cast<_int64>( mask ) );
    return static_cast<DWORD>( index );

#elif defined __GNUC__
    return static_cast<unsigned int>( 63 - __builtin_clzll( mask ) );

#else
    // Walk down from the top bit; the loop terminates because mask != 0.
    // An unsigned constant keeps the shift by 63 well defined.
    DWORD bit = 63;
    while( ( mask & ( 1ull << bit ) ) == 0 )
    {
        --bit;
    }
    return bit;

#endif
}
#endif // defined( _WIN64 ) || defined( __x86_64__ )
432
433 /*****************************************************************************\
434 Inline Function:
435 bsr
436
437 Description:
438 Intrinsic definition when not compiler-defined
439 \*****************************************************************************/
// Bit scan reverse: returns the index (0..31) of the most significant set bit
// of 'mask'.
// An empty mask returns 0 on every platform. Previously only the portable
// fallback did so; the GCC builtin is undefined for 0 and the MSVC intrinsic
// leaves its output unspecified.
__forceinline DWORD bsr( const DWORD mask )
{
    if( mask == 0 )
    {
        return 0;
    }

#if defined _WIN32
    DWORD index = 0;
    _BitScanReverse( &index, mask );
    return static_cast<DWORD>(index);

#elif defined __GNUC__
    return static_cast<unsigned int>( 31 - __builtin_clz( mask ) );

#else
    // Walk down from the top bit; the loop terminates because mask != 0.
    DWORD bit = 31;
    while( ( mask & ( 1u << bit ) ) == 0 )
    {
        --bit;
    }
    return bit;

#endif
}
464
465 /*****************************************************************************\
466 Inline Function:
bsf64
468
469 Description:
470 Intrinsic definition of bit scan forward for 64bit values.
471 \*****************************************************************************/
#if defined( _WIN64 ) || defined( __x86_64__ )
// Bit scan forward for 64-bit values: returns the index (0..63) of the least
// significant set bit of 'mask'. The result for an empty mask differs between
// the three implementations below, so callers should pass a non-zero mask.
__forceinline DWORD bsf64( const unsigned long long int mask )
{
#if defined _WIN32
    DWORD lowestSet;
    _BitScanForward64( &lowestSet, static_cast<_int64>( mask ) );
    return static_cast<DWORD>( lowestSet );

#elif defined __GNUC__
    // The builtin reports a one-based position.
    const int oneBased = __builtin_ffsll( mask );
    return static_cast<unsigned int>( oneBased - 1 );

#else
    if( mask == 0 )
    {
        return 0;
    }

    DWORD pos = 0;
    for( ; ( mask & QWBIT(pos) ) == 0; ++pos )
    {
    }
    return pos;

#endif
}
#endif // defined( _WIN64 ) || defined( __x86_64__ )
497
498 /*****************************************************************************\
499 Inline Function:
500 bsf
501
502 Description:
503 Intrinsic definition when not compiler-defined
504 \*****************************************************************************/
// Bit scan forward: returns the index (0..31) of the least significant set
// bit of 'mask'. The result for an empty mask differs between the three
// implementations below, so callers should pass a non-zero mask.
__forceinline DWORD bsf( const DWORD mask )
{
#if defined _WIN32
    DWORD lowestSet;
    _BitScanForward( &lowestSet, mask );
    return lowestSet;

#elif defined __GNUC__
    // The builtin reports a one-based position.
    const int oneBased = __builtin_ffsl( mask );
    return static_cast<unsigned int>( oneBased - 1 );

#else
    if( mask == 0 )
    {
        return 0;
    }

    DWORD pos = 0;
    for( ; ( mask & BIT(pos) ) == 0; ++pos )
    {
    }
    return pos;

#endif
}
528
529 /*****************************************************************************\
530 Description:
531 Find first zero which identifies the index of the least significant zero bit
532 mask - mask to be checked
533 \*****************************************************************************/
// FIND_FIRST_0_LSB( mask ): index of the least significant zero bit of mask,
// found by scanning the complement for its lowest set bit. The argument is
// parenthesized so that the complement applies to the whole expression
// (e.g. "a | b"), not just to its first operand.
#ifndef FIND_FIRST_0_LSB
#define FIND_FIRST_0_LSB( mask ) ( iSTD::bsf( ~(mask) ) )
#endif
537
538 /*****************************************************************************\
539 Inline Function:
540 clz
541
542 Description:
543 Count number of leading zeros of the mask
544 \*****************************************************************************/
clz(const DWORD mask)545 __forceinline DWORD clz( const DWORD mask )
546 {
547 DWORD retValue = 32;
548
549 // bsr returns 0 if the mask is 0 and sets a the ZF flag so handle
550 // 0 special.
551 if( mask != 0 )
552 {
553 retValue = 31 - bsr( mask );
554 }
555
556 return retValue;
557 }
558
559 /*****************************************************************************\
560 Inline Function:
561 IsPowerOfTwo
562
563 Description:
564 Determines if the given value is a power of two.
565 \*****************************************************************************/
// Returns true when 'number' has at most one bit set. Clearing the lowest set
// bit (number & (number - 1)) leaves zero exactly in that case, so 0 is also
// reported as a power of two.
template< typename Type >
__forceinline bool IsPowerOfTwo( const Type number )
{
    return 0 == ( ( number - 1 ) & number );
}
571
572 /*****************************************************************************\
573 Inline Function:
574 Round
575
576 Description:
577 Rounds an unsigned integer to the next multiple of (power-2) size
578 \*****************************************************************************/
// Rounds 'value' up to the next multiple of 'size'. 'size' must be a power of
// two (asserted); the result is computed in Type1.
template< typename Type1, typename Type2 >
__forceinline Type1 Round( const Type1 value, const Type2 size )
{
    ASSERT( IsPowerOfTwo(size) );

    // For a power-of-two size, size - 1 covers exactly the bits below it.
    const Type1 lowBits = (Type1)size - 1;
    const Type1 rounded = ( value + lowBits ) & ~( lowBits );

    return rounded;
}
587
588 /*****************************************************************************\
589 Inline Function:
590 RoundDown
591
592 Description:
593 Rounds an unsigned integer to the previous multiple of (power-2) size
594 \*****************************************************************************/
// Rounds 'value' down to the previous multiple of 'size'. 'size' must be a
// power of two (asserted). The computation is done in Type1, but the result
// is returned as DWORD.
template< typename Type1, typename Type2 >
__forceinline DWORD RoundDown( const Type1 value, const Type2 size )
{
    ASSERT( IsPowerOfTwo(size) );

    // For a power-of-two size, size - 1 covers exactly the bits below it.
    const Type1 lowBits = (Type1)size - 1;
    const Type1 rounded = value & ~( lowBits );

    return rounded;
}
603
604 /*****************************************************************************\
605 Inline Function:
606 RoundNonPow2
607
608 Description:
Rounds an unsigned integer up to the next multiple of size (nonpow2)
610 \*****************************************************************************/
// Rounds 'value' up to the next multiple of 'size', where 'size' need not be
// a power of two. A value that is already a multiple is returned unchanged.
// 'size' must be non-zero (it is used as a divisor).
template< typename Type1, typename Type2 >
__forceinline Type1 RoundNonPow2( const Type1 value, const Type2 size )
{
    const Type1 step   = (Type1)size;
    const Type1 excess = ( value % step );

    if( !excess )
    {
        return value;
    }

    Type1 rounded = value;
    rounded += step - excess;
    return rounded;
}
624
625 /*****************************************************************************\
626 Inline Function:
627 RoundDownNonPow2
628
629 Description:
630 Rounds an unsigned integer to the previous multiple of size (nonpow2)
631 \*****************************************************************************/
// Rounds 'value' down to the previous multiple of 'size', where 'size' need
// not be a power of two. 'size' must be non-zero (it is used as a divisor).
// The result is returned as DWORD.
template< typename Type1, typename Type2 >
__forceinline DWORD RoundDownNonPow2( const Type1 value, const Type2 size )
{
    const Type1 step = (Type1)size;

    // Integer division drops the remainder; multiplying back yields the multiple.
    return (DWORD)( step * ( value / step ) );
}
638
639 /*****************************************************************************\
640 Inline Function:
641 RoundPower2
642
643 Description:
644 Rounds an unsigned 32-bit integer to the next power of 2
645 \*****************************************************************************/
// Rounds a 32-bit value up to the next power of two. Values for which
// IsPowerOfTwo holds (including 0) are returned unchanged; otherwise the
// result is twice the value of the highest set bit.
inline DWORD RoundPower2( const DWORD value )
{
    if( IsPowerOfTwo( value ) )
    {
        return value;
    }

    return 2ul << bsr( value );
}
650
651 /*****************************************************************************\
652 Inline Function:
653 RoundPower2
654
655 Description:
656 Rounds an unsigned 64-bit integer to the next power of 2
657 \*****************************************************************************/
// Rounds a 64-bit value up to the next power of two.
//   - A value that already is a power of two is returned unchanged. This
//     includes 0x80000000, which the previous implementation rounded up to
//     0x100000000 because it treated "bit 31 set" as "needs the high dword".
//   - 0 is returned as 0.
//   - Values above 2^63 have no representable next power of two and yield 0,
//     as before.
inline QWORD RoundPower2( const QWORD value )
{
    // Smear the highest set bit of (value - 1) into every lower position,
    // then add one to carry into the next power of two. Subtracting one
    // first keeps exact powers of two unchanged.
    QWORD smeared = value - 1;

    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;
    smeared |= smeared >> 16;
    smeared |= smeared >> 32;

    return smeared + 1;
}
674
675 /*****************************************************************************\
676 Inline Function:
677 Log2
678
679 Description:
680 Returns the logarithm base two of the passed in number by returning
681 floor( log2( number ) ). Also in the case of Log2(0) the function
682 will return 0.
683 \*****************************************************************************/
// Returns floor( log2( value ) ), and 0 for value == 0.
// Callers are expected to pass a power of two (asserted). Other values now
// terminate with the floor result; the previous loop searched for an exact
// match against BIT(power2) and never ended for them.
inline DWORD Log2( const DWORD value )
{
    ASSERT( IsPowerOfTwo(value) );

    // Count how many times the value can be halved before it reaches zero.
    DWORD power2 = 0;
    for( DWORD remaining = value >> 1; remaining != 0; remaining >>= 1 )
    {
        ++power2;
    }

    return power2;
}
696
697 /*****************************************************************************\
698 Inline Function:
699 IsAligned
700
701 Description:
702 Determines if the given pointer is aligned to the given size
703 \*****************************************************************************/
// Returns true when the address held by 'ptr' is a multiple of 'alignSize'.
// 'alignSize' must be non-zero (it is used as a divisor).
template< typename Type >
__forceinline bool IsAligned( Type * ptr, const size_t alignSize )
{
    const size_t address = (size_t)ptr;

    return 0 == ( address % alignSize );
}
709
710 /*****************************************************************************\
711 Inline Function:
712 IsAligned
713
714 Description:
715 Determines if the given size is aligned to the given size
716 \*****************************************************************************/
// Returns true when 'size' is a multiple of 'alignSize'.
// 'alignSize' must be non-zero (it is used as a divisor).
template< typename Type >
__forceinline bool IsAligned( Type size, const size_t alignSize )
{
    return !( size % alignSize );
}
722
723 /*****************************************************************************\
724 Inline Function:
725 Align
726
727 Description:
728 Type-safe (power-2) alignment of a pointer.
729 \*****************************************************************************/
// Advances 'ptr' to the next address that is a multiple of 'alignment'
// (unchanged when already aligned). 'alignment' must be a power of two
// (asserted).
template<typename Type>
__forceinline Type* Align( Type* const ptr, const size_t alignment )
{
    ASSERT( IsPowerOfTwo(alignment) );

    const size_t address = ( (size_t)ptr ) + alignment-1;

    return (Type*)( address & ~( alignment-1 ) );
}
737
738 /*****************************************************************************\
739 Inline Function:
740 Align
741
742 Description:
743 Type-safe (power-2) alignment of a value.
744 \*****************************************************************************/
// Rounds 'value' up to the next multiple of 'alignment' (unchanged when
// already a multiple). 'alignment' must be a power of two (asserted); the
// arithmetic is carried out in Type.
template<typename Type>
__forceinline Type Align( const Type value, const size_t alignment )
{
    ASSERT( IsPowerOfTwo(alignment) );

    const Type lowBits = static_cast<Type>(alignment) - 1;

    return ( lowBits + value ) & ~lowBits;
}
753
754 /*****************************************************************************\
755 Inline Function:
756 GetAlignmentOffset
757
758 Description:
759 Returns the size in bytes needed to align the given pointer to the
760 given alignment size
761 \*****************************************************************************/
762 template<typename Type>
GetAlignmentOffset(Type * const ptr,const size_t alignSize)763 __forceinline DWORD GetAlignmentOffset( Type* const ptr, const size_t alignSize )
764 {
765 ASSERT( alignSize );
766
767 DWORD offset = 0;
768
769 if( IsPowerOfTwo(alignSize) )
770 { // can recast 'ptr' to DWORD, since offset is DWORD
771 offset = DWORD( UINT_PTR( Align(ptr, alignSize) ) - (UINT_PTR)(ptr) );
772 }
773 else
774 {
775 const DWORD modulo = (DWORD)(UINT_PTR(ptr) % alignSize);
776
777 if( modulo )
778 {
779 offset = (DWORD)alignSize - modulo;
780 }
781 }
782
783 return offset;
784 }
785
786 /*****************************************************************************\
787 Inline Function:
788 GetAlignmentOffset
789
790 Description:
791 Returns the size in bytes needed to align the given size to the
792 given alignment size
793 \*****************************************************************************/
794 template<typename Type>
GetAlignmentOffset(const Type size,const size_t alignSize)795 __forceinline Type GetAlignmentOffset( const Type size, const size_t alignSize )
796 {
797 ASSERT( alignSize );
798
799 Type offset = 0;
800
801 if( IsPowerOfTwo(alignSize) )
802 {
803 offset = Align(size, alignSize) - size;
804 }
805 else
806 {
807 const Type modulo = (Type)( size % alignSize );
808
809 if( modulo )
810 {
811 offset = (Type)alignSize - modulo;
812 }
813 }
814
815 return offset;
816 }
817
818 /*****************************************************************************\
819 Inline Function:
820 MemCompare
821
822 Description:
823 Templated Exception Handler Memory Compare function
824 \*****************************************************************************/
// Compares 'size' bytes at 'dst' and 'src' and returns true when all of them
// are equal.
//
// The comparison is delegated to memcmp. The previous hand-written version
// walked the buffers through UINT64/UINT32/WORD pointers, which read through
// misaligned and type-punned pointers; memcmp has no such requirement and
// returns the same equal / not-equal answer. 'size' is a compile-time
// constant, so compilers can expand the call inline.
template <size_t size>
inline bool MemCompare( const void* dst, const void* src )
{
    // A zero-length compare is trivially equal and must not touch the
    // pointers at all.
    if( size == 0 )
    {
        return true;
    }

    return memcmp( dst, src, size ) == 0;
}
970
971 template <>
972 inline bool MemCompare<1>( const void* dst, const void* src )
973 {
974 return (*(BYTE*)dst == *(BYTE*)src);
975 }
976
977 template <>
978 inline bool MemCompare<2>( const void* dst, const void* src )
979 {
980 return (*(WORD*)dst == *(WORD*)src);
981 }
982
983 template <>
984 inline bool MemCompare<4>( const void* dst, const void* src )
985 {
986 return (*(UINT32*)dst == *(UINT32*)src);
987 }
988
989 template <>
990 inline bool MemCompare<8>( const void* dst, const void* src )
991 {
992 return (*(UINT64*)dst == *(UINT64*)src);
993 }
994
995 /*****************************************************************************\
996 Inline Function:
997 IsEqual
998
999 Description:
1000 Compares two values for equality
1001 \*****************************************************************************/
1002 template <class Type>
IsEqual(const Type & a,const Type & b)1003 __forceinline bool IsEqual( const Type& a, const Type& b )
1004 {
1005 return iSTD::MemCompare<sizeof(Type)>( &a, &b );
1006 }
1007
1008 /*****************************************************************************\
1009 Inline Function:
1010 IsTagComplete
1011
1012 Description:
    Determines if the surface tag has reached completion
1014 \*****************************************************************************/
1015 template <class Type>
IsTagComplete(const Type hwTag,const Type swTag,const Type resTag)1016 __forceinline bool IsTagComplete( const Type hwTag, const Type swTag, const Type resTag )
1017 {
1018 return ( ( resTag == hwTag ) || ( ( resTag - hwTag ) > ( swTag - hwTag ) ) );
1019 }
1020
1021 /*****************************************************************************\
1022
1023 Inline Function:
1024 Hash
1025
1026 Description:
1027 Calculates hash from sequence of 32-bit values.
1028
1029 Jenkins 96-bit mixing function with 32-bit feedback-loop and 64-bit state.
1030
1031 All magic values are DWORDs of SHA2-256 mixing data:
1032 0x428a2f98 0x71374491 0xb5c0fbcf 0xe9b5dba5
1033 0x3956c25b 0x59f111f1 0x923f82a4 0xab1c5ed5
1034
    Could be sped up by processing 2 or 3 DWORDs at a time.
1036
1037 \*****************************************************************************/
// HASH_JENKINS_MIX( a, b, c )
//   Mixes the three operands in place using nine subtract / xor-with-shift
//   steps (shift amounts 13, 8, 13, 12, 16, 5, 3, 10, 15).
//   - Expands to a braced compound statement, not an expression.
//   - Each argument is substituted textually several times, without
//     parentheses, and is assigned to; pass plain modifiable lvalues that
//     have no side effects.
//   - The order of the steps is significant: every step consumes the result
//     of the previous one.
#define HASH_JENKINS_MIX(a,b,c)    \
{                                  \
a -= b; a -= c; a ^= (c>>13);    \
b -= c; b -= a; b ^= (a<<8);     \
c -= a; c -= b; c ^= (b>>13);    \
a -= b; a -= c; a ^= (c>>12);    \
b -= c; b -= a; b ^= (a<<16);    \
c -= a; c -= b; c ^= (b>>5);     \
a -= b; a -= c; a ^= (c>>3);     \
b -= c; b -= a; b ^= (a<<10);    \
c -= a; c -= b; c ^= (b>>15);    \
}
1050
Hash(const DWORD * data,DWORD count)1051 inline QWORD Hash( const DWORD *data, DWORD count )
1052 {
1053 DWORD a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1054 while( count-- )
1055 {
1056 a ^= *(data++);
1057 HASH_JENKINS_MIX( a, hi, lo );
1058 }
1059 return (((QWORD)hi)<<32)|lo;
1060 }
1061
1062 struct HashJenkinsMixReturnAggregate
1063 {
HashJenkinsMixReturnAggregateHashJenkinsMixReturnAggregate1064 HashJenkinsMixReturnAggregate(DWORD _a, DWORD _hi, DWORD _lo) :
1065 a(_a),
1066 hi(_hi),
1067 lo(_lo)
1068 {}
1069
1070 DWORD a;
1071 DWORD hi;
1072 DWORD lo;
1073 };
1074
1075 inline
HashJenkinsMix(DWORD a,DWORD hi,DWORD lo)1076 HashJenkinsMixReturnAggregate HashJenkinsMix(DWORD a, DWORD hi, DWORD lo)
1077 {
1078 HASH_JENKINS_MIX(a, hi, lo);
1079 return HashJenkinsMixReturnAggregate(a, hi, lo);
1080 }
1081
1082 __forceinline
HashNext(DWORD & a,DWORD & hi,DWORD & lo,DWORD data)1083 void HashNext(DWORD &a, DWORD &hi, DWORD &lo, DWORD data)
1084 {
1085 a ^= data;
1086 HashJenkinsMixReturnAggregate result = HashJenkinsMix(a, hi, lo);
1087 a = result.a;
1088 hi = result.hi;
1089 lo = result.lo;
1090 }
1091
1092 __forceinline
HashFirst(DWORD & a,DWORD & hi,DWORD & lo,DWORD data)1093 void HashFirst(DWORD &a, DWORD &hi, DWORD &lo, DWORD data)
1094 {
1095 a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1096 HashNext(a, hi, lo, data);
1097 }
1098
1099
1100 /*****************************************************************************\
1101 Inline Function:
1102 HashFromBuffer
1103
1104 Description:
1105 Calculates hash from data buffer.
1106 Input:
1107 data - pointer to the data buffer
1108 count - size of the buffer in bytes
1109 \*****************************************************************************/
HashFromBuffer(const char * data,size_t count)1110 inline QWORD HashFromBuffer(const char *data, size_t count)
1111 {
1112 DWORD a = 0x428a2f98, hi = 0x71374491, lo = 0xb5c0fbcf;
1113 const DWORD *dataDw = reinterpret_cast<const DWORD*>(data);
1114 size_t countDw = (DWORD)(count / sizeof(DWORD));
1115
1116 while (countDw--)
1117 {
1118 a ^= *(dataDw++);
1119 HASH_JENKINS_MIX(a, hi, lo);
1120 }
1121 // If buffer size isn't miltiply of DWORD we have to use last bytes to calculate hash
1122 if (count % sizeof(DWORD) != 0)
1123 {
1124 DWORD lastDw = 0;
1125 char *lastBytesBuff = reinterpret_cast<char*>(&lastDw);
1126 const size_t restBytesCount = count % sizeof(DWORD);
1127
1128 for (unsigned int i = 0; i < restBytesCount; i++)
1129 {
1130 lastBytesBuff[i] = data[count - restBytesCount + i];
1131 }
1132 a ^= lastDw;
1133 HASH_JENKINS_MIX(a, hi, lo);
1134 }
1135 return (((QWORD)hi) << 32) | lo;
1136 }
1137 #undef HASH_JENKINS_MIX
1138
1139 /*****************************************************************************\
1140
1141 Inline Function:
1142 Hash32b
1143
1144 Description:
1145 Calculates 32 bit hash from 32 bit value.
1146
1147 badc0ded hash - self-reversible, 32->32 mapping, good avalanche
    4 asm instructions in x86; note that 0 does not map to 0 here, because
    of the xor with 0xfecacafe before the final multiply
1149
1150 \*****************************************************************************/
// Single-value hash: multiply by 0xbadc0ded, byte-swap the 32-bit product,
// xor with 0xfecacafe, then multiply by 0x649c57e5.
inline DWORD Hash32b( const DWORD value )
{
    // Only the byte-swap intrinsic differs between toolchains.
#if defined _WIN32
    const DWORD swapped = _byteswap_ulong( value * 0xbadc0ded );
#else
    const DWORD swapped = __builtin_bswap32( value * 0xbadc0ded );
#endif

    return ( swapped ^ 0xfecacafe ) * 0x649c57e5;
}
1159
1160 /*****************************************************************************\
1161
1162 Inline Function:
1163 Hash32b
1164
1165 Description:
1166 Calculates 32 bit hash from sequence of 32 bit values.
1167
1168 badc0ded hash - self-reversible, 32->32 mapping, good avalanche
1169 4 asm instructions in x86, 0 maps to 0
1170
1171 \*****************************************************************************/
// Sequence hash: xors the single-value Hash32b of every element into a
// 0xdeadf00d seed.
//   data  - pointer to the first DWORD
//   count - number of DWORDs to read
// With count == 0 the seed is returned unchanged. Because the per-element
// hashes are combined with xor, the visiting order does not affect the
// result.
inline DWORD Hash32b( const DWORD *data, DWORD count )
{
    DWORD combined = 0xdeadf00d;

    for( DWORD index = 0; index < count; ++index )
    {
        combined ^= Hash32b( data[index] );
    }

    return combined;
}
1183
1184 /*****************************************************************************\
1185
1186 Inline Function:
1187 BitCount
1188
1189 Description:
1190 Returns the number of bits set to 1 in the input 32-bit number.
1191
1192 \*****************************************************************************/
BitCount(DWORD v)1193 inline DWORD BitCount( DWORD v )
1194 {
1195 v = v - ((v >> 1) & 0x55555555);
1196 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
1197 return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x1010101) >> 24;
1198 }
1199
1200 /*****************************************************************************\
1201
1202 Inline Function:
1203 BitCount64
1204
1205 Description:
1206 Returns the number of bits set to 1 in the input 64-bit number.
1207
1208 \*****************************************************************************/
BitCount64(unsigned long long v)1209 inline DWORD BitCount64( unsigned long long v )
1210 {
1211 v -= ( v >> 1 ) & 0x5555555555555555ULL;
1212 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
1213 v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL) * 0x0101010101010101ULL;
1214 return static_cast<DWORD>( v >> 56 );
1215 }
1216
1217 /*****************************************************************************\
1218
1219 Inline Function:
1220 BitReverse
1221
1222 Description:
1223 Reverse a 32-bit bitfield in a number.
1224
1225 \*****************************************************************************/
BitReverse(DWORD v)1226 inline DWORD BitReverse( DWORD v )
1227 {
1228 // swap odd and even bits
1229 v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
1230 // swap consecutive pairs
1231 v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
1232 // swap nibbles
1233 v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
1234 // swap bytes
1235 v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
1236 // swap words
1237 v = ( v >> 16 ) | ( v << 16);
1238 return v;
1239 }
1240
1241 /*****************************************************************************\
1242 Inline Function:
1243 PtrAdd
1244
1245 Description:
1246 Type-safe addition of a pointer and a scalar (in bytes).
1247 \*****************************************************************************/
1248 template<typename Type>
PtrAdd(Type * ptr,const size_t numBytes)1249 __forceinline Type* PtrAdd( Type* ptr, const size_t numBytes )
1250 {
1251 return (Type*)( ((BYTE*)ptr) + numBytes );
1252 }
1253
1254 /*****************************************************************************\
1255 Inline Function:
1256 FixedSIntToInt
1257
1258 Description:
1259 Converts a fixed signed integer value into a native signed int
1260 \*****************************************************************************/
FixedSIntToInt(DWORD value,DWORD size)1261 __forceinline int FixedSIntToInt( DWORD value, DWORD size )
1262 {
1263 if( value & BIT(size+1) )
1264 {
1265 return -1 * (value + 1);
1266 }
1267
1268 return value;
1269 }
1270
1271 } // iSTD
1272