1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file utils.h
24  *
25  * @brief Utilities used by SWR core.
26  *
27  ******************************************************************************/
28 #pragma once
29 
30 #include <string.h>
31 #include <type_traits>
32 #include <algorithm>
33 #include <array>
34 #include "common/os.h"
35 #include "common/intrin.h"
36 #include "common/swr_assert.h"
37 #include "core/api.h"
38 
39 struct simdBBox
40 {
41     simdscalari ymin;
42     simdscalari ymax;
43     simdscalari xmin;
44     simdscalari xmax;
45 };
46 
47 struct simd16BBox
48 {
49     simd16scalari ymin;
50     simd16scalari ymax;
51     simd16scalari xmin;
52     simd16scalari xmax;
53 };
54 
// Bounding box parameterized on a SIMD traits type; every extent is stored as
// SIMD_T::Integer. Member order (ymin, ymax, xmin, xmax) is preserved.
template <typename SIMD_T>
struct SIMDBBOX_T
{
    typename SIMD_T::Integer ymin, ymax; // extents along y
    typename SIMD_T::Integer xmin, xmax; // extents along x
};
63 
//////////////////////////////////////////////////////////////////////////
/// Helper to unroll loops at compile time.
///
/// UnrollerL<Begin, End, Step>::step(func) calls func(Begin),
/// func(Begin + Step), ... and stops once the index equals End (End itself
/// is not visited). Each index is passed to func as an int.
///
/// Stepping from Begin must land exactly on End; otherwise the recursion
/// would never reach the terminating specialization, so that is rejected
/// with a static_assert.
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1>
struct UnrollerL
{
    static_assert(Step != 0 && ((End - Begin) % Step) == 0 && ((End - Begin) / Step) > 0,
                  "UnrollerL: stepping from Begin by Step must land exactly on End");

    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        func(Begin);
        UnrollerL<Begin + Step, End, Step>::step(func);
    }
};

// Terminator: Begin has reached End, nothing left to invoke.
template <int End, int Step>
struct UnrollerL<End, End, Step>
{
    template <typename Lambda>
    static void step(Lambda& func)
    {
    }
};
84 
//////////////////////////////////////////////////////////////////////////
/// Helper to unroll loops at compile time, with a mask to skip specific
/// iterations.
///
/// UnrollerLMask<Begin, End, Step, Mask>::step(func) walks the indices
/// Begin, Begin + Step, ... (End excluded) and calls func(index) only when
/// bit 'index' of Mask is set. Indices are expected to be valid bit
/// positions of Mask (0..31).
///
/// The recursion stays inside UnrollerLMask so that Mask is applied to
/// every iteration, not just the first one.
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1, int Mask = 0x7f>
struct UnrollerLMask
{
    static_assert(Step != 0 && ((End - Begin) % Step) == 0 && ((End - Begin) / Step) > 0,
                  "UnrollerLMask: stepping from Begin by Step must land exactly on End");

    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        // Test the bit on the unsigned representation so index 31 is well defined.
        if ((static_cast<unsigned>(Mask) >> Begin) & 1u)
        {
            func(Begin);
        }
        UnrollerLMask<Begin + Step, End, Step, Mask>::step(func);
    }
};

// Terminator: Begin has reached End, nothing left to invoke.
template <int End, int Step, int Mask>
struct UnrollerLMask<End, End, Step, Mask>
{
    template <typename Lambda>
    static void step(Lambda& func)
    {
    }
};
108 
109 // general CRC compute
110 INLINE
111 uint32_t ComputeCRC(uint32_t crc, const void* pData, uint32_t size)
112 {
113 #if defined(_WIN64) || defined(__x86_64__)
114     uint32_t  sizeInQwords       = size / sizeof(uint64_t);
115     uint32_t  sizeRemainderBytes = size % sizeof(uint64_t);
116     uint64_t* pDataWords         = (uint64_t*)pData;
117     for (uint32_t i = 0; i < sizeInQwords; ++i)
118     {
119         crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
120     }
121 #else
122     uint32_t  sizeInDwords       = size / sizeof(uint32_t);
123     uint32_t  sizeRemainderBytes = size % sizeof(uint32_t);
124     uint32_t* pDataWords         = (uint32_t*)pData;
125     for (uint32_t i = 0; i < sizeInDwords; ++i)
126     {
127         crc = _mm_crc32_u32(crc, *pDataWords++);
128     }
129 #endif
130 
131     uint8_t* pRemainderBytes = (uint8_t*)pDataWords;
132     for (uint32_t i = 0; i < sizeRemainderBytes; ++i)
133     {
134         crc = _mm_crc32_u8(crc, *pRemainderBytes++);
135     }
136 
137     return crc;
138 }
139 
//////////////////////////////////////////////////////////////////////////
/// Test a single bit of a data word.
/// @param word - value to inspect
/// @param bit  - zero-based index of the bit to test
/// @return true when the selected bit of word is set
//////////////////////////////////////////////////////////////////////////
template <typename T>
INLINE static bool CheckBit(T word, uint32_t bit)
{
    const auto bitMask = T(1) << bit;
    return (word & bitMask) != 0;
}
148 
//////////////////////////////////////////////////////////////////////////
/// Offset a pointer of any type by a number of bytes.
/// @param p      - base pointer
/// @param offset - signed byte offset (not scaled by sizeof(T))
/// @return pointer of the same type, 'offset' bytes away from p
//////////////////////////////////////////////////////////////////////////
template <typename T>
INLINE static T* PtrAdd(T* p, intptr_t offset)
{
    return reinterpret_cast<T*>(reinterpret_cast<intptr_t>(p) + offset);
}
158 
//////////////////////////////////////////////////////////////////////////
/// Is the value a power of 2?
/// Isolates the lowest set bit (value & -value) and checks that it is the
/// whole value, i.e. that at most one bit is set.
/// Note: 0 also passes this test and is reported as a power of 2.
//////////////////////////////////////////////////////////////////////////
template <typename T>
INLINE static bool IsPow2(T value)
{
    const auto lowestSetBit = value & (T(0) - value);
    return lowestSetBit == value;
}
167 
//////////////////////////////////////////////////////////////////////////
/// Round a value down to a multiple of a power-of-2 alignment by clearing
/// the low bits.
/// Note: IsPow2(alignment) MUST be true (checked with SWR_ASSERT).
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1 AlignDownPow2(T1 value, T2 alignment)
{
    SWR_ASSERT(IsPow2(alignment));
    const T1 lowBitMask = T1(alignment - 1);
    return value & ~lowBitMask;
}
178 
//////////////////////////////////////////////////////////////////////////
/// Round a value up to a multiple of a power-of-2 alignment: add
/// (alignment - 1), then round down with AlignDownPow2.
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1 AlignUpPow2(T1 value, T2 alignment)
{
    const auto bumped = value + T1(alignment - 1);
    return AlignDownPow2(bumped, alignment);
}
188 
//////////////////////////////////////////////////////////////////////////
/// Round a pointer up to a power-of-2 alignment. The address is handled as
/// a uintptr_t: (alignment - 1) is added and the sum is rounded down with
/// AlignDownPow2.
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1* AlignUpPow2(T1* value, T2 alignment)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
    const uintptr_t bumped  = address + uintptr_t(alignment - 1);
    return reinterpret_cast<T1*>(AlignDownPow2(bumped, alignment));
}
199 
//////////////////////////////////////////////////////////////////////////
/// Round a value down to a multiple of the specified alignment.
/// Alignments accepted by IsPow2 take the AlignDownPow2 path; any other
/// alignment subtracts the remainder (value % alignment).
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1 AlignDown(T1 value, T2 alignment)
{
    if (!IsPow2(alignment))
    {
        return value - T1(value % alignment);
    }
    return AlignDownPow2(value, alignment);
}
212 
//////////////////////////////////////////////////////////////////////////
/// Round a pointer down to the specified alignment by aligning its address
/// as a uintptr_t through the value overload of AlignDown.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1* AlignDown(T1* value, T2 alignment)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
    return reinterpret_cast<T1*>(AlignDown(address, alignment));
}
221 
//////////////////////////////////////////////////////////////////////////
/// Round a value up to a multiple of the specified alignment: add
/// (alignment - 1), then round down with AlignDown.
/// Note: the rounding is delegated to AlignDown, which in this file handles
/// both power-of-2 and other alignments.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1 AlignUp(T1 value, T2 alignment)
{
    const auto bumped = value + T1(alignment - 1);
    return AlignDown(bumped, alignment);
}
231 
//////////////////////////////////////////////////////////////////////////
/// Round a pointer up to the specified alignment: advance it by
/// (alignment - 1) bytes with PtrAdd, then round down with AlignDown.
/// Note: the rounding is delegated to AlignDown, which in this file handles
/// both power-of-2 and other alignments.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
INLINE static T1* AlignUp(T1* value, T2 alignment)
{
    T1* const bumped = PtrAdd(value, alignment - 1);
    return AlignDown(bumped, alignment);
}
241 
//////////////////////////////////////////////////////////////////////////
/// Helper structure used to access an array of elements that don't
/// correspond to a typical word size.
///
/// ArrayLenT elements of BitsPerElementT bits each are packed into size_t
/// words, lowest element in the lowest bits of each word. The storage is
/// zero-initialized.
///
/// @tparam T               - type returned for each element
/// @tparam BitsPerElementT - width of one element in bits; must divide the
///                           number of bits in size_t evenly
/// @tparam ArrayLenT       - number of elements
//////////////////////////////////////////////////////////////////////////
template <typename T, size_t BitsPerElementT, size_t ArrayLenT>
class BitsArray
{
private:
    static const size_t BITS_PER_WORD     = sizeof(size_t) * 8;
    static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
    static const size_t NUM_WORDS         = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
    // Built by shifting an all-ones word right so that a full-word element
    // (BitsPerElementT == BITS_PER_WORD) does not shift by the word width.
    static const size_t ELEMENT_MASK = ~size_t(0) >> (BITS_PER_WORD - BitsPerElementT);

    static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
                  "Element size must be an integral fraction of pointer size");

    size_t m_words[NUM_WORDS] = {};

public:
    /// Read the element at elementIndex.
    /// No bounds check is performed; elementIndex must be < ArrayLenT.
    T operator[](size_t elementIndex) const
    {
        size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
        // Move the requested element down to bit 0, then strip its neighbours.
        word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
        return T(word & ELEMENT_MASK);
    }
};
268 
// Ranged integer argument for TemplateArgUnroller.
// Carries a runtime value together with the compile-time bounds TMin / TMax
// that the unroller iterates over. Kept as a plain aggregate so it can be
// brace-initialized from the value alone.
template <typename T, T TMin, T TMax>
struct RangedArg
{
    T val; // runtime value
};

// Shorthand for a ranged uint32_t argument.
template <uint32_t TMin, uint32_t TMax>
using IntArg = RangedArg<uint32_t, TMin, TMax>;
278 
279 // Recursive template used to auto-nest conditionals.  Converts dynamic boolean function
280 // arguments to static template arguments.
281 template <typename TermT, typename... ArgsB>
282 struct TemplateArgUnroller
283 {
284     //-----------------------------------------
285     // Boolean value
286     //-----------------------------------------
287 
288     // Last Arg Terminator
289     static typename TermT::FuncType GetFunc(bool bArg)
290     {
291         if (bArg)
292         {
293             return TermT::template GetFunc<ArgsB..., std::true_type>();
294         }
295 
296         return TermT::template GetFunc<ArgsB..., std::false_type>();
297     }
298 
299     // Recursively parse args
300     template <typename... TArgsT>
301     static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
302     {
303         if (bArg)
304         {
305             return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...);
306         }
307 
308         return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
309     }
310 
311     //-----------------------------------------
312     // Ranged value (within specified range)
313     //-----------------------------------------
314 
315     // Last Arg Terminator
316     template <typename T, T TMin, T TMax>
317     static typename TermT::FuncType GetFunc(RangedArg<T, TMin, TMax> iArg)
318     {
319         if (iArg.val == TMax)
320         {
321             return TermT::template GetFunc<ArgsB..., std::integral_constant<T, TMax>>();
322         }
323         if (TMax > TMin)
324         {
325             return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
326                 RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val});
327         }
328         SWR_ASSUME(false);
329         return nullptr;
330     }
331     template <typename T, T TVal>
332     static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg)
333     {
334         SWR_ASSERT(iArg.val == TVal);
335         return TermT::template GetFunc<ArgsB..., std::integral_constant<T, TVal>>();
336     }
337 
338     // Recursively parse args
339     template <typename T, T TMin, T TMax, typename... TArgsT>
340     static typename TermT::FuncType GetFunc(RangedArg<T, TMin, TMax> iArg, TArgsT... remainingArgs)
341     {
342         if (iArg.val == TMax)
343         {
344             return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(
345                 remainingArgs...);
346         }
347         if (TMax > TMin)
348         {
349             return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
350                 RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
351         }
352         SWR_ASSUME(false);
353         return nullptr;
354     }
355     template <typename T, T TVal, typename... TArgsT>
356     static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg, TArgsT... remainingArgs)
357     {
358         SWR_ASSERT(iArg.val == TVal);
359         return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(
360             remainingArgs...);
361     }
362 };
363 
//////////////////////////////////////////////////////////////////////////
/// Helpers used to get / set environment variable
///
/// GetEnv returns the value of the named environment variable, or an empty
/// string when the lookup yields nothing.
//////////////////////////////////////////////////////////////////////////
static INLINE std::string GetEnv(const std::string& variableName)
{
#if defined(_WIN32)
    // First call only queries the required buffer size (terminator included).
    const uint32_t requiredChars = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
    if (requiredChars == 0)
    {
        return std::string();
    }

    // std::string accounts for the terminator itself, so size it one short.
    std::string value(requiredChars - 1, '\0');
    GetEnvironmentVariableA(variableName.c_str(), &value[0], requiredChars);
    return value;
#else
    const char* const raw = getenv(variableName.c_str());
    return (raw != nullptr) ? std::string(raw) : std::string();
#endif
}
383 
// Set (or overwrite) an environment variable for the current process.
// The result of the underlying OS call is not checked.
static INLINE void SetEnv(const std::string& variableName, const std::string& value)
{
    const char* const name = variableName.c_str();
    const char* const text = value.c_str();
#if defined(_WIN32)
    SetEnvironmentVariableA(name, text);
#else
    setenv(name, text, 1); // non-zero: replace an existing value
#endif
}
392 
393