1 /****************************************************************************
2  * Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  ****************************************************************************/
23 
24 #ifndef __SWR_OS_H__
25 #define __SWR_OS_H__
26 
27 #include <cstddef>
28 #include "core/knobs.h"
29 
30 #if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
31 
32 #define SWR_API __cdecl
33 #define SWR_VISIBLE __declspec(dllexport)
34 
35 #ifndef NOMINMAX
36 #undef UNICODE
37 #define NOMINMAX
38 #include <windows.h>
39 #undef NOMINMAX
40 #define UNICODE
41 #else
42 #include <windows.h>
43 #endif
44 #include <intrin.h>
45 #include <cstdint>
46 
47 #if defined(MemoryFence)
48 // Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
49 #undef MemoryFence
50 #endif
51 
52 #if defined(_MSC_VER)
53 #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
54 #elif defined(__GNUC__)
55 #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
56 #endif
57 
58 #if defined(_DEBUG)
59 // We compile Debug builds with inline function expansion enabled.  This allows
60 // functions compiled with __forceinline to be inlined even in Debug builds.
61 // The inline_depth(0) pragma below will disable inline function expansion for
62 // normal INLINE / inline functions, but not for __forceinline functions.
63 // Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
64 // Debug builds.
65 #define INLINE inline
66 #pragma inline_depth(0)
67 #else
68 // Use of __forceinline increases compile time dramatically in release builds
69 // and provides almost 0 measurable benefit.  Disable until we have a compelling
70 // use-case
71 // #define INLINE __forceinline
72 #define INLINE inline
73 #endif
74 #ifndef FORCEINLINE
75 #define FORCEINLINE __forceinline
76 #endif
77 
78 #define DEBUGBREAK __debugbreak()
79 
80 #define PRAGMA_WARNING_PUSH_DISABLE(...) \
81     __pragma(warning(push));             \
82     __pragma(warning(disable : __VA_ARGS__));
83 
84 #define PRAGMA_WARNING_POP() __pragma(warning(pop))
85 
AlignedMalloc(size_t _Size,size_t _Alignment)86 static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
87 {
88     return _aligned_malloc(_Size, _Alignment);
89 }
90 
AlignedFree(void * p)91 static inline void AlignedFree(void* p)
92 {
93     return _aligned_free(p);
94 }
95 
96 #if defined(_WIN64)
97 #define BitScanReverseSizeT BitScanReverse64
98 #define BitScanForwardSizeT BitScanForward64
99 #define _mm_popcount_sizeT _mm_popcnt_u64
100 #else
101 #define BitScanReverseSizeT BitScanReverse
102 #define BitScanForwardSizeT BitScanForward
103 #define _mm_popcount_sizeT _mm_popcnt_u32
104 #endif
105 
106 #elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
107 
108 #define SWR_API
109 #define SWR_VISIBLE __attribute__((visibility("default")))
110 
111 #include <stdlib.h>
112 #include <string.h>
113 #include <x86intrin.h>
114 #include <stdint.h>
115 #include <sys/types.h>
116 #include <unistd.h>
117 #include <sys/stat.h>
118 #include <stdio.h>
119 #include <limits.h>
120 
121 typedef void         VOID;
122 typedef void*        LPVOID;
123 typedef int          INT;
124 typedef unsigned int UINT;
125 typedef void*        HANDLE;
126 typedef int          LONG;
127 typedef unsigned int DWORD;
128 
129 #undef FALSE
130 #define FALSE 0
131 
132 #undef TRUE
133 #define TRUE 1
134 
135 #define MAX_PATH PATH_MAX
136 
137 #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
138 #ifndef INLINE
139 #define INLINE __inline
140 #endif
141 #ifndef FORCEINLINE
142 #define FORCEINLINE INLINE
143 #endif
144 #define DEBUGBREAK asm("int $3")
145 
146 #if !defined(__CYGWIN__)
147 
148 #ifndef __cdecl
149 #define __cdecl
150 #endif
151 #ifndef __stdcall
152 #define __stdcall
153 #endif
154 
155 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
156 #define __declspec(x) __declspec_##x
157 #define __declspec_align(y) __attribute__((aligned(y)))
158 #define __declspec_deprecated __attribute__((deprecated))
159 #define __declspec_dllexport
160 #define __declspec_dllimport
161 #define __declspec_noinline __attribute__((__noinline__))
162 #define __declspec_nothrow __attribute__((nothrow))
163 #define __declspec_novtable
164 #define __declspec_thread __thread
165 #else
166 #define __declspec(X)
167 #endif
168 
169 #endif
170 
171 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
172 
173 #if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
/// Read the processor time-stamp counter with the RDTSC instruction.
/// Only compiled for GCC older than 4.5 (see the enclosing #if).
/// @returns 64-bit value built from EDX (upper 32 bits) and EAX (lower 32 bits)
inline uint64_t __rdtsc()
{
    // Hold both halves as unsigned 32-bit values.  With a signed 'long' a low half that has
    // bit 31 set is sign-extended when widened to 64 bits on 32-bit targets, which would
    // overwrite the upper half of the result with ones.
    uint32_t low, high;
    asm volatile("rdtsc" : "=a"(low), "=d"(high));
    return ((uint64_t)high << 32) | (uint64_t)low;
}
180 #endif
181 
182 #if !defined(__clang__) && !defined(__INTEL_COMPILER)
183 // Intrinsic not defined in gcc < 10
184 #if (__GNUC__) && (GCC_VERSION < 100000)
_mm256_storeu2_m128i(__m128i * hi,__m128i * lo,__m256i a)185 static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
186 {
187     _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
188     _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
189 }
190 #endif
191 
192 // gcc prior to 4.9 doesn't have _mm*_undefined_*
193 #if (__GNUC__) && (GCC_VERSION < 40900)
194 #define _mm_undefined_si128 _mm_setzero_si128
195 #define _mm256_undefined_ps _mm256_setzero_ps
196 #endif
197 #endif
198 
/// Find the position of the least significant set bit of an unsigned long.
/// @param Index - receives the zero-based bit position; left untouched when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if a set bit was found, 0 if Mask is 0
inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
{
    // The count-trailing-zeros builtins are undefined for a zero argument, so test first.
    if (Mask == 0)
    {
        return 0;
    }

    // Use the 'l' variant: plain __builtin_ctz takes unsigned int and would discard the
    // upper bits of Mask wherever unsigned long is wider than unsigned int.
    *Index = (unsigned long)__builtin_ctzl(Mask);
    return 1;
}
204 
/// Find the position of the least significant set bit of an unsigned int.
/// @param Index - receives the zero-based bit position; left untouched when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if a set bit was found, 0 if Mask is 0
inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
{
    // __builtin_ctz is undefined for a zero argument, so test before calling it.
    if (Mask == 0)
    {
        return 0;
    }

    *Index = (unsigned int)__builtin_ctz(Mask);
    return 1;
}
210 
/// Find the position of the most significant set bit of an unsigned long.
/// @param Index - receives the zero-based bit position; left untouched when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if a set bit was found, 0 if Mask is 0
inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
{
    // The count-leading-zeros builtins are undefined for a zero argument, so test first.
    if (Mask == 0)
    {
        return 0;
    }

    // The leading-zero count must be taken over the full width of unsigned long and
    // subtracted from that same width.  Plain __builtin_clz counts over unsigned int, so
    // pairing it with a fixed 63 gives wrong indices (e.g. 32 for a Mask of 1).
    const unsigned long topBit = (unsigned long)(sizeof(unsigned long) * CHAR_BIT - 1);
    *Index                     = topBit - (unsigned long)__builtin_clzl(Mask);
    return 1;
}
216 
/// Find the position of the most significant set bit of an unsigned int.
/// @param Index - receives the zero-based bit position; left untouched when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if a set bit was found, 0 if Mask is 0
inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
{
    // __builtin_clz is undefined for a zero argument, so test before calling it.
    if (Mask == 0)
    {
        return 0;
    }

    const unsigned int topBit = (unsigned int)(sizeof(unsigned int) * CHAR_BIT - 1);
    *Index                    = topBit - (unsigned int)__builtin_clz(Mask);
    return 1;
}
222 
223 #define _BitScanForward64 _BitScanForward
224 #define _BitScanReverse64 _BitScanReverse
225 
/// Allocate a block of memory with a caller-specified alignment (POSIX build).
/// The block is released with AlignedFree().
/// @param size      - number of bytes requested
/// @param alignment - required alignment in bytes; must be a power of two
/// @returns pointer to the block, or NULL if posix_memalign reports an error
inline void* AlignedMalloc(size_t size, size_t alignment)
{
    // posix_memalign rejects alignments that are not a multiple of sizeof(void*).  A smaller
    // power of two is satisfied by any pointer-size-aligned block, so promote it rather than
    // failing the request.  Values that are not powers of two are passed through and still
    // fail.
    if (alignment != 0 && (alignment & (alignment - 1)) == 0 && alignment < sizeof(void*))
    {
        alignment = sizeof(void*);
    }

    void* ret = NULL;
    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return NULL;
    }
    return ret;
}
235 
/// Release a block previously obtained from AlignedMalloc() (POSIX build).
/// @param p - pointer handed straight to free()
static inline void AlignedFree(void* p)
{
    free(p);
}
240 
// Number of elements in an array.  Only meaningful for a true array; applied to a pointer
// it divides the pointer size by the element size.
241 #define _countof(a) (sizeof(a) / sizeof(*(a)))
242 
// Windows CRT / Win32 names mapped onto their closest C library / POSIX counterparts.
// sprintf_s becomes plain sprintf, so no buffer-size checking is performed.
243 #define sprintf_s sprintf
// NOTE(review): strncpy does not write a terminating NUL when src holds 'size' or more
// characters, which differs from strcpy_s - confirm callers do not rely on termination.
244 #define strcpy_s(dst, size, src) strncpy(dst, src, size)
245 #define GetCurrentProcessId getpid
246 
// Win32 Interlocked* operations expressed with the GCC __sync atomic builtins.
// The builtin takes (ptr, expected, desired), hence the swapped Exchange / Comparand order;
// it evaluates to the value held at Dest before the operation.
247 #define InterlockedCompareExchange(Dest, Exchange, Comparand) \
248     __sync_val_compare_and_swap(Dest, Comparand, Exchange)
// Fetch-then-add: evaluates to the value before the addition.
249 #define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
// The remaining forms evaluate to the value after the update.
250 #define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
251 #define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
252 #define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
253 #define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
254 #define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
// Compiler-only barrier: the empty asm with a "memory" clobber emits no instruction.
255 #define _ReadWriteBarrier() asm volatile("" ::: "memory")
256 
// The warning push/disable/pop helpers expand to nothing in this branch.
257 #define PRAGMA_WARNING_PUSH_DISABLE(...)
258 #define PRAGMA_WARNING_POP()
259 
// Fill 'size' bytes starting at 'dst' with zero.
260 #define ZeroMemory(dst, size) memset(dst, 0, size)
261 #else
262 
263 #error Unsupported OS/system.
264 
265 #endif
266 
267 #define THREAD thread_local
268 
269 // Universal types
270 typedef uint8_t  KILOBYTE[1024];
271 typedef KILOBYTE MEGABYTE[1024];
272 typedef MEGABYTE GIGABYTE[1024];
273 
274 #define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
275 #define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
276 #define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
277 
278 #include "common/swr_assert.h"
279 
280 #ifdef __GNUC__
281 #define ATTR_UNUSED __attribute__((unused))
282 #else
283 #define ATTR_UNUSED
284 #endif
285 
286 #define SWR_FUNC(_retType, _funcName, /* args */...)        \
287     typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
288     _retType SWR_API _funcName(__VA_ARGS__);
289 
290 // Defined in os.cpp
291 void SWR_API SetCurrentThreadName(const char* pThreadName);
292 void SWR_API CreateDirectoryPath(const std::string& path);
293 
294 /// Execute Command (block until finished)
295 /// @returns process exit value
296 int SWR_API
297     ExecCmd(const std::string& cmd,                ///< (In) Command line string
298             const char*  pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
299             std::string* pOptStdOut     = nullptr,   ///< (Optional Out) Standard Output text
300             std::string* pOptStdErr     = nullptr,   ///< (Optional Out) Standard Error text
301             const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
302 
303 
304 /// Helper for setting up FP state
305 /// @returns old csr state
SetOptimalVectorCSR()306 static INLINE uint32_t SetOptimalVectorCSR()
307 {
308     uint32_t oldCSR = _mm_getcsr();
309 
310     uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));
311     newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
312     _mm_setcsr(newCSR);
313 
314     return oldCSR;
315 }
316 
317 /// Set Vector CSR state.
318 /// @param csrState - should be value returned from SetOptimalVectorCSR()
RestoreVectorCSR(uint32_t csrState)319 static INLINE void RestoreVectorCSR(uint32_t csrState)
320 {
321     _mm_setcsr(csrState);
322 }
323 
324 #endif //__SWR_OS_H__
325