/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif

#define __TBB_machine_msvc_ia32_common_H

#include <intrin.h>

//TODO: consider moving this macro to tbb_config.h and using it wherever MSVC inline asm is used
#if !_M_X64 || __INTEL_COMPILER
    #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
#else
    // MSVC in x64 mode does not accept inline assembler
    #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
    #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
#endif

#if _M_X64
    #define __TBB_r(reg_name) r##reg_name
    #define __TBB_W(name) name##64
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        typedef __int64 word;
    }}}
#else
    #define __TBB_r(reg_name) e##reg_name
    #define __TBB_W(name) name
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        typedef long word;
    }}}
#endif
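
// Added note: on x64 (_M_X64) the helpers above expand as __TBB_r(ax) -> rax and
// __TBB_W(_InterlockedOr) -> _InterlockedOr64, with "word" being __int64; on 32-bit x86 they
// expand to eax and _InterlockedOr, with "word" being long.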

#if __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
    // S is the operand size in bytes, B is the suffix for intrinsics for that size
    #define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U)                                           \
    __pragma(intrinsic( _InterlockedCompareExchange##B ))                                   \
    static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
        return _InterlockedCompareExchange##B ( (T*)ptr, value, comparand );                \
    }                                                                                       \
    __pragma(intrinsic( _InterlockedExchangeAdd##B ))                                       \
    static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) {           \
        return _InterlockedExchangeAdd##B ( (T*)ptr, addend );                              \
    }                                                                                       \
    __pragma(intrinsic( _InterlockedExchange##B ))                                          \
    static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) {          \
        return _InterlockedExchange##B ( (T*)ptr, value );                                  \
    }

    // Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
    __TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
    __TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
    __TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)

    #if __TBB_WORDSIZE==8
    __TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
    #endif

    #undef __TBB_MACHINE_DEFINE_ATOMICS
#endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
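
// For illustration (added comment): instantiated with S=4 (so B is empty), the macro above defines, e.g.,
//     static inline long __TBB_machine_cmpswp4( volatile void* ptr, __int32 value, __int32 comparand ) {
//         return _InterlockedCompareExchange( (long*)ptr, value, comparand );
//     }
// i.e. a full-fence compare-and-swap that returns the value previously stored at ptr.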

#if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
    #pragma intrinsic(_ReadWriteBarrier)
    #pragma intrinsic(_mm_mfence)
    #define __TBB_compiler_fence()    _ReadWriteBarrier()
    #define __TBB_full_memory_fence() _mm_mfence()
#elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
    #define __TBB_compiler_fence()    __asm { __asm nop }
    #define __TBB_full_memory_fence() __asm { __asm mfence }
#else
    #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
#endif

#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
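
// Added note: on the strongly ordered IA-32/Intel 64 memory model a compiler-only fence is
// sufficient for the control/acquire/release helpers; a real MFENCE is only issued by
// __TBB_full_memory_fence().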

#if (_MSC_VER>=1300) || (__INTEL_COMPILER)
    #pragma intrinsic(_mm_pause)
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        static inline void pause (uintptr_t delay ) {
            for (;delay>0; --delay )
                _mm_pause();
        }
    }}}
    #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
    #define __TBB_SINGLE_PAUSE _mm_pause()
#else
    #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
        #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
    #endif
    namespace tbb { namespace internal { namespace msvc_inline_asm {
        static inline void pause (uintptr_t delay ) {
            _asm
            {
                mov __TBB_r(ax), delay
              __TBB_L1:
                pause
                add __TBB_r(ax), -1
                jne __TBB_L1
            }
            return;
        }
    }}}
    #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
    #define __TBB_SINGLE_PAUSE __asm pause
#endif
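
// Usage note (added comment): __TBB_Pause(n) executes the PAUSE instruction n times, e.g.
// __TBB_Pause(4) inside a spin-wait loop, while __TBB_SINGLE_PAUSE issues exactly one PAUSE.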

#if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
// MSVC did not have this intrinsic prior to VC8.
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
    #pragma intrinsic(__TBB_W(_BitScanReverse))
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        static inline uintptr_t lg_bsr( uintptr_t i ){
            unsigned long j;
            __TBB_W(_BitScanReverse)( &j, i );
            return j;
        }
    }}}
    #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
#else
    #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
        #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
    #endif
    namespace tbb { namespace internal { namespace msvc_inline_asm {
        static inline uintptr_t lg_bsr( uintptr_t i ){
            uintptr_t j;
            __asm
            {
                bsr __TBB_r(ax), i
                mov j, __TBB_r(ax)
            }
            return j;
        }
    }}}
    #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
#endif
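
// Added note: __TBB_Log2(V) returns the index of the most significant set bit (BSR), e.g.
// __TBB_Log2(1)==0 and __TBB_Log2(9)==3; callers are expected to pass a non-zero argument,
// since BSR leaves its destination undefined for an input of 0.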

#if _MSC_VER>=1400
    #pragma intrinsic(__TBB_W(_InterlockedOr))
    #pragma intrinsic(__TBB_W(_InterlockedAnd))
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        static inline void lock_or( volatile void *operand, intptr_t addend ){
            __TBB_W(_InterlockedOr)((volatile word*)operand, addend);
        }
        static inline void lock_and( volatile void *operand, intptr_t addend ){
            __TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
        }
    }}}
    #define __TBB_AtomicOR(P,V)  tbb::internal::msvc_intrinsics::lock_or(P,V)
    #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
#else
    #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
        #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
    #endif
    namespace tbb { namespace internal { namespace msvc_inline_asm {
        static inline void lock_or( volatile void *operand, __int32 addend ) {
            __asm
            {
                mov eax, addend
                mov edx, [operand]
                lock or [edx], eax
            }
        }
        static inline void lock_and( volatile void *operand, __int32 addend ) {
            __asm
            {
                mov eax, addend
                mov edx, [operand]
                lock and [edx], eax
            }
        }
    }}}
    #define __TBB_AtomicOR(P,V)  tbb::internal::msvc_inline_asm::lock_or(P,V)
    #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
#endif
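
// Usage note (added comment): e.g. __TBB_AtomicOR(&flags, 0x4) atomically sets bit 2 of a
// machine-word-sized field, and __TBB_AtomicAND(&flags, ~0x4) atomically clears it.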

#pragma intrinsic(__rdtsc)
namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
    return __rdtsc();
}
#define __TBB_time_stamp() __TBB_machine_time_stamp()
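
// Added note: __TBB_time_stamp() reads the processor's time-stamp counter (RDTSC); the result is
// in TSC ticks rather than wall-clock units, so it is suited to relative timing measurements.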

// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1

namespace tbb { namespace internal { class cpu_ctl_env; } }
#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
    inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
        __asm {
            __asm mov     __TBB_r(ax), ctl
            __asm stmxcsr [__TBB_r(ax)]
            __asm fstcw   [__TBB_r(ax)+4]
        }
    }
    inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
        __asm {
            __asm mov     __TBB_r(ax), ctl
            __asm ldmxcsr [__TBB_r(ax)]
            __asm fldcw   [__TBB_r(ax)+4]
        }
    }
#else
    extern "C" {
        void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
        void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
    }
#endif

namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
    int         mxcsr;
    short       x87cw;
    static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
    bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
    void get_env() {
        __TBB_get_cpu_ctl_env( this );
        mxcsr &= MXCSR_CONTROL_MASK;
    }
    void set_env() const { __TBB_set_cpu_ctl_env( this ); }
};
} // namespace internal
} // namespace tbb
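
// Usage note (added comment): a typical pattern is
//     tbb::internal::cpu_ctl_env ctl;
//     ctl.get_env();   // snapshot the MXCSR control bits and the x87 control word
//     ...
//     ctl.set_env();   // re-apply the captured FPU settings, e.g. in another thread or task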

#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#else
#include <thread>
#define __TBB_Yield()  std::this_thread::yield()
#endif

#undef __TBB_r
#undef __TBB_W
#undef __TBB_word

extern "C" {
    __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
    void   __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);

    // 'pause' instruction aborts HLE/RTM transactions
    inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }

#if __TBB_TSX_INTRINSICS_PRESENT
    #define __TBB_machine_is_in_transaction _xtest
    #define __TBB_machine_begin_transaction _xbegin
    #define __TBB_machine_end_transaction   _xend
    // The abort code (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures
    // Optimization Reference Manual, 12.4.5 "Lock not free"
    #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
#else
    __int8           __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
    unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
    void             __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
    void             __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
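
    // Illustrative sketch (added comment, assuming the _XBEGIN_STARTED convention from
    // <immintrin.h>, i.e. ~0u) of how these hooks are typically driven:
    //     unsigned __int32 status = __TBB_machine_begin_transaction();
    //     if ( status == ~(unsigned __int32)0 ) {   // transaction started speculatively
    //         /* ... speculative critical section ... */
    //         __TBB_machine_end_transaction();
    //     } else {
    //         /* ... fall back to acquiring the real lock ... */
    //     }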
}