/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif

#if !__TBB_ICC_BUILTIN_ATOMICS_PRESENT
#error "Intel(R) C++ Compiler version 12.0 or later is required to use the ICC intrinsics port"
#endif

#define __TBB_machine_icc_generic_H

//ICC mimics the "native" target compiler
#if _MSC_VER
#include "msvc_ia32_common.h"
#else
#include "gcc_ia32_common.h"
#endif

//TODO: Make the __TBB_WORDSIZE macro optional for the ICC intrinsics port.
//Since compiler intrinsics are used for all operations, this should be possible.

#if __TBB_x86_32
#define __TBB_WORDSIZE 4
#else
#define __TBB_WORDSIZE 8
#endif
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE

//__TBB_compiler_fence() is defined just in case; it does not seem to be used on its own anywhere else
#ifndef __TBB_compiler_fence
#if _MSC_VER
//TODO: is there any way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_ReadWriteBarrier)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#endif
#endif

#ifndef __TBB_full_memory_fence
#if _MSC_VER
//TODO: is there any way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_mm_mfence)
#define __TBB_full_memory_fence() _mm_mfence()
#else
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#endif

#ifndef __TBB_control_consistency_helper
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#endif
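/* A minimal usage sketch (illustrative only, not part of this header), with
   hypothetical flag variables. On IA-32/Intel 64 the full fence is needed
   chiefly to order an earlier store before a later load (store-load ordering),
   as in a Dekker-style handshake:

       flag0 = 1;                   // announce intent to enter
       __TBB_full_memory_fence();   // make the store to flag0 visible before flag1 is read
       if( flag1 == 0 ) {
           // safe to enter the critical section
       }

   __TBB_compiler_fence(), by contrast, emits no machine instruction; it only
   prevents the compiler from reordering memory accesses across that point. */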
namespace tbb { namespace internal {
//TODO: Is there any way to reuse the ICC definition of the memory_order enum instead of copy-pasting it?
//However, it seems unlikely that ICC will silently change the exact enum values, as they are defined
//in the ISO C++11 standard exactly like this.
//TODO: add a test that the exact values of the enum match those in ISO C++11
typedef enum memory_order {
    memory_order_relaxed, memory_order_consume, memory_order_acquire,
    memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;

namespace icc_intrinsics_port {
    template <typename T>
    T convert_argument(T value){
        return value;
    }
    //The overload below is needed to get an explicit conversion of a pointer to void* in the argument list.
    //Compiler bug?
    //TODO: add a corresponding "broken" feature macro and recheck with ICC 13.0 whether the overload is still needed
    template <typename T>
    void* convert_argument(T* value){
        return (void*)value;
    }
}
//TODO: the code below is a bit repetitive, consider simplifying it
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        return __atomic_load_explicit(&location, memory_order_acquire);
    }
    static void store_with_release ( volatile T &location, T value ) {
        __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
    }
};

template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const T& location ) {
        return __atomic_load_explicit(&location, memory_order_relaxed);
    }
    static inline void store ( T& location, T value ) {
        __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
    }
};

template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        return __atomic_load_explicit(&location, memory_order_seq_cst);
    }

    static void store ( volatile T &location, T value ) {
        __atomic_store_explicit(&location, value, memory_order_seq_cst);
    }
};

}} // namespace tbb::internal

namespace tbb { namespace internal { namespace icc_intrinsics_port {
typedef enum memory_order_map {
    relaxed    = memory_order_relaxed,
    acquire    = memory_order_acquire,
    release    = memory_order_release,
    full_fence = memory_order_seq_cst
} memory_order_map;
}}} // namespace tbb::internal::icc_intrinsics_port

#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M)                                                        \
inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) {                  \
    __atomic_compare_exchange_strong_explicit(                                                     \
            (T*)ptr                                                                                \
            ,&comparand                                                                            \
            ,value                                                                                 \
            , tbb::internal::icc_intrinsics_port::M                                                \
            , tbb::internal::icc_intrinsics_port::M);                                              \
    return comparand;                                                                              \
}                                                                                                  \
                                                                                                   \
inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) {                             \
    return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M);      \
}                                                                                                  \
                                                                                                   \
inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) {                               \
    return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M);     \
}

__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed)

__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed)

__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed)

__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed)

#undef __TBB_MACHINE_DEFINE_ATOMICS

#define __TBB_USE_FENCED_ATOMICS 1
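/* For reference, a hand expansion (illustrative only, not part of this header) of one
   generated function: __TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
   produces, among others,

       inline tbb::internal::int32_t __TBB_machine_cmpswp4full_fence(
               volatile void *ptr, tbb::internal::int32_t value, tbb::internal::int32_t comparand ) {
           __atomic_compare_exchange_strong_explicit(
                   (tbb::internal::int32_t*)ptr, &comparand, value,
                   tbb::internal::icc_intrinsics_port::full_fence,
                   tbb::internal::icc_intrinsics_port::full_fence);
           return comparand;
       }

   This is TBB's compare-and-swap convention: the function always returns the value
   observed at *ptr before the operation, and the caller detects success by comparing
   that result with the comparand it passed in. */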
namespace tbb { namespace internal {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)

__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release)

__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed)

template <typename T>
struct machine_load_store<T,8> {
    static T load_with_acquire ( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_acquire);
        } else {
            return __TBB_machine_generic_load8acquire(&location);
        }
    }
    static void store_with_release ( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
        } else {
            return __TBB_machine_generic_store8release(&location,value);
        }
    }
};

template <typename T>
struct machine_load_store_relaxed<T,8> {
    static T load( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_relaxed);
        } else {
            return __TBB_machine_generic_load8relaxed(&location);
        }
    }
    static void store( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
        } else {
            return __TBB_machine_generic_store8relaxed(&location,value);
        }
    }
};

template <typename T>
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_seq_cst);
        } else {
            return __TBB_machine_generic_load8full_fence(&location);
        }
    }

    static void store ( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            __atomic_store_explicit(&location, value, memory_order_seq_cst);
        } else {
            return __TBB_machine_generic_store8full_fence(&location,value);
        }
    }
};

#endif
}} // namespace tbb::internal

template <typename T>
inline void __TBB_machine_OR( T *operand, T addend ) {
    __atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}

template <typename T>
inline void __TBB_machine_AND( T *operand, T addend ) {
    __atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
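/* Usage sketch (illustrative only, with a hypothetical flag word): __TBB_machine_OR and
   __TBB_machine_AND provide sequentially consistent atomic bit operations, e.g.

       uintptr_t flags = 0;
       __TBB_machine_OR ( &flags, uintptr_t(1)<<3 );    // atomically set bit 3
       __TBB_machine_AND( &flags, ~(uintptr_t(1)<<3) ); // atomically clear bit 3

   Both return void; callers that need the previous value should use the
   fetch-style primitives generated above instead. */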