1 //------------------------------------------------------------------------------ 2 // GB_atomics.h: definitions for atomic operations 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // All atomic operations used by SuiteSparse:GraphBLAS appear in this file. 11 12 // These atomic operations assume either an ANSI C11 compiler that supports 13 // OpenMP 3.1 or later, or Microsoft Visual Studio on 64-bit Windows (which 14 // only supports OpenMP 2.0). SuiteSparse:GraphBLAS is not supported on 32-bit 15 // Windows. 16 17 #ifndef GB_ATOMICS_H 18 #define GB_ATOMICS_H 19 #include "GB.h" 20 21 //------------------------------------------------------------------------------ 22 // determine the architecture 23 //------------------------------------------------------------------------------ 24 25 #if __x86_64__ 26 27 // on the x86, atomic updates can be more aggresive. The MIN, MAX, EQ, 28 // XNOR, and ANY monoids are implemented with atomic compare/exchange. 29 30 #define GB_X86_64 1 31 32 #else 33 34 // on the ARM, Power8/9, and others, only use built-in #pragma omp atomic 35 // updates. Do not use atomic compare/exchange. 36 37 #define GB_X86_64 0 38 39 #endif 40 41 //------------------------------------------------------------------------------ 42 // atomic updates 43 //------------------------------------------------------------------------------ 44 45 // Whenever possible, the OpenMP pragma is used with a clause (as introduced in 46 // OpenMP 3.1), as follow: 47 // 48 // #pragma omp atomic [clause] 49 // 50 // where [clause] is read, write, update, or capture. 51 // 52 // Microsoft Visual Studio only supports OpenMP 2.0, which does not have the 53 // [clause]. Without the [clause], #pragma omp atomic is like 54 // #pragma omp atomic update, but the expression can only be one of: 55 // 56 // x binop= expression 57 // x++ 58 // ++x 59 // x-- 60 // --x 61 // 62 // where binop is one of these operators: + * - / & ^ | << >> 63 // 64 // OpenMP 3.0 and later support additional options for the "update" clause, 65 // but SuiteSparse:GraphBLAS uses only this form: 66 // 67 // x binop= expression 68 // 69 // where binop is: + * & ^ | 70 // 71 // This atomic update is used for the PLUS, TIMES, LAND, LXOR, and LOR monoids, 72 // when applied to the built-in types. For PLUS and TIMES, these are the 10 73 // types INTx, UINTx, FP32, FP64 (for x = 8, 16, 32, and 64). For the boolean 74 // monoids, only the BOOL type is used. 75 // 76 // As a result, the atomic updates are the same for gcc and icc (which support 77 // OpenMP 3.0 or later) with the "update" clause. For MS Visual Studio, the 78 // "update" clause is removed since it supports OpenMP 2.0. 79 80 #if ( _OPENMP >= 201307 ) 81 82 // OpenMP 4.0 or later 83 #define GB_ATOMIC_UPDATE GB_PRAGMA (omp atomic update seq_cst) 84 85 #elif ( _OPENMP >= 201107 ) 86 87 // OpenMP 3.1 88 #define GB_ATOMIC_UPDATE GB_PRAGMA (omp atomic update) 89 90 #elif ( _OPENMP >= 199810 ) 91 92 // OpenMP 1.0 to 3.0: no optional clauses, "update" is assumed 93 #define GB_ATOMIC_UPDATE GB_PRAGMA (omp atomic) 94 95 #else 96 97 // no OpenMP at all 98 #define GB_ATOMIC_UPDATE 99 100 #endif 101 102 //------------------------------------------------------------------------------ 103 // atomic read and write 104 //------------------------------------------------------------------------------ 105 106 // In Microsoft Visual Studio, simple reads and writes to properly aligned 107 // 64-bit values are already atomic on 64-bit Windows for any architecture 108 // supported by Windows (any Intel or ARM architecture). See: 109 // https://docs.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access 110 // SuiteSparse:GraphBLAS is not supported on 32-bit Windows. Thus, there 111 // is no need for atomic reads/writes when compiling GraphBLAS on Windows 112 // with MS Visual Studio. 113 114 // ARM, Power8/9, and others need the explicit atomic read/write. 115 // x86: no atomic read/write is needed. 116 117 #if GB_X86_64 118 119 // x86: no atomic read/write is needed. 120 #define GB_ATOMIC_READ 121 #define GB_ATOMIC_WRITE 122 123 #elif ( _OPENMP >= 201811 ) 124 125 // OpenMP 5.0 or later 126 #define GB_ATOMIC_READ GB_PRAGMA (omp atomic read acquire) 127 #define GB_ATOMIC_WRITE GB_PRAGMA (omp atomic write release) 128 129 #elif ( _OPENMP >= 201307 ) 130 131 // OpenMP 4.0 and 4.5 132 #define GB_ATOMIC_READ GB_PRAGMA (omp atomic read seq_cst) 133 #define GB_ATOMIC_WRITE GB_PRAGMA (omp atomic write seq_cst) 134 135 #elif ( _OPENMP >= 201107 ) 136 137 // OpenMP 3.1 138 #define GB_ATOMIC_READ GB_PRAGMA (omp atomic read) 139 #define GB_ATOMIC_WRITE GB_PRAGMA (omp atomic write) 140 141 #else 142 143 // OpenMP 3.0 or earlier, or no OpenMP at all 144 #define GB_ATOMIC_READ 145 #define GB_ATOMIC_WRITE 146 147 #endif 148 149 //------------------------------------------------------------------------------ 150 // flush 151 //------------------------------------------------------------------------------ 152 153 #if defined ( _OPENMP ) 154 155 // All versions of OpenMP have the #pragma omp flush 156 #define GB_OMP_FLUSH GB_PRAGMA (omp flush) 157 158 #else 159 160 // no OpenMP at all 161 #define GB_OMP_FLUSH 162 163 #endif 164 165 //------------------------------------------------------------------------------ 166 // atomic capture 167 //------------------------------------------------------------------------------ 168 169 // An atomic capture loads the prior value of the target into a thread-local 170 // result, and then overwrites the target with the new value. The target is a 171 // value that is shared between threads. The value and result arguments are 172 // thread-local. SuiteSparse:GraphBLAS uses four atomic capture methods, 173 // defined below, of the form: 174 // 175 // { result = target ; target = value ; } for int64_t and int8_t 176 // { result = target ; target |= value ; } for int64_t 177 // { result = target++ ; } for int64_t 178 // 179 // OpenMP 3.1 and later supports atomic captures with a "capture" clause: 180 // 181 // #pragma omp atomic capture 182 // { result = target ; target = value ; } 183 // 184 // or with a binary operator 185 // 186 // #pragma omp atomic capture 187 // { result = target ; target binop= value ; } 188 // 189 // MS Visual Studio supports only OpenMP 2.0, and does not support any 190 // "capture" clause. Thus, on Windows, _InterlockedExchange* and 191 // _InterlockedOr* functions are used instead, as described here: 192 // 193 // https://docs.microsoft.com/en-us/cpp/intrinsics/interlockedexchange-intrinsic-functions 194 // https://docs.microsoft.com/en-us/cpp/intrinsics/interlockedor-intrinsic-functions 195 196 #if ( _OPENMP >= 201307 ) 197 198 // OpenMP 4.0 or later 199 #define GB_ATOMIC_CAPTURE GB_PRAGMA (omp atomic capture seq_cst) 200 201 #elif ( _OPENMP >= 201107 ) 202 203 // OpenMP 3.1 204 #define GB_ATOMIC_CAPTURE GB_PRAGMA (omp atomic capture) 205 206 #elif ( _OPENMP >= 199810 ) 207 208 // OpenMP 1.0 to 3.0: generate an intentional compile-time error if any 209 // attempt is made to use the atomic capture. 210 #define GB_ATOMIC_CAPTURE atomic capture not available 211 212 #else 213 214 // no OpenMP at all 215 #define GB_ATOMIC_CAPTURE 216 217 #endif 218 219 //-------------------------------------------------------------------------- 220 // atomic capture for int64_t 221 //-------------------------------------------------------------------------- 222 223 // int64_t result, target, value ; 224 // do this atomically: { result = target ; target = value ; } 225 226 #if GB_MICROSOFT 227 228 #define GB_ATOMIC_CAPTURE_INT64(result, target, value) \ 229 { \ 230 result = _InterlockedExchange64 \ 231 ((int64_t volatile *) (&(target)), value) ; \ 232 } 233 234 #else 235 236 #define GB_ATOMIC_CAPTURE_INT64(result, target, value) \ 237 { \ 238 GB_ATOMIC_CAPTURE \ 239 { \ 240 result = target ; \ 241 target = value ; \ 242 } \ 243 } 244 245 #endif 246 247 //-------------------------------------------------------------------------- 248 // atomic capture for int8_t 249 //-------------------------------------------------------------------------- 250 251 // int8_t result, target, value ; 252 // do this atomically: { result = target ; target = value ; } 253 254 #if GB_MICROSOFT 255 256 #define GB_ATOMIC_CAPTURE_INT8(result, target, value) \ 257 { \ 258 result = _InterlockedExchange8 \ 259 ((char volatile *) &(target), value) ; \ 260 } 261 262 #else 263 264 #define GB_ATOMIC_CAPTURE_INT8(result, target, value) \ 265 { \ 266 GB_ATOMIC_CAPTURE \ 267 { \ 268 result = target ; \ 269 target = value ; \ 270 } \ 271 } 272 273 #endif 274 275 //-------------------------------------------------------------------------- 276 // atomic capture with bitwise OR, for int64_t 277 //-------------------------------------------------------------------------- 278 279 // int64_t result, target, value ; 280 // do this atomically: { result = target ; target |= value ; } 281 282 #if GB_MICROSOFT 283 284 #define GB_ATOMIC_CAPTURE_INT64_OR(result, target, value) \ 285 { \ 286 result = _InterlockedOr64 \ 287 ((int64_t volatile *) (&(target)), value) ; \ 288 } 289 290 #else 291 292 #define GB_ATOMIC_CAPTURE_INT64_OR(result, target, value) \ 293 { \ 294 GB_ATOMIC_CAPTURE \ 295 { \ 296 result = target ; \ 297 target |= value ; \ 298 } \ 299 } 300 301 #endif 302 303 //-------------------------------------------------------------------------- 304 // atomic post-increment 305 //-------------------------------------------------------------------------- 306 307 // Increment an int64_t value and return the value prior to being 308 // incremented: 309 // 310 // int64_t result = target++ ; 311 // 312 // See 313 // https://docs.microsoft.com/en-us/cpp/intrinsics/interlockedincrement-intrinsic-functions?view=msvc-160 314 // The MS Visual Studio version computes result = ++target, so result must 315 // be decremented by one. 316 317 #if GB_MICROSOFT 318 319 #define GB_ATOMIC_CAPTURE_INC64(result,target) \ 320 { \ 321 result = _InterlockedIncrement64 \ 322 ((int64_t volatile *) (&(target))) - 1 ; \ 323 } 324 325 #else 326 327 #define GB_ATOMIC_CAPTURE_INC64(result,target) \ 328 { \ 329 GB_ATOMIC_CAPTURE \ 330 result = (target)++ ; \ 331 } 332 333 #endif 334 335 //------------------------------------------------------------------------------ 336 // atomic compare-and-exchange 337 //------------------------------------------------------------------------------ 338 339 // Atomic compare-and-exchange is used to implement the MAX, MIN and EQ 340 // monoids, for the fine-grain saxpy-style matrix multiplication. Ideally, 341 // OpenMP would be used for these atomic operation, but they are not supported. 342 // So compiler-specific functions are used instead. 343 344 // In gcc, icc, and clang, the atomic compare-and-exchange function 345 // __atomic_compare_exchange computes the following, as a single atomic 346 // operation, where type_t is any 8, 16, 32, or 64 bit scalar type. In 347 // SuiteSparse:GraphBLAS, type_t can be bool, int8_t, uint8_t, int16_t, 348 // uint16_t, int32_t, uint32_t, int64_t, uint64_t, float, or double. 349 // 350 // bool __atomic_compare_exchange 351 // ( 352 // type_t *target, // input/output 353 // type_t *expected, // input/output 354 // type_t *desired, // input only, even though it is a pointer 355 // bool weak, // true, for SuiteSparse:GraphBLAS 356 // int success_memorder, // __ATOMIC_SEQ_CST is used here 357 // int failure_memorder // __ATOMIC_SEQ_CST is used here 358 // ) 359 // { 360 // bool result ; 361 // if (*target == *expected) 362 // { 363 // *target = *desired ; 364 // result = true ; 365 // } 366 // else 367 // { 368 // *expected = *target ; 369 // result = false ; 370 // } 371 // return (result) ; 372 // } 373 // 374 // The generic __atomic_compare_exchange function in gcc (also supported by 375 // icc) computes the above for any of these 8, 16, 32, or 64-bit scalar types 376 // needed in SuiteSparse:GraphBLAS. SuiteSparse:GraphBLAS does not require the 377 // 'expected = target' assignment if the result is false. It ignores the 378 // value of 'expected' after the operation completes. The target, expected, 379 // and desired parameters are all provided as pointers: 380 // 381 // See https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html 382 383 // Microsoft Visual Studio provides similar but not identical functionality in 384 // the _InterlockedCompareExchange functions, but they are named differently 385 // for different types. Only int8_t, int16_t, int32_t, and int64_t types are 386 // supported. For the int64_t case, the following is performed atomically: 387 // 388 // int64_t _InterlockedCompareExchange64 389 // ( 390 // int64_t volatile *target, // input/output 391 // int64_t desired // input only 392 // int64_t expected 393 // ) 394 // { 395 // int64_t result = *target ; 396 // if (*target == expected) 397 // { 398 // target = desired ; 399 // } 400 // return (result) ; 401 // } 402 // 403 // It does not assign "expected = target" if the test is false, but 404 // SuiteSparse:GraphBLAS does not require this action. It does not return a 405 // boolean result, but instead returns the original value of (*target). 406 // However, this can be compared with the expected value to obtain the 407 // same boolean result as __atomic_compare_exchange. 408 // 409 // Type punning is used to extend these signed integer types to unsigned 410 // integers of the same number of bytes, and to float and double. 411 412 #if GB_MICROSOFT 413 414 //-------------------------------------------------------------------------- 415 // GB_PUN: type punning 416 //-------------------------------------------------------------------------- 417 418 // With type punning, a value is treated as a different type, but with no 419 // typecasting. The address of the variable is first typecasted to a (type 420 // *) pointer, and then the pointer is dereferenced. Type punning is only 421 // needed to extend the atomic compare/exchange functions for Microsoft 422 // Visual Studio. 423 424 #define GB_PUN(type,value) (*((type *) (&(value)))) 425 426 //-------------------------------------------------------------------------- 427 // compare/exchange for MS Visual Studio 428 //-------------------------------------------------------------------------- 429 430 // bool, int8_t, and uint8_t 431 #define GB_ATOMIC_COMPARE_EXCHANGE_8(target, expected, desired) \ 432 ( \ 433 GB_PUN (int8_t, expected) == \ 434 _InterlockedCompareExchange8 ((int8_t volatile *) (target), \ 435 GB_PUN (int8_t, desired), GB_PUN (int8_t, expected)) \ 436 ) 437 438 // int16_t and uint16_t 439 #define GB_ATOMIC_COMPARE_EXCHANGE_16(target, expected, desired) \ 440 ( \ 441 GB_PUN (int16_t, expected) == \ 442 _InterlockedCompareExchange16 ((int16_t volatile *) (target), \ 443 GB_PUN (int16_t, desired), GB_PUN (int16_t, expected)) \ 444 ) 445 446 // float, int32_t, and uint32_t 447 #define GB_ATOMIC_COMPARE_EXCHANGE_32(target, expected, desired) \ 448 ( \ 449 GB_PUN (int32_t, expected) == \ 450 _InterlockedCompareExchange ((int32_t volatile *) (target), \ 451 GB_PUN (int32_t, desired), GB_PUN (int32_t, expected)) \ 452 ) 453 454 // double, int64_t, and uint64_t 455 #define GB_ATOMIC_COMPARE_EXCHANGE_64(target, expected, desired) \ 456 ( \ 457 GB_PUN (int64_t, expected) == \ 458 _InterlockedCompareExchange64 ((int64_t volatile *) (target), \ 459 GB_PUN (int64_t, desired), GB_PUN (int64_t, expected)) \ 460 ) 461 462 #else 463 464 //-------------------------------------------------------------------------- 465 // compare/exchange for gcc, icc, and clang on x86 and Power8/9 466 //-------------------------------------------------------------------------- 467 468 // the compare/exchange function is generic for any type 469 #define GB_ATOMIC_COMPARE_EXCHANGE_X(target, expected, desired) \ 470 __atomic_compare_exchange (target, &expected, &desired, \ 471 true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) \ 472 473 // bool, int8_t, and uint8_t 474 #define GB_ATOMIC_COMPARE_EXCHANGE_8(target, expected, desired) \ 475 GB_ATOMIC_COMPARE_EXCHANGE_X(target, expected, desired) 476 477 // int16_t and uint16_t 478 #define GB_ATOMIC_COMPARE_EXCHANGE_16(target, expected, desired) \ 479 GB_ATOMIC_COMPARE_EXCHANGE_X (target, expected, desired) 480 481 // float, int32_t, and uint32_t 482 #define GB_ATOMIC_COMPARE_EXCHANGE_32(target, expected, desired) \ 483 GB_ATOMIC_COMPARE_EXCHANGE_X (target, expected, desired) 484 485 // double, int64_t, and uint64_t 486 #define GB_ATOMIC_COMPARE_EXCHANGE_64(target, expected, desired) \ 487 GB_ATOMIC_COMPARE_EXCHANGE_X (target, expected, desired) 488 489 490 #endif 491 492 #endif 493 494