/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of atomic
operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type | Data type encoding
-----------|---------------
int8_t | `fixed1`
uint8_t | `fixed1u`
int16_t | `fixed2`
uint16_t | `fixed2u`
int32_t | `fixed4`
uint32_t | `fixed4u`
int64_t | `fixed8`
uint64_t | `fixed8u`
float | `float4`
double | `float8`
80-bit x87 extended float | `float10`
complex<float> | `cmplx4`
complex<double> | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.
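
For example, putting the two encodings together (an illustrative decoding, not
an exhaustive list):
@code
__kmpc_atomic_fixed4_add     // *lhs += rhs       on int32_t
__kmpc_atomic_float8_div_rev // *lhs = rhs / *lhs on double
__kmpc_atomic_fixed8_mul_cpt // *lhs *= rhs       on int64_t, returning the
                             // value before or after the update
@endcode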

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>(ident_t *id_ref, int gtid,
                                          TYPE *lhs, TYPE rhs);
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
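
For example, a compiler that chooses not to inline the update in
@code
double d;
#pragma omp atomic
d /= 2.0;
@endcode
could instead emit a call such as (a sketch; `loc` and `gtid` stand for
whatever source-location descriptor and thread id the compiler has at hand)
@code
__kmpc_atomic_float8_div(&loc, gtid, &d, 2.0);
@endcode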

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument that determines which of the two is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt(ident_t *id_ref, int gtid,
                                              TYPE *lhs, TYPE rhs, int flag);
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
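
For example (a sketch with hypothetical `loc` and `gtid` values), the two
captures of an atomic increment of an `int32_t x` are
@code
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 1, 0); // v = x++; old value
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 1, 1); // v = ++x; new value
@endcode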

The one exception is the `complex<float>` type, where the result is not
returned; instead an extra output-pointer argument is passed.

These functions look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd(ident_t *id_ref, int gtid, TYPE *loc);
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr(ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs);
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are the x87 80-bit format, now rarely used.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of size
4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
*not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.

@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for complex byte data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD
static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
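
// For illustration (a rough manual expansion, not generated code):
// ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens the definition
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// and the macros that follow it supply the body and the closing brace.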

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  (*lhs) OP(rhs); \
 \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
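
// For illustration, OP_UPDATE_CRITICAL(float, +, 4r) expands roughly to
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (float)((*lhs) + ((float)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);
// i.e. the update is simply serialized under the matching type-specific lock.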

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
 \
      old_value = *(TYPE volatile *)lhs; \
      new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    } \
  }
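
// For readers unfamiliar with KMP_COMPARE_AND_STORE_ACQ*, the loop above is
// the classic read-modify-CAS retry pattern; sketched here with the GCC/Clang
// builtin (an illustration only, the runtime uses its own primitives):
//   kmp_int32 old_value, new_value;
//   do {
//     old_value = *(kmp_int32 volatile *)lhs;
//     new_value = old_value + rhs;
//   } while (!__atomic_compare_exchange_n((kmp_int32 *)lhs, &old_value,
//                                         new_value, 0 /*strong*/,
//                                         __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE));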

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
 \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
 \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }

#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (*lhs)OP rhs; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
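
// For example (illustration only), ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32,
// 32, -, 4i, 3, 0) produces a body of KMP_TEST_THEN_ADD32(lhs, -rhs): the OP
// token is pasted in front of rhs, so atomic subtraction becomes a
// fetch-and-add of the negated operand.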
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C counterpart: */
/* MAX, MIN, .EQV., .NEQV. */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }
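
// For example, with OP '<' (max) the loop above only attempts the store while
// the captured value still compares less than rhs, roughly:
//   old_value = *lhs;
//   while (old_value < rhs && !CAS(lhs, old_value, rhs))
//     old_value = *lhs;
// so a thread whose rhs is no longer an improvement falls through without
// writing anything.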

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
  }
1258
1259 // ------------------------------------------------------------------------
1260 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1261 // ------------------------------------------------------------------------
1262 // X86 or X86_64: no alignment problems ===================================
1263 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1264 GOMP_FLAG) \
1265 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1266 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1267 OP_CMPXCHG(TYPE, BITS, OP) \
1268 }
1269 // ------------------------------------------------------------------------
1270 #else
1271 // ------------------------------------------------------------------------
1272 // Code for other architectures that don't handle unaligned accesses.
1273 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1274 GOMP_FLAG) \
1275 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1276 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1277 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1278 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1279 } else { \
1280 KMP_CHECK_GTID; \
1281 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1282 } \
1283 }
1284 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1285
1286 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1287 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1288 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1289 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1290 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1291 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1292 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1293 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1294 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1295 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1296 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1297 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1298 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1299 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1300 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1301 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1302
1303 // ------------------------------------------------------------------------
1304 // Routines for Extended types: long double, _Quad, complex flavours (use
1305 // critical section)
1306 // TYPE_ID, OP_ID, TYPE - detailed above
1307 // OP - operator
1308 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1309 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1310 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1311 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1312 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1313 }
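
// For illustration, ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
// generates roughly (a manual expansion, not the exact preprocessor output):
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     // GOMP-compat path under the generic __kmp_atomic_lock, then:
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) = (long double)((*lhs) + ((long double)rhs));
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }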

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
 \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(TYPE, OP, 0); \
    return; \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1438
1439 // ------------------------------------------------------------------------
1440 // Operation on *lhs, rhs using "compare_and_store" routine
1441 // TYPE - operands' type
1442 // BITS - size in bits, used to distinguish low level calls
1443 // OP - operator
1444 // Note: temp_val introduced in order to force the compiler to read
1445 // *lhs only once (w/o it the compiler reads *lhs twice)
1446 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1447 { \
1448 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1449 TYPE old_value, new_value; \
1450 temp_val = *lhs; \
1451 old_value = temp_val; \
1452 new_value = (TYPE)(rhs OP old_value); \
1453 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1454 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1455 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1456 KMP_DO_PAUSE; \
1457 \
1458 temp_val = *lhs; \
1459 old_value = temp_val; \
1460 new_value = (TYPE)(rhs OP old_value); \
1461 } \
1462 }
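
// The loop above implements the reversed update lock-free: read *lhs once,
// compute rhs OP old_value, then try to publish the result with a
// full-width compare-and-store on the operand's bit image (hence the
// kmp_int##BITS casts). If another thread changed *lhs in the meantime,
// the CAS fails, *lhs is re-read and the result is recomputed;
// KMP_DO_PAUSE is a spin-wait hint to the processor.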
1463
1464 // -------------------------------------------------------------------------
1465 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1466 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1467 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1468 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1469 }
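
// For example, the instantiation
//   ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86)
// generates (roughly) the entry point
//   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_int32 *lhs, kmp_int32 rhs);
// which atomically performs *lhs = rhs - *lhs via the CAS loop above.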
1470
1471 // ------------------------------------------------------------------------
1472 // Entries definition for integer operands
1473 // TYPE_ID - operands type and size (fixed4, float4)
1474 // OP_ID - operation identifier (add, sub, mul, ...)
1475 // TYPE - operand type
1476 // BITS - size in bits, used to distinguish low level calls
1477 // OP - operator (used in critical section)
1478 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1479
// ------------------------------------------------------------------------
// Routines for ATOMIC integer and real operands, non-commutative operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1485 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1486 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1487 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1488 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1489 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1490 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1491 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1492 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1493 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1494 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1495 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1497
1498 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1499 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1500 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1501 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1502 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1503 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1504 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1505 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1506 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1507 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1508 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1510
1511 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1512 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1513 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1514 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1515 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1516 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1517 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1518 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1519 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1520 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1521 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1523
1524 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1525 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1526 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1527 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1528 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1529 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1530 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1531 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1532 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1533 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1534 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1536
1537 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1538 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1539 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1540 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1541
1542 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1543 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1544 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1545 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1546 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1547
1548 // ------------------------------------------------------------------------
1549 // Routines for Extended types: long double, _Quad, complex flavours (use
1550 // critical section)
1551 // TYPE_ID, OP_ID, TYPE - detailed above
1552 // OP - operator
1553 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1554 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1555 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1556 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1557 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1558 }
1559
1560 /* ------------------------------------------------------------------------- */
1561 // routines for long double type
1562 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1563 1) // __kmpc_atomic_float10_sub_rev
1564 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1565 1) // __kmpc_atomic_float10_div_rev
1566 #if KMP_HAVE_QUAD
1567 // routines for _Quad type
1568 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1569 1) // __kmpc_atomic_float16_sub_rev
1570 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1571 1) // __kmpc_atomic_float16_div_rev
1572 #if (KMP_ARCH_X86)
1573 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1574 1) // __kmpc_atomic_float16_sub_a16_rev
1575 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1576 1) // __kmpc_atomic_float16_div_a16_rev
1577 #endif // KMP_ARCH_X86
1578 #endif // KMP_HAVE_QUAD
1579
1580 // routines for complex types
1581 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1582 1) // __kmpc_atomic_cmplx4_sub_rev
1583 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1584 1) // __kmpc_atomic_cmplx4_div_rev
1585 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1586 1) // __kmpc_atomic_cmplx8_sub_rev
1587 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1588 1) // __kmpc_atomic_cmplx8_div_rev
1589 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1590 1) // __kmpc_atomic_cmplx10_sub_rev
1591 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1592 1) // __kmpc_atomic_cmplx10_div_rev
1593 #if KMP_HAVE_QUAD
1594 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1595 1) // __kmpc_atomic_cmplx16_sub_rev
1596 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1597 1) // __kmpc_atomic_cmplx16_div_rev
1598 #if (KMP_ARCH_X86)
1599 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1600 1) // __kmpc_atomic_cmplx16_sub_a16_rev
1601 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1602 1) // __kmpc_atomic_cmplx16_div_a16_rev
1603 #endif // KMP_ARCH_X86
1604 #endif // KMP_HAVE_QUAD
1605
1606 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1607 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1608
/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: in order to reduce the total number of type combinations, it is     */
/* assumed that the compiler converts the RHS to the longest floating type,  */
/* that is _Quad, before calling any of these routines. The conversion to    */
/* _Quad is done by the compiler during the calculation, and the conversion  */
/* back to TYPE happens before the assignment, like:                         */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of the software emulation used. */
/* ------------------------------------------------------------------------ */
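
// For example (a sketch; the exact code generation is up to the compiler),
// given
//   char x; _Quad q;
//   #pragma omp atomic
//   x = x * q;
// the compiler may emit a call such as
//   __kmpc_atomic_fixed1_mul_fp(&loc, gtid, &x, q);
// (loc standing for the compiler-generated source-location descriptor),
// which atomically performs *lhs = (char)((_Quad)(*lhs) * rhs).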
1619
1620 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1622 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1623 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1624 KA_TRACE(100, \
1625 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1626 gtid));
1627
1628 // -------------------------------------------------------------------------
1629 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1630 GOMP_FLAG) \
1631 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1632 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1633 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1634 }
1635
1636 // -------------------------------------------------------------------------
1637 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1638 // -------------------------------------------------------------------------
1639 // X86 or X86_64: no alignment problems ====================================
1640 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1641 LCK_ID, MASK, GOMP_FLAG) \
1642 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1643 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1644 OP_CMPXCHG(TYPE, BITS, OP) \
1645 }
1646 // -------------------------------------------------------------------------
1647 #else
1648 // ------------------------------------------------------------------------
1649 // Code for other architectures that don't handle unaligned accesses.
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651 LCK_ID, MASK, GOMP_FLAG) \
1652 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1655 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1656 } else { \
1657 KMP_CHECK_GTID; \
1658 OP_UPDATE_CRITICAL(TYPE, OP, \
1659 LCK_ID) /* unaligned address - use critical */ \
1660 } \
1661 }
1662 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
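
// Note: in the non-x86 variant above, MASK encodes the natural-alignment
// test for the operand: it is the hex value size-1 (0, 1, 3, 7 for 1-, 2-,
// 4-, 8-byte operands), so ((kmp_uintptr_t)lhs & 0x##MASK) is nonzero
// exactly when lhs is not naturally aligned and the critical-section
// fallback must be taken.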
1663
1664 // -------------------------------------------------------------------------
1665 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1666 // -------------------------------------------------------------------------
1667 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1668 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1669 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1670 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1671 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1672 }
1673 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1674 LCK_ID, GOMP_FLAG) \
1675 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1676 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1677 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1678 }
1679 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1680
1681 // RHS=float8
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1683 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1684 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1685 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1687 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1688 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1689 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1691 0) // __kmpc_atomic_fixed4_mul_float8
1692 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1693 0) // __kmpc_atomic_fixed4_div_float8
1694 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1695 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1696 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1697 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1698 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1699 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1700 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1701 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1703 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1705 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1706
1707 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1708 // use them)
1709 #if KMP_HAVE_QUAD
1710 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1711 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1712 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1713 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1715 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1717 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1719 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1726
1727 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1728 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1729 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1730 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1732 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1734 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1736 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1742 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1743
1744 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1745 0) // __kmpc_atomic_fixed4_add_fp
1746 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1747 0) // __kmpc_atomic_fixed4u_add_fp
1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1749 0) // __kmpc_atomic_fixed4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1751 0) // __kmpc_atomic_fixed4u_sub_fp
1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1753 0) // __kmpc_atomic_fixed4_mul_fp
1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1755 0) // __kmpc_atomic_fixed4u_mul_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1757 0) // __kmpc_atomic_fixed4_div_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1759 0) // __kmpc_atomic_fixed4u_div_fp
1760
1761 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1762 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1763 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1764 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1766 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1768 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1770 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1772 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1774 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1776 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1777
1778 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1779 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1780 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1781 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1783 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1785 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1786
1787 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1788 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1789 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1790 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1792 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1794 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1795
1796 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1797 1) // __kmpc_atomic_float10_add_fp
1798 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1799 1) // __kmpc_atomic_float10_sub_fp
1800 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1801 1) // __kmpc_atomic_float10_mul_fp
1802 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1803 1) // __kmpc_atomic_float10_div_fp
1804
1805 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1806 // Reverse operations
1807 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1808 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1809 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1810 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1812 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1814 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1815
1816 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1817 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1819 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1821 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1823 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1824
1825 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1826 0) // __kmpc_atomic_fixed4_sub_rev_fp
1827 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1828 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1830 0) // __kmpc_atomic_fixed4_div_rev_fp
1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1832 0) // __kmpc_atomic_fixed4u_div_rev_fp
1833
1834 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1835 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1836 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1837 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1839 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1841 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1842
1843 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1844 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1845 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1846 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1847
1848 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1849 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1851 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1852
1853 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1854 1) // __kmpc_atomic_float10_sub_rev_fp
1855 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1856 1) // __kmpc_atomic_float10_div_rev_fp
1857 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1858
1859 #endif // KMP_HAVE_QUAD
1860
1861 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1862 // ------------------------------------------------------------------------
1863 // X86 or X86_64: no alignment problems ====================================
1864 #if USE_CMPXCHG_FIX
1865 // workaround for C78287 (complex(kind=4) data type)
1866 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1867 LCK_ID, MASK, GOMP_FLAG) \
1868 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1869 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1870 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1871 }
1872 // end of the second part of the workaround for C78287
1873 #else
1874 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1875 LCK_ID, MASK, GOMP_FLAG) \
1876 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1877 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1878 OP_CMPXCHG(TYPE, BITS, OP) \
1879 }
1880 #endif // USE_CMPXCHG_FIX
1881 #else
1882 // ------------------------------------------------------------------------
1883 // Code for other architectures that don't handle unaligned accesses.
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885 LCK_ID, MASK, GOMP_FLAG) \
1886 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1889 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1890 } else { \
1891 KMP_CHECK_GTID; \
1892 OP_UPDATE_CRITICAL(TYPE, OP, \
1893 LCK_ID) /* unaligned address - use critical */ \
1894 } \
1895 }
1896 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1897
1898 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1899 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1900 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1901 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1902 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1903 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1904 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1905 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1906
1907 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1908 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1909
1910 // ------------------------------------------------------------------------
1911 // Atomic READ routines
1912
1913 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
1917 // OP_ID - operation identifier (add, sub, mul, ...)
1918 // TYPE - operands' type
1919 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1920 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1921 TYPE *loc) { \
1922 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1923 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1924
1925 // ------------------------------------------------------------------------
1926 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1927 // TYPE - operands' type
1928 // BITS - size in bits, used to distinguish low level calls
1929 // OP - operator
1930 // Note: temp_val introduced in order to force the compiler to read
1931 // *lhs only once (w/o it the compiler reads *lhs twice)
1932 // TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
1934 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1935 { \
1936 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1937 union f_i_union { \
1938 TYPE f_val; \
1939 kmp_int##BITS i_val; \
1940 }; \
1941 union f_i_union old_value; \
1942 temp_val = *loc; \
1943 old_value.f_val = temp_val; \
1944 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1945 (kmp_int##BITS *)loc, \
1946 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1947 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1948 new_value = old_value.f_val; \
1949 return new_value; \
1950 }
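
// Passing old_value.i_val as both the expected and the new value means the
// compare-and-store never changes the contents of *loc; it is used purely
// to obtain an atomic full-width load. The union lets the floating-point
// bits be viewed as a same-width integer for that comparison.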
1951
1952 // -------------------------------------------------------------------------
1953 // Operation on *lhs, rhs bound by critical section
1954 // OP - operator (it's supposed to contain an assignment)
1955 // LCK_ID - lock identifier
1956 // Note: don't check gtid as it should always be valid
1957 // 1, 2-byte - expect valid parameter, other - check before this macro
1958 #define OP_CRITICAL_READ(OP, LCK_ID) \
1959 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1960 \
1961 new_value = (*loc); \
1962 \
1963 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1964
1965 // -------------------------------------------------------------------------
1966 #ifdef KMP_GOMP_COMPAT
1967 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1968 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1969 KMP_CHECK_GTID; \
1970 OP_CRITICAL_READ(OP, 0); \
1971 return new_value; \
1972 }
1973 #else
1974 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1975 #endif /* KMP_GOMP_COMPAT */
1976
1977 // -------------------------------------------------------------------------
1978 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1979 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1980 TYPE new_value; \
1981 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1982 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1983 return new_value; \
1984 }
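
// ATOMIC_FIXED_READ obtains the value by atomically adding zero:
// KMP_TEST_THEN_ADD returns the previous contents, so "fetch-and-add 0"
// acts as a full-width atomic load without a separate read primitive.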
1985 // -------------------------------------------------------------------------
1986 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1987 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1988 TYPE new_value; \
1989 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1990 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1991 }
1992 // ------------------------------------------------------------------------
1993 // Routines for Extended types: long double, _Quad, complex flavours (use
1994 // critical section)
1995 // TYPE_ID, OP_ID, TYPE - detailed above
1996 // OP - operator
1997 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1998 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1999 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2000 TYPE new_value; \
2001 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2002 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2003 return new_value; \
2004 }
2005
2006 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional output parameter instead.
2010 #if (KMP_OS_WINDOWS)
2011
2012 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2013 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2014 \
2015 (*out) = (*loc); \
2016 \
2017 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2018 // ------------------------------------------------------------------------
2019 #ifdef KMP_GOMP_COMPAT
2020 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2021 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2022 KMP_CHECK_GTID; \
2023 OP_CRITICAL_READ_WRK(OP, 0); \
2024 }
2025 #else
2026 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2027 #endif /* KMP_GOMP_COMPAT */
2028 // ------------------------------------------------------------------------
2029 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2030 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2031 TYPE *loc) { \
2032 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2033 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2034
2035 // ------------------------------------------------------------------------
2036 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2037 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2038 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2039 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2040 }
2041
2042 #endif // KMP_OS_WINDOWS
2043
2044 // ------------------------------------------------------------------------
2045 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2046 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2047 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2049 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2050 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2051 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2052 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2053
// !!! TODO: Remove lock operations for "char" since a 1-byte access cannot
// be non-atomic
2055 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2056 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2057 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2058 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2059
2060 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2061 1) // __kmpc_atomic_float10_rd
2062 #if KMP_HAVE_QUAD
2063 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2064 1) // __kmpc_atomic_float16_rd
2065 #endif // KMP_HAVE_QUAD
2066
2067 // Fix for CQ220361 on Windows* OS
2068 #if (KMP_OS_WINDOWS)
2069 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2070 1) // __kmpc_atomic_cmplx4_rd
2071 #else
2072 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2073 1) // __kmpc_atomic_cmplx4_rd
2074 #endif // (KMP_OS_WINDOWS)
2075 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2076 1) // __kmpc_atomic_cmplx8_rd
2077 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2078 1) // __kmpc_atomic_cmplx10_rd
2079 #if KMP_HAVE_QUAD
2080 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2081 1) // __kmpc_atomic_cmplx16_rd
2082 #if (KMP_ARCH_X86)
2083 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2084 1) // __kmpc_atomic_float16_a16_rd
2085 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2086 1) // __kmpc_atomic_cmplx16_a16_rd
2087 #endif // (KMP_ARCH_X86)
2088 #endif // KMP_HAVE_QUAD
2089
2090 // ------------------------------------------------------------------------
2091 // Atomic WRITE routines
2092
2093 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2094 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2095 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2096 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2097 }
2098 // ------------------------------------------------------------------------
2099 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2100 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2101 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2102 KMP_XCHG_REAL##BITS(lhs, rhs); \
2103 }
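
// Both write macros above store rhs with an unconditional atomic exchange
// (KMP_XCHG_FIXED##BITS / KMP_XCHG_REAL##BITS); the previous value that
// the exchange returns is simply discarded, since an atomic write captures
// nothing.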
2104
2105 // ------------------------------------------------------------------------
2106 // Operation on *lhs, rhs using "compare_and_store" routine
2107 // TYPE - operands' type
2108 // BITS - size in bits, used to distinguish low level calls
2109 // OP - operator
2110 // Note: temp_val introduced in order to force the compiler to read
2111 // *lhs only once (w/o it the compiler reads *lhs twice)
2112 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2113 { \
2114 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2115 TYPE old_value, new_value; \
2116 temp_val = *lhs; \
2117 old_value = temp_val; \
2118 new_value = rhs; \
2119 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2120 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2121 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2122 temp_val = *lhs; \
2123 old_value = temp_val; \
2124 new_value = rhs; \
2125 } \
2126 }
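
// Here new_value is simply rhs: the CAS loop is used only to obtain an
// atomic full-width store, retrying until no other thread has modified
// *lhs between the read of old_value and the compare-and-store.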
2127
2128 // -------------------------------------------------------------------------
2129 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2130 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2131 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2132 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2133 }
2134
2135 // ------------------------------------------------------------------------
2136 // Routines for Extended types: long double, _Quad, complex flavours (use
2137 // critical section)
2138 // TYPE_ID, OP_ID, TYPE - detailed above
2139 // OP - operator
2140 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2141 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2142 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2143 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2144 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2145 }
2146 // -------------------------------------------------------------------------
2147
2148 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2149 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2150 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2151 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2152 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2153 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2154 #if (KMP_ARCH_X86)
2155 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2156 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2157 #else
2158 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2159 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2160 #endif // (KMP_ARCH_X86)
2161
2162 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2163 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2164 #if (KMP_ARCH_X86)
2165 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2166 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2167 #else
2168 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2169 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2170 #endif // (KMP_ARCH_X86)
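
// Note: on IA-32 a 64-bit operand cannot be exchanged with a single XCHG
// instruction, so the 8-byte writes above (fixed8, float8) fall back to
// the compare-and-store loop; on 64-bit targets a plain atomic exchange
// suffices.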
2171
2172 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2173 1) // __kmpc_atomic_float10_wr
2174 #if KMP_HAVE_QUAD
2175 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2176 1) // __kmpc_atomic_float16_wr
2177 #endif // KMP_HAVE_QUAD
2178 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2179 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2180 1) // __kmpc_atomic_cmplx8_wr
2181 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2182 1) // __kmpc_atomic_cmplx10_wr
2183 #if KMP_HAVE_QUAD
2184 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2185 1) // __kmpc_atomic_cmplx16_wr
2186 #if (KMP_ARCH_X86)
2187 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2188 1) // __kmpc_atomic_float16_a16_wr
2189 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2190 1) // __kmpc_atomic_cmplx16_a16_wr
2191 #endif // (KMP_ARCH_X86)
2192 #endif // KMP_HAVE_QUAD
2193
2194 // ------------------------------------------------------------------------
2195 // Atomic CAPTURE routines
2196
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
2200 // OP_ID - operation identifier (add, sub, mul, ...)
2201 // TYPE - operands' type
2202 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2203 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2204 TYPE *lhs, TYPE rhs, int flag) { \
2205 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2206 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2207
2208 // -------------------------------------------------------------------------
2209 // Operation on *lhs, rhs bound by critical section
2210 // OP - operator (it's supposed to contain an assignment)
2211 // LCK_ID - lock identifier
2212 // Note: don't check gtid as it should always be valid
2213 // 1, 2-byte - expect valid parameter, other - check before this macro
2214 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2215 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2216 \
2217 if (flag) { \
2218 (*lhs) OP rhs; \
2219 new_value = (*lhs); \
2220 } else { \
2221 new_value = (*lhs); \
2222 (*lhs) OP rhs; \
2223 } \
2224 \
2225 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2226 return new_value;
2227
2228 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2229 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2230 \
2231 if (flag) { \
2232 (*lhs) = (TYPE)((*lhs)OP rhs); \
2233 new_value = (*lhs); \
2234 } else { \
2235 new_value = (*lhs); \
2236 (*lhs) = (TYPE)((*lhs)OP rhs); \
2237 } \
2238 \
2239 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2240 return new_value;
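
// In both capture macros above, flag selects the captured value: with
// flag != 0 the update is applied first and the new value is returned;
// with flag == 0 the value before the update is returned.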
2241
2242 // ------------------------------------------------------------------------
2243 #ifdef KMP_GOMP_COMPAT
2244 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2245 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2246 KMP_CHECK_GTID; \
2247 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2248 }
2249 #else
2250 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2251 #endif /* KMP_GOMP_COMPAT */
2252
2253 // ------------------------------------------------------------------------
2254 // Operation on *lhs, rhs using "compare_and_store" routine
2255 // TYPE - operands' type
2256 // BITS - size in bits, used to distinguish low level calls
2257 // OP - operator
2258 // Note: temp_val introduced in order to force the compiler to read
2259 // *lhs only once (w/o it the compiler reads *lhs twice)
2260 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2261 { \
2262 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2263 TYPE old_value, new_value; \
2264 temp_val = *lhs; \
2265 old_value = temp_val; \
2266 new_value = (TYPE)(old_value OP rhs); \
2267 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2268 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2269 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2270 temp_val = *lhs; \
2271 old_value = temp_val; \
2272 new_value = (TYPE)(old_value OP rhs); \
2273 } \
2274 if (flag) { \
2275 return new_value; \
2276 } else \
2277 return old_value; \
2278 }
2279
2280 // -------------------------------------------------------------------------
2281 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2282 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2283 TYPE new_value; \
2284 (void)new_value; \
2285 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2286 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2287 }
2288
2289 // -------------------------------------------------------------------------
2290 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292 TYPE old_value, new_value; \
2293 (void)new_value; \
2294 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2296 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2297 if (flag) { \
2298 return old_value OP rhs; \
2299 } else \
2300 return old_value; \
2301 }
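
// ATOMIC_FIXED_ADD_CPT maps directly onto hardware fetch-and-add:
// KMP_TEST_THEN_ADD returns the value before the addition, so the
// "captured after" result is reconstructed as old_value OP rhs. OP is +
// or -, subtraction being encoded by adding the negated rhs.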
2302 // -------------------------------------------------------------------------
2303
2304 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2305 0) // __kmpc_atomic_fixed4_add_cpt
2306 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2307 0) // __kmpc_atomic_fixed4_sub_cpt
2308 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2309 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2310 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2311 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2312
2313 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2314 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2315 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2316 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2317 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2318 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2319 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2320 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2321
2322 // ------------------------------------------------------------------------
2323 // Entries definition for integer operands
2324 // TYPE_ID - operands type and size (fixed4, float4)
2325 // OP_ID - operation identifier (add, sub, mul, ...)
2326 // TYPE - operand type
2327 // BITS - size in bits, used to distinguish low level calls
2328 // OP - operator (used in critical section)
// ------------------------------------------------------------------------
// Routines for ATOMIC integer and real operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2334 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2335 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2337 0) // __kmpc_atomic_fixed1_andb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2339 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2341 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2343 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2345 0) // __kmpc_atomic_fixed1_orb_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2347 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2349 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2351 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2353 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2355 0) // __kmpc_atomic_fixed1_xor_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2357 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2359 0) // __kmpc_atomic_fixed2_andb_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2361 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2363 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2365 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2367 0) // __kmpc_atomic_fixed2_orb_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2369 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2371 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2373 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2375 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2377 0) // __kmpc_atomic_fixed2_xor_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2379 0) // __kmpc_atomic_fixed4_andb_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2381 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2383 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2385 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2386 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2387 0) // __kmpc_atomic_fixed4_orb_cpt
2388 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2389 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2390 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2391 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2392 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2393 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2394 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2395 0) // __kmpc_atomic_fixed4_xor_cpt
2396 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2397 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2398 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2399 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2400 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2401 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2402 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2403 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2404 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2405 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2406 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2407 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2408 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2409 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2410 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2411 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2412 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2413 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2414 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2415 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2416 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2417 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2418 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2419 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2420 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2421 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2423
2424 // CAPTURE routines for mixed types RHS=float16
2425 #if KMP_HAVE_QUAD
2426
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
2430 // OP_ID - operation identifier (add, sub, mul, ...)
2431 // TYPE - operands' type
2432 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2433 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2434 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2435 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2436 KA_TRACE(100, \
2437 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2438 gtid));
2439
2440 // -------------------------------------------------------------------------
2441 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2442 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2443 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2444 TYPE new_value; \
2445 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2446 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2447 }
2448
2449 // -------------------------------------------------------------------------
2450 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2451 LCK_ID, GOMP_FLAG) \
2452 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453 TYPE new_value; \
2454 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2455 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2456 }
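
// For example, ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp,
// _Quad, 1i, 0, KMP_ARCH_X86) generates (roughly) the entry point
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag);
// which returns the captured value with the same flag semantics as above.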
2457
2458 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2459 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2460 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2461 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2463 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2465 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2467 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2469 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2471 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2473 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2474
2475 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2476 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2478 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2480 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2482 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2484 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2486 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2488 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2490 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2491
2492 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2493 0) // __kmpc_atomic_fixed4_add_cpt_fp
2494 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2495 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2497 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2499 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2501 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2503 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2505 0) // __kmpc_atomic_fixed4_div_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2507 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2508
2509 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2510 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2512 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2514 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2516 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2518 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2520 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2522 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2524 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2525
2526 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2527 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2528 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2529 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2531 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2533 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2534
2535 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2536 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2537 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2538 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2540 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2542 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2543
2544 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2545 1) // __kmpc_atomic_float10_add_cpt_fp
2546 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2547 1) // __kmpc_atomic_float10_sub_cpt_fp
2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2549 1) // __kmpc_atomic_float10_mul_cpt_fp
2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2551 1) // __kmpc_atomic_float10_div_cpt_fp
2552
2553 #endif // KMP_HAVE_QUAD
2554
2555 // ------------------------------------------------------------------------
2556 // Routines for C/C++ Reduction operators && and ||
2557
2558 // -------------------------------------------------------------------------
2559 // Operation on *lhs, rhs bound by critical section
2560 // OP - operator (it's supposed to contain an assignment)
2561 // LCK_ID - lock identifier
2562 // Note: don't check gtid as it should always be valid
2563 // 1, 2-byte - expect valid parameter, other - check before this macro
2564 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2565 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2566 \
2567 if (flag) { \
2568 new_value OP rhs; \
2569 (*lhs) = new_value; \
2570 } else { \
2571 new_value = (*lhs); \
2572 (*lhs) OP rhs; \
2573 } \
2574 \
2575 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2576
2577 // ------------------------------------------------------------------------
2578 #ifdef KMP_GOMP_COMPAT
2579 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2580 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2581 KMP_CHECK_GTID; \
2582 OP_CRITICAL_L_CPT(OP, 0); \
2583 return new_value; \
2584 }
2585 #else
2586 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2587 #endif /* KMP_GOMP_COMPAT */
2588
2589 // ------------------------------------------------------------------------
// Need separate macros for && and || because C has no combined-assignment
// form of the logical operators
2591 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2592 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2593 TYPE new_value; \
2594 (void)new_value; \
2595 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2596 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2597 }
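
// Note the OP argument handed to OP_GOMP_CRITICAL_L_CPT above is the token
// sequence "= *lhs &&" (or "= *lhs ||"), so that inside OP_CRITICAL_L_CPT
// the text "new_value OP rhs" expands to "new_value = *lhs && rhs"; this
// synthesizes the combined-assignment form that C lacks for the logical
// operators.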

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any update is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }
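
// Illustrative sketch (not part of the runtime): for example,
// __kmpc_atomic_fixed4_max_cpt generated below is intended to behave as if
// the following ran atomically:
//   kmp_int32 old_value = *lhs;
//   if (old_value < rhs) /* still need actions? */
//     *lhs = rhs;
//   return flag ? *lhs : old_value;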

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  (void)new_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
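
// Note (editorial, illustrative): Fortran .EQV. is implemented via the
// identity a .EQV. b == ~(a ^ b) == a ^ ~b, which is why ATOMIC_CMPX_EQV_CPT
// applies the composite operator "^~"; e.g. inside
// __kmpc_atomic_fixed4_eqv_cpt the update computed is
//   new_value = old_value ^ ~rhs;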

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4
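
// Illustrative expansion (derived from ATOMIC_BEGIN_WRK above): the cmplx4
// capture routines are void and return the captured value through "out", e.g.
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);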

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; }
// { x = expr binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
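
// Illustrative sketch (not part of the runtime): a reversed capture such as
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }
// evaluates rhs OP lhs (here: expr - x) rather than lhs OP rhs, so the
// non-commutative operators get dedicated "_cpt_rev" entry points below.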

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  old_value = (*lhs); \
  (*lhs) = rhs; \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  (void)old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
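
// Illustrative usage sketch (not part of the runtime; assumes a valid gtid
// and source-location "loc"): the capture-write
//   #pragma omp atomic capture
//   { v = x; x = 42; }
// on a kmp_int32 could be lowered to
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, 42);
// returning the old value of x while storing 42 atomically.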

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)
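
// Note (editorial): 32-bit IA-32 has no 8-byte atomic exchange instruction,
// so the 8-byte swaps above fall back to the compare-and-store loop
// (ATOMIC_CMPXCHG_SWP); other targets use a direct KMP_XCHG_FIXED64 /
// KMP_XCHG_REAL64 instead.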

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8(
        (kmp_int8 *)lhs, *(kmp_int8 *)&old_value, *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
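
// Illustrative usage sketch (not part of the runtime; "add_int8" and the call
// site are hypothetical): the callback receives (out, lhs_value, rhs) and
// must store the result of the operation into *out, so a 1-byte atomic add
// could be driven as:
//   static void add_int8(void *out, void *a, void *b) {
//     *(char *)out = (char)(*(char *)a + *(char *)b);
//   }
//   char x = 5, inc = 3;
//   __kmpc_atomic_1(&loc, gtid, &x, &inc, add_int8); // x becomes 8 atomically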

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
  // Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
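
// Illustrative usage sketch (not part of the runtime): a compiler may bracket
// an atomic update it cannot map to a specialized entry point with this pair,
// serializing it under the global atomic lock:
//   __kmpc_atomic_start();
//   x = some_unsupported_update(x, rhs); // hypothetical update
//   __kmpc_atomic_end();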

/*!
@}
*/

// end of file