1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
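For illustration, a compiler that chooses not to inline the increment above
might emit a call like this instead (a sketch only; how the `id_ref` source
location and the `gtid` global thread id are obtained is omitted here and is
an assumption about the usual OpenMP code generation):
@code
extern "C" void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
                                         kmp_int32 *lhs, kmp_int32 rhs);

static int s = 0;
void increment(ident_t *id_ref, int gtid) {
  // equivalent of:  #pragma omp atomic
  //                 s++;
  __kmpc_atomic_fixed4_add(id_ref, gtid, (kmp_int32 *)&s, 1);
}
@endcode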
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
49 int64_t    | `fixed8`
50 uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
53 long double (80-bit x87 extended float)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
56 complex<long double> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81 
82 Update Functions
83 ================
84 The general form of an atomic function that just performs an update (without a
85 `capture`) is
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid,
88                                            TYPE * lhs, TYPE rhs );
89 @endcode
90 @param id_ref  a pointer to the source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
94 
95 `capture` functions
96 ===================
97 The capture functions perform an atomic update and return a result, which is
98 either the value before the update or the value after it. They take an
99 additional argument to determine which result is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid,
103                                                TYPE * lhs, TYPE rhs, int flag );
104 @endcode
105 @param id_ref  a pointer to the source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
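
As a short sketch (assuming `id_ref` and `gtid` are already available), the
`flag` argument selects which value comes back from the `fixed4` add capture
entry point listed below:
@code
kmp_int32 x = 10;
// flag == 1: capture *after* the update; x becomes 15, returns 15
kmp_int32 new_val = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 5, 1);
// flag == 0: capture *before* the update; x becomes 20, returns 15
kmp_int32 old_val = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 5, 0);
@endcode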
111 
112 The one exception to this is the `complex<float>` type, where the value is
113 not returned; instead an extra pointer argument is passed to receive the result.
114 
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs,
118                                     kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
138 @endcode
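
For example (again a sketch assuming `id_ref` and `gtid` are available), an
atomic read and an atomic write of a `double` use the `float8` variants:
@code
double shared_value = 0.0;
double snapshot = __kmpc_atomic_float8_rd(id_ref, gtid, &shared_value); // atomic read
__kmpc_atomic_float8_wr(id_ref, gtid, &shared_value, 42.0);             // atomic write
@endcode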
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
146 There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
147 and unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
389 There are versions here for floating point numbers of size 4, 8, 10 and 16
390 bytes. (Ten byte floats are used by X87, but are now rare).
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
484 Functions for complex types whose component floating point variables are of
485 size 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602    on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604    in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648                                           kmp_cmplx128_a4_t &rhs) {
649   return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652                                           kmp_cmplx128_a4_t &rhs) {
653   return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656                                           kmp_cmplx128_a4_t &rhs) {
657   return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660                                           kmp_cmplx128_a4_t &rhs) {
661   return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665                                            kmp_cmplx128_a16_t &rhs) {
666   return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669                                            kmp_cmplx128_a16_t &rhs) {
670   return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673                                            kmp_cmplx128_a16_t &rhs) {
674   return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677                                            kmp_cmplx128_a16_t &rhs) {
678   return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID                                                         \
689   if (gtid == KMP_GTID_UNKNOWN) {                                              \
690     gtid = __kmp_entry_gtid();                                                 \
691   } // check and get gtid when needed
692 
693 // Beginning of a definition (provides name, parameters, debug trace)
694 //     TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
695 //     fixed-width integers)
696 //     OP_ID   - operation identifier (add, sub, mul, ...)
697 //     TYPE    - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
699   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
700                                              TYPE *lhs, TYPE rhs) {            \
701     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
702     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
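
// For illustration only (an approximate expansion, written out here as an
// assumption rather than generated text): ATOMIC_BEGIN(fixed4, add, kmp_int32,
// void) opens the definition
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// and the macros that follow it supply the operation and the closing brace.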
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 //     OP     - operator (it's supposed to contain an assignment)
723 //     LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1, 2-byte - expect valid parameter, other - check before this macro
726 #define OP_CRITICAL(OP, LCK_ID)                                                \
727   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
728                                                                                \
729   (*lhs) OP(rhs);                                                              \
730                                                                                \
731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
734   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
735   (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs));                                        \
736   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
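
// As an illustrative (approximate) expansion, OP_UPDATE_CRITICAL(kmp_real32, +, 4r)
// becomes:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (kmp_real32)((*lhs) + ((kmp_real32)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);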
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange.  Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1.  If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
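
// For example, the instantiation ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +,
// 4r, 3, KMP_ARCH_X86) below passes KMP_ARCH_X86 as the FLAG: on IA-32 builds
// the GOMP critical-section path is kept, because the Gnu codegen may protect
// user atomics with GOMP_atomic_start()/GOMP_atomic_end(), while on Intel(R) 64
// builds the FLAG is 0 and the block is dead code that the compiler removes.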
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
762   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
763     KMP_CHECK_GTID;                                                            \
764     OP_CRITICAL(OP, 0);                                                        \
765     return;                                                                    \
766   }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
769   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
770     KMP_CHECK_GTID;                                                            \
771     OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
772     return;                                                                    \
773   }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 //     TYPE    - operands' type
788 //     BITS    - size in bits, used to distinguish low level calls
789 //     OP      - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
791   {                                                                            \
792     TYPE old_value, new_value;                                                 \
793     old_value = *(TYPE volatile *)lhs;                                         \
794     new_value = (TYPE)(old_value OP((TYPE)rhs));                               \
795     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
796         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
797         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
798       KMP_DO_PAUSE;                                                            \
799                                                                                \
800       old_value = *(TYPE volatile *)lhs;                                       \
801       new_value = (TYPE)(old_value OP((TYPE)rhs));                             \
802     }                                                                          \
803   }
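
// In effect OP_CMPXCHG(TYPE, BITS, OP) is a compare-and-swap retry loop:
// read *lhs, compute the new value, and try to publish it with
// KMP_COMPARE_AND_STORE_ACQ##BITS; if another thread changed *lhs in the
// meantime the store fails, so pause briefly, re-read and retry.  For example,
// OP_CMPXCHG(kmp_real32, 32, +) implements an atomic *lhs += rhs for 4-byte
// floats without taking a lock.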
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
813   {                                                                            \
814     struct _sss {                                                              \
815       TYPE cmp;                                                                \
816       kmp_int##BITS *vvv;                                                      \
817     };                                                                         \
818     struct _sss old_value, new_value;                                          \
819     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
820     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
821     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
822     new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
823     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
824         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
825         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
826       KMP_DO_PAUSE;                                                            \
827                                                                                \
828       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
829       new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
830     }                                                                          \
831   }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
836 
837 // ------------------------------------------------------------------------
838 // X86 or X86_64: no alignment problems ====================================
839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
840                          GOMP_FLAG)                                            \
841   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
842   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
843   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
844   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
845   }
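
// For illustration (an approximate expansion), the instantiation
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) used below yields
// roughly:
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // lock-free fetch-and-add
//   }
// with the GOMP critical-section path reduced to dead code because GOMP_FLAG
// is 0.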
846 // -------------------------------------------------------------------------
847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
848                        GOMP_FLAG)                                              \
849   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
850   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
851   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
852   }
853 #if USE_CMPXCHG_FIX
854 // -------------------------------------------------------------------------
855 // workaround for C78287 (complex(kind=4) data type)
856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
857                                   MASK, GOMP_FLAG)                             \
858   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
859   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
860   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
861   }
862 // end of the second part of the workaround for C78287
863 #endif // USE_CMPXCHG_FIX
864 
865 #else
866 // -------------------------------------------------------------------------
867 // Code for other architectures that don't handle unaligned accesses.
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
869                          GOMP_FLAG)                                            \
870   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
871   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
872   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
873     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
874     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
875   } else {                                                                     \
876     KMP_CHECK_GTID;                                                            \
877     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
878                        LCK_ID) /* unaligned address - use critical */          \
879   }                                                                            \
880   }
881 // -------------------------------------------------------------------------
882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
883                        GOMP_FLAG)                                              \
884   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
885   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
886   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
887     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
888   } else {                                                                     \
889     KMP_CHECK_GTID;                                                            \
890     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
891                        LCK_ID) /* unaligned address - use critical */          \
892   }                                                                            \
893   }
894 #if USE_CMPXCHG_FIX
895 // -------------------------------------------------------------------------
896 // workaround for C78287 (complex(kind=4) data type)
897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
898                                   MASK, GOMP_FLAG)                             \
899   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
900   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
901   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
902     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
903   } else {                                                                     \
904     KMP_CHECK_GTID;                                                            \
905     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
906                        LCK_ID) /* unaligned address - use critical */          \
907   }                                                                            \
908   }
909 // end of the second part of the workaround for C78287
910 #endif // USE_CMPXCHG_FIX
911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
912 
913 // Routines for ATOMIC 4-byte operands addition and subtraction
914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
915                  0) // __kmpc_atomic_fixed4_add
916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
917                  0) // __kmpc_atomic_fixed4_sub
918 
919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
920                KMP_ARCH_X86) // __kmpc_atomic_float4_add
921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
922                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
923 
924 // Routines for ATOMIC 8-byte operands addition and subtraction
925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
926                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
928                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
929 
930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
931                KMP_ARCH_X86) // __kmpc_atomic_float8_add
932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
933                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
934 
935 // ------------------------------------------------------------------------
936 // Entries definition for integer operands
937 //     TYPE_ID - operands type and size (fixed4, float4)
938 //     OP_ID   - operation identifier (add, sub, mul, ...)
939 //     TYPE    - operand type
940 //     BITS    - size in bits, used to distinguish low level calls
941 //     OP      - operator (used in critical section)
942 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
943 //     MASK    - used for alignment check
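
//     For example, MASK 3 requires 4-byte alignment
//     (((kmp_uintptr_t)lhs & 0x3) == 0) and MASK 7 requires 8-byte alignment;
//     the check is only compiled in on targets that cannot handle unaligned
//     accesses.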
944 
945 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
946 // ------------------------------------------------------------------------
947 // Routines for ATOMIC integer operands, other operators
948 // ------------------------------------------------------------------------
949 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
951                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
953                0) // __kmpc_atomic_fixed1_andb
954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
955                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
957                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
959                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
961                0) // __kmpc_atomic_fixed1_orb
962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
963                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
965                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
967                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
969                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
971                0) // __kmpc_atomic_fixed1_xor
972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
973                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
975                0) // __kmpc_atomic_fixed2_andb
976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
977                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
979                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
981                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
983                0) // __kmpc_atomic_fixed2_orb
984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
985                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
987                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
989                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
991                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
993                0) // __kmpc_atomic_fixed2_xor
994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
995                0) // __kmpc_atomic_fixed4_andb
996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
997                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
999                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1001                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1003                0) // __kmpc_atomic_fixed4_orb
1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1005                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1007                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1009                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1011                0) // __kmpc_atomic_fixed4_xor
1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1013                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1015                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1017                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1019                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1021                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1023                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1025                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1027                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1029                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1031                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1033                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1035                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1037                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1038 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
1039 
1040 /* ------------------------------------------------------------------------ */
1041 /* Routines for C/C++ Reduction operators && and ||                         */
1042 
1043 // ------------------------------------------------------------------------
1044 // Need separate macros for &&, || because there is no combined assignment
1045 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1047   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1048   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1049   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1050   }
1051 
1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1053 
1054 // ------------------------------------------------------------------------
1055 // X86 or X86_64: no alignment problems ===================================
1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1057   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1058   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1059   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1060   }
1061 
1062 #else
1063 // ------------------------------------------------------------------------
1064 // Code for other architectures that don't handle unaligned accesses.
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1066   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1067   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1068   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1069     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1070   } else {                                                                     \
1071     KMP_CHECK_GTID;                                                            \
1072     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1073   }                                                                            \
1074   }
1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1076 
1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1078               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1080               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1082               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1084               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1086               0) // __kmpc_atomic_fixed4_andl
1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1088               0) // __kmpc_atomic_fixed4_orl
1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1090               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1092               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1093 
1094 /* ------------------------------------------------------------------------- */
1095 /* Routines for Fortran operators that have no counterpart in C:            */
1096 /* MAX, MIN, .EQV., .NEQV.                                                   */
1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1099 
1100 // -------------------------------------------------------------------------
1101 // MIN and MAX need separate macros
1102 // OP - operator used to check whether any action is still needed
1103 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1104   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1105                                                                                \
1106   if (*lhs OP rhs) { /* still need actions? */                                 \
1107     *lhs = rhs;                                                                \
1108   }                                                                            \
1109   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1110 
1111 // -------------------------------------------------------------------------
1112 #ifdef KMP_GOMP_COMPAT
1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1114   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1115     KMP_CHECK_GTID;                                                            \
1116     MIN_MAX_CRITSECT(OP, 0);                                                   \
1117     return;                                                                    \
1118   }
1119 #else
1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1121 #endif /* KMP_GOMP_COMPAT */
1122 
1123 // -------------------------------------------------------------------------
1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1125   {                                                                            \
1126     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1127     TYPE old_value;                                                            \
1128     temp_val = *lhs;                                                           \
1129     old_value = temp_val;                                                      \
1130     while (old_value OP rhs && /* still need actions? */                       \
1131            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1132                (kmp_int##BITS *)lhs,                                           \
1133                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1134                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1135       KMP_CPU_PAUSE();                                                         \
1136       temp_val = *lhs;                                                         \
1137       old_value = temp_val;                                                    \
1138     }                                                                          \
1139   }
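
// For illustration, MIN_MAX_CMPXCHG(kmp_real32, 32, <) (used by the max
// entries below) re-reads *lhs and attempts to compare-and-swap rhs into it,
// but only while the current value still satisfies (*lhs < rhs); once *lhs is
// already >= rhs there is nothing left to do and the loop exits.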
1140 
1141 // -------------------------------------------------------------------------
1142 // 1-byte, 2-byte operands - use critical section
1143 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1144   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1145   if (*lhs OP rhs) { /* need actions? */                                       \
1146     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1147     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1148   }                                                                            \
1149   }
1150 
1151 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1152 
1153 // -------------------------------------------------------------------------
1154 // X86 or X86_64: no alignment problems ====================================
1155 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1156                          GOMP_FLAG)                                            \
1157   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1158   if (*lhs OP rhs) {                                                           \
1159     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1160     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1161   }                                                                            \
1162   }
1163 
1164 #else
1165 // -------------------------------------------------------------------------
1166 // Code for other architectures that don't handle unaligned accesses.
1167 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1168                          GOMP_FLAG)                                            \
1169   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1170   if (*lhs OP rhs) {                                                           \
1171     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1172     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1173       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1174     } else {                                                                   \
1175       KMP_CHECK_GTID;                                                          \
1176       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1177     }                                                                          \
1178   }                                                                            \
1179   }
1180 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1181 
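// Note: for the MIN_MAX entries below, OP is the "update needed" test rather
// than the min/max operation itself: max passes '<' (replace *lhs when
// *lhs < rhs) and min passes '>' (replace *lhs when *lhs > rhs).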
1182 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1183                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1184 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1185                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1186 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1187                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1188 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1189                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1190 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1191                  0) // __kmpc_atomic_fixed4_max
1192 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1193                  0) // __kmpc_atomic_fixed4_min
1194 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1195                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1196 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1197                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1198 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1199                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1200 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1201                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1202 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1203                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1204 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1205                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1206 #if KMP_HAVE_QUAD
1207 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1208                  1) // __kmpc_atomic_float16_max
1209 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1210                  1) // __kmpc_atomic_float16_min
1211 #if (KMP_ARCH_X86)
1212 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1213                  1) // __kmpc_atomic_float16_max_a16
1214 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1215                  1) // __kmpc_atomic_float16_min_a16
1216 #endif // (KMP_ARCH_X86)
1217 #endif // KMP_HAVE_QUAD
1218 // ------------------------------------------------------------------------
// Separate macros are needed for .EQV. because the complement (~) is required
// OP is ignored for critical sections; ^= ~ is used instead
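// (x .EQV. y is ~(x ^ y), which equals x ^ ~y, hence the "^= (TYPE) ~" form)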
1221 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1222   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1223   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1224   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1225   }
1226 
1227 // ------------------------------------------------------------------------
1228 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1229 // ------------------------------------------------------------------------
1230 // X86 or X86_64: no alignment problems ===================================
1231 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1232                         GOMP_FLAG)                                             \
1233   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1234   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1235   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1236   }
1237 // ------------------------------------------------------------------------
1238 #else
1239 // ------------------------------------------------------------------------
1240 // Code for other architectures that don't handle unaligned accesses.
1241 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1242                         GOMP_FLAG)                                             \
1243   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1244   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1245   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1246     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1247   } else {                                                                     \
1248     KMP_CHECK_GTID;                                                            \
1249     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1250   }                                                                            \
1251   }
1252 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1253 
1254 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1255                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1256 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1257                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1258 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1259                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1260 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1261                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1262 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1263                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1264 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1265                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1266 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1267                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1268 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1269                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1270 
1271 // ------------------------------------------------------------------------
1272 // Routines for Extended types: long double, _Quad, complex flavours (use
1273 // critical section)
1274 //     TYPE_ID, OP_ID, TYPE - detailed above
1275 //     OP      - operator
1276 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1277 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1278   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1279   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1280   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1281   }
1282 
1283 /* ------------------------------------------------------------------------- */
1284 // routines for long double type
1285 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1286                 1) // __kmpc_atomic_float10_add
1287 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1288                 1) // __kmpc_atomic_float10_sub
1289 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1290                 1) // __kmpc_atomic_float10_mul
1291 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1292                 1) // __kmpc_atomic_float10_div
1293 #if KMP_HAVE_QUAD
1294 // routines for _Quad type
1295 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1296                 1) // __kmpc_atomic_float16_add
1297 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1298                 1) // __kmpc_atomic_float16_sub
1299 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1300                 1) // __kmpc_atomic_float16_mul
1301 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1302                 1) // __kmpc_atomic_float16_div
1303 #if (KMP_ARCH_X86)
1304 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1305                 1) // __kmpc_atomic_float16_add_a16
1306 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1307                 1) // __kmpc_atomic_float16_sub_a16
1308 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1309                 1) // __kmpc_atomic_float16_mul_a16
1310 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1311                 1) // __kmpc_atomic_float16_div_a16
1312 #endif // (KMP_ARCH_X86)
1313 #endif // KMP_HAVE_QUAD
1314 // routines for complex types
1315 
1316 #if USE_CMPXCHG_FIX
1317 // workaround for C78287 (complex(kind=4) data type)
1318 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1319                           1) // __kmpc_atomic_cmplx4_add
1320 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1321                           1) // __kmpc_atomic_cmplx4_sub
1322 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1323                           1) // __kmpc_atomic_cmplx4_mul
1324 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1325                           1) // __kmpc_atomic_cmplx4_div
1326 // end of the workaround for C78287
1327 #else
1328 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1329 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1330 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1331 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1332 #endif // USE_CMPXCHG_FIX
1333 
1334 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1335 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1336 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1337 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1338 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1339                 1) // __kmpc_atomic_cmplx10_add
1340 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1341                 1) // __kmpc_atomic_cmplx10_sub
1342 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1343                 1) // __kmpc_atomic_cmplx10_mul
1344 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1345                 1) // __kmpc_atomic_cmplx10_div
1346 #if KMP_HAVE_QUAD
1347 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1348                 1) // __kmpc_atomic_cmplx16_add
1349 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1350                 1) // __kmpc_atomic_cmplx16_sub
1351 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1352                 1) // __kmpc_atomic_cmplx16_mul
1353 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1354                 1) // __kmpc_atomic_cmplx16_div
1355 #if (KMP_ARCH_X86)
1356 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1357                 1) // __kmpc_atomic_cmplx16_add_a16
1358 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1359                 1) // __kmpc_atomic_cmplx16_sub_a16
1360 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1361                 1) // __kmpc_atomic_cmplx16_mul_a16
1362 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1363                 1) // __kmpc_atomic_cmplx16_div_a16
1364 #endif // (KMP_ARCH_X86)
1365 #endif // KMP_HAVE_QUAD
1366 
1367 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1368 // Supported only on IA-32 architecture and Intel(R) 64
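// For illustration (a sketch of the expected lowering, not code emitted by
// this file): a reversed-operand update on a kmp_int32 x such as
//   #pragma omp atomic
//   x = expr / x;
// is expected to map to the matching _rev entry point, roughly
//   __kmpc_atomic_fixed4_div_rev(id_ref, gtid, &x, expr);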
1369 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1370 
1371 // ------------------------------------------------------------------------
1372 // Operation on *lhs, rhs bound by critical section
//     OP     - operator (a plain binary operator; the macro performs the assignment)
1374 //     LCK_ID - lock identifier
1375 // Note: don't check gtid as it should always be valid
1376 // 1, 2-byte - expect valid parameter, other - check before this macro
1377 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
1378   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1379                                                                                \
1380   (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
1381                                                                                \
1382   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1383 
1384 #ifdef KMP_GOMP_COMPAT
1385 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
1386   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1387     KMP_CHECK_GTID;                                                            \
1388     OP_CRITICAL_REV(TYPE, OP, 0);                                              \
1389     return;                                                                    \
1390   }
1391 
1392 #else
1393 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1394 #endif /* KMP_GOMP_COMPAT */
1395 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
1399 //     OP_ID   - operation identifier (add, sub, mul, ...)
1400 //     TYPE    - operands' type
1401 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1402   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1403                                                    TYPE *lhs, TYPE rhs) {      \
1404     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1405     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1406 
1407 // ------------------------------------------------------------------------
1408 // Operation on *lhs, rhs using "compare_and_store" routine
1409 //     TYPE    - operands' type
1410 //     BITS    - size in bits, used to distinguish low level calls
1411 //     OP      - operator
1412 // Note: temp_val introduced in order to force the compiler to read
1413 //       *lhs only once (w/o it the compiler reads *lhs twice)
1414 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1415   {                                                                            \
1416     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1417     TYPE old_value, new_value;                                                 \
1418     temp_val = *lhs;                                                           \
1419     old_value = temp_val;                                                      \
1420     new_value = (TYPE)(rhs OP old_value);                                      \
1421     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1422         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1423         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1424       KMP_DO_PAUSE;                                                            \
1425                                                                                \
1426       temp_val = *lhs;                                                         \
1427       old_value = temp_val;                                                    \
1428       new_value = (TYPE)(rhs OP old_value);                                    \
1429     }                                                                          \
1430   }
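// For reference, a simplified sketch of what OP_CMPXCHG_REV(kmp_real32, 32, /)
// amounts to (volatile qualifiers and bit-reinterpreting casts elided):
//   old_value = *lhs; new_value = rhs / old_value;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, <bits of old_value>,
//                                        <bits of new_value>)) {
//     KMP_DO_PAUSE;
//     old_value = *lhs;               // *lhs changed underneath us - re-read
//     new_value = rhs / old_value;    // and recompute before retrying
//   }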
1431 
1432 // -------------------------------------------------------------------------
1433 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1434   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1435   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1436   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1437   }
1438 
1439 // ------------------------------------------------------------------------
1440 // Entries definition for integer operands
//     TYPE_ID - operands' type and size (fixed4, float4)
1442 //     OP_ID   - operation identifier (add, sub, mul, ...)
1443 //     TYPE    - operand type
1444 //     BITS    - size in bits, used to distinguish low level calls
1445 //     OP      - operator (used in critical section)
1446 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1447 
1448 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1449 // ------------------------------------------------------------------------
1450 // Routines for ATOMIC integer operands, other operators
1451 // ------------------------------------------------------------------------
1452 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1453 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1454                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1455 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1456                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1457 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1458                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1459 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1460                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1461 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1462                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1463 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1464                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1465 
1466 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1467                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1468 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1469                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1470 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1471                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1472 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1473                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1474 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1475                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1476 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1477                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1478 
1479 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1480                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1481 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1482                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1483 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1484                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1485 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1486                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1487 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1488                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1489 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1490                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1491 
1492 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1493                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1494 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1495                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1496 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1497                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1498 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1499                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1500 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1501                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1502 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1503                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1504 
1505 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1506                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1507 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1508                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1509 
1510 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1511                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1512 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1513                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1514 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1515 
1516 // ------------------------------------------------------------------------
1517 // Routines for Extended types: long double, _Quad, complex flavours (use
1518 // critical section)
1519 //     TYPE_ID, OP_ID, TYPE - detailed above
1520 //     OP      - operator
1521 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1522 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1523   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1524   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1525   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1526   }
1527 
1528 /* ------------------------------------------------------------------------- */
1529 // routines for long double type
1530 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1531                     1) // __kmpc_atomic_float10_sub_rev
1532 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1533                     1) // __kmpc_atomic_float10_div_rev
1534 #if KMP_HAVE_QUAD
1535 // routines for _Quad type
1536 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1537                     1) // __kmpc_atomic_float16_sub_rev
1538 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1539                     1) // __kmpc_atomic_float16_div_rev
1540 #if (KMP_ARCH_X86)
1541 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1542                     1) // __kmpc_atomic_float16_sub_a16_rev
1543 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1544                     1) // __kmpc_atomic_float16_div_a16_rev
1545 #endif // KMP_ARCH_X86
1546 #endif // KMP_HAVE_QUAD
1547 
1548 // routines for complex types
1549 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1550                     1) // __kmpc_atomic_cmplx4_sub_rev
1551 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1552                     1) // __kmpc_atomic_cmplx4_div_rev
1553 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1554                     1) // __kmpc_atomic_cmplx8_sub_rev
1555 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1556                     1) // __kmpc_atomic_cmplx8_div_rev
1557 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1558                     1) // __kmpc_atomic_cmplx10_sub_rev
1559 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1560                     1) // __kmpc_atomic_cmplx10_div_rev
1561 #if KMP_HAVE_QUAD
1562 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1563                     1) // __kmpc_atomic_cmplx16_sub_rev
1564 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1565                     1) // __kmpc_atomic_cmplx16_div_rev
1566 #if (KMP_ARCH_X86)
1567 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1568                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1569 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1570                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1571 #endif // KMP_ARCH_X86
1572 #endif // KMP_HAVE_QUAD
1573 
1574 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1575 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1576 
1577 /* ------------------------------------------------------------------------ */
1578 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: to reduce the total number of type combinations, the compiler is     */
/*       expected to convert the RHS to the longest floating type, that is    */
/*       _Quad, before calling any of these routines.                         */
/* The conversion to _Quad is done by the compiler during the calculation,    */
/*    and the conversion back to TYPE happens before the assignment, like:    */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because of software emulation.           */
1586 /* ------------------------------------------------------------------------ */
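// For illustration (a sketch of the expected lowering): given
//   char c; _Quad q;
//   #pragma omp atomic
//   c = c * q;
// the compiler may emit a call to
//   __kmpc_atomic_fixed1_mul_fp(id_ref, gtid, &c, q);
// which performs *lhs = (char)( (_Quad)(*lhs) * rhs ) under the lock/cmpxchg
// scheme selected below.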
1587 
1588 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1589   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1590       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1591     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1592     KA_TRACE(100,                                                              \
1593              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1594               gtid));
1595 
1596 // -------------------------------------------------------------------------
1597 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1598                            GOMP_FLAG)                                          \
1599   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1600   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1601   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1602   }
1603 
1604 // -------------------------------------------------------------------------
1605 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1606 // -------------------------------------------------------------------------
1607 // X86 or X86_64: no alignment problems ====================================
1608 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1609                            LCK_ID, MASK, GOMP_FLAG)                            \
1610   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1611   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1612   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1613   }
1614 // -------------------------------------------------------------------------
1615 #else
1616 // ------------------------------------------------------------------------
1617 // Code for other architectures that don't handle unaligned accesses.
1618 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1619                            LCK_ID, MASK, GOMP_FLAG)                            \
1620   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1621   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1622   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1623     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1624   } else {                                                                     \
1625     KMP_CHECK_GTID;                                                            \
1626     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1627                        LCK_ID) /* unaligned address - use critical */          \
1628   }                                                                            \
1629   }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631 
1632 // -------------------------------------------------------------------------
1633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1634 // -------------------------------------------------------------------------
1635 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1636                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1637   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1638   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1639   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1640   }
1641 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1642                                LCK_ID, GOMP_FLAG)                              \
1643   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1644   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1645   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1646   }
1647 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1648 
1649 // RHS=float8
1650 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1651                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1652 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1653                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1654 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1655                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1656 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1657                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1658 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1659                    0) // __kmpc_atomic_fixed4_mul_float8
1660 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1661                    0) // __kmpc_atomic_fixed4_div_float8
1662 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1663                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1664 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1665                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1666 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1667                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1668 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1669                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1670 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1671                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1672 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1673                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1674 
1675 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1676 // use them)
1677 #if KMP_HAVE_QUAD
1678 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1679                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1681                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1683                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1685                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1687                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1689                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1691                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1693                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1694 
1695 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1696                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1698                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1700                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1702                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1704                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1706                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1708                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1710                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1711 
1712 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1713                    0) // __kmpc_atomic_fixed4_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1715                    0) // __kmpc_atomic_fixed4u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1717                    0) // __kmpc_atomic_fixed4_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1719                    0) // __kmpc_atomic_fixed4u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1721                    0) // __kmpc_atomic_fixed4_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1723                    0) // __kmpc_atomic_fixed4u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1725                    0) // __kmpc_atomic_fixed4_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1727                    0) // __kmpc_atomic_fixed4u_div_fp
1728 
1729 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1730                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1732                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1733 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1734                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1736                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1737 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1738                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1740                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1741 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1742                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1743 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1744                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1745 
1746 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1747                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1748 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1749                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1751                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1752 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1753                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1754 
1755 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1756                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1757 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1758                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1759 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1760                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1761 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1762                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1763 
1764 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1765                    1) // __kmpc_atomic_float10_add_fp
1766 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1767                    1) // __kmpc_atomic_float10_sub_fp
1768 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1769                    1) // __kmpc_atomic_float10_mul_fp
1770 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1771                    1) // __kmpc_atomic_float10_div_fp
1772 
1773 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1774 // Reverse operations
1775 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1776                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1778                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1779 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1780                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1781 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1782                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1783 
1784 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1785                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1787                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1788 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1789                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1790 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1791                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1792 
1793 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1794                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1795 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1796                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1797 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1798                        0) // __kmpc_atomic_fixed4_div_rev_fp
1799 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1800                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1801 
1802 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1803                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1804 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1805                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1806 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1807                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1808 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1809                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1810 
1811 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1812                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1814                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1815 
1816 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1817                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1819                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1820 
1821 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1822                        1) // __kmpc_atomic_float10_sub_rev_fp
1823 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1824                        1) // __kmpc_atomic_float10_div_rev_fp
1825 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1826 
1827 #endif // KMP_HAVE_QUAD
1828 
1829 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1830 // ------------------------------------------------------------------------
1831 // X86 or X86_64: no alignment problems ====================================
1832 #if USE_CMPXCHG_FIX
1833 // workaround for C78287 (complex(kind=4) data type)
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1835                              LCK_ID, MASK, GOMP_FLAG)                          \
1836   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1837   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1838   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1839   }
1840 // end of the second part of the workaround for C78287
1841 #else
1842 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1843                              LCK_ID, MASK, GOMP_FLAG)                          \
1844   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1845   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1846   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1847   }
1848 #endif // USE_CMPXCHG_FIX
1849 #else
1850 // ------------------------------------------------------------------------
1851 // Code for other architectures that don't handle unaligned accesses.
1852 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1853                              LCK_ID, MASK, GOMP_FLAG)                          \
1854   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1855   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1856   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1857     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1858   } else {                                                                     \
1859     KMP_CHECK_GTID;                                                            \
1860     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1861                        LCK_ID) /* unaligned address - use critical */          \
1862   }                                                                            \
1863   }
1864 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1865 
1866 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1867                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1868 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1869                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1870 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1871                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1872 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1873                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1874 
1875 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1876 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1877 
1878 // ------------------------------------------------------------------------
1879 // Atomic READ routines
1880 
1881 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
1885 //     OP_ID   - operation identifier (add, sub, mul, ...)
1886 //     TYPE    - operands' type
1887 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1888   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1889                                              TYPE *loc) {                      \
1890     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1891     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1892 
1893 // ------------------------------------------------------------------------
// Operation on *loc using "compare_and_store_ret" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
1902 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1903   {                                                                            \
1904     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1905     union f_i_union {                                                          \
1906       TYPE f_val;                                                              \
1907       kmp_int##BITS i_val;                                                     \
1908     };                                                                         \
1909     union f_i_union old_value;                                                 \
1910     temp_val = *loc;                                                           \
1911     old_value.f_val = temp_val;                                                \
1912     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1913         (kmp_int##BITS *)loc,                                                  \
1914         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1915         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1916     new_value = old_value.f_val;                                               \
1917     return new_value;                                                          \
1918   }
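// Note: the observed value is passed as both the expected and the new value,
// so *loc is never modified; the compare-and-store is used only to obtain an
// atomic full-width read, and its return value is the old value at *loc.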
1919 
1920 // -------------------------------------------------------------------------
// Operation on *loc bound by critical section
//     OP     - operator (not used here; the read is a plain assignment)
1923 //     LCK_ID - lock identifier
1924 // Note: don't check gtid as it should always be valid
1925 // 1, 2-byte - expect valid parameter, other - check before this macro
1926 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1927   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1928                                                                                \
1929   new_value = (*loc);                                                          \
1930                                                                                \
1931   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1932 
1933 // -------------------------------------------------------------------------
1934 #ifdef KMP_GOMP_COMPAT
1935 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1936   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1937     KMP_CHECK_GTID;                                                            \
1938     OP_CRITICAL_READ(OP, 0);                                                   \
1939     return new_value;                                                          \
1940   }
1941 #else
1942 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1943 #endif /* KMP_GOMP_COMPAT */
1944 
1945 // -------------------------------------------------------------------------
1946 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1947   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1948   TYPE new_value;                                                              \
1949   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1950   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1951   return new_value;                                                            \
1952   }
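// Note: ATOMIC_FIXED_READ above implements the read as an atomic
// fetch-and-add of zero, which returns the current value of *loc while
// leaving it unchanged.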
1953 // -------------------------------------------------------------------------
1954 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1955   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1956   TYPE new_value;                                                              \
1957   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1958   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1959   }
1960 // ------------------------------------------------------------------------
1961 // Routines for Extended types: long double, _Quad, complex flavours (use
1962 // critical section)
1963 //     TYPE_ID, OP_ID, TYPE - detailed above
1964 //     OP      - operator
1965 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1966 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1967   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1968   TYPE new_value;                                                              \
1969   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1970   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1971   return new_value;                                                            \
1972   }
1973 
1974 // ------------------------------------------------------------------------
1975 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1976 // value doesn't work.
1977 // Let's return the read value through the additional parameter.
1978 #if (KMP_OS_WINDOWS)
1979 
1980 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1981   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1982                                                                                \
1983   (*out) = (*loc);                                                             \
1984                                                                                \
1985   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1986 // ------------------------------------------------------------------------
1987 #ifdef KMP_GOMP_COMPAT
1988 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1989   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1990     KMP_CHECK_GTID;                                                            \
1991     OP_CRITICAL_READ_WRK(OP, 0);                                               \
1992   }
1993 #else
1994 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1995 #endif /* KMP_GOMP_COMPAT */
1996 // ------------------------------------------------------------------------
1997 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1998   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1999                                          TYPE *loc) {                          \
2000     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2001     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2002 
2003 // ------------------------------------------------------------------------
2004 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
2005   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
2006   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
2007   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
2008   }
2009 
2010 #endif // KMP_OS_WINDOWS
2011 
2012 // ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2014 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2015 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2016                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2017 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2018                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2019 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2020                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2021 
2022 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2023 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2024                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2025 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2026                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2027 
2028 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2029                      1) // __kmpc_atomic_float10_rd
2030 #if KMP_HAVE_QUAD
2031 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2032                      1) // __kmpc_atomic_float16_rd
2033 #endif // KMP_HAVE_QUAD
2034 
2035 // Fix for CQ220361 on Windows* OS
2036 #if (KMP_OS_WINDOWS)
2037 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2038                          1) // __kmpc_atomic_cmplx4_rd
2039 #else
2040 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2041                      1) // __kmpc_atomic_cmplx4_rd
2042 #endif // (KMP_OS_WINDOWS)
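// For reference, the two shapes of the cmplx4 read entry point are:
//   Windows* OS: void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref,
//                                             int gtid, kmp_cmplx32 *loc);
//   other OSes:  kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
//                                                    kmp_cmplx32 *loc);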
2043 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2044                      1) // __kmpc_atomic_cmplx8_rd
2045 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2046                      1) // __kmpc_atomic_cmplx10_rd
2047 #if KMP_HAVE_QUAD
2048 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2049                      1) // __kmpc_atomic_cmplx16_rd
2050 #if (KMP_ARCH_X86)
2051 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2052                      1) // __kmpc_atomic_float16_a16_rd
2053 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2054                      1) // __kmpc_atomic_cmplx16_a16_rd
2055 #endif // (KMP_ARCH_X86)
2056 #endif // KMP_HAVE_QUAD
2057 
2058 // ------------------------------------------------------------------------
2059 // Atomic WRITE routines
2060 
2061 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2062   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2063   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2064   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2065   }
2066 // ------------------------------------------------------------------------
2067 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2068   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2069   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2070   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2071   }
2072 
2073 // ------------------------------------------------------------------------
2074 // Operation on *lhs, rhs using "compare_and_store" routine
2075 //     TYPE    - operands' type
2076 //     BITS    - size in bits, used to distinguish low level calls
2077 //     OP      - operator
2078 // Note: temp_val introduced in order to force the compiler to read
2079 //       *lhs only once (w/o it the compiler reads *lhs twice)
2080 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2081   {                                                                            \
2082     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2083     TYPE old_value, new_value;                                                 \
2084     temp_val = *lhs;                                                           \
2085     old_value = temp_val;                                                      \
2086     new_value = rhs;                                                           \
2087     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2088         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2089         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2090       KMP_CPU_PAUSE();                                                         \
2091                                                                                \
2092       temp_val = *lhs;                                                         \
2093       old_value = temp_val;                                                    \
2094       new_value = rhs;                                                         \
2095     }                                                                          \
2096   }
2097 
2098 // -------------------------------------------------------------------------
2099 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2100   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2101   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2102   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2103   }
2104 
2105 // ------------------------------------------------------------------------
2106 // Routines for Extended types: long double, _Quad, complex flavours (use
2107 // critical section)
2108 //     TYPE_ID, OP_ID, TYPE - detailed above
2109 //     OP      - operator
//     LCK_ID  - lock identifier, used to select the proper lock variable
2111 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2112   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2113   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2114   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2115   }
2116 // -------------------------------------------------------------------------
2117 
2118 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2119                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2120 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2121                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2122 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2123                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2124 #if (KMP_ARCH_X86)
2125 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #else
2128 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2129                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2130 #endif // (KMP_ARCH_X86)
2131 
2132 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2133                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2134 #if (KMP_ARCH_X86)
2135 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2136                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #else
2138 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2139                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2140 #endif // (KMP_ARCH_X86)
2141 
2142 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2143                    1) // __kmpc_atomic_float10_wr
2144 #if KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2146                    1) // __kmpc_atomic_float16_wr
2147 #endif // KMP_HAVE_QUAD
2148 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2149 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2150                    1) // __kmpc_atomic_cmplx8_wr
2151 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2152                    1) // __kmpc_atomic_cmplx10_wr
2153 #if KMP_HAVE_QUAD
2154 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2155                    1) // __kmpc_atomic_cmplx16_wr
2156 #if (KMP_ARCH_X86)
2157 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2158                    1) // __kmpc_atomic_float16_a16_wr
2159 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2160                    1) // __kmpc_atomic_cmplx16_a16_wr
2161 #endif // (KMP_ARCH_X86)
2162 #endif // KMP_HAVE_QUAD
2163 
2164 // ------------------------------------------------------------------------
2165 // Atomic CAPTURE routines
2166 
// Beginning of a definition (provides name, parameters, debug trace)
2168 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2169 //     fixed)
2170 //     OP_ID   - operation identifier (add, sub, mul, ...)
2171 //     TYPE    - operands' type
2172 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2173   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2174                                              TYPE *lhs, TYPE rhs, int flag) {  \
2175     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2176     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
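// For example, ATOMIC_BEGIN_CPT(fixed4, add_cpt, kmp_int32, kmp_int32) opens
// the definition of the following entrypoint (its body is supplied by the
// macros below):
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag);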
2177 
2178 // -------------------------------------------------------------------------
2179 // Operation on *lhs, rhs bound by critical section
2180 //     OP     - operator (it's supposed to contain an assignment)
2181 //     LCK_ID - lock identifier
// Note: gtid is not checked as it should always be valid.
// 1- and 2-byte operands expect a valid gtid; other sizes should check it
// before this macro is used.
2184 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2185   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2186                                                                                \
2187   if (flag) {                                                                  \
2188     (*lhs) OP rhs;                                                             \
2189     new_value = (*lhs);                                                        \
2190   } else {                                                                     \
2191     new_value = (*lhs);                                                        \
2192     (*lhs) OP rhs;                                                             \
2193   }                                                                            \
2194                                                                                \
2195   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2196   return new_value;
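// Worked example of the flag semantics: with *lhs == 2, rhs == 3 and OP '+=',
//   flag != 0:  *lhs becomes 5, new_value == 5   (capture the value after)
//   flag == 0:  new_value == 2, *lhs becomes 5   (capture the value before)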
2197 
2198 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
2199   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2200                                                                                \
2201   if (flag) {                                                                  \
2202     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2203     new_value = (*lhs);                                                        \
2204   } else {                                                                     \
2205     new_value = (*lhs);                                                        \
2206     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2207   }                                                                            \
2208                                                                                \
2209   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2210   return new_value;
2211 
2212 // ------------------------------------------------------------------------
2213 #ifdef KMP_GOMP_COMPAT
2214 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
2215   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2216     KMP_CHECK_GTID;                                                            \
2217     OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
2218   }
2219 #else
2220 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2221 #endif /* KMP_GOMP_COMPAT */
2222 
2223 // ------------------------------------------------------------------------
2224 // Operation on *lhs, rhs using "compare_and_store" routine
2225 //     TYPE    - operands' type
2226 //     BITS    - size in bits, used to distinguish low level calls
2227 //     OP      - operator
2228 // Note: temp_val introduced in order to force the compiler to read
2229 //       *lhs only once (w/o it the compiler reads *lhs twice)
2230 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2231   {                                                                            \
2232     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2233     TYPE old_value, new_value;                                                 \
2234     temp_val = *lhs;                                                           \
2235     old_value = temp_val;                                                      \
2236     new_value = (TYPE)(old_value OP rhs);                                      \
2237     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2238         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2239         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2240       KMP_CPU_PAUSE();                                                         \
2241                                                                                \
2242       temp_val = *lhs;                                                         \
2243       old_value = temp_val;                                                    \
2244       new_value = (TYPE)(old_value OP rhs);                                    \
2245     }                                                                          \
2246     if (flag) {                                                                \
2247       return new_value;                                                        \
2248     } else                                                                     \
2249       return old_value;                                                        \
2250   }
2251 
2252 // -------------------------------------------------------------------------
2253 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2254   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2255   TYPE new_value;                                                              \
2256   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2257   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2258   }
2259 
2260 // -------------------------------------------------------------------------
2261 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2262   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2263   TYPE old_value, new_value;                                                   \
2264   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2265   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2266   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2267   if (flag) {                                                                  \
2268     return old_value OP rhs;                                                   \
2269   } else                                                                       \
2270     return old_value;                                                          \
2271   }
2272 // -------------------------------------------------------------------------
2273 
2274 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2275                      0) // __kmpc_atomic_fixed4_add_cpt
2276 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2277                      0) // __kmpc_atomic_fixed4_sub_cpt
2278 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2279                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2280 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2281                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2282 
2283 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2284                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2285 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2286                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2287 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2288                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2289 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2290                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2291 
2292 // ------------------------------------------------------------------------
2293 // Entries definition for integer operands
2294 //     TYPE_ID - operands type and size (fixed4, float4)
2295 //     OP_ID   - operation identifier (add, sub, mul, ...)
2296 //     TYPE    - operand type
2297 //     BITS    - size in bits, used to distinguish low level calls
2298 //     OP      - operator (used in critical section)
2299 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2300 // ------------------------------------------------------------------------
2301 // Routines for ATOMIC integer operands, other operators
2302 // ------------------------------------------------------------------------
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2304 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2305                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2307                    0) // __kmpc_atomic_fixed1_andb_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2309                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2311                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2313                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2315                    0) // __kmpc_atomic_fixed1_orb_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2317                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2319                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2321                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2323                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2325                    0) // __kmpc_atomic_fixed1_xor_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2327                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2329                    0) // __kmpc_atomic_fixed2_andb_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2331                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2333                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2335                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2337                    0) // __kmpc_atomic_fixed2_orb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2339                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2341                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2343                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2345                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2347                    0) // __kmpc_atomic_fixed2_xor_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2349                    0) // __kmpc_atomic_fixed4_andb_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2351                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2353                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2355                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2357                    0) // __kmpc_atomic_fixed4_orb_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2359                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2361                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2363                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2365                    0) // __kmpc_atomic_fixed4_xor_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2367                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2369                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2371                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2373                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2375                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2377                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2379                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2381                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2383                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2384 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2385                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2386 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2387                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2388 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2389                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2390 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2391                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2393 
// CAPTURE routines for mixed types, with RHS of type float16 (_Quad)
2395 #if KMP_HAVE_QUAD
2396 
// Beginning of a definition (provides name, parameters, debug trace)
2398 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2399 //     fixed)
2400 //     OP_ID   - operation identifier (add, sub, mul, ...)
2401 //     TYPE    - operands' type
2402 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2403   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2404       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2405     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2406     KA_TRACE(100,                                                              \
2407              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2408               gtid));
2409 
2410 // -------------------------------------------------------------------------
2411 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2412                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2413   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2414   TYPE new_value;                                                              \
2415   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2416   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2417   }
2418 
2419 // -------------------------------------------------------------------------
2420 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2421                                 LCK_ID, GOMP_FLAG)                             \
2422   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2423   TYPE new_value;                                                              \
2424   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2425   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2426   }
2427 
2428 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2429                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2431                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2433                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2435                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2437                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2439                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2441                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2443                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2444 
2445 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2446                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2448                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2450                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2452                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2454                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2456                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2458                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2460                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2461 
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2463                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2465                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2467                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2469                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2471                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2473                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2475                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2477                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2478 
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2480                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2482                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2484                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2486                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2488                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2490                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2492                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2494                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2495 
2496 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2497                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2499                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2501                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2503                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2504 
2505 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2506                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2507 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2508                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2509 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2510                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2512                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2513 
2514 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2515                         1) // __kmpc_atomic_float10_add_cpt_fp
2516 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2517                         1) // __kmpc_atomic_float10_sub_cpt_fp
2518 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2519                         1) // __kmpc_atomic_float10_mul_cpt_fp
2520 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2521                         1) // __kmpc_atomic_float10_div_cpt_fp
2522 
2523 #endif // KMP_HAVE_QUAD
2524 
2525 // ------------------------------------------------------------------------
2526 // Routines for C/C++ Reduction operators && and ||
2527 
2528 // -------------------------------------------------------------------------
2529 // Operation on *lhs, rhs bound by critical section
2530 //     OP     - operator (it's supposed to contain an assignment)
2531 //     LCK_ID - lock identifier
// Note: gtid is not checked as it should always be valid.
// 1- and 2-byte operands expect a valid gtid; other sizes should check it
// before this macro is used.
2534 #define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2535   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2536                                                                                \
2537   if (flag) {                                                                  \
2538     new_value OP rhs;                                                          \
2539     (*lhs) = new_value;                                                        \
2540   } else {                                                                     \
2541     new_value = (*lhs);                                                        \
2542     (*lhs) OP rhs;                                                             \
2543   }                                                                            \
2544                                                                                \
2545   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2546 
2547 // ------------------------------------------------------------------------
2548 #ifdef KMP_GOMP_COMPAT
2549 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2550   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2551     KMP_CHECK_GTID;                                                            \
2552     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2553     return new_value;                                                          \
2554   }
2555 #else
2556 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2557 #endif /* KMP_GOMP_COMPAT */
2558 
2559 // ------------------------------------------------------------------------
// Need separate macros for && and || because C/C++ has no compound
// assignment form (&&=, ||=) for these operators
2561 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2562   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2563   TYPE new_value;                                                              \
2564   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2565   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2566   }
2567 
2568 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2569                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2570 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2571                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2572 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2573                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2574 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2575                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2576 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2577                   0) // __kmpc_atomic_fixed4_andl_cpt
2578 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2579                   0) // __kmpc_atomic_fixed4_orl_cpt
2580 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2581                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2582 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2583                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2584 
2585 // -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
2587 // MAX, MIN, .EQV., .NEQV.
2588 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2589 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2590 
2591 // -------------------------------------------------------------------------
2592 // MIN and MAX need separate macros
// OP - comparison operator used to decide whether any action is needed
2594 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2595   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2596                                                                                \
2597   if (*lhs OP rhs) { /* still need actions? */                                 \
2598     old_value = *lhs;                                                          \
2599     *lhs = rhs;                                                                \
2600     if (flag)                                                                  \
2601       new_value = rhs;                                                         \
2602     else                                                                       \
2603       new_value = old_value;                                                   \
2604   } else {                                                                     \
2605     new_value = *lhs;                                                          \
2606   }                                                                            \
2607   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2608   return new_value;
2609 
2610 // -------------------------------------------------------------------------
2611 #ifdef KMP_GOMP_COMPAT
2612 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2613   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2614     KMP_CHECK_GTID;                                                            \
2615     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2616   }
2617 #else
2618 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2619 #endif /* KMP_GOMP_COMPAT */
2620 
2621 // -------------------------------------------------------------------------
2622 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2623   {                                                                            \
2624     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2625     /*TYPE old_value; */                                                       \
2626     temp_val = *lhs;                                                           \
2627     old_value = temp_val;                                                      \
2628     while (old_value OP rhs && /* still need actions? */                       \
2629            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2630                (kmp_int##BITS *)lhs,                                           \
2631                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2632                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2633       KMP_CPU_PAUSE();                                                         \
2634       temp_val = *lhs;                                                         \
2635       old_value = temp_val;                                                    \
2636     }                                                                          \
2637     if (flag)                                                                  \
2638       return rhs;                                                              \
2639     else                                                                       \
2640       return old_value;                                                        \
2641   }
2642 
2643 // -------------------------------------------------------------------------
2644 // 1-byte, 2-byte operands - use critical section
2645 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2646   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2647   TYPE new_value, old_value;                                                   \
2648   if (*lhs OP rhs) { /* need actions? */                                       \
2649     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2650     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2651   }                                                                            \
2652   return *lhs;                                                                 \
2653   }
2654 
2655 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2656   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2657   TYPE new_value, old_value;                                                   \
2658   if (*lhs OP rhs) {                                                           \
2659     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2660     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2661   }                                                                            \
2662   return *lhs;                                                                 \
2663   }
2664 
2665 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2666                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2667 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2668                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2669 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2670                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2671 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2672                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2673 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2674                      0) // __kmpc_atomic_fixed4_max_cpt
2675 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2676                      0) // __kmpc_atomic_fixed4_min_cpt
2677 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2678                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2679 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2680                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2681 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2682                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2683 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2684                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2685 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2686                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2687 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2688                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2689 #if KMP_HAVE_QUAD
2690 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2691                      1) // __kmpc_atomic_float16_max_cpt
2692 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2693                      1) // __kmpc_atomic_float16_min_cpt
2694 #if (KMP_ARCH_X86)
2695 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2696                      1) // __kmpc_atomic_float16_max_a16_cpt
2697 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
2699 #endif // (KMP_ARCH_X86)
2700 #endif // KMP_HAVE_QUAD
2701 
2702 // ------------------------------------------------------------------------
2703 #ifdef KMP_GOMP_COMPAT
2704 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2705   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2706     KMP_CHECK_GTID;                                                            \
2707     OP_CRITICAL_CPT(OP, 0);                                                    \
2708   }
2709 #else
2710 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2711 #endif /* KMP_GOMP_COMPAT */
2712 // ------------------------------------------------------------------------
2713 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2714   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2715   TYPE new_value;                                                              \
2716   OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
2717   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2718   }
2719 
2720 // ------------------------------------------------------------------------
2721 
2722 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2723                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2724 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2725                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2726 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2727                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2728 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2729                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2730 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2731                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2732 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2733                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2734 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2735                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2736 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2737                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2738 
2739 // ------------------------------------------------------------------------
2740 // Routines for Extended types: long double, _Quad, complex flavours (use
2741 // critical section)
2742 //     TYPE_ID, OP_ID, TYPE - detailed above
2743 //     OP      - operator
//     LCK_ID  - lock identifier, used to select the proper lock variable
2745 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2746   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2747   TYPE new_value;                                                              \
2748   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2749   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2750   }
2751 
2752 // ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines that return the captured value
// don't work on Win_32e, so the captured value is returned through an
// additional output parameter instead.
2755 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2756   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2757                                                                                \
2758   if (flag) {                                                                  \
2759     (*lhs) OP rhs;                                                             \
2760     (*out) = (*lhs);                                                           \
2761   } else {                                                                     \
2762     (*out) = (*lhs);                                                           \
2763     (*lhs) OP rhs;                                                             \
2764   }                                                                            \
2765                                                                                \
2766   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2767   return;
2768 // ------------------------------------------------------------------------
2769 
2770 #ifdef KMP_GOMP_COMPAT
2771 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2772   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2773     KMP_CHECK_GTID;                                                            \
2774     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2775   }
2776 #else
2777 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2778 #endif /* KMP_GOMP_COMPAT */
2779 // ------------------------------------------------------------------------
2780 
2781 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2782   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2783                                          TYPE rhs, TYPE *out, int flag) {      \
2784     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2785     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2786 // ------------------------------------------------------------------------
2787 
2788 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2789   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2790   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2791   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2792   }
2793 // The end of workaround for cmplx4
2794 
2795 /* ------------------------------------------------------------------------- */
2796 // routines for long double type
2797 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2798                     1) // __kmpc_atomic_float10_add_cpt
2799 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2800                     1) // __kmpc_atomic_float10_sub_cpt
2801 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2802                     1) // __kmpc_atomic_float10_mul_cpt
2803 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2804                     1) // __kmpc_atomic_float10_div_cpt
2805 #if KMP_HAVE_QUAD
2806 // routines for _Quad type
2807 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2808                     1) // __kmpc_atomic_float16_add_cpt
2809 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2810                     1) // __kmpc_atomic_float16_sub_cpt
2811 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2812                     1) // __kmpc_atomic_float16_mul_cpt
2813 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2814                     1) // __kmpc_atomic_float16_div_cpt
2815 #if (KMP_ARCH_X86)
2816 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2817                     1) // __kmpc_atomic_float16_add_a16_cpt
2818 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2819                     1) // __kmpc_atomic_float16_sub_a16_cpt
2820 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2821                     1) // __kmpc_atomic_float16_mul_a16_cpt
2822 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2823                     1) // __kmpc_atomic_float16_div_a16_cpt
2824 #endif // (KMP_ARCH_X86)
2825 #endif // KMP_HAVE_QUAD
2826 
2827 // routines for complex types
2828 
2829 // cmplx4 routines to return void
2830 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2831                         1) // __kmpc_atomic_cmplx4_add_cpt
2832 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2833                         1) // __kmpc_atomic_cmplx4_sub_cpt
2834 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2835                         1) // __kmpc_atomic_cmplx4_mul_cpt
2836 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2837                         1) // __kmpc_atomic_cmplx4_div_cpt
2838 
2839 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2840                     1) // __kmpc_atomic_cmplx8_add_cpt
2841 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2842                     1) // __kmpc_atomic_cmplx8_sub_cpt
2843 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2844                     1) // __kmpc_atomic_cmplx8_mul_cpt
2845 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2846                     1) // __kmpc_atomic_cmplx8_div_cpt
2847 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2848                     1) // __kmpc_atomic_cmplx10_add_cpt
2849 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2850                     1) // __kmpc_atomic_cmplx10_sub_cpt
2851 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2852                     1) // __kmpc_atomic_cmplx10_mul_cpt
2853 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2854                     1) // __kmpc_atomic_cmplx10_div_cpt
2855 #if KMP_HAVE_QUAD
2856 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2857                     1) // __kmpc_atomic_cmplx16_add_cpt
2858 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2859                     1) // __kmpc_atomic_cmplx16_sub_cpt
2860 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2861                     1) // __kmpc_atomic_cmplx16_mul_cpt
2862 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2863                     1) // __kmpc_atomic_cmplx16_div_cpt
2864 #if (KMP_ARCH_X86)
2865 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2866                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2867 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2868                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2869 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2870                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2871 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2872                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2873 #endif // (KMP_ARCH_X86)
2874 #endif // KMP_HAVE_QUAD
2875 
// OpenMP 4.0 capture forms with the operands reversed (x appears on the right
// of the operator), needed for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64
2879 
2880 // -------------------------------------------------------------------------
2881 // Operation on *lhs, rhs bound by critical section
2882 //     OP     - operator (it's supposed to contain an assignment)
2883 //     LCK_ID - lock identifier
// Note: gtid is not checked as it should always be valid.
// 1- and 2-byte operands expect a valid gtid; other sizes should check it
// before this macro is used.
2886 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
2887   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2888                                                                                \
2889   if (flag) {                                                                  \
2890     /*temp_val = (*lhs);*/                                                     \
2891     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2892     new_value = (*lhs);                                                        \
2893   } else {                                                                     \
2894     new_value = (*lhs);                                                        \
2895     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2896   }                                                                            \
2897   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2898   return new_value;
2899 
2900 // ------------------------------------------------------------------------
2901 #ifdef KMP_GOMP_COMPAT
2902 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
2903   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2904     KMP_CHECK_GTID;                                                            \
2905     OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
2906   }
2907 #else
2908 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2909 #endif /* KMP_GOMP_COMPAT */
2910 
2911 // ------------------------------------------------------------------------
2912 // Operation on *lhs, rhs using "compare_and_store" routine
2913 //     TYPE    - operands' type
2914 //     BITS    - size in bits, used to distinguish low level calls
2915 //     OP      - operator
2916 // Note: temp_val introduced in order to force the compiler to read
2917 //       *lhs only once (w/o it the compiler reads *lhs twice)
2918 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2919   {                                                                            \
2920     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2921     TYPE old_value, new_value;                                                 \
2922     temp_val = *lhs;                                                           \
2923     old_value = temp_val;                                                      \
2924     new_value = (TYPE)(rhs OP old_value);                                      \
2925     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2926         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2927         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2928       KMP_CPU_PAUSE();                                                         \
2929                                                                                \
2930       temp_val = *lhs;                                                         \
2931       old_value = temp_val;                                                    \
2932       new_value = (TYPE)(rhs OP old_value);                                    \
2933     }                                                                          \
2934     if (flag) {                                                                \
2935       return new_value;                                                        \
2936     } else                                                                     \
2937       return old_value;                                                        \
2938   }
2939 
2940 // -------------------------------------------------------------------------
2941 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2942   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2943   TYPE new_value;                                                              \
2944   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
2945   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2946   }
2947 
2948 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2949                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2951                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2953                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2955                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2957                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2959                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2961                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2963                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2965                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2967                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2969                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2970 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2971                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2972 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2973                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2974 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2975                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2976 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2977                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2978 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2979                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2980 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2981                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2982 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2983                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2984 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2985                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2986 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2987                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2988 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2989                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2990 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2991                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2992 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2993                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2994 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2995                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2996 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2997                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2998 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2999                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3000 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3001                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3002 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3003                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3004 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
3005 
3006 // ------------------------------------------------------------------------
3007 // Routines for Extended types: long double, _Quad, complex flavours (use
3008 // critical section)
3009 //     TYPE_ID, OP_ID, TYPE - detailed above
3010 //     OP      - operator
3011 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
3012 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
3013   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
3014   TYPE new_value;                                                              \
3015   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
3016   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3017   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
3018   }
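// Note: this variant has the same capture-reverse semantics as the cmpxchg
// form above, but serializes through the per-type atomic lock (LCK_ID); it is
// used for the wide types below (long double, _Quad, complex) that have no
// compare-and-swap of the matching width.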
3019 
3020 /* ------------------------------------------------------------------------- */
3021 // routines for long double type
3022 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3023                         1) // __kmpc_atomic_float10_sub_cpt_rev
3024 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3025                         1) // __kmpc_atomic_float10_div_cpt_rev
3026 #if KMP_HAVE_QUAD
3027 // routines for _Quad type
3028 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3029                         1) // __kmpc_atomic_float16_sub_cpt_rev
3030 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3031                         1) // __kmpc_atomic_float16_div_cpt_rev
3032 #if (KMP_ARCH_X86)
3033 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3034                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3035 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3036                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3037 #endif // (KMP_ARCH_X86)
3038 #endif // KMP_HAVE_QUAD
3039 
3040 // routines for complex types
3041 
3042 // ------------------------------------------------------------------------
3043 // Workaround for cmplx4. Regular routines with return value don't work
3044 // on Win_32e. Let's return captured values through the additional parameter.
3045 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3046   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3047                                                                                \
3048   if (flag) {                                                                  \
3049     (*lhs) = (rhs)OP(*lhs);                                                    \
3050     (*out) = (*lhs);                                                           \
3051   } else {                                                                     \
3052     (*out) = (*lhs);                                                           \
3053     (*lhs) = (rhs)OP(*lhs);                                                    \
3054   }                                                                            \
3055                                                                                \
3056   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3057   return;
3058 // ------------------------------------------------------------------------
3059 
3060 #ifdef KMP_GOMP_COMPAT
3061 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3062   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3063     KMP_CHECK_GTID;                                                            \
3064     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3065   }
3066 #else
3067 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3068 #endif /* KMP_GOMP_COMPAT */
3069 // ------------------------------------------------------------------------
3070 
3071 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3072                                     GOMP_FLAG)                                 \
3073   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3074   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3075   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3076   }
3077 // The end of workaround for cmplx4
3078 
3079 // !!! TODO: check if we need to return void for cmplx4 routines
3080 // cmplx4 routines to return void
3081 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3082                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3083 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3084                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
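// Illustrative signature only (an assumption based on how the WRK macros use
// their arguments, not a quote of the generated code): the cmplx4 routines
// above return the captured value through an extra output parameter instead
// of a return value, roughly
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
// with *out receiving the value after the update when flag != 0 and the value
// before it otherwise (see OP_CRITICAL_CPT_REV_WRK above).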
3085 
3086 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3087                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3088 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3089                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3090 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3091                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3092 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3093                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3094 #if KMP_HAVE_QUAD
3095 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3096                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3097 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3098                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3099 #if (KMP_ARCH_X86)
3100 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3101                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3102 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3103                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3104 #endif // (KMP_ARCH_X86)
3105 #endif // KMP_HAVE_QUAD
3106 
3107 // Capture reverse for mixed type: RHS=float16
3108 #if KMP_HAVE_QUAD
3109 
3110 // Beginning of a definition (provides name, parameters, debug trace)
3111 //     TYPE_ID - operand type and size (fixed*, fixed*u for signed and
3112 //     unsigned integers)
3113 //     OP_ID   - operation identifier (add, sub, mul, ...)
3114 //     TYPE    - operands' type
3115 // -------------------------------------------------------------------------
3116 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3117                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3118   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3119   TYPE new_value;                                                              \
3120   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3121   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3122   }
3123 
3124 // -------------------------------------------------------------------------
3125 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3126                                     LCK_ID, GOMP_FLAG)                         \
3127   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3128   TYPE new_value;                                                              \
3129   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
3130   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
3131   }
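// Illustrative semantics (a sketch; the exact signature comes from
// ATOMIC_BEGIN_CPT_MIX, defined earlier in this file): the "_fp" routines
// instantiated below keep the LHS in its original, narrower type while the
// RHS is a _Quad. For example, __kmpc_atomic_fixed4_sub_cpt_rev_fp
// atomically performs
//   *lhs = (kmp_int32)(rhs - *lhs);   // rhs is a _Quad
// and returns the captured kmp_int32 value selected by the flag argument.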
3132 
3133 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3134                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3136                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3138                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3139 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3140                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3141 
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3143                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3145                            1,
3146                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3148                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3149 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3150                            1,
3151                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3152 
3153 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3154                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3155 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3156                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3157 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3158                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3159 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3160                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3161 
3162 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3163                            7,
3164                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3165 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3166                            8i, 7,
3167                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3169                            7,
3170                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3171 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3172                            8i, 7,
3173                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3174 
3175 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3176                            4r, 3,
3177                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3179                            4r, 3,
3180                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3181 
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3183                            8r, 7,
3184                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3185 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3186                            8r, 7,
3187                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3188 
3189 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3190                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3191 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3192                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3193 
3194 #endif // KMP_HAVE_QUAD
3195 
3196 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3197 
3198 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3199   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3200                                      TYPE rhs) {                               \
3201     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3202     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3203 
3204 #define CRITICAL_SWP(LCK_ID)                                                   \
3205   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3206                                                                                \
3207   old_value = (*lhs);                                                          \
3208   (*lhs) = rhs;                                                                \
3209                                                                                \
3210   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3211   return old_value;
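// Illustrative lowering (a sketch with placeholder names loc, x, v, expr):
// the swap routines implement the capture-write form
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// so for a long double x a compiler could emit
//   v = __kmpc_atomic_float10_swp(&loc, gtid, &x, expr);
// The return value is the value x held before rhs was stored.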
3212 
3213 // ------------------------------------------------------------------------
3214 #ifdef KMP_GOMP_COMPAT
3215 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3216   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3217     KMP_CHECK_GTID;                                                            \
3218     CRITICAL_SWP(0);                                                           \
3219   }
3220 #else
3221 #define GOMP_CRITICAL_SWP(FLAG)
3222 #endif /* KMP_GOMP_COMPAT */
3223 
3224 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3225   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3226   TYPE old_value;                                                              \
3227   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3228   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3229   return old_value;                                                            \
3230   }
3231 // ------------------------------------------------------------------------
3232 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3233   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3234   TYPE old_value;                                                              \
3235   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3236   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3237   return old_value;                                                            \
3238   }
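// Illustrative expansion (reconstructed by hand from the macros above, so
// treat it as a sketch): ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, ...) produces
// roughly
//   kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid,
//                                      kmp_int32 *lhs, kmp_int32 rhs) {
//     /* debug assert, trace and optional GOMP-compat critical path omitted */
//     kmp_int32 old_value;
//     old_value = KMP_XCHG_FIXED32(lhs, rhs); // single atomic exchange
//     return old_value;
//   }
// i.e. the whole swap collapses to one exchange primitive when one exists for
// the operand width; ATOMIC_XCHG_FLOAT_SWP differs only in using KMP_XCHG_REAL.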
3239 
3240 // ------------------------------------------------------------------------
3241 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3242   {                                                                            \
3243     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3244     TYPE old_value, new_value;                                                 \
3245     temp_val = *lhs;                                                           \
3246     old_value = temp_val;                                                      \
3247     new_value = rhs;                                                           \
3248     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3249         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3250         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3251       KMP_CPU_PAUSE();                                                         \
3252                                                                                \
3253       temp_val = *lhs;                                                         \
3254       old_value = temp_val;                                                    \
3255       new_value = rhs;                                                         \
3256     }                                                                          \
3257     return old_value;                                                          \
3258   }
3259 
3260 // -------------------------------------------------------------------------
3261 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3262   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3263   TYPE old_value;                                                              \
3264   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3265   CMPXCHG_SWP(TYPE, BITS)                                                      \
3266   }
3267 
3268 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3269 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3270 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3271 
3272 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3273                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3274 
3275 #if (KMP_ARCH_X86)
3276 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3277                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3278 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3279                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3280 #else
3281 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3282 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3283                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3284 #endif // (KMP_ARCH_X86)
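// Design note: on 32-bit x86 the 8-byte swaps are routed through the
// CMPXCHG_SWP retry loop rather than a single exchange, presumably because
// IA-32 offers an 8-byte compare-and-exchange (cmpxchg8b) but no 8-byte xchg;
// other targets use the plain 64-bit KMP_XCHG_* exchange above.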
3285 
3286 // ------------------------------------------------------------------------
3287 // Routines for Extended types: long double, _Quad, complex flavours (use
3288 // critical section)
3289 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3290   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3291   TYPE old_value;                                                              \
3292   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3293   CRITICAL_SWP(LCK_ID)                                                         \
3294   }
3295 
3296 // ------------------------------------------------------------------------
3297 // !!! TODO: check if we need to return void for cmplx4 routines
3298 // Workaround for cmplx4. Regular routines with return value don't work
3299 // on Win_32e. Let's return captured values through the additional parameter.
3300 
3301 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3302   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3303                                      TYPE rhs, TYPE *out) {                    \
3304     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3305     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3306 
3307 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3308   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3309                                                                                \
3310   tmp = (*lhs);                                                                \
3311   (*lhs) = (rhs);                                                              \
3312   (*out) = tmp;                                                                \
3313   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3314   return;
3315 // ------------------------------------------------------------------------
3316 
3317 #ifdef KMP_GOMP_COMPAT
3318 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3319   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3320     KMP_CHECK_GTID;                                                            \
3321     CRITICAL_SWP_WRK(0);                                                       \
3322   }
3323 #else
3324 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3325 #endif /* KMP_GOMP_COMPAT */
3326 // ------------------------------------------------------------------------
3327 
3328 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3329   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3330   TYPE tmp;                                                                    \
3331   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3332   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3333   }
3334 // The end of workaround for cmplx4
3335 
3336 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3337 #if KMP_HAVE_QUAD
3338 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3339 #endif // KMP_HAVE_QUAD
3340 // cmplx4 routine to return void
3341 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
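// For reference (taken from ATOMIC_BEGIN_SWP_WRK and CRITICAL_SWP_WRK above,
// reformatted here as a sketch): the cmplx4 swap returns its captured value
// through the extra *out parameter,
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid,
//                                 kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                 kmp_cmplx32 *out);
// with *out receiving the value *lhs held before rhs was stored.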
3342 
3343 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
3344 // __kmpc_atomic_cmplx4_swp
3345 
3346 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3347 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3348 #if KMP_HAVE_QUAD
3349 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3350 #if (KMP_ARCH_X86)
3351 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3352                     1) // __kmpc_atomic_float16_a16_swp
3353 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3354                     1) // __kmpc_atomic_cmplx16_a16_swp
3355 #endif // (KMP_ARCH_X86)
3356 #endif // KMP_HAVE_QUAD
3357 
3358 // End of OpenMP 4.0 Capture
3359 
3360 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3361 
3362 #undef OP_CRITICAL
3363 
3364 /* ------------------------------------------------------------------------ */
3365 /* Generic atomic routines                                                  */
3366 
3367 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3368                      void (*f)(void *, void *, void *)) {
3369   KMP_DEBUG_ASSERT(__kmp_init_serial);
3370 
3371   if (
3372 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3373       FALSE /* must use lock */
3374 #else
3375       TRUE
3376 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3377       ) {
3378     kmp_int8 old_value, new_value;
3379 
3380     old_value = *(kmp_int8 *)lhs;
3381     (*f)(&new_value, &old_value, rhs);
3382 
3383     /* TODO: Should this be acquire or release? */
3384     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3385                                        *(kmp_int8 *)&new_value)) {
3386       KMP_CPU_PAUSE();
3387 
3388       old_value = *(kmp_int8 *)lhs;
3389       (*f)(&new_value, &old_value, rhs);
3390     }
3391 
3392     return;
3393   } else {
3394 // All 1-byte data is of integer data type.
3395 
3396 #ifdef KMP_GOMP_COMPAT
3397     if (__kmp_atomic_mode == 2) {
3398       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3399     } else
3400 #endif /* KMP_GOMP_COMPAT */
3401       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3402 
3403     (*f)(lhs, lhs, rhs);
3404 
3405 #ifdef KMP_GOMP_COMPAT
3406     if (__kmp_atomic_mode == 2) {
3407       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3408     } else
3409 #endif /* KMP_GOMP_COMPAT */
3410       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3411   }
3412 }
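// Illustrative use of the generic entry points (a sketch with hypothetical
// names -- op_helper, loc, x and y are placeholders, not part of this
// runtime): for an atomic update on a 1-, 2-, 4- or 8-byte object whose
// operation has no specialized entry point, a compiler can outline the
// combiner and pass it in:
//
//   static void op_helper(void *out, void *op1, void *op2) {
//     // write "*op1 combined with *op2" into *out
//     *(kmp_int8 *)out = (kmp_int8)(*(kmp_int8 *)op1 ^ *(kmp_int8 *)op2);
//   }
//   ...
//   __kmpc_atomic_1(&loc, gtid, &x, &y, op_helper);
//
// On the lock-free path above, f is called as (*f)(&new_value, &old_value,
// rhs) inside a compare-and-store retry loop; on the locked path it is called
// once as (*f)(lhs, lhs, rhs) under the appropriate atomic lock.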
3413 
3414 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3415                      void (*f)(void *, void *, void *)) {
3416   if (
3417 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3418       FALSE /* must use lock */
3419 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3420       TRUE /* no alignment problems */
3421 #else
3422       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3423 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3424       ) {
3425     kmp_int16 old_value, new_value;
3426 
3427     old_value = *(kmp_int16 *)lhs;
3428     (*f)(&new_value, &old_value, rhs);
3429 
3430     /* TODO: Should this be acquire or release? */
3431     while (!KMP_COMPARE_AND_STORE_ACQ16(
3432         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3433       KMP_CPU_PAUSE();
3434 
3435       old_value = *(kmp_int16 *)lhs;
3436       (*f)(&new_value, &old_value, rhs);
3437     }
3438 
3439     return;
3440   } else {
3441 // All 2-byte data is of integer data type.
3442 
3443 #ifdef KMP_GOMP_COMPAT
3444     if (__kmp_atomic_mode == 2) {
3445       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3446     } else
3447 #endif /* KMP_GOMP_COMPAT */
3448       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3449 
3450     (*f)(lhs, lhs, rhs);
3451 
3452 #ifdef KMP_GOMP_COMPAT
3453     if (__kmp_atomic_mode == 2) {
3454       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3455     } else
3456 #endif /* KMP_GOMP_COMPAT */
3457       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3458   }
3459 }
3460 
3461 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3462                      void (*f)(void *, void *, void *)) {
3463   KMP_DEBUG_ASSERT(__kmp_init_serial);
3464 
3465   if (
3466 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3467 // Gomp compatibility is broken if this routine is called for floats.
3468 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3469       TRUE /* no alignment problems */
3470 #else
3471       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3472 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3473       ) {
3474     kmp_int32 old_value, new_value;
3475 
3476     old_value = *(kmp_int32 *)lhs;
3477     (*f)(&new_value, &old_value, rhs);
3478 
3479     /* TODO: Should this be acquire or release? */
3480     while (!KMP_COMPARE_AND_STORE_ACQ32(
3481         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3482       KMP_CPU_PAUSE();
3483 
3484       old_value = *(kmp_int32 *)lhs;
3485       (*f)(&new_value, &old_value, rhs);
3486     }
3487 
3488     return;
3489   } else {
3490 // Use __kmp_atomic_lock_4i for all 4-byte data,
3491 // even if it isn't of integer data type.
3492 
3493 #ifdef KMP_GOMP_COMPAT
3494     if (__kmp_atomic_mode == 2) {
3495       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3496     } else
3497 #endif /* KMP_GOMP_COMPAT */
3498       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3499 
3500     (*f)(lhs, lhs, rhs);
3501 
3502 #ifdef KMP_GOMP_COMPAT
3503     if (__kmp_atomic_mode == 2) {
3504       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3505     } else
3506 #endif /* KMP_GOMP_COMPAT */
3507       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3508   }
3509 }
3510 
3511 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3512                      void (*f)(void *, void *, void *)) {
3513   KMP_DEBUG_ASSERT(__kmp_init_serial);
3514   if (
3515 
3516 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3517       FALSE /* must use lock */
3518 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3519       TRUE /* no alignment problems */
3520 #else
3521       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3522 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3523       ) {
3524     kmp_int64 old_value, new_value;
3525 
3526     old_value = *(kmp_int64 *)lhs;
3527     (*f)(&new_value, &old_value, rhs);
3528     /* TODO: Should this be acquire or release? */
3529     while (!KMP_COMPARE_AND_STORE_ACQ64(
3530         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3531       KMP_CPU_PAUSE();
3532 
3533       old_value = *(kmp_int64 *)lhs;
3534       (*f)(&new_value, &old_value, rhs);
3535     }
3536 
3537     return;
3538   } else {
3539 // Use __kmp_atomic_lock_8i for all 8-byte data,
3540 // even if it isn't of integer data type.
3541 
3542 #ifdef KMP_GOMP_COMPAT
3543     if (__kmp_atomic_mode == 2) {
3544       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3545     } else
3546 #endif /* KMP_GOMP_COMPAT */
3547       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3548 
3549     (*f)(lhs, lhs, rhs);
3550 
3551 #ifdef KMP_GOMP_COMPAT
3552     if (__kmp_atomic_mode == 2) {
3553       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3554     } else
3555 #endif /* KMP_GOMP_COMPAT */
3556       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3557   }
3558 }
3559 
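// The 10-, 16-, 20- and 32-byte generic routines below have no lock-free fast
// path: they always serialize the user-supplied combiner through the
// corresponding per-size atomic lock (or through the single
// GOMP-compatibility lock when __kmp_atomic_mode == 2).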
3560 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3561                       void (*f)(void *, void *, void *)) {
3562   KMP_DEBUG_ASSERT(__kmp_init_serial);
3563 
3564 #ifdef KMP_GOMP_COMPAT
3565   if (__kmp_atomic_mode == 2) {
3566     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3567   } else
3568 #endif /* KMP_GOMP_COMPAT */
3569     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3570 
3571   (*f)(lhs, lhs, rhs);
3572 
3573 #ifdef KMP_GOMP_COMPAT
3574   if (__kmp_atomic_mode == 2) {
3575     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3576   } else
3577 #endif /* KMP_GOMP_COMPAT */
3578     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3579 }
3580 
3581 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3582                       void (*f)(void *, void *, void *)) {
3583   KMP_DEBUG_ASSERT(__kmp_init_serial);
3584 
3585 #ifdef KMP_GOMP_COMPAT
3586   if (__kmp_atomic_mode == 2) {
3587     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3588   } else
3589 #endif /* KMP_GOMP_COMPAT */
3590     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3591 
3592   (*f)(lhs, lhs, rhs);
3593 
3594 #ifdef KMP_GOMP_COMPAT
3595   if (__kmp_atomic_mode == 2) {
3596     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3597   } else
3598 #endif /* KMP_GOMP_COMPAT */
3599     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3600 }
3601 
3602 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3603                       void (*f)(void *, void *, void *)) {
3604   KMP_DEBUG_ASSERT(__kmp_init_serial);
3605 
3606 #ifdef KMP_GOMP_COMPAT
3607   if (__kmp_atomic_mode == 2) {
3608     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3609   } else
3610 #endif /* KMP_GOMP_COMPAT */
3611     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3612 
3613   (*f)(lhs, lhs, rhs);
3614 
3615 #ifdef KMP_GOMP_COMPAT
3616   if (__kmp_atomic_mode == 2) {
3617     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3618   } else
3619 #endif /* KMP_GOMP_COMPAT */
3620     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3621 }
3622 
3623 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3624                       void (*f)(void *, void *, void *)) {
3625   KMP_DEBUG_ASSERT(__kmp_init_serial);
3626 
3627 #ifdef KMP_GOMP_COMPAT
3628   if (__kmp_atomic_mode == 2) {
3629     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3630   } else
3631 #endif /* KMP_GOMP_COMPAT */
3632     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3633 
3634   (*f)(lhs, lhs, rhs);
3635 
3636 #ifdef KMP_GOMP_COMPAT
3637   if (__kmp_atomic_mode == 2) {
3638     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3639   } else
3640 #endif /* KMP_GOMP_COMPAT */
3641     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3642 }
3643 
3644 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3645 // compiler; duplicated in order not to use third-party names in pure Intel code
3646 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3647 void __kmpc_atomic_start(void) {
3648   int gtid = __kmp_entry_gtid();
3649   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3650   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3651 }
3652 
3653 void __kmpc_atomic_end(void) {
3654   int gtid = __kmp_get_gtid();
3655   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3656   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3657 }
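// Illustrative use (a sketch; x, expr and some_unsupported_update are
// placeholders): a compiler that cannot map an atomic construct onto any of
// the typed entry points can bracket the update with the global atomic lock:
//
//   __kmpc_atomic_start();
//   x = some_unsupported_update(x, expr); // executed under __kmp_atomic_lock
//   __kmpc_atomic_end();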
3658 
3659 /*!
3660 @}
3661 */
3662 
3663 // end of file
3664