1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance, on the IA-32 architecture
an atomic increment like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
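
For illustration, a compiler that does not inline could emit a call like this
(a sketch; `loc` and `gtid` stand for the source-location descriptor and
global thread id the compiler already has available):
@code
// s++;  lowers, approximately, to
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode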
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
80-bit float (x87 extended precision)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
56 complex<float10> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
For non-commutative operations, `_rev` can also be added for the reversed
operation (for example, `__kmpc_atomic_fixed8_sub_rev` computes
`*lhs = rhs - *lhs`). For the functions that capture the result, the suffix
`_cpt` is added.
81 
82 Update Functions
83 ================
The general form of an atomic function that just performs an update (without a
`capture`) is
86 @code
void __kmpc_atomic_<datatype>_<operation>(ident_t *id_ref, int gtid,
                                          TYPE *lhs, TYPE rhs);
89 @endcode
@param id_ref  a pointer to the source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
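
As an illustrative sketch (not taken from any particular compiler's output),
an atomic update such as `x *= 2.5` on a `double` could be lowered to:
@code
// #pragma omp atomic
// x *= 2.5;   lowers, approximately, to
__kmpc_atomic_float8_mul(&loc, gtid, &x, 2.5);
@endcode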
94 
95 `capture` functions
96 ===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument that determines which of the two is returned.
Their general form is therefore
101 @code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt(ident_t *id_ref, int gtid,
                                              TYPE *lhs, TYPE rhs, int flag);
104 @endcode
@param id_ref  a pointer to the source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
111 
The one exception to this is the `complex<float>` type, where the result is
not returned; instead, a pointer to the output location is passed as an extra
argument.
114 
115 They look like
116 @code
void __kmpc_atomic_cmplx4_<op>_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
119 @endcode
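
Returning to the general form, capturing the old value, as in `v = s++;` for
an `int` `s`, could be lowered as follows (a sketch, with `loc` and `gtid`
as above):
@code
// v = s++;   lowers, approximately, to
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 1, 0 /* capture old value */);
@endcode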
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
TYPE __kmpc_atomic_<type>_rd(ident_t *id_ref, int gtid, TYPE *loc);
132 @endcode
133 
134 For the write operations the form is
135 @code
void __kmpc_atomic_<type>_wr(ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs);
138 @endcode
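
For example, atomic reads and writes of a 64-bit integer `s` could be lowered
as follows (a sketch; `loc` and `gtid` as above):
@code
// #pragma omp atomic read
// v = s;   lowers, approximately, to
v = __kmpc_atomic_fixed8_rd(&loc, gtid, &s);
// #pragma omp atomic write
// s = v;   lowers, approximately, to
__kmpc_atomic_fixed8_wr(&loc, gtid, &s, v);
@endcode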
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are the x87 80-bit extended-precision format and are
now rarely used.)
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602    on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604    in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648                                           kmp_cmplx128_a4_t &rhs) {
649   return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652                                           kmp_cmplx128_a4_t &rhs) {
653   return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656                                           kmp_cmplx128_a4_t &rhs) {
657   return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660                                           kmp_cmplx128_a4_t &rhs) {
661   return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665                                            kmp_cmplx128_a16_t &rhs) {
666   return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669                                            kmp_cmplx128_a16_t &rhs) {
670   return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673                                            kmp_cmplx128_a16_t &rhs) {
674   return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677                                            kmp_cmplx128_a16_t &rhs) {
678   return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
// All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID                                                         \
689   if (gtid == KMP_GTID_UNKNOWN) {                                              \
690     gtid = __kmp_entry_gtid();                                                 \
691   } // check and get gtid when needed
692 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
//     unsigned fixed-width integers)
696 //     OP_ID   - operation identifier (add, sub, mul, ...)
697 //     TYPE    - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
699   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
700                                              TYPE *lhs, TYPE rhs) {            \
701     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
702     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 //     OP     - operator (it's supposed to contain an assignment)
723 //     LCK_ID - lock identifier
// Note: gtid is not checked here as it should always be valid
// (1- and 2-byte routines expect a valid gtid; others check it before using
// this macro)
726 #define OP_CRITICAL(OP, LCK_ID)                                                \
727   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
728                                                                                \
729   (*lhs) OP(rhs);                                                              \
730                                                                                \
731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
734   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
735   (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
736   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
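
// For illustration, OP_UPDATE_CRITICAL(kmp_real32, +, 4r) expands to roughly
// (a sketch of the macro above, with ATOMIC_LOCK4r resolved):
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (kmp_real32)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);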
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1.  If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
762   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
763     KMP_CHECK_GTID;                                                            \
764     OP_CRITICAL(OP, 0);                                                        \
765     return;                                                                    \
766   }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
769   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
770     KMP_CHECK_GTID;                                                            \
771     OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
772     return;                                                                    \
773   }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 //     TYPE    - operands' type
788 //     BITS    - size in bits, used to distinguish low level calls
789 //     OP      - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
791   {                                                                            \
792     TYPE old_value, new_value;                                                 \
793     old_value = *(TYPE volatile *)lhs;                                         \
794     new_value = (TYPE)(old_value OP rhs);                                      \
795     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
796         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
797         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
798       KMP_DO_PAUSE;                                                            \
799                                                                                \
800       old_value = *(TYPE volatile *)lhs;                                       \
801       new_value = (TYPE)(old_value OP rhs);                                    \
802     }                                                                          \
803   }
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
813   {                                                                            \
814     struct _sss {                                                              \
815       TYPE cmp;                                                                \
816       kmp_int##BITS *vvv;                                                      \
817     };                                                                         \
818     struct _sss old_value, new_value;                                          \
819     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
820     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
821     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
822     new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
823     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
824         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
825         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
826       KMP_DO_PAUSE;                                                            \
827                                                                                \
828       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
829       new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
830     }                                                                          \
831   }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses the
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG.
838 #undef OP_CMPXCHG
839 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
840   {                                                                            \
841     struct _sss {                                                              \
842       TYPE cmp;                                                                \
843       kmp_int##BITS *vvv;                                                      \
844     };                                                                         \
845     struct _sss old_value, new_value;                                          \
846     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
847     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
848     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
849     new_value.cmp = old_value.cmp OP rhs;                                      \
850     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
851         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
852         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
853       KMP_DO_PAUSE;                                                            \
854                                                                                \
855       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
856       new_value.cmp = old_value.cmp OP rhs;                                    \
857     }                                                                          \
858   }
859 
860 #undef OP_UPDATE_CRITICAL
861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
862   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
863   (*lhs) = (*lhs)OP rhs;                                                       \
864   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865 
866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867 
868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
869 
870 // ------------------------------------------------------------------------
871 // X86 or X86_64: no alignment problems ====================================
872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
873                          GOMP_FLAG)                                            \
874   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
875   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
876   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
877   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
878   }
879 // -------------------------------------------------------------------------
880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
881                        GOMP_FLAG)                                              \
882   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
883   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
884   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
885   }
886 #if USE_CMPXCHG_FIX
887 // -------------------------------------------------------------------------
888 // workaround for C78287 (complex(kind=4) data type)
889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
890                                   MASK, GOMP_FLAG)                             \
891   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
892   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
893   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
894   }
895 // end of the second part of the workaround for C78287
896 #endif // USE_CMPXCHG_FIX
897 
898 #else
899 // -------------------------------------------------------------------------
900 // Code for other architectures that don't handle unaligned accesses.
901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
902                          GOMP_FLAG)                                            \
903   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
904   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
905   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
906     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
907     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
908   } else {                                                                     \
909     KMP_CHECK_GTID;                                                            \
910     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
911                        LCK_ID) /* unaligned address - use critical */          \
912   }                                                                            \
913   }
914 // -------------------------------------------------------------------------
915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
916                        GOMP_FLAG)                                              \
917   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
918   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
919   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
920     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
921   } else {                                                                     \
922     KMP_CHECK_GTID;                                                            \
923     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
924                        LCK_ID) /* unaligned address - use critical */          \
925   }                                                                            \
926   }
927 #if USE_CMPXCHG_FIX
928 // -------------------------------------------------------------------------
929 // workaround for C78287 (complex(kind=4) data type)
930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
931                                   MASK, GOMP_FLAG)                             \
932   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
933   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
934   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
935     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
936   } else {                                                                     \
937     KMP_CHECK_GTID;                                                            \
938     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
939                        LCK_ID) /* unaligned address - use critical */          \
940   }                                                                            \
941   }
942 // end of the second part of the workaround for C78287
943 #endif // USE_CMPXCHG_FIX
944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945 
946 // Routines for ATOMIC 4-byte operands addition and subtraction
947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948                  0) // __kmpc_atomic_fixed4_add
949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950                  0) // __kmpc_atomic_fixed4_sub
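
// As a hedged sketch, on x86/x86_64 the fixed4 add instantiation above
// generates roughly:
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // fetch-and-add (lock; xadd on x86)
//   }
// (the GOMP_FLAG of 0 makes the OP_UPDATE_GOMP_CRITICAL block dead code; other
// architectures add the alignment check and critical-section fallback)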
951 
952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953                KMP_ARCH_X86) // __kmpc_atomic_float4_add
954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956 
957 // Routines for ATOMIC 8-byte operands addition and subtraction
958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962 
963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964                KMP_ARCH_X86) // __kmpc_atomic_float8_add
965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967 
968 // ------------------------------------------------------------------------
// Entry definitions for integer operands
//     TYPE_ID - operands' type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
973 //     BITS    - size in bits, used to distinguish low level calls
974 //     OP      - operator (used in critical section)
975 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
976 //     MASK    - used for alignment check
977 
978 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
979 // ------------------------------------------------------------------------
980 // Routines for ATOMIC integer operands, other operators
981 // ------------------------------------------------------------------------
982 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986                0) // __kmpc_atomic_fixed1_andb
987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994                0) // __kmpc_atomic_fixed1_orb
995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004                0) // __kmpc_atomic_fixed1_xor
1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008                0) // __kmpc_atomic_fixed2_andb
1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016                0) // __kmpc_atomic_fixed2_orb
1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026                0) // __kmpc_atomic_fixed2_xor
1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028                0) // __kmpc_atomic_fixed4_andb
1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036                0) // __kmpc_atomic_fixed4_orb
1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044                0) // __kmpc_atomic_fixed4_xor
1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* Routines for C/C++ Reduction operators && and ||                         */
1075 
1076 // ------------------------------------------------------------------------
// Need separate macros for && and ||, because there is no compound-assignment
//   operator for them
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1080   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1081   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1082   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1083   }
1084 
1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086 
1087 // ------------------------------------------------------------------------
1088 // X86 or X86_64: no alignment problems ===================================
1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1091   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1092   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1093   }
1094 
1095 #else
1096 // ------------------------------------------------------------------------
1097 // Code for other architectures that don't handle unaligned accesses.
1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1100   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1101   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1102     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1103   } else {                                                                     \
1104     KMP_CHECK_GTID;                                                            \
1105     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1106   }                                                                            \
1107   }
1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109 
1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119               0) // __kmpc_atomic_fixed4_andl
1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121               0) // __kmpc_atomic_fixed4_orl
1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126 
1127 /* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no counterpart in C:            */
1129 /* MAX, MIN, .EQV., .NEQV.                                                   */
1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1132 
1133 // -------------------------------------------------------------------------
1134 // MIN and MAX need separate macros
// OP - operator used to check whether any action is needed
1136 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1137   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1138                                                                                \
1139   if (*lhs OP rhs) { /* still need actions? */                                 \
1140     *lhs = rhs;                                                                \
1141   }                                                                            \
1142   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143 
1144 // -------------------------------------------------------------------------
1145 #ifdef KMP_GOMP_COMPAT
1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1147   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1148     KMP_CHECK_GTID;                                                            \
1149     MIN_MAX_CRITSECT(OP, 0);                                                   \
1150     return;                                                                    \
1151   }
1152 #else
1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154 #endif /* KMP_GOMP_COMPAT */
1155 
1156 // -------------------------------------------------------------------------
1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1158   {                                                                            \
1159     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1160     TYPE old_value;                                                            \
1161     temp_val = *lhs;                                                           \
1162     old_value = temp_val;                                                      \
1163     while (old_value OP rhs && /* still need actions? */                       \
1164            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1165                (kmp_int##BITS *)lhs,                                           \
1166                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1167                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1168       temp_val = *lhs;                                                         \
1169       old_value = temp_val;                                                    \
1170     }                                                                          \
1171   }
1172 
1173 // -------------------------------------------------------------------------
1174 // 1-byte, 2-byte operands - use critical section
1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1176   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1177   if (*lhs OP rhs) { /* need actions? */                                       \
1178     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1179     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1180   }                                                                            \
1181   }
1182 
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // -------------------------------------------------------------------------
1186 // X86 or X86_64: no alignment problems ====================================
1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1188                          GOMP_FLAG)                                            \
1189   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1190   if (*lhs OP rhs) {                                                           \
1191     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1192     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1193   }                                                                            \
1194   }
1195 
1196 #else
1197 // -------------------------------------------------------------------------
1198 // Code for other architectures that don't handle unaligned accesses.
1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1200                          GOMP_FLAG)                                            \
1201   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1202   if (*lhs OP rhs) {                                                           \
1203     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1204     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1205       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1206     } else {                                                                   \
1207       KMP_CHECK_GTID;                                                          \
1208       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1209     }                                                                          \
1210   }                                                                            \
1211   }
1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
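// Note: MASK is the hex alignment mask, BITS/8 - 1 (0, 1, 3, 7 for 8-, 16-,
// 32-, 64-bit operands), so `(kmp_uintptr_t)lhs & 0x##MASK` is zero exactly
// when lhs is naturally aligned for a BITS-wide compare-and-store.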
1213 
1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223                  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225                  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
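// For illustration, a front end that chooses not to inline the operation
// could lower an atomic max update of a 32-bit integer, e.g. the Fortran
//   !$omp atomic
//   x = max(x, v)
// into a call such as (source-location and gtid handling elided; a sketch
// only):
//   __kmpc_atomic_fixed4_max(&loc, gtid, &x, v);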
1238 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240                  1) // __kmpc_atomic_float10_max
1241 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242                  1) // __kmpc_atomic_float10_min
1243 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244 #if KMP_HAVE_QUAD
1245 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246                  1) // __kmpc_atomic_float16_max
1247 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248                  1) // __kmpc_atomic_float16_min
1249 #if (KMP_ARCH_X86)
1250 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251                  1) // __kmpc_atomic_float16_max_a16
1252 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253                  1) // __kmpc_atomic_float16_min_a16
1254 #endif // (KMP_ARCH_X86)
1255 #endif // KMP_HAVE_QUAD
1256 // ------------------------------------------------------------------------
// Separate macros are needed for .EQV. because it requires a complement (~).
// OP is ignored by the critical-section path; `^= ~` is used instead.
1259 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1260   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1261   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1262   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1263   }
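// The identity used: x .EQV. y == ~(x ^ y) == x ^ ~y, which lets the update
// be passed to the critical-section macros as the single compound assignment
// `*lhs ^= (TYPE)~rhs` (e.g. for kmp_int8, 0x0A .EQV. 0x06 == ~0x0C == 0xF3).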
1264 
1265 // ------------------------------------------------------------------------
1266 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267 // ------------------------------------------------------------------------
1268 // X86 or X86_64: no alignment problems ===================================
1269 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1270                         GOMP_FLAG)                                             \
1271   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1272   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1273   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1274   }
1275 // ------------------------------------------------------------------------
1276 #else
1277 // ------------------------------------------------------------------------
1278 // Code for other architectures that don't handle unaligned accesses.
1279 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1280                         GOMP_FLAG)                                             \
1281   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1282   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1283   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1284     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1285   } else {                                                                     \
1286     KMP_CHECK_GTID;                                                            \
1287     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1288   }                                                                            \
1289   }
1290 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291 
1292 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308 
1309 // ------------------------------------------------------------------------
1310 // Routines for Extended types: long double, _Quad, complex flavours (use
1311 // critical section)
1312 //     TYPE_ID, OP_ID, TYPE - detailed above
1313 //     OP      - operator
1314 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1315 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1316   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1317   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1318   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1319   }
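// For illustration, ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
// roughly produces the following (a sketch: the GOMP compatibility path and
// debug trace are omitted, and ATOMIC_LOCK10r is itself a macro naming the
// actual lock variable):
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     *lhs = (long double)(*lhs + rhs);
//     __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   }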
1320 
1321 /* ------------------------------------------------------------------------- */
1322 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323 // routines for long double type
1324 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325                 1) // __kmpc_atomic_float10_add
1326 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327                 1) // __kmpc_atomic_float10_sub
1328 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329                 1) // __kmpc_atomic_float10_mul
1330 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331                 1) // __kmpc_atomic_float10_div
1332 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333 #if KMP_HAVE_QUAD
1334 // routines for _Quad type
1335 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336                 1) // __kmpc_atomic_float16_add
1337 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338                 1) // __kmpc_atomic_float16_sub
1339 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340                 1) // __kmpc_atomic_float16_mul
1341 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342                 1) // __kmpc_atomic_float16_div
1343 #if (KMP_ARCH_X86)
1344 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345                 1) // __kmpc_atomic_float16_add_a16
1346 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347                 1) // __kmpc_atomic_float16_sub_a16
1348 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349                 1) // __kmpc_atomic_float16_mul_a16
1350 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351                 1) // __kmpc_atomic_float16_div_a16
1352 #endif // (KMP_ARCH_X86)
1353 #endif // KMP_HAVE_QUAD
1354 // routines for complex types
1355 
1356 #if USE_CMPXCHG_FIX
1357 // workaround for C78287 (complex(kind=4) data type)
1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359                           1) // __kmpc_atomic_cmplx4_add
1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361                           1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363                           1) // __kmpc_atomic_cmplx4_mul
1364 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365                           1) // __kmpc_atomic_cmplx4_div
1366 // end of the workaround for C78287
1367 #else
1368 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372 #endif // USE_CMPXCHG_FIX
1373 
1374 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380                 1) // __kmpc_atomic_cmplx10_add
1381 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382                 1) // __kmpc_atomic_cmplx10_sub
1383 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384                 1) // __kmpc_atomic_cmplx10_mul
1385 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386                 1) // __kmpc_atomic_cmplx10_div
1387 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388 #if KMP_HAVE_QUAD
1389 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390                 1) // __kmpc_atomic_cmplx16_add
1391 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392                 1) // __kmpc_atomic_cmplx16_sub
1393 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394                 1) // __kmpc_atomic_cmplx16_mul
1395 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396                 1) // __kmpc_atomic_cmplx16_div
1397 #if (KMP_ARCH_X86)
1398 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399                 1) // __kmpc_atomic_cmplx16_add_a16
1400 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401                 1) // __kmpc_atomic_cmplx16_sub_a16
1402 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403                 1) // __kmpc_atomic_cmplx16_mul_a16
1404 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405                 1) // __kmpc_atomic_cmplx16_div_a16
1406 #endif // (KMP_ARCH_X86)
1407 #endif // KMP_HAVE_QUAD
1408 
1409 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1410 // Supported only on IA-32 architecture and Intel(R) 64
1411 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412 
1413 // ------------------------------------------------------------------------
1414 // Operation on *lhs, rhs bound by critical section
1415 //     OP     - operator (it's supposed to contain an assignment)
1416 //     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid.
// For 1- and 2-byte operands a valid gtid is expected; for other sizes it
// must be checked before invoking this macro.
1419 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
1420   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1421                                                                                \
1422   (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
1423                                                                                \
1424   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425 
1426 #ifdef KMP_GOMP_COMPAT
1427 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
1428   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1429     KMP_CHECK_GTID;                                                            \
1430     OP_CRITICAL_REV(TYPE, OP, 0);                                              \
1431     return;                                                                    \
1432   }
1433 
1434 #else
1435 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436 #endif /* KMP_GOMP_COMPAT */
1437 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
//     unsigned fixed types)
1441 //     OP_ID   - operation identifier (add, sub, mul, ...)
1442 //     TYPE    - operands' type
1443 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1444   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1445                                                    TYPE *lhs, TYPE rhs) {      \
1446     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1447     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448 
1449 // ------------------------------------------------------------------------
1450 // Operation on *lhs, rhs using "compare_and_store" routine
1451 //     TYPE    - operands' type
1452 //     BITS    - size in bits, used to distinguish low level calls
1453 //     OP      - operator
1454 // Note: temp_val introduced in order to force the compiler to read
1455 //       *lhs only once (w/o it the compiler reads *lhs twice)
1456 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1457   {                                                                            \
1458     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1459     TYPE old_value, new_value;                                                 \
1460     temp_val = *lhs;                                                           \
1461     old_value = temp_val;                                                      \
1462     new_value = (TYPE)(rhs OP old_value);                                      \
1463     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1464         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1465         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1466       KMP_DO_PAUSE;                                                            \
1467                                                                                \
1468       temp_val = *lhs;                                                         \
1469       old_value = temp_val;                                                    \
1470       new_value = (TYPE)(rhs OP old_value);                                    \
1471     }                                                                          \
1472   }
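// Note the operand order: the new value is computed as `rhs OP old_value`,
// implementing the reversed form x = expr binop x. E.g. for
// __kmpc_atomic_fixed4_sub_rev the effect is *lhs = rhs - *lhs, retried
// until the compare-and-store succeeds.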
1473 
1474 // -------------------------------------------------------------------------
1475 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1476   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1477   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1478   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1479   }
1480 
1481 // ------------------------------------------------------------------------
1482 // Entries definition for integer operands
1483 //     TYPE_ID - operands type and size (fixed4, float4)
1484 //     OP_ID   - operation identifier (add, sub, mul, ...)
1485 //     TYPE    - operand type
1486 //     BITS    - size in bits, used to distinguish low level calls
1487 //     OP      - operator (used in critical section)
1488 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1489 
1490 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1491 // ------------------------------------------------------------------------
1492 // Routines for ATOMIC integer operands, other operators
1493 // ------------------------------------------------------------------------
1494 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1495 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507 
1508 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520 
1521 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533 
1534 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546 
1547 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551 
1552 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1557 
1558 // ------------------------------------------------------------------------
1559 // Routines for Extended types: long double, _Quad, complex flavours (use
1560 // critical section)
1561 //     TYPE_ID, OP_ID, TYPE - detailed above
1562 //     OP      - operator
1563 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1564 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1565   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1566   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1567   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1568   }
1569 
1570 /* ------------------------------------------------------------------------- */
1571 // routines for long double type
1572 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573                     1) // __kmpc_atomic_float10_sub_rev
1574 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575                     1) // __kmpc_atomic_float10_div_rev
1576 #if KMP_HAVE_QUAD
1577 // routines for _Quad type
1578 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579                     1) // __kmpc_atomic_float16_sub_rev
1580 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581                     1) // __kmpc_atomic_float16_div_rev
1582 #if (KMP_ARCH_X86)
1583 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584                     1) // __kmpc_atomic_float16_sub_a16_rev
1585 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586                     1) // __kmpc_atomic_float16_div_a16_rev
1587 #endif // KMP_ARCH_X86
1588 #endif // KMP_HAVE_QUAD
1589 
1590 // routines for complex types
1591 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592                     1) // __kmpc_atomic_cmplx4_sub_rev
1593 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594                     1) // __kmpc_atomic_cmplx4_div_rev
1595 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596                     1) // __kmpc_atomic_cmplx8_sub_rev
1597 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598                     1) // __kmpc_atomic_cmplx8_div_rev
1599 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600                     1) // __kmpc_atomic_cmplx10_sub_rev
1601 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602                     1) // __kmpc_atomic_cmplx10_div_rev
1603 #if KMP_HAVE_QUAD
1604 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605                     1) // __kmpc_atomic_cmplx16_sub_rev
1606 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607                     1) // __kmpc_atomic_cmplx16_div_rev
1608 #if (KMP_ARCH_X86)
1609 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1613 #endif // KMP_ARCH_X86
1614 #endif // KMP_HAVE_QUAD
1615 
1616 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618 
/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations, it is assumed      */
/*       that the compiler converts the RHS to the longest floating-point    */
/*       type, i.e. _Quad, before calling any of these routines.             */
/* The conversion to _Quad is done by the compiler during calculation, and   */
/*    the conversion back to TYPE happens before the assignment, as in:      */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                  */
/* A performance penalty is expected because software emulation is used.     */
/* ------------------------------------------------------------------------ */
1629 
1630 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1631   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1632       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1633     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1634     KA_TRACE(100,                                                              \
1635              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1636               gtid));
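// For illustration, ATOMIC_BEGIN_MIX(fixed4, kmp_int32, mul, float8,
// kmp_real64) opens the definition of
//   void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid,
//                                        kmp_int32 *lhs, kmp_real64 rhs)
// i.e. the LHS keeps its own (narrower) type while the RHS arrives already
// widened by the compiler.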
1637 
1638 // -------------------------------------------------------------------------
1639 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1640                            GOMP_FLAG)                                          \
1641   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1642   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1643   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1644   }
1645 
1646 // -------------------------------------------------------------------------
1647 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648 // -------------------------------------------------------------------------
1649 // X86 or X86_64: no alignment problems ====================================
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1651                            LCK_ID, MASK, GOMP_FLAG)                            \
1652   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1653   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1654   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1655   }
1656 // -------------------------------------------------------------------------
1657 #else
1658 // ------------------------------------------------------------------------
1659 // Code for other architectures that don't handle unaligned accesses.
1660 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1661                            LCK_ID, MASK, GOMP_FLAG)                            \
1662   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1663   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1664   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1665     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1666   } else {                                                                     \
1667     KMP_CHECK_GTID;                                                            \
1668     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1669                        LCK_ID) /* unaligned address - use critical */          \
1670   }                                                                            \
1671   }
1672 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673 
1674 // -------------------------------------------------------------------------
1675 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676 // -------------------------------------------------------------------------
1677 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1678                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1679   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1680   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1681   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1682   }
1683 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1684                                LCK_ID, GOMP_FLAG)                              \
1685   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1686   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1687   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1688   }
1689 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690 
1691 // RHS=float8
1692 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701                    0) // __kmpc_atomic_fixed4_mul_float8
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703                    0) // __kmpc_atomic_fixed4_div_float8
1704 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716 
// RHS=float16 (deprecated; to be removed once we are sure the compiler does
// not use these routines)
1719 #if KMP_HAVE_QUAD
1720 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755                    0) // __kmpc_atomic_fixed4_add_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757                    0) // __kmpc_atomic_fixed4u_add_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759                    0) // __kmpc_atomic_fixed4_sub_fp
1760 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761                    0) // __kmpc_atomic_fixed4u_sub_fp
1762 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763                    0) // __kmpc_atomic_fixed4_mul_fp
1764 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765                    0) // __kmpc_atomic_fixed4u_mul_fp
1766 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767                    0) // __kmpc_atomic_fixed4_div_fp
1768 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769                    0) // __kmpc_atomic_fixed4u_div_fp
1770 
1771 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787 
1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796 
1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805 
1806 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808                    1) // __kmpc_atomic_float10_add_fp
1809 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810                    1) // __kmpc_atomic_float10_sub_fp
1811 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812                    1) // __kmpc_atomic_float10_mul_fp
1813 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814                    1) // __kmpc_atomic_float10_div_fp
1815 
1816 // Reverse operations
1817 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825 
1826 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834 
1835 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1837 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840                        0) // __kmpc_atomic_fixed4_div_rev_fp
1841 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1843 
1844 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852 
1853 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857 
1858 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862 
1863 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864                        1) // __kmpc_atomic_float10_sub_rev_fp
1865 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866                        1) // __kmpc_atomic_float10_div_rev_fp
1867 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868 
1869 #endif // KMP_HAVE_QUAD
1870 
1871 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872 // ------------------------------------------------------------------------
1873 // X86 or X86_64: no alignment problems ====================================
1874 #if USE_CMPXCHG_FIX
1875 // workaround for C78287 (complex(kind=4) data type)
1876 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1877                              LCK_ID, MASK, GOMP_FLAG)                          \
1878   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1879   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1880   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1881   }
1882 // end of the second part of the workaround for C78287
1883 #else
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1885                              LCK_ID, MASK, GOMP_FLAG)                          \
1886   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1887   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1888   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1889   }
1890 #endif // USE_CMPXCHG_FIX
1891 #else
1892 // ------------------------------------------------------------------------
1893 // Code for other architectures that don't handle unaligned accesses.
1894 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1895                              LCK_ID, MASK, GOMP_FLAG)                          \
1896   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1897   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1898   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1899     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1900   } else {                                                                     \
1901     KMP_CHECK_GTID;                                                            \
1902     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1903                        LCK_ID) /* unaligned address - use critical */          \
1904   }                                                                            \
1905   }
1906 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907 
1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916 
1917 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1918 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1919 
1920 // ------------------------------------------------------------------------
1921 // Atomic READ routines
1922 
1923 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
//     unsigned fixed types)
1927 //     OP_ID   - operation identifier (add, sub, mul, ...)
1928 //     TYPE    - operands' type
1929 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1930   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1931                                              TYPE *loc) {                      \
1932     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1933     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1934 
1935 // ------------------------------------------------------------------------
1936 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1937 //     TYPE    - operands' type
1938 //     BITS    - size in bits, used to distinguish low level calls
1939 //     OP      - operator
1940 // Note: temp_val introduced in order to force the compiler to read
1941 //       *lhs only once (w/o it the compiler reads *lhs twice)
1942 // TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation.
1944 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1945   {                                                                            \
1946     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1947     union f_i_union {                                                          \
1948       TYPE f_val;                                                              \
1949       kmp_int##BITS i_val;                                                     \
1950     };                                                                         \
1951     union f_i_union old_value;                                                 \
1952     temp_val = *loc;                                                           \
1953     old_value.f_val = temp_val;                                                \
1954     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1955         (kmp_int##BITS *)loc,                                                  \
1956         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1957         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1958     new_value = old_value.f_val;                                               \
1959     return new_value;                                                          \
1960   }
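// The trick above: a compare-and-store whose "new" value equals the expected
// old value never modifies *loc, yet the RET flavour returns the value
// actually found there, giving an atomic read even for floating types; the
// union merely reinterprets the bits. A sketch of the idea for 32 bits, with
// `compare_and_store_ret` a hypothetical stand-in for
// KMP_COMPARE_AND_STORE_RET32:
//
//   kmp_int32 bits = *(kmp_int32 *)loc;                 // speculative read
//   bits = compare_and_store_ret((kmp_int32 *)loc, bits, bits); // atomic
//   return *(kmp_real32 *)&bits;                        // same bits, as TYPE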
1961 
1962 // -------------------------------------------------------------------------
1963 // Operation on *lhs, rhs bound by critical section
1964 //     OP     - operator (it's supposed to contain an assignment)
1965 //     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid.
// For 1- and 2-byte operands a valid gtid is expected; for other sizes it
// must be checked before invoking this macro.
1968 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1969   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1970                                                                                \
1971   new_value = (*loc);                                                          \
1972                                                                                \
1973   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1974 
1975 // -------------------------------------------------------------------------
1976 #ifdef KMP_GOMP_COMPAT
1977 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1978   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1979     KMP_CHECK_GTID;                                                            \
1980     OP_CRITICAL_READ(OP, 0);                                                   \
1981     return new_value;                                                          \
1982   }
1983 #else
1984 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1985 #endif /* KMP_GOMP_COMPAT */
1986 
1987 // -------------------------------------------------------------------------
1988 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1989   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1990   TYPE new_value;                                                              \
1991   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1992   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1993   return new_value;                                                            \
1994   }
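// I.e. an atomic read of a native-width integer is an atomic fetch-and-add
// of zero; the fixed4 instance below effectively performs
//   new_value = KMP_TEST_THEN_ADD32(loc, +0); // returns *loc, adds nothing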
1995 // -------------------------------------------------------------------------
1996 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1997   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1998   TYPE new_value;                                                              \
1999   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
2000   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
2001   }
2002 // ------------------------------------------------------------------------
2003 // Routines for Extended types: long double, _Quad, complex flavours (use
2004 // critical section)
2005 //     TYPE_ID, OP_ID, TYPE - detailed above
2006 //     OP      - operator
2007 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2008 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2009   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
2010   TYPE new_value;                                                              \
2011   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
2012   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
2013   return new_value;                                                            \
2014   }
2015 
2016 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional output parameter instead.
2020 #if (KMP_OS_WINDOWS)
2021 
2022 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
2023   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2024                                                                                \
2025   (*out) = (*loc);                                                             \
2026                                                                                \
2027   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2028 // ------------------------------------------------------------------------
2029 #ifdef KMP_GOMP_COMPAT
2030 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
2031   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2032     KMP_CHECK_GTID;                                                            \
2033     OP_CRITICAL_READ_WRK(OP, 0);                                               \
2034   }
2035 #else
2036 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2037 #endif /* KMP_GOMP_COMPAT */
2038 // ------------------------------------------------------------------------
2039 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
2040   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2041                                          TYPE *loc) {                          \
2042     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2043     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2044 
2045 // ------------------------------------------------------------------------
2046 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
2047   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
2048   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
2049   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
2050   }
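// For illustration, ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
// 1) produces a routine of the form
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// where the extra leading `out` parameter carries the value that would
// otherwise have been returned.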
2051 
2052 #endif // KMP_OS_WINDOWS
2053 
2054 // ------------------------------------------------------------------------
//                  TYPE_ID,OP_ID, TYPE,  BITS, OP, GOMP_FLAG
2056 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2057 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2058                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2059 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2060                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2061 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2062                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2063 
2064 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2065 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2066                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2067 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2068                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2069 
2070 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2071                      1) // __kmpc_atomic_float10_rd
2072 #if KMP_HAVE_QUAD
2073 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2074                      1) // __kmpc_atomic_float16_rd
2075 #endif // KMP_HAVE_QUAD
2076 
2077 // Fix for CQ220361 on Windows* OS
2078 #if (KMP_OS_WINDOWS)
2079 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2080                          1) // __kmpc_atomic_cmplx4_rd
2081 #else
2082 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2083                      1) // __kmpc_atomic_cmplx4_rd
2084 #endif // (KMP_OS_WINDOWS)
2085 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2086                      1) // __kmpc_atomic_cmplx8_rd
2087 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2088                      1) // __kmpc_atomic_cmplx10_rd
2089 #if KMP_HAVE_QUAD
2090 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2091                      1) // __kmpc_atomic_cmplx16_rd
2092 #if (KMP_ARCH_X86)
2093 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2094                      1) // __kmpc_atomic_float16_a16_rd
2095 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2096                      1) // __kmpc_atomic_cmplx16_a16_rd
2097 #endif // (KMP_ARCH_X86)
2098 #endif // KMP_HAVE_QUAD
2099 
2100 // ------------------------------------------------------------------------
2101 // Atomic WRITE routines
2102 
2103 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2104   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2105   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2106   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2107   }
2108 // ------------------------------------------------------------------------
2109 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2110   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2111   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2112   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2113   }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
  }
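
// Illustrative std::atomic sketch of the loop above, for widths where a
// plain atomic exchange is unavailable (e.g. 64-bit writes on IA-32, see
// the ATOMIC_CMPXCHG_WR uses below); cas_write64 is a hypothetical helper:
//   #include <atomic>
//   #include <cstdint>
//   void cas_write64(std::atomic<int64_t> *lhs, int64_t rhs) {
//     int64_t old_value = lhs->load();
//     // compare_exchange_weak refreshes old_value on failure, matching
//     // the temp_val re-read above
//     while (!lhs->compare_exchange_weak(old_value, rhs)) {
//     }
//   }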

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to distinguish the lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned integers)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs, int flag) {  \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

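// For example, ATOMIC_BEGIN_CPT(fixed4, add_cpt, kmp_int32, kmp_int32)
// opens the definition
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
// the body and closing brace are supplied by the ATOMIC_*_CPT macros below.
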
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1- and 2-byte operands: a valid gtid is expected; for other sizes it is
// checked before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

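// The flag argument selects the captured value: with flag != 0 the routine
// returns the value after the update (like ++x), with flag == 0 the value
// before it (like x++). Illustrative calls:
//   v = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 1, 1); // v = ++x
//   v = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 1, 0); // v = x++
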
#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(old_value OP rhs);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(old_value OP rhs);                                    \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

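// Illustrative std::atomic sketch of the capture loop above, assuming a
// 32-bit integer TYPE and OP '+' (add_cpt here is a hypothetical helper;
// KMP_COMPARE_AND_STORE_ACQ32 plays the role of compare_exchange_weak):
//   #include <atomic>
//   #include <cstdint>
//   int32_t add_cpt(std::atomic<int32_t> *lhs, int32_t rhs, int flag) {
//     int32_t old_value = lhs->load();
//     int32_t new_value = old_value + rhs;
//     while (!lhs->compare_exchange_weak(old_value, new_value))
//       new_value = old_value + rhs; // old_value refreshed on failure
//     return flag ? new_value : old_value;
//   }
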
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE old_value, new_value;                                                   \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
  if (flag) {                                                                  \
    return old_value OP rhs;                                                   \
  } else                                                                       \
    return old_value;                                                          \
  }
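
// Note: with OP '-' the fetch-and-add line above becomes, e.g. for fixed4,
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // atomic *lhs -= rhs
// i.e. subtraction is an atomic add of the negated operand, and the
// captured "after" value is old_value - rhs.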
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entry definitions for integer operands
//     TYPE_ID - operand type and size (e.g. fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in the critical section)
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned integers)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));

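// For example, the fixed1/add_cpt/fp instantiation below expands to
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag)
// the arithmetic is performed after the usual promotion to _Quad and the
// result is cast back to the 1-byte lhs type before the atomic store.
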
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                                LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1- and 2-byte operands: a valid gtid is expected; for other sizes it is
// checked before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    new_value OP rhs;                                                          \
    (*lhs) = new_value;                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_L_CPT(OP, 0);                                                  \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for && and || because there is no combined
// assignment form ("&&=" / "||=") of these operators
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

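// Since there is no combined-assignment form of && and ||, the GOMP critical
// path above is handed the composite token sequence "= *lhs OP", so the
// "new_value OP rhs" statement inside OP_CRITICAL_L_CPT expands to, e.g.,
//   new_value = *lhs && rhs;
// while the lock-free path rebuilds the same expression in OP_CMPXCHG_CPT.
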
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to decide whether an update is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    old_value = *lhs;                                                          \
    *lhs = rhs;                                                                \
    if (flag)                                                                  \
      new_value = rhs;                                                         \
    else                                                                       \
      new_value = old_value;                                                   \
  } else {                                                                     \
    new_value = *lhs;                                                          \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /*TYPE old_value; */                                                       \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag)                                                                  \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }

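// Illustrative std::atomic sketch of the max path (OP is '<'), mirroring
// MIN_MAX_CMPXCHG_CPT above; max_cpt is a hypothetical helper and a 32-bit
// integer operand is assumed:
//   #include <atomic>
//   #include <cstdint>
//   int32_t max_cpt(std::atomic<int32_t> *lhs, int32_t rhs, int flag) {
//     int32_t old_value = lhs->load();
//     while (old_value < rhs && // retry only while the candidate wins
//            !lhs->compare_exchange_weak(old_value, rhs)) {
//     }
//     return flag ? rhs : old_value;
//   }
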
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
  }                                                                            \
  return *lhs;                                                                 \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  (void)new_value;                                                             \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  }                                                                            \
  return *lhs;                                                                 \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
                     1) // __kmpc_atomic_float10_max_cpt
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
                     1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP, 0);                                                    \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
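
// .NEQV. is plain bitwise xor; .EQV. is its complement, obtained by xoring
// with the complemented operand: "x ^~ rhs" parses as x ^ (~rhs), and for
// integer types x ^ ~rhs == ~(x ^ rhs).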

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to distinguish the lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an extra parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
  }
// The end of workaround for cmplx4
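
// For example, the cmplx4/add_cpt instantiation below defines
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag)
// where the captured value is written to *out instead of being returned.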

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines return void; the captured value is passed back through *out
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0 capture forms for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1- and 2-byte operands: a valid gtid is expected; for other sizes it is
// checked before this macro
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    /*temp_val = (*lhs);*/                                                     \
    (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(rhs OP old_value);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(rhs OP old_value);                                    \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

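// Illustrative mapping (an assumption about compiler codegen, not a
// contract): for
//   #pragma omp atomic capture
//   { v = x; x = expr / x; }
// a compiler that does not inline the construct can call
//   v = __kmpc_atomic_fixed4_div_cpt_rev(id_ref, gtid, &x, expr, 0);
// with flag == 0 so the value captured is the one before the update.
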
ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
3052 
3053 // ------------------------------------------------------------------------
3054 // Routines for Extended types: long double, _Quad, complex flavours (use
3055 // critical section)
3056 //     TYPE_ID, OP_ID, TYPE - detailed above
3057 //     OP      - operator
//     LCK_ID  - lock identifier, used to select the lock variable
3059 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
3060   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
3061   TYPE new_value;                                                              \
3063   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3064   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
3065   }
3066 
3067 /* ------------------------------------------------------------------------- */
3068 // routines for long double type
3069 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3070                         1) // __kmpc_atomic_float10_sub_cpt_rev
3071 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3072                         1) // __kmpc_atomic_float10_div_cpt_rev
3073 #if KMP_HAVE_QUAD
3074 // routines for _Quad type
3075 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3076                         1) // __kmpc_atomic_float16_sub_cpt_rev
3077 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3078                         1) // __kmpc_atomic_float16_div_cpt_rev
3079 #if (KMP_ARCH_X86)
3080 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3081                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3082 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3083                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3084 #endif // (KMP_ARCH_X86)
3085 #endif // KMP_HAVE_QUAD
3086 
3087 // routines for complex types
3088 
3089 // ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an additional
// output parameter instead.
3092 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3093   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3094                                                                                \
3095   if (flag) {                                                                  \
3096     (*lhs) = (rhs)OP(*lhs);                                                    \
3097     (*out) = (*lhs);                                                           \
3098   } else {                                                                     \
3099     (*out) = (*lhs);                                                           \
3100     (*lhs) = (rhs)OP(*lhs);                                                    \
3101   }                                                                            \
3102                                                                                \
3103   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3104   return;
3105 // ------------------------------------------------------------------------
3106 
3107 #ifdef KMP_GOMP_COMPAT
3108 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3109   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3110     KMP_CHECK_GTID;                                                            \
3111     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3112   }
3113 #else
3114 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3115 #endif /* KMP_GOMP_COMPAT */
3116 // ------------------------------------------------------------------------
3117 
3118 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3119                                     GOMP_FLAG)                                 \
3120   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3121   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3122   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3123   }
3124 // The end of workaround for cmplx4
3125 
3126 // !!! TODO: check if we need to return void for cmplx4 routines
3127 // cmplx4 routines to return void
3128 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3129                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3130 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3131                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
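
// For reference (assuming ATOMIC_BEGIN_WRK emits the usual extra out/flag
// parameters), the two instantiations above produce entry points of the form
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
// i.e. the captured value is written through *out rather than returned by
// value, which is what the Win_32e workaround requires.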
3132 
3133 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3134                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3135 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3136                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3137 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3138                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3139 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3140                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3141 #if KMP_HAVE_QUAD
3142 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3143                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3144 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3145                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3146 #if (KMP_ARCH_X86)
3147 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3148                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3149 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3150                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3151 #endif // (KMP_ARCH_X86)
3152 #endif // KMP_HAVE_QUAD
3153 
3154 // Capture reverse for mixed type: RHS=float16
3155 #if KMP_HAVE_QUAD
3156 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
3160 //     OP_ID   - operation identifier (add, sub, mul, ...)
3161 //     TYPE    - operands' type
3162 // -------------------------------------------------------------------------
3163 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3164                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3165   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3166   TYPE new_value;                                                              \
3167   (void)new_value;                                                             \
3168   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3169   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3170   }
3171 
3172 // -------------------------------------------------------------------------
3173 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3174                                     LCK_ID, GOMP_FLAG)                         \
3175   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3176   TYPE new_value;                                                              \
3177   (void)new_value;                                                             \
3178   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
3179   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
3180   }
3181 
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3183                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3184 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3185                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3187                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3189                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3190 
3191 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3192                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3193 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3194                            1,
3195                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3197                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3199                            1,
3200                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3201 
3202 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3203                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3205                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3206 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3207                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3208 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3209                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3210 
3211 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3212                            7,
3213                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3214 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3215                            8i, 7,
3216                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3217 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3218                            7,
3219                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3220 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3221                            8i, 7,
3222                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3223 
3224 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3225                            4r, 3,
3226                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3227 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3228                            4r, 3,
3229                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3230 
3231 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3232                            8r, 7,
3233                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3234 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3235                            8r, 7,
3236                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3237 
3238 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3239                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3240 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3241                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3242 
3243 #endif // KMP_HAVE_QUAD
3244 
3245 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
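//
// For example, a compiler that chooses not to inline the exchange could
// lower the capture-write above, for a 4-byte int x, into a call such as
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
// where loc and gtid are the usual source-location and thread-id arguments.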
3246 
3247 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3248   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3249                                      TYPE rhs) {                               \
3250     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3251     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3252 
3253 #define CRITICAL_SWP(LCK_ID)                                                   \
3254   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3255                                                                                \
3256   old_value = (*lhs);                                                          \
3257   (*lhs) = rhs;                                                                \
3258                                                                                \
3259   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3260   return old_value;
3261 
3262 // ------------------------------------------------------------------------
3263 #ifdef KMP_GOMP_COMPAT
3264 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3265   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3266     KMP_CHECK_GTID;                                                            \
3267     CRITICAL_SWP(0);                                                           \
3268   }
3269 #else
3270 #define GOMP_CRITICAL_SWP(FLAG)
3271 #endif /* KMP_GOMP_COMPAT */
3272 
3273 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3274   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3275   TYPE old_value;                                                              \
3276   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3277   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3278   return old_value;                                                            \
3279   }
3280 // ------------------------------------------------------------------------
3281 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3282   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3283   TYPE old_value;                                                              \
3284   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3285   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3286   return old_value;                                                            \
3287   }
3288 
3289 // ------------------------------------------------------------------------
3290 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3291   {                                                                            \
3292     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3293     TYPE old_value, new_value;                                                 \
3294     temp_val = *lhs;                                                           \
3295     old_value = temp_val;                                                      \
3296     new_value = rhs;                                                           \
3297     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3298         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3299         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3300       temp_val = *lhs;                                                         \
3301       old_value = temp_val;                                                    \
3302       new_value = rhs;                                                         \
3303     }                                                                          \
3304     return old_value;                                                          \
3305   }
3306 
3307 // -------------------------------------------------------------------------
3308 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3309   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3310   TYPE old_value;                                                              \
3311   (void)old_value;                                                             \
3312   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3313   CMPXCHG_SWP(TYPE, BITS)                                                      \
3314   }
3315 
3316 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3317 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3318 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3319 
3320 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3321                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3322 
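// IA-32 has no single-instruction 8-byte exchange (only cmpxchg8b), so on
// 32-bit x86 the 8-byte swaps fall back to the compare-and-store loop;
// everywhere else the plain exchange primitives suffice.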
3323 #if (KMP_ARCH_X86)
3324 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3325                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3327                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328 #else
3329 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3330 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3331                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3332 #endif // (KMP_ARCH_X86)
3333 
3334 // ------------------------------------------------------------------------
3335 // Routines for Extended types: long double, _Quad, complex flavours (use
3336 // critical section)
3337 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3338   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3339   TYPE old_value;                                                              \
3340   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3341   CRITICAL_SWP(LCK_ID)                                                         \
3342   }
3343 
3344 // ------------------------------------------------------------------------
3345 // !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an additional
// output parameter instead.
3348 
3349 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3350   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3351                                      TYPE rhs, TYPE *out) {                    \
3352     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3353     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3354 
3355 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3356   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3357                                                                                \
3358   tmp = (*lhs);                                                                \
3359   (*lhs) = (rhs);                                                              \
3360   (*out) = tmp;                                                                \
3361   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3362   return;
3363 // ------------------------------------------------------------------------
3364 
3365 #ifdef KMP_GOMP_COMPAT
3366 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3367   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3368     KMP_CHECK_GTID;                                                            \
3369     CRITICAL_SWP_WRK(0);                                                       \
3370   }
3371 #else
3372 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3373 #endif /* KMP_GOMP_COMPAT */
3374 // ------------------------------------------------------------------------
3375 
3376 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3377   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3378   TYPE tmp;                                                                    \
3379   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3380   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3381   }
3382 // The end of workaround for cmplx4
3383 
3384 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3385 #if KMP_HAVE_QUAD
3386 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3387 #endif // KMP_HAVE_QUAD
3388 // cmplx4 routine to return void
3389 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3390 
3394 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3395 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3396 #if KMP_HAVE_QUAD
3397 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3398 #if (KMP_ARCH_X86)
3399 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3400                     1) // __kmpc_atomic_float16_a16_swp
3401 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3402                     1) // __kmpc_atomic_cmplx16_a16_swp
3403 #endif // (KMP_ARCH_X86)
3404 #endif // KMP_HAVE_QUAD
3405 
3406 // End of OpenMP 4.0 Capture
3407 
3408 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3409 
3410 #undef OP_CRITICAL
3411 
3412 /* ------------------------------------------------------------------------ */
3413 /* Generic atomic routines                                                  */
3414 
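// For illustration (mul_f8, loc, and scale are hypothetical names), a
// compiler could implement
//   #pragma omp atomic
//     d *= scale;                          // double d, scale;
// without a type-specialized entry point by emitting
//   static void mul_f8(void *out, void *a, void *b) {
//     *(double *)out = *(double *)a * *(double *)b;
//   }
//   __kmpc_atomic_8(&loc, gtid, &d, &scale, mul_f8);
// The callback computes *out = *a OP *b; on lock-free targets it is retried
// inside a compare-and-store loop, otherwise it runs under the matching
// size-specific lock.
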
3415 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3416                      void (*f)(void *, void *, void *)) {
3417   KMP_DEBUG_ASSERT(__kmp_init_serial);
3418 
3419   if (
3420 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421       FALSE /* must use lock */
3422 #else
3423       TRUE
3424 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3425   ) {
3426     kmp_int8 old_value, new_value;
3427 
3428     old_value = *(kmp_int8 *)lhs;
3429     (*f)(&new_value, &old_value, rhs);
3430 
3431     /* TODO: Should this be acquire or release? */
3432     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3433                                        *(kmp_int8 *)&new_value)) {
3434       KMP_CPU_PAUSE();
3435 
3436       old_value = *(kmp_int8 *)lhs;
3437       (*f)(&new_value, &old_value, rhs);
3438     }
3439 
3440     return;
3441   } else {
3442     // All 1-byte data is of integer data type.
3443 
3444 #ifdef KMP_GOMP_COMPAT
3445     if (__kmp_atomic_mode == 2) {
3446       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3447     } else
3448 #endif /* KMP_GOMP_COMPAT */
3449       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450 
3451     (*f)(lhs, lhs, rhs);
3452 
3453 #ifdef KMP_GOMP_COMPAT
3454     if (__kmp_atomic_mode == 2) {
3455       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3456     } else
3457 #endif /* KMP_GOMP_COMPAT */
3458       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459   }
3460 }
3461 
3462 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3463                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
3465 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3466       FALSE /* must use lock */
3467 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3468       TRUE /* no alignment problems */
3469 #else
3470       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3471 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3472   ) {
3473     kmp_int16 old_value, new_value;
3474 
3475     old_value = *(kmp_int16 *)lhs;
3476     (*f)(&new_value, &old_value, rhs);
3477 
3478     /* TODO: Should this be acquire or release? */
3479     while (!KMP_COMPARE_AND_STORE_ACQ16(
3480         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3481       KMP_CPU_PAUSE();
3482 
3483       old_value = *(kmp_int16 *)lhs;
3484       (*f)(&new_value, &old_value, rhs);
3485     }
3486 
3487     return;
3488   } else {
3489     // All 2-byte data is of integer data type.
3490 
3491 #ifdef KMP_GOMP_COMPAT
3492     if (__kmp_atomic_mode == 2) {
3493       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3494     } else
3495 #endif /* KMP_GOMP_COMPAT */
3496       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497 
3498     (*f)(lhs, lhs, rhs);
3499 
3500 #ifdef KMP_GOMP_COMPAT
3501     if (__kmp_atomic_mode == 2) {
3502       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3503     } else
3504 #endif /* KMP_GOMP_COMPAT */
3505       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506   }
3507 }
3508 
3509 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3510                      void (*f)(void *, void *, void *)) {
3511   KMP_DEBUG_ASSERT(__kmp_init_serial);
3512 
3513   if (
3514 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3515 // Gomp compatibility is broken if this routine is called for floats.
3516 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3517       TRUE /* no alignment problems */
3518 #else
3519       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3520 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3521   ) {
3522     kmp_int32 old_value, new_value;
3523 
3524     old_value = *(kmp_int32 *)lhs;
3525     (*f)(&new_value, &old_value, rhs);
3526 
3527     /* TODO: Should this be acquire or release? */
3528     while (!KMP_COMPARE_AND_STORE_ACQ32(
3529         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3530       KMP_CPU_PAUSE();
3531 
3532       old_value = *(kmp_int32 *)lhs;
3533       (*f)(&new_value, &old_value, rhs);
3534     }
3535 
3536     return;
3537   } else {
3538     // Use __kmp_atomic_lock_4i for all 4-byte data,
3539     // even if it isn't of integer data type.
3540 
3541 #ifdef KMP_GOMP_COMPAT
3542     if (__kmp_atomic_mode == 2) {
3543       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3544     } else
3545 #endif /* KMP_GOMP_COMPAT */
3546       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547 
3548     (*f)(lhs, lhs, rhs);
3549 
3550 #ifdef KMP_GOMP_COMPAT
3551     if (__kmp_atomic_mode == 2) {
3552       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3553     } else
3554 #endif /* KMP_GOMP_COMPAT */
3555       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556   }
3557 }
3558 
3559 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3560                      void (*f)(void *, void *, void *)) {
3561   KMP_DEBUG_ASSERT(__kmp_init_serial);
3562   if (
3564 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3565       FALSE /* must use lock */
3566 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3567       TRUE /* no alignment problems */
3568 #else
3569       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3570 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3571   ) {
3572     kmp_int64 old_value, new_value;
3573 
3574     old_value = *(kmp_int64 *)lhs;
3575     (*f)(&new_value, &old_value, rhs);
3576     /* TODO: Should this be acquire or release? */
3577     while (!KMP_COMPARE_AND_STORE_ACQ64(
3578         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3579       KMP_CPU_PAUSE();
3580 
3581       old_value = *(kmp_int64 *)lhs;
3582       (*f)(&new_value, &old_value, rhs);
3583     }
3584 
3585     return;
3586   } else {
3587     // Use __kmp_atomic_lock_8i for all 8-byte data,
3588     // even if it isn't of integer data type.
3589 
3590 #ifdef KMP_GOMP_COMPAT
3591     if (__kmp_atomic_mode == 2) {
3592       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3593     } else
3594 #endif /* KMP_GOMP_COMPAT */
3595       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596 
3597     (*f)(lhs, lhs, rhs);
3598 
3599 #ifdef KMP_GOMP_COMPAT
3600     if (__kmp_atomic_mode == 2) {
3601       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3602     } else
3603 #endif /* KMP_GOMP_COMPAT */
3604       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605   }
3606 }
3607 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3608 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3609                       void (*f)(void *, void *, void *)) {
3610   KMP_DEBUG_ASSERT(__kmp_init_serial);
3611 
3612 #ifdef KMP_GOMP_COMPAT
3613   if (__kmp_atomic_mode == 2) {
3614     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3615   } else
3616 #endif /* KMP_GOMP_COMPAT */
3617     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618 
3619   (*f)(lhs, lhs, rhs);
3620 
3621 #ifdef KMP_GOMP_COMPAT
3622   if (__kmp_atomic_mode == 2) {
3623     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624   } else
3625 #endif /* KMP_GOMP_COMPAT */
3626     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627 }
3628 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3629 
3630 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3631                       void (*f)(void *, void *, void *)) {
3632   KMP_DEBUG_ASSERT(__kmp_init_serial);
3633 
3634 #ifdef KMP_GOMP_COMPAT
3635   if (__kmp_atomic_mode == 2) {
3636     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3637   } else
3638 #endif /* KMP_GOMP_COMPAT */
3639     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640 
3641   (*f)(lhs, lhs, rhs);
3642 
3643 #ifdef KMP_GOMP_COMPAT
3644   if (__kmp_atomic_mode == 2) {
3645     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3646   } else
3647 #endif /* KMP_GOMP_COMPAT */
3648     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649 }
3650 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3651 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3652                       void (*f)(void *, void *, void *)) {
3653   KMP_DEBUG_ASSERT(__kmp_init_serial);
3654 
3655 #ifdef KMP_GOMP_COMPAT
3656   if (__kmp_atomic_mode == 2) {
3657     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3658   } else
3659 #endif /* KMP_GOMP_COMPAT */
3660     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661 
3662   (*f)(lhs, lhs, rhs);
3663 
3664 #ifdef KMP_GOMP_COMPAT
3665   if (__kmp_atomic_mode == 2) {
3666     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3667   } else
3668 #endif /* KMP_GOMP_COMPAT */
3669     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670 }
3671 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3672 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3673                       void (*f)(void *, void *, void *)) {
3674   KMP_DEBUG_ASSERT(__kmp_init_serial);
3675 
3676 #ifdef KMP_GOMP_COMPAT
3677   if (__kmp_atomic_mode == 2) {
3678     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3679   } else
3680 #endif /* KMP_GOMP_COMPAT */
3681     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682 
3683   (*f)(lhs, lhs, rhs);
3684 
3685 #ifdef KMP_GOMP_COMPAT
3686   if (__kmp_atomic_mode == 2) {
3687     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3688   } else
3689 #endif /* KMP_GOMP_COMPAT */
3690     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691 }
3692 
// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated so as not to use third-party names in pure Intel code
3695 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3696 void __kmpc_atomic_start(void) {
3697   int gtid = __kmp_entry_gtid();
3698   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3699   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700 }
3701 
3702 void __kmpc_atomic_end(void) {
3703   int gtid = __kmp_get_gtid();
3704   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3705   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706 }
3707 
3708 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3709 
3710 // OpenMP 5.1 compare and swap
3711 
3712 /*!
3713 @param loc Source code location
3714 @param gtid Global thread id
3715 @param x Memory location to operate on
3716 @param e Expected value
3717 @param d Desired value
3718 @return Result of comparison
3719 
3720 Implements Compare And Swap atomic operation.
3721 
Sample code:
@code
#pragma omp atomic compare update capture
  { r = x == e; if(r) { x = d; } }
@endcode
3725 */
3726 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3727   return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3728 }
3729 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3730                               short d) {
3731   return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3732 }
3733 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3734                               kmp_int32 d) {
3735   return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3736 }
3737 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3738                               kmp_int64 d) {
3739   return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3740 }
3741 
3742 /*!
3743 @param loc Source code location
3744 @param gtid Global thread id
3745 @param x Memory location to operate on
3746 @param e Expected value
3747 @param d Desired value
3748 @return Old value of x
3749 
3750 Implements Compare And Swap atomic operation.
3751 
Sample code:
@code
#pragma omp atomic compare update capture
  { v = x; if (x == e) { x = d; } }
@endcode
3755 */
3756 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3757   return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3758 }
3759 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3760                               short d) {
3761   return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3762 }
3763 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3764                                   kmp_int32 e, kmp_int32 d) {
3765   return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3766 }
3767 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3768                                   kmp_int64 e, kmp_int64 d) {
3769   return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3770 }
3771 
3772 /*!
3773 @param loc Source code location
3774 @param gtid Global thread id
3775 @param x Memory location to operate on
3776 @param e Expected value
3777 @param d Desired value
3778 @param pv Captured value location
3779 @return Result of comparison
3780 
3781 Implements Compare And Swap + Capture atomic operation.
3782 
v gets the old value of x if the comparison failed; it is left untouched
otherwise.
Sample code:
@code
#pragma omp atomic compare update capture
  { r = x == e; if(r) { x = d; } else { v = x; } }
@endcode
3787 */
3788 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3789                                   char d, char *pv) {
3790   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3791   if (old == e)
3792     return true;
3793   KMP_ASSERT(pv != NULL);
3794   *pv = old;
3795   return false;
3796 }
3797 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3798                                   short d, short *pv) {
3799   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3800   if (old == e)
3801     return true;
3802   KMP_ASSERT(pv != NULL);
3803   *pv = old;
3804   return false;
3805 }
3806 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3807                                   kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3808   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3809   if (old == e)
3810     return true;
3811   KMP_ASSERT(pv != NULL);
3812   *pv = old;
3813   return false;
3814 }
3815 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3816                                   kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3817   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3818   if (old == e)
3819     return true;
3820   KMP_ASSERT(pv != NULL);
3821   *pv = old;
3822   return false;
3823 }
3824 
3825 /*!
3826 @param loc Source code location
3827 @param gtid Global thread id
3828 @param x Memory location to operate on
3829 @param e Expected value
3830 @param d Desired value
3831 @param pv Captured value location
3832 @return Old value of x
3833 
3834 Implements Compare And Swap + Capture atomic operation.
3835 
v gets the new value of x.
Sample code:
@code
#pragma omp atomic compare update capture
  { if (x == e) { x = d; }; v = x; }
@endcode
3840 */
3841 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3842                                  char d, char *pv) {
3843   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3844   KMP_ASSERT(pv != NULL);
3845   *pv = old == e ? d : old;
3846   return old;
3847 }
3848 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3849                                   short d, short *pv) {
3850   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3851   KMP_ASSERT(pv != NULL);
3852   *pv = old == e ? d : old;
3853   return old;
3854 }
3855 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3856                                       kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3857   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3858   KMP_ASSERT(pv != NULL);
3859   *pv = old == e ? d : old;
3860   return old;
3861 }
3862 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3863                                       kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3864   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3865   KMP_ASSERT(pv != NULL);
3866   *pv = old == e ? d : old;
3867   return old;
3868 }
3869 
3870 // End OpenMP 5.1 compare + capture
3871 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3872 
3873 /*!
3874 @}
3875 */
3876 
3877 // end of file
3878