/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance, on the IA-32 architecture
an atomic operation like this can be inlined
@code
static int s = 0;
#pragma omp atomic
    s++;
@endcode
using the single instruction: `lock; incl s`

However, the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
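For illustration, a compiler that does not inline the operation might emit a
call such as the following sketch (here `loc` and `gtid` are placeholder names
for the source location descriptor and global thread id the compiler already
has at hand, not identifiers defined in this file):
@code
static kmp_int32 s = 0;
void update_s(ident_t *loc, int gtid) {
  __kmpc_atomic_fixed4_add(loc, gtid, &s, 1); // atomically performs s += 1
}
@endcode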

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
80-bit float (x87 extended precision) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^  | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv.   | eqv
.neqv.  | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.
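
As an example of the encoding, here are two names decoded; the declarations
are sketches that follow the general forms described below:
@code
// double operand, reversed subtraction: *lhs = rhs - *lhs
void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid, double *lhs,
                                  double rhs);
// int32_t operand, captured multiplication: *lhs *= rhs, result returned
kmp_int32 __kmpc_atomic_fixed4_mul_cpt(ident_t *id_ref, int gtid,
                                       kmp_int32 *lhs, kmp_int32 rhs,
                                       int flag);
@endcode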

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid,
                                           TYPE * lhs, TYPE rhs );
@endcode
@param id_ref a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid,
                                               TYPE * lhs, TYPE rhs,
                                               int flag );
@endcode
@param id_ref a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand
@param flag  one if the result is to be captured *after* the operation, zero if
captured *before*.

The one exception to this is the `complex<float>` type, where the value is not
returned; instead an extra argument pointer is passed.

These functions look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid,
                                    kmp_cmplx32 * lhs, kmp_cmplx32 rhs,
                                    kmp_cmplx32 * out, int flag );
@endcode
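
For illustration, a capture call might look like this sketch (`loc` and `gtid`
again stand for caller-provided values, not identifiers defined here):
@code
kmp_int32 v = 10;
// Atomically v += 5; with flag = 1 the value *after* the update (15) is
// returned; with flag = 0 the value *before* the update (10) would be.
kmp_int32 captured = __kmpc_atomic_fixed4_add_cpt(loc, gtid, &v, 5, 1);
@endcode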

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs,
                               TYPE rhs );
@endcode
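
For illustration, an atomic read and write of a `double` might look like this
sketch (`loc_ref` and `gtid` stand for caller-provided values):
@code
double d = 0.0;
double snapshot = __kmpc_atomic_float8_rd(loc_ref, gtid, &d); // atomic read
__kmpc_atomic_float8_wr(loc_ref, gtid, &d, 3.14); // atomic write
@endcode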

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
    __kmpc_atomic_fixed1_add
    __kmpc_atomic_fixed1_add_cpt
    __kmpc_atomic_fixed1_add_fp
    __kmpc_atomic_fixed1_andb
    __kmpc_atomic_fixed1_andb_cpt
    __kmpc_atomic_fixed1_andl
    __kmpc_atomic_fixed1_andl_cpt
    __kmpc_atomic_fixed1_div
    __kmpc_atomic_fixed1_div_cpt
    __kmpc_atomic_fixed1_div_cpt_rev
    __kmpc_atomic_fixed1_div_float8
    __kmpc_atomic_fixed1_div_fp
    __kmpc_atomic_fixed1_div_rev
    __kmpc_atomic_fixed1_eqv
    __kmpc_atomic_fixed1_eqv_cpt
    __kmpc_atomic_fixed1_max
    __kmpc_atomic_fixed1_max_cpt
    __kmpc_atomic_fixed1_min
    __kmpc_atomic_fixed1_min_cpt
    __kmpc_atomic_fixed1_mul
    __kmpc_atomic_fixed1_mul_cpt
    __kmpc_atomic_fixed1_mul_float8
    __kmpc_atomic_fixed1_mul_fp
    __kmpc_atomic_fixed1_neqv
    __kmpc_atomic_fixed1_neqv_cpt
    __kmpc_atomic_fixed1_orb
    __kmpc_atomic_fixed1_orb_cpt
    __kmpc_atomic_fixed1_orl
    __kmpc_atomic_fixed1_orl_cpt
    __kmpc_atomic_fixed1_rd
    __kmpc_atomic_fixed1_shl
    __kmpc_atomic_fixed1_shl_cpt
    __kmpc_atomic_fixed1_shl_cpt_rev
    __kmpc_atomic_fixed1_shl_rev
    __kmpc_atomic_fixed1_shr
    __kmpc_atomic_fixed1_shr_cpt
    __kmpc_atomic_fixed1_shr_cpt_rev
    __kmpc_atomic_fixed1_shr_rev
    __kmpc_atomic_fixed1_sub
    __kmpc_atomic_fixed1_sub_cpt
    __kmpc_atomic_fixed1_sub_cpt_rev
    __kmpc_atomic_fixed1_sub_fp
    __kmpc_atomic_fixed1_sub_rev
    __kmpc_atomic_fixed1_swp
    __kmpc_atomic_fixed1_wr
    __kmpc_atomic_fixed1_xor
    __kmpc_atomic_fixed1_xor_cpt
    __kmpc_atomic_fixed1u_add_fp
    __kmpc_atomic_fixed1u_sub_fp
    __kmpc_atomic_fixed1u_mul_fp
    __kmpc_atomic_fixed1u_div
    __kmpc_atomic_fixed1u_div_cpt
    __kmpc_atomic_fixed1u_div_cpt_rev
    __kmpc_atomic_fixed1u_div_fp
    __kmpc_atomic_fixed1u_div_rev
    __kmpc_atomic_fixed1u_shr
    __kmpc_atomic_fixed1u_shr_cpt
    __kmpc_atomic_fixed1u_shr_cpt_rev
    __kmpc_atomic_fixed1u_shr_rev
    __kmpc_atomic_fixed2_add
    __kmpc_atomic_fixed2_add_cpt
    __kmpc_atomic_fixed2_add_fp
    __kmpc_atomic_fixed2_andb
    __kmpc_atomic_fixed2_andb_cpt
    __kmpc_atomic_fixed2_andl
    __kmpc_atomic_fixed2_andl_cpt
    __kmpc_atomic_fixed2_div
    __kmpc_atomic_fixed2_div_cpt
    __kmpc_atomic_fixed2_div_cpt_rev
    __kmpc_atomic_fixed2_div_float8
    __kmpc_atomic_fixed2_div_fp
    __kmpc_atomic_fixed2_div_rev
    __kmpc_atomic_fixed2_eqv
    __kmpc_atomic_fixed2_eqv_cpt
    __kmpc_atomic_fixed2_max
    __kmpc_atomic_fixed2_max_cpt
    __kmpc_atomic_fixed2_min
    __kmpc_atomic_fixed2_min_cpt
    __kmpc_atomic_fixed2_mul
    __kmpc_atomic_fixed2_mul_cpt
    __kmpc_atomic_fixed2_mul_float8
    __kmpc_atomic_fixed2_mul_fp
    __kmpc_atomic_fixed2_neqv
    __kmpc_atomic_fixed2_neqv_cpt
    __kmpc_atomic_fixed2_orb
    __kmpc_atomic_fixed2_orb_cpt
    __kmpc_atomic_fixed2_orl
    __kmpc_atomic_fixed2_orl_cpt
    __kmpc_atomic_fixed2_rd
    __kmpc_atomic_fixed2_shl
    __kmpc_atomic_fixed2_shl_cpt
    __kmpc_atomic_fixed2_shl_cpt_rev
    __kmpc_atomic_fixed2_shl_rev
    __kmpc_atomic_fixed2_shr
    __kmpc_atomic_fixed2_shr_cpt
    __kmpc_atomic_fixed2_shr_cpt_rev
    __kmpc_atomic_fixed2_shr_rev
    __kmpc_atomic_fixed2_sub
    __kmpc_atomic_fixed2_sub_cpt
    __kmpc_atomic_fixed2_sub_cpt_rev
    __kmpc_atomic_fixed2_sub_fp
    __kmpc_atomic_fixed2_sub_rev
    __kmpc_atomic_fixed2_swp
    __kmpc_atomic_fixed2_wr
    __kmpc_atomic_fixed2_xor
    __kmpc_atomic_fixed2_xor_cpt
    __kmpc_atomic_fixed2u_add_fp
    __kmpc_atomic_fixed2u_sub_fp
    __kmpc_atomic_fixed2u_mul_fp
    __kmpc_atomic_fixed2u_div
    __kmpc_atomic_fixed2u_div_cpt
    __kmpc_atomic_fixed2u_div_cpt_rev
    __kmpc_atomic_fixed2u_div_fp
    __kmpc_atomic_fixed2u_div_rev
    __kmpc_atomic_fixed2u_shr
    __kmpc_atomic_fixed2u_shr_cpt
    __kmpc_atomic_fixed2u_shr_cpt_rev
    __kmpc_atomic_fixed2u_shr_rev
    __kmpc_atomic_fixed4_add
    __kmpc_atomic_fixed4_add_cpt
    __kmpc_atomic_fixed4_add_fp
    __kmpc_atomic_fixed4_andb
    __kmpc_atomic_fixed4_andb_cpt
    __kmpc_atomic_fixed4_andl
    __kmpc_atomic_fixed4_andl_cpt
    __kmpc_atomic_fixed4_div
    __kmpc_atomic_fixed4_div_cpt
    __kmpc_atomic_fixed4_div_cpt_rev
    __kmpc_atomic_fixed4_div_float8
    __kmpc_atomic_fixed4_div_fp
    __kmpc_atomic_fixed4_div_rev
    __kmpc_atomic_fixed4_eqv
    __kmpc_atomic_fixed4_eqv_cpt
    __kmpc_atomic_fixed4_max
    __kmpc_atomic_fixed4_max_cpt
    __kmpc_atomic_fixed4_min
    __kmpc_atomic_fixed4_min_cpt
    __kmpc_atomic_fixed4_mul
    __kmpc_atomic_fixed4_mul_cpt
    __kmpc_atomic_fixed4_mul_float8
    __kmpc_atomic_fixed4_mul_fp
    __kmpc_atomic_fixed4_neqv
    __kmpc_atomic_fixed4_neqv_cpt
    __kmpc_atomic_fixed4_orb
    __kmpc_atomic_fixed4_orb_cpt
    __kmpc_atomic_fixed4_orl
    __kmpc_atomic_fixed4_orl_cpt
    __kmpc_atomic_fixed4_rd
    __kmpc_atomic_fixed4_shl
    __kmpc_atomic_fixed4_shl_cpt
    __kmpc_atomic_fixed4_shl_cpt_rev
    __kmpc_atomic_fixed4_shl_rev
    __kmpc_atomic_fixed4_shr
    __kmpc_atomic_fixed4_shr_cpt
    __kmpc_atomic_fixed4_shr_cpt_rev
    __kmpc_atomic_fixed4_shr_rev
    __kmpc_atomic_fixed4_sub
    __kmpc_atomic_fixed4_sub_cpt
    __kmpc_atomic_fixed4_sub_cpt_rev
    __kmpc_atomic_fixed4_sub_fp
    __kmpc_atomic_fixed4_sub_rev
    __kmpc_atomic_fixed4_swp
    __kmpc_atomic_fixed4_wr
    __kmpc_atomic_fixed4_xor
    __kmpc_atomic_fixed4_xor_cpt
    __kmpc_atomic_fixed4u_add_fp
    __kmpc_atomic_fixed4u_sub_fp
    __kmpc_atomic_fixed4u_mul_fp
    __kmpc_atomic_fixed4u_div
    __kmpc_atomic_fixed4u_div_cpt
    __kmpc_atomic_fixed4u_div_cpt_rev
    __kmpc_atomic_fixed4u_div_fp
    __kmpc_atomic_fixed4u_div_rev
    __kmpc_atomic_fixed4u_shr
    __kmpc_atomic_fixed4u_shr_cpt
    __kmpc_atomic_fixed4u_shr_cpt_rev
    __kmpc_atomic_fixed4u_shr_rev
    __kmpc_atomic_fixed8_add
    __kmpc_atomic_fixed8_add_cpt
    __kmpc_atomic_fixed8_add_fp
    __kmpc_atomic_fixed8_andb
    __kmpc_atomic_fixed8_andb_cpt
    __kmpc_atomic_fixed8_andl
    __kmpc_atomic_fixed8_andl_cpt
    __kmpc_atomic_fixed8_div
    __kmpc_atomic_fixed8_div_cpt
    __kmpc_atomic_fixed8_div_cpt_rev
    __kmpc_atomic_fixed8_div_float8
    __kmpc_atomic_fixed8_div_fp
    __kmpc_atomic_fixed8_div_rev
    __kmpc_atomic_fixed8_eqv
    __kmpc_atomic_fixed8_eqv_cpt
    __kmpc_atomic_fixed8_max
    __kmpc_atomic_fixed8_max_cpt
    __kmpc_atomic_fixed8_min
    __kmpc_atomic_fixed8_min_cpt
    __kmpc_atomic_fixed8_mul
    __kmpc_atomic_fixed8_mul_cpt
    __kmpc_atomic_fixed8_mul_float8
    __kmpc_atomic_fixed8_mul_fp
    __kmpc_atomic_fixed8_neqv
    __kmpc_atomic_fixed8_neqv_cpt
    __kmpc_atomic_fixed8_orb
    __kmpc_atomic_fixed8_orb_cpt
    __kmpc_atomic_fixed8_orl
    __kmpc_atomic_fixed8_orl_cpt
    __kmpc_atomic_fixed8_rd
    __kmpc_atomic_fixed8_shl
    __kmpc_atomic_fixed8_shl_cpt
    __kmpc_atomic_fixed8_shl_cpt_rev
    __kmpc_atomic_fixed8_shl_rev
    __kmpc_atomic_fixed8_shr
    __kmpc_atomic_fixed8_shr_cpt
    __kmpc_atomic_fixed8_shr_cpt_rev
    __kmpc_atomic_fixed8_shr_rev
    __kmpc_atomic_fixed8_sub
    __kmpc_atomic_fixed8_sub_cpt
    __kmpc_atomic_fixed8_sub_cpt_rev
    __kmpc_atomic_fixed8_sub_fp
    __kmpc_atomic_fixed8_sub_rev
    __kmpc_atomic_fixed8_swp
    __kmpc_atomic_fixed8_wr
    __kmpc_atomic_fixed8_xor
    __kmpc_atomic_fixed8_xor_cpt
    __kmpc_atomic_fixed8u_add_fp
    __kmpc_atomic_fixed8u_sub_fp
    __kmpc_atomic_fixed8u_mul_fp
    __kmpc_atomic_fixed8u_div
    __kmpc_atomic_fixed8u_div_cpt
    __kmpc_atomic_fixed8u_div_cpt_rev
    __kmpc_atomic_fixed8u_div_fp
    __kmpc_atomic_fixed8u_div_rev
    __kmpc_atomic_fixed8u_shr
    __kmpc_atomic_fixed8u_shr_cpt
    __kmpc_atomic_fixed8u_shr_cpt_rev
    __kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by x87, but are now rare.)
@code
    __kmpc_atomic_float4_add
    __kmpc_atomic_float4_add_cpt
    __kmpc_atomic_float4_add_float8
    __kmpc_atomic_float4_add_fp
    __kmpc_atomic_float4_div
    __kmpc_atomic_float4_div_cpt
    __kmpc_atomic_float4_div_cpt_rev
    __kmpc_atomic_float4_div_float8
    __kmpc_atomic_float4_div_fp
    __kmpc_atomic_float4_div_rev
    __kmpc_atomic_float4_max
    __kmpc_atomic_float4_max_cpt
    __kmpc_atomic_float4_min
    __kmpc_atomic_float4_min_cpt
    __kmpc_atomic_float4_mul
    __kmpc_atomic_float4_mul_cpt
    __kmpc_atomic_float4_mul_float8
    __kmpc_atomic_float4_mul_fp
    __kmpc_atomic_float4_rd
    __kmpc_atomic_float4_sub
    __kmpc_atomic_float4_sub_cpt
    __kmpc_atomic_float4_sub_cpt_rev
    __kmpc_atomic_float4_sub_float8
    __kmpc_atomic_float4_sub_fp
    __kmpc_atomic_float4_sub_rev
    __kmpc_atomic_float4_swp
    __kmpc_atomic_float4_wr
    __kmpc_atomic_float8_add
    __kmpc_atomic_float8_add_cpt
    __kmpc_atomic_float8_add_fp
    __kmpc_atomic_float8_div
    __kmpc_atomic_float8_div_cpt
    __kmpc_atomic_float8_div_cpt_rev
    __kmpc_atomic_float8_div_fp
    __kmpc_atomic_float8_div_rev
    __kmpc_atomic_float8_max
    __kmpc_atomic_float8_max_cpt
    __kmpc_atomic_float8_min
    __kmpc_atomic_float8_min_cpt
    __kmpc_atomic_float8_mul
    __kmpc_atomic_float8_mul_cpt
    __kmpc_atomic_float8_mul_fp
    __kmpc_atomic_float8_rd
    __kmpc_atomic_float8_sub
    __kmpc_atomic_float8_sub_cpt
    __kmpc_atomic_float8_sub_cpt_rev
    __kmpc_atomic_float8_sub_fp
    __kmpc_atomic_float8_sub_rev
    __kmpc_atomic_float8_swp
    __kmpc_atomic_float8_wr
    __kmpc_atomic_float10_add
    __kmpc_atomic_float10_add_cpt
    __kmpc_atomic_float10_add_fp
    __kmpc_atomic_float10_div
    __kmpc_atomic_float10_div_cpt
    __kmpc_atomic_float10_div_cpt_rev
    __kmpc_atomic_float10_div_fp
    __kmpc_atomic_float10_div_rev
    __kmpc_atomic_float10_mul
    __kmpc_atomic_float10_mul_cpt
    __kmpc_atomic_float10_mul_fp
    __kmpc_atomic_float10_rd
    __kmpc_atomic_float10_sub
    __kmpc_atomic_float10_sub_cpt
    __kmpc_atomic_float10_sub_cpt_rev
    __kmpc_atomic_float10_sub_fp
    __kmpc_atomic_float10_sub_rev
    __kmpc_atomic_float10_swp
    __kmpc_atomic_float10_wr
    __kmpc_atomic_float16_add
    __kmpc_atomic_float16_add_cpt
    __kmpc_atomic_float16_div
    __kmpc_atomic_float16_div_cpt
    __kmpc_atomic_float16_div_cpt_rev
    __kmpc_atomic_float16_div_rev
    __kmpc_atomic_float16_max
    __kmpc_atomic_float16_max_cpt
    __kmpc_atomic_float16_min
    __kmpc_atomic_float16_min_cpt
    __kmpc_atomic_float16_mul
    __kmpc_atomic_float16_mul_cpt
    __kmpc_atomic_float16_rd
    __kmpc_atomic_float16_sub
    __kmpc_atomic_float16_sub_cpt
    __kmpc_atomic_float16_sub_cpt_rev
    __kmpc_atomic_float16_sub_rev
    __kmpc_atomic_float16_swp
    __kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* a `complex<float>`.

@code
    __kmpc_atomic_cmplx4_add
    __kmpc_atomic_cmplx4_add_cmplx8
    __kmpc_atomic_cmplx4_add_cpt
    __kmpc_atomic_cmplx4_div
    __kmpc_atomic_cmplx4_div_cmplx8
    __kmpc_atomic_cmplx4_div_cpt
    __kmpc_atomic_cmplx4_div_cpt_rev
    __kmpc_atomic_cmplx4_div_rev
    __kmpc_atomic_cmplx4_mul
    __kmpc_atomic_cmplx4_mul_cmplx8
    __kmpc_atomic_cmplx4_mul_cpt
    __kmpc_atomic_cmplx4_rd
    __kmpc_atomic_cmplx4_sub
    __kmpc_atomic_cmplx4_sub_cmplx8
    __kmpc_atomic_cmplx4_sub_cpt
    __kmpc_atomic_cmplx4_sub_cpt_rev
    __kmpc_atomic_cmplx4_sub_rev
    __kmpc_atomic_cmplx4_swp
    __kmpc_atomic_cmplx4_wr
    __kmpc_atomic_cmplx8_add
    __kmpc_atomic_cmplx8_add_cpt
    __kmpc_atomic_cmplx8_div
    __kmpc_atomic_cmplx8_div_cpt
    __kmpc_atomic_cmplx8_div_cpt_rev
    __kmpc_atomic_cmplx8_div_rev
    __kmpc_atomic_cmplx8_mul
    __kmpc_atomic_cmplx8_mul_cpt
    __kmpc_atomic_cmplx8_rd
    __kmpc_atomic_cmplx8_sub
    __kmpc_atomic_cmplx8_sub_cpt
    __kmpc_atomic_cmplx8_sub_cpt_rev
    __kmpc_atomic_cmplx8_sub_rev
    __kmpc_atomic_cmplx8_swp
    __kmpc_atomic_cmplx8_wr
    __kmpc_atomic_cmplx10_add
    __kmpc_atomic_cmplx10_add_cpt
    __kmpc_atomic_cmplx10_div
    __kmpc_atomic_cmplx10_div_cpt
    __kmpc_atomic_cmplx10_div_cpt_rev
    __kmpc_atomic_cmplx10_div_rev
    __kmpc_atomic_cmplx10_mul
    __kmpc_atomic_cmplx10_mul_cpt
    __kmpc_atomic_cmplx10_rd
    __kmpc_atomic_cmplx10_sub
    __kmpc_atomic_cmplx10_sub_cpt
    __kmpc_atomic_cmplx10_sub_cpt_rev
    __kmpc_atomic_cmplx10_sub_rev
    __kmpc_atomic_cmplx10_swp
    __kmpc_atomic_cmplx10_wr
    __kmpc_atomic_cmplx16_add
    __kmpc_atomic_cmplx16_add_cpt
    __kmpc_atomic_cmplx16_div
    __kmpc_atomic_cmplx16_div_cpt
    __kmpc_atomic_cmplx16_div_cpt_rev
    __kmpc_atomic_cmplx16_div_rev
    __kmpc_atomic_cmplx16_mul
    __kmpc_atomic_cmplx16_mul_cpt
    __kmpc_atomic_cmplx16_rd
    __kmpc_atomic_cmplx16_sub
    __kmpc_atomic_cmplx16_sub_cpt
    __kmpc_atomic_cmplx16_sub_cpt_rev
    __kmpc_atomic_cmplx16_swp
    __kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for 8-byte complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
   bug on *_32 and *_32e. This is just a temporary workaround for the problem.
   It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
   routines in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
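
// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a
// definition roughly equivalent to the following sketch (not literal
// preprocessor output):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     // ... body and closing "}" supplied by the macro invoking ATOMIC_BEGIN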

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs));                                        \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
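
// For illustration, OP_UPDATE_CRITICAL(kmp_real64, +, 8r) expands to roughly
// the following sketch:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8r, gtid);
//   (*lhs) = (kmp_real64)((*lhs) + ((kmp_real64)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_8r, gtid);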

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section.  On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1.  If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                             \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP, 0);                                                        \
    return;                                                                    \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    TYPE old_value, new_value;                                                 \
    old_value = *(TYPE volatile *)lhs;                                         \
    new_value = (TYPE)(old_value OP((TYPE)rhs));                               \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      old_value = *(TYPE volatile *)lhs;                                       \
      new_value = (TYPE)(old_value OP((TYPE)rhs));                             \
    }                                                                          \
  }
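
// For illustration, OP_CMPXCHG(kmp_real32, 32, +) implements roughly the
// following retry loop, sketched here with a generic CAS32 standing in for
// KMP_COMPARE_AND_STORE_ACQ32 (a sketch, not the literal expansion):
//
//   kmp_real32 old_value, new_value;
//   do {
//     old_value = *lhs;              // snapshot the current value
//     new_value = old_value + rhs;   // compute the update from the snapshot
//     // retry (after a brief pause on KMP_MIC) if another thread changed
//     // *lhs between the snapshot and the compare-and-store
//   } while (!CAS32((kmp_int32 *)lhs, bits_of(old_value), bits_of(new_value)));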

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
    }                                                                          \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = old_value.cmp OP rhs;                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = old_value.cmp OP rhs;                                    \
    }                                                                          \
  }

#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (*lhs)OP rhs;                                                       \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub
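
// For illustration, the ATOMIC_FIXED_ADD(fixed4, add, ...) line above defines
// an entry point roughly equivalent to this sketch; on X86/X86_64 the body,
// aside from the optional GOMP-compat path, reduces to a single locked
// fetch-and-add (not literal preprocessor output):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // e.g. lock; xaddl on IA-32
//   }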

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     MASK    - used for alignment check

//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//              TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                         */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
  }
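
// For illustration, passing the operator fragment `= *lhs &&` as OP makes
// OP_CRITICAL's statement `(*lhs) OP(rhs);` expand to roughly this sketch:
//
//   (*lhs) = *lhs && (rhs); // full logical AND, assigned back under the lock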

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no counterpart in C:              */
/* MAX, MIN, .EQV., .NEQV.                                                    */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}            */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}   */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
//     OP - comparison operator used to check whether an update is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    *lhs = rhs;                                                                \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT(OP, 0);                                                   \
    return;                                                                    \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value;                                                            \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
  }
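
// For illustration, MIN_MAX_CMPXCHG(kmp_int32, 32, <) as used by `max`
// behaves roughly like this sketch (CAS32 standing in for
// KMP_COMPARE_AND_STORE_ACQ32):
//
//   kmp_int32 old_value = *lhs;
//   // Keep trying only while the stored value is still smaller than rhs;
//   // if another thread installs a value >= rhs, no update is needed at all.
//   while (old_value < rhs && !CAS32((kmp_int32 *)lhs, old_value, rhs))
//     old_value = *lhs; // lost the race; re-examine the current value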

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
  }                                                                            \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
  }                                                                            \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
    } else {                                                                   \
      KMP_CHECK_GTID;                                                          \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
    }                                                                          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 #if KMP_HAVE_QUAD
1239 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1240                  1) // __kmpc_atomic_float16_max
1241 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1242                  1) // __kmpc_atomic_float16_min
1243 #if (KMP_ARCH_X86)
1244 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1245                  1) // __kmpc_atomic_float16_max_a16
1246 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1247                  1) // __kmpc_atomic_float16_min_a16
1248 #endif // (KMP_ARCH_X86)
1249 #endif // KMP_HAVE_QUAD
1250 // ------------------------------------------------------------------------
// .EQV. needs separate macros because the operand must be complemented (~):
// a .EQV. b == a ^ ~b. OP is ignored for critical sections; ^= ~ is used
// instead.
1253 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1254   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1255   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1256   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1257   }
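// Worked identity behind the complement: for each bit,
//   a .EQV. b  ==  ~(a ^ b)  ==  a ^ ~b,
// so the atomic update *lhs = *lhs .EQV. rhs becomes *lhs ^= ~rhs, which is
// exactly the "^= (TYPE) ~" assignment the macros above send.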
1258 
1259 // ------------------------------------------------------------------------
1260 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1261 // ------------------------------------------------------------------------
1262 // X86 or X86_64: no alignment problems ===================================
1263 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1264                         GOMP_FLAG)                                             \
1265   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1266   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1267   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1268   }
1269 // ------------------------------------------------------------------------
1270 #else
1271 // ------------------------------------------------------------------------
1272 // Code for other architectures that don't handle unaligned accesses.
1273 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1274                         GOMP_FLAG)                                             \
1275   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1276   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1277   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1278     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1279   } else {                                                                     \
1280     KMP_CHECK_GTID;                                                            \
1281     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1282   }                                                                            \
1283   }
1284 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1285 
1286 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1287                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1288 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1289                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1290 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1291                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1292 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1293                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1294 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1295                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1296 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1297                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1298 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1299                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1300 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1301                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1302 
1303 // ------------------------------------------------------------------------
1304 // Routines for Extended types: long double, _Quad, complex flavours (use
1305 // critical section)
1306 //     TYPE_ID, OP_ID, TYPE - detailed above
1307 //     OP      - operator
1308 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1309 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1310   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1311   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1312   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1313   }
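// For illustration, ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
// expands to roughly the following (a simplified sketch; the GOMP branch and
// the debug trace are omitted):
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     *lhs = *lhs + rhs; // the "send assignment" done under the lock
//     __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   }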
1314 
1315 /* ------------------------------------------------------------------------- */
1316 // routines for long double type
1317 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1318                 1) // __kmpc_atomic_float10_add
1319 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1320                 1) // __kmpc_atomic_float10_sub
1321 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1322                 1) // __kmpc_atomic_float10_mul
1323 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1324                 1) // __kmpc_atomic_float10_div
1325 #if KMP_HAVE_QUAD
1326 // routines for _Quad type
1327 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1328                 1) // __kmpc_atomic_float16_add
1329 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1330                 1) // __kmpc_atomic_float16_sub
1331 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1332                 1) // __kmpc_atomic_float16_mul
1333 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1334                 1) // __kmpc_atomic_float16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1337                 1) // __kmpc_atomic_float16_add_a16
1338 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1339                 1) // __kmpc_atomic_float16_sub_a16
1340 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1341                 1) // __kmpc_atomic_float16_mul_a16
1342 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1343                 1) // __kmpc_atomic_float16_div_a16
1344 #endif // (KMP_ARCH_X86)
1345 #endif // KMP_HAVE_QUAD
1346 // routines for complex types
1347 
1348 #if USE_CMPXCHG_FIX
1349 // workaround for C78287 (complex(kind=4) data type)
1350 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1351                           1) // __kmpc_atomic_cmplx4_add
1352 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1353                           1) // __kmpc_atomic_cmplx4_sub
1354 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1355                           1) // __kmpc_atomic_cmplx4_mul
1356 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1357                           1) // __kmpc_atomic_cmplx4_div
1358 // end of the workaround for C78287
1359 #else
1360 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1361 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1362 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1363 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1364 #endif // USE_CMPXCHG_FIX
1365 
1366 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1367 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1368 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1369 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1370 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1371                 1) // __kmpc_atomic_cmplx10_add
1372 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1373                 1) // __kmpc_atomic_cmplx10_sub
1374 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1375                 1) // __kmpc_atomic_cmplx10_mul
1376 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1377                 1) // __kmpc_atomic_cmplx10_div
1378 #if KMP_HAVE_QUAD
1379 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1380                 1) // __kmpc_atomic_cmplx16_add
1381 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1382                 1) // __kmpc_atomic_cmplx16_sub
1383 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1384                 1) // __kmpc_atomic_cmplx16_mul
1385 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1386                 1) // __kmpc_atomic_cmplx16_div
1387 #if (KMP_ARCH_X86)
1388 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1389                 1) // __kmpc_atomic_cmplx16_add_a16
1390 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1391                 1) // __kmpc_atomic_cmplx16_sub_a16
1392 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1393                 1) // __kmpc_atomic_cmplx16_mul_a16
1394 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1395                 1) // __kmpc_atomic_cmplx16_div_a16
1396 #endif // (KMP_ARCH_X86)
1397 #endif // KMP_HAVE_QUAD
1398 
1399 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1400 // Supported only on IA-32 architecture and Intel(R) 64
1401 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1402 
1403 // ------------------------------------------------------------------------
1404 // Operation on *lhs, rhs bound by critical section
//     OP     - plain binary operator; the macro itself performs the assignment
//     LCK_ID - lock identifier
// Note: gtid is not checked here as it should always be valid:
// 1- and 2-byte operands expect a valid gtid; other sizes are checked before
// this macro is used
1409 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
1410   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1411                                                                                \
1412   (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
1413                                                                                \
1414   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
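// For illustration, a reverse entry is what the compiler emits when x appears
// on the right-hand side of a non-commutative operator (a usage sketch;
// "expr" is a placeholder):
//
//   double x;
//   #pragma omp atomic
//   x = expr / x; // maps to __kmpc_atomic_float8_div_rev(id_ref, gtid, &x, expr)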
1415 
1416 #ifdef KMP_GOMP_COMPAT
1417 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
1418   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1419     KMP_CHECK_GTID;                                                            \
1420     OP_CRITICAL_REV(TYPE, OP, 0);                                              \
1421     return;                                                                    \
1422   }
1423 
1424 #else
1425 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1426 #endif /* KMP_GOMP_COMPAT */
1427 
// Beginning of a definition (provides name, parameters, debug trace)
1429 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1430 //     fixed)
1431 //     OP_ID   - operation identifier (add, sub, mul, ...)
1432 //     TYPE    - operands' type
1433 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1434   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1435                                                    TYPE *lhs, TYPE rhs) {      \
1436     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1437     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1438 
1439 // ------------------------------------------------------------------------
1440 // Operation on *lhs, rhs using "compare_and_store" routine
1441 //     TYPE    - operands' type
1442 //     BITS    - size in bits, used to distinguish low level calls
1443 //     OP      - operator
1444 // Note: temp_val introduced in order to force the compiler to read
1445 //       *lhs only once (w/o it the compiler reads *lhs twice)
1446 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1447   {                                                                            \
1448     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1449     TYPE old_value, new_value;                                                 \
1450     temp_val = *lhs;                                                           \
1451     old_value = temp_val;                                                      \
1452     new_value = (TYPE)(rhs OP old_value);                                      \
1453     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1454         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1455         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1456       KMP_DO_PAUSE;                                                            \
1457                                                                                \
1458       temp_val = *lhs;                                                         \
1459       old_value = temp_val;                                                    \
1460       new_value = (TYPE)(rhs OP old_value);                                    \
1461     }                                                                          \
1462   }
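// Note: KMP_DO_PAUSE in the retry path is assumed to emit a spin-wait hint
// (e.g. the x86 PAUSE instruction) to ease contention while the loop
// re-reads *lhs.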
1463 
1464 // -------------------------------------------------------------------------
1465 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1466   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1467   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1468   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1469   }
1470 
1471 // ------------------------------------------------------------------------
1472 // Entries definition for integer operands
1473 //     TYPE_ID - operands type and size (fixed4, float4)
1474 //     OP_ID   - operation identifier (add, sub, mul, ...)
1475 //     TYPE    - operand type
1476 //     BITS    - size in bits, used to distinguish low level calls
1477 //     OP      - operator (used in critical section)
1478 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1479 
// ------------------------------------------------------------------------
// Routines for ATOMIC integer and real operands, other operators
// ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1485 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1486                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1487 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1488                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1489 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1490                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1491 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1492                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1493 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1494                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1495 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1496                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1497 
1498 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1499                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1500 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1501                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1502 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1503                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1504 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1505                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1506 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1507                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1508 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1509                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1510 
1511 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1512                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1513 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1514                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1515 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1516                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1517 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1518                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1519 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1520                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1521 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1522                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1523 
1524 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1525                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1526 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1527                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1528 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1529                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1530 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1531                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1532 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1533                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1534 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1535                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1536 
1537 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1538                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1539 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1540                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1541 
1542 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1543                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1544 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1545                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1547 
1548 // ------------------------------------------------------------------------
1549 // Routines for Extended types: long double, _Quad, complex flavours (use
1550 // critical section)
1551 //     TYPE_ID, OP_ID, TYPE - detailed above
1552 //     OP      - operator
1553 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1554 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1555   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1556   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1557   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1558   }
1559 
1560 /* ------------------------------------------------------------------------- */
1561 // routines for long double type
1562 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1563                     1) // __kmpc_atomic_float10_sub_rev
1564 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1565                     1) // __kmpc_atomic_float10_div_rev
1566 #if KMP_HAVE_QUAD
1567 // routines for _Quad type
1568 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1569                     1) // __kmpc_atomic_float16_sub_rev
1570 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1571                     1) // __kmpc_atomic_float16_div_rev
1572 #if (KMP_ARCH_X86)
1573 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1574                     1) // __kmpc_atomic_float16_sub_a16_rev
1575 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1576                     1) // __kmpc_atomic_float16_div_a16_rev
1577 #endif // KMP_ARCH_X86
1578 #endif // KMP_HAVE_QUAD
1579 
1580 // routines for complex types
1581 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1582                     1) // __kmpc_atomic_cmplx4_sub_rev
1583 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1584                     1) // __kmpc_atomic_cmplx4_div_rev
1585 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1586                     1) // __kmpc_atomic_cmplx8_sub_rev
1587 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1588                     1) // __kmpc_atomic_cmplx8_div_rev
1589 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1590                     1) // __kmpc_atomic_cmplx10_sub_rev
1591 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1592                     1) // __kmpc_atomic_cmplx10_div_rev
1593 #if KMP_HAVE_QUAD
1594 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1595                     1) // __kmpc_atomic_cmplx16_sub_rev
1596 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1597                     1) // __kmpc_atomic_cmplx16_div_rev
1598 #if (KMP_ARCH_X86)
1599 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1600                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1601 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1602                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1603 #endif // KMP_ARCH_X86
1604 #endif // KMP_HAVE_QUAD
1605 
1606 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1607 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1608 
1609 /* ------------------------------------------------------------------------ */
1610 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: to reduce the total number of type combinations, it is assumed     */
/*       that the compiler converts the RHS to the longest floating-point   */
/*       type, i.e. _Quad, before calling any of these routines.            */
/* The conversion to _Quad is done by the compiler during the calculation;  */
/*    the conversion back to TYPE happens before the assignment, like:      */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of software (SW) emulation.    */
1618 /* ------------------------------------------------------------------------ */
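// For illustration, with a float LHS and a _Quad RHS the compiler is expected
// to emit a call such as the following (a usage sketch):
//
//   float x; _Quad q = /* ... */;
//   // "#pragma omp atomic  x = x + q;" maps to
//   __kmpc_atomic_float4_add_fp(id_ref, gtid, &x, q);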
1619 
1620 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1621   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1622       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1623     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1624     KA_TRACE(100,                                                              \
1625              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1626               gtid));
1627 
1628 // -------------------------------------------------------------------------
1629 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1630                            GOMP_FLAG)                                          \
1631   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1632   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1633   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1634   }
1635 
1636 // -------------------------------------------------------------------------
1637 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1638 // -------------------------------------------------------------------------
1639 // X86 or X86_64: no alignment problems ====================================
1640 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1641                            LCK_ID, MASK, GOMP_FLAG)                            \
1642   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1643   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1644   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1645   }
1646 // -------------------------------------------------------------------------
1647 #else
1648 // ------------------------------------------------------------------------
1649 // Code for other architectures that don't handle unaligned accesses.
1650 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1651                            LCK_ID, MASK, GOMP_FLAG)                            \
1652   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1653   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1654   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1655     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1656   } else {                                                                     \
1657     KMP_CHECK_GTID;                                                            \
1658     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1659                        LCK_ID) /* unaligned address - use critical */          \
1660   }                                                                            \
1661   }
1662 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1663 
1664 // -------------------------------------------------------------------------
1665 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1666 // -------------------------------------------------------------------------
1667 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1668                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1669   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1670   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1671   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1672   }
1673 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1674                                LCK_ID, GOMP_FLAG)                              \
1675   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1676   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1677   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1678   }
1679 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1680 
1681 // RHS=float8
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1683                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1684 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1685                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1687                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1688 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1689                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1690 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1691                    0) // __kmpc_atomic_fixed4_mul_float8
1692 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1693                    0) // __kmpc_atomic_fixed4_div_float8
1694 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1695                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1696 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1697                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1698 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1699                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1700 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1701                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1703                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1705                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1706 
1707 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1708 // use them)
1709 #if KMP_HAVE_QUAD
1710 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1711                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1712 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1713                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1715                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1717                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1719                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1721                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1723                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1725                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1726 
1727 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1728                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1729 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1730                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1732                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1734                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1736                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1738                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1740                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1742                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1743 
1744 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1745                    0) // __kmpc_atomic_fixed4_add_fp
1746 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1747                    0) // __kmpc_atomic_fixed4u_add_fp
1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1749                    0) // __kmpc_atomic_fixed4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1751                    0) // __kmpc_atomic_fixed4u_sub_fp
1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1753                    0) // __kmpc_atomic_fixed4_mul_fp
1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1755                    0) // __kmpc_atomic_fixed4u_mul_fp
1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1757                    0) // __kmpc_atomic_fixed4_div_fp
1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1759                    0) // __kmpc_atomic_fixed4u_div_fp
1760 
1761 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1762                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1763 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1764                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1766                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1768                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1770                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1772                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1774                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1776                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1777 
1778 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1779                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1780 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1781                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1783                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1785                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1786 
1787 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1788                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1789 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1790                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1792                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1794                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1795 
1796 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1797                    1) // __kmpc_atomic_float10_add_fp
1798 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1799                    1) // __kmpc_atomic_float10_sub_fp
1800 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1801                    1) // __kmpc_atomic_float10_mul_fp
1802 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1803                    1) // __kmpc_atomic_float10_div_fp
1804 
1805 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1806 // Reverse operations
1807 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1808                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1809 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1810                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1812                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1814                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1815 
1816 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1817                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1819                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1821                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1823                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1824 
1825 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1826                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1827 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1828                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1830                        0) // __kmpc_atomic_fixed4_div_rev_fp
1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1832                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1833 
1834 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1835                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1836 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1837                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1839                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1841                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1842 
1843 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1844                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1845 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1846                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1847 
1848 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1849                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1850 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1851                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1852 
1853 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1854                        1) // __kmpc_atomic_float10_sub_rev_fp
1855 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1856                        1) // __kmpc_atomic_float10_div_rev_fp
1857 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1858 
1859 #endif // KMP_HAVE_QUAD
1860 
1861 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1862 // ------------------------------------------------------------------------
1863 // X86 or X86_64: no alignment problems ====================================
1864 #if USE_CMPXCHG_FIX
1865 // workaround for C78287 (complex(kind=4) data type)
1866 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1867                              LCK_ID, MASK, GOMP_FLAG)                          \
1868   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1869   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1870   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1871   }
1872 // end of the second part of the workaround for C78287
1873 #else
1874 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1875                              LCK_ID, MASK, GOMP_FLAG)                          \
1876   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1877   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1878   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1879   }
1880 #endif // USE_CMPXCHG_FIX
1881 #else
1882 // ------------------------------------------------------------------------
1883 // Code for other architectures that don't handle unaligned accesses.
1884 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1885                              LCK_ID, MASK, GOMP_FLAG)                          \
1886   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1887   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1888   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1889     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1890   } else {                                                                     \
1891     KMP_CHECK_GTID;                                                            \
1892     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1893                        LCK_ID) /* unaligned address - use critical */          \
1894   }                                                                            \
1895   }
1896 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1897 
1898 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1899                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1900 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1901                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1902 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1903                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1904 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1905                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1906 
1907 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1908 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1909 
1910 // ------------------------------------------------------------------------
1911 // Atomic READ routines
1912 
1913 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
1915 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1916 //     fixed)
1917 //     OP_ID   - operation identifier (add, sub, mul, ...)
1918 //     TYPE    - operands' type
1919 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1920   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1921                                              TYPE *loc) {                      \
1922     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1923     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1924 
1925 // ------------------------------------------------------------------------
// Atomic read of *loc using the "compare_and_store_ret" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of the "compare & swap" operation
1934 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1935   {                                                                            \
1936     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1937     union f_i_union {                                                          \
1938       TYPE f_val;                                                              \
1939       kmp_int##BITS i_val;                                                     \
1940     };                                                                         \
1941     union f_i_union old_value;                                                 \
1942     temp_val = *loc;                                                           \
1943     old_value.f_val = temp_val;                                                \
1944     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1945         (kmp_int##BITS *)loc,                                                  \
1946         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1947         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1948     new_value = old_value.f_val;                                               \
1949     return new_value;                                                          \
1950   }
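// The sequence above reads atomically by issuing a compare-and-swap whose
// "new" value equals the expected old value: on success the location is
// rewritten with the same bits, and in either case the returned value is the
// current contents of *loc, observed atomically.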
1951 
1952 // -------------------------------------------------------------------------
// Read of *loc bound by critical section
//     OP     - operator (ignored here; the macro simply reads *loc)
//     LCK_ID - lock identifier
// Note: gtid is not checked as it should always be valid:
// 1- and 2-byte operands expect a valid gtid; other sizes are checked before
// this macro is used
1958 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1959   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1960                                                                                \
1961   new_value = (*loc);                                                          \
1962                                                                                \
1963   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1964 
1965 // -------------------------------------------------------------------------
1966 #ifdef KMP_GOMP_COMPAT
1967 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1968   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1969     KMP_CHECK_GTID;                                                            \
1970     OP_CRITICAL_READ(OP, 0);                                                   \
1971     return new_value;                                                          \
1972   }
1973 #else
1974 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1975 #endif /* KMP_GOMP_COMPAT */
1976 
1977 // -------------------------------------------------------------------------
1978 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1979   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1980   TYPE new_value;                                                              \
1981   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1982   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1983   return new_value;                                                            \
1984   }
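// Note: ATOMIC_FIXED_READ reads the location by atomically adding zero:
// with OP passed as "+", "OP 0" expands to "+ 0", so KMP_TEST_THEN_ADD##BITS
// returns the current value while leaving it unchanged.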
1985 // -------------------------------------------------------------------------
1986 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1987   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1988   TYPE new_value;                                                              \
1989   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1990   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1991   }
1992 // ------------------------------------------------------------------------
1993 // Routines for Extended types: long double, _Quad, complex flavours (use
1994 // critical section)
1995 //     TYPE_ID, OP_ID, TYPE - detailed above
1996 //     OP      - operator
1997 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1998 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1999   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
2000   TYPE new_value;                                                              \
2001   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
2002   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
2003   return new_value;                                                            \
2004   }
2005 
2006 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine returning
// the value by value doesn't work there, so the read value is returned
// through an additional out parameter instead.
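// For illustration, the generated entry therefore takes the out-parameter
// form (a signature sketch following ATOMIC_BEGIN_READ_WRK below):
//
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);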
2010 #if (KMP_OS_WINDOWS)
2011 
2012 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
2013   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2014                                                                                \
2015   (*out) = (*loc);                                                             \
2016                                                                                \
2017   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2018 // ------------------------------------------------------------------------
2019 #ifdef KMP_GOMP_COMPAT
2020 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
2021   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2022     KMP_CHECK_GTID;                                                            \
2023     OP_CRITICAL_READ_WRK(OP, 0);                                               \
2024   }
2025 #else
2026 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2027 #endif /* KMP_GOMP_COMPAT */
2028 // ------------------------------------------------------------------------
2029 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
2030   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2031                                          TYPE *loc) {                          \
2032     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2033     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2034 
2035 // ------------------------------------------------------------------------
2036 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
2037   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
2038   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
2039   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
2040   }
2041 
2042 #endif // KMP_OS_WINDOWS
2043 
2044 // ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2046 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2047 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2048                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2049 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2050                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2051 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2052                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2053 
2054 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
  }
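
// For illustration, ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86)
// below expands to roughly the following (debug assert, trace, and the
// GOMP-compat path omitted):
//
//   void __kmpc_atomic_fixed1_wr(ident_t *id_ref, int gtid, kmp_int8 *lhs,
//                                kmp_int8 rhs) {
//     KMP_XCHG_FIXED8(lhs, rhs); // atomic exchange; the old value is dropped
//   }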

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
  }
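
// Roughly, for a 64-bit operand the loop above behaves like (volatile casts
// elided):
//
//   kmp_real64 old_value = *lhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                       *(kmp_int64 *)&old_value,
//                                       *(kmp_int64 *)&rhs)) {
//     old_value = *lhs; // lost the race; re-read and retry
//   }
//
// This is what the fixed8/float8 writes below use on IA-32 (KMP_ARCH_X86),
// where there is no 8-byte XCHG instruction, only CMPXCHG8B.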

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to select the lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs, int flag) {  \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid; for 1- and
// 2-byte operands a valid gtid is expected, for other sizes it is checked
// before this macro is used
#define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;
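
// The flag argument selects between the two OpenMP capture forms; a sketch of
// what a compiler might emit for the float10 entries defined further below
// (location/gtid arguments elided, e some long double expression):
//
//   v = __kmpc_atomic_float10_add_cpt(..., &x, e, 1); // v = x += e;
//   v = __kmpc_atomic_float10_add_cpt(..., &x, e, 0); // { v = x; x += e; }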

#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(old_value OP rhs);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(old_value OP rhs);                                    \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE old_value, new_value;                                                   \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
  if (flag) {                                                                  \
    return old_value OP rhs;                                                   \
  } else                                                                       \
    return old_value;                                                          \
  }
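
// For example, ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0),
// instantiated just below, expands to roughly (assert, trace, and the
// GOMP-compat path omitted):
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, rhs); // fetch-and-add
//     return flag ? old_value + rhs : old_value;
//   }
//
// The sub_cpt flavours reuse the same fetch-and-add with OP = '-', i.e. the
// addend becomes KMP_TEST_THEN_ADD32(lhs, -rhs).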
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE, BITS, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID, TYPE, BITS, OP, GOMP_FLAG

// CAPTURE routines for mixed types: RHS is float16 (_Quad)
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                                LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }
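
// These mixed entries update a narrow LHS with a _Quad RHS; e.g.
// ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
// KMP_ARCH_X86) yields, in sketch form:
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
//                                        _Quad rhs, int flag);
//
// which atomically performs *lhs = (char)(*lhs + rhs). Note the LCK_ID and
// MASK arguments are not used by this cmpxchg flavour.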

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid; for 1- and
// 2-byte operands a valid gtid is expected, for other sizes it is checked
// before this macro is used
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    new_value OP rhs;                                                          \
    (*lhs) = new_value;                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_L_CPT(OP, 0);                                                  \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// &&, || need separate macros because C has no combined-assignment form for
// them (no &&= or ||=)
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
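
// Note how the GOMP-compat path receives "= *lhs OP" as its operator: inside
// OP_CRITICAL_L_CPT, plain macro substitution then turns
//
//   new_value OP rhs;   /* into */   new_value = *lhs && rhs;
//
// so the whole update is expressed without the nonexistent &&= / ||= forms.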

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
//     OP - comparison operator used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    old_value = *lhs;                                                          \
    *lhs = rhs;                                                                \
    if (flag)                                                                  \
      new_value = rhs;                                                         \
    else                                                                       \
      new_value = old_value;                                                   \
  } else {                                                                     \
    new_value = *lhs;                                                          \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /*TYPE old_value; */                                                       \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag)                                                                  \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }
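
// The "old_value OP rhs" test is re-evaluated on every iteration, so a store
// is only attempted while the update is still needed; if a competing thread
// installs a value that already satisfies the min/max, the loop exits without
// writing anything.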

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
  }                                                                            \
  return *lhs;                                                                 \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  (void)new_value;                                                             \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  }                                                                            \
  return *lhs;                                                                 \
  }
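
// A usage sketch for the fixed4 max flavour defined just below ("loc" and
// "gtid" stand for the usual source-location and thread arguments):
//
//   kmp_int32 x = 5;
//   kmp_int32 v = __kmpc_atomic_fixed4_max_cpt(loc, gtid, &x, 9, 1);
//   // now x == 9 and v == 9; with flag == 0, v would be the old value 5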

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP, 0);                                                    \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }
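
// .EQV. is bitwise equivalence: a .EQV. b == ~(a ^ b) == a ^ ~b. Passing
// "^~" as OP therefore makes OP_CMPXCHG_CPT compute
//
//   new_value = (TYPE)(old_value ^ ~rhs);
//
// while .NEQV. is a plain xor and reuses the regular cmpxchg entries below.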

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to select the lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with a return value don't work
// on Win_32e, so the captured value is returned through an additional
// parameter instead.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
  }
// The end of workaround for cmplx4
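
// In sketch form the cmplx4 capture entries below therefore look like:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
//
// with the captured value stored through *out rather than returned.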

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines return void; the captured value comes back through *out
// (see the Win_32e workaround above)
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0 capture forms for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid; for 1- and
// 2-byte operands a valid gtid is expected, for other sizes it is checked
// before this macro is used
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    /*temp_val = (*lhs);*/                                                     \
    (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(rhs OP old_value);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(rhs OP old_value);                                    \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  (void)new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }
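
// A sketch of the compiler mapping for a reversed (non-commutative) capture,
// using the fixed4 divide flavour defined below (location/gtid elided):
//
//   { v = x; x = e / x; } -> v = __kmpc_atomic_fixed4_div_cpt_rev(..., &x, e, 0);
//   v = x = e / x;        -> v = __kmpc_atomic_fixed4_div_cpt_rev(..., &x, e, 1);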

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
  }
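
// As a usage sketch: for long double x, the reversed capture
//
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }
//
// can be lowered (under this naming scheme) to
//   v = __kmpc_atomic_float10_sub_cpt_rev(&loc, gtid, &x, expr, 0);
// with flag == 0 because the value is captured *before* the update; "loc"
// stands for the compiler-generated source-location ident_t.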

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (rhs)OP(*lhs);                                                    \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) = (rhs)OP(*lhs);                                                    \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
                                    GOMP_FLAG)                                 \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
  }
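
// With TYPE_ID = cmplx4 this emits a void routine whose captured value is
// returned through the extra pointer parameter, roughly as sketched below
// (the exact parameter order comes from ATOMIC_BEGIN_WRK, defined earlier):
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);
//
// flag != 0 updates *lhs first and stores the new value in *out; flag == 0
// stores the old value in *out before updating.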
// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
  }
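
// For these mixed-type routines the RHS is a _Quad while the LHS keeps its
// native type; e.g. the first instantiation below emits roughly
// (ATOMIC_BEGIN_CPT_MIX, defined earlier, provides the exact signature):
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
//
// i.e. the update is computed in _Quad and narrowed: *lhs = (char)(rhs - *lhs).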

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
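
// For example, for double x the capture-write
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// maps under this naming scheme to
//   v = __kmpc_atomic_float8_swp(&loc, gtid, &x, expr);
// where "loc" stands for the compiler-generated source-location ident_t.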

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs) {                               \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID)                                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  old_value = (*lhs);                                                          \
  (*lhs) = rhs;                                                                \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG)                                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP(0);                                                           \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
  return old_value;                                                            \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
  return old_value;                                                            \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS)                                                \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
    return old_value;                                                          \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  (void)old_value;                                                             \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CMPXCHG_SWP(TYPE, BITS)                                                      \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

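// On 32-bit x86 there is no single-instruction 8-byte exchange, but cmpxchg8b
// provides an 8-byte compare-and-store, so the 8-byte swaps below fall back
// to the CAS loop; other targets can use a plain atomic exchange.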
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CRITICAL_SWP(LCK_ID)                                                         \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs, TYPE *out) {                    \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
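
// Per ATOMIC_BEGIN_SWP_WRK above, the cmplx4 routine generated below is
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid,
//                                 kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                 kmp_cmplx32 *out);
//
// with the old value delivered through *out instead of a return value.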
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines                                                  */
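
// Sketch of how a compiler might use one of these generic entry points for a
// 4-byte type; "op_add" and "loc" are illustrative names, not part of the API:
//
//   static void op_add(void *out, void *a, void *b) {
//     *(float *)out = *(float *)a + *(float *)b; // out = a OP b
//   }
//   ...
//   float x, y;
//   __kmpc_atomic_4(&loc, gtid, &x, &y, op_add); // atomically x = x + y
//
// Each routine either runs a compare-and-store loop around (*f)() or, when
// the CAS path cannot be used, serializes the update with an internal lock.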

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
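
// Illustrative use, bracketing an update that has no specialized entry point:
//
//   __kmpc_atomic_start();
//   x = f(x); // guarded by the global __kmp_atomic_lock
//   __kmpc_atomic_end();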

/*!
@}
*/

// end of file
